1 /* Coding system handler (conversion, detection, and etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001 Free Software Foundation, Inc.
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 /*** TABLE OF CONTENTS ***
30 2. Emacs' internal format (emacs-utf-8) handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
36 8. Shift-JIS and BIG5 handlers
38 10. C library functions
39 11. Emacs Lisp library functions
44 /*** 0. General comments ***
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcoding_system_hash_table. The conversion from
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
71 o Charset-base coding system
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
77 o Old Emacs internal format (emacs-mule)
79 The coding system adopted by old versions of Emacs (20 and 21).
81 o ISO2022-base coding system
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used by Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
111 A coding system for a text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
117 Like raw text, but don't do end-of-line conversion.
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX. Return 1 if the data contains
148 a byte sequence which can be decoded into non-ASCII characters by
149 the coding system. Otherwise (i.e. the data contains only ASCII
150 characters or invalid sequence) return 0.
152 It also resets some bits of an integer pointed to by MASK. The macros
153 CATEGORY_MASK_XXX specify each bit of this integer.
155 Below is the template of these functions. */
/* Template of a detector.  It scans the bytes of CODING->source and
   updates the category bits in *MASK: the bit of this category is
   cleared when the data is invalid for XXX, and all other bits are
   cleared when some data must be decoded by XXX.  The bit is named
   CATEGORY_MASK_XXX, as in the real detectors of this file.  */
159 detect_coding_XXX (coding
, mask
)
160 struct coding_system
*coding
;
163 unsigned char *src
= coding
->source
;
164 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
165 int multibytep
= coding
->src_multibyte
;
172 /* Get one byte from the source. If the source is exhausted, jump
173 to no_more_source:. */
175 /* Check if it conforms to XXX. If not, break the loop. */
177 /* As the data is invalid for XXX, reset the proper bit. */
178 *mask
&= ~CATEGORY_MASK_XXX
;
181 /* The source is exhausted. */
183 /* ASCII characters only. */
185 /* Some data should be decoded into non-ASCII characters. */
186 *mask
&= CATEGORY_MASK_XXX
;
191 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
193 These functions decode a byte sequence specified as a source by
194 CODING. The resulting multibyte text goes to a place pointed to by
195 CODING->charbuf, the length of which should not exceed
196 CODING->charbuf_size;
198 These functions set the information of original and decoded texts in
199 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
200 They also set CODING->result to one of CODING_RESULT_XXX indicating
201 how the decoding is finished.
203 Below is the template of these functions. */
/* Template of a decoder.  It reads bytes from CODING->source starting
   at CODING->consumed and stores decoded characters into
   CODING->charbuf.  The decoding loop must stop as soon as CHARBUF is
   full, i.e. when CHARBUF has reached CHARBUF_END.  On exit it records
   the consumed byte/character counts and the number of characters
   produced in CODING.  */
207 decode_coding_XXXX (coding
)
208 struct coding_system
*coding
;
210 unsigned char *src
= coding
->source
+ coding
->consumed
;
211 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
212 /* SRC_BASE remembers the start position in source in each loop.
213 The loop will be exited when there's not enough source code, or
214 when there's no room in CHARBUF for a decoded character. */
215 unsigned char *src_base
;
216 /* A buffer to produce decoded characters. */
217 int *charbuf
= coding
->charbuf
;
218 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
219 int multibytep
= coding
->src_multibyte
;
224 if (charbuf
>= charbuf_end
)
225 /* No more room to produce a decoded character. */
232 if (src_base
< src_end
233 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
234 /* If the source ends by partial bytes to construct a character,
235 treat them as eight-bit raw data. */
236 while (src_base
< src_end
&& charbuf
< charbuf_end
)
237 *charbuf
++ = *src_base
++;
238 /* Remember how many bytes and characters we consumed. If the
239 source is multibyte, the bytes and chars are not identical. */
240 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
241 /* Remember how many characters we produced. */
242 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
246 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
248 These functions encode SRC_BYTES length text at SOURCE of Emacs'
249 internal multibyte format by CODING. The resulting byte sequence
250 goes to a place pointed to by DESTINATION, the length of which
251 should not exceed DST_BYTES.
253 These functions set the information of original and encoded texts in
254 the members produced, produced_char, consumed, and consumed_char of
255 the structure *CODING. They also set the member result to one of
256 CODING_RESULT_XXX indicating how the encoding finished.
258 DST_BYTES zero means that source area and destination area are
259 overlapped, which means that we can produce an encoded text until it
260 reaches the head of not-yet-encoded source text.
262 Below is a template of these functions. */
/* Template of an encoder.  It reads characters from CODING->charbuf
   (CODING->charbuf_used of them) and writes the encoded bytes to
   CODING->destination starting at CODING->produced.  CHARBUF_END is
   one past the last character to encode; CHARBUF is a plain `int *',
   so the end is computed by pointer arithmetic on it.  On exit the
   produced character and byte counts are recorded in CODING.  */
265 encode_coding_XXX (coding
)
266 struct coding_system
*coding
;
268 int multibytep
= coding
->dst_multibyte
;
269 int *charbuf
= coding
->charbuf
;
270 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
271 unsigned char *dst
= coding
->destination
+ coding
->produced
;
272 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
273 unsigned char *adjusted_dst_end
= dst_end
- _MAX_BYTES_PRODUCED_IN_LOOP_
;
274 int produced_chars
= 0;
276 for (; charbuf
< charbuf_end
&& dst
< adjusted_dst_end
; charbuf
++)
279 /* Encode C into DST, and increment DST. */
281 label_no_more_destination
:
282 /* How many chars and bytes we produced. */
283 coding
->produced_char
+= produced_chars
;
284 coding
->produced
= dst
- coding
->destination
;
289 /*** 1. Preamble ***/
296 #include "character.h"
299 #include "composite.h"
303 Lisp_Object Vcoding_system_hash_table
;
305 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
306 Lisp_Object Qunix
, Qdos
, Qmac
;
307 Lisp_Object Qbuffer_file_coding_system
;
308 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
309 Lisp_Object Qdefault_char
;
310 Lisp_Object Qno_conversion
, Qundecided
;
311 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
312 Lisp_Object Qutf_16_be_nosig
, Qutf_16_be
, Qutf_16_le_nosig
, Qutf_16_le
;
313 Lisp_Object Qsignature
, Qendian
, Qbig
, Qlittle
;
314 Lisp_Object Qcoding_system_history
;
315 Lisp_Object Qvalid_codes
;
317 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
318 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
319 Lisp_Object Qstart_process
, Qopen_network_stream
;
320 Lisp_Object Qtarget_idx
;
322 Lisp_Object Vselect_safe_coding_system_function
;
324 /* Mnemonic string for each format of end-of-line. */
325 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
326 /* Mnemonic string to indicate format of end-of-line is not yet
328 Lisp_Object eol_mnemonic_undecided
;
332 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
334 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
336 /* Coding system emacs-mule and raw-text are for converting only
337 end-of-line format. */
338 Lisp_Object Qemacs_mule
, Qraw_text
;
340 /* Coding-systems are handed between Emacs Lisp programs and C internal
341 routines by the following three variables. */
342 /* Coding-system for reading files and receiving data from process. */
343 Lisp_Object Vcoding_system_for_read
;
344 /* Coding-system for writing files and sending data to process. */
345 Lisp_Object Vcoding_system_for_write
;
346 /* Coding-system actually used in the latest I/O. */
347 Lisp_Object Vlast_coding_system_used
;
349 /* A vector of length 256 which contains information about special
350 Latin codes (especially for dealing with Microsoft codes). */
351 Lisp_Object Vlatin_extra_code_table
;
353 /* Flag to inhibit code conversion of end-of-line format. */
354 int inhibit_eol_conversion
;
356 /* Flag to inhibit ISO2022 escape sequence detection. */
357 int inhibit_iso_escape_detection
;
359 /* Flag to make buffer-file-coding-system inherit from process-coding. */
360 int inherit_process_coding_system
;
362 /* Coding system to be used to encode text for terminal display. */
363 struct coding_system terminal_coding
;
365 /* Coding system to be used to encode text for terminal display when
366 terminal coding system is nil. */
367 struct coding_system safe_terminal_coding
;
369 /* Coding system of what is sent from terminal keyboard. */
370 struct coding_system keyboard_coding
;
372 Lisp_Object Vfile_coding_system_alist
;
373 Lisp_Object Vprocess_coding_system_alist
;
374 Lisp_Object Vnetwork_coding_system_alist
;
376 Lisp_Object Vlocale_coding_system
;
380 /* Flag to tell if we look up translation table on character code
382 Lisp_Object Venable_character_translation
;
383 /* Standard translation table to look up on decoding (reading). */
384 Lisp_Object Vstandard_translation_table_for_decode
;
385 /* Standard translation table to look up on encoding (writing). */
386 Lisp_Object Vstandard_translation_table_for_encode
;
388 Lisp_Object Qtranslation_table
;
389 Lisp_Object Qtranslation_table_id
;
390 Lisp_Object Qtranslation_table_for_decode
;
391 Lisp_Object Qtranslation_table_for_encode
;
393 /* Alist of charsets vs revision number. */
394 static Lisp_Object Vcharset_revision_table
;
396 /* Default coding systems used for process I/O. */
397 Lisp_Object Vdefault_process_coding_system
;
399 /* Global flag to tell that we can't call post-read-conversion and
400 pre-write-conversion functions. Usually the value is zero, but it
401 is set to 1 temporarily while such functions are running. This is
402 to avoid infinite recursive call. */
403 static int inhibit_pre_post_conversion
;
405 /* Two special coding systems. */
406 Lisp_Object Vsjis_coding_system
;
407 Lisp_Object Vbig5_coding_system
;
410 static int detect_coding_utf_8
P_ ((struct coding_system
*, int *));
411 static void decode_coding_utf_8
P_ ((struct coding_system
*));
412 static int encode_coding_utf_8
P_ ((struct coding_system
*));
414 static int detect_coding_utf_16
P_ ((struct coding_system
*, int *));
415 static void decode_coding_utf_16
P_ ((struct coding_system
*));
416 static int encode_coding_utf_16
P_ ((struct coding_system
*));
418 static int detect_coding_iso_2022
P_ ((struct coding_system
*, int *));
419 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
420 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
422 static int detect_coding_emacs_mule
P_ ((struct coding_system
*, int *));
423 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
424 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
426 static int detect_coding_sjis
P_ ((struct coding_system
*, int *));
427 static void decode_coding_sjis
P_ ((struct coding_system
*));
428 static int encode_coding_sjis
P_ ((struct coding_system
*));
430 static int detect_coding_big5
P_ ((struct coding_system
*, int *));
431 static void decode_coding_big5
P_ ((struct coding_system
*));
432 static int encode_coding_big5
P_ ((struct coding_system
*));
434 static int detect_coding_ccl
P_ ((struct coding_system
*, int *));
435 static void decode_coding_ccl
P_ ((struct coding_system
*));
436 static int encode_coding_ccl
P_ ((struct coding_system
*));
438 static void decode_coding_raw_text
P_ ((struct coding_system
*));
439 static int encode_coding_raw_text
P_ ((struct coding_system
*));
442 /* ISO2022 section */
444 #define CODING_ISO_INITIAL(coding, reg) \
445 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
446 coding_attr_iso_initial), \
450 #define CODING_ISO_REQUEST(coding, charset_id) \
451 ((charset_id <= (coding)->max_charset_id \
452 ? (coding)->safe_charsets[charset_id] \
456 #define CODING_ISO_FLAGS(coding) \
457 ((coding)->spec.iso_2022.flags)
458 #define CODING_ISO_DESIGNATION(coding, reg) \
459 ((coding)->spec.iso_2022.current_designation[reg])
460 #define CODING_ISO_INVOCATION(coding, plane) \
461 ((coding)->spec.iso_2022.current_invocation[plane])
462 #define CODING_ISO_SINGLE_SHIFTING(coding) \
463 ((coding)->spec.iso_2022.single_shifting)
464 #define CODING_ISO_BOL(coding) \
465 ((coding)->spec.iso_2022.bol)
466 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
467 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
469 /* Control characters of ISO2022. */
470 /* code */ /* function */
471 #define ISO_CODE_LF 0x0A /* line-feed */
472 #define ISO_CODE_CR 0x0D /* carriage-return */
473 #define ISO_CODE_SO 0x0E /* shift-out */
474 #define ISO_CODE_SI 0x0F /* shift-in */
475 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
476 #define ISO_CODE_ESC 0x1B /* escape */
477 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
478 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
479 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
481 /* All code (1-byte) of ISO2022 is classified into one of the
483 enum iso_code_class_type
485 ISO_control_0
, /* Control codes in the range
486 0x00..0x1F and 0x7F, except for the
487 following 5 codes. */
488 ISO_carriage_return
, /* ISO_CODE_CR (0x0D) */
489 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
490 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
491 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
492 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
493 ISO_control_1
, /* Control codes in the range
494 0x80..0x9F, except for the
495 following 3 codes. */
496 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
497 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
498 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
499 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
500 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
501 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
502 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
505 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
506 `iso-flags' attribute of an iso2022 coding system. */
508 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
509 instead of the correct short-form sequence (e.g. ESC $ A). */
510 #define CODING_ISO_FLAG_LONG_FORM 0x0001
512 /* If set, reset graphic planes and registers at end-of-line to the
514 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
516 /* If set, reset graphic planes and registers before any control
517 characters to the initial state. */
518 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
520 /* If set, encode by 7-bit environment. */
521 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
523 /* If set, use locking-shift function. */
524 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
526 /* If set, use single-shift function. Overwrite
527 CODING_ISO_FLAG_LOCKING_SHIFT. */
528 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
530 /* If set, use designation escape sequence. */
531 #define CODING_ISO_FLAG_DESIGNATION 0x0040
533 /* If set, produce revision number sequence. */
534 #define CODING_ISO_FLAG_REVISION 0x0080
536 /* If set, produce ISO6429's direction specifying sequence. */
537 #define CODING_ISO_FLAG_DIRECTION 0x0100
539 /* If set, assume designation states are reset at beginning of line on
541 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
543 /* If set, designation sequence should be placed at beginning of line
545 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
547 /* If set, do not encode unsafe characters on output. */
548 #define CODING_ISO_FLAG_SAFE 0x0800
550 /* If set, extra latin codes (128..159) are accepted as a valid code
552 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
554 #define CODING_ISO_FLAG_COMPOSITION 0x2000
556 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
558 #define CODING_ISO_FLAG_FULL_SUPPORT 0x8000
560 /* A character to be produced on output if encoding of the original
561 character is prohibited by CODING_ISO_FLAG_SAFE. */
562 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
566 #define CODING_UTF_16_BOM(coding) \
567 ((coding)->spec.utf_16.bom)
569 #define CODING_UTF_16_ENDIAN(coding) \
570 ((coding)->spec.utf_16.endian)
572 #define CODING_UTF_16_SURROGATE(coding) \
573 ((coding)->spec.utf_16.surrogate)
577 #define CODING_CCL_DECODER(coding) \
578 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
579 #define CODING_CCL_ENCODER(coding) \
580 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
581 #define CODING_CCL_VALIDS(coding) \
582 (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
585 /* Index for each coding category in `coding_category_table' */
589 coding_category_iso_7
,
590 coding_category_iso_7_tight
,
591 coding_category_iso_8_1
,
592 coding_category_iso_8_2
,
593 coding_category_iso_7_else
,
594 coding_category_iso_8_else
,
595 coding_category_utf_8
,
596 coding_category_utf_16_auto
,
597 coding_category_utf_16_be
,
598 coding_category_utf_16_le
,
599 coding_category_utf_16_be_nosig
,
600 coding_category_utf_16_le_nosig
,
601 coding_category_charset
,
602 coding_category_sjis
,
603 coding_category_big5
,
605 coding_category_emacs_mule
,
606 /* All above are targets of code detection. */
607 coding_category_raw_text
,
608 coding_category_undecided
,
612 /* Definitions of flag bits used in detect_coding_XXXX. */
613 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
614 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
615 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
616 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
617 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
618 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
619 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
620 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
621 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
622 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
623 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
624 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
625 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
626 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
627 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
628 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
630 /* This value is returned if detect_coding_mask () finds nothing other
631 than ASCII characters. */
632 #define CATEGORY_MASK_ANY \
633 (CATEGORY_MASK_ISO_7 \
634 | CATEGORY_MASK_ISO_7_TIGHT \
635 | CATEGORY_MASK_ISO_8_1 \
636 | CATEGORY_MASK_ISO_8_2 \
637 | CATEGORY_MASK_ISO_7_ELSE \
638 | CATEGORY_MASK_ISO_8_ELSE \
639 | CATEGORY_MASK_UTF_8 \
640 | CATEGORY_MASK_UTF_16_BE \
641 | CATEGORY_MASK_UTF_16_LE \
642 | CATEGORY_MASK_UTF_16_BE_NOSIG \
643 | CATEGORY_MASK_UTF_16_LE_NOSIG \
644 | CATEGORY_MASK_CHARSET \
645 | CATEGORY_MASK_SJIS \
646 | CATEGORY_MASK_BIG5 \
647 | CATEGORY_MASK_CCL \
648 | CATEGORY_MASK_EMACS_MULE)
651 #define CATEGORY_MASK_ISO_7BIT \
652 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
654 #define CATEGORY_MASK_ISO_8BIT \
655 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
657 #define CATEGORY_MASK_ISO_ELSE \
658 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
660 #define CATEGORY_MASK_ISO_ESCAPE \
661 (CATEGORY_MASK_ISO_7 \
662 | CATEGORY_MASK_ISO_7_TIGHT \
663 | CATEGORY_MASK_ISO_7_ELSE \
664 | CATEGORY_MASK_ISO_8_ELSE)
666 #define CATEGORY_MASK_ISO \
667 ( CATEGORY_MASK_ISO_7BIT \
668 | CATEGORY_MASK_ISO_8BIT \
669 | CATEGORY_MASK_ISO_ELSE)
671 #define CATEGORY_MASK_UTF_16 \
672 (CATEGORY_MASK_UTF_16_BE \
673 | CATEGORY_MASK_UTF_16_LE \
674 | CATEGORY_MASK_UTF_16_BE_NOSIG \
675 | CATEGORY_MASK_UTF_16_LE_NOSIG)
678 /* List of symbols `coding-category-xxx' ordered by priority. This
679 variable is exposed to Emacs Lisp. */
680 static Lisp_Object Vcoding_category_list
;
682 /* Table of coding categories (Lisp symbols). This variable is for
684 static Lisp_Object Vcoding_category_table
;
686 /* Table of coding-categories ordered by priority. */
687 static enum coding_category coding_priorities
[coding_category_max
];
689 /* Nth element is a coding context for the coding system bound to the
690 Nth coding category. */
691 static struct coding_system coding_categories
[coding_category_max
];
693 static int detected_mask
[coding_category_raw_text
] =
701 CATEGORY_MASK_UTF_16
,
702 CATEGORY_MASK_UTF_16
,
703 CATEGORY_MASK_UTF_16
,
704 CATEGORY_MASK_UTF_16
,
705 CATEGORY_MASK_UTF_16
,
706 CATEGORY_MASK_CHARSET
,
710 CATEGORY_MASK_EMACS_MULE
713 /*** Commonly used macros and functions ***/
716 #define min(a, b) ((a) < (b) ? (a) : (b))
719 #define max(a, b) ((a) > (b) ? (a) : (b))
722 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \
724 attrs = CODING_ID_ATTRS (coding->id); \
725 eol_type = CODING_ID_EOL_TYPE (coding->id); \
726 if (VECTORP (eol_type)) \
728 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
732 /* Safely get one byte from the source text pointed by SRC which ends
733 at SRC_END, and set C to that byte. If there are not enough bytes
734 in the source, it jumps to `no_more_source'. The caller
735 should declare and set these variables appropriately in advance:
736 src, src_end, multibytep
739 #define ONE_MORE_BYTE(c) \
741 if (src == src_end) \
743 if (src_base < src) \
744 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \
745 goto no_more_source; \
748 if (multibytep && (c & 0x80)) \
750 if ((c & 0xFE) != 0xC0) \
751 error ("Undecodable char found"); \
752 c = ((c & 1) << 6) | *src++; \
758 #define ONE_MORE_BYTE_NO_CHECK(c) \
761 if (multibytep && (c & 0x80)) \
763 if ((c & 0xFE) != 0xC0) \
764 error ("Undecodable char found"); \
765 c = ((c & 1) << 6) | *src++; \
770 /* Store a byte C in the place pointed by DST and increment DST to the
771 next free point, and increment PRODUCED_CHARS. The caller should
772 assure that C is 0..127, and declare and set the variable `dst'
773 appropriately in advance.
777 #define EMIT_ONE_ASCII_BYTE(c) \
784 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
786 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
788 produced_chars += 2; \
789 *dst++ = (c1), *dst++ = (c2); \
793 /* Store a byte C in the place pointed by DST and increment DST to the
794 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
795 nonzero, store in an appropriate multibyte form. The caller should
796 declare and set the variables `dst' and `multibytep' appropriately
799 #define EMIT_ONE_BYTE(c) \
806 ch = BYTE8_TO_CHAR (ch); \
807 CHAR_STRING_ADVANCE (ch, dst); \
814 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
816 #define EMIT_TWO_BYTES(c1, c2) \
818 produced_chars += 2; \
825 ch = BYTE8_TO_CHAR (ch); \
826 CHAR_STRING_ADVANCE (ch, dst); \
829 ch = BYTE8_TO_CHAR (ch); \
830 CHAR_STRING_ADVANCE (ch, dst); \
840 #define EMIT_THREE_BYTES(c1, c2, c3) \
842 EMIT_ONE_BYTE (c1); \
843 EMIT_TWO_BYTES (c2, c3); \
847 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
849 EMIT_TWO_BYTES (c1, c2); \
850 EMIT_TWO_BYTES (c3, c4); \
854 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
856 charset_map_loaded = 0; \
857 c = DECODE_CHAR (charset, code); \
858 if (charset_map_loaded) \
860 unsigned char *orig = coding->source; \
863 coding_set_source (coding); \
864 offset = coding->source - orig; \
866 src_base += offset; \
872 #define ASSURE_DESTINATION(bytes) \
874 if (dst + (bytes) >= dst_end) \
876 int more_bytes = charbuf_end - charbuf + (bytes); \
878 dst = alloc_destination (coding, more_bytes, dst); \
879 dst_end = coding->destination + coding->dst_bytes; \
/* Recompute CODING->source from CODING->src_object.
   If the source object is a buffer and CODING->src_pos is negative,
   the address is GAP_END_ADDR + src_pos_byte.  Otherwise, for a
   buffer, it is BUF_BEG_ADDR + src_pos_byte - 1, advanced by the gap
   size when src_pos_byte is at or after the gap position.  If the
   source object is a string, the address points into the string data
   at offset src_pos_byte.  For any other source nothing is changed.  */
886 coding_set_source (coding
)
887 struct coding_system
*coding
;
889 if (BUFFERP (coding
->src_object
))
891 if (coding
->src_pos
< 0)
892 coding
->source
= GAP_END_ADDR
+ coding
->src_pos_byte
;
895 struct buffer
*buf
= XBUFFER (coding
->src_object
);
896 EMACS_INT gpt_byte
= BUF_GPT_BYTE (buf
);
897 unsigned char *beg_addr
= BUF_BEG_ADDR (buf
);
899 coding
->source
= beg_addr
+ coding
->src_pos_byte
- 1;
900 if (coding
->src_pos_byte
>= gpt_byte
)
901 coding
->source
+= BUF_GAP_SIZE (buf
);
904 else if (STRINGP (coding
->src_object
))
906 coding
->source
= (XSTRING (coding
->src_object
)->data
907 + coding
->src_pos_byte
);
910 /* Otherwise, the source is C string and is never relocated
911 automatically. Thus we don't have to update anything. */
/* Recompute CODING->destination and CODING->dst_bytes when the
   destination object is a buffer.  The destination address is
   BUF_BEG_ADDR of that buffer + dst_pos_byte - 1.  When
   CODING->src_pos is negative, dst_bytes is measured up to
   GAP_END_ADDR minus the not-yet-consumed source bytes; the other
   visible assignment measures it up to BUF_GAP_END_ADDR of the
   destination buffer.  */
916 coding_set_destination (coding
)
917 struct coding_system
*coding
;
919 if (BUFFERP (coding
->dst_object
))
921 /* We are sure that coding->dst_pos_byte is before the gap of the
923 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
924 + coding
->dst_pos_byte
- 1);
925 if (coding
->src_pos
< 0)
926 coding
->dst_bytes
= (GAP_END_ADDR
927 - (coding
->src_bytes
- coding
->consumed
)
928 - coding
->destination
);
930 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
931 - coding
->destination
);
934 /* Otherwise, the destination is C string and is never relocated
935 automatically. Thus we don't have to update anything. */
/* Enlarge the destination area of CODING by BYTES bytes with xrealloc.
   CODING->destination is replaced by the reallocated address and
   CODING->dst_bytes is increased by BYTES.  Pointers into the old
   destination must be recomputed by the caller.  */
941 coding_alloc_by_realloc (coding
, bytes
)
942 struct coding_system
*coding
;
945 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
946 coding
->dst_bytes
+ bytes
);
947 coding
->dst_bytes
+= bytes
;
/* Make room for BYTES more bytes when the destination of CODING is a
   buffer.  If the source and destination are the same buffer, the
   not-yet-consumed source bytes (src_bytes - consumed) are temporarily
   taken out of GAP_SIZE and added to ZV, Z, ZV_BYTE and Z_BYTE, and
   then the adjustment is undone.  Otherwise the destination buffer is
   temporarily made current and the previous current buffer is
   restored afterwards.
   NOTE(review): the statement that actually enlarges the gap lies
   between the adjust/restore pairs and is not visible here -- confirm.  */
951 coding_alloc_by_making_gap (coding
, bytes
)
952 struct coding_system
*coding
;
955 if (BUFFERP (coding
->dst_object
)
956 && EQ (coding
->src_object
, coding
->dst_object
))
958 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
960 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
962 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
966 Lisp_Object this_buffer
;
968 this_buffer
= Fcurrent_buffer ();
969 set_buffer_internal (XBUFFER (coding
->dst_object
));
971 set_buffer_internal (XBUFFER (this_buffer
));
/* Enlarge the destination area of CODING by NBYTES bytes and return
   the new address corresponding to DST.  DST is remembered as an
   offset from CODING->destination, the area is grown by
   coding_alloc_by_making_gap (buffer destination) or
   coding_alloc_by_realloc, CODING->result is set to
   CODING_RESULT_SUCCESS, the destination address is refreshed by
   coding_set_destination, and DST is rebuilt from the saved offset.  */
976 static unsigned char *
977 alloc_destination (coding
, nbytes
, dst
)
978 struct coding_system
*coding
;
982 EMACS_INT offset
= dst
- coding
->destination
;
984 if (BUFFERP (coding
->dst_object
))
985 coding_alloc_by_making_gap (coding
, nbytes
);
987 coding_alloc_by_realloc (coding
, nbytes
);
988 coding
->result
= CODING_RESULT_SUCCESS
;
989 coding_set_destination (coding
);
990 dst
= coding
->destination
+ offset
;
995 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1002 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1003 Check if a text is encoded in UTF-8. If it is, return
1004 CATEGORY_MASK_UTF_8, else return 0. */
1006 #define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1007 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
1008 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1009 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1010 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1011 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
/* Detector for UTF-8.  Scanning starts after the first
   CODING->head_ascii bytes of CODING->source.  Each byte is classified
   with the UTF_8_xxx_P predicates: a leading octet of a 2- to 5-octet
   sequence must be followed by the matching number of extra octets.
   One exit clears CATEGORY_MASK_UTF_8 from *MASK and another clears
   every bit except CATEGORY_MASK_UTF_8.
   NOTE(review): the loop and the labels selecting between the two
   exits are not visible here -- confirm against the full function.  */
1014 detect_coding_utf_8 (coding
, mask
)
1015 struct coding_system
*coding
;
1018 unsigned char *src
= coding
->source
, *src_base
= src
;
1019 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1020 int multibytep
= coding
->src_multibyte
;
1021 int consumed_chars
= 0;
1024 /* A coding system of this category is always ASCII compatible. */
1025 src
+= coding
->head_ascii
;
1029 int c
, c1
, c2
, c3
, c4
;
1032 if (UTF_8_1_OCTET_P (c
))
1035 if (! UTF_8_EXTRA_OCTET_P (c1
))
1037 if (UTF_8_2_OCTET_LEADING_P (c
))
1043 if (! UTF_8_EXTRA_OCTET_P (c2
))
1045 if (UTF_8_3_OCTET_LEADING_P (c
))
1051 if (! UTF_8_EXTRA_OCTET_P (c3
))
1053 if (UTF_8_4_OCTET_LEADING_P (c
))
1059 if (! UTF_8_EXTRA_OCTET_P (c4
))
1061 if (UTF_8_5_OCTET_LEADING_P (c
))
1068 *mask
&= ~CATEGORY_MASK_UTF_8
;
1074 *mask
&= CATEGORY_MASK_UTF_8
;
/* Decoder for UTF-8.  It reads bytes from CODING->source starting at
   CODING->consumed and stores characters into CODING->charbuf, stopping
   when CHARBUF reaches CHARBUF_END.  A one-octet byte is checked
   against the end-of-line type (Qdos, Qmac); longer sequences of two
   to five octets are assembled from the payload bits of C1..C5 after
   each following byte is verified to be an extra octet.  One store
   into CHARBUF writes C unchanged if it is an ASCII byte and
   BYTE8_TO_CHAR (C) otherwise.  On exit CODING->consumed_char,
   CODING->consumed and CODING->charbuf_used are updated.
   NOTE(review): the loop structure and labels are not visible here --
   confirm which path reaches the BYTE8_TO_CHAR store.  */
1080 decode_coding_utf_8 (coding
)
1081 struct coding_system
*coding
;
1083 unsigned char *src
= coding
->source
+ coding
->consumed
;
1084 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1085 unsigned char *src_base
;
1086 int *charbuf
= coding
->charbuf
;
1087 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1088 int consumed_chars
= 0, consumed_chars_base
;
1089 int multibytep
= coding
->src_multibyte
;
1090 Lisp_Object attr
, eol_type
, charset_list
;
1092 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
1096 int c
, c1
, c2
, c3
, c4
, c5
;
1099 consumed_chars_base
= consumed_chars
;
1101 if (charbuf
>= charbuf_end
)
1105 if (UTF_8_1_OCTET_P(c1
))
1110 if (EQ (eol_type
, Qdos
))
1113 goto no_more_source
;
1117 else if (EQ (eol_type
, Qmac
))
1124 if (! UTF_8_EXTRA_OCTET_P (c2
))
1126 if (UTF_8_2_OCTET_LEADING_P (c1
))
1127 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1131 if (! UTF_8_EXTRA_OCTET_P (c3
))
1133 if (UTF_8_3_OCTET_LEADING_P (c1
))
1134 c
= (((c1
& 0xF) << 12)
1135 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1139 if (! UTF_8_EXTRA_OCTET_P (c4
))
1141 if (UTF_8_4_OCTET_LEADING_P (c1
))
1142 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1143 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1147 if (! UTF_8_EXTRA_OCTET_P (c5
))
1149 if (UTF_8_5_OCTET_LEADING_P (c1
))
1151 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1152 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
1169 consumed_chars
= consumed_chars_base
;
1171 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1176 coding
->consumed_char
+= consumed_chars_base
;
1177 coding
->consumed
= src_base
- coding
->source
;
1178 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1183 encode_coding_utf_8 (coding
)
1184 struct coding_system
*coding
;
1186 int multibytep
= coding
->dst_multibyte
;
1187 int *charbuf
= coding
->charbuf
;
1188 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1189 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1190 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1191 int produced_chars
= 0;
1196 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1198 while (charbuf
< charbuf_end
)
1200 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1202 ASSURE_DESTINATION (safe_room
);
1204 CHAR_STRING_ADVANCE (c
, pend
);
1205 for (p
= str
; p
< pend
; p
++)
1211 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1213 while (charbuf
< charbuf_end
)
1215 ASSURE_DESTINATION (safe_room
);
1217 dst
+= CHAR_STRING (c
, dst
);
1221 coding
->result
= CODING_RESULT_SUCCESS
;
1222 coding
->produced_char
+= produced_chars
;
1223 coding
->produced
= dst
- coding
->destination
;
1228 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1229 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1230 Little Endian (otherwise). If it is, return
1231 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
/* Predicates on a UTF-16 code unit VAL.  The surrogate tests look only
   at bits 10..15 of VAL, so for a 16-bit value a high surrogate is
   0xD800..0xDBFF and a low surrogate is 0xDC00..0xDFFF.
   UTF_16_INVALID_P is true for 0xFFFE, 0xFFFF, and any low surrogate;
   it may evaluate VAL up to three times.  */
1234 #define UTF_16_HIGH_SURROGATE_P(val) \
1235 (((val) & 0xFC00) == 0xD800)
1237 #define UTF_16_LOW_SURROGATE_P(val) \
1238 (((val) & 0xFC00) == 0xDC00)
1240 #define UTF_16_INVALID_P(val) \
1241 (((val) == 0xFFFE) \
1242 || ((val) == 0xFFFF) \
1243 || UTF_16_LOW_SURROGATE_P (val))
1247 detect_coding_utf_16 (coding
, mask
)
1248 struct coding_system
*coding
;
1251 unsigned char *src
= coding
->source
, *src_base
= src
;
1252 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1253 int multibytep
= coding
->src_multibyte
;
1254 int consumed_chars
= 0;
1260 if ((c1
== 0xFF) && (c2
== 0xFE))
1262 *mask
&= CATEGORY_MASK_UTF_16_LE
;
1265 else if ((c1
== 0xFE) && (c2
== 0xFF))
1267 *mask
&= CATEGORY_MASK_UTF_16_BE
;
1275 decode_coding_utf_16 (coding
)
1276 struct coding_system
*coding
;
1278 unsigned char *src
= coding
->source
+ coding
->consumed
;
1279 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1280 unsigned char *src_base
;
1281 int *charbuf
= coding
->charbuf
;
1282 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1283 int consumed_chars
= 0, consumed_chars_base
;
1284 int multibytep
= coding
->src_multibyte
;
1285 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1286 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1287 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1288 Lisp_Object attr
, eol_type
, charset_list
;
1290 CODING_GET_INFO (coding
, attr
, eol_type
, charset_list
);
1292 if (bom
!= utf_16_without_bom
)
1300 if (bom
== utf_16_with_bom
)
1302 if (endian
== utf_16_big_endian
1303 ? c
!= 0xFFFE : c
!= 0xFEFF)
1305 /* We are sure that there's enough room at CHARBUF. */
1314 CODING_UTF_16_ENDIAN (coding
)
1315 = endian
= utf_16_big_endian
;
1316 else if (c
== 0xFEFF)
1317 CODING_UTF_16_ENDIAN (coding
)
1318 = endian
= utf_16_little_endian
;
1321 CODING_UTF_16_ENDIAN (coding
)
1322 = endian
= utf_16_big_endian
;
1326 CODING_UTF_16_BOM (coding
) = utf_16_with_bom
;
1334 consumed_chars_base
= consumed_chars
;
1336 if (charbuf
+ 2 >= charbuf_end
)
1341 c
= (endian
== utf_16_big_endian
1342 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
1345 if (! UTF_16_LOW_SURROGATE_P (c
))
1347 if (endian
== utf_16_big_endian
)
1348 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1350 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1354 if (UTF_16_HIGH_SURROGATE_P (c
))
1355 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1361 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1362 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1368 if (UTF_16_HIGH_SURROGATE_P (c
))
1369 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1376 coding
->consumed_char
+= consumed_chars_base
;
1377 coding
->consumed
= src_base
- coding
->source
;
1378 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1382 encode_coding_utf_16 (coding
)
1383 struct coding_system
*coding
;
1385 int multibytep
= coding
->dst_multibyte
;
1386 int *charbuf
= coding
->charbuf
;
1387 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1388 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1389 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1391 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1392 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1393 int produced_chars
= 0;
1394 Lisp_Object attrs
, eol_type
, charset_list
;
1397 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1399 if (bom
== utf_16_with_bom
)
1401 ASSURE_DESTINATION (safe_room
);
1403 EMIT_TWO_BYTES (0xFF, 0xFE);
1405 EMIT_TWO_BYTES (0xFE, 0xFF);
1406 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1409 while (charbuf
< charbuf_end
)
1411 ASSURE_DESTINATION (safe_room
);
1413 if (c
>= MAX_UNICODE_CHAR
)
1414 c
= coding
->default_char
;
1419 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1421 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
1428 c1
= (c
>> 10) + 0xD800;
1429 c2
= (c
& 0x3FF) + 0xDC00;
1431 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1433 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1436 coding
->result
= CODING_RESULT_SUCCESS
;
1437 coding
->produced
= dst
- coding
->destination
;
1438 coding
->produced_char
+= produced_chars
;
1443 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1445 /* Emacs' internal format for representation of multiple character
1446 sets is a kind of multi-byte encoding, i.e. characters are
1447 represented by variable-length sequences of one-byte codes.
1449 ASCII characters and control characters (e.g. `tab', `newline') are
1450 represented by one-byte sequences which are their ASCII codes, in
1451 the range 0x00 through 0x7F.
1453 8-bit characters of the range 0x80..0x9F are represented by
1454 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1457 8-bit characters of the range 0xA0..0xFF are represented by
1458 one-byte sequences which are their 8-bit code.
1460 The other characters are represented by a sequence of `base
1461 leading-code', optional `extended leading-code', and one or two
1462 `position-code's. The length of the sequence is determined by the
1463 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1464 whereas extended leading-code and position-code take the range 0xA0
1465 through 0xFF. See `charset.h' for more details about leading-code
1468 --- CODE RANGE of Emacs' internal format ---
1472 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1473 eight-bit-graphic 0xA0..0xBF
1474 ELSE 0x81..0x9D + [0xA0..0xFF]+
1475 ---------------------------------------------
1477 As this is the internal character representation, the format is
1478 usually not used externally (i.e. in a file or in a data sent to a
1479 process). But, it is possible to have a text externally in this
1480 format (i.e. by encoding by the coding system `emacs-mule').
1482 In that case, a sequence of one-byte codes has a slightly different
1485 At first, all characters in eight-bit-control are represented by
1486 one-byte sequences which are their 8-bit code.
1488 Next, character composition data are represented by the byte
1489 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1491 METHOD is 0xF0 plus one of composition method (enum
1492 composition_method),
1494 BYTES is 0xA0 plus a byte length of this composition data,
1496 CHARS is 0xA0 plus a number of characters composed by this
1499 COMPONENTs are characters of multibyte form or composition
1500 rules encoded by two bytes of ASCII codes.
1502 In addition, for backward compatibility, the following formats are
1503 also recognized as composition data on decoding.
1506 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1509 MSEQ is a multibyte form but in this special format:
1510 ASCII: 0xA0 ASCII_CODE+0x80,
1511 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1512 RULE is a one byte code of the range 0xA0..0xF0 that
1513 represents a composition rule.
1516 char emacs_mule_bytes
[256];
1518 /* Leading-code followed by extended leading-code. */
/* NOTE(review): EMACS_MULE_LEADING_CODES, later in this file, spells
   these same four byte values as literals (0x9A..0x9D); keep the two
   in sync.  */
1519 #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
1520 #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
1521 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1522 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1526 emacs_mule_char (coding
, composition
, nbytes
, nchars
)
1527 struct coding_system
*coding
;
1529 int *nbytes
, *nchars
;
1531 unsigned char *src
= coding
->source
+ coding
->consumed
;
1532 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1533 int multibytep
= coding
->src_multibyte
;
1534 unsigned char *src_base
= src
;
1535 struct charset
*charset
;
1538 int consumed_chars
= 0;
1549 *nbytes
= src
- src_base
;
1550 *nchars
= consumed_chars
;
1555 switch (emacs_mule_bytes
[c
])
1558 if (! (charset
= emacs_mule_charset
[c
]))
1565 if (c
== LEADING_CODE_PRIVATE_11
1566 || c
== LEADING_CODE_PRIVATE_12
)
1569 if (! (charset
= emacs_mule_charset
[c
]))
1576 if (! (charset
= emacs_mule_charset
[c
]))
1579 code
= (c
& 0x7F) << 7;
1586 if (! (charset
= emacs_mule_charset
[c
]))
1589 code
= (c
& 0x7F) << 7;
1596 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
) ? charset_ascii
1597 : code
< 0xA0 ? charset_8_bit_control
1598 : charset_8_bit_graphic
);
1604 c
= DECODE_CHAR (charset
, code
);
1607 *nbytes
= src
- src_base
;
1608 *nchars
= consumed_chars
;
1619 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1620 Check if a text is encoded in `emacs-mule'. */
1623 detect_coding_emacs_mule (coding
, mask
)
1624 struct coding_system
*coding
;
1627 unsigned char *src
= coding
->source
, *src_base
= src
;
1628 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1629 int multibytep
= coding
->src_multibyte
;
1630 int consumed_chars
= 0;
1634 /* A coding system of this category is always ASCII compatible. */
1635 src
+= coding
->head_ascii
;
1643 /* Perhaps the start of composite character. We simply skip
1644 it because analyzing it is too heavy for detecting. But,
1645 at least, we check that the composite character
1646 consists of more than 4 bytes. */
1647 unsigned char *src_base
;
1657 if (src
- src_base
<= 4)
1667 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
1672 unsigned char *src_base
= src
- 1;
1679 if (src
- src_base
!= emacs_mule_bytes
[*src_base
])
1684 *mask
&= ~CATEGORY_MASK_EMACS_MULE
;
1690 *mask
&= CATEGORY_MASK_EMACS_MULE
;
1695 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1697 /* Decode a character represented as a component of composition
1698 sequence of Emacs 20/21 style at SRC. Set C to that character and
1699 update SRC to the head of next character (or an encoded composition
1700 rule). If SRC doesn't point to a composition component, set C to -1.
1701 If SRC points to an invalid byte sequence, global exit by a return
1704 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1708 int nbytes, nchars; \
1710 if (src == src_end) \
1712 c = emacs_mule_char (coding, 1, &nbytes, &nchars); \
1717 goto invalid_code; \
1721 consumed_chars += nchars; \
1726 /* Decode a composition rule represented as a component of composition
1727 sequence of Emacs 20 style at SRC. Set C to the rule. If SRC
1728 points to an invalid byte sequence, set C to -1. */
/* The decoded rule is stored through BUF as
   COMPOSITION_ENCODE_RULE (gref, nref), with gref = c / 9 and
   nref = c % 9; values of C outside 0..80 are rejected.

   NOTE(review): as written, the macro jumps to `invalid_code' while
   SRC is still *before* SRC_END, and otherwise reads a byte with
   ONE_MORE_BYTE_NO_CHECK.  The comparison looks inverted (`>=' seems
   intended) -- confirm.  Also note that the visible code leaves via
   `goto invalid_code' on a bad rule instead of setting C to -1 as the
   comment above says.  */
1730 #define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \
1732 int c, gref, nref; \
1734 if (src < src_end) \
1735 goto invalid_code; \
1736 ONE_MORE_BYTE_NO_CHECK (c); \
1738 if (c < 0 || c >= 81) \
1739 goto invalid_code; \
1741 gref = c / 9, nref = c % 9; \
1742 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1746 #define ADD_COMPOSITION_DATA(buf, method, nchars) \
1749 *buf++ = coding->produced_char + char_offset; \
1750 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \
1756 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1758 /* Emacs 21 style format. The first three bytes at SRC are \
1759 (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1760 the byte length of this composition information, CHARS is the \
1761 number of characters composed by this composition. */ \
1762 enum composition_method method = c - 0xF0; \
1763 int consumed_chars_limit; \
1764 int nbytes, nchars; \
1766 ONE_MORE_BYTE (c); \
1767 nbytes = c - 0xA0; \
1769 goto invalid_code; \
1770 ONE_MORE_BYTE (c); \
1771 nchars = c - 0xA0; \
1772 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
1773 consumed_chars_limit = consumed_chars_base + nbytes; \
1774 if (method != COMPOSITION_RELATIVE) \
1777 while (consumed_chars < consumed_chars_limit) \
1779 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1780 DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \
1782 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1784 if (consumed_chars < consumed_chars_limit) \
1785 goto invalid_code; \
1790 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1792 /* Emacs 20 style format for relative composition. */ \
1793 /* Store multibyte form of characters to be composed. */ \
1794 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1795 int *buf = components; \
1799 ONE_MORE_BYTE (c); /* skip 0x80 */ \
1800 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1801 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1803 goto invalid_code; \
1804 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \
1805 for (j = 0; j < i; j++) \
1806 *charbuf++ = components[j]; \
/* Decode an Emacs 20 style rule-base composition.  Components are
   gathered into the local array `components' through the cursor BUF
   (one DECODE_EMACS_MULE_COMPOSITION_CHAR, then apparently repeated
   RULE/CHAR pairs), a composition annotation is emitted with
   ADD_COMPOSITION_DATA, and the gathered values are copied to CHARBUF.
   Jumps to `invalid_code' or `no_more_source' in the enclosing
   function.

   NOTE(review): three things here look suspicious -- confirm each:
   - the room check jumps to `no_more_source' when
     CHARBUF + I + (I / 2) + 1 is *below* CHARBUF_END, i.e. when room
     is available; the comparison looks inverted;
   - ADD_COMPOSITION_DATA is passed BUF (the cursor into
     `components'), whereas DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION
     passes CHARBUF;
   - the second copy loop copies the even-indexed entries of
     `components' that the first loop has already copied.  */
1810 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
1812 /* Emacs 20 style format for rule-base composition. */ \
1813 /* Store multibyte form of characters to be composed. */ \
1814 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1815 int *buf = components; \
1818 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1819 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1821 DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \
1822 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1824 if (i < 1 || (buf - components) % 2 == 0) \
1825 goto invalid_code; \
1826 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1827 goto no_more_source; \
1828 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \
1829 for (j = 0; j < i; j++) \
1830 *charbuf++ = components[j]; \
1831 for (j = 0; j < i; j += 2) \
1832 *charbuf++ = components[j]; \
1837 decode_coding_emacs_mule (coding
)
1838 struct coding_system
*coding
;
1840 unsigned char *src
= coding
->source
+ coding
->consumed
;
1841 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1842 unsigned char *src_base
;
1843 int *charbuf
= coding
->charbuf
;
1844 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
1845 int consumed_chars
= 0, consumed_chars_base
;
1846 int char_offset
= 0;
1847 int multibytep
= coding
->src_multibyte
;
1848 Lisp_Object attrs
, eol_type
, charset_list
;
1850 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1857 consumed_chars_base
= consumed_chars
;
1859 if (charbuf
>= charbuf_end
)
1868 if (EQ (eol_type
, Qdos
))
1871 goto no_more_source
;
1875 else if (EQ (eol_type
, Qmac
))
1883 if (charbuf
+ 5 + (MAX_COMPOSITION_COMPONENTS
* 2) - 1 > charbuf_end
)
1886 if (c
- 0xF0 >= COMPOSITION_RELATIVE
1887 && c
- 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS
)
1888 DECODE_EMACS_MULE_21_COMPOSITION (c
);
1890 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
1892 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
1896 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
1900 c
= emacs_mule_char (coding
, 0, &nbytes
, &nchars
);
1914 consumed_chars
= consumed_chars_base
;
1916 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1921 coding
->consumed_char
+= consumed_chars_base
;
1922 coding
->consumed
= src_base
- coding
->source
;
1923 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1927 #define EMACS_MULE_LEADING_CODES(id, codes) \
1930 codes[0] = id, codes[1] = 0; \
1931 else if (id < 0xE0) \
1932 codes[0] = 0x9A, codes[1] = id; \
1933 else if (id < 0xF0) \
1934 codes[0] = 0x9B, codes[1] = id; \
1935 else if (id < 0xF5) \
1936 codes[0] = 0x9C, codes[1] = id; \
1938 codes[0] = 0x9D, codes[1] = id; \
1943 encode_coding_emacs_mule (coding
)
1944 struct coding_system
*coding
;
1946 int multibytep
= coding
->dst_multibyte
;
1947 int *charbuf
= coding
->charbuf
;
1948 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1949 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1950 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1952 int produced_chars
= 0;
1953 Lisp_Object attrs
, eol_type
, charset_list
;
1956 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
1958 while (charbuf
< charbuf_end
)
1960 ASSURE_DESTINATION (safe_room
);
1962 if (ASCII_CHAR_P (c
))
1963 EMIT_ONE_ASCII_BYTE (c
);
1966 struct charset
*charset
;
1970 unsigned char leading_codes
[2];
1972 charset
= char_charset (c
, charset_list
, &code
);
1975 c
= coding
->default_char
;
1976 if (ASCII_CHAR_P (c
))
1978 EMIT_ONE_ASCII_BYTE (c
);
1981 charset
= char_charset (c
, charset_list
, &code
);
1983 dimension
= CHARSET_DIMENSION (charset
);
1984 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
1985 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
1986 EMIT_ONE_BYTE (leading_codes
[0]);
1987 if (leading_codes
[1])
1988 EMIT_ONE_BYTE (leading_codes
[1]);
1990 EMIT_ONE_BYTE (code
);
1993 EMIT_ONE_BYTE (code
>> 8);
1994 EMIT_ONE_BYTE (code
& 0xFF);
1998 coding
->result
= CODING_RESULT_SUCCESS
;
1999 coding
->produced_char
+= produced_chars
;
2000 coding
->produced
= dst
- coding
->destination
;
2005 /*** 7. ISO2022 handlers ***/
2007 /* The following note describes the coding system ISO2022 briefly.
2008 Since the intention of this note is to help understand the
2009 functions in this file, some parts are NOT ACCURATE or OVERLY
2010 SIMPLIFIED. For thorough understanding, please refer to the
2011 original document of ISO2022.
2013 ISO2022 provides many mechanisms to encode several character sets
2014 in 7-bit and 8-bit environments. For 7-bit environments, all text
2015 is encoded using bytes less than 128. This may make the encoded
2016 text a little bit longer, but the text passes more easily through
2017 several gateways, some of which strip off MSB (Most Significant Bit).
2019 There are two kinds of character sets: control character set and
2020 graphic character set. The former contains control characters such
2021 as `newline' and `escape' to provide control functions (control
2022 functions are also provided by escape sequences). The latter
2023 contains graphic characters such as 'A' and '-'. Emacs recognizes
2024 two control character sets and many graphic character sets.
2026 Graphic character sets are classified into one of the following
2027 four classes, according to the number of bytes (DIMENSION) and
2028 number of characters in one dimension (CHARS) of the set:
2029 - DIMENSION1_CHARS94
2030 - DIMENSION1_CHARS96
2031 - DIMENSION2_CHARS94
2032 - DIMENSION2_CHARS96
2034 In addition, each character set is assigned an identification tag,
2035 unique for each set, called "final character" (denoted as <F>
2036 hereafter). The <F> of each character set is decided by ECMA(*)
2037 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2038 (0x30..0x3F are for private use only).
2040 Note (*): ECMA = European Computer Manufacturers Association
2042 Here are examples of graphic character set [NAME(<F>)]:
2043 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2044 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2045 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2046 o DIMENSION2_CHARS96 -- none for the moment
2048 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2049 C0 [0x00..0x1F] -- control character plane 0
2050 GL [0x20..0x7F] -- graphic character plane 0
2051 C1 [0x80..0x9F] -- control character plane 1
2052 GR [0xA0..0xFF] -- graphic character plane 1
2054 A control character set is directly designated and invoked to C0 or
2055 C1 by an escape sequence. The most common case is that:
2056 - ISO646's control character set is designated/invoked to C0, and
2057 - ISO6429's control character set is designated/invoked to C1,
2058 and usually these designations/invocations are omitted in encoded
2059 text. In a 7-bit environment, only C0 can be used, and a control
2060 character for C1 is encoded by an appropriate escape sequence to
2061 fit into the environment. All control characters for C1 are
2062 defined to have corresponding escape sequences.
2064 A graphic character set is at first designated to one of four
2065 graphic registers (G0 through G3), then these graphic registers are
2066 invoked to GL or GR. These designations and invocations can be
2067 done independently. The most common case is that G0 is invoked to
2068 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2069 these invocations and designations are omitted in encoded text.
2070 In a 7-bit environment, only GL can be used.
2072 When a graphic character set of CHARS94 is invoked to GL, codes
2073 0x20 and 0x7F of the GL area work as control characters SPACE and
2074 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2077 There are two ways of invocation: locking-shift and single-shift.
2078 With locking-shift, the invocation lasts until the next different
2079 invocation, whereas with single-shift, the invocation affects the
2080 following character only and doesn't affect the locking-shift
2081 state. Invocations are done by the following control characters or
2084 ----------------------------------------------------------------------
2085 abbrev function cntrl escape seq description
2086 ----------------------------------------------------------------------
2087 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2088 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2089 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2090 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2091 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2092 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2093 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*)
2094 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2095 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2096 ----------------------------------------------------------------------
2097 (*) These are not used by any known coding system.
2099 Control characters for these functions are defined by macros
2100 ISO_CODE_XXX in `coding.h'.
2102 Designations are done by the following escape sequences:
2103 ----------------------------------------------------------------------
2104 escape sequence description
2105 ----------------------------------------------------------------------
2106 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2107 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2108 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2109 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2110 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2111 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2112 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2113 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2114 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2115 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2116 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2117 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2118 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2119 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2120 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2121 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2122 ----------------------------------------------------------------------
2124 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2125 of dimension 1, chars 94, and final character <F>, etc...
2127 Note (*): Although these designations are not allowed in ISO2022,
2128 Emacs accepts them on decoding, and produces them on encoding
2129 CHARS96 character sets in a coding system which is characterized as
2130 7-bit environment, non-locking-shift, and non-single-shift.
2132 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2133 '(' must be omitted. We refer to this as "short-form" hereafter.
2135 Now you may notice that there are a lot of ways for encoding the
2136 same multilingual text in ISO2022. Actually, there exist many
2137 coding systems such as Compound Text (used in X11's inter client
2138 communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
2139 (used in Korean internet), EUC (Extended UNIX Code, used in Asian
2140 localized platforms), and all of these are variants of ISO2022.
2142 In addition to the above, Emacs handles two more kinds of escape
2143 sequences: ISO6429's direction specification and Emacs' private
2144 sequence for specifying character composition.
2146 ISO6429's direction specification takes the following form:
2147 o CSI ']' -- end of the current direction
2148 o CSI '0' ']' -- end of the current direction
2149 o CSI '1' ']' -- start of left-to-right text
2150 o CSI '2' ']' -- start of right-to-left text
2151 The control character CSI (0x9B: control sequence introducer) is
2152 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2154 Character composition specification takes the following form:
2155 o ESC '0' -- start relative composition
2156 o ESC '1' -- end composition
2157 o ESC '2' -- start rule-base composition (*)
2158 o ESC '3' -- start relative composition with alternate chars (**)
2159 o ESC '4' -- start rule-base composition with alternate chars (**)
2160 Since these are not standard escape sequences of any ISO standard,
2161 the use of them for these meanings is restricted to Emacs only.
2163 (*) This form is used only in Emacs 20.5 and the older versions,
2164 but the newer versions can safely decode it.
2165 (**) This form is used only in Emacs 21.1 and the newer versions,
2166 and the older versions can't decode it.
2168 Here's a list of example usages of these composition escape
2169 sequences (categorized by `enum composition_method').
2171 COMPOSITION_RELATIVE:
2172 ESC 0 CHAR [ CHAR ] ESC 1
2173 COMPOSITION_WITH_RULE:
2174 ESC 2 CHAR [ RULE CHAR ] ESC 1
2175 COMPOSITION_WITH_ALTCHARS:
2176 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2177 COMPOSITION_WITH_RULE_ALTCHARS:
2178 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2180 enum iso_code_class_type iso_code_class
[256];
/* Non-zero if charset ID is marked safe for CODING: ID must not exceed
   CODING->max_charset_id and CODING->safe_charsets[ID] must be
   non-negative.  ID is evaluated twice.
   NOTE(review): detect_coding_iso_2022 points safe_charsets at string
   data through a `(char *)' cast, so whether a byte that marks a
   charset as not safe compares negative here depends on the signedness
   of plain `char' -- confirm.  */
2182 #define SAFE_CHARSET_P(coding, id) \
2183 ((id) <= (coding)->max_charset_id \
2184 && (coding)->safe_charsets[id] >= 0)
/* Non-zero if CODING_ISO_INITIAL for element 1 of the coding system
   stored for CATEGORY in `coding_categories' is non-negative.
   Presumably element 1 is graphic register G1, i.e. some charset is
   initially designated to the register that shift-out invokes --
   TODO confirm against the definition of CODING_ISO_INITIAL.  */
2187 #define SHIFT_OUT_OK(category) \
2188 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2191 setup_iso_safe_charsets (Lisp_Object attrs
)
2193 Lisp_Object charset_list
, safe_charsets
;
2194 Lisp_Object request
;
2195 Lisp_Object reg_usage
;
2198 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
2201 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
2202 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2203 && ! EQ (charset_list
, Viso_2022_charset_list
))
2205 CODING_ATTR_CHARSET_LIST (attrs
)
2206 = charset_list
= Viso_2022_charset_list
;
2207 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
2210 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
2214 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2216 int id
= XINT (XCAR (tail
));
2217 if (max_charset_id
< id
)
2218 max_charset_id
= id
;
2221 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2223 request
= AREF (attrs
, coding_attr_iso_request
);
2224 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2225 reg94
= XINT (XCAR (reg_usage
));
2226 reg96
= XINT (XCDR (reg_usage
));
2228 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2232 struct charset
*charset
;
2235 charset
= CHARSET_FROM_ID (XINT (id
));
2236 reg
= Fcdr (Fassq (request
, id
));
2238 XSTRING (safe_charsets
)->data
[XINT (id
)] = XINT (reg
);
2239 else if (charset
->iso_chars_96
)
2242 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg96
;
2247 XSTRING (safe_charsets
)->data
[XINT (id
)] = reg94
;
2250 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
2254 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2255 Check if a text is encoded in ISO2022. If it is, returns an
2256 integer in which appropriate flag bits any of:
2258 CATEGORY_MASK_ISO_7_TIGHT
2259 CATEGORY_MASK_ISO_8_1
2260 CATEGORY_MASK_ISO_8_2
2261 CATEGORY_MASK_ISO_7_ELSE
2262 CATEGORY_MASK_ISO_8_ELSE
2263 are set. If a code which should never appear in ISO2022 is found,
2267 detect_coding_iso_2022 (coding
, mask
)
2268 struct coding_system
*coding
;
2271 unsigned char *src
= coding
->source
, *src_base
= src
;
2272 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2273 int multibytep
= coding
->src_multibyte
;
2274 int mask_iso
= CATEGORY_MASK_ISO
;
2275 int mask_found
= 0, mask_8bit_found
= 0;
2276 int reg
[4], shift_out
= 0, single_shifting
= 0;
2279 int consumed_chars
= 0;
2282 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2284 struct coding_system
*this = &(coding_categories
[i
]);
2285 Lisp_Object attrs
, val
;
2287 attrs
= CODING_ID_ATTRS (this->id
);
2288 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2289 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2290 setup_iso_safe_charsets (attrs
);
2291 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2292 this->max_charset_id
= XSTRING (val
)->size
- 1;
2293 this->safe_charsets
= (char *) XSTRING (val
)->data
;
2296 /* A coding system of this category is always ASCII compatible. */
2297 src
+= coding
->head_ascii
;
2299 reg
[0] = charset_ascii
, reg
[1] = reg
[2] = reg
[3] = -1;
2300 while (mask_iso
&& src
< src_end
)
2306 if (inhibit_iso_escape_detection
)
2308 single_shifting
= 0;
2310 if (c
>= '(' && c
<= '/')
2312 /* Designation sequence for a charset of dimension 1. */
2314 if (c1
< ' ' || c1
>= 0x80
2315 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2316 /* Invalid designation sequence. Just ignore. */
2318 reg
[(c
- '(') % 4] = id
;
2322 /* Designation sequence for a charset of dimension 2. */
2324 if (c
>= '@' && c
<= 'B')
2325 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2326 reg
[0] = id
= iso_charset_table
[1][0][c
];
2327 else if (c
>= '(' && c
<= '/')
2330 if (c1
< ' ' || c1
>= 0x80
2331 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2332 /* Invalid designation sequence. Just ignore. */
2334 reg
[(c
- '(') % 4] = id
;
2337 /* Invalid designation sequence. Just ignore. */
2340 else if (c
== 'N' || c
== 'O')
2342 /* ESC <Fe> for SS2 or SS3. */
2343 mask_iso
&= CATEGORY_MASK_ISO_7_ELSE
;
2346 else if (c
>= '0' && c
<= '4')
2348 /* ESC <Fp> for start/end composition. */
2349 mask_found
|= CATEGORY_MASK_ISO
;
2354 /* Invalid escape sequence. */
2355 mask_iso
&= ~CATEGORY_MASK_ISO_ESCAPE
;
2359 /* We found a valid designation sequence for CHARSET. */
2360 mask_iso
&= ~CATEGORY_MASK_ISO_8BIT
;
2361 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2363 mask_found
|= CATEGORY_MASK_ISO_7
;
2365 mask_iso
&= ~CATEGORY_MASK_ISO_7
;
2366 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2368 mask_found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2370 mask_iso
&= ~CATEGORY_MASK_ISO_7_TIGHT
;
2371 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2373 mask_found
|= CATEGORY_MASK_ISO_7_ELSE
;
2375 mask_iso
&= ~CATEGORY_MASK_ISO_7_ELSE
;
2376 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2378 mask_found
|= CATEGORY_MASK_ISO_8_ELSE
;
2380 mask_iso
&= ~CATEGORY_MASK_ISO_8_ELSE
;
2384 if (inhibit_iso_escape_detection
)
2386 single_shifting
= 0;
2389 || SHIFT_OUT_OK (coding_category_iso_7_else
)
2390 || SHIFT_OUT_OK (coding_category_iso_8_else
)))
2392 /* Locking shift out. */
2393 mask_iso
&= ~CATEGORY_MASK_ISO_7BIT
;
2394 mask_found
|= CATEGORY_MASK_ISO_ELSE
;
2399 if (inhibit_iso_escape_detection
)
2401 single_shifting
= 0;
2404 /* Locking shift in. */
2405 mask_iso
&= ~CATEGORY_MASK_ISO_7BIT
;
2406 mask_found
|= CATEGORY_MASK_ISO_ELSE
;
2411 single_shifting
= 0;
2415 int newmask
= CATEGORY_MASK_ISO_8_ELSE
;
2417 if (inhibit_iso_escape_detection
)
2419 if (c
!= ISO_CODE_CSI
)
2421 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2422 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2423 newmask
|= CATEGORY_MASK_ISO_8_1
;
2424 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2425 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2426 newmask
|= CATEGORY_MASK_ISO_8_2
;
2427 single_shifting
= 1;
2429 if (VECTORP (Vlatin_extra_code_table
)
2430 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2432 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2433 & CODING_ISO_FLAG_LATIN_EXTRA
)
2434 newmask
|= CATEGORY_MASK_ISO_8_1
;
2435 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2436 & CODING_ISO_FLAG_LATIN_EXTRA
)
2437 newmask
|= CATEGORY_MASK_ISO_8_2
;
2439 mask_iso
&= newmask
;
2440 mask_found
|= newmask
;
2447 single_shifting
= 0;
2452 single_shifting
= 0;
2453 mask_8bit_found
= 1;
2454 if (VECTORP (Vlatin_extra_code_table
)
2455 && !NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2459 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2460 & CODING_ISO_FLAG_LATIN_EXTRA
)
2461 newmask
|= CATEGORY_MASK_ISO_8_1
;
2462 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2463 & CODING_ISO_FLAG_LATIN_EXTRA
)
2464 newmask
|= CATEGORY_MASK_ISO_8_2
;
2465 mask_iso
&= newmask
;
2466 mask_found
|= newmask
;
2473 mask_iso
&= ~(CATEGORY_MASK_ISO_7BIT
2474 | CATEGORY_MASK_ISO_7_ELSE
);
2475 mask_found
|= CATEGORY_MASK_ISO_8_1
;
2476 mask_8bit_found
= 1;
2477 /* Check the length of succeeding codes of the range
2478 0xA0..0xFF. If the byte length is odd, we exclude
2479 CATEGORY_MASK_ISO_8_2. We can check this only
2480 when we are not single shifting. */
2481 if (!single_shifting
2482 && mask_iso
& CATEGORY_MASK_ISO_8_2
)
2485 while (src
< src_end
)
2493 if (i
& 1 && src
< src_end
)
2494 mask_iso
&= ~CATEGORY_MASK_ISO_8_2
;
2496 mask_found
|= CATEGORY_MASK_ISO_8_2
;
2505 *mask
&= ~CATEGORY_MASK_ISO
;
2510 *mask
&= mask_iso
& mask_found
;
2511 if (! mask_8bit_found
)
2512 *mask
&= ~(CATEGORY_MASK_ISO_8BIT
| CATEGORY_MASK_ISO_8_ELSE
);
2517 /* Set designation state into CODING. */
2518 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2522 if (final < '0' || final >= 128 \
2523 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2524 || !SAFE_CHARSET_P (coding, id)) \
2526 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2527 goto invalid_code; \
2529 prev = CODING_ISO_DESIGNATION (coding, reg); \
2530 CODING_ISO_DESIGNATION (coding, reg) = id; \
2531 /* If there was an invalid designation to REG previously, and this \
2532 designation is ASCII to REG, we should keep this designation \
2534 if (prev == -2 && id == charset_ascii) \
2535 goto invalid_code; \
2539 #define MAYBE_FINISH_COMPOSITION() \
2542 if (composition_state == COMPOSING_NO) \
2544 /* It is assured that we have enough room for producing \
2545 characters stored in the table `components'. */ \
2546 if (charbuf + component_idx > charbuf_end) \
2547 goto no_more_source; \
2548 composition_state = COMPOSING_NO; \
2549 if (method == COMPOSITION_RELATIVE \
2550 || method == COMPOSITION_WITH_ALTCHARS) \
2552 for (i = 0; i < component_idx; i++) \
2553 *charbuf++ = components[i]; \
2554 char_offset += component_idx; \
2558 for (i = 0; i < component_idx; i += 2) \
2559 *charbuf++ = components[i]; \
2560 char_offset += (component_idx / 2) + 1; \
2565 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2566 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2567 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2568 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2569 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2572 #define DECODE_COMPOSITION_START(c1) \
2575 && composition_state == COMPOSING_COMPONENT_CHAR) \
2577 component_len = component_idx; \
2578 composition_state = COMPOSING_CHAR; \
2584 MAYBE_FINISH_COMPOSITION (); \
2585 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2586 goto no_more_source; \
2587 for (p = src; p < src_end - 1; p++) \
2588 if (*p == ISO_CODE_ESC && p[1] == '1') \
2590 if (p == src_end - 1) \
2592 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2593 goto invalid_code; \
2594 goto no_more_source; \
2597 /* This is surely the start of a composition. */ \
2598 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2599 : c1 == '2' ? COMPOSITION_WITH_RULE \
2600 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2601 : COMPOSITION_WITH_RULE_ALTCHARS); \
2602 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2603 : COMPOSING_COMPONENT_CHAR); \
2604 component_idx = component_len = 0; \
2609 /* Handle composition end sequence ESC 1. */
2611 #define DECODE_COMPOSITION_END() \
2613 int nchars = (component_len > 0 ? component_idx - component_len \
2614 : method == COMPOSITION_RELATIVE ? component_idx \
2615 : (component_idx + 1) / 2); \
2617 int *saved_charbuf = charbuf; \
2619 ADD_COMPOSITION_DATA (charbuf, method, nchars); \
2620 if (method != COMPOSITION_RELATIVE) \
2622 if (component_len == 0) \
2623 for (i = 0; i < component_idx; i++) \
2624 *charbuf++ = components[i]; \
2626 for (i = 0; i < component_len; i++) \
2627 *charbuf++ = components[i]; \
2628 *saved_charbuf = saved_charbuf - charbuf; \
2630 if (method == COMPOSITION_WITH_RULE) \
2631 for (i = 0; i < component_idx; i += 2, char_offset++) \
2632 *charbuf++ = components[i]; \
2634 for (i = component_len; i < component_idx; i++, char_offset++) \
2635 *charbuf++ = components[i]; \
2636 coding->annotated = 1; \
2637 composition_state = COMPOSING_NO; \
2641 /* Decode a composition rule from the byte C1 (and maybe one more byte
2642 from SRC) and store one encoded composition rule in
2643 coding->cmp_data. */
2645 #define DECODE_COMPOSITION_RULE(c1) \
2648 if (c1 < 81) /* old format (before ver.21) */ \
2650 int gref = (c1) / 9; \
2651 int nref = (c1) % 9; \
2652 if (gref == 4) gref = 10; \
2653 if (nref == 4) nref = 10; \
2654 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2656 else if (c1 < 93) /* new format (after ver.21) */ \
2658 ONE_MORE_BYTE (c2); \
2659 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2666 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2669 decode_coding_iso_2022 (coding
)
2670 struct coding_system
*coding
;
2672 unsigned char *src
= coding
->source
+ coding
->consumed
;
2673 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2674 unsigned char *src_base
;
2675 int *charbuf
= coding
->charbuf
;
2676 int *charbuf_end
= charbuf
+ coding
->charbuf_size
- 4;
2677 int consumed_chars
= 0, consumed_chars_base
;
2678 int char_offset
= 0;
2679 int multibytep
= coding
->src_multibyte
;
2680 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2681 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2682 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2683 struct charset
*charset
;
2685 /* For handling composition sequence. */
2686 #define COMPOSING_NO 0
2687 #define COMPOSING_CHAR 1
2688 #define COMPOSING_RULE 2
2689 #define COMPOSING_COMPONENT_CHAR 3
2690 #define COMPOSING_COMPONENT_RULE 4
2692 int composition_state
= COMPOSING_NO
;
2693 enum composition_method method
;
2694 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2697 Lisp_Object attrs
, eol_type
, charset_list
;
2699 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
2700 setup_iso_safe_charsets (attrs
);
2707 consumed_chars_base
= consumed_chars
;
2709 if (charbuf
>= charbuf_end
)
2714 /* We produce no character or one character. */
2715 switch (iso_code_class
[c1
])
2717 case ISO_0x20_or_0x7F
:
2718 if (composition_state
!= COMPOSING_NO
)
2720 if (composition_state
== COMPOSING_RULE
2721 || composition_state
== COMPOSING_COMPONENT_RULE
)
2723 DECODE_COMPOSITION_RULE (c1
);
2724 components
[component_idx
++] = c1
;
2725 composition_state
--;
2728 else if (method
== COMPOSITION_WITH_RULE
)
2729 composition_state
= COMPOSING_RULE
;
2730 else if (method
== COMPOSITION_WITH_RULE_ALTCHARS
2731 && composition_state
== COMPOSING_COMPONENT_CHAR
)
2732 composition_state
= COMPOSING_COMPONENT_CHAR
;
2734 if (charset_id_0
< 0
2735 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2737 /* This is SPACE or DEL. */
2738 charset
= CHARSET_FROM_ID (charset_ascii
);
2741 /* This is a graphic character, we fall down ... */
2743 case ISO_graphic_plane_0
:
2744 if (composition_state
== COMPOSING_RULE
)
2746 DECODE_COMPOSITION_RULE (c1
);
2747 components
[component_idx
++] = c1
;
2748 composition_state
= COMPOSING_CHAR
;
2750 charset
= CHARSET_FROM_ID (charset_id_0
);
2753 case ISO_0xA0_or_0xFF
:
2754 if (charset_id_1
< 0
2755 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
2756 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
2758 /* This is a graphic character, we fall down ... */
2760 case ISO_graphic_plane_1
:
2761 if (charset_id_1
< 0)
2763 charset
= CHARSET_FROM_ID (charset_id_1
);
2766 case ISO_carriage_return
:
2769 if (EQ (eol_type
, Qdos
))
2772 goto no_more_source
;
2776 else if (EQ (eol_type
, Qmac
))
2782 MAYBE_FINISH_COMPOSITION ();
2783 charset
= CHARSET_FROM_ID (charset_ascii
);
2787 MAYBE_FINISH_COMPOSITION ();
2791 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2792 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
2794 CODING_ISO_INVOCATION (coding
, 0) = 1;
2795 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2799 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
2801 CODING_ISO_INVOCATION (coding
, 0) = 0;
2802 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2805 case ISO_single_shift_2_7
:
2806 case ISO_single_shift_2
:
2807 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2809 /* SS2 is handled as an escape sequence of ESC 'N' */
2811 goto label_escape_sequence
;
2813 case ISO_single_shift_3
:
2814 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
2816 /* SS3 is handled as an escape sequence of ESC 'O' */
2818 goto label_escape_sequence
;
2820 case ISO_control_sequence_introducer
:
2821 /* CSI is handled as an escape sequence of ESC '[' ... */
2823 goto label_escape_sequence
;
2827 label_escape_sequence
:
2828 /* Escape sequences handled here are invocation,
2829 designation, direction specification, and character
2830 composition specification. */
2833 case '&': /* revision of following character set */
2835 if (!(c1
>= '@' && c1
<= '~'))
2838 if (c1
!= ISO_CODE_ESC
)
2841 goto label_escape_sequence
;
2843 case '$': /* designation of 2-byte character set */
2844 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
2847 if (c1
>= '@' && c1
<= 'B')
2848 { /* designation of JISX0208.1978, GB2312.1980,
2850 DECODE_DESIGNATION (0, 2, 0, c1
);
2852 else if (c1
>= 0x28 && c1
<= 0x2B)
2853 { /* designation of DIMENSION2_CHARS94 character set */
2855 DECODE_DESIGNATION (c1
- 0x28, 2, 0, c2
);
2857 else if (c1
>= 0x2C && c1
<= 0x2F)
2858 { /* designation of DIMENSION2_CHARS96 character set */
2860 DECODE_DESIGNATION (c1
- 0x2C, 2, 1, c2
);
2864 /* We must update these variables now. */
2865 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2866 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2869 case 'n': /* invocation of locking-shift-2 */
2870 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2871 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
2873 CODING_ISO_INVOCATION (coding
, 0) = 2;
2874 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2877 case 'o': /* invocation of locking-shift-3 */
2878 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
2879 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
2881 CODING_ISO_INVOCATION (coding
, 0) = 3;
2882 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2885 case 'N': /* invocation of single-shift-2 */
2886 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
2887 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
2889 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 2));
2891 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
2895 case 'O': /* invocation of single-shift-3 */
2896 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
2897 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
2899 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 3));
2901 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
2905 case '0': case '2': case '3': case '4': /* start composition */
2906 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
2908 DECODE_COMPOSITION_START (c1
);
2911 case '1': /* end composition */
2912 if (composition_state
== COMPOSING_NO
)
2914 DECODE_COMPOSITION_END ();
2917 case '[': /* specification of direction */
/* The flag test is parenthesized on purpose: `!' binds tighter
than `&', so the unparenthesized form tested
(!flags) & CODING_ISO_FLAG_DIRECTION instead of checking whether
the direction flag is set in CODING_ISO_FLAGS (coding). */
2918 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
))
2920 /* For the moment, nested direction is not supported.
2921 So, `coding->mode & CODING_MODE_DIRECTION' zero means
2922 left-to-right, and nonzero means right-to-left. */
2926 case ']': /* end of the current direction */
2927 coding
->mode
&= ~CODING_MODE_DIRECTION
;
2929 case '0': /* end of the current direction */
2930 case '1': /* start of left-to-right direction */
2933 coding
->mode
&= ~CODING_MODE_DIRECTION
;
2938 case '2': /* start of right-to-left direction */
2941 coding
->mode
|= CODING_MODE_DIRECTION
;
2952 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
2954 if (c1
>= 0x28 && c1
<= 0x2B)
2955 { /* designation of DIMENSION1_CHARS94 character set */
2957 DECODE_DESIGNATION (c1
- 0x28, 1, 0, c2
);
2959 else if (c1
>= 0x2C && c1
<= 0x2F)
2960 { /* designation of DIMENSION1_CHARS96 character set */
2962 DECODE_DESIGNATION (c1
- 0x2C, 1, 1, c2
);
2966 /* We must update these variables now. */
2967 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2968 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2973 /* Now we know CHARSET and 1st position code C1 of a character.
2974 Produce a decoded character while getting 2nd position code
2977 if (CHARSET_DIMENSION (charset
) > 1)
2980 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
2981 /* C2 is not in a valid range. */
2983 c1
= (c1
<< 8) | (c2
& 0x7F);
2984 if (CHARSET_DIMENSION (charset
) > 2)
2987 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
2988 /* C2 is not in a valid range. */
2990 c1
= (c1
<< 8) | (c2
& 0x7F);
2994 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
2997 MAYBE_FINISH_COMPOSITION ();
2998 for (; src_base
< src
; src_base
++, char_offset
++)
3000 if (ASCII_BYTE_P (*src_base
))
3001 *charbuf
++ = *src_base
;
3003 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3006 else if (composition_state
== COMPOSING_NO
)
3012 components
[component_idx
++] = c
;
3016 MAYBE_FINISH_COMPOSITION ();
3018 consumed_chars
= consumed_chars_base
;
3020 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3025 coding
->consumed_char
+= consumed_chars_base
;
3026 coding
->consumed
= src_base
- coding
->source
;
3027 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3031 /* ISO2022 encoding stuff. */
3034 It is not enough to say just "ISO2022" on encoding, we have to
3035 specify more details. In Emacs, each coding system of ISO2022
3036 variant has the following specifications:
3037 1. Initial designation to G0 thru G3.
3038 2. Allows short-form designation?
3039 3. ASCII should be designated to G0 before control characters?
3040 4. ASCII should be designated to G0 at end of line?
3041 5. 7-bit environment or 8-bit environment?
3042 6. Use locking-shift?
3043 7. Use Single-shift?
3044 And the following two are only for Japanese:
3045 8. Use ASCII in place of JIS0201-1976-Roman?
3046 9. Use JISX0208-1983 in place of JISX0208-1978?
3047 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3048 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3052 /* Produce codes (escape sequence) for designating CHARSET to graphic
3053 register REG at DST, and increment DST. If <final-char> of CHARSET is
3054 '@', 'A', or 'B' and the coding system CODING allows, produce
3055 designation sequence of short-form. */
3057 #define ENCODE_DESIGNATION(charset, reg, coding) \
3059 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3060 char *intermediate_char_94 = "()*+"; \
3061 char *intermediate_char_96 = ",-./"; \
3062 int revision = -1; \
3065 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3066 revision = XINT (CHARSET_ISO_REVISION (charset)); \
3068 if (revision >= 0) \
3070 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3071 EMIT_ONE_BYTE ('@' + revision); \
3073 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3074 if (CHARSET_DIMENSION (charset) == 1) \
3076 if (! CHARSET_ISO_CHARS_96 (charset)) \
3077 c = intermediate_char_94[reg]; \
3079 c = intermediate_char_96[reg]; \
3080 EMIT_ONE_ASCII_BYTE (c); \
3084 EMIT_ONE_ASCII_BYTE ('$'); \
3085 if (! CHARSET_ISO_CHARS_96 (charset)) \
3087 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3089 || final_char < '@' || final_char > 'B') \
3090 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3093 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3095 EMIT_ONE_ASCII_BYTE (final_char); \
3097 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3101 /* The following two macros produce codes (control character or escape
3102 sequence) for ISO2022 single-shift functions (single-shift-2 and
3105 #define ENCODE_SINGLE_SHIFT_2 \
3107 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3108 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3110 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3111 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3115 #define ENCODE_SINGLE_SHIFT_3 \
3117 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3118 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3120 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3121 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3125 /* The following four macros produce codes (control character or
3126 escape sequence) for ISO2022 locking-shift functions (shift-in,
3127 shift-out, locking-shift-2, and locking-shift-3). */
3129 #define ENCODE_SHIFT_IN \
3131 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3132 CODING_ISO_INVOCATION (coding, 0) = 0; \
3136 #define ENCODE_SHIFT_OUT \
3138 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3139 CODING_ISO_INVOCATION (coding, 0) = 1; \
3143 #define ENCODE_LOCKING_SHIFT_2 \
3145 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3146 CODING_ISO_INVOCATION (coding, 0) = 2; \
/* Emit the locking-shift-3 sequence ESC 'o' and record that graphic
register 3 is now invoked to graphic plane 0. (ESC 'n' is
locking-shift-2; the decoder in this file likewise treats 'o' as
locking-shift-3.) */
3150 #define ENCODE_LOCKING_SHIFT_3 \
3152 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o'); \
3153 CODING_ISO_INVOCATION (coding, 0) = 3; \
3157 /* Produce codes for a DIMENSION1 character whose character set is
3158 CHARSET and whose position-code is C1. Designation and invocation
3159 sequences are also produced in advance if necessary. */
3161 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3163 int id = CHARSET_ID (charset); \
3164 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3166 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3167 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3169 EMIT_ONE_BYTE (c1 | 0x80); \
3170 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3173 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3175 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3178 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3180 EMIT_ONE_BYTE (c1 | 0x80); \
3184 /* Since CHARSET is not yet invoked to any graphic planes, we \
3185 must invoke it, or, at first, designate it to some graphic \
3186 register. Then repeat the loop to actually produce the \
3188 dst = encode_invocation_designation (charset, coding, dst, \
3193 /* Produce codes for a DIMENSION2 character whose character set is
3194 CHARSET and whose position-codes are C1 and C2. Designation and
3195 invocation codes are also produced in advance if necessary. */
3197 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3199 int id = CHARSET_ID (charset); \
3200 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3202 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3203 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3205 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3206 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3209 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3211 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3214 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3216 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3220 /* Since CHARSET is not yet invoked to any graphic planes, we \
3221 must invoke it, or, at first, designate it to some graphic \
3222 register. Then repeat the loop to actually produce the \
3224 dst = encode_invocation_designation (charset, coding, dst, \
3229 #define ENCODE_ISO_CHARACTER(charset, c) \
3231 int code = ENCODE_CHAR ((charset),(c)); \
3233 if (CHARSET_DIMENSION (charset) == 1) \
3234 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3236 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3240 /* Produce designation and invocation codes at a place pointed by DST
3241 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3245 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3246 struct charset
*charset
;
3247 struct coding_system
*coding
;
3251 int multibytep
= coding
->dst_multibyte
;
3252 int produced_chars
= *p_nchars
;
3253 int reg
; /* graphic register number */
3254 int id
= CHARSET_ID (charset
);
3256 /* At first, check designations. */
3257 for (reg
= 0; reg
< 4; reg
++)
3258 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3263 /* CHARSET is not yet designated to any graphic registers. */
3264 /* At first check the requested designation. */
3265 reg
= CODING_ISO_REQUEST (coding
, id
);
3267 /* Since CHARSET requests no special designation, designate it
3268 to graphic register 0. */
3271 ENCODE_DESIGNATION (charset
, reg
, coding
);
3274 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3275 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3277 /* Since the graphic register REG is not invoked to any graphic
3278 planes, invoke it to graphic plane 0. */
3281 case 0: /* graphic register 0 */
3285 case 1: /* graphic register 1 */
3289 case 2: /* graphic register 2 */
3290 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3291 ENCODE_SINGLE_SHIFT_2
;
3293 ENCODE_LOCKING_SHIFT_2
;
3296 case 3: /* graphic register 3 */
3297 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3298 ENCODE_SINGLE_SHIFT_3
;
3300 ENCODE_LOCKING_SHIFT_3
;
3305 *p_nchars
= produced_chars
;
3309 /* The following three macros produce codes for indicating direction
/* Emit a control sequence introducer: ESC '[' when the coding system
has CODING_ISO_FLAG_SEVEN_BITS set, the single byte ISO_CODE_CSI
otherwise. The flag is tested with `&' (as the single-shift macros
do); comparing the whole flag word with `==' would only match when
no other flag bit is set. */
3311 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
3313 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3314 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
3316 EMIT_ONE_BYTE (ISO_CODE_CSI); \
3320 #define ENCODE_DIRECTION_R2L() \
3322 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3323 EMIT_TWO_ASCII_BYTES ('2', ']'); \
3327 #define ENCODE_DIRECTION_L2R() \
3329 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
3330 EMIT_TWO_ASCII_BYTES ('0', ']'); \
3334 /* Produce codes for designation and invocation to reset the graphic
3335 planes and registers to initial state. */
3336 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3339 struct charset *charset; \
3341 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3343 for (reg = 0; reg < 4; reg++) \
3344 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3345 && (CODING_ISO_DESIGNATION (coding, reg) \
3346 != CODING_ISO_INITIAL (coding, reg))) \
3348 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3349 ENCODE_DESIGNATION (charset, reg, coding); \
3354 /* Produce designation sequences of charsets in the line started from
3355 SRC to a place pointed by DST, and return updated DST.
3357 If the current block ends before any end-of-line, we may fail to
3358 find all the necessary designations. */
3360 static unsigned char *
3361 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3362 struct coding_system
*coding
;
3363 int *charbuf
, *charbuf_end
;
3366 struct charset
*charset
;
3367 /* Table of charsets to be designated to each graphic register. */
3369 int c
, found
= 0, reg
;
3370 int produced_chars
= 0;
3371 int multibytep
= coding
->dst_multibyte
;
3373 Lisp_Object charset_list
;
3375 attrs
= CODING_ID_ATTRS (coding
->id
);
3376 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
3377 if (EQ (charset_list
, Qiso_2022
))
3378 charset_list
= Viso_2022_charset_list
;
3380 for (reg
= 0; reg
< 4; reg
++)
3390 charset
= char_charset (c
, charset_list
, NULL
);
3391 id
= CHARSET_ID (charset
);
3392 reg
= CODING_ISO_REQUEST (coding
, id
);
3393 if (reg
>= 0 && r
[reg
] < 0)
3402 for (reg
= 0; reg
< 4; reg
++)
3404 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3405 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3411 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3414 encode_coding_iso_2022 (coding
)
3415 struct coding_system
*coding
;
3417 int multibytep
= coding
->dst_multibyte
;
3418 int *charbuf
= coding
->charbuf
;
3419 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3420 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3421 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3424 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3425 && CODING_ISO_BOL (coding
));
3426 int produced_chars
= 0;
3427 Lisp_Object attrs
, eol_type
, charset_list
;
3428 int ascii_compatible
;
3431 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3432 setup_iso_safe_charsets (attrs
);
3433 coding
->safe_charsets
3434 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs
))->data
;
3436 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3438 while (charbuf
< charbuf_end
)
3440 ASSURE_DESTINATION (safe_room
);
3442 if (bol_designation
)
3444 unsigned char *dst_prev
= dst
;
3446 /* We have to produce designation sequences if any now. */
3447 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3448 bol_designation
= 0;
3449 /* We are sure that designation sequences are all ASCII bytes. */
3450 produced_chars
+= dst
- dst_prev
;
3455 /* Now encode the character C. */
3456 if (c
< 0x20 || c
== 0x7F)
3459 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3461 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3462 ENCODE_RESET_PLANE_AND_REGISTER ();
3463 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
3467 for (i
= 0; i
< 4; i
++)
3468 CODING_ISO_DESIGNATION (coding
, i
)
3469 = CODING_ISO_INITIAL (coding
, i
);
3472 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3474 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3475 ENCODE_RESET_PLANE_AND_REGISTER ();
3476 EMIT_ONE_ASCII_BYTE (c
);
3478 else if (ASCII_CHAR_P (c
))
3480 if (ascii_compatible
)
3481 EMIT_ONE_ASCII_BYTE (c
);
3483 ENCODE_ISO_CHARACTER (CHARSET_FROM_ID (charset_ascii
), c
);
3487 struct charset
*charset
= char_charset (c
, charset_list
, NULL
);
3491 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3493 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3494 charset
= CHARSET_FROM_ID (charset_ascii
);
3498 c
= coding
->default_char
;
3499 charset
= char_charset (c
, charset_list
, NULL
);
3502 ENCODE_ISO_CHARACTER (charset
, c
);
3506 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3507 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3509 ASSURE_DESTINATION (safe_room
);
3510 ENCODE_RESET_PLANE_AND_REGISTER ();
3512 coding
->result
= CODING_RESULT_SUCCESS
;
3513 CODING_ISO_BOL (coding
) = bol_designation
;
3514 coding
->produced_char
+= produced_chars
;
3515 coding
->produced
= dst
- coding
->destination
;
3520 /*** 8,9. SJIS and BIG5 handlers ***/
3522 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3523 quite widely. So, for the moment, Emacs supports them in the bare
3524 C code. But, in the future, they may be supported only by CCL. */
3526 /* SJIS is a coding system encoding three character sets: ASCII, right
3527 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3528 as is. A character of charset katakana-jisx0201 is encoded by
3529 "position-code + 0x80". A character of charset japanese-jisx0208
3530 is encoded in 2-byte but two position-codes are divided and shifted
3531 so that it fit in the range below.
3533 --- CODE RANGE of SJIS ---
3534 (character set) (range)
3536 KATAKANA-JISX0201 0xA0 .. 0xDF
3537 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3538 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3539 -------------------------------
3543 /* BIG5 is a coding system encoding two character sets: ASCII and
3544 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3545 character set and is encoded in two-byte.
3547 --- CODE RANGE of BIG5 ---
3548 (character set) (range)
3550 Big5 (1st byte) 0xA1 .. 0xFE
3551 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3552 --------------------------
3556 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3557 Check if a text is encoded in SJIS. If it is, return
3558 CATEGORY_MASK_SJIS, else return 0. */
3561 detect_coding_sjis (coding
, mask
)
3562 struct coding_system
*coding
;
3565 unsigned char *src
= coding
->source
, *src_base
= src
;
3566 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3567 int multibytep
= coding
->src_multibyte
;
3568 int consumed_chars
= 0;
3572 /* A coding system of this category is always ASCII compatible. */
3573 src
+= coding
->head_ascii
;
3580 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
3583 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
3587 else if (c
>= 0xA0 && c
< 0xE0)
3592 *mask
&= ~CATEGORY_MASK_SJIS
;
3598 *mask
&= CATEGORY_MASK_SJIS
;
3602 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3603 Check if a text is encoded in BIG5. If it is, return
3604 CATEGORY_MASK_BIG5, else return 0. */
3607 detect_coding_big5 (coding
, mask
)
3608 struct coding_system
*coding
;
3611 unsigned char *src
= coding
->source
, *src_base
= src
;
3612 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3613 int multibytep
= coding
->src_multibyte
;
3614 int consumed_chars
= 0;
3618 /* A coding system of this category is always ASCII compatible. */
3619 src
+= coding
->head_ascii
;
3629 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
3636 *mask
&= ~CATEGORY_MASK_BIG5
;
3642 *mask
&= CATEGORY_MASK_BIG5
;
3646 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3647 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */
/* Decode the SJIS bytes of CODING, starting at source + consumed, into
   the integer character buffer CODING->charbuf.  On exit the fields
   consumed_char, consumed and charbuf_used of CODING are updated from
   the last committed position (src_base / consumed_chars_base).
   Bytes that cannot be decoded are stored as-is when ASCII, otherwise
   through BYTE8_TO_CHAR.
   NOTE(review): the main loop, the byte-fetch statements and the
   labels are not visible in this extract.  */
3650 decode_coding_sjis (coding
)
3651 struct coding_system
*coding
;
3653 unsigned char *src
= coding
->source
+ coding
->consumed
;
3654 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3655 unsigned char *src_base
;
3656 int *charbuf
= coding
->charbuf
;
3657 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
3658 int consumed_chars
= 0, consumed_chars_base
;
3659 int multibytep
= coding
->src_multibyte
;
3660 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
3661 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3663 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
/* Unpack the charset list in the same order as encode_coding_sjis
   does: roman, then kana, then kanji.  The previous code took kanji
   before kana, so the decoder and the encoder disagreed about which
   list element is which charset.  */
3666 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3667 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3668 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3675 consumed_chars_base
= consumed_chars
;
3677 if (charbuf
>= charbuf_end
)
3684 if (EQ (eol_type
, Qdos
))
3687 goto no_more_source
;
3691 else if (EQ (eol_type
, Qmac
))
3696 struct charset
*charset
;
3699 charset
= charset_roman
;
/* Bytes outside 0xA0..0xDF take the two-byte (JISX0208) path; the
   second byte C1 must lie in 0x40..0xFC and differ from 0x7F.  */
3704 if (c
< 0xA0 || c
>= 0xE0)
3706 /* SJIS -> JISX0208 */
3708 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
3712 charset
= charset_kanji
;
3715 /* SJIS -> JISX0201-Kana */
3716 charset
= charset_kana
;
3718 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
/* Roll the character count back to the last committed position and
   emit the raw byte instead.  */
3725 consumed_chars
= consumed_chars_base
;
3727 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3732 coding
->consumed_char
+= consumed_chars_base
;
3733 coding
->consumed
= src_base
- coding
->source
;
3734 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Decode the BIG5 bytes of CODING, starting at source + consumed, into
   the integer character buffer CODING->charbuf.  The charset list is
   unpacked as roman first, then big5.  On exit the fields
   consumed_char, consumed and charbuf_used of CODING are updated from
   the last committed position (src_base / consumed_chars_base).
   NOTE(review): the main loop, the byte-fetch statements and the
   labels are not visible in this extract.  */
3738 decode_coding_big5 (coding
)
3739 struct coding_system
*coding
;
3741 unsigned char *src
= coding
->source
+ coding
->consumed
;
3742 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3743 unsigned char *src_base
;
3744 int *charbuf
= coding
->charbuf
;
3745 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
3746 int consumed_chars
= 0, consumed_chars_base
;
3747 int multibytep
= coding
->src_multibyte
;
3748 struct charset
*charset_roman
, *charset_big5
;
3749 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3751 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3753 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3754 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3761 consumed_chars_base
= consumed_chars
;
3763 if (charbuf
>= charbuf_end
)
3770 if (EQ (eol_type
, Qdos
))
3773 goto no_more_source
;
3777 else if (EQ (eol_type
, Qmac
))
3782 struct charset
*charset
;
3784 charset
= charset_roman
;
/* Range checks: C is tested against 0xA1..0xFE, and C1 against
   0x40..0x7E / 0xA1..0xFE.  */
3788 if (c
< 0xA1 || c
> 0xFE)
3791 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
3794 charset
= charset_big5
;
3796 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
/* Roll the character count back to the last committed position and
   emit the raw byte instead.  */
3804 consumed_chars
= consumed_chars_base
;
3806 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3811 coding
->consumed_char
+= consumed_chars_base
;
3812 coding
->consumed
= src_base
- coding
->source
;
3813 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3816 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3817 This function can encode charsets `ascii', `katakana-jisx0201',
3818 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
3819 are sure that all these charsets are registered as official charset
3820 (i.e. do not have extended leading-codes). Characters of other
3821 charsets are produced without any encoding. If SJIS_P is 1, encode
3822 SJIS text, else encode BIG5 text. */
/* Encode the characters in CODING->charbuf (charbuf_used entries) as
   SJIS bytes appended at destination + produced.  The charset list is
   unpacked as roman, kana, kanji.  ASCII characters are emitted
   directly when the coding system is ASCII compatible; otherwise the
   character's charset and code are looked up with char_charset.  Sets
   CODING->result to CODING_RESULT_SUCCESS and updates produced_char
   and produced.
   NOTE(review): several statements (fetch of C, else/brace lines,
   declaration of safe_room) are not visible in this extract.  */
3825 encode_coding_sjis (coding
)
3826 struct coding_system
*coding
;
3828 int multibytep
= coding
->dst_multibyte
;
3829 int *charbuf
= coding
->charbuf
;
3830 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3831 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3832 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3834 int produced_chars
= 0;
3835 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3836 int ascii_compatible
;
3837 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
3840 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3842 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3843 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3844 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3846 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3848 while (charbuf
< charbuf_end
)
3850 ASSURE_DESTINATION (safe_room
);
3852 /* Now encode the character C. */
3853 if (ASCII_CHAR_P (c
) && ascii_compatible
)
3854 EMIT_ONE_ASCII_BYTE (c
);
3858 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
/* Substitution: in safe-encoding mode the code
   CODING_INHIBIT_CHARACTER_SUBSTITUTION is used with the ASCII
   charset; the other visible path re-looks-up CODING->default_char.  */
3862 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3864 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3865 charset
= CHARSET_FROM_ID (charset_ascii
);
3869 c
= coding
->default_char
;
3870 charset
= char_charset (c
, charset_list
, &code
);
3873 if (code
== CHARSET_INVALID_CODE (charset
))
/* Kanji codes are split into two bytes; kana codes get bit 0x80 set;
   anything else is emitted as a 7-bit byte.  */
3875 if (charset
== charset_kanji
)
3879 c1
= code
>> 8, c2
= code
& 0xFF;
3880 EMIT_TWO_BYTES (c1
, c2
);
3882 else if (charset
== charset_kana
)
3883 EMIT_ONE_BYTE (code
| 0x80);
3885 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
3888 coding
->result
= CODING_RESULT_SUCCESS
;
3889 coding
->produced_char
+= produced_chars
;
3890 coding
->produced
= dst
- coding
->destination
;
/* Encode the characters in CODING->charbuf (charbuf_used entries) as
   BIG5 bytes appended at destination + produced.  The charset list is
   unpacked as roman, big5.  ASCII characters are emitted directly
   when the coding system is ASCII compatible; otherwise the
   character's charset and code are looked up with char_charset.  Sets
   CODING->result to CODING_RESULT_SUCCESS and updates produced_char
   and produced.
   NOTE(review): several statements (fetch of C, else/brace lines,
   declaration of safe_room) are not visible in this extract.  */
3895 encode_coding_big5 (coding
)
3896 struct coding_system
*coding
;
3898 int multibytep
= coding
->dst_multibyte
;
3899 int *charbuf
= coding
->charbuf
;
3900 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3901 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3902 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3904 int produced_chars
= 0;
3905 Lisp_Object attrs
, eol_type
, charset_list
, val
;
3906 int ascii_compatible
;
3907 struct charset
*charset_roman
, *charset_big5
;
3910 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
3912 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
3913 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
3914 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3916 while (charbuf
< charbuf_end
)
3918 ASSURE_DESTINATION (safe_room
);
3920 /* Now encode the character C. */
3921 if (ASCII_CHAR_P (c
) && ascii_compatible
)
3922 EMIT_ONE_ASCII_BYTE (c
);
3926 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
/* Substitution: in safe-encoding mode the code
   CODING_INHIBIT_CHARACTER_SUBSTITUTION is used with the ASCII
   charset; the other visible path re-looks-up CODING->default_char.  */
3930 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3932 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3933 charset
= CHARSET_FROM_ID (charset_ascii
);
3937 c
= coding
->default_char
;
3938 charset
= char_charset (c
, charset_list
, &code
);
3941 if (code
== CHARSET_INVALID_CODE (charset
))
/* BIG5 codes are split into two bytes; anything else is emitted as a
   7-bit byte.  */
3943 if (charset
== charset_big5
)
3947 c1
= code
>> 8, c2
= code
& 0xFF;
3948 EMIT_TWO_BYTES (c1
, c2
);
3951 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
3954 coding
->result
= CODING_RESULT_SUCCESS
;
3955 coding
->produced_char
+= produced_chars
;
3956 coding
->produced
= dst
- coding
->destination
;
3961 /*** 9. CCL handlers ***/
3963 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3964 Check if a text is encoded in a coding system of which
3965 encoder/decoder are written in CCL program. If it is, return
3966 CATEGORY_MASK_CCL, else return 0. */
/* Detector for the CCL category.  Updates *MASK in place: one path
   clears the CATEGORY_MASK_CCL bit, the other reduces *MASK to that
   bit only.  A byte C is looked up in the VALIDS table.
   NOTE(review): the loop, the declarations of ATTRS / FOUND / C and
   the return statements are not visible in this extract.  */
3969 detect_coding_ccl (coding
, mask
)
3970 struct coding_system
*coding
;
3973 unsigned char *src
= coding
->source
, *src_base
= src
;
3974 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3975 int multibytep
= coding
->src_multibyte
;
3976 int consumed_chars
= 0;
/* NOTE(review): VALIDS and HEAD_ASCII are read from the CODING passed
   by the caller, before CODING is rebound to the CCL category entry
   below -- confirm that VALIDS is meant to come from the caller's
   coding rather than from the category's.  */
3978 unsigned char *valids
= CODING_CCL_VALIDS (coding
);
3979 int head_ascii
= coding
->head_ascii
;
3982 coding
= &coding_categories
[coding_category_ccl
];
3983 attrs
= CODING_ID_ATTRS (coding
->id
);
3984 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
3993 if (!found
&& valids
[c
] > 1)
3996 *mask
&= ~CATEGORY_MASK_CCL
;
4002 *mask
&= CATEGORY_MASK_CCL
;
/* Decode the bytes of CODING with its CCL decoder program.  The
   source is copied in chunks of at most 1024 elements into
   SOURCE_CHARBUF (characters via STRING_CHAR_ADVANCE on one path, raw
   bytes on the other), and each chunk is fed to ccl_driver, which
   writes into CODING->charbuf.  The CCL status is then mapped to
   CODING->result, and consumed_char, consumed and charbuf_used are
   updated.
   NOTE(review): the declaration of I, the multibyte test, the switch
   header and several break/else lines are not visible in this
   extract.  */
4007 decode_coding_ccl (coding
)
4008 struct coding_system
*coding
;
4010 unsigned char *src
= coding
->source
+ coding
->consumed
;
4011 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4012 int *charbuf
= coding
->charbuf
;
4013 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
4014 int consumed_chars
= 0;
4015 int multibytep
= coding
->src_multibyte
;
4016 struct ccl_program ccl
;
4017 int source_charbuf
[1024];
4018 int source_byteidx
[1024];
4020 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
4022 while (src
< src_end
)
4024 unsigned char *p
= src
;
4025 int *source
, *source_end
;
/* Fill SOURCE_CHARBUF; SOURCE_BYTEIDX[i] records the byte offset from
   SRC of the i-th character.  It is only filled by this first loop.  */
4029 while (i
< 1024 && p
< src_end
)
4031 source_byteidx
[i
] = p
- src
;
4032 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
4035 while (i
< 1024 && p
< src_end
)
4036 source_charbuf
[i
++] = *p
++;
4038 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4041 source
= source_charbuf
;
4042 source_end
= source
+ i
;
4043 while (source
< source_end
)
4045 ccl_driver (&ccl
, source
, charbuf
,
4046 source_end
- source
, charbuf_end
- charbuf
);
4047 source
+= ccl
.consumed
;
4048 charbuf
+= ccl
.produced
;
4049 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
4052 if (source
< source_end
)
4053 src
+= source_byteidx
[source
- source_charbuf
];
4056 consumed_chars
+= source
- source_charbuf
;
/* NOTE(review): ccl.status is a CCL_STAT_* value, yet the second
   comparison below is against CODING_RESULT_INSUFFICIENT_SRC, a
   constant of the CODING_RESULT_* family -- confirm the intended
   constant.  */
4058 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4059 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
4065 case CCL_STAT_SUSPEND_BY_SRC
:
4066 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4068 case CCL_STAT_SUSPEND_BY_DST
:
4071 case CCL_STAT_INVALID_CMD
:
4072 coding
->result
= CODING_RESULT_INTERRUPT
;
4075 coding
->result
= CODING_RESULT_SUCCESS
;
4078 coding
->consumed_char
+= consumed_chars
;
4079 coding
->consumed
= src
- coding
->source
;
4080 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters in CODING->charbuf with the CCL encoder
   program of CODING.  ccl_driver writes into the local
   DESTINATION_CHARBUF (1024 ints); the low 8 bits of each produced
   element are then copied to the destination, either through
   EMIT_ONE_BYTE or by a direct store.  The CCL status is mapped to
   CODING->result, and produced_char and produced are updated.
   NOTE(review): the test selecting between the two copy loops, the
   switch header and the break/else lines are not visible in this
   extract.  */
4084 encode_coding_ccl (coding
)
4085 struct coding_system
*coding
;
4087 struct ccl_program ccl
;
4088 int multibytep
= coding
->dst_multibyte
;
4089 int *charbuf
= coding
->charbuf
;
4090 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4091 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4092 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4093 unsigned char *adjusted_dst_end
= dst_end
- 1;
4094 int destination_charbuf
[1024];
4095 int i
, produced_chars
= 0;
4097 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
4099 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4100 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4102 while (charbuf
< charbuf_end
&& dst
< adjusted_dst_end
)
/* The remaining destination size is compared with 1024, the capacity
   of DESTINATION_CHARBUF (the clamping statement itself is not
   visible here).  */
4104 int dst_bytes
= dst_end
- dst
;
4105 if (dst_bytes
> 1024)
4108 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4109 charbuf_end
- charbuf
, dst_bytes
);
4110 charbuf
+= ccl
.consumed
;
4112 for (i
= 0; i
< ccl
.produced
; i
++)
4113 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4116 for (i
= 0; i
< ccl
.produced
; i
++)
4117 *dst
++ = destination_charbuf
[i
] & 0xFF;
4118 produced_chars
+= ccl
.produced
;
4124 case CCL_STAT_SUSPEND_BY_SRC
:
4125 coding
->result
= CODING_RESULT_INSUFFICIENT_SRC
;
4127 case CCL_STAT_SUSPEND_BY_DST
:
4128 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
4131 case CCL_STAT_INVALID_CMD
:
4132 coding
->result
= CODING_RESULT_INTERRUPT
;
4135 coding
->result
= CODING_RESULT_SUCCESS
;
4139 coding
->produced_char
+= produced_chars
;
4140 coding
->produced
= dst
- coding
->destination
;
4146 /*** 10, 11. no-conversion handlers ***/
4148 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* Raw-text "decoder": converts nothing.  It sets
   CODING->chars_at_source to 1, resets consumed_char and consumed to
   0, and reports CODING_RESULT_SUCCESS.  */
4151 decode_coding_raw_text (coding
)
4152 struct coding_system
*coding
;
4154 coding
->chars_at_source
= 1;
4155 coding
->consumed_char
= 0;
4156 coding
->consumed
= 0;
4157 coding
->result
= CODING_RESULT_SUCCESS
;
/* Encode the characters in CODING->charbuf as raw text, appending
   bytes at destination + produced.  Four paths are visible, chosen by
   the destination and source multibyteness: ASCII characters are
   emitted as-is, eight-bit characters are converted with
   CHAR_TO_BYTE8, and other characters are expanded to their multibyte
   form with CHAR_STRING_ADVANCE.  Sets CODING->result to
   CODING_RESULT_SUCCESS and updates produced_char and produced.
   NOTE(review): the outer multibyte test, the fetches of C and
   several else/brace lines are not visible in this extract.  */
4161 encode_coding_raw_text (coding
)
4162 struct coding_system
*coding
;
4164 int multibytep
= coding
->dst_multibyte
;
4165 int *charbuf
= coding
->charbuf
;
4166 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4167 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4168 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4169 int produced_chars
= 0;
4174 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4176 if (coding
->src_multibyte
)
4177 while (charbuf
< charbuf_end
)
4179 ASSURE_DESTINATION (safe_room
);
4181 if (ASCII_CHAR_P (c
))
4182 EMIT_ONE_ASCII_BYTE (c
);
4183 else if (CHAR_BYTE8_P (c
))
4185 c
= CHAR_TO_BYTE8 (c
);
4190 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4192 CHAR_STRING_ADVANCE (c
, p1
);
4194 EMIT_ONE_BYTE (*p0
);
4198 while (charbuf
< charbuf_end
)
4200 ASSURE_DESTINATION (safe_room
);
4207 if (coding
->src_multibyte
)
4209 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4211 while (charbuf
< charbuf_end
)
4213 ASSURE_DESTINATION (safe_room
);
4215 if (ASCII_CHAR_P (c
))
4217 else if (CHAR_BYTE8_P (c
))
4218 *dst
++ = CHAR_TO_BYTE8 (c
);
4220 CHAR_STRING_ADVANCE (c
, dst
);
4226 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4227 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4228 *dst
++ = *charbuf
++;
/* Count what this call wrote.  DST started at destination + produced,
   so that is the base to subtract; subtracting destination +
   dst_bytes (the end of the buffer, as the code used to do) yields a
   non-positive count.  */
4229 produced_chars
= dst
- (coding
->destination
+ coding
->produced
);
4232 coding
->result
= CODING_RESULT_SUCCESS
;
4233 coding
->produced_char
+= produced_chars
;
4234 coding
->produced
= dst
- coding
->destination
;
/* Detector for the charset category.  CODING is rebound to the
   charset category entry of coding_categories, and the VALIDS vector
   is taken from that entry's attributes.  A byte C whose VALIDS entry
   is nil satisfies the visible test.  *MASK is updated in place: one
   path clears the CATEGORY_MASK_CHARSET bit, the other reduces *MASK
   to that bit only.
   NOTE(review): the loop and the return statements are not visible in
   this extract.  */
4239 detect_coding_charset (coding
, mask
)
4240 struct coding_system
*coding
;
4243 unsigned char *src
= coding
->source
, *src_base
= src
;
4244 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4245 int multibytep
= coding
->src_multibyte
;
4246 int consumed_chars
= 0;
4247 Lisp_Object attrs
, valids
;
/* NOTE(review): SRC, SRC_END and MULTIBYTEP above were computed from
   the caller's CODING; after the rebinding below, head_ascii is read
   from the category entry instead -- confirm this is intended.  */
4249 coding
= &coding_categories
[coding_category_charset
];
4250 attrs
= CODING_ID_ATTRS (coding
->id
);
4251 valids
= AREF (attrs
, coding_attr_charset_valids
);
4253 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4254 src
+= coding
->head_ascii
;
4261 if (NILP (AREF (valids
, c
)))
4264 *mask
&= ~CATEGORY_MASK_CHARSET
;
4268 *mask
&= CATEGORY_MASK_CHARSET
;
/* Decode the bytes of CODING, starting at source + consumed, using
   the per-byte VALIDS table of the coding system: the entry for a
   byte C gives a charset id, and the charset's dimension is tested
   against 1, 2 and 3 (presumably to read the additional bytes --
   TODO confirm) before CODING_DECODE_CHAR is applied.  Undecodable
   bytes are stored as-is when ASCII, otherwise through BYTE8_TO_CHAR.
   On exit consumed_char, consumed and charbuf_used are updated from
   the last committed position.
   NOTE(review): the main loop, byte fetches, the declaration of VAL
   and the labels are not visible in this extract.  */
4273 decode_coding_charset (coding
)
4274 struct coding_system
*coding
;
4276 unsigned char *src
= coding
->source
+ coding
->consumed
;
4277 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4278 unsigned char *src_base
;
4279 int *charbuf
= coding
->charbuf
;
4280 int *charbuf_end
= charbuf
+ coding
->charbuf_size
;
4281 int consumed_chars
= 0, consumed_chars_base
;
4282 int multibytep
= coding
->src_multibyte
;
4283 Lisp_Object attrs
, eol_type
, charset_list
, valids
;
4285 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4286 valids
= AREF (attrs
, coding_attr_charset_valids
);
4293 consumed_chars_base
= consumed_chars
;
4295 if (charbuf
>= charbuf_end
)
4301 if (EQ (eol_type
, Qdos
))
4307 else if (EQ (eol_type
, Qmac
))
4313 struct charset
*charset
;
4316 val
= AREF (valids
, c
);
4319 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4320 if (CHARSET_DIMENSION (charset
) > 1)
4324 if (CHARSET_DIMENSION (charset
) > 2)
4328 if (CHARSET_DIMENSION (charset
) > 3)
4335 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
/* Roll the character count back to the last committed position and
   emit the raw byte instead.  */
4344 consumed_chars
= consumed_chars_base
;
4346 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4351 coding
->consumed_char
+= consumed_chars_base
;
4352 coding
->consumed
= src_base
- coding
->source
;
4353 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
/* Encode the characters in CODING->charbuf (charbuf_used entries)
   with the charsets of CODING's charset list, appending bytes at
   destination + produced.  ASCII characters are emitted directly when
   the coding system is ASCII compatible.  Otherwise char_charset
   yields a charset and a code, and the code is emitted as 1, 2, 3 or
   4 bytes, most significant byte first, according to the charset's
   dimension.  Sets CODING->result to CODING_RESULT_SUCCESS and
   updates produced_char and produced.
   NOTE(review): the fetch of C, the test that selects the
   substitution path and several else/brace lines are not visible in
   this extract.  */
4357 encode_coding_charset (coding
)
4358 struct coding_system
*coding
;
4360 int multibytep
= coding
->dst_multibyte
;
4361 int *charbuf
= coding
->charbuf
;
4362 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4363 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4364 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4365 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4366 int produced_chars
= 0;
4367 Lisp_Object attrs
, eol_type
, charset_list
;
4368 int ascii_compatible
;
4371 CODING_GET_INFO (coding
, attrs
, eol_type
, charset_list
);
4372 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4374 while (charbuf
< charbuf_end
)
4376 struct charset
*charset
;
4379 ASSURE_DESTINATION (safe_room
);
4381 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4382 EMIT_ONE_ASCII_BYTE (c
);
4385 charset
= char_charset (c
, charset_list
, &code
);
4388 if (CHARSET_DIMENSION (charset
) == 1)
4389 EMIT_ONE_BYTE (code
);
4390 else if (CHARSET_DIMENSION (charset
) == 2)
4391 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4392 else if (CHARSET_DIMENSION (charset
) == 3)
4393 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4395 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4396 (code
>> 8) & 0xFF, code
& 0xFF);
/* Substitute character: CODING_INHIBIT_CHARACTER_SUBSTITUTION in
   safe-encoding mode, CODING->default_char on the other visible
   path.  */
4400 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4401 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4403 c
= coding
->default_char
;
4409 coding
->result
= CODING_RESULT_SUCCESS
;
4410 coding
->produced_char
+= produced_chars
;
4411 coding
->produced
= dst
- coding
->destination
;
4416 /*** 10. C library functions ***/
4418 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
4419 has a property `coding-system'. The value of this property is a
4420 vector of length 5 (called as coding-vector). Among elements of
4421 this vector, the first (element[0]) and the fifth (element[4])
4422 carry important information for decoding/encoding. Before
4423 decoding/encoding, this information should be set in fields of a
4424 structure of type `coding_system'.
4426 A value of property `coding-system' can be a symbol of another
4427 subsidiary coding-system. In that case, Emacs gets coding-vector
4430 `element[0]' contains information to be set in `coding->type'. The
4431 value and its meaning is as follows:
4433 0 -- coding_type_emacs_mule
4434 1 -- coding_type_sjis
4435 2 -- coding_type_iso_2022
4436 3 -- coding_type_big5
4437 4 -- coding_type_ccl encoder/decoder written in CCL
4438 nil -- coding_type_no_conversion
4439 t -- coding_type_undecided (automatic conversion on decoding,
4440 no-conversion on encoding)
4442 `element[4]' contains information to be set in `coding->flags' and
4443 `coding->spec'. The meaning varies by `coding->type'.
4445 If `coding->type' is `coding_type_iso_2022', element[4] is a vector
4446 of length 32 (of which the first 13 sub-elements are used now).
4447 Meanings of these sub-elements are:
4449 sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso_2022'
4450 If the value is an integer of valid charset, the charset is
4451 assumed to be designated to graphic register N initially.
4453 If the value is minus, it is a minus value of charset which
4454 reserves graphic register N, which means that the charset is
4455 not designated initially but should be designated to graphic
4456 register N just before encoding a character in that charset.
4458 If the value is nil, graphic register N is never used on
4461 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
4462 Each value takes t or nil. See the section ISO2022 of
4463 `coding.h' for more information.
4465 If `coding->type' is `coding_type_big5', element[4] is t to denote
4466 BIG5-ETen or nil to denote BIG5-HKU.
4468 If `coding->type' takes the other value, element[4] is ignored.
4470 Emacs Lisp's coding system also carries information about format of
4471 end-of-line in a value of property `eol-type'. If the value is
4472 integer, 0 means eol_lf, 1 means eol_crlf, and 2 means eol_cr. If
4473 it is not integer, it should be a vector of subsidiary coding
4474 systems of which property `eol-type' has one of above values.
4478 /* Setup coding context CODING from information about CODING_SYSTEM.
4479 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4480 CODING_SYSTEM is invalid, signal an error. */
/* Initialize the coding context CODING for CODING_SYSTEM (nil is
   replaced by Qno_conversion).  The function resolves CODING->id,
   resets head_ascii to -1, seeds common_flags (detection is required
   when the eol type is a vector), and copies the safe-charsets string
   and default character from the attributes.  It then selects the
   detector / decoder / encoder function pointers and extra
   common_flags according to the coding type: undecided, iso-2022,
   charset, utf-8, utf-16, ccl, emacs-mule, shift-jis, big5, and
   finally raw-text as the fall-through case.
   NOTE(review): the declarations of ATTRS, VAL and I, and the brace
   lines, are not visible in this extract.  */
4483 setup_coding_system (coding_system
, coding
)
4484 Lisp_Object coding_system
;
4485 struct coding_system
*coding
;
4488 Lisp_Object eol_type
;
4489 Lisp_Object coding_type
;
4492 if (NILP (coding_system
))
4493 coding_system
= Qno_conversion
;
4495 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4497 attrs
= CODING_ID_ATTRS (coding
->id
);
4498 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4501 coding
->head_ascii
= -1;
4502 coding
->common_flags
4503 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
/* SAFE_CHARSETS points directly at the data of the Lisp string held
   in the attributes; MAX_CHARSET_ID is its size minus one.  */
4505 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4506 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4507 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4508 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4510 coding_type
= CODING_ATTR_TYPE (attrs
);
4511 if (EQ (coding_type
, Qundecided
))
4513 coding
->detector
= NULL
;
4514 coding
->decoder
= decode_coding_raw_text
;
4515 coding
->encoder
= encode_coding_raw_text
;
4516 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4518 else if (EQ (coding_type
, Qiso_2022
))
4521 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4523 /* Invoke graphic register 0 to plane 0. */
4524 CODING_ISO_INVOCATION (coding
, 0) = 0;
4525 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4526 CODING_ISO_INVOCATION (coding
, 1)
4527 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4528 /* Setup the initial status of designation. */
4529 for (i
= 0; i
< 4; i
++)
4530 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
4531 /* Not single shifting initially. */
4532 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
4533 /* Beginning of buffer should also be regarded as bol. */
4534 CODING_ISO_BOL (coding
) = 1;
4535 coding
->detector
= detect_coding_iso_2022
;
4536 coding
->decoder
= decode_coding_iso_2022
;
4537 coding
->encoder
= encode_coding_iso_2022
;
4538 if (flags
& CODING_ISO_FLAG_SAFE
)
4539 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
4540 coding
->common_flags
4541 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4542 | CODING_REQUIRE_FLUSHING_MASK
);
4543 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
4544 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
/* With the full-support flag the safe-charsets attribute is rebuilt
   by setup_iso_safe_charsets and then re-read into CODING.  */
4545 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
4547 setup_iso_safe_charsets (attrs
);
4548 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4549 coding
->max_charset_id
= XSTRING (val
)->size
- 1;
4550 coding
->safe_charsets
= (char *) XSTRING (val
)->data
;
4552 CODING_ISO_FLAGS (coding
) = flags
;
4554 else if (EQ (coding_type
, Qcharset
))
4556 coding
->detector
= detect_coding_charset
;
4557 coding
->decoder
= decode_coding_charset
;
4558 coding
->encoder
= encode_coding_charset
;
4559 coding
->common_flags
4560 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4562 else if (EQ (coding_type
, Qutf_8
))
4564 coding
->detector
= detect_coding_utf_8
;
4565 coding
->decoder
= decode_coding_utf_8
;
4566 coding
->encoder
= encode_coding_utf_8
;
4567 coding
->common_flags
4568 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4570 else if (EQ (coding_type
, Qutf_16
))
/* BOM attribute: a cons selects BOM detection, t selects "with BOM",
   anything else "without BOM".  Endian attribute: nil selects big
   endian, anything else little endian.  */
4572 val
= AREF (attrs
, coding_attr_utf_16_bom
);
4573 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
4574 : EQ (val
, Qt
) ? utf_16_with_bom
4575 : utf_16_without_bom
);
4576 val
= AREF (attrs
, coding_attr_utf_16_endian
);
4577 CODING_UTF_16_ENDIAN (coding
) = (NILP (val
) ? utf_16_big_endian
4578 : utf_16_little_endian
);
4579 CODING_UTF_16_SURROGATE (coding
) = 0;
4580 coding
->detector
= detect_coding_utf_16
;
4581 coding
->decoder
= decode_coding_utf_16
;
4582 coding
->encoder
= encode_coding_utf_16
;
4583 coding
->common_flags
4584 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4586 else if (EQ (coding_type
, Qccl
))
4588 coding
->detector
= detect_coding_ccl
;
4589 coding
->decoder
= decode_coding_ccl
;
4590 coding
->encoder
= encode_coding_ccl
;
4591 coding
->common_flags
4592 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4593 | CODING_REQUIRE_FLUSHING_MASK
);
4595 else if (EQ (coding_type
, Qemacs_mule
))
4597 coding
->detector
= detect_coding_emacs_mule
;
4598 coding
->decoder
= decode_coding_emacs_mule
;
4599 coding
->encoder
= encode_coding_emacs_mule
;
4600 coding
->common_flags
4601 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
/* When the "full" attribute is set and the charset list attribute is
   not Vemacs_mule_charset_list itself, a fresh safe-charsets string
   is built: sized by the largest id in Vemacs_mule_charset_list, with
   the entry of every listed charset set to 0.  */
4602 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
4603 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
4605 Lisp_Object tail
, safe_charsets
;
4606 int max_charset_id
= 0;
4608 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4610 if (max_charset_id
< XFASTINT (XCAR (tail
)))
4611 max_charset_id
= XFASTINT (XCAR (tail
));
4612 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
4614 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
4616 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
4617 coding
->max_charset_id
= max_charset_id
;
4618 coding
->safe_charsets
= (char *) XSTRING (safe_charsets
)->data
;
4621 else if (EQ (coding_type
, Qshift_jis
))
4623 coding
->detector
= detect_coding_sjis
;
4624 coding
->decoder
= decode_coding_sjis
;
4625 coding
->encoder
= encode_coding_sjis
;
4626 coding
->common_flags
4627 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4629 else if (EQ (coding_type
, Qbig5
))
4631 coding
->detector
= detect_coding_big5
;
4632 coding
->decoder
= decode_coding_big5
;
4633 coding
->encoder
= encode_coding_big5
;
4634 coding
->common_flags
4635 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4637 else /* EQ (coding_type, Qraw_text) */
4639 coding
->detector
= NULL
;
4640 coding
->decoder
= decode_coding_raw_text
;
4641 coding
->encoder
= encode_coding_raw_text
;
4642 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4648 /* Return raw-text or one of its subsidiaries that has the same
4649 eol_type as CODING-SYSTEM. */
/* Map CODING_SYSTEM to the raw-text coding system with the matching
   end-of-line type.  If CODING_SYSTEM already has type raw-text it is
   returned unchanged.  Otherwise its eol type (element 2 of its spec)
   selects element 0 (Qunix), 1 (Qdos) or 2 (anything else) of the
   raw-text eol-type vector.
   NOTE(review): the statement executed when the eol type is a vector
   is not visible in this extract.  */
4652 raw_text_coding_system (coding_system
)
4653 Lisp_Object coding_system
;
4655 Lisp_Object spec
, attrs
;
4656 Lisp_Object eol_type
, raw_text_eol_type
;
4658 spec
= CODING_SYSTEM_SPEC (coding_system
);
4659 attrs
= AREF (spec
, 0);
4661 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
4662 return coding_system
;
4664 eol_type
= AREF (spec
, 2);
4665 if (VECTORP (eol_type
))
4667 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
4668 raw_text_eol_type
= AREF (spec
, 2);
4669 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
4670 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
4671 : AREF (raw_text_eol_type
, 2));
4675 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4676 does, return one of the subsidiary that has the same eol-spec as
4677 PARENT. Otherwise, return CODING_SYSTEM. */
/* Return CODING_SYSTEM, or one of its eol subsidiaries.  When the eol
   type of CODING_SYSTEM (element 2 of its spec) is a vector, the eol
   type of a parent spec picks the subsidiary: element 0 for Qunix, 1
   for Qdos, 2 for Qmac; in every other case CODING_SYSTEM is returned
   as is.
   NOTE(review): the only visible assignment of PARENT_SPEC takes it
   from buffer_defaults.buffer_file_coding_system; the lines that
   would use the PARENT argument are not visible in this extract --
   confirm how PARENT participates.  */
4680 coding_inherit_eol_type (coding_system
, parent
)
4681 Lisp_Object coding_system
, parent
;
4683 Lisp_Object spec
, attrs
, eol_type
;
4685 spec
= CODING_SYSTEM_SPEC (coding_system
);
4686 attrs
= AREF (spec
, 0);
4687 eol_type
= AREF (spec
, 2);
4688 if (VECTORP (eol_type
))
4690 Lisp_Object parent_spec
;
4691 Lisp_Object parent_eol_type
;
4694 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
4695 parent_eol_type
= AREF (parent_spec
, 2);
4696 if (EQ (parent_eol_type
, Qunix
))
4697 coding_system
= AREF (eol_type
, 0);
4698 else if (EQ (parent_eol_type
, Qdos
))
4699 coding_system
= AREF (eol_type
, 1);
4700 else if (EQ (parent_eol_type
, Qmac
))
4701 coding_system
= AREF (eol_type
, 2);
4703 return coding_system
;
4706 /* Emacs has a mechanism to automatically detect a coding system if it
4707 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
4708 it's impossible to distinguish some coding systems accurately
4709 because they use the same range of codes. So, at first, coding
4710 systems are categorized into the following 14 categories:
4712 o coding-category-emacs-mule
4714 The category for a coding system which has the same code range
4715 as Emacs' internal format. Assigned the coding-system (Lisp
4716 symbol) `emacs-mule' by default.
4718 o coding-category-sjis
4720 The category for a coding system which has the same code range
4721 as SJIS. Assigned the coding-system (Lisp
4722 symbol) `japanese-shift-jis' by default.
4724 o coding-category-iso-7
4726 The category for a coding system which has the same code range
4727 as ISO2022 of 7-bit environment. This doesn't use any locking
4728 shift and single shift functions. This can encode/decode all
4729 charsets. Assigned the coding-system (Lisp symbol)
4730 `iso-2022-7bit' by default.
4732 o coding-category-iso-7-tight
4734 Same as coding-category-iso-7 except that this can
4735 encode/decode only the specified charsets.
4737 o coding-category-iso-8-1
4739 The category for a coding system which has the same code range
4740 as ISO2022 of 8-bit environment and graphic plane 1 used only
4741 for DIMENSION1 charset. This doesn't use any locking shift
4742 and single shift functions. Assigned the coding-system (Lisp
4743 symbol) `iso-latin-1' by default.
4745 o coding-category-iso-8-2
4747 The category for a coding system which has the same code range
4748 as ISO2022 of 8-bit environment and graphic plane 1 used only
4749 for DIMENSION2 charset. This doesn't use any locking shift
4750 and single shift functions. Assigned the coding-system (Lisp
4751 symbol) `japanese-iso-8bit' by default.
4753 o coding-category-iso-7-else
4755 The category for a coding system which has the same code range
4756 as ISO2022 of 7-bit environment but uses locking shift or
4757 single shift functions. Assigned the coding-system (Lisp
4758 symbol) `iso-2022-7bit-lock' by default.
4760 o coding-category-iso-8-else
4762 The category for a coding system which has the same code range
4763 as ISO2022 of 8-bit environment but uses locking shift or
4764 single shift functions. Assigned the coding-system (Lisp
4765 symbol) `iso-2022-8bit-ss2' by default.
4767 o coding-category-big5
4769 The category for a coding system which has the same code range
4770 as BIG5. Assigned the coding-system (Lisp symbol)
4771 `cn-big5' by default.
4773 o coding-category-utf-8
4775 The category for a coding system which has the same code range
4776 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
4777 symbol) `utf-8' by default.
4779 o coding-category-utf-16-be
4781 The category for a coding system in which a text has an
4782 Unicode signature (cf. Unicode Standard) in the order of BIG
4783 endian at the head. Assigned the coding-system (Lisp symbol)
4784 `utf-16-be' by default.
4786 o coding-category-utf-16-le
4788 The category for a coding system in which a text has an
4789 Unicode signature (cf. Unicode Standard) in the order of
4790 LITTLE endian at the head. Assigned the coding-system (Lisp
4791 symbol) `utf-16-le' by default.
4793 o coding-category-ccl
4795 The category for a coding system of which encoder/decoder is
4796 written in CCL programs. The default value is nil, i.e., no
4797 coding system is assigned.
4799 o coding-category-binary
4801 The category for a coding system not categorized in any of the
4802 above. Assigned the coding-system (Lisp symbol)
4803 `no-conversion' by default.
4805 Each of them is a Lisp symbol and the value is an actual
4806 `coding-system's (this is also a Lisp symbol) assigned by a user.
4807 What Emacs does actually is to detect a category of coding system.
4808 Then, it uses a `coding-system' assigned to it. If Emacs can't
4809 decide only one possible category, it selects a category of the
4810 highest priority. Priorities of categories are also specified by a
4811 user in a Lisp variable `coding-category-list'.
/* End-of-line formats seen by detect_eol.  The non-zero values are
   distinct bits, and adjust_coding_eol_type tests them with `&'.  */
4815 #define EOL_SEEN_NONE 0
4816 #define EOL_SEEN_LF 1
4817 #define EOL_SEEN_CR 2
4818 #define EOL_SEEN_CRLF 4
4820 /* Detect how end-of-line of a text of length CODING->src_bytes
4821 pointed by CODING->source is encoded. Return one of
/* detect_eol compares its running count of end-of-lines against this
   value (presumably to stop scanning -- TODO confirm; the statement
   following the comparison is not visible in this extract).  */
4824 #define MAX_EOL_CHECK_COUNT 3
4827 detect_eol (coding
, source
, src_bytes
)
4828 struct coding_system
*coding
;
4829 unsigned char *source
;
4830 EMACS_INT src_bytes
;
4832 Lisp_Object attrs
, coding_type
;
4833 unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
4836 int eol_seen
= EOL_SEEN_NONE
;
4838 attrs
= CODING_ID_ATTRS (coding
->id
);
4839 coding_type
= CODING_ATTR_TYPE (attrs
);
4841 if (EQ (coding_type
, Qccl
))
4845 msb
= coding
->spec
.utf_16
.endian
== utf_16_little_endian
;
4848 while (src
+ 1 < src_end
)
4851 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
4856 this_eol
= EOL_SEEN_LF
;
4857 else if (src
+ 3 >= src_end
4858 || src
[msb
+ 2] != 0
4859 || src
[lsb
+ 2] != '\n')
4860 this_eol
= EOL_SEEN_CR
;
4862 this_eol
= EOL_SEEN_CRLF
;
4864 if (eol_seen
== EOL_SEEN_NONE
)
4865 /* This is the first end-of-line. */
4866 eol_seen
= this_eol
;
4867 else if (eol_seen
!= this_eol
)
4869 /* The found type is different from what was found before. */
4870 eol_seen
= EOL_SEEN_LF
;
4873 if (++total
== MAX_EOL_CHECK_COUNT
)
4881 while (src
< src_end
)
4884 if (c
== '\n' || c
== '\r')
4889 this_eol
= EOL_SEEN_LF
;
4890 else if (src
>= src_end
|| *src
!= '\n')
4891 this_eol
= EOL_SEEN_CR
;
4893 this_eol
= EOL_SEEN_CRLF
, src
++;
4895 if (eol_seen
== EOL_SEEN_NONE
)
4896 /* This is the first end-of-line. */
4897 eol_seen
= this_eol
;
4898 else if (eol_seen
!= this_eol
)
4900 /* The found type is different from what was found before. */
4901 eol_seen
= EOL_SEEN_LF
;
4904 if (++total
== MAX_EOL_CHECK_COUNT
)
4914 adjust_coding_eol_type (coding
, eol_seen
)
4915 struct coding_system
*coding
;
4918 Lisp_Object eol_type
;
4920 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4921 if (eol_seen
& EOL_SEEN_LF
)
4922 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
4923 else if (eol_type
& EOL_SEEN_CRLF
)
4924 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
4925 else if (eol_type
& EOL_SEEN_CR
)
4926 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
4929 /* Detect how a text specified in CODING is encoded. If a coding
4930 system is detected, update fields of CODING by the detected coding
4934 detect_coding (coding
)
4935 struct coding_system
*coding
;
4937 unsigned char *src
, *src_end
;
4938 Lisp_Object attrs
, coding_type
;
4940 coding
->consumed
= coding
->consumed_char
= 0;
4941 coding
->produced
= coding
->produced_char
= 0;
4942 coding_set_source (coding
);
4944 src_end
= coding
->source
+ coding
->src_bytes
;
4946 /* If we have not yet decided the text encoding type, detect it
4948 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
4950 int mask
= CATEGORY_MASK_ANY
;
4953 for (src
= coding
->source
; src
< src_end
; src
++)
4956 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
4958 || c
== ISO_CODE_SO
)))
4961 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
4963 if (coding
->head_ascii
< coding
->src_bytes
)
4967 for (i
= 0; i
< coding_category_raw_text
; i
++)
4969 enum coding_category category
= coding_priorities
[i
];
4970 struct coding_system
*this = coding_categories
+ category
;
4972 if (category
>= coding_category_raw_text
4973 || detected
& (1 << category
))
4978 /* No coding system of this category is defined. */
4979 mask
&= ~(1 << category
);
4983 detected
|= detected_mask
[category
];
4984 if ((*(this->detector
)) (coding
, &mask
))
4989 setup_coding_system (Qraw_text
, coding
);
4990 else if (mask
!= CATEGORY_MASK_ANY
)
4991 for (i
= 0; i
< coding_category_raw_text
; i
++)
4993 enum coding_category category
= coding_priorities
[i
];
4994 struct coding_system
*this = coding_categories
+ category
;
4996 if (mask
& (1 << category
))
4998 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5005 attrs
= CODING_ID_ATTRS (coding
->id
);
5006 coding_type
= CODING_ATTR_TYPE (attrs
);
5008 /* If we have not yet decided the EOL type, detect it now. But, the
5009 detection is impossible for a CCL based coding system, in which
5010 case, we detect the EOL type after decoding. */
5011 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
))
5012 && ! EQ (coding_type
, Qccl
))
5014 int eol_seen
= detect_eol (coding
, coding
->source
, coding
->src_bytes
);
5016 if (eol_seen
!= EOL_SEEN_NONE
)
5017 adjust_coding_eol_type (coding
, eol_seen
);
5024 struct coding_system
*coding
;
5026 if (VECTORP (CODING_ID_EOL_TYPE (coding
->id
)))
5028 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5029 unsigned char *pend
= p
+ coding
->produced
;
5030 int eol_seen
= EOL_SEEN_NONE
;
5032 for (; p
< pend
; p
++)
5035 eol_seen
|= EOL_SEEN_LF
;
5036 else if (*p
== '\r')
5038 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5040 eol_seen
|= EOL_SEEN_CRLF
;
5044 eol_seen
|= EOL_SEEN_CR
;
5047 if (eol_seen
!= EOL_SEEN_NONE
)
5048 adjust_coding_eol_type (coding
, eol_seen
);
5051 if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qmac
))
5053 unsigned char *p
= CHAR_POS_ADDR (coding
->dst_pos
);
5054 unsigned char *pend
= p
+ coding
->produced
;
5056 for (; p
< pend
; p
++)
5060 else if (EQ (CODING_ID_EOL_TYPE (coding
->id
), Qdos
))
5062 unsigned char *p
, *pbeg
, *pend
;
5063 Lisp_Object undo_list
;
5065 move_gap_both (coding
->dst_pos
+ coding
->produced_char
,
5066 coding
->dst_pos_byte
+ coding
->produced
);
5067 undo_list
= current_buffer
->undo_list
;
5068 current_buffer
->undo_list
= Qt
;
5069 del_range_2 (coding
->dst_pos
, coding
->dst_pos_byte
, GPT
, GPT_BYTE
, Qnil
);
5070 current_buffer
->undo_list
= undo_list
;
5072 pend
= pbeg
+ coding
->produced
;
5074 for (p
= pend
- 1; p
>= pbeg
; p
--)
5077 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
- p
- 1);
5080 coding
->produced_char
-= coding
->produced
- (pend
- pbeg
);
5081 coding
->produced
= pend
- pbeg
;
5082 insert_from_gap (coding
->produced_char
, coding
->produced
);
/* Translate the characters buffered in CODING->charbuf through TABLE,
   in place: the visible store overwrites an element with
   translate_char (TABLE, c) and advances to the next one.  The loop
   runs over the first CODING->charbuf_used elements.

   NOTE(review): the parameters are declared as (CODING, TABLE), but the
   calls visible in decode_coding and encode_coding pass the table
   first and CODING second.  An old-style definition like this one
   supplies no prototype, so unless one is declared elsewhere the
   compiler will not catch the mismatch -- confirm the intended order.  */
5087 translate_chars (coding
, table
)
5088 struct coding_system
*coding
;
5091 int *charbuf
= coding
->charbuf
;
5092 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
/* Tested before the loop; the statement this condition guards is not
   visible in this listing.  */
5095 if (coding
->chars_at_source
)
5098 while (charbuf
< charbuf_end
)
5104 *charbuf
++ = translate_char (table
, c
);
5109 produce_chars (coding
)
5110 struct coding_system
*coding
;
5112 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5113 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5115 int produced_chars
= 0;
5117 if (! coding
->chars_at_source
)
5119 /* Characters are in coding->charbuf. */
5120 int *buf
= coding
->charbuf
;
5121 int *buf_end
= buf
+ coding
->charbuf_used
;
5122 unsigned char *adjusted_dst_end
;
5124 if (BUFFERP (coding
->src_object
)
5125 && EQ (coding
->src_object
, coding
->dst_object
))
5126 dst_end
= coding
->source
+ coding
->consumed
;
5127 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5129 while (buf
< buf_end
)
5133 if (dst
>= adjusted_dst_end
)
5135 dst
= alloc_destination (coding
,
5136 buf_end
- buf
+ MAX_MULTIBYTE_LENGTH
,
5138 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5139 adjusted_dst_end
= dst_end
- MAX_MULTIBYTE_LENGTH
;
5143 if (coding
->dst_multibyte
5144 || ! CHAR_BYTE8_P (c
))
5145 CHAR_STRING_ADVANCE (c
, dst
);
5147 *dst
++ = CHAR_TO_BYTE8 (c
);
5151 /* This is annotation data. */
5157 unsigned char *src
= coding
->source
;
5158 unsigned char *src_end
= src
+ coding
->src_bytes
;
5159 Lisp_Object eol_type
;
5161 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5163 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5165 if (coding
->src_multibyte
)
5172 unsigned char *src_base
= src
;
5178 if (EQ (eol_type
, Qdos
))
5184 else if (EQ (eol_type
, Qmac
))
5189 coding
->consumed
= src
- coding
->source
;
5191 if (EQ (coding
->src_object
, coding
->dst_object
))
5195 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5197 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5198 coding_set_source (coding
);
5199 src
= coding
->source
+ coding
->consumed
;
5200 src_end
= coding
->source
+ coding
->src_bytes
;
5210 while (src
< src_end
)
5217 if (EQ (eol_type
, Qdos
))
5223 else if (EQ (eol_type
, Qmac
))
5226 if (dst
>= dst_end
- 1)
5228 coding
->consumed
= src
- coding
->source
;
5230 if (EQ (coding
->src_object
, coding
->dst_object
))
5232 if (dst
>= dst_end
- 1)
5234 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5236 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5237 coding_set_source (coding
);
5238 src
= coding
->source
+ coding
->consumed
;
5239 src_end
= coding
->source
+ coding
->src_bytes
;
5247 if (!EQ (coding
->src_object
, coding
->dst_object
))
5249 int require
= coding
->src_bytes
- coding
->dst_bytes
;
5253 EMACS_INT offset
= src
- coding
->source
;
5255 dst
= alloc_destination (coding
, require
, dst
);
5256 coding_set_source (coding
);
5257 src
= coding
->source
+ offset
;
5258 src_end
= coding
->source
+ coding
->src_bytes
;
5261 produced_chars
= coding
->src_chars
;
5262 while (src
< src_end
)
5268 if (EQ (eol_type
, Qdos
))
5275 else if (EQ (eol_type
, Qmac
))
5281 coding
->consumed
= coding
->src_bytes
;
5282 coding
->consumed_char
= coding
->src_chars
;
5285 produced
= dst
- (coding
->destination
+ coding
->produced
);
5286 if (BUFFERP (coding
->dst_object
))
5287 insert_from_gap (produced_chars
, produced
);
5288 coding
->produced
+= produced
;
5289 coding
->produced_char
+= produced_chars
;
5290 return produced_chars
;
5293 /* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5295 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5299 produce_composition (coding
, charbuf
)
5300 struct coding_system
*coding
;
5306 enum composition_method method
;
5308 Lisp_Object components
;
5310 buffer
= coding
->dst_object
;
5312 pos
= coding
->dst_pos
+ charbuf
[1];
5313 method
= (enum composition_method
) (charbuf
[3]);
5314 cmp_len
= charbuf
[4];
5316 if (method
== COMPOSITION_RELATIVE
)
5320 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
5325 for (i
= 0; i
< len
; i
++)
5326 args
[i
] = make_number (charbuf
[i
]);
5327 components
= (method
== COMPOSITION_WITH_ALTCHARS
5328 ? Fstring (len
, args
) : Fvector (len
, args
));
5330 compose_text (pos
, pos
+ cmp_len
, components
, Qnil
, Qnil
);
5334 save_composition_data (buf
, buf_end
, prop
)
5338 enum composition_method method
= COMPOSITION_METHOD (prop
);
5339 int cmp_len
= COMPOSITION_LENGTH (prop
);
5341 if (buf
+ 4 + (MAX_COMPOSITION_COMPONENTS
* 2 - 1) > buf_end
)
5344 buf
[1] = CODING_ANNOTATE_COMPOSITION_MASK
;
5348 if (method
== COMPOSITION_RELATIVE
)
5352 Lisp_Object components
;
5355 components
= COMPOSITION_COMPONENTS (prop
);
5356 if (VECTORP (components
))
5358 len
= XVECTOR (components
)->size
;
5359 for (i
= 0; i
< len
; i
++)
5360 buf
[4 + i
] = XINT (AREF (components
, i
));
5362 else if (STRINGP (components
))
5366 len
= XSTRING (components
)->size
;
5369 FETCH_STRING_CHAR_ADVANCE (buf
[4 + i
], components
, i
, i_byte
);
5371 else if (INTEGERP (components
))
5374 buf
[4] = XINT (components
);
5376 else if (CONSP (components
))
5378 for (len
= 0; CONSP (components
);
5379 len
++, components
= XCDR (components
))
5380 buf
[4 + len
] = XINT (XCAR (components
));
5386 return (buf
+ buf
[0]);
5389 #define CHARBUF_SIZE 0x4000
5391 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5393 int size = CHARBUF_SIZE;; \
5395 coding->charbuf = NULL; \
5396 while (size > 1024) \
5398 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5399 if (coding->charbuf) \
5403 if (! coding->charbuf) \
5405 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \
5406 return coding->result; \
5408 coding->charbuf_size = size; \
5413 produce_annotation (coding
)
5414 struct coding_system
*coding
;
5416 int *charbuf
= coding
->charbuf
;
5417 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5419 while (charbuf
< charbuf_end
)
5425 int len
= -*charbuf
;
5428 case CODING_ANNOTATE_COMPOSITION_MASK
:
5429 produce_composition (coding
, charbuf
);
5439 /* Decode the data at CODING->src_object into CODING->dst_object.
5440 CODING->src_object is a buffer, a string, or nil.
5441 CODING->dst_object is a buffer.
5443 If CODING->src_object is a buffer, it must be the current buffer.
5444 In this case, if CODING->src_pos is positive, it is a position of
5445 the source text in the buffer, otherwise, the source text is in the
5446 gap area of the buffer, and CODING->src_pos specifies the offset of
5447 the text from GPT (which must be the same as PT). If this is the
5448 same buffer as CODING->dst_object, CODING->src_pos must be
5451 If CODING->src_object is a string, CODING->src_pos is an index to
5454 If CODING->src_object is nil, CODING->source must already point to
5455 the non-relocatable memory area. In this case, CODING->src_pos is
5456 an offset from CODING->source.
5458 The decoded data is inserted at the current point of the buffer
5463 decode_coding (coding
)
5464 struct coding_system
*coding
;
5468 if (BUFFERP (coding
->src_object
)
5469 && coding
->src_pos
> 0
5470 && coding
->src_pos
< GPT
5471 && coding
->src_pos
+ coding
->src_chars
> GPT
)
5472 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
5474 if (BUFFERP (coding
->dst_object
))
5476 if (current_buffer
!= XBUFFER (coding
->dst_object
))
5477 set_buffer_internal (XBUFFER (coding
->dst_object
));
5479 move_gap_both (PT
, PT_BYTE
);
5482 coding
->consumed
= coding
->consumed_char
= 0;
5483 coding
->produced
= coding
->produced_char
= 0;
5484 coding
->chars_at_source
= 0;
5485 coding
->result
= CODING_RESULT_SUCCESS
;
5488 ALLOC_CONVERSION_WORK_AREA (coding
);
5490 attrs
= CODING_ID_ATTRS (coding
->id
);
5494 coding_set_source (coding
);
5495 coding
->annotated
= 0;
5496 (*(coding
->decoder
)) (coding
);
5497 if (!NILP (CODING_ATTR_DECODE_TBL (attrs
)))
5498 translate_chars (CODING_ATTR_DECODE_TBL (attrs
), coding
);
5499 coding_set_destination (coding
);
5500 produce_chars (coding
);
5501 if (coding
->annotated
)
5502 produce_annotation (coding
);
5504 while (coding
->consumed
< coding
->src_bytes
5505 && ! coding
->result
);
5507 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qccl
)
5508 && SYMBOLP (CODING_ID_EOL_TYPE (coding
->id
))
5509 && ! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
5510 decode_eol (coding
);
5512 coding
->carryover_bytes
= 0;
5513 if (coding
->consumed
< coding
->src_bytes
)
5515 int nbytes
= coding
->src_bytes
- coding
->consumed
;
5518 coding_set_source (coding
);
5519 coding_set_destination (coding
);
5520 src
= coding
->source
+ coding
->consumed
;
5522 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
5524 /* Flush out unprocessed data as binary chars. We are sure
5525 that the amount of data is less than the size of
5527 int *charbuf
= coding
->charbuf
;
5529 while (nbytes
-- > 0)
5532 *charbuf
++ = (c
& 0x80 ? - c
: c
);
5534 produce_chars (coding
);
5538 /* Record unprocessed bytes in coding->carryover. We are
5539 sure that the amount of data is less than the size of
5540 coding->carryover. */
5541 unsigned char *p
= coding
->carryover
;
5543 coding
->carryover_bytes
= nbytes
;
5544 while (nbytes
-- > 0)
5547 coding
->consumed
= coding
->src_bytes
;
5550 return coding
->result
;
5554 consume_chars (coding
)
5555 struct coding_system
*coding
;
5557 int *buf
= coding
->charbuf
;
5558 /* -1 is to compensate for CRLF. */
5559 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
- 1;
5560 unsigned char *src
= coding
->source
+ coding
->consumed
;
5561 int pos
= coding
->src_pos
+ coding
->consumed_char
;
5562 int end_pos
= coding
->src_pos
+ coding
->src_chars
;
5563 int multibytep
= coding
->src_multibyte
;
5564 Lisp_Object eol_type
;
5566 int start
, end
, stop
;
5567 Lisp_Object object
, prop
;
5569 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5570 if (VECTORP (eol_type
))
5573 object
= coding
->src_object
;
5575 /* Note: composition handling is not yet implemented. */
5576 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
5578 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
5579 && find_composition (pos
, end_pos
, &start
, &end
, &prop
, object
)
5582 || (find_composition (end
, end_pos
, &start
, &end
, &prop
, object
)
5583 && end
<= end_pos
)))
5588 while (buf
< buf_end
)
5596 p
= save_composition_data (buf
, buf_end
, prop
);
5600 if (find_composition (end
, end_pos
, &start
, &end
, &prop
, object
)
5610 c
= STRING_CHAR_ADVANCE (src
);
5611 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
5613 if (! EQ (eol_type
, Qunix
))
5617 if (EQ (eol_type
, Qdos
))
5627 coding
->consumed
= src
- coding
->source
;
5628 coding
->consumed_char
= pos
- coding
->src_pos
;
5629 coding
->charbuf_used
= buf
- coding
->charbuf
;
5630 coding
->chars_at_source
= 0;
5634 /* Encode the text at CODING->src_object into CODING->dst_object.
5635 CODING->src_object is a buffer or a string.
5636 CODING->dst_object is a buffer or nil.
5638 If CODING->src_object is a buffer, it must be the current buffer.
5639 In this case, if CODING->src_pos is positive, it is a position of
5640 the source text in the buffer, otherwise, the source text is in the
5641 gap area of the buffer, and coding->src_pos specifies the offset of
5642 the text from GPT (which must be the same as PT). If this is the
5643 same buffer as CODING->dst_object, CODING->src_pos must be
5644 negative and CODING should not have `pre-write-conversion'.
5646 If CODING->src_object is a string, CODING should not have
5647 `pre-write-conversion'.
5649 If CODING->dst_object is a buffer, the encoded data is inserted at
5650 the current point of that buffer.
5652 If CODING->dst_object is nil, the encoded data is placed at the
5653 memory area specified by CODING->destination. */
5656 encode_coding (coding
)
5657 struct coding_system
*coding
;
5661 attrs
= CODING_ID_ATTRS (coding
->id
);
5663 if (BUFFERP (coding
->dst_object
))
5665 set_buffer_internal (XBUFFER (coding
->dst_object
));
5666 coding
->dst_multibyte
5667 = ! NILP (current_buffer
->enable_multibyte_characters
);
5670 coding
->consumed
= coding
->consumed_char
= 0;
5671 coding
->produced
= coding
->produced_char
= 0;
5672 coding
->result
= CODING_RESULT_SUCCESS
;
5675 ALLOC_CONVERSION_WORK_AREA (coding
);
5678 coding_set_source (coding
);
5679 consume_chars (coding
);
5681 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs
)))
5682 translate_chars (CODING_ATTR_ENCODE_TBL (attrs
), coding
);
5684 coding_set_destination (coding
);
5685 (*(coding
->encoder
)) (coding
);
5686 } while (coding
->consumed_char
< coding
->src_chars
);
5688 if (BUFFERP (coding
->dst_object
))
5689 insert_from_gap (coding
->produced_char
, coding
->produced
);
5691 return (coding
->result
);
5696 /* List of currently used working buffers. */
5697 Lisp_Object Vcode_conversion_work_buf_list
;
5699 /* A working buffer used by the top level conversion. */
5700 Lisp_Object Vcode_conversion_reused_work_buf
;
5703 /* Return a working buffer that can be freely used by the following
5704 code conversion. MULTIBYTEP specifies the multibyteness of the
5708 make_conversion_work_buffer (multibytep
)
5711 struct buffer
*current
= current_buffer
;
/* When the list of work buffers is empty, start it with the shared
   buffer " *code-conversion-work*", creating that buffer first if
   Vcode_conversion_reused_work_buf is still nil.  */
5714 if (NILP (Vcode_conversion_work_buf_list
))
5716 if (NILP (Vcode_conversion_reused_work_buf
))
5717 Vcode_conversion_reused_work_buf
5718 = Fget_buffer_create (build_string (" *code-conversion-work*"));
5719 Vcode_conversion_work_buf_list
5720 = Fcons (Vcode_conversion_reused_work_buf
, Qnil
);
/* Presumably the alternative branch (its `else' is not visible in this
   listing): push a buffer whose name carries the current list length.
   NOTE(review): the result of Flength is stored in an int and printed
   with %d.  If Flength returns a Lisp_Object, as the other F-functions
   used here do, this presumably needs XINT -- confirm.  */
5724 int depth
= Flength (Vcode_conversion_work_buf_list
);
5727 sprintf (str
, " *code-conversion-work*<%d>", depth
);
5728 Vcode_conversion_work_buf_list
5729 = Fcons (Fget_buffer_create (build_string (str
)),
5730 Vcode_conversion_work_buf_list
);
/* Make the head of the list current, set its undo_list to t and its
   multibyteness from MULTIBYTEP, then switch back to the buffer that
   was current on entry.  */
5733 buf
= XCAR (Vcode_conversion_work_buf_list
);
5734 set_buffer_internal (XBUFFER (buf
));
5735 current_buffer
->undo_list
= Qt
;
5737 Fset_buffer_multibyte (multibytep
? Qt
: Qnil
);
5738 set_buffer_internal (current
);
5742 static struct coding_system
*saved_coding
;
/* Unwind handler that the conversion entry points in this file register
   with record_unwind_protect, passing the value of
   save_excursion_save () as INFO.  It pops the head of
   Vcode_conversion_work_buf_list, may free saved_coding->destination,
   and finishes by returning save_excursion_restore (INFO).  */
5745 code_conversion_restore (info
)
/* NOTE(review): the result of Flength is stored in an int and later
   compared with 1.  If Flength returns a Lisp_Object this presumably
   needs XINT -- confirm.  */
5748 int depth
= Flength (Vcode_conversion_work_buf_list
);
5753 buf
= XCAR (Vcode_conversion_work_buf_list
);
5754 Vcode_conversion_work_buf_list
= XCDR (Vcode_conversion_work_buf_list
);
/* The statement guarded by this test is not visible in this listing.
   It applies only when more than one work buffer was in use and the
   popped buffer is still live.  */
5755 if (depth
> 1 && !NILP (Fbuffer_live_p (buf
)))
/* NOTE(review): Lisp objects are compared with EQ elsewhere in this
   file; `==' here looks inconsistent -- confirm.  */
5759 if (saved_coding
->dst_object
== Qt
5760 && saved_coding
->destination
)
5761 xfree (saved_coding
->destination
);
5763 return save_excursion_restore (info
);
5768 decode_coding_gap (coding
, chars
, bytes
)
5769 struct coding_system
*coding
;
5770 EMACS_INT chars
, bytes
;
5772 int count
= specpdl_ptr
- specpdl
;
5774 saved_coding
= coding
;
5775 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5777 coding
->src_object
= Fcurrent_buffer ();
5778 coding
->src_chars
= chars
;
5779 coding
->src_bytes
= bytes
;
5780 coding
->src_pos
= -chars
;
5781 coding
->src_pos_byte
= -bytes
;
5782 coding
->src_multibyte
= chars
< bytes
;
5783 coding
->dst_object
= coding
->src_object
;
5784 coding
->dst_pos
= PT
;
5785 coding
->dst_pos_byte
= PT_BYTE
;
5786 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
5788 if (CODING_REQUIRE_DETECTION (coding
))
5789 detect_coding (coding
);
5791 decode_coding (coding
);
5793 unbind_to (count
, Qnil
);
5794 return coding
->result
;
5798 encode_coding_gap (coding
, chars
, bytes
)
5799 struct coding_system
*coding
;
5800 EMACS_INT chars
, bytes
;
5802 int count
= specpdl_ptr
- specpdl
;
5805 saved_coding
= coding
;
5806 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5808 buffer
= Fcurrent_buffer ();
5809 coding
->src_object
= buffer
;
5810 coding
->src_chars
= chars
;
5811 coding
->src_bytes
= bytes
;
5812 coding
->src_pos
= -chars
;
5813 coding
->src_pos_byte
= -bytes
;
5814 coding
->src_multibyte
= chars
< bytes
;
5815 coding
->dst_object
= coding
->src_object
;
5816 coding
->dst_pos
= PT
;
5817 coding
->dst_pos_byte
= PT_BYTE
;
5819 encode_coding (coding
);
5821 unbind_to (count
, Qnil
);
5822 return coding
->result
;
5826 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5827 SRC_OBJECT into DST_OBJECT by coding context CODING.
5829 SRC_OBJECT is a buffer, a string, or Qnil.
5831 If it is a buffer, the text is at point of the buffer. FROM and TO
5832 are positions in the buffer.
5834 If it is a string, the text is at the beginning of the string.
5835 FROM and TO are indices to the string.
5837 If it is nil, the text is at coding->source. FROM and TO are
5838 indices to coding->source.
5840 DST_OBJECT is a buffer, Qt, or Qnil.
5842 If it is a buffer, the decoded text is inserted at point of the
5843 buffer. If the buffer is the same as SRC_OBJECT, the source text
5846 If it is Qt, a string is made from the decoded text, and
5847 set in CODING->dst_object.
5849 If it is Qnil, the decoded text is stored at CODING->destination.
5850 The caller must allocate CODING->dst_bytes bytes at
5851 CODING->destination by xmalloc. If the decoded text is longer than
5852 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5856 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
5858 struct coding_system
*coding
;
5859 Lisp_Object src_object
;
5860 EMACS_INT from
, from_byte
, to
, to_byte
;
5861 Lisp_Object dst_object
;
5863 int count
= specpdl_ptr
- specpdl
;
5864 unsigned char *destination
;
5865 EMACS_INT dst_bytes
;
5866 EMACS_INT chars
= to
- from
;
5867 EMACS_INT bytes
= to_byte
- from_byte
;
5870 saved_coding
= coding
;
5871 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5873 if (NILP (dst_object
))
5875 destination
= coding
->destination
;
5876 dst_bytes
= coding
->dst_bytes
;
5879 coding
->src_object
= src_object
;
5880 coding
->src_chars
= chars
;
5881 coding
->src_bytes
= bytes
;
5882 coding
->src_multibyte
= chars
< bytes
;
5884 if (STRINGP (src_object
))
5886 coding
->src_pos
= from
;
5887 coding
->src_pos_byte
= from_byte
;
5889 else if (BUFFERP (src_object
))
5891 set_buffer_internal (XBUFFER (src_object
));
5893 move_gap_both (from
, from_byte
);
5894 if (EQ (src_object
, dst_object
))
5896 TEMP_SET_PT_BOTH (from
, from_byte
);
5897 del_range_both (from
, from_byte
, to
, to_byte
, 1);
5898 coding
->src_pos
= -chars
;
5899 coding
->src_pos_byte
= -bytes
;
5903 coding
->src_pos
= from
;
5904 coding
->src_pos_byte
= from_byte
;
5908 if (CODING_REQUIRE_DETECTION (coding
))
5909 detect_coding (coding
);
5910 attrs
= CODING_ID_ATTRS (coding
->id
);
5912 if (! NILP (CODING_ATTR_POST_READ (attrs
))
5913 || EQ (dst_object
, Qt
))
5915 coding
->dst_object
= make_conversion_work_buffer (1);
5916 coding
->dst_pos
= BEG
;
5917 coding
->dst_pos_byte
= BEG_BYTE
;
5918 coding
->dst_multibyte
= 1;
5920 else if (BUFFERP (dst_object
))
5922 coding
->dst_object
= dst_object
;
5923 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
5924 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
5925 coding
->dst_multibyte
5926 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
5930 coding
->dst_object
= Qnil
;
5931 coding
->dst_multibyte
= 1;
5934 decode_coding (coding
);
5936 if (BUFFERP (coding
->dst_object
))
5937 set_buffer_internal (XBUFFER (coding
->dst_object
));
5939 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
5941 struct gcpro gcpro1
, gcpro2
;
5942 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
5945 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
5946 GCPRO2 (coding
->src_object
, coding
->dst_object
);
5947 val
= call1 (CODING_ATTR_POST_READ (attrs
),
5948 make_number (coding
->produced_char
));
5951 coding
->produced_char
+= Z
- prev_Z
;
5952 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
5955 if (EQ (dst_object
, Qt
))
5957 coding
->dst_object
= Fbuffer_string ();
5959 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
5961 set_buffer_internal (XBUFFER (coding
->dst_object
));
5962 if (dst_bytes
< coding
->produced
)
5965 = (unsigned char *) xrealloc (destination
, coding
->produced
);
5968 coding
->result
= CODING_RESULT_INSUFFICIENT_DST
;
5969 unbind_to (count
, Qnil
);
5972 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
5973 move_gap_both (BEGV
, BEGV_BYTE
);
5974 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
5975 coding
->destination
= destination
;
5979 unbind_to (count
, Qnil
);
5984 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
5986 struct coding_system
*coding
;
5987 Lisp_Object src_object
;
5988 EMACS_INT from
, from_byte
, to
, to_byte
;
5989 Lisp_Object dst_object
;
5991 int count
= specpdl_ptr
- specpdl
;
5992 EMACS_INT chars
= to
- from
;
5993 EMACS_INT bytes
= to_byte
- from_byte
;
5996 saved_coding
= coding
;
5997 record_unwind_protect (code_conversion_restore
, save_excursion_save ());
5999 coding
->src_object
= src_object
;
6000 coding
->src_chars
= chars
;
6001 coding
->src_bytes
= bytes
;
6002 coding
->src_multibyte
= chars
< bytes
;
6004 attrs
= CODING_ID_ATTRS (coding
->id
);
6006 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
6008 coding
->src_object
= make_conversion_work_buffer (coding
->src_multibyte
);
6009 set_buffer_internal (XBUFFER (coding
->src_object
));
6010 if (STRINGP (src_object
))
6011 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
6012 else if (BUFFERP (src_object
))
6013 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
6015 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
6017 if (EQ (src_object
, dst_object
))
6019 set_buffer_internal (XBUFFER (src_object
));
6020 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6021 set_buffer_internal (XBUFFER (coding
->src_object
));
6024 call2 (CODING_ATTR_PRE_WRITE (attrs
),
6025 make_number (BEG
), make_number (Z
));
6026 coding
->src_object
= Fcurrent_buffer ();
6028 move_gap_both (BEG
, BEG_BYTE
);
6029 coding
->src_chars
= Z
- BEG
;
6030 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6031 coding
->src_pos
= BEG
;
6032 coding
->src_pos_byte
= BEG_BYTE
;
6033 coding
->src_multibyte
= Z
< Z_BYTE
;
6035 else if (STRINGP (src_object
))
6037 coding
->src_pos
= from
;
6038 coding
->src_pos_byte
= from_byte
;
6040 else if (BUFFERP (src_object
))
6042 set_buffer_internal (XBUFFER (src_object
));
6044 move_gap_both (from
, from_byte
);
6045 if (EQ (src_object
, dst_object
))
6047 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6048 coding
->src_pos
= -chars
;
6049 coding
->src_pos_byte
= -bytes
;
6053 coding
->src_pos
= from
;
6054 coding
->src_pos_byte
= from_byte
;
6058 if (BUFFERP (dst_object
))
6060 coding
->dst_object
= dst_object
;
6061 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6062 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6063 coding
->dst_multibyte
6064 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6066 else if (EQ (dst_object
, Qt
))
6068 coding
->dst_object
= Qnil
;
6069 coding
->dst_bytes
= coding
->src_chars
;
6070 if (coding
->dst_bytes
== 0)
6071 coding
->dst_bytes
= 1;
6072 coding
->destination
= (unsigned char *) xmalloc (coding
->dst_bytes
);
6073 coding
->dst_multibyte
= 0;
6077 coding
->dst_object
= Qnil
;
6078 coding
->dst_multibyte
= 0;
6081 encode_coding (coding
);
6083 if (EQ (dst_object
, Qt
))
6085 if (BUFFERP (coding
->dst_object
))
6086 coding
->dst_object
= Fbuffer_string ();
6090 = make_unibyte_string ((char *) coding
->destination
,
6092 xfree (coding
->destination
);
6096 unbind_to (count
, Qnil
);
/* Return CODING_ID_NAME of the coding system stored in coding_categories
   for the first entry of coding_priorities.  */
6101 preferred_coding_system ()
6103 int id
= coding_categories
[coding_priorities
[0]].id
;
6105 return CODING_ID_NAME (id
);
6110 /*** 8. Emacs Lisp library functions ***/
6112 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
6113 doc
: /* Return t if OBJECT is nil or a coding-system.
6114 See the documentation of `define-coding-system' for information
6115 about coding-system objects. */)
6119 return ((NILP (obj
) || CODING_SYSTEM_P (obj
)) ? Qt
: Qnil
);
6122 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
6123 Sread_non_nil_coding_system
, 1, 1, 0,
6124 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6131 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6132 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
6134 while (XSTRING (val
)->size
== 0);
6135 return (Fintern (val
, Qnil
));
6138 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
6139 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6140 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6141 (prompt
, default_coding_system
)
6142 Lisp_Object prompt
, default_coding_system
;
6145 if (SYMBOLP (default_coding_system
))
6146 XSETSTRING (default_coding_system
, XSYMBOL (default_coding_system
)->name
);
6147 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6148 Qt
, Qnil
, Qcoding_system_history
,
6149 default_coding_system
, Qnil
);
6150 return (XSTRING (val
)->size
== 0 ? Qnil
: Fintern (val
, Qnil
));
6153 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
6155 doc
: /* Check validity of CODING-SYSTEM.
6156 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6157 It is valid if it is a symbol with a non-nil `coding-system' property.
6158 The value of property should be a vector of length 5. */)
6160 Lisp_Object coding_system
;
6162 CHECK_SYMBOL (coding_system
);
6163 if (!NILP (Fcoding_system_p (coding_system
)))
6164 return coding_system
;
6166 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
6171 detect_coding_system (src
, src_bytes
, highest
, multibytep
, coding_system
)
6173 int src_bytes
, highest
;
6175 Lisp_Object coding_system
;
6177 unsigned char *src_end
= src
+ src_bytes
;
6178 int mask
= CATEGORY_MASK_ANY
;
6181 Lisp_Object attrs
, eol_type
;
6183 struct coding_system coding
;
6185 if (NILP (coding_system
))
6186 coding_system
= Qundecided
;
6187 setup_coding_system (coding_system
, &coding
);
6188 attrs
= CODING_ID_ATTRS (coding
.id
);
6189 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
6191 coding
.source
= src
;
6192 coding
.src_bytes
= src_bytes
;
6193 coding
.src_multibyte
= multibytep
;
6194 coding
.consumed
= 0;
6196 if (XINT (CODING_ATTR_CATEGORY (attrs
)) != coding_category_undecided
)
6198 mask
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
6202 coding_system
= Qnil
;
6203 for (; src
< src_end
; src
++)
6206 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
6208 || c
== ISO_CODE_SO
)))
6211 coding
.head_ascii
= src
- coding
.source
;
6214 for (i
= 0; i
< coding_category_raw_text
; i
++)
6216 enum coding_category category
= coding_priorities
[i
];
6217 struct coding_system
*this = coding_categories
+ category
;
6219 if (category
>= coding_category_raw_text
6220 || detected
& (1 << category
))
6225 /* No coding system of this category is defined. */
6226 mask
&= ~(1 << category
);
6230 detected
|= detected_mask
[category
];
6231 if ((*(coding_categories
[category
].detector
)) (&coding
, &mask
)
6234 mask
&= detected_mask
[category
];
6242 val
= Fcons (make_number (coding_category_raw_text
), Qnil
);
6243 else if (mask
== CATEGORY_MASK_ANY
)
6244 val
= Fcons (make_number (coding_category_undecided
), Qnil
);
6247 for (i
= 0; i
< coding_category_raw_text
; i
++)
6248 if (mask
& (1 << coding_priorities
[i
]))
6250 val
= Fcons (make_number (coding_priorities
[i
]), Qnil
);
6257 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
6258 if (mask
& (1 << coding_priorities
[i
]))
6259 val
= Fcons (make_number (coding_priorities
[i
]), val
);
6263 int one_byte_eol
= -1, two_byte_eol
= -1;
6266 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
6268 struct coding_system
*this
6269 = (NILP (coding_system
) ? coding_categories
+ XINT (XCAR (tail
))
6273 attrs
= CODING_ID_ATTRS (this->id
);
6274 eol_type
= CODING_ID_EOL_TYPE (this->id
);
6275 XSETCAR (tail
, CODING_ID_NAME (this->id
));
6276 if (VECTORP (eol_type
))
6278 if (EQ (CODING_ATTR_TYPE (attrs
), Qutf_16
))
6280 if (two_byte_eol
< 0)
6281 two_byte_eol
= detect_eol (this, coding
.source
, src_bytes
);
6282 this_eol
= two_byte_eol
;
6286 if (one_byte_eol
< 0)
6287 one_byte_eol
=detect_eol (this, coding
.source
, src_bytes
);
6288 this_eol
= one_byte_eol
;
6290 if (this_eol
== EOL_SEEN_LF
)
6291 XSETCAR (tail
, AREF (eol_type
, 0));
6292 else if (this_eol
== EOL_SEEN_CRLF
)
6293 XSETCAR (tail
, AREF (eol_type
, 1));
6294 else if (this_eol
== EOL_SEEN_CR
)
6295 XSETCAR (tail
, AREF (eol_type
, 2));
6300 return (highest
? XCAR (val
) : val
);
6304 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
6306 doc
: /* Detect coding system of the text in the region between START and END.
6307 Return a list of possible coding systems ordered by priority.
6309 If only ASCII characters are found, it returns a list of single element
6310 `undecided' or its subsidiary coding system according to a detected
6313 If optional argument HIGHEST is non-nil, return the coding system of
6314 highest priority. */)
6315 (start
, end
, highest
)
6316 Lisp_Object start
, end
, highest
;
6319 int from_byte
, to_byte
;
6321 CHECK_NUMBER_COERCE_MARKER (start
);
6322 CHECK_NUMBER_COERCE_MARKER (end
);
6324 validate_region (&start
, &end
);
6325 from
= XINT (start
), to
= XINT (end
);
6326 from_byte
= CHAR_TO_BYTE (from
);
6327 to_byte
= CHAR_TO_BYTE (to
);
6329 if (from
< GPT
&& to
>= GPT
)
6330 move_gap_both (to
, to_byte
);
6332 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
6333 to_byte
- from_byte
,
6335 !NILP (current_buffer
6336 ->enable_multibyte_characters
),
6340 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
6342 doc
: /* Detect coding system of the text in STRING.
6343 Return a list of possible coding systems ordered by priority.
6345 If only ASCII characters are found, it returns a list of single element
6346 `undecided' or its subsidiary coding system according to a detected
6349 If optional argument HIGHEST is non-nil, return the coding system of
6350 highest priority. */)
6352 Lisp_Object string
, highest
;
6354 CHECK_STRING (string
);
6356 return detect_coding_system (XSTRING (string
)->data
,
6357 STRING_BYTES (XSTRING (string
)),
6359 STRING_MULTIBYTE (string
),
6365 char_encodable_p (c
, attrs
)
6370 struct charset
*charset
;
6372 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
6373 CONSP (tail
); tail
= XCDR (tail
))
6375 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
6376 if (CHAR_CHARSET_P (c
, charset
))
6379 return (! NILP (tail
));
6383 /* Return a list of coding systems that safely encode the text between
6384 START and END. If EXCLUDE is non-nil, it is a list of coding
6385 systems not to check. The returned list doesn't contain any such
6386 coding systems. In any case, if the text contains only ASCII or is
6387 unibyte, return t. */
6389 DEFUN ("find-coding-systems-region-internal",
6390 Ffind_coding_systems_region_internal
,
6391 Sfind_coding_systems_region_internal
, 2, 3, 0,
6392 doc
: /* Internal use only. */)
6393 (start
, end
, exclude
)
6394 Lisp_Object start
, end
, exclude
;
6396 Lisp_Object coding_attrs_list
, safe_codings
;
6397 EMACS_INT start_byte
, end_byte
;
6398 unsigned char *p
, *pbeg
, *pend
;
6400 Lisp_Object tail
, elt
;
6402 if (STRINGP (start
))
6404 if (!STRING_MULTIBYTE (start
)
6405 && XSTRING (start
)->size
!= STRING_BYTES (XSTRING (start
)))
6408 end_byte
= STRING_BYTES (XSTRING (start
));
6412 CHECK_NUMBER_COERCE_MARKER (start
);
6413 CHECK_NUMBER_COERCE_MARKER (end
);
6414 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
6415 args_out_of_range (start
, end
);
6416 if (NILP (current_buffer
->enable_multibyte_characters
))
6418 start_byte
= CHAR_TO_BYTE (XINT (start
));
6419 end_byte
= CHAR_TO_BYTE (XINT (end
));
6420 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
6423 if (start
< GPT
&& end
> GPT
)
6425 if ((GPT
- start
) < (end
- GPT
))
6426 move_gap_both (start
, start_byte
);
6428 move_gap_both (end
, end_byte
);
6432 coding_attrs_list
= Qnil
;
6433 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
6435 || NILP (Fmemq (XCAR (tail
), exclude
)))
6439 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
6440 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
6441 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
6442 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
6445 if (STRINGP (start
))
6446 p
= pbeg
= XSTRING (start
)->data
;
6448 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
6449 pend
= p
+ (end_byte
- start_byte
);
6451 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
6452 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
6456 if (ASCII_BYTE_P (*p
))
6460 c
= STRING_CHAR_ADVANCE (p
);
6462 charset_map_loaded
= 0;
6463 for (tail
= coding_attrs_list
; CONSP (tail
);)
6468 else if (char_encodable_p (c
, elt
))
6470 else if (CONSP (XCDR (tail
)))
6472 XSETCAR (tail
, XCAR (XCDR (tail
)));
6473 XSETCDR (tail
, XCDR (XCDR (tail
)));
6477 XSETCAR (tail
, Qnil
);
6481 if (charset_map_loaded
)
6483 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
6485 if (STRINGP (start
))
6486 pbeg
= XSTRING (start
)->data
;
6488 pbeg
= BYTE_POS_ADDR (start_byte
);
6489 p
= pbeg
+ p_offset
;
6490 pend
= pbeg
+ pend_offset
;
6495 safe_codings
= Qnil
;
6496 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
6497 if (! NILP (XCAR (tail
)))
6498 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
6500 return safe_codings
;
6504 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
6505 Scheck_coding_systems_region
, 3, 3, 0,
6506 doc
: /* Check if the region is encodable by coding systems.
6508 START and END are buffer positions specifying the region.
6509 CODING-SYSTEM-LIST is a list of coding systems to check.
6511 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6512 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
6513 whole region, POS0, POS1, ... are buffer positions where non-encodable
6514 characters are found.
6516 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6519 START may be a string. In that case, check if the string is
6520 encodable, and the value contains indices to the string instead of
6521 buffer positions. END is ignored. */)
6522 (start
, end
, coding_system_list
)
6523 Lisp_Object start
, end
, coding_system_list
;
6526 EMACS_INT start_byte
, end_byte
;
6528 unsigned char *p
, *pbeg
, *pend
;
6530 Lisp_Object tail
, elt
;
6532 if (STRINGP (start
))
6534 if (!STRING_MULTIBYTE (start
)
6535 && XSTRING (start
)->size
!= STRING_BYTES (XSTRING (start
)))
6538 end_byte
= STRING_BYTES (XSTRING (start
));
6543 CHECK_NUMBER_COERCE_MARKER (start
);
6544 CHECK_NUMBER_COERCE_MARKER (end
);
6545 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
6546 args_out_of_range (start
, end
);
6547 if (NILP (current_buffer
->enable_multibyte_characters
))
6549 start_byte
= CHAR_TO_BYTE (XINT (start
));
6550 end_byte
= CHAR_TO_BYTE (XINT (end
));
6551 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
6554 if (start
< GPT
&& end
> GPT
)
6556 if ((GPT
- start
) < (end
- GPT
))
6557 move_gap_both (start
, start_byte
);
6559 move_gap_both (end
, end_byte
);
6565 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
6568 list
= Fcons (Fcons (elt
, Fcons (AREF (CODING_SYSTEM_SPEC (elt
), 0),
6573 if (STRINGP (start
))
6574 p
= pbeg
= XSTRING (start
)->data
;
6576 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
6577 pend
= p
+ (end_byte
- start_byte
);
6579 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
6580 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
6584 if (ASCII_BYTE_P (*p
))
6588 c
= STRING_CHAR_ADVANCE (p
);
6590 charset_map_loaded
= 0;
6591 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
6593 elt
= XCDR (XCAR (tail
));
6594 if (! char_encodable_p (c
, XCAR (elt
)))
6595 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
6597 if (charset_map_loaded
)
6599 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
6601 if (STRINGP (start
))
6602 pbeg
= XSTRING (start
)->data
;
6604 pbeg
= BYTE_POS_ADDR (start_byte
);
6605 p
= pbeg
+ p_offset
;
6606 pend
= pbeg
+ pend_offset
;
6614 for (; CONSP (tail
); tail
= XCDR (tail
))
6617 if (CONSP (XCDR (XCDR (elt
))))
6618 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
6628 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
6629 Lisp_Object start
, end
, coding_system
, dst_object
;
6630 int encodep
, norecord
;
6632 struct coding_system coding
;
6633 EMACS_INT from
, from_byte
, to
, to_byte
;
6634 Lisp_Object src_object
;
6636 CHECK_NUMBER_COERCE_MARKER (start
);
6637 CHECK_NUMBER_COERCE_MARKER (end
);
6638 if (NILP (coding_system
))
6639 coding_system
= Qno_conversion
;
6641 CHECK_CODING_SYSTEM (coding_system
);
6642 src_object
= Fcurrent_buffer ();
6643 if (NILP (dst_object
))
6644 dst_object
= src_object
;
6645 else if (! EQ (dst_object
, Qt
))
6646 CHECK_BUFFER (dst_object
);
6648 validate_region (&start
, &end
);
6649 from
= XFASTINT (start
);
6650 from_byte
= CHAR_TO_BYTE (from
);
6651 to
= XFASTINT (end
);
6652 to_byte
= CHAR_TO_BYTE (to
);
6654 setup_coding_system (coding_system
, &coding
);
6655 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6658 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
6661 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
6664 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
6666 if (coding
.result
!= CODING_RESULT_SUCCESS
)
6667 error ("Code conversion error: %d", coding
.result
);
6669 return (BUFFERP (dst_object
)
6670 ? make_number (coding
.produced_char
)
6671 : coding
.dst_object
);
6675 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
6676 3, 4, "r\nzCoding system: ",
6677 doc
: /* Decode the current region from the specified coding system.
6678 When called from a program, takes four arguments:
6679 START, END, CODING-SYSTEM, and DESTINATION.
6680 START and END are buffer positions.
6682 Optional 4th argument DESTINATION specifies where the decoded text goes.
6683 If nil, the region between START and END is replaced by the decoded text.
6684 If buffer, the decoded text is inserted in the buffer.
6685 If t, the decoded text is returned.
6687 This function sets `last-coding-system-used' to the precise coding system
6688 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6689 not fully specified.)
6690 It returns the length of the decoded text. */)
6691 (start
, end
, coding_system
, destination
)
6692 Lisp_Object start
, end
, coding_system
, destination
;
6694 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
6697 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
6698 3, 4, "r\nzCoding system: ",
6699 doc
: /* Encode the current region by specified coding system.
6700 When called from a program, takes three arguments:
6701 START, END, and CODING-SYSTEM. START and END are buffer positions.
6703 Optional 4th argument DESTINATION specifies where the encoded text goes.
6704 If nil, the region between START and END is replaced by the encoded text.
6705 If buffer, the encoded text is inserted in the buffer.
6706 If t, the encoded text is returned.
6708 This function sets `last-coding-system-used' to the precise coding system
6709 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6710 not fully specified.)
6711 It returns the length of the encoded text. */)
6712 (start
, end
, coding_system
, destination
)
6713 Lisp_Object start
, end
, coding_system
, destination
;
6715 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
6719 code_convert_string (string
, coding_system
, dst_object
,
6720 encodep
, nocopy
, norecord
)
6721 Lisp_Object string
, coding_system
, dst_object
;
6722 int encodep
, nocopy
, norecord
;
6724 struct coding_system coding
;
6725 EMACS_INT chars
, bytes
;
6727 CHECK_STRING (string
);
6728 if (NILP (coding_system
))
6731 Vlast_coding_system_used
= Qno_conversion
;
6732 if (NILP (dst_object
))
6733 return (nocopy
? Fcopy_sequence (string
) : string
);
6736 if (NILP (coding_system
))
6737 coding_system
= Qno_conversion
;
6739 CHECK_CODING_SYSTEM (coding_system
);
6740 if (NILP (dst_object
))
6742 else if (! EQ (dst_object
, Qt
))
6743 CHECK_BUFFER (dst_object
);
6745 setup_coding_system (coding_system
, &coding
);
6746 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
6747 chars
= XSTRING (string
)->size
;
6748 bytes
= STRING_BYTES (XSTRING (string
));
6750 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
6752 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
6754 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
6756 if (coding
.result
!= CODING_RESULT_SUCCESS
)
6757 error ("Code conversion error: %d", coding
.result
);
6759 return (BUFFERP (dst_object
)
6760 ? make_number (coding
.produced_char
)
6761 : coding
.dst_object
);
6765 /* Encode or decode STRING according to CODING_SYSTEM.
6766 Do not set Vlast_coding_system_used.
6768 This function is called only from macros DECODE_FILE and
6769 ENCODE_FILE, thus we ignore character composition. */
6772 code_convert_string_norecord (string
, coding_system
, encodep
)
6773 Lisp_Object string
, coding_system
;
6776 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
6780 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
6782 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6784 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6785 if the decoding operation is trivial.
6787 Optional fourth arg BUFFER non-nil means that the decoded text is
6788 inserted in BUFFER instead of returned as a string. In this case,
6789 the return value is BUFFER.
6791 This function sets `last-coding-system-used' to the precise coding system
6792 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6793 not fully specified.) */)
6794 (string
, coding_system
, nocopy
, buffer
)
6795 Lisp_Object string
, coding_system
, nocopy
, buffer
;
6797 return code_convert_string (string
, coding_system
, buffer
,
6798 0, ! NILP (nocopy
), 0);
6801 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
6803 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
6805 Optional third arg NOCOPY non-nil means it is OK to return STRING
6806 itself if the encoding operation is trivial.
6808 Optional fourth arg BUFFER non-nil meant that the encoded text is
6809 inserted in BUFFER instead of returned as a astring. In this case,
6810 the return value is BUFFER.
6812 This function sets `last-coding-system-used' to the precise coding system
6813 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6814 not fully specified.) */)
6815 (string
, coding_system
, nocopy
, buffer
)
6816 Lisp_Object string
, coding_system
, nocopy
, buffer
;
6818 return code_convert_string (string
, coding_system
, buffer
,
6819 nocopy
, ! NILP (nocopy
), 1);
6823 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
6824 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
6825 Return the corresponding character. */)
6829 Lisp_Object spec
, attrs
, val
;
6830 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
6833 CHECK_NATNUM (code
);
6834 c
= XFASTINT (code
);
6835 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
6836 attrs
= AREF (spec
, 0);
6838 if (ASCII_BYTE_P (c
)
6839 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6842 val
= CODING_ATTR_CHARSET_LIST (attrs
);
6843 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
6844 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
6845 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
6848 charset
= charset_roman
;
6849 else if (c
>= 0xA0 && c
< 0xDF)
6851 charset
= charset_kana
;
6856 int s1
= c
>> 8, s2
= c
& 0xFF;
6858 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
6859 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
6860 error ("Invalid code: %d", code
);
6862 charset
= charset_kanji
;
6864 c
= DECODE_CHAR (charset
, c
);
6866 error ("Invalid code: %d", code
);
6867 return make_number (c
);
6871 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
6872 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
6873 Return the corresponding code in SJIS. */)
6877 Lisp_Object spec
, attrs
, charset_list
;
6879 struct charset
*charset
;
6882 CHECK_CHARACTER (ch
);
6884 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
6885 attrs
= AREF (spec
, 0);
6887 if (ASCII_CHAR_P (c
)
6888 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6891 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
6892 charset
= char_charset (c
, charset_list
, &code
);
6893 if (code
== CHARSET_INVALID_CODE (charset
))
6894 error ("Can't encode by shift_jis encoding: %d", c
);
6897 return make_number (code
);
6900 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
6901 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
6902 Return the corresponding character. */)
6906 Lisp_Object spec
, attrs
, val
;
6907 struct charset
*charset_roman
, *charset_big5
, *charset
;
6910 CHECK_NATNUM (code
);
6911 c
= XFASTINT (code
);
6912 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
6913 attrs
= AREF (spec
, 0);
6915 if (ASCII_BYTE_P (c
)
6916 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6919 val
= CODING_ATTR_CHARSET_LIST (attrs
);
6920 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
6921 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
6924 charset
= charset_roman
;
6927 int b1
= c
>> 8, b2
= c
& 0x7F;
6928 if (b1
< 0xA1 || b1
> 0xFE
6929 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
6930 error ("Invalid code: %d", code
);
6931 charset
= charset_big5
;
6933 c
= DECODE_CHAR (charset
, (unsigned )c
);
6935 error ("Invalid code: %d", code
);
6936 return make_number (c
);
6939 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
6940 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
6941 Return the corresponding character code in Big5. */)
6945 Lisp_Object spec
, attrs
, charset_list
;
6946 struct charset
*charset
;
6950 CHECK_CHARACTER (ch
);
6952 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
6953 attrs
= AREF (spec
, 0);
6954 if (ASCII_CHAR_P (c
)
6955 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
6958 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
6959 charset
= char_charset (c
, charset_list
, &code
);
6960 if (code
== CHARSET_INVALID_CODE (charset
))
6961 error ("Can't encode by Big5 encoding: %d", c
);
6963 return make_number (code
);
6967 DEFUN ("set-terminal-coding-system-internal",
6968 Fset_terminal_coding_system_internal
,
6969 Sset_terminal_coding_system_internal
, 1, 1, 0,
6970 doc
: /* Internal use only. */)
6972 Lisp_Object coding_system
;
6974 CHECK_SYMBOL (coding_system
);
6975 setup_coding_system (Fcheck_coding_system (coding_system
),
6978 /* We had better not send unsafe characters to terminal. */
6979 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
6980 /* Character composition should be disabled. */
6981 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6982 terminal_coding
.src_multibyte
= 1;
6983 terminal_coding
.dst_multibyte
= 0;
6987 DEFUN ("set-safe-terminal-coding-system-internal",
6988 Fset_safe_terminal_coding_system_internal
,
6989 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
6990 doc
: /* Internal use only. */)
6992 Lisp_Object coding_system
;
6994 CHECK_SYMBOL (coding_system
);
6995 setup_coding_system (Fcheck_coding_system (coding_system
),
6996 &safe_terminal_coding
);
6997 /* Character composition should be disabled. */
6998 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6999 safe_terminal_coding
.src_multibyte
= 1;
7000 safe_terminal_coding
.dst_multibyte
= 0;
7004 DEFUN ("terminal-coding-system",
7005 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
7006 doc
: /* Return coding system specified for terminal output. */)
7009 return CODING_ID_NAME (terminal_coding
.id
);
7012 DEFUN ("set-keyboard-coding-system-internal",
7013 Fset_keyboard_coding_system_internal
,
7014 Sset_keyboard_coding_system_internal
, 1, 1, 0,
7015 doc
: /* Internal use only. */)
7017 Lisp_Object coding_system
;
7019 CHECK_SYMBOL (coding_system
);
7020 setup_coding_system (Fcheck_coding_system (coding_system
),
7022 /* Character composition should be disabled. */
7023 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
7027 DEFUN ("keyboard-coding-system",
7028 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
7029 doc
: /* Return coding system specified for decoding keyboard input. */)
7032 return CODING_ID_NAME (keyboard_coding
.id
);
7036 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
7037 Sfind_operation_coding_system
, 1, MANY
, 0,
7038 doc
: /* Choose a coding system for an operation based on the target name.
7039 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7040 DECODING-SYSTEM is the coding system to use for decoding
7041 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7042 for encoding (in case OPERATION does encoding).
7044 The first argument OPERATION specifies an I/O primitive:
7045 For file I/O, `insert-file-contents' or `write-region'.
7046 For process I/O, `call-process', `call-process-region', or `start-process'.
7047 For network I/O, `open-network-stream'.
7049 The remaining arguments should be the same arguments that were passed
7050 to the primitive. Depending on which primitive, one of those arguments
7051 is selected as the TARGET. For example, if OPERATION does file I/O,
7052 whichever argument specifies the file name is TARGET.
7054 TARGET has a meaning which depends on OPERATION:
7055 For file I/O, TARGET is a file name.
7056 For process I/O, TARGET is a process name.
7057 For network I/O, TARGET is a service name or a port number
7059 This function looks up what is specified for TARGET in
7060 `file-coding-system-alist', `process-coding-system-alist',
7061 or `network-coding-system-alist' depending on OPERATION.
7062 They may specify a coding system, a cons of coding systems,
7063 or a function symbol to call.
7064 In the last case, we call the function with one argument,
7065 which is a list of all the arguments given to this function.
7067 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
7072 Lisp_Object operation
, target_idx
, target
, val
;
7073 register Lisp_Object chain
;
7076 error ("Too few arguments");
7077 operation
= args
[0];
7078 if (!SYMBOLP (operation
)
7079 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
7080 error ("Invalid first arguement");
7081 if (nargs
< 1 + XINT (target_idx
))
7082 error ("Too few arguments for operation: %s",
7083 XSYMBOL (operation
)->name
->data
);
7084 target
= args
[XINT (target_idx
) + 1];
7085 if (!(STRINGP (target
)
7086 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
7087 error ("Invalid %dth argument", XINT (target_idx
) + 1);
7089 chain
= ((EQ (operation
, Qinsert_file_contents
)
7090 || EQ (operation
, Qwrite_region
))
7091 ? Vfile_coding_system_alist
7092 : (EQ (operation
, Qopen_network_stream
)
7093 ? Vnetwork_coding_system_alist
7094 : Vprocess_coding_system_alist
));
7098 for (; CONSP (chain
); chain
= XCDR (chain
))
7104 && ((STRINGP (target
)
7105 && STRINGP (XCAR (elt
))
7106 && fast_string_match (XCAR (elt
), target
) >= 0)
7107 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
7110 /* Here, if VAL is both a valid coding system and a valid
7111 function symbol, we return VAL as a coding system. */
7114 if (! SYMBOLP (val
))
7116 if (! NILP (Fcoding_system_p (val
)))
7117 return Fcons (val
, val
);
7118 if (! NILP (Ffboundp (val
)))
7120 val
= call1 (val
, Flist (nargs
, args
));
7123 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
7124 return Fcons (val
, val
);
7132 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
7133 Sset_coding_system_priority
, 1, MANY
, 0,
7134 doc
: /* Assign higher priority to coding systems given as arguments.
7135 usage: (set-coding-system-priority CODING-SYSTEM ...) */)
7141 int changed
[coding_category_max
];
7142 enum coding_category priorities
[coding_category_max
];
7144 bzero (changed
, sizeof changed
);
7146 for (i
= j
= 0; i
< nargs
; i
++)
7148 enum coding_category category
;
7149 Lisp_Object spec
, attrs
;
7151 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
7152 attrs
= AREF (spec
, 0);
7153 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7154 if (changed
[category
])
7155 /* Ignore this coding system because a coding system of the
7156 same category already had a higher priority. */
7158 changed
[category
] = 1;
7159 priorities
[j
++] = category
;
7160 if (coding_categories
[category
].id
>= 0
7161 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
7162 setup_coding_system (args
[i
], &coding_categories
[category
]);
7165 /* Now we have decided top J priorities. Reflect the order of the
7166 original priorities to the remaining priorities. */
7168 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
7170 while (j
< coding_category_max
7171 && changed
[coding_priorities
[j
]])
7173 if (j
== coding_category_max
)
7175 priorities
[i
] = coding_priorities
[j
];
7178 bcopy (priorities
, coding_priorities
, sizeof priorities
);
7182 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
7183 Scoding_system_priority_list
, 0, 1, 0,
7184 doc
: /* Return a list of coding systems ordered by their priorities. */)
7186 Lisp_Object highestp
;
7191 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
7193 enum coding_category category
= coding_priorities
[i
];
7194 int id
= coding_categories
[category
].id
;
7199 attrs
= CODING_ID_ATTRS (id
);
7200 if (! NILP (highestp
))
7201 return CODING_ATTR_BASE_NAME (attrs
);
7202 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
7204 return Fnreverse (val
);
7208 make_subsidiaries (base
)
7211 Lisp_Object subsidiaries
;
7212 char *suffixes
[] = { "-unix", "-dos", "-mac" };
7213 int base_name_len
= STRING_BYTES (XSYMBOL (base
)->name
);
7214 char *buf
= (char *) alloca (base_name_len
+ 6);
7217 bcopy (XSYMBOL (base
)->name
->data
, buf
, base_name_len
);
7218 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
7219 for (i
= 0; i
< 3; i
++)
7221 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
7222 ASET (subsidiaries
, i
, intern (buf
));
7224 return subsidiaries
;
7228 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
7229 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
7230 doc
: /* For internal use only.
7231 usage: (define-coding-system-internal ...) */)
7237 Lisp_Object spec_vec
; /* [ ATTRS ALIASES EOL_TYPE ] */
7238 Lisp_Object attrs
; /* Vector of attributes. */
7239 Lisp_Object eol_type
;
7240 Lisp_Object aliases
;
7241 Lisp_Object coding_type
, charset_list
, safe_charsets
;
7242 enum coding_category category
;
7243 Lisp_Object tail
, val
;
7244 int max_charset_id
= 0;
7247 if (nargs
< coding_arg_max
)
7250 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
7252 name
= args
[coding_arg_name
];
7253 CHECK_SYMBOL (name
);
7254 CODING_ATTR_BASE_NAME (attrs
) = name
;
7256 val
= args
[coding_arg_mnemonic
];
7257 if (! STRINGP (val
))
7258 CHECK_CHARACTER (val
);
7259 CODING_ATTR_MNEMONIC (attrs
) = val
;
7261 coding_type
= args
[coding_arg_coding_type
];
7262 CHECK_SYMBOL (coding_type
);
7263 CODING_ATTR_TYPE (attrs
) = coding_type
;
7265 charset_list
= args
[coding_arg_charset_list
];
7266 if (SYMBOLP (charset_list
))
7268 if (EQ (charset_list
, Qiso_2022
))
7270 if (! EQ (coding_type
, Qiso_2022
))
7271 error ("Invalid charset-list");
7272 charset_list
= Viso_2022_charset_list
;
7274 else if (EQ (charset_list
, Qemacs_mule
))
7276 if (! EQ (coding_type
, Qemacs_mule
))
7277 error ("Invalid charset-list");
7278 charset_list
= Vemacs_mule_charset_list
;
7280 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7281 if (max_charset_id
< XFASTINT (XCAR (tail
)))
7282 max_charset_id
= XFASTINT (XCAR (tail
));
7286 charset_list
= Fcopy_sequence (charset_list
);
7287 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
7289 struct charset
*charset
;
7292 CHECK_CHARSET_GET_CHARSET (val
, charset
);
7293 if (EQ (coding_type
, Qiso_2022
)
7294 ? CHARSET_ISO_FINAL (charset
) < 0
7295 : EQ (coding_type
, Qemacs_mule
)
7296 ? CHARSET_EMACS_MULE_ID (charset
) < 0
7298 error ("Can't handle charset `%s'",
7299 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7301 XCAR (tail
) = make_number (charset
->id
);
7302 if (max_charset_id
< charset
->id
)
7303 max_charset_id
= charset
->id
;
7306 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
7308 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
7310 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7311 XSTRING (safe_charsets
)->data
[XFASTINT (XCAR (tail
))] = 0;
7312 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
7314 val
= args
[coding_arg_decode_translation_table
];
7316 CHECK_CHAR_TABLE (val
);
7317 CODING_ATTR_DECODE_TBL (attrs
) = val
;
7319 val
= args
[coding_arg_encode_translation_table
];
7321 CHECK_CHAR_TABLE (val
);
7322 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
7324 val
= args
[coding_arg_post_read_conversion
];
7326 CODING_ATTR_POST_READ (attrs
) = val
;
7328 val
= args
[coding_arg_pre_write_conversion
];
7330 CODING_ATTR_PRE_WRITE (attrs
) = val
;
7332 val
= args
[coding_arg_default_char
];
7334 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
7337 CHECK_CHARACTER (val
);
7338 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
7341 val
= args
[coding_arg_plist
];
7343 CODING_ATTR_PLIST (attrs
) = val
;
7345 if (EQ (coding_type
, Qcharset
))
7347 val
= Fmake_vector (make_number (256), Qnil
);
7349 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
7351 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
7352 int idx
= (CHARSET_DIMENSION (charset
) - 1) * 4;
7354 for (i
= charset
->code_space
[idx
];
7355 i
<= charset
->code_space
[idx
+ 1]; i
++)
7357 if (NILP (AREF (val
, i
)))
7358 ASET (val
, i
, XCAR (tail
));
7360 error ("Charsets conflicts in the first byte");
7363 ASET (attrs
, coding_attr_charset_valids
, val
);
7364 category
= coding_category_charset
;
7366 else if (EQ (coding_type
, Qccl
))
7370 if (nargs
< coding_arg_ccl_max
)
7373 val
= args
[coding_arg_ccl_decoder
];
7374 CHECK_CCL_PROGRAM (val
);
7376 val
= Fcopy_sequence (val
);
7377 ASET (attrs
, coding_attr_ccl_decoder
, val
);
7379 val
= args
[coding_arg_ccl_encoder
];
7380 CHECK_CCL_PROGRAM (val
);
7382 val
= Fcopy_sequence (val
);
7383 ASET (attrs
, coding_attr_ccl_encoder
, val
);
7385 val
= args
[coding_arg_ccl_valids
];
7386 valids
= Fmake_string (make_number (256), make_number (0));
7387 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
7391 ASET (valids
, XINT (val
), 1);
7397 CHECK_NUMBER (XCAR (val
));
7398 CHECK_NUMBER (XCDR (val
));
7399 from
= XINT (XCAR (val
));
7400 to
= XINT (XCDR (val
));
7401 for (i
= from
; i
<= to
; i
++)
7402 ASET (valids
, i
, 1);
7405 ASET (attrs
, coding_attr_ccl_valids
, valids
);
7407 category
= coding_category_ccl
;
7409 else if (EQ (coding_type
, Qutf_16
))
7411 Lisp_Object bom
, endian
;
7413 if (nargs
< coding_arg_utf16_max
)
7416 bom
= args
[coding_arg_utf16_bom
];
7417 if (! NILP (bom
) && ! EQ (bom
, Qt
))
7420 CHECK_CODING_SYSTEM (XCAR (bom
));
7421 CHECK_CODING_SYSTEM (XCDR (bom
));
7423 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
7425 endian
= args
[coding_arg_utf16_endian
];
7426 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
7428 category
= (CONSP (bom
)
7429 ? coding_category_utf_16_auto
7432 ? coding_category_utf_16_be_nosig
7433 : coding_category_utf_16_le_nosig
)
7435 ? coding_category_utf_16_be
7436 : coding_category_utf_16_le
));
7438 else if (EQ (coding_type
, Qiso_2022
))
7440 Lisp_Object initial
, reg_usage
, request
, flags
;
7443 if (nargs
< coding_arg_iso2022_max
)
7446 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
7447 CHECK_VECTOR (initial
);
7448 for (i
= 0; i
< 4; i
++)
7450 val
= Faref (initial
, make_number (i
));
7453 CHECK_CHARSET_GET_ID (val
, id
);
7454 ASET (initial
, i
, make_number (id
));
7457 ASET (initial
, i
, make_number (-1));
7460 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
7461 CHECK_CONS (reg_usage
);
7462 CHECK_NATNUM (XCAR (reg_usage
));
7463 CHECK_NATNUM (XCDR (reg_usage
));
7465 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
7466 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
7472 CHECK_CHARSET_GET_ID (XCAR (val
), id
);
7473 CHECK_NATNUM (XCDR (val
));
7474 if (XINT (XCDR (val
)) >= 4)
7475 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
7476 XCAR (val
) = make_number (id
);
7479 flags
= args
[coding_arg_iso2022_flags
];
7480 CHECK_NATNUM (flags
);
7482 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
7483 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
7485 ASET (attrs
, coding_attr_iso_initial
, initial
);
7486 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
7487 ASET (attrs
, coding_attr_iso_request
, request
);
7488 ASET (attrs
, coding_attr_iso_flags
, flags
);
7489 setup_iso_safe_charsets (attrs
);
7491 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
7492 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
7493 | CODING_ISO_FLAG_SINGLE_SHIFT
))
7494 ? coding_category_iso_7_else
7495 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
7496 ? coding_category_iso_7
7497 : coding_category_iso_7_tight
);
7500 int id
= XINT (AREF (initial
, 1));
7502 category
= (((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
7503 | CODING_ISO_FLAG_SINGLE_SHIFT
))
7504 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
7506 ? coding_category_iso_8_else
7507 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
7508 ? coding_category_iso_8_1
7509 : coding_category_iso_8_2
);
7512 else if (EQ (coding_type
, Qemacs_mule
))
7514 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
7515 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
7517 category
= coding_category_emacs_mule
;
7519 else if (EQ (coding_type
, Qshift_jis
))
7522 struct charset
*charset
;
7524 if (XINT (Flength (charset_list
)) != 3)
7525 error ("There should be just three charsets");
7527 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7528 if (CHARSET_DIMENSION (charset
) != 1)
7529 error ("Dimension of charset %s is not one",
7530 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7532 charset_list
= XCDR (charset_list
);
7533 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7534 if (CHARSET_DIMENSION (charset
) != 1)
7535 error ("Dimension of charset %s is not one",
7536 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7538 charset_list
= XCDR (charset_list
);
7539 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7540 if (CHARSET_DIMENSION (charset
) != 2)
7541 error ("Dimension of charset %s is not two",
7542 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7544 category
= coding_category_sjis
;
7545 Vsjis_coding_system
= name
;
7547 else if (EQ (coding_type
, Qbig5
))
7549 struct charset
*charset
;
7551 if (XINT (Flength (charset_list
)) != 2)
7552 error ("There should be just two charsets");
7554 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7555 if (CHARSET_DIMENSION (charset
) != 1)
7556 error ("Dimension of charset %s is not one",
7557 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7559 charset_list
= XCDR (charset_list
);
7560 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
7561 if (CHARSET_DIMENSION (charset
) != 2)
7562 error ("Dimension of charset %s is not two",
7563 XSYMBOL (CHARSET_NAME (charset
))->name
->data
);
7565 category
= coding_category_big5
;
7566 Vbig5_coding_system
= name
;
7568 else if (EQ (coding_type
, Qraw_text
))
7569 category
= coding_category_raw_text
;
7570 else if (EQ (coding_type
, Qutf_8
))
7571 category
= coding_category_utf_8
;
7572 else if (EQ (coding_type
, Qundecided
))
7573 category
= coding_category_undecided
;
7575 error ("Invalid coding system type: %s",
7576 XSYMBOL (coding_type
)->name
->data
);
7578 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
7580 eol_type
= args
[coding_arg_eol_type
];
7581 if (! NILP (eol_type
)
7582 && ! EQ (eol_type
, Qunix
)
7583 && ! EQ (eol_type
, Qdos
)
7584 && ! EQ (eol_type
, Qmac
))
7585 error ("Invalid eol-type");
7587 aliases
= Fcons (name
, Qnil
);
7589 if (NILP (eol_type
))
7591 eol_type
= make_subsidiaries (name
);
7592 for (i
= 0; i
< 3; i
++)
7594 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
7596 this_name
= AREF (eol_type
, i
);
7597 this_aliases
= Fcons (this_name
, Qnil
);
7598 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
7599 this_spec
= Fmake_vector (make_number (3), attrs
);
7600 ASET (this_spec
, 1, this_aliases
);
7601 ASET (this_spec
, 2, this_eol_type
);
7602 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
7603 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
7604 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
7605 Vcoding_system_alist
);
7609 spec_vec
= Fmake_vector (make_number (3), attrs
);
7610 ASET (spec_vec
, 1, aliases
);
7611 ASET (spec_vec
, 2, eol_type
);
7613 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
7614 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
7615 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
7616 Vcoding_system_alist
);
7619 int id
= coding_categories
[category
].id
;
7621 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
7622 setup_coding_system (name
, &coding_categories
[category
]);
7628 return Fsignal (Qwrong_number_of_arguments
,
7629 Fcons (intern ("define-coding-system-internal"),
7630 make_number (nargs
)));
7633 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
7634 Sdefine_coding_system_alias
, 2, 2, 0,
7635 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
7636 (alias
, coding_system
)
7637 Lisp_Object alias
, coding_system
;
7639 Lisp_Object spec
, aliases
, eol_type
;
7641 CHECK_SYMBOL (alias
);
7642 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7643 aliases
= AREF (spec
, 1);
7644 while (!NILP (XCDR (aliases
)))
7645 aliases
= XCDR (aliases
);
7646 XCDR (aliases
) = Fcons (alias
, Qnil
);
7648 eol_type
= AREF (spec
, 2);
7649 if (VECTORP (eol_type
))
7651 Lisp_Object subsidiaries
;
7654 subsidiaries
= make_subsidiaries (alias
);
7655 for (i
= 0; i
< 3; i
++)
7656 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
7657 AREF (eol_type
, i
));
7659 ASET (spec
, 2, subsidiaries
);
7662 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
7663 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (alias
), Qnil
),
7664 Vcoding_system_alist
);
7669 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
7671 doc
: /* Return the base of CODING-SYSTEM.
7672 Any alias or subsidiary coding systems are not base coding system. */)
7674 Lisp_Object coding_system
;
7676 Lisp_Object spec
, attrs
;
7678 if (NILP (coding_system
))
7679 return (Qno_conversion
);
7680 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7681 attrs
= AREF (spec
, 0);
7682 return CODING_ATTR_BASE_NAME (attrs
);
7685 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
7687 doc
: "Return the property list of CODING-SYSTEM.")
7689 Lisp_Object coding_system
;
7691 Lisp_Object spec
, attrs
;
7693 if (NILP (coding_system
))
7694 coding_system
= Qno_conversion
;
7695 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7696 attrs
= AREF (spec
, 0);
7697 return CODING_ATTR_PLIST (attrs
);
7701 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
7703 doc
: /* Return the list of aliases of CODING-SYSTEM.
7704 A base coding system is what made by `define-coding-system'.
7705 Any alias nor subsidiary coding systems are not base coding system. */)
7707 Lisp_Object coding_system
;
7711 if (NILP (coding_system
))
7712 coding_system
= Qno_conversion
;
7713 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
7714 return AREF (spec
, 2);
7717 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
7718 Scoding_system_eol_type
, 1, 1, 0,
7719 doc
: /* Return eol-type of CODING-SYSTEM.
7720 An eol-type is integer 0, 1, 2, or a vector of coding systems.
7722 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7723 and CR respectively.
7725 A vector value indicates that a format of end-of-line should be
7726 detected automatically. Nth element of the vector is the subsidiary
7727 coding system whose eol-type is N. */)
7729 Lisp_Object coding_system
;
7731 Lisp_Object spec
, eol_type
;
7734 if (NILP (coding_system
))
7735 coding_system
= Qno_conversion
;
7736 if (! CODING_SYSTEM_P (coding_system
))
7738 spec
= CODING_SYSTEM_SPEC (coding_system
);
7739 eol_type
= AREF (spec
, 2);
7740 if (VECTORP (eol_type
))
7741 return Fcopy_sequence (eol_type
);
7742 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
7743 return make_number (n
);
7749 /*** 9. Post-amble ***/
7756 for (i
= 0; i
< coding_category_max
; i
++)
7758 coding_categories
[i
].id
= -1;
7759 coding_priorities
[i
] = i
;
7762 /* ISO2022 specific initialize routine. */
7763 for (i
= 0; i
< 0x20; i
++)
7764 iso_code_class
[i
] = ISO_control_0
;
7765 for (i
= 0x21; i
< 0x7F; i
++)
7766 iso_code_class
[i
] = ISO_graphic_plane_0
;
7767 for (i
= 0x80; i
< 0xA0; i
++)
7768 iso_code_class
[i
] = ISO_control_1
;
7769 for (i
= 0xA1; i
< 0xFF; i
++)
7770 iso_code_class
[i
] = ISO_graphic_plane_1
;
7771 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
7772 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
7773 iso_code_class
[ISO_CODE_CR
] = ISO_carriage_return
;
7774 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
7775 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
7776 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
7777 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
7778 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
7779 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
7780 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
7782 inhibit_pre_post_conversion
= 0;
7784 for (i
= 0; i
< 256; i
++)
7786 emacs_mule_bytes
[i
] = 1;
/* Protect the file-level Lisp objects with staticpro and give each its
   initial value.  */
/* Table of defined coding systems: elsewhere in this file Fputhash
   stores a spec vector in it under the coding system's name.  */
7795 staticpro (&Vcoding_system_hash_table
);
7796 Vcoding_system_hash_table
= Fmakehash (Qeq
);
/* Set to the coding system name when a coding system of type
   shift-jis is defined; nil until then.  */
7798 staticpro (&Vsjis_coding_system
);
7799 Vsjis_coding_system
= Qnil
;
/* Likewise for a coding system of type big5.  */
7801 staticpro (&Vbig5_coding_system
);
7802 Vbig5_coding_system
= Qnil
;
/* NOTE(review): judging by the names, bookkeeping for code-conversion
   work buffers; their use is outside this section.  Both start as nil.  */
7804 staticpro (&Vcode_conversion_work_buf_list
);
7805 Vcode_conversion_work_buf_list
= Qnil
;
7807 staticpro (&Vcode_conversion_reused_work_buf
);
7808 Vcode_conversion_reused_work_buf
= Qnil
;
7810 DEFSYM (Qcharset
, "charset");
7811 DEFSYM (Qtarget_idx
, "target-idx");
7812 DEFSYM (Qcoding_system_history
, "coding-system-history");
7813 Fset (Qcoding_system_history
, Qnil
);
7815 /* Target FILENAME is the first argument. */
7816 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
7817 /* Target FILENAME is the third argument. */
7818 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
7820 DEFSYM (Qcall_process
, "call-process");
7821 /* Target PROGRAM is the first argument. */
7822 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
7824 DEFSYM (Qcall_process_region
, "call-process-region");
7825 /* Target PROGRAM is the third argument. */
7826 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
7828 DEFSYM (Qstart_process
, "start-process");
7829 /* Target PROGRAM is the third argument. */
7830 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
7832 DEFSYM (Qopen_network_stream
, "open-network-stream");
7833 /* Target SERVICE is the fourth argument. */
7834 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
7836 DEFSYM (Qcoding_system
, "coding-system");
7837 DEFSYM (Qcoding_aliases
, "coding-aliases");
7839 DEFSYM (Qeol_type
, "eol-type");
7840 DEFSYM (Qunix
, "unix");
7841 DEFSYM (Qdos
, "dos");
7842 DEFSYM (Qmac
, "mac");
7844 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
7845 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
7846 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
7847 DEFSYM (Qdefault_char
, "default-char");
7848 DEFSYM (Qundecided
, "undecided");
7849 DEFSYM (Qno_conversion
, "no-conversion");
7850 DEFSYM (Qraw_text
, "raw-text");
7852 DEFSYM (Qiso_2022
, "iso-2022");
7854 DEFSYM (Qutf_8
, "utf-8");
7856 DEFSYM (Qutf_16
, "utf-16");
7857 DEFSYM (Qutf_16_be
, "utf-16-be");
7858 DEFSYM (Qutf_16_be_nosig
, "utf-16-be-nosig");
7859 DEFSYM (Qutf_16_le
, "utf-16-l3");
7860 DEFSYM (Qutf_16_le_nosig
, "utf-16-le-nosig");
7861 DEFSYM (Qsignature
, "signature");
7862 DEFSYM (Qendian
, "endian");
7863 DEFSYM (Qbig
, "big");
7864 DEFSYM (Qlittle
, "little");
7866 DEFSYM (Qshift_jis
, "shift-jis");
7867 DEFSYM (Qbig5
, "big5");
7869 DEFSYM (Qcoding_system_p
, "coding-system-p");
7871 DEFSYM (Qcoding_system_error
, "coding-system-error");
7872 Fput (Qcoding_system_error
, Qerror_conditions
,
7873 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
7874 Fput (Qcoding_system_error
, Qerror_message
,
7875 build_string ("Invalid coding system"));
7877 /* Intern this now in case it isn't already done.
7878 Setting this variable twice is harmless.
7879 But don't staticpro it here--that is done in alloc.c. */
7880 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
7882 DEFSYM (Qtranslation_table
, "translation-table");
7883 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (1));
7884 DEFSYM (Qtranslation_table_id
, "translation-table-id");
7885 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
7886 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
7888 DEFSYM (Qvalid_codes
, "valid-codes");
7890 DEFSYM (Qemacs_mule
, "emacs-mule");
7892 Vcoding_category_table
7893 = Fmake_vector (make_number (coding_category_max
), Qnil
);
7894 staticpro (&Vcoding_category_table
);
7895 /* Followings are target of code detection. */
7896 ASET (Vcoding_category_table
, coding_category_iso_7
,
7897 intern ("coding-category-iso-7"));
7898 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
7899 intern ("coding-category-iso-7-tight"));
7900 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
7901 intern ("coding-category-iso-8-1"));
7902 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
7903 intern ("coding-category-iso-8-2"));
7904 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
7905 intern ("coding-category-iso-7-else"));
7906 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
7907 intern ("coding-category-iso-8-else"));
7908 ASET (Vcoding_category_table
, coding_category_utf_8
,
7909 intern ("coding-category-utf-8"));
7910 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
7911 intern ("coding-category-utf-16-be"));
7912 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
7913 intern ("coding-category-utf-16-le"));
7914 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
7915 intern ("coding-category-utf-16-be-nosig"));
7916 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
7917 intern ("coding-category-utf-16-le-nosig"));
7918 ASET (Vcoding_category_table
, coding_category_charset
,
7919 intern ("coding-category-charset"));
7920 ASET (Vcoding_category_table
, coding_category_sjis
,
7921 intern ("coding-category-sjis"));
7922 ASET (Vcoding_category_table
, coding_category_big5
,
7923 intern ("coding-category-big5"));
7924 ASET (Vcoding_category_table
, coding_category_ccl
,
7925 intern ("coding-category-ccl"));
7926 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
7927 intern ("coding-category-emacs-mule"));
7928 /* Followings are NOT target of code detection. */
7929 ASET (Vcoding_category_table
, coding_category_raw_text
,
7930 intern ("coding-category-raw-text"));
7931 ASET (Vcoding_category_table
, coding_category_undecided
,
7932 intern ("coding-category-undecided"));
/* Pass the subr object of each primitive defined in this file to
   defsubr (presumably what makes the DEFUN'd functions callable from
   Lisp).  */
/* Predicate, minibuffer readers and validity check.  */
7934 defsubr (&Scoding_system_p
);
7935 defsubr (&Sread_coding_system
);
7936 defsubr (&Sread_non_nil_coding_system
);
7937 defsubr (&Scheck_coding_system
);
/* Detection and searching for usable coding systems.  */
7938 defsubr (&Sdetect_coding_region
);
7939 defsubr (&Sdetect_coding_string
);
7940 defsubr (&Sfind_coding_systems_region_internal
);
7941 defsubr (&Scheck_coding_systems_region
);
/* Decoding and encoding of regions, strings and single characters.  */
7942 defsubr (&Sdecode_coding_region
);
7943 defsubr (&Sencode_coding_region
);
7944 defsubr (&Sdecode_coding_string
);
7945 defsubr (&Sencode_coding_string
);
7946 defsubr (&Sdecode_sjis_char
);
7947 defsubr (&Sencode_sjis_char
);
7948 defsubr (&Sdecode_big5_char
);
7949 defsubr (&Sencode_big5_char
);
/* Terminal and keyboard coding systems.  */
7950 defsubr (&Sset_terminal_coding_system_internal
);
7951 defsubr (&Sset_safe_terminal_coding_system_internal
);
7952 defsubr (&Sterminal_coding_system
);
7953 defsubr (&Sset_keyboard_coding_system_internal
);
7954 defsubr (&Skeyboard_coding_system
);
7955 defsubr (&Sfind_operation_coding_system
);
7956 defsubr (&Sset_coding_system_priority
);
/* Definition of coding systems and accessors for their specs.  */
7957 defsubr (&Sdefine_coding_system_internal
);
7958 defsubr (&Sdefine_coding_system_alias
);
7959 defsubr (&Scoding_system_base
);
7960 defsubr (&Scoding_system_plist
);
7961 defsubr (&Scoding_system_aliases
);
7962 defsubr (&Scoding_system_eol_type
);
7963 defsubr (&Scoding_system_priority_list
);
7965 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
7966 doc
: /* List of coding systems.
7968 Do not alter the value of this variable manually. This variable should be
7969 updated by the functions `define-coding-system' and
7970 `define-coding-system-alias'. */);
7971 Vcoding_system_list
= Qnil
;
7973 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
7974 doc
: /* Alist of coding system names.
7975 Each element is one element list of coding system name.
7976 This variable is given to `completing-read' as TABLE argument.
7978 Do not alter the value of this variable manually. This variable should be
7979 updated by the functions `define-coding-system' and
7980 `define-coding-system-alias'. */);
7981 Vcoding_system_alist
= Qnil
;
7983 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
7984 doc
: /* List of coding-categories (symbols) ordered by priority.
7986 On detecting a coding system, Emacs tries code detection algorithms
7987 associated with each coding-category one by one in this order. When
7988 one algorithm agrees with a byte sequence of source text, the coding
7989 system bound to the corresponding coding-category is selected. */);
7993 Vcoding_category_list
= Qnil
;
7994 for (i
= coding_category_max
- 1; i
>= 0; i
--)
7995 Vcoding_category_list
7996 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
7997 Vcoding_category_list
);
8000 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
8001 doc
: /* Specify the coding system for read operations.
8002 It is useful to bind this variable with `let', but do not set it globally.
8003 If the value is a coding system, it is used for decoding on read operation.
8004 If not, an appropriate element is used from one of the coding system alists:
8005 There are three such tables, `file-coding-system-alist',
8006 `process-coding-system-alist', and `network-coding-system-alist'. */);
8007 Vcoding_system_for_read
= Qnil
;
8009 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
8010 doc
: /* Specify the coding system for write operations.
8011 Programs bind this variable with `let', but you should not set it globally.
8012 If the value is a coding system, it is used for encoding of output,
8013 when writing it to a file and when sending it to a file or subprocess.
8015 If this does not specify a coding system, an appropriate element
8016 is used from one of the coding system alists:
8017 There are three such tables, `file-coding-system-alist',
8018 `process-coding-system-alist', and `network-coding-system-alist'.
8019 For output to files, if the above procedure does not specify a coding system,
8020 the value of `buffer-file-coding-system' is used. */);
8021 Vcoding_system_for_write
= Qnil
;
8023 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
8025 Coding system used in the latest file or process I/O. */);
8026 Vlast_coding_system_used
= Qnil
;
8028 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
8030 *Non-nil means always inhibit code conversion of end-of-line format.
8031 See info node `Coding Systems' and info node `Text and Binary' concerning
8032 such conversion. */);
8033 inhibit_eol_conversion
= 0;
8035 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
8037 Non-nil means process buffer inherits coding system of process output.
8038 Bind it to t if the process output is to be treated as if it were a file
8039 read from some filesystem. */);
8040 inherit_process_coding_system
= 0;
8042 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
8044 Alist to decide a coding system to use for a file I/O operation.
8045 The format is ((PATTERN . VAL) ...),
8046 where PATTERN is a regular expression matching a file name,
8047 VAL is a coding system, a cons of coding systems, or a function symbol.
8048 If VAL is a coding system, it is used for both decoding and encoding
8050 If VAL is a cons of coding systems, the car part is used for decoding,
8051 and the cdr part is used for encoding.
8052 If VAL is a function symbol, the function must return a coding system
8053 or a cons of coding systems which are used as above. The function gets
8054 the arguments with which `find-operation-coding-system' was called.
8056 See also the function `find-operation-coding-system'
8057 and the variable `auto-coding-alist'. */);
8058 Vfile_coding_system_alist
= Qnil
;
8060 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
8062 Alist to decide a coding system to use for a process I/O operation.
8063 The format is ((PATTERN . VAL) ...),
8064 where PATTERN is a regular expression matching a program name,
8065 VAL is a coding system, a cons of coding systems, or a function symbol.
8066 If VAL is a coding system, it is used for both decoding what received
8067 from the program and encoding what sent to the program.
8068 If VAL is a cons of coding systems, the car part is used for decoding,
8069 and the cdr part is used for encoding.
8070 If VAL is a function symbol, the function must return a coding system
8071 or a cons of coding systems which are used as above.
8073 See also the function `find-operation-coding-system'. */);
8074 Vprocess_coding_system_alist
= Qnil
;
8076 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
8078 Alist to decide a coding system to use for a network I/O operation.
8079 The format is ((PATTERN . VAL) ...),
8080 where PATTERN is a regular expression matching a network service name
8081 or is a port number to connect to,
8082 VAL is a coding system, a cons of coding systems, or a function symbol.
8083 If VAL is a coding system, it is used for both decoding what received
8084 from the network stream and encoding what sent to the network stream.
8085 If VAL is a cons of coding systems, the car part is used for decoding,
8086 and the cdr part is used for encoding.
8087 If VAL is a function symbol, the function must return a coding system
8088 or a cons of coding systems which are used as above.
8090 See also the function `find-operation-coding-system'. */);
8091 Vnetwork_coding_system_alist
= Qnil
;
8093 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
8094 doc
: /* Coding system to use with system messages.
8095 Also used for decoding keyboard input on X Window system. */);
8096 Vlocale_coding_system
= Qnil
;
8098 /* The eol mnemonics are reset in startup.el system-dependently. */
8099 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
8101 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
8102 eol_mnemonic_unix
= build_string (":");
8104 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
8106 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
8107 eol_mnemonic_dos
= build_string ("\\");
8109 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
8111 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
8112 eol_mnemonic_mac
= build_string ("/");
8114 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
8116 *String displayed in mode line when end-of-line format is not yet determined. */);
8117 eol_mnemonic_undecided
= build_string (":");
8119 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
8121 *Non-nil enables character translation while encoding and decoding. */);
8122 Venable_character_translation
= Qt
;
8124 DEFVAR_LISP ("standard-translation-table-for-decode",
8125 &Vstandard_translation_table_for_decode
,
8126 doc
: /* Table for translating characters while decoding. */);
8127 Vstandard_translation_table_for_decode
= Qnil
;
8129 DEFVAR_LISP ("standard-translation-table-for-encode",
8130 &Vstandard_translation_table_for_encode
,
8131 doc
: /* Table for translating characters while encoding. */);
8132 Vstandard_translation_table_for_encode
= Qnil
;
8134 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
8135 doc
: /* Alist of charsets vs revision numbers.
8136 While encoding, if a charset (car part of an element) is found,
8137 designate it with the escape sequence identifying revision (cdr part
8138 of the element). */);
8139 Vcharset_revision_table
= Qnil
;
8141 DEFVAR_LISP ("default-process-coding-system",
8142 &Vdefault_process_coding_system
,
8143 doc
: /* Cons of coding systems used for process I/O by default.
8144 The car part is used for decoding a process output,
8145 the cdr part is used for encoding a text to be sent to a process. */);
8146 Vdefault_process_coding_system
= Qnil
;
8148 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
8150 Table of extra Latin codes in the range 128..159 (inclusive).
8151 This is a vector of length 256.
8152 If Nth element is non-nil, the existence of code N in a file
8153 \(or output of subprocess) doesn't prevent it from being detected as
8154 a coding system of ISO 2022 variant which has a flag
8155 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8156 or reading output of a subprocess.
8157 Only the 128th through 159th elements have a meaning. */);
8158 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
8160 DEFVAR_LISP ("select-safe-coding-system-function",
8161 &Vselect_safe_coding_system_function
,
8163 Function to call to select safe coding system for encoding a text.
8165 If set, this function is called to force a user to select a proper
8166 coding system which can encode the text in the case that a default
8167 coding system used in each operation can't encode the text.
8169 The default value is `select-safe-coding-system' (which see). */);
8170 Vselect_safe_coding_system_function
= Qnil
;
8172 DEFVAR_BOOL ("inhibit-iso-escape-detection",
8173 &inhibit_iso_escape_detection
,
8175 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8177 By default, on reading a file, Emacs tries to detect how the text is
8178 encoded. This code detection is sensitive to escape sequences. If
8179 the sequence is valid as ISO2022, the code is determined as one of
8180 the ISO2022 encodings, and the file is decoded by the corresponding
8181 coding system (e.g. `iso-2022-7bit').
8183 However, there may be a case that you want to read escape sequences in
8184 a file as is. In such a case, you can set this variable to non-nil.
8185 Then, as the code detection ignores any escape sequences, no file is
8186 detected as encoded in some ISO2022 encoding. The result is that all
8187 escape sequences become visible in a buffer.
8189 The default value is nil, and it is strongly recommended not to change
8190 it. That is because many Emacs Lisp source files that contain
8191 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8192 in Emacs's distribution, and they won't be decoded correctly on
8193 reading if you suppress escape sequence detection.
8195 The other way to read escape sequences in a file without decoding is
8196 to explicitly specify some coding system that doesn't use ISO2022's
8197 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */);
8198 inhibit_iso_escape_detection
= 0;
/* Define the built-in coding system `no-conversion' and make it the
   initial keyboard, terminal and safe-terminal coding system.

   ARGS is the argument vector handed to Fdefine_coding_system_internal
   (coding_arg_max entries, indexed by the coding_arg_* constants).
   PLIST collects the same settings as 7 keyword/value pairs (14
   elements); the list built from it is stored in ARGS[coding_arg_plist].
   The definition uses coding type `raw-text', mnemonic '=', default
   character 0, eol-type `unix', and is marked ASCII compatible.

   Once the coding system exists, the three setup_coding_system calls
   at the end initialize keyboard_coding, terminal_coding and
   safe_terminal_coding from it.  */
8201 Lisp_Object args
[coding_arg_max
];
8202 Lisp_Object plist
[14];
8205 for (i
= 0; i
< coding_arg_max
; i
++)
8208 plist
[0] = intern (":name");
8209 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
8210 plist
[2] = intern (":mnemonic");
8211 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
8212 plist
[4] = intern (":coding-type");
8213 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
8214 plist
[6] = intern (":ascii-compatible-p");
8215 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
8216 plist
[8] = intern (":default-char");
8217 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
8218 plist
[10] = intern (":docstring");
8219 plist
[11] = build_string ("Do no conversion.\n\
8221 When you visit a file with this coding, the file is read into a\n\
8222 unibyte buffer as is, thus each byte of a file is treated as a\n\
8224 plist
[12] = intern (":eol-type");
8225 plist
[13] = args
[coding_arg_eol_type
] = Qunix
;
8226 args
[coding_arg_plist
] = Flist (14, plist
);
8227 Fdefine_coding_system_internal (coding_arg_max
, args
);
8230 setup_coding_system (Qno_conversion
, &keyboard_coding
);
8231 setup_coding_system (Qno_conversion
, &terminal_coding
);
8232 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
8236 emacs_strerror (error_number
)
8241 synchronize_system_messages_locale ();
8242 str
= strerror (error_number
);
8244 if (! NILP (Vlocale_coding_system
))
8246 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
8247 Vlocale_coding_system
,
8249 str
= (char *) XSTRING (dec
)->data
;