1 /* Coding system handler (conversion, detection, etc).
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
9 This file is part of GNU Emacs.
11 GNU Emacs is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2, or (at your option)
16 GNU Emacs is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GNU Emacs; see the file COPYING. If not, write to
23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 Boston, MA 02111-1307, USA. */
26 /*** TABLE OF CONTENTS ***
30 2. Emacs' internal format (emacs-utf-8) handlers
33 5. Charset-base coding systems handlers
34 6. emacs-mule (old Emacs' internal format) handlers
36 8. Shift-JIS and BIG5 handlers
38 10. C library functions
39 11. Emacs Lisp library functions
44 /*** 0. General comments ***
49 A coding system is an object for an encoding mechanism that contains
50 information about how to convert byte sequences to character
51 sequences and vice versa. When we say "decode", it means converting
52 a byte sequence of a specific coding system into a character
53 sequence that is represented by Emacs' internal coding system
54 `emacs-utf-8', and when we say "encode", it means converting a
55 character sequence of emacs-utf-8 to a byte sequence of a specific
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In
59 C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcoding_system_hash_table. The conversion from
61 coding system symbol to attributes vector is done by looking up
62 Vcoding_system_hash_table by the symbol.
64 Coding systems are classified into the following types depending on
65 the encoding mechanism. Here's a brief description of the types.
71 o Charset-base coding system
73 A coding system defined by one or more (coded) character sets.
74 Decoding and encoding are done by a code converter defined for each
77 o Old Emacs internal format (emacs-mule)
79 The coding system adopted by old versions of Emacs (20 and 21).
81 o ISO2022-base coding system
83 The most famous coding system for multiple character sets. X's
84 Compound Text, various EUCs (Extended Unix Code), and coding systems
85 used in the Internet communication such as ISO-2022-JP are all
88 o SJIS (or Shift-JIS or MS-Kanji-Code)
90 A coding system to encode character sets: ASCII, JISX0201, and
91 JISX0208. Widely used for PC's in Japan. Details are described in
96 A coding system to encode character sets: ASCII and Big5. Widely
97 used for Chinese (mainly in Taiwan and Hong Kong). Details are
98 described in section 8. In this file, when we write "big5" (all
99 lowercase), we mean the coding system, and when we write "Big5"
100 (capitalized), we mean the character set.
104 If a user wants to decode/encode text encoded in a coding system
105 not listed above, he can supply a decoder and an encoder for it in
106 CCL (Code Conversion Language) programs. Emacs executes the CCL
107 program while decoding/encoding.
111 A coding system for text containing raw eight-bit data. Emacs
112 treats each byte of source text as a character (except for
113 end-of-line conversion).
117 Like raw text, but don't do end-of-line conversion.
122 How text end-of-line is encoded depends on operating system. For
123 instance, Unix's format is just one byte of LF (line-feed) code,
124 whereas DOS's format is two-byte sequence of `carriage-return' and
125 `line-feed' codes. MacOS's format is usually one byte of
128 Since text character encoding and end-of-line encoding are
129 independent, any coding system described above can take any format
130 of end-of-line (except for no-conversion).
134 Before using a coding system for code conversion (i.e. decoding and
135 encoding), we setup a structure of type `struct coding_system'.
136 This structure keeps various information about a specific code
137 conversion (e.g. the location of source and destination data).
144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
146 These functions check if a byte sequence specified as a source in
147 CODING conforms to the format of XXX, and update the members of
150 Return 1 if the byte sequence conforms to XXX, otherwise return 0.
152 Below is the template of these functions. */
156 detect_coding_XXX (coding
, detect_info
)
157 struct coding_system
*coding
;
158 struct coding_detection_info
*detect_info
;
160 unsigned char *src
= coding
->source
;
161 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
162 int multibytep
= coding
->src_multibyte
;
163 int consumed_chars
= 0;
169 /* Get one byte from the source. If the source is exhausted, jump
170 to no_more_source:. */
173 if (! __C_conforms_to_XXX___ (c
))
175 if (__C_strongly_suggests_XXX__ (c
))
176 found
= CATEGORY_MASK_XXX
;
178 /* The byte sequence is invalid for XXX. */
179 detect_info
->rejected
|= CATEGORY_MASK_XXX
;
183 /* The source was exhausted successfully. */
184 detect_info
->found
|= found
;
189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
191 These functions decode a byte sequence specified as a source by
192 CODING. The resulting multibyte text goes to a place pointed to by
193 CODING->charbuf, the length of which should not exceed
194 CODING->charbuf_size;
196 These functions set the information of original and decoded texts in
197 CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
198 They also set CODING->result to one of CODING_RESULT_XXX indicating
199 how the decoding is finished.
201 Below is the template of these functions. */
205 decode_coding_XXXX (coding
)
206 struct coding_system
*coding
;
208 unsigned char *src
= coding
->source
+ coding
->consumed
;
209 unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
210 /* SRC_BASE remembers the start position in source in each loop.
211 The loop will be exited when there's not enough source code, or
212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base
;
214 /* A buffer to produce decoded characters. */
215 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
216 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
217 int multibytep
= coding
->src_multibyte
;
222 if (charbuf
>= charbuf_end
)
223 /* No more room to produce a decoded character. */
230 if (src_base
< src_end
231 && coding
->mode
& CODING_MODE_LAST_BLOCK
)
232 /* If the source ends by partial bytes to construct a character,
233 treat them as eight-bit raw data. */
234 while (src_base
< src_end
&& charbuf
< charbuf_end
)
235 *charbuf
++ = *src_base
++;
236 /* Remember how many bytes and characters we consumed. If the
237 source is multibyte, the bytes and chars are not identical. */
238 coding
->consumed
= coding
->consumed_char
= src_base
- coding
->source
;
239 /* Remember how many characters we produced. */
240 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
246 These functions encode SRC_BYTES length text at SOURCE of Emacs'
247 internal multibyte format by CODING. The resulting byte sequence
248 goes to a place pointed to by DESTINATION, the length of which
249 should not exceed DST_BYTES.
251 These functions set the information of original and encoded texts in
252 the members produced, produced_char, consumed, and consumed_char of
253 the structure *CODING. They also set the member result to one of
254 CODING_RESULT_XXX indicating how the encoding finished.
256 DST_BYTES zero means that source area and destination area are
257 overlapped, which means that we can produce encoded text until it
258 reaches the head of the not-yet-encoded source text.
260 Below is a template of these functions. */
263 encode_coding_XXX (coding
)
264 struct coding_system
*coding
;
266 int multibytep
= coding
->dst_multibyte
;
267 int *charbuf
= coding
->charbuf
;
268 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
269 unsigned char *dst
= coding
->destination
+ coding
->produced
;
270 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
271 unsigned char *adjusted_dst_end
= dst_end
- _MAX_BYTES_PRODUCED_IN_LOOP_
;
272 int produced_chars
= 0;
274 for (; charbuf
< charbuf_end
&& dst
< adjusted_dst_end
; charbuf
++)
277 /* Encode C into DST, and increment DST. */
279 label_no_more_destination
:
280 /* How many chars and bytes we produced. */
281 coding
->produced_char
+= produced_chars
;
282 coding
->produced
= dst
- coding
->destination
;
287 /*** 1. Preamble ***/
294 #include "character.h"
297 #include "composite.h"
301 Lisp_Object Vcoding_system_hash_table
;
303 Lisp_Object Qcoding_system
, Qcoding_aliases
, Qeol_type
;
304 Lisp_Object Qunix
, Qdos
;
305 extern Lisp_Object Qmac
; /* frame.c */
306 Lisp_Object Qbuffer_file_coding_system
;
307 Lisp_Object Qpost_read_conversion
, Qpre_write_conversion
;
308 Lisp_Object Qdefault_char
;
309 Lisp_Object Qno_conversion
, Qundecided
;
310 Lisp_Object Qcharset
, Qiso_2022
, Qutf_8
, Qutf_16
, Qshift_jis
, Qbig5
;
311 Lisp_Object Qbig
, Qlittle
;
312 Lisp_Object Qcoding_system_history
;
313 Lisp_Object Qvalid_codes
;
314 Lisp_Object QCcategory
, QCmnemonic
, QCdefalut_char
;
315 Lisp_Object QCdecode_translation_table
, QCencode_translation_table
;
316 Lisp_Object QCpost_read_conversion
, QCpre_write_conversion
;
318 extern Lisp_Object Qinsert_file_contents
, Qwrite_region
;
319 Lisp_Object Qcall_process
, Qcall_process_region
, Qprocess_argument
;
320 Lisp_Object Qstart_process
, Qopen_network_stream
;
321 Lisp_Object Qtarget_idx
;
323 Lisp_Object Qinsufficient_source
, Qinconsistent_eol
, Qinvalid_source
;
324 Lisp_Object Qinterrupted
, Qinsufficient_memory
;
326 int coding_system_require_warning
;
328 Lisp_Object Vselect_safe_coding_system_function
;
330 /* Mnemonic string for each format of end-of-line. */
331 Lisp_Object eol_mnemonic_unix
, eol_mnemonic_dos
, eol_mnemonic_mac
;
332 /* Mnemonic string to indicate format of end-of-line is not yet decided. */
334 Lisp_Object eol_mnemonic_undecided
;
338 Lisp_Object Vcoding_system_list
, Vcoding_system_alist
;
340 Lisp_Object Qcoding_system_p
, Qcoding_system_error
;
342 /* Coding system emacs-mule and raw-text are for converting only
343 end-of-line format. */
344 Lisp_Object Qemacs_mule
, Qraw_text
;
345 Lisp_Object Qutf_8_emacs
;
347 /* Coding-systems are handed between Emacs Lisp programs and C internal
348 routines by the following three variables. */
349 /* Coding-system for reading files and receiving data from process. */
350 Lisp_Object Vcoding_system_for_read
;
351 /* Coding-system for writing files and sending data to process. */
352 Lisp_Object Vcoding_system_for_write
;
353 /* Coding-system actually used in the latest I/O. */
354 Lisp_Object Vlast_coding_system_used
;
355 /* Set to non-nil when an error is detected during code conversion. */
356 Lisp_Object Vlast_code_conversion_error
;
357 /* A vector of length 256 which contains information about special
358 Latin codes (especially for dealing with Microsoft codes). */
359 Lisp_Object Vlatin_extra_code_table
;
361 /* Flag to inhibit code conversion of end-of-line format. */
362 int inhibit_eol_conversion
;
364 /* Flag to inhibit ISO2022 escape sequence detection. */
365 int inhibit_iso_escape_detection
;
367 /* Flag to make buffer-file-coding-system inherit from process-coding. */
368 int inherit_process_coding_system
;
370 /* Coding system to be used to encode text for terminal display. */
371 struct coding_system terminal_coding
;
373 /* Coding system to be used to encode text for terminal display when
374 terminal coding system is nil. */
375 struct coding_system safe_terminal_coding
;
377 /* Coding system of what is sent from terminal keyboard. */
378 struct coding_system keyboard_coding
;
380 Lisp_Object Vfile_coding_system_alist
;
381 Lisp_Object Vprocess_coding_system_alist
;
382 Lisp_Object Vnetwork_coding_system_alist
;
384 Lisp_Object Vlocale_coding_system
;
388 /* Flag to tell if we look up translation table on character code conversion. */
390 Lisp_Object Venable_character_translation
;
391 /* Standard translation table to look up on decoding (reading). */
392 Lisp_Object Vstandard_translation_table_for_decode
;
393 /* Standard translation table to look up on encoding (writing). */
394 Lisp_Object Vstandard_translation_table_for_encode
;
396 Lisp_Object Qtranslation_table
;
397 Lisp_Object Qtranslation_table_id
;
398 Lisp_Object Qtranslation_table_for_decode
;
399 Lisp_Object Qtranslation_table_for_encode
;
401 /* Alist of charsets vs revision number. */
402 static Lisp_Object Vcharset_revision_table
;
404 /* Default coding systems used for process I/O. */
405 Lisp_Object Vdefault_process_coding_system
;
407 /* Char table for translating Quail and self-inserting input. */
408 Lisp_Object Vtranslation_table_for_input
;
410 /* Two special coding systems. */
411 Lisp_Object Vsjis_coding_system
;
412 Lisp_Object Vbig5_coding_system
;
414 /* ISO2022 section */
416 #define CODING_ISO_INITIAL(coding, reg) \
417 (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \
418 coding_attr_iso_initial), \
422 #define CODING_ISO_REQUEST(coding, charset_id) \
423 ((charset_id <= (coding)->max_charset_id \
424 ? (coding)->safe_charsets[charset_id] \
/* Accessors for the ISO-2022 specific state kept in
(coding)->spec.iso_2022. Each expands to an lvalue-style member
access on CODING. */
/* The `flags' member. */
428 #define CODING_ISO_FLAGS(coding) \
429 ((coding)->spec.iso_2022.flags)
/* The element of `current_designation' indexed by REG. */
430 #define CODING_ISO_DESIGNATION(coding, reg) \
431 ((coding)->spec.iso_2022.current_designation[reg])
/* The element of `current_invocation' indexed by PLANE. */
432 #define CODING_ISO_INVOCATION(coding, plane) \
433 ((coding)->spec.iso_2022.current_invocation[plane])
/* The `single_shifting' member. */
434 #define CODING_ISO_SINGLE_SHIFTING(coding) \
435 ((coding)->spec.iso_2022.single_shifting)
/* The `bol' member (presumably a beginning-of-line flag -- confirm
against the struct definition). */
436 #define CODING_ISO_BOL(coding) \
437 ((coding)->spec.iso_2022.bol)
/* The designation entry selected through PLANE: the value of
CODING_ISO_INVOCATION for PLANE is used as the index passed to
CODING_ISO_DESIGNATION. */
438 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
439 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
441 /* Control characters of ISO2022. */
442 /* code */ /* function */
443 #define ISO_CODE_LF 0x0A /* line-feed */
444 #define ISO_CODE_CR 0x0D /* carriage-return */
445 #define ISO_CODE_SO 0x0E /* shift-out */
446 #define ISO_CODE_SI 0x0F /* shift-in */
447 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
448 #define ISO_CODE_ESC 0x1B /* escape */
449 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
450 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
451 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */
453 /* All code (1-byte) of ISO2022 is classified into one of the following classes. */
455 enum iso_code_class_type
457 ISO_control_0
, /* Control codes in the range
458 0x00..0x1F and 0x7F, except for the
459 following 4 codes. */
460 ISO_shift_out
, /* ISO_CODE_SO (0x0E) */
461 ISO_shift_in
, /* ISO_CODE_SI (0x0F) */
462 ISO_single_shift_2_7
, /* ISO_CODE_SS2_7 (0x19) */
463 ISO_escape
, /* ISO_CODE_ESC (0x1B) */
464 ISO_control_1
, /* Control codes in the range
465 0x80..0x9F, except for the
466 following 3 codes. */
467 ISO_single_shift_2
, /* ISO_CODE_SS2 (0x8E) */
468 ISO_single_shift_3
, /* ISO_CODE_SS3 (0x8F) */
469 ISO_control_sequence_introducer
, /* ISO_CODE_CSI (0x9B) */
470 ISO_0x20_or_0x7F
, /* Codes of the values 0x20 or 0x7F. */
471 ISO_graphic_plane_0
, /* Graphic codes in the range 0x21..0x7E. */
472 ISO_0xA0_or_0xFF
, /* Codes of the values 0xA0 or 0xFF. */
473 ISO_graphic_plane_1
/* Graphic codes in the range 0xA1..0xFE. */
476 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
477 `iso-flags' attribute of an iso2022 coding system. */
479 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
480 instead of the correct short-form sequence (e.g. ESC $ A). */
481 #define CODING_ISO_FLAG_LONG_FORM 0x0001
483 /* If set, reset graphic planes and registers at end-of-line to the initial state. */
485 #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002
487 /* If set, reset graphic planes and registers before any control
488 characters to the initial state. */
489 #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004
491 /* If set, encode by 7-bit environment. */
492 #define CODING_ISO_FLAG_SEVEN_BITS 0x0008
494 /* If set, use locking-shift function. */
495 #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010
497 /* If set, use single-shift function. Overwrite
498 CODING_ISO_FLAG_LOCKING_SHIFT. */
499 #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020
501 /* If set, use designation escape sequence. */
502 #define CODING_ISO_FLAG_DESIGNATION 0x0040
504 /* If set, produce revision number sequence. */
505 #define CODING_ISO_FLAG_REVISION 0x0080
507 /* If set, produce ISO6429's direction specifying sequence. */
508 #define CODING_ISO_FLAG_DIRECTION 0x0100
510 /* If set, assume designation states are reset at beginning of line on output. */
512 #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200
514 /* If set, designation sequence should be placed at beginning of line. */
516 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
518 /* If set, do not encode unsafe characters on output. */
519 #define CODING_ISO_FLAG_SAFE 0x0800
521 /* If set, extra latin codes (128..159) are accepted as a valid code. */
523 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
525 #define CODING_ISO_FLAG_COMPOSITION 0x2000
527 #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000
529 #define CODING_ISO_FLAG_USE_ROMAN 0x8000
531 #define CODING_ISO_FLAG_USE_OLDJIS 0x10000
533 #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
535 /* A character to be produced on output if encoding of the original
536 character is prohibited by CODING_ISO_FLAG_SAFE. */
537 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?'
/* Accessors for the UTF-16 specific state kept in
(coding)->spec.utf_16. */
/* The `bom' member. */
541 #define CODING_UTF_16_BOM(coding) \
542 ((coding)->spec.utf_16.bom)
/* The `endian' member. */
544 #define CODING_UTF_16_ENDIAN(coding) \
545 ((coding)->spec.utf_16.endian)
/* The `surrogate' member. */
547 #define CODING_UTF_16_SURROGATE(coding) \
548 ((coding)->spec.utf_16.surrogate)
/* Accessors for the CCL related attributes of CODING. Each one looks
up the attribute vector with CODING_ID_ATTRS ((coding)->id) and
fetches one slot of it with AREF. */
/* The slot indexed by coding_attr_ccl_decoder. */
552 #define CODING_CCL_DECODER(coding) \
553 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
/* The slot indexed by coding_attr_ccl_encoder. */
554 #define CODING_CCL_ENCODER(coding) \
555 AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
/* SDATA of the slot indexed by coding_attr_ccl_valids, i.e. that
slot is accessed as string data here. */
556 #define CODING_CCL_VALIDS(coding) \
557 (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
559 /* Index for each coding category in `coding_categories' */
563 coding_category_iso_7
,
564 coding_category_iso_7_tight
,
565 coding_category_iso_8_1
,
566 coding_category_iso_8_2
,
567 coding_category_iso_7_else
,
568 coding_category_iso_8_else
,
569 coding_category_utf_8
,
570 coding_category_utf_16_auto
,
571 coding_category_utf_16_be
,
572 coding_category_utf_16_le
,
573 coding_category_utf_16_be_nosig
,
574 coding_category_utf_16_le_nosig
,
575 coding_category_charset
,
576 coding_category_sjis
,
577 coding_category_big5
,
579 coding_category_emacs_mule
,
580 /* All above are targets of code detection. */
581 coding_category_raw_text
,
582 coding_category_undecided
,
586 /* Definitions of flag bits used in detect_coding_XXXX. */
587 #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7)
588 #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight)
589 #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1)
590 #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2)
591 #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
592 #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
593 #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
594 #define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
595 #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
596 #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
597 #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
598 #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig)
599 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset)
600 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis)
601 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5)
602 #define CATEGORY_MASK_CCL (1 << coding_category_ccl)
603 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule)
604 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text)
606 /* This value is returned if detect_coding_mask () finds nothing other
607 than ASCII characters. It is the bitwise OR of the category masks
listed below. NOTE(review): CATEGORY_MASK_UTF_16_AUTO is defined
above but is not part of this union -- confirm that the omission is
intentional. */
608 #define CATEGORY_MASK_ANY \
609 (CATEGORY_MASK_ISO_7 \
610 | CATEGORY_MASK_ISO_7_TIGHT \
611 | CATEGORY_MASK_ISO_8_1 \
612 | CATEGORY_MASK_ISO_8_2 \
613 | CATEGORY_MASK_ISO_7_ELSE \
614 | CATEGORY_MASK_ISO_8_ELSE \
615 | CATEGORY_MASK_UTF_8 \
616 | CATEGORY_MASK_UTF_16_BE \
617 | CATEGORY_MASK_UTF_16_LE \
618 | CATEGORY_MASK_UTF_16_BE_NOSIG \
619 | CATEGORY_MASK_UTF_16_LE_NOSIG \
620 | CATEGORY_MASK_CHARSET \
621 | CATEGORY_MASK_SJIS \
622 | CATEGORY_MASK_BIG5 \
623 | CATEGORY_MASK_CCL \
624 | CATEGORY_MASK_EMACS_MULE)
/* Unions of the single-category masks, grouped by family. */
/* The two 7-bit ISO categories: iso_7 and iso_7_tight. */
627 #define CATEGORY_MASK_ISO_7BIT \
628 (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
/* The two 8-bit ISO categories: iso_8_1 and iso_8_2. */
630 #define CATEGORY_MASK_ISO_8BIT \
631 (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
/* The two `else' ISO categories: iso_7_else and iso_8_else. */
633 #define CATEGORY_MASK_ISO_ELSE \
634 (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
/* iso_7, iso_7_tight, iso_7_else and iso_8_else; presumably the ISO
categories that may use escape sequences -- confirm against the
detector. */
636 #define CATEGORY_MASK_ISO_ESCAPE \
637 (CATEGORY_MASK_ISO_7 \
638 | CATEGORY_MASK_ISO_7_TIGHT \
639 | CATEGORY_MASK_ISO_7_ELSE \
640 | CATEGORY_MASK_ISO_8_ELSE)
/* Every ISO category: the 7-bit, 8-bit and `else' groups together. */
642 #define CATEGORY_MASK_ISO \
643 ( CATEGORY_MASK_ISO_7BIT \
644 | CATEGORY_MASK_ISO_8BIT \
645 | CATEGORY_MASK_ISO_ELSE)
/* The UTF-16 categories with an explicit byte order (BE, LE and their
NOSIG variants). CATEGORY_MASK_UTF_16_AUTO is not included. */
647 #define CATEGORY_MASK_UTF_16 \
648 (CATEGORY_MASK_UTF_16_BE \
649 | CATEGORY_MASK_UTF_16_LE \
650 | CATEGORY_MASK_UTF_16_BE_NOSIG \
651 | CATEGORY_MASK_UTF_16_LE_NOSIG)
654 /* List of symbols `coding-category-xxx' ordered by priority. This
655 variable is exposed to Emacs Lisp. */
656 static Lisp_Object Vcoding_category_list
;
658 /* Table of coding categories (Lisp symbols). This variable is for internal use only. */
660 static Lisp_Object Vcoding_category_table
;
662 /* Table of coding-categories ordered by priority. */
663 static enum coding_category coding_priorities
[coding_category_max
];
665 /* Nth element is a coding context for the coding system bound to the
666 Nth coding category. */
667 static struct coding_system coding_categories
[coding_category_max
];
669 /*** Commonly used macros and functions ***/
672 #define min(a, b) ((a) < (b) ? (a) : (b))
675 #define max(a, b) ((a) > (b) ? (a) : (b))
678 #define CODING_GET_INFO(coding, attrs, charset_list) \
680 (attrs) = CODING_ID_ATTRS ((coding)->id); \
681 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
685 /* Safely get one byte from the source text pointed by SRC which ends
686 at SRC_END, and set C to that byte. If there are not enough bytes
687 in the source, it jumps to `no_more_source'. If multibytep is
688 nonzero, and a multibyte character is found at SRC, set C to the
689 negative value of the character code. The caller should declare
690 and set these variables appropriately in advance:
691 src, src_end, multibytep */
693 #define ONE_MORE_BYTE(c) \
695 if (src == src_end) \
697 if (src_base < src) \
698 record_conversion_result \
699 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
700 goto no_more_source; \
703 if (multibytep && (c & 0x80)) \
705 if ((c & 0xFE) == 0xC0) \
706 c = ((c & 1) << 6) | *src++; \
709 c = - string_char (--src, &src, NULL); \
710 record_conversion_result \
711 (coding, CODING_RESULT_INVALID_SRC); \
718 #define ONE_MORE_BYTE_NO_CHECK(c) \
721 if (multibytep && (c & 0x80)) \
723 if ((c & 0xFE) == 0xC0) \
724 c = ((c & 1) << 6) | *src++; \
727 c = - string_char (--src, &src, NULL); \
728 record_conversion_result \
729 (coding, CODING_RESULT_INVALID_SRC); \
736 /* Store a byte C in the place pointed by DST and increment DST to the
737 next free point, and increment PRODUCED_CHARS. The caller should
738 assure that C is 0..127, and declare and set the variable `dst'
739 appropriately in advance. */
743 #define EMIT_ONE_ASCII_BYTE(c) \
750 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
752 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
754 produced_chars += 2; \
755 *dst++ = (c1), *dst++ = (c2); \
759 /* Store a byte C in the place pointed by DST and increment DST to the
760 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is
761 nonzero, store in an appropriate multibyte form. The caller should
762 declare and set the variables `dst' and `multibytep' appropriately in advance. */
765 #define EMIT_ONE_BYTE(c) \
772 ch = BYTE8_TO_CHAR (ch); \
773 CHAR_STRING_ADVANCE (ch, dst); \
780 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2. */
782 #define EMIT_TWO_BYTES(c1, c2) \
784 produced_chars += 2; \
791 ch = BYTE8_TO_CHAR (ch); \
792 CHAR_STRING_ADVANCE (ch, dst); \
795 ch = BYTE8_TO_CHAR (ch); \
796 CHAR_STRING_ADVANCE (ch, dst); \
806 #define EMIT_THREE_BYTES(c1, c2, c3) \
808 EMIT_ONE_BYTE (c1); \
809 EMIT_TWO_BYTES (c2, c3); \
813 #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \
815 EMIT_TWO_BYTES (c1, c2); \
816 EMIT_TWO_BYTES (c3, c4); \
820 /* Prototypes for static functions. */
821 static void record_conversion_result
P_ ((struct coding_system
*coding
,
822 enum coding_result_code result
));
823 static int detect_coding_utf_8
P_ ((struct coding_system
*,
824 struct coding_detection_info
*info
));
825 static void decode_coding_utf_8
P_ ((struct coding_system
*));
826 static int encode_coding_utf_8
P_ ((struct coding_system
*));
828 static int detect_coding_utf_16
P_ ((struct coding_system
*,
829 struct coding_detection_info
*info
));
830 static void decode_coding_utf_16
P_ ((struct coding_system
*));
831 static int encode_coding_utf_16
P_ ((struct coding_system
*));
833 static int detect_coding_iso_2022
P_ ((struct coding_system
*,
834 struct coding_detection_info
*info
));
835 static void decode_coding_iso_2022
P_ ((struct coding_system
*));
836 static int encode_coding_iso_2022
P_ ((struct coding_system
*));
838 static int detect_coding_emacs_mule
P_ ((struct coding_system
*,
839 struct coding_detection_info
*info
));
840 static void decode_coding_emacs_mule
P_ ((struct coding_system
*));
841 static int encode_coding_emacs_mule
P_ ((struct coding_system
*));
843 static int detect_coding_sjis
P_ ((struct coding_system
*,
844 struct coding_detection_info
*info
));
845 static void decode_coding_sjis
P_ ((struct coding_system
*));
846 static int encode_coding_sjis
P_ ((struct coding_system
*));
848 static int detect_coding_big5
P_ ((struct coding_system
*,
849 struct coding_detection_info
*info
));
850 static void decode_coding_big5
P_ ((struct coding_system
*));
851 static int encode_coding_big5
P_ ((struct coding_system
*));
853 static int detect_coding_ccl
P_ ((struct coding_system
*,
854 struct coding_detection_info
*info
));
855 static void decode_coding_ccl
P_ ((struct coding_system
*));
856 static int encode_coding_ccl
P_ ((struct coding_system
*));
858 static void decode_coding_raw_text
P_ ((struct coding_system
*));
859 static int encode_coding_raw_text
P_ ((struct coding_system
*));
861 static void coding_set_source
P_ ((struct coding_system
*));
862 static void coding_set_destination
P_ ((struct coding_system
*));
863 static void coding_alloc_by_realloc
P_ ((struct coding_system
*, EMACS_INT
));
864 static void coding_alloc_by_making_gap
P_ ((struct coding_system
*,
866 static unsigned char *alloc_destination
P_ ((struct coding_system
*,
867 EMACS_INT
, unsigned char *));
868 static void setup_iso_safe_charsets
P_ ((Lisp_Object
));
869 static unsigned char *encode_designation_at_bol
P_ ((struct coding_system
*,
872 static int detect_eol
P_ ((const unsigned char *,
873 EMACS_INT
, enum coding_category
));
874 static Lisp_Object adjust_coding_eol_type
P_ ((struct coding_system
*, int));
875 static void decode_eol
P_ ((struct coding_system
*));
876 static Lisp_Object get_translation_table
P_ ((Lisp_Object
, int, int *));
877 static Lisp_Object get_translation
P_ ((Lisp_Object
, int *, int *,
879 static int produce_chars
P_ ((struct coding_system
*, Lisp_Object
, int));
880 static INLINE
void produce_composition
P_ ((struct coding_system
*, int *,
882 static INLINE
void produce_charset
P_ ((struct coding_system
*, int *,
884 static void produce_annotation
P_ ((struct coding_system
*, EMACS_INT
));
885 static int decode_coding
P_ ((struct coding_system
*));
886 static INLINE
int *handle_composition_annotation
P_ ((EMACS_INT
, EMACS_INT
,
887 struct coding_system
*,
888 int *, EMACS_INT
*));
889 static INLINE
int *handle_charset_annotation
P_ ((EMACS_INT
, EMACS_INT
,
890 struct coding_system
*,
891 int *, EMACS_INT
*));
892 static void consume_chars
P_ ((struct coding_system
*, Lisp_Object
, int));
893 static int encode_coding
P_ ((struct coding_system
*));
894 static Lisp_Object make_conversion_work_buffer
P_ ((int));
895 static Lisp_Object code_conversion_restore
P_ ((Lisp_Object
));
896 static INLINE
int char_encodable_p
P_ ((int, Lisp_Object
));
897 static Lisp_Object make_subsidiaries
P_ ((Lisp_Object
));
/* Record the outcome of a conversion step.

   Store RESULT in CODING->result.  For each error code named by the
   `case' labels below, also set Vlast_code_conversion_error to the
   matching symbol (Qinsufficient_source, Qinconsistent_eol,
   Qinvalid_source, Qinterrupted or Qinsufficient_memory).  */
900 record_conversion_result (struct coding_system
*coding
,
901 enum coding_result_code result
)
903 coding
->result
= result
;
906 case CODING_RESULT_INSUFFICIENT_SRC
:
907 Vlast_code_conversion_error
= Qinsufficient_source
;
909 case CODING_RESULT_INCONSISTENT_EOL
:
910 Vlast_code_conversion_error
= Qinconsistent_eol
;
912 case CODING_RESULT_INVALID_SRC
:
913 Vlast_code_conversion_error
= Qinvalid_source
;
915 case CODING_RESULT_INTERRUPT
:
916 Vlast_code_conversion_error
= Qinterrupted
;
918 case CODING_RESULT_INSUFFICIENT_MEM
:
919 Vlast_code_conversion_error
= Qinsufficient_memory
;
924 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
926 charset_map_loaded = 0; \
927 c = DECODE_CHAR (charset, code); \
928 if (charset_map_loaded) \
930 const unsigned char *orig = coding->source; \
933 coding_set_source (coding); \
934 offset = coding->source - orig; \
936 src_base += offset; \
942 #define ASSURE_DESTINATION(bytes) \
944 if (dst + (bytes) >= dst_end) \
946 int more_bytes = charbuf_end - charbuf + (bytes); \
948 dst = alloc_destination (coding, more_bytes, dst); \
949 dst_end = coding->destination + coding->dst_bytes; \
/* Recompute CODING->source from CODING->src_object.

   If the source object is a buffer, the address is taken relative to
   the end of that buffer's gap when CODING->src_pos is negative, and
   from BUF_BYTE_ADDRESS at CODING->src_pos_byte otherwise.  If it is
   a string, the address is the string data plus CODING->src_pos_byte.
   Any other source is left unchanged.  */
956 coding_set_source (coding
)
957 struct coding_system
*coding
;
959 if (BUFFERP (coding
->src_object
))
961 struct buffer
*buf
= XBUFFER (coding
->src_object
);
963 if (coding
->src_pos
< 0)
964 coding
->source
= BUF_GAP_END_ADDR (buf
) + coding
->src_pos_byte
;
966 coding
->source
= BUF_BYTE_ADDRESS (buf
, coding
->src_pos_byte
);
968 else if (STRINGP (coding
->src_object
))
970 coding
->source
= SDATA (coding
->src_object
) + coding
->src_pos_byte
;
973 /* Otherwise, the source is C string and is never relocated
974 automatically. Thus we don't have to update anything. */
/* Recompute CODING->destination and CODING->dst_bytes when
   CODING->dst_object is a buffer.

   When CODING->src_pos is negative, the destination address is
   derived from BEG_ADDR and the size is measured up to GAP_END_ADDR
   minus the source bytes not yet consumed (CODING->src_bytes -
   CODING->consumed).  Otherwise both values are derived from the
   destination buffer itself (BUF_BEG_ADDR / BUF_GAP_END_ADDR).
   In both cases the address is base + CODING->dst_pos_byte - 1.
   Any other destination is left unchanged.  */
979 coding_set_destination (coding
)
980 struct coding_system
*coding
;
982 if (BUFFERP (coding
->dst_object
))
984 if (coding
->src_pos
< 0)
986 coding
->destination
= BEG_ADDR
+ coding
->dst_pos_byte
- 1;
987 coding
->dst_bytes
= (GAP_END_ADDR
988 - (coding
->src_bytes
- coding
->consumed
)
989 - coding
->destination
);
993 /* We are sure that coding->dst_pos_byte is before the gap
995 coding
->destination
= (BUF_BEG_ADDR (XBUFFER (coding
->dst_object
))
996 + coding
->dst_pos_byte
- 1);
997 coding
->dst_bytes
= (BUF_GAP_END_ADDR (XBUFFER (coding
->dst_object
))
998 - coding
->destination
);
1002 /* Otherwise, the destination is C string and is never relocated
1003 automatically. Thus we don't have to update anything. */
/* Enlarge the destination area of CODING by BYTES bytes.

   CODING->destination is reallocated with xrealloc to
   CODING->dst_bytes + BYTES bytes, and CODING->dst_bytes is then
   increased by BYTES.  The area may move, so pointers into the old
   area must be recomputed by the caller.  */
1009 coding_alloc_by_realloc (coding
, bytes
)
1010 struct coding_system
*coding
;
1013 coding
->destination
= (unsigned char *) xrealloc (coding
->destination
,
1014 coding
->dst_bytes
+ bytes
);
1015 coding
->dst_bytes
+= bytes
;
/* Make room in the destination buffer of CODING.

   When the destination buffer is also the source object, the source
   bytes not yet consumed (CODING->src_bytes - CODING->consumed) are
   temporarily counted as buffer text: GAP_SIZE is reduced and ZV, Z,
   ZV_BYTE and Z_BYTE are advanced by that amount, and afterwards the
   same adjustment is undone.  Otherwise the destination buffer is
   made current for the duration of the operation and the previously
   current buffer is restored afterwards.

   NOTE(review): the statement that consumes BYTES sits between each
   pair of adjustments and is not present in this listing --
   presumably the gap enlargement itself; confirm against the full
   source.  */
1019 coding_alloc_by_making_gap (coding
, bytes
)
1020 struct coding_system
*coding
;
1023 if (BUFFERP (coding
->dst_object
)
1024 && EQ (coding
->src_object
, coding
->dst_object
))
1026 EMACS_INT add
= coding
->src_bytes
- coding
->consumed
;
1028 GAP_SIZE
-= add
; ZV
+= add
; Z
+= add
; ZV_BYTE
+= add
; Z_BYTE
+= add
;
1030 GAP_SIZE
+= add
; ZV
-= add
; Z
-= add
; ZV_BYTE
-= add
; Z_BYTE
-= add
;
1034 Lisp_Object this_buffer
;
1036 this_buffer
= Fcurrent_buffer ();
1037 set_buffer_internal (XBUFFER (coding
->dst_object
));
1039 set_buffer_internal (XBUFFER (this_buffer
));
/* Enlarge the destination of CODING by NBYTES and re-derive DST.

   DST points into the current destination area.  Its offset from
   CODING->destination is saved first, because the area may move.
   A buffer destination is enlarged with coding_alloc_by_making_gap,
   any other destination with coding_alloc_by_realloc.  The result
   code is then reset to CODING_RESULT_SUCCESS, the destination
   fields are refreshed with coding_set_destination, and DST is
   recomputed as CODING->destination plus the saved offset.  */
1044 static unsigned char *
1045 alloc_destination (coding
, nbytes
, dst
)
1046 struct coding_system
*coding
;
1050 EMACS_INT offset
= dst
- coding
->destination
;
1052 if (BUFFERP (coding
->dst_object
))
1053 coding_alloc_by_making_gap (coding
, nbytes
);
1055 coding_alloc_by_realloc (coding
, nbytes
);
1056 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1057 coding_set_destination (coding
);
1058 dst
= coding
->destination
+ offset
;
1062 /** Macros for annotations. */
1064 /* Maximum length of annotation data (sum of annotations for
1065 composition and charset). */
1066 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1068 /* Annotation data is stored in the array coding->charbuf in this form:
1070 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1071 LENGTH is the number of elements in the annotation.
1072 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1073 NCHARS is the number of characters in the text annotated.
1075 The format of the following elements depends on ANNOTATION_MASK.
1077 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1079 ... METHOD [ COMPOSITION-COMPONENTS ... ]
1080 METHOD is one of enum composition_method.
1081 Optional COMPOSITION-COMPONENTS are characters and composition
1084 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1087 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \
1089 *(buf)++ = -(len); \
1090 *(buf)++ = (mask); \
1091 *(buf)++ = (nchars); \
1092 coding->annotated = 1; \
1095 #define ADD_COMPOSITION_DATA(buf, nchars, method) \
1097 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1102 #define ADD_CHARSET_DATA(buf, nchars, id) \
1104 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1109 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1116 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1117 Check if a text is encoded in UTF-8. If it is, return 1, else return 0. */
/* Predicates classifying one octet C of a UTF-8 byte sequence.

   UTF_8_1_OCTET_P        C stands alone (value below 0x80).  Note that
                          a negative C also satisfies this test, so
                          callers must reject negative values first.
   UTF_8_EXTRA_OCTET_P    C is a trailing octet (bit pattern 10xxxxxx).
   UTF_8_n_OCTET_LEADING_P
                          C is the first octet of an n-octet sequence
                          (110xxxxx, 1110xxxx, 11110xxx, 111110xx for
                          n = 2, 3, 4, 5).

   Only the low eight bits of C are examined by the masked tests.
   Each macro evaluates its argument exactly once.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1128 detect_coding_utf_8 (coding
, detect_info
)
1129 struct coding_system
*coding
;
1130 struct coding_detection_info
*detect_info
;
1132 const unsigned char *src
= coding
->source
, *src_base
;
1133 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1134 int multibytep
= coding
->src_multibyte
;
1135 int consumed_chars
= 0;
1138 detect_info
->checked
|= CATEGORY_MASK_UTF_8
;
1139 /* A coding system of this category is always ASCII compatible. */
1140 src
+= coding
->head_ascii
;
1144 int c
, c1
, c2
, c3
, c4
;
1148 if (c
< 0 || UTF_8_1_OCTET_P (c
))
1151 if (c1
< 0 || ! UTF_8_EXTRA_OCTET_P (c1
))
1153 if (UTF_8_2_OCTET_LEADING_P (c
))
1155 found
= CATEGORY_MASK_UTF_8
;
1159 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1161 if (UTF_8_3_OCTET_LEADING_P (c
))
1163 found
= CATEGORY_MASK_UTF_8
;
1167 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1169 if (UTF_8_4_OCTET_LEADING_P (c
))
1171 found
= CATEGORY_MASK_UTF_8
;
1175 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1177 if (UTF_8_5_OCTET_LEADING_P (c
))
1179 found
= CATEGORY_MASK_UTF_8
;
1184 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1188 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1190 detect_info
->rejected
|= CATEGORY_MASK_UTF_8
;
1193 detect_info
->found
|= found
;
1199 decode_coding_utf_8 (coding
)
1200 struct coding_system
*coding
;
1202 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1203 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1204 const unsigned char *src_base
;
1205 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
1206 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
1207 int consumed_chars
= 0, consumed_chars_base
;
1208 int multibytep
= coding
->src_multibyte
;
1209 Lisp_Object attr
, charset_list
;
1211 CODING_GET_INFO (coding
, attr
, charset_list
);
1215 int c
, c1
, c2
, c3
, c4
, c5
;
1218 consumed_chars_base
= consumed_chars
;
1220 if (charbuf
>= charbuf_end
)
1228 else if (UTF_8_1_OCTET_P(c1
))
1235 if (c2
< 0 || ! UTF_8_EXTRA_OCTET_P (c2
))
1237 if (UTF_8_2_OCTET_LEADING_P (c1
))
1239 c
= ((c1
& 0x1F) << 6) | (c2
& 0x3F);
1240 /* Reject overlong sequences here and below. Encoders
1241 producing them are incorrect, they can be misleading,
1242 and they mess up read/write invariance. */
1249 if (c3
< 0 || ! UTF_8_EXTRA_OCTET_P (c3
))
1251 if (UTF_8_3_OCTET_LEADING_P (c1
))
1253 c
= (((c1
& 0xF) << 12)
1254 | ((c2
& 0x3F) << 6) | (c3
& 0x3F));
1256 || (c
>= 0xd800 && c
< 0xe000)) /* surrogates (invalid) */
1262 if (c4
< 0 || ! UTF_8_EXTRA_OCTET_P (c4
))
1264 if (UTF_8_4_OCTET_LEADING_P (c1
))
1266 c
= (((c1
& 0x7) << 18) | ((c2
& 0x3F) << 12)
1267 | ((c3
& 0x3F) << 6) | (c4
& 0x3F));
1274 if (c5
< 0 || ! UTF_8_EXTRA_OCTET_P (c5
))
1276 if (UTF_8_5_OCTET_LEADING_P (c1
))
1278 c
= (((c1
& 0x3) << 24) | ((c2
& 0x3F) << 18)
1279 | ((c3
& 0x3F) << 12) | ((c4
& 0x3F) << 6)
1281 if ((c
> MAX_CHAR
) || (c
< 0x200000))
1296 consumed_chars
= consumed_chars_base
;
1298 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
1303 coding
->consumed_char
+= consumed_chars_base
;
1304 coding
->consumed
= src_base
- coding
->source
;
1305 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1310 encode_coding_utf_8 (coding
)
1311 struct coding_system
*coding
;
1313 int multibytep
= coding
->dst_multibyte
;
1314 int *charbuf
= coding
->charbuf
;
1315 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1316 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1317 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1318 int produced_chars
= 0;
1323 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
1325 while (charbuf
< charbuf_end
)
1327 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p
, *pend
= str
;
1329 ASSURE_DESTINATION (safe_room
);
1331 if (CHAR_BYTE8_P (c
))
1333 c
= CHAR_TO_BYTE8 (c
);
1338 CHAR_STRING_ADVANCE (c
, pend
);
1339 for (p
= str
; p
< pend
; p
++)
1346 int safe_room
= MAX_MULTIBYTE_LENGTH
;
1348 while (charbuf
< charbuf_end
)
1350 ASSURE_DESTINATION (safe_room
);
1352 dst
+= CHAR_STRING (c
, dst
);
1356 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1357 coding
->produced_char
+= produced_chars
;
1358 coding
->produced
= dst
- coding
->destination
;
1363 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1364 Check if a text is encoded in one of UTF-16 based coding systems.
1365 If it is, return 1, else return 0. */
/* Predicates on a 16-bit UTF-16 code unit VAL.

   UTF_16_HIGH_SURROGATE_P  VAL is in 0xD800..0xDBFF.
   UTF_16_LOW_SURROGATE_P   VAL is in 0xDC00..0xDFFF.
   UTF_16_INVALID_P         VAL is 0xFFFE, 0xFFFF, or a low surrogate.

   UTF_16_INVALID_P may evaluate VAL up to three times, so do not pass
   an argument with side effects.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)

#define UTF_16_INVALID_P(val)	\
  (((val) == 0xFFFE)		\
   || ((val) == 0xFFFF)		\
   || UTF_16_LOW_SURROGATE_P (val))
/* Detect whether the source of CODING looks like UTF-16 and record
   the verdict in DETECT_INFO.

   CATEGORY_MASK_UTF_16 is always added to DETECT_INFO->checked.  If
   this is the last block and CODING->src_chars is odd, every UTF-16
   category is rejected.  Otherwise the first two bytes C1, C2 decide:

     0xFF 0xFE  little-endian BOM: UTF_16_LE and UTF_16_AUTO are found;
                UTF_16_BE and both NOSIG categories are rejected.
     0xFE 0xFF  big-endian BOM: UTF_16_BE and UTF_16_AUTO are found;
                UTF_16_LE and both NOSIG categories are rejected.
     otherwise  (both bytes valid) up to 256 further byte pairs are
                sampled.  B1 and B2 record which values have been seen
                as the first and as the second byte of a pair.  Fewer
                distinct first bytes than second bytes suggests
                big-endian without signature, otherwise little-endian
                without signature; the with-BOM categories UTF_16_BE
                and UTF_16_LE are rejected.  */
1380 detect_coding_utf_16 (coding
, detect_info
)
1381 struct coding_system
*coding
;
1382 struct coding_detection_info
*detect_info
;
1384 const unsigned char *src
= coding
->source
, *src_base
= src
;
1385 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1386 int multibytep
= coding
->src_multibyte
;
1387 int consumed_chars
= 0;
1390 detect_info
->checked
|= CATEGORY_MASK_UTF_16
;
1391 if (coding
->mode
& CODING_MODE_LAST_BLOCK
1392 && (coding
->src_chars
& 1))
1394 detect_info
->rejected
|= CATEGORY_MASK_UTF_16
;
1400 if ((c1
== 0xFF) && (c2
== 0xFE))
1402 detect_info
->found
|= (CATEGORY_MASK_UTF_16_LE
1403 | CATEGORY_MASK_UTF_16_AUTO
);
1404 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_BE
1405 | CATEGORY_MASK_UTF_16_BE_NOSIG
1406 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
1408 else if ((c1
== 0xFE) && (c2
== 0xFF))
1410 detect_info
->found
|= (CATEGORY_MASK_UTF_16_BE
1411 | CATEGORY_MASK_UTF_16_AUTO
);
1412 detect_info
->rejected
|= (CATEGORY_MASK_UTF_16_LE
1413 | CATEGORY_MASK_UTF_16_BE_NOSIG
1414 | CATEGORY_MASK_UTF_16_LE_NOSIG
);
1416 else if (c1
>= 0 && c2
>= 0)
1418 unsigned char b1
[256], b2
[256];
1419 int b1_variants
= 1, b2_variants
= 1;
1422 bzero (b1
, 256), bzero (b2
, 256);
1424 for (n
= 0; n
< 256 && src
< src_end
; n
++)
1429 if (c1
< 0 || c2
< 0)
/* Count each byte value once: mark its table slot the first time it
   is seen.  Post-incrementing the byte value itself (the old
   `b1[c1++]') never marked anything, so both counters always grew
   together and the comparison below could not favour big-endian.  */
1431 if (! b1[c1]) { b1[c1] = 1; b1_variants++; }
1432 if (! b2[c2]) { b2[c2] = 1; b2_variants++; }
1434 if (b1_variants
< b2_variants
)
1435 detect_info
->found
|= CATEGORY_MASK_UTF_16_BE_NOSIG
;
1437 detect_info
->found
|= CATEGORY_MASK_UTF_16_LE_NOSIG
;
1438 detect_info
->rejected
1439 |= (CATEGORY_MASK_UTF_16_BE
| CATEGORY_MASK_UTF_16_LE
);
1446 decode_coding_utf_16 (coding
)
1447 struct coding_system
*coding
;
1449 const unsigned char *src
= coding
->source
+ coding
->consumed
;
1450 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1451 const unsigned char *src_base
;
1452 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
1453 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
1454 int consumed_chars
= 0, consumed_chars_base
;
1455 int multibytep
= coding
->src_multibyte
;
1456 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1457 enum utf_16_endian_type endian
= CODING_UTF_16_ENDIAN (coding
);
1458 int surrogate
= CODING_UTF_16_SURROGATE (coding
);
1459 Lisp_Object attr
, charset_list
;
1461 CODING_GET_INFO (coding
, attr
, charset_list
);
1463 if (bom
== utf_16_with_bom
)
1472 if (endian
== utf_16_big_endian
1473 ? c
!= 0xFEFF : c
!= 0xFFFE)
1475 /* The first two bytes are not BOM. Treat them as bytes
1476 for a normal character. */
1480 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1482 else if (bom
== utf_16_detect_bom
)
1484 /* We have already tried to detect BOM and failed in
1486 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1494 consumed_chars_base
= consumed_chars
;
1496 if (charbuf
+ 2 >= charbuf_end
)
1508 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
1512 c
= (endian
== utf_16_big_endian
1513 ? ((c1
<< 8) | c2
) : ((c2
<< 8) | c1
));
1516 if (! UTF_16_LOW_SURROGATE_P (c
))
1518 if (endian
== utf_16_big_endian
)
1519 c1
= surrogate
>> 8, c2
= surrogate
& 0xFF;
1521 c1
= surrogate
& 0xFF, c2
= surrogate
>> 8;
1525 if (UTF_16_HIGH_SURROGATE_P (c
))
1526 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1532 c
= ((surrogate
- 0xD800) << 10) | (c
- 0xDC00);
1533 CODING_UTF_16_SURROGATE (coding
) = surrogate
= 0;
1534 *charbuf
++ = 0x10000 + c
;
1539 if (UTF_16_HIGH_SURROGATE_P (c
))
1540 CODING_UTF_16_SURROGATE (coding
) = surrogate
= c
;
1547 coding
->consumed_char
+= consumed_chars_base
;
1548 coding
->consumed
= src_base
- coding
->source
;
1549 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
1553 encode_coding_utf_16 (coding
)
1554 struct coding_system
*coding
;
1556 int multibytep
= coding
->dst_multibyte
;
1557 int *charbuf
= coding
->charbuf
;
1558 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
1559 unsigned char *dst
= coding
->destination
+ coding
->produced
;
1560 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
1562 enum utf_16_bom_type bom
= CODING_UTF_16_BOM (coding
);
1563 int big_endian
= CODING_UTF_16_ENDIAN (coding
) == utf_16_big_endian
;
1564 int produced_chars
= 0;
1565 Lisp_Object attrs
, charset_list
;
1568 CODING_GET_INFO (coding
, attrs
, charset_list
);
1570 if (bom
!= utf_16_without_bom
)
1572 ASSURE_DESTINATION (safe_room
);
1574 EMIT_TWO_BYTES (0xFE, 0xFF);
1576 EMIT_TWO_BYTES (0xFF, 0xFE);
1577 CODING_UTF_16_BOM (coding
) = utf_16_without_bom
;
1580 while (charbuf
< charbuf_end
)
1582 ASSURE_DESTINATION (safe_room
);
1584 if (c
>= MAX_UNICODE_CHAR
)
1585 c
= coding
->default_char
;
1590 EMIT_TWO_BYTES (c
>> 8, c
& 0xFF);
1592 EMIT_TWO_BYTES (c
& 0xFF, c
>> 8);
1599 c1
= (c
>> 10) + 0xD800;
1600 c2
= (c
& 0x3FF) + 0xDC00;
1602 EMIT_FOUR_BYTES (c1
>> 8, c1
& 0xFF, c2
>> 8, c2
& 0xFF);
1604 EMIT_FOUR_BYTES (c1
& 0xFF, c1
>> 8, c2
& 0xFF, c2
>> 8);
1607 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
1608 coding
->produced
= dst
- coding
->destination
;
1609 coding
->produced_char
+= produced_chars
;
1614 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1616 /* Emacs' internal format for representation of multiple character
1617 sets is a kind of multi-byte encoding, i.e. characters are
1618 represented by variable-length sequences of one-byte codes.
1620 ASCII characters and control characters (e.g. `tab', `newline') are
1621 represented by one-byte sequences which are their ASCII codes, in
1622 the range 0x00 through 0x7F.
1624 8-bit characters of the range 0x80..0x9F are represented by
1625 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit code + 0x20).
1628 8-bit characters of the range 0xA0..0xFF are represented by
1629 one-byte sequences which are their 8-bit code.
1631 The other characters are represented by a sequence of `base
1632 leading-code', optional `extended leading-code', and one or two
1633 `position-code's. The length of the sequence is determined by the
1634 base leading-code. Leading-code takes the range 0x81 through 0x9D,
1635 whereas extended leading-code and position-code take the range 0xA0
1636 through 0xFF. See `charset.h' for more details about leading-code and position-code.
1639 --- CODE RANGE of Emacs' internal format ---
1643 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1644 eight-bit-graphic 0xA0..0xFF
1645 ELSE 0x81..0x9D + [0xA0..0xFF]+
1646 ---------------------------------------------
1648 As this is the internal character representation, the format is
1649 usually not used externally (i.e. in a file or in a data sent to a
1650 process). But, it is possible to have a text externally in this
1651 format (i.e. by encoding by the coding system `emacs-mule').
1653 In that case, a sequence of one-byte codes has a slightly different form.
1656 At first, all characters in eight-bit-control are represented by
1657 one-byte sequences which are their 8-bit code.
1659 Next, character composition data are represented by the byte
1660 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1662 METHOD is 0xF2 plus one of composition method (enum
1663 composition_method),
1665 BYTES is 0xA0 plus a byte length of this composition data,
1667 CHARS is 0xA0 plus a number of characters composed by this data,
1670 COMPONENTs are characters in multibyte form or composition
1671 rules encoded as two bytes of ASCII codes.
1673 In addition, for backward compatibility, the following formats are
1674 also recognized as composition data on decoding.
1677 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1680 MSEQ is a multibyte form but in these special format:
1681 ASCII: 0xA0 ASCII_CODE+0x80,
1682 other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1683 RULE is a one byte code of the range 0xA0..0xF0 that
1684 represents a composition rule.
1687 char emacs_mule_bytes
[256];
1690 emacs_mule_char (coding
, src
, nbytes
, nchars
, id
)
1691 struct coding_system
*coding
;
1692 const unsigned char *src
;
1693 int *nbytes
, *nchars
, *id
;
1695 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1696 const unsigned char *src_base
= src
;
1697 int multibytep
= coding
->src_multibyte
;
1698 struct charset
*charset
;
1701 int consumed_chars
= 0;
1707 charset
= emacs_mule_charset
[0];
1711 switch (emacs_mule_bytes
[c
])
1714 if (! (charset
= emacs_mule_charset
[c
]))
1723 if (c
== EMACS_MULE_LEADING_CODE_PRIVATE_11
1724 || c
== EMACS_MULE_LEADING_CODE_PRIVATE_12
)
1727 if (c
< 0 || ! (charset
= emacs_mule_charset
[c
]))
1736 if (! (charset
= emacs_mule_charset
[c
]))
1741 code
= (c
& 0x7F) << 8;
1751 if (c
< 0 || ! (charset
= emacs_mule_charset
[c
]))
1756 code
= (c
& 0x7F) << 8;
1765 charset
= CHARSET_FROM_ID (ASCII_BYTE_P (code
)
1766 ? charset_ascii
: charset_eight_bit
);
1772 c
= DECODE_CHAR (charset
, code
);
1776 *nbytes
= src
- src_base
;
1777 *nchars
= consumed_chars
;
1790 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1791 Check if a text is encoded in `emacs-mule'. If it is, return 1, else return 0. */
1795 detect_coding_emacs_mule (coding
, detect_info
)
1796 struct coding_system
*coding
;
1797 struct coding_detection_info
*detect_info
;
1799 const unsigned char *src
= coding
->source
, *src_base
;
1800 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
1801 int multibytep
= coding
->src_multibyte
;
1802 int consumed_chars
= 0;
1806 detect_info
->checked
|= CATEGORY_MASK_EMACS_MULE
;
1807 /* A coding system of this category is always ASCII compatible. */
1808 src
+= coding
->head_ascii
;
1818 /* Perhaps the start of a composite character. We simply skip
1819 it because analyzing it is too heavy for detecting. But,
1820 at least, we check that the composite character
1821 consists of more than 4 bytes. */
1822 const unsigned char *src_base
;
1832 if (src
- src_base
<= 4)
1834 found
= CATEGORY_MASK_EMACS_MULE
;
1842 && (c
== ISO_CODE_ESC
|| c
== ISO_CODE_SI
|| c
== ISO_CODE_SO
))
1847 int more_bytes
= emacs_mule_bytes
[*src_base
] - 1;
1849 while (more_bytes
> 0)
1854 src
--; /* Unread the last byte. */
1859 if (more_bytes
!= 0)
1861 found
= CATEGORY_MASK_EMACS_MULE
;
1864 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1868 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
1870 detect_info
->rejected
|= CATEGORY_MASK_EMACS_MULE
;
1873 detect_info
->found
|= found
;
1878 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1880 /* Decode a character represented as a component of composition
1881 sequence of Emacs 20/21 style at SRC. Set C to that character and
1882 update SRC to the head of the next character (or an encoded composition
1883 rule). If SRC doesn't point to a composition component, set C to -1.
1884 If SRC points to an invalid byte sequence, global exit by a return
1887 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \
1891 int nbytes, nchars; \
1893 if (src == src_end) \
1895 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1900 goto invalid_code; \
1904 consumed_chars += nchars; \
1909 /* Decode a composition rule represented as a component of composition
1910 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1911 and increment BUF. If SRC points an invalid byte sequence, set C
1914 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1916 int c, gref, nref; \
1918 if (src >= src_end) \
1919 goto invalid_code; \
1920 ONE_MORE_BYTE_NO_CHECK (c); \
1922 if (c < 0 || c >= 81) \
1923 goto invalid_code; \
1925 gref = c / 9, nref = c % 9; \
1926 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1930 /* Decode a composition rule represented as a component of composition
1931 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1932 and increment BUF. If SRC points an invalid byte sequence, set C
1935 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1939 if (src + 1>= src_end) \
1940 goto invalid_code; \
1941 ONE_MORE_BYTE_NO_CHECK (gref); \
1943 ONE_MORE_BYTE_NO_CHECK (nref); \
1945 if (gref < 0 || gref >= 81 \
1946 || nref < 0 || nref >= 81) \
1947 goto invalid_code; \
1948 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1952 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1954 /* Emacs 21 style format. The first three bytes at SRC are \
1955 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1956 the byte length of this composition information, CHARS is the \
1957 number of characters composed by this composition. */ \
1958 enum composition_method method = c - 0xF2; \
1959 int *charbuf_base = charbuf; \
1960 int consumed_chars_limit; \
1961 int nbytes, nchars; \
1963 ONE_MORE_BYTE (c); \
1965 goto invalid_code; \
1966 nbytes = c - 0xA0; \
1968 goto invalid_code; \
1969 ONE_MORE_BYTE (c); \
1971 goto invalid_code; \
1972 nchars = c - 0xA0; \
1973 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
1974 consumed_chars_limit = consumed_chars_base + nbytes; \
1975 if (method != COMPOSITION_RELATIVE) \
1978 while (consumed_chars < consumed_chars_limit) \
1980 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1981 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1983 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1986 if (consumed_chars < consumed_chars_limit) \
1987 goto invalid_code; \
1988 charbuf_base[0] -= i; \
1993 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1995 /* Emacs 20 style format for relative composition. */ \
1996 /* Store multibyte form of characters to be composed. */ \
1997 enum composition_method method = COMPOSITION_RELATIVE; \
1998 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1999 int *buf = components; \
2003 ONE_MORE_BYTE (c); /* skip 0x80 */ \
2004 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
2005 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2007 goto invalid_code; \
2008 ADD_COMPOSITION_DATA (charbuf, i, method); \
2009 for (j = 0; j < i; j++) \
2010 *charbuf++ = components[j]; \
/* Decode an Emacs 20 style rule-base composition sequence.

   Characters and composition rules are first collected alternately
   into the local array `components' (a character, then up to
   MAX_COMPOSITION_COMPONENTS rule/character pairs).  The sequence is
   rejected through `invalid_code' unless at least one pair was read
   and an odd number of elements was collected.  If the charbuf does
   not have room for the result, control goes to `no_more_source';
   otherwise the composition annotation header is written to the
   charbuf, followed by the collected elements.

   Two defects are fixed relative to the previous text:
   - the room check compared with `<', which bailed out exactly when
     there WAS room and went on to write when there was none;
   - the annotation header was written through `buf' (the pointer into
     the local `components' array) instead of `charbuf', so it never
     reached the output and could run past the end of `components'.

   NOTE(review): the copy loops below are bounded by `i', the number
   of rule/character pairs, while `components' holds both characters
   and rules -- confirm the intended element count against the full
   source.  */
2014 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \
2016 /* Emacs 20 style format for rule-base composition. */ \
2017 /* Store multibyte form of characters to be composed. */ \
2018 enum composition_method method = COMPOSITION_WITH_RULE; \
2019 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
2020 int *buf = components; \
2023 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2024 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
2026 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
2027 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2029 if (i < 1 || (buf - components) % 2 == 0) \
2030 goto invalid_code; \
2031 if (charbuf + i + (i / 2) + 1 >= charbuf_end) \
2032 goto no_more_source; \
2033 ADD_COMPOSITION_DATA (charbuf, i, method); \
2034 for (j = 0; j < i; j++) \
2035 *charbuf++ = components[j]; \
2036 for (j = 0; j < i; j += 2) \
2037 *charbuf++ = components[j]; \
2042 decode_coding_emacs_mule (coding
)
2043 struct coding_system
*coding
;
2045 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2046 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2047 const unsigned char *src_base
;
2048 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
2050 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
2051 int consumed_chars
= 0, consumed_chars_base
;
2052 int multibytep
= coding
->src_multibyte
;
2053 Lisp_Object attrs
, charset_list
;
2054 int char_offset
= coding
->produced_char
;
2055 int last_offset
= char_offset
;
2056 int last_id
= charset_ascii
;
2058 CODING_GET_INFO (coding
, attrs
, charset_list
);
2065 consumed_chars_base
= consumed_chars
;
2067 if (charbuf
>= charbuf_end
)
2086 if (c
- 0xF2 >= COMPOSITION_RELATIVE
2087 && c
- 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS
)
2088 DECODE_EMACS_MULE_21_COMPOSITION (c
);
2090 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c
);
2092 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c
);
2096 else if (c
< 0xA0 && emacs_mule_bytes
[c
] > 1)
2102 consumed_chars
= consumed_chars_base
;
2103 c
= emacs_mule_char (coding
, src
, &nbytes
, &nchars
, &id
);
2112 if (last_id
!= charset_ascii
)
2113 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
2115 last_offset
= char_offset
;
2119 consumed_chars
+= nchars
;
2126 consumed_chars
= consumed_chars_base
;
2128 *charbuf
++ = ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
2134 if (last_id
!= charset_ascii
)
2135 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
2136 coding
->consumed_char
+= consumed_chars_base
;
2137 coding
->consumed
= src_base
- coding
->source
;
2138 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
2142 #define EMACS_MULE_LEADING_CODES(id, codes) \
2145 codes[0] = id, codes[1] = 0; \
2146 else if (id < 0xE0) \
2147 codes[0] = 0x9A, codes[1] = id; \
2148 else if (id < 0xF0) \
2149 codes[0] = 0x9B, codes[1] = id; \
2150 else if (id < 0xF5) \
2151 codes[0] = 0x9C, codes[1] = id; \
2153 codes[0] = 0x9D, codes[1] = id; \
2158 encode_coding_emacs_mule (coding
)
2159 struct coding_system
*coding
;
2161 int multibytep
= coding
->dst_multibyte
;
2162 int *charbuf
= coding
->charbuf
;
2163 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
2164 unsigned char *dst
= coding
->destination
+ coding
->produced
;
2165 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
2167 int produced_chars
= 0;
2168 Lisp_Object attrs
, charset_list
;
2170 int preferred_charset_id
= -1;
2172 CODING_GET_INFO (coding
, attrs
, charset_list
);
2173 if (! EQ (charset_list
, Vemacs_mule_charset_list
))
2175 CODING_ATTR_CHARSET_LIST (attrs
)
2176 = charset_list
= Vemacs_mule_charset_list
;
2179 while (charbuf
< charbuf_end
)
2181 ASSURE_DESTINATION (safe_room
);
2186 /* Handle an annotation. */
2189 case CODING_ANNOTATE_COMPOSITION_MASK
:
2190 /* Not yet implemented. */
2192 case CODING_ANNOTATE_CHARSET_MASK
:
2193 preferred_charset_id
= charbuf
[3];
2194 if (preferred_charset_id
>= 0
2195 && NILP (Fmemq (make_number (preferred_charset_id
),
2197 preferred_charset_id
= -1;
2206 if (ASCII_CHAR_P (c
))
2207 EMIT_ONE_ASCII_BYTE (c
);
2208 else if (CHAR_BYTE8_P (c
))
2210 c
= CHAR_TO_BYTE8 (c
);
2215 struct charset
*charset
;
2219 unsigned char leading_codes
[2];
2221 if (preferred_charset_id
>= 0)
2223 charset
= CHARSET_FROM_ID (preferred_charset_id
);
2224 if (! CHAR_CHARSET_P (c
, charset
))
2225 charset
= char_charset (c
, charset_list
, NULL
);
2228 charset
= char_charset (c
, charset_list
, &code
);
2231 c
= coding
->default_char
;
2232 if (ASCII_CHAR_P (c
))
2234 EMIT_ONE_ASCII_BYTE (c
);
2237 charset
= char_charset (c
, charset_list
, &code
);
2239 dimension
= CHARSET_DIMENSION (charset
);
2240 emacs_mule_id
= CHARSET_EMACS_MULE_ID (charset
);
2241 EMACS_MULE_LEADING_CODES (emacs_mule_id
, leading_codes
);
2242 EMIT_ONE_BYTE (leading_codes
[0]);
2243 if (leading_codes
[1])
2244 EMIT_ONE_BYTE (leading_codes
[1]);
2246 EMIT_ONE_BYTE (code
| 0x80);
2250 EMIT_ONE_BYTE (code
>> 8);
2251 EMIT_ONE_BYTE (code
& 0xFF);
2255 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
2256 coding
->produced_char
+= produced_chars
;
2257 coding
->produced
= dst
- coding
->destination
;
2262 /*** 7. ISO2022 handlers ***/
2264 /* The following note describes the coding system ISO2022 briefly.
2265 Since the intention of this note is to help understand the
2266 functions in this file, some parts are NOT ACCURATE or are OVERLY
2267 SIMPLIFIED. For thorough understanding, please refer to the
2268 original document of ISO2022. This is equivalent to the standard
2269 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2271 ISO2022 provides many mechanisms to encode several character sets
2272 in 7-bit and 8-bit environments. For 7-bit environments, all text
2273 is encoded using bytes less than 128. This may make the encoded
2274 text a little bit longer, but the text passes more easily through
2275 several types of gateway, some of which strip off the MSB (Most Significant Bit).
2278 There are two kinds of character sets: control character sets and
2279 graphic character sets. The former contain control characters such
2280 as `newline' and `escape' to provide control functions (control
2281 functions are also provided by escape sequences). The latter
2282 contain graphic characters such as 'A' and '-'. Emacs recognizes
2283 two control character sets and many graphic character sets.
2285 Graphic character sets are classified into one of the following
2286 four classes, according to the number of bytes (DIMENSION) and
2287 number of characters in one dimension (CHARS) of the set:
2288 - DIMENSION1_CHARS94
2289 - DIMENSION1_CHARS96
2290 - DIMENSION2_CHARS94
2291 - DIMENSION2_CHARS96
2293 In addition, each character set is assigned an identification tag,
2294 unique for each set, called the "final character" (denoted as <F>
2295 hereafter). The <F> of each character set is decided by ECMA(*)
2296 when it is registered in ISO. The code range of <F> is 0x30..0x7F
2297 (0x30..0x3F are for private use only).
2299 Note (*): ECMA = European Computer Manufacturers Association
2301 Here are examples of graphic character sets [NAME(<F>)]:
2302 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2303 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2304 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2305 o DIMENSION2_CHARS96 -- none for the moment
2307 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2308 C0 [0x00..0x1F] -- control character plane 0
2309 GL [0x20..0x7F] -- graphic character plane 0
2310 C1 [0x80..0x9F] -- control character plane 1
2311 GR [0xA0..0xFF] -- graphic character plane 1
2313 A control character set is directly designated and invoked to C0 or
2314 C1 by an escape sequence. The most common case is that:
2315 - ISO646's control character set is designated/invoked to C0, and
2316 - ISO6429's control character set is designated/invoked to C1,
2317 and usually these designations/invocations are omitted in encoded
2318 text. In a 7-bit environment, only C0 can be used, and a control
2319 character for C1 is encoded by an appropriate escape sequence to
2320 fit into the environment. All control characters for C1 are
2321 defined to have corresponding escape sequences.
2323 A graphic character set is at first designated to one of four
2324 graphic registers (G0 through G3), then these graphic registers are
2325 invoked to GL or GR. These designations and invocations can be
2326 done independently. The most common case is that G0 is invoked to
2327 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually
2328 these invocations and designations are omitted in encoded text.
2329 In a 7-bit environment, only GL can be used.
2331 When a graphic character set of CHARS94 is invoked to GL, codes
2332 0x20 and 0x7F of the GL area work as control characters SPACE and
2333 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2336 There are two ways of invocation: locking-shift and single-shift.
2337 With locking-shift, the invocation lasts until the next different
2338 invocation, whereas with single-shift, the invocation affects the
2339 following character only and doesn't affect the locking-shift
2340 state. Invocations are done by the following control characters or
2343 ----------------------------------------------------------------------
2344 abbrev function cntrl escape seq description
2345 ----------------------------------------------------------------------
2346 SI/LS0 (shift-in) 0x0F none invoke G0 into GL
2347 SO/LS1 (shift-out) 0x0E none invoke G1 into GL
2348 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL
2349 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL
2350 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*)
2351 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*)
2352 LS3R (locking-shift-3 right) none ESC '|' invoke G3 into GR (*)
2353 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char
2354 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char
2355 ----------------------------------------------------------------------
2356 (*) These are not used by any known coding system.
2358 Control characters for these functions are defined by macros
2359 ISO_CODE_XXX in `coding.h'.
2361 Designations are done by the following escape sequences:
2362 ----------------------------------------------------------------------
2363 escape sequence description
2364 ----------------------------------------------------------------------
2365 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0
2366 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1
2367 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2
2368 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3
2369 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*)
2370 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1
2371 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2
2372 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3
2373 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**)
2374 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1
2375 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2
2376 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3
2377 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*)
2378 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1
2379 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2
2380 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3
2381 ----------------------------------------------------------------------
2383 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2384 of dimension 1, chars 94, and final character <F>, etc...
2386 Note (*): Although these designations are not allowed in ISO2022,
2387 Emacs accepts them on decoding, and produces them on encoding
2388 CHARS96 character sets in a coding system which is characterized as
2389 7-bit environment, non-locking-shift, and non-single-shift.
2391 Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2392 '(' must be omitted. We refer to this as "short-form" hereafter.
2394 Now you may notice that there are a lot of ways of encoding the
2395 same multilingual text in ISO2022. Actually, there exist many
2396 coding systems such as Compound Text (used in X11's inter client
2397 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2398 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2399 localized platforms), and all of these are variants of ISO2022.
2401 In addition to the above, Emacs handles two more kinds of escape
2402 sequences: ISO6429's direction specification and Emacs' private
2403 sequence for specifying character composition.
2405 ISO6429's direction specification takes the following form:
2406 o CSI ']' -- end of the current direction
2407 o CSI '0' ']' -- end of the current direction
2408 o CSI '1' ']' -- start of left-to-right text
2409 o CSI '2' ']' -- start of right-to-left text
2410 The control character CSI (0x9B: control sequence introducer) is
2411 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2413 Character composition specification takes the following form:
2414 o ESC '0' -- start relative composition
2415 o ESC '1' -- end composition
2416 o ESC '2' -- start rule-base composition (*)
2417 o ESC '3' -- start relative composition with alternate chars (**)
2418 o ESC '4' -- start rule-base composition with alternate chars (**)
2419 Since these are not standard escape sequences of any ISO standard,
2420 the use of them with these meanings is restricted to Emacs only.
2422 (*) This form is used only in Emacs 20.7 and older versions,
2423 but newer versions can safely decode it.
2424 (**) This form is used only in Emacs 21.1 and newer versions,
2425 and older versions can't decode it.
2427 Here's a list of example usages of these composition escape
2428 sequences (categorized by `enum composition_method').
2430 COMPOSITION_RELATIVE:
2431 ESC 0 CHAR [ CHAR ] ESC 1
2432 COMPOSITION_WITH_RULE:
2433 ESC 2 CHAR [ RULE CHAR ] ESC 1
2434 COMPOSITION_WITH_ALTCHARS:
2435 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2436 COMPOSITION_WITH_RULE_ALTCHARS:
2437 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
/* Table with one entry per byte value (256 entries) giving that byte's
   ISO code class; decode_coding_iso_2022 dispatches on
   iso_code_class[c1].  */
2439 enum iso_code_class_type iso_code_class
[256];
/* Return nonzero if charset ID may be used with CODING: ID must not
   exceed CODING->max_charset_id and its entry in CODING->safe_charsets
   must be non-negative.  The entry is read as `signed char' explicitly
   because plain `char' is unsigned on some targets, where a bare
   `>= 0' comparison would be true for every entry.  Only the upper
   bound of ID is checked, so callers must pass a non-negative ID.
   Both arguments are evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id)				\
  ((id) <= (coding)->max_charset_id				\
   && (signed char) (coding)->safe_charsets[id] >= 0)
/* Nonzero unless CODING_ISO_INITIAL (..., 1) is negative for the
   coding system registered under coding category CATEGORY.
   NOTE(review): CODING_ISO_INITIAL is defined elsewhere; presumably
   it yields the initial designation of graphic register G1 -- confirm.  */
#define SHIFT_OUT_OK(category)					\
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
2450 setup_iso_safe_charsets (attrs
)
2453 Lisp_Object charset_list
, safe_charsets
;
2454 Lisp_Object request
;
2455 Lisp_Object reg_usage
;
2458 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
2461 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
2462 if ((flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
2463 && ! EQ (charset_list
, Viso_2022_charset_list
))
2465 CODING_ATTR_CHARSET_LIST (attrs
)
2466 = charset_list
= Viso_2022_charset_list
;
2467 ASET (attrs
, coding_attr_safe_charsets
, Qnil
);
2470 if (STRINGP (AREF (attrs
, coding_attr_safe_charsets
)))
2474 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2476 int id
= XINT (XCAR (tail
));
2477 if (max_charset_id
< id
)
2478 max_charset_id
= id
;
2481 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
2483 request
= AREF (attrs
, coding_attr_iso_request
);
2484 reg_usage
= AREF (attrs
, coding_attr_iso_usage
);
2485 reg94
= XINT (XCAR (reg_usage
));
2486 reg96
= XINT (XCDR (reg_usage
));
2488 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
2492 struct charset
*charset
;
2495 charset
= CHARSET_FROM_ID (XINT (id
));
2496 reg
= Fcdr (Fassq (id
, request
));
2498 SSET (safe_charsets
, XINT (id
), XINT (reg
));
2499 else if (charset
->iso_chars_96
)
2502 SSET (safe_charsets
, XINT (id
), reg96
);
2507 SSET (safe_charsets
, XINT (id
), reg94
);
2510 ASET (attrs
, coding_attr_safe_charsets
, safe_charsets
);
2514 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2515 Check if a text is encoded in one of ISO-2022 based coding systems.
2516 If it is, return 1, else return 0. */
2519 detect_coding_iso_2022 (coding
, detect_info
)
2520 struct coding_system
*coding
;
2521 struct coding_detection_info
*detect_info
;
2523 const unsigned char *src
= coding
->source
, *src_base
= src
;
2524 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2525 int multibytep
= coding
->src_multibyte
;
2526 int single_shifting
= 0;
2529 int consumed_chars
= 0;
2534 detect_info
->checked
|= CATEGORY_MASK_ISO
;
2536 for (i
= coding_category_iso_7
; i
<= coding_category_iso_8_else
; i
++)
2538 struct coding_system
*this = &(coding_categories
[i
]);
2539 Lisp_Object attrs
, val
;
2541 attrs
= CODING_ID_ATTRS (this->id
);
2542 if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2543 && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs
), Viso_2022_charset_list
))
2544 setup_iso_safe_charsets (attrs
);
2545 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
2546 this->max_charset_id
= SCHARS (val
) - 1;
2547 this->safe_charsets
= (char *) SDATA (val
);
2550 /* A coding system of this category is always ASCII compatible. */
2551 src
+= coding
->head_ascii
;
2553 while (rejected
!= CATEGORY_MASK_ISO
)
2560 if (inhibit_iso_escape_detection
)
2562 single_shifting
= 0;
2564 if (c
>= '(' && c
<= '/')
2566 /* Designation sequence for a charset of dimension 1. */
2568 if (c1
< ' ' || c1
>= 0x80
2569 || (id
= iso_charset_table
[0][c
>= ','][c1
]) < 0)
2570 /* Invalid designation sequence. Just ignore. */
2575 /* Designation sequence for a charset of dimension 2. */
2577 if (c
>= '@' && c
<= 'B')
2578 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
2579 id
= iso_charset_table
[1][0][c
];
2580 else if (c
>= '(' && c
<= '/')
2583 if (c1
< ' ' || c1
>= 0x80
2584 || (id
= iso_charset_table
[1][c
>= ','][c1
]) < 0)
2585 /* Invalid designation sequence. Just ignore. */
2589 /* Invalid designation sequence. Just ignore it. */
2592 else if (c
== 'N' || c
== 'O')
2594 /* ESC <Fe> for SS2 or SS3. */
2595 single_shifting
= 1;
2596 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2599 else if (c
>= '0' && c
<= '4')
2601 /* ESC <Fp> for start/end composition. */
2602 found
|= CATEGORY_MASK_ISO
;
2607 /* Invalid escape sequence. Just ignore it. */
2611 /* We found a valid designation sequence for CHARSET. */
2612 rejected
|= CATEGORY_MASK_ISO_8BIT
;
2613 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7
],
2615 found
|= CATEGORY_MASK_ISO_7
;
2617 rejected
|= CATEGORY_MASK_ISO_7
;
2618 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_tight
],
2620 found
|= CATEGORY_MASK_ISO_7_TIGHT
;
2622 rejected
|= CATEGORY_MASK_ISO_7_TIGHT
;
2623 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_7_else
],
2625 found
|= CATEGORY_MASK_ISO_7_ELSE
;
2627 rejected
|= CATEGORY_MASK_ISO_7_ELSE
;
2628 if (SAFE_CHARSET_P (&coding_categories
[coding_category_iso_8_else
],
2630 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2632 rejected
|= CATEGORY_MASK_ISO_8_ELSE
;
2637 /* Locking shift out/in. */
2638 if (inhibit_iso_escape_detection
)
2640 single_shifting
= 0;
2641 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_8BIT
;
2642 found
|= CATEGORY_MASK_ISO_ELSE
;
2646 /* Control sequence introducer. */
2647 single_shifting
= 0;
2648 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2649 found
|= CATEGORY_MASK_ISO_8_ELSE
;
2650 goto check_extra_latin
;
2655 if (inhibit_iso_escape_detection
)
2657 single_shifting
= 0;
2658 rejected
|= CATEGORY_MASK_ISO_7BIT
;
2659 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2660 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2661 found
|= CATEGORY_MASK_ISO_8_1
, single_shifting
= 1;
2662 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_2
])
2663 & CODING_ISO_FLAG_SINGLE_SHIFT
)
2664 found
|= CATEGORY_MASK_ISO_8_2
, single_shifting
= 1;
2665 if (single_shifting
)
2667 goto check_extra_latin
;
2674 single_shifting
= 0;
2679 rejected
|= CATEGORY_MASK_ISO_7BIT
| CATEGORY_MASK_ISO_7_ELSE
;
2680 found
|= CATEGORY_MASK_ISO_8_1
;
2681 /* Check the length of succeeding codes of the range
2682 0xA0..0xFF. If the byte length is even, we include
2683 CATEGORY_MASK_ISO_8_2 in `found'. We can check this
2684 only when we are not single shifting. */
2685 if (! single_shifting
2686 && ! (rejected
& CATEGORY_MASK_ISO_8_2
))
2689 while (src
< src_end
)
2697 if (i
& 1 && src
< src_end
)
2698 rejected
|= CATEGORY_MASK_ISO_8_2
;
2700 found
|= CATEGORY_MASK_ISO_8_2
;
2705 single_shifting
= 0;
2706 if (! VECTORP (Vlatin_extra_code_table
)
2707 || NILP (XVECTOR (Vlatin_extra_code_table
)->contents
[c
]))
2709 rejected
= CATEGORY_MASK_ISO
;
2712 if (CODING_ISO_FLAGS (&coding_categories
[coding_category_iso_8_1
])
2713 & CODING_ISO_FLAG_LATIN_EXTRA
)
2714 found
|= CATEGORY_MASK_ISO_8_1
;
2716 rejected
|= CATEGORY_MASK_ISO_8_1
;
2717 rejected
|= CATEGORY_MASK_ISO_8_2
;
2720 detect_info
->rejected
|= CATEGORY_MASK_ISO
;
2724 detect_info
->rejected
|= rejected
;
2725 detect_info
->found
|= (found
& ~rejected
);
2730 /* Set designation state into CODING. */
2731 #define DECODE_DESIGNATION(reg, dim, chars_96, final) \
2735 if (final < '0' || final >= 128 \
2736 || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0) \
2737 || !SAFE_CHARSET_P (coding, id)) \
2739 CODING_ISO_DESIGNATION (coding, reg) = -2; \
2740 goto invalid_code; \
2742 prev = CODING_ISO_DESIGNATION (coding, reg); \
2743 if (id == charset_jisx0201_roman) \
2745 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
2746 id = charset_ascii; \
2748 else if (id == charset_jisx0208_1978) \
2750 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
2751 id = charset_jisx0208; \
2753 CODING_ISO_DESIGNATION (coding, reg) = id; \
2754 /* If there was an invalid designation to REG previously, and this \
2755 designation is ASCII to REG, we should keep this designation \
2757 if (prev == -2 && id == charset_ascii) \
2758 goto invalid_code; \
2762 #define MAYBE_FINISH_COMPOSITION() \
2765 if (composition_state == COMPOSING_NO) \
2767 /* It is assured that we have enough room for producing \
2768 characters stored in the table `components'. */ \
2769 if (charbuf + component_idx > charbuf_end) \
2770 goto no_more_source; \
2771 composition_state = COMPOSING_NO; \
2772 if (method == COMPOSITION_RELATIVE \
2773 || method == COMPOSITION_WITH_ALTCHARS) \
2775 for (i = 0; i < component_idx; i++) \
2776 *charbuf++ = components[i]; \
2777 char_offset += component_idx; \
2781 for (i = 0; i < component_idx; i += 2) \
2782 *charbuf++ = components[i]; \
2783 char_offset += (component_idx / 2) + 1; \
2788 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2789 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2790 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2791 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2792 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2795 #define DECODE_COMPOSITION_START(c1) \
2798 && composition_state == COMPOSING_COMPONENT_RULE) \
2800 component_len = component_idx; \
2801 composition_state = COMPOSING_CHAR; \
2805 const unsigned char *p; \
2807 MAYBE_FINISH_COMPOSITION (); \
2808 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \
2809 goto no_more_source; \
2810 for (p = src; p < src_end - 1; p++) \
2811 if (*p == ISO_CODE_ESC && p[1] == '1') \
2813 if (p == src_end - 1) \
2815 if (coding->mode & CODING_MODE_LAST_BLOCK) \
2816 goto invalid_code; \
2817 goto no_more_source; \
2820 /* This is surely the start of a composition. */ \
2821 method = (c1 == '0' ? COMPOSITION_RELATIVE \
2822 : c1 == '2' ? COMPOSITION_WITH_RULE \
2823 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
2824 : COMPOSITION_WITH_RULE_ALTCHARS); \
2825 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
2826 : COMPOSING_COMPONENT_CHAR); \
2827 component_idx = component_len = 0; \
2832 /* Handle composition end sequence ESC 1. */
2834 #define DECODE_COMPOSITION_END() \
2836 int nchars = (component_len > 0 ? component_idx - component_len \
2837 : method == COMPOSITION_RELATIVE ? component_idx \
2838 : (component_idx + 1) / 2); \
2840 int *saved_charbuf = charbuf; \
2842 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
2843 if (method != COMPOSITION_RELATIVE) \
2845 if (component_len == 0) \
2846 for (i = 0; i < component_idx; i++) \
2847 *charbuf++ = components[i]; \
2849 for (i = 0; i < component_len; i++) \
2850 *charbuf++ = components[i]; \
2851 *saved_charbuf = saved_charbuf - charbuf; \
2853 if (method == COMPOSITION_WITH_RULE) \
2854 for (i = 0; i < component_idx; i += 2, char_offset++) \
2855 *charbuf++ = components[i]; \
2857 for (i = component_len; i < component_idx; i++, char_offset++) \
2858 *charbuf++ = components[i]; \
2859 coding->annotated = 1; \
2860 composition_state = COMPOSING_NO; \
2864 /* Decode a composition rule from the byte C1 (and maybe one more byte
2865 from SRC) and store one encoded composition rule in
2866 coding->cmp_data. */
2868 #define DECODE_COMPOSITION_RULE(c1) \
2871 if (c1 < 81) /* old format (before ver.21) */ \
2873 int gref = (c1) / 9; \
2874 int nref = (c1) % 9; \
2875 if (gref == 4) gref = 10; \
2876 if (nref == 4) nref = 10; \
2877 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
2879 else if (c1 < 93) /* new format (after ver.21) */ \
2881 ONE_MORE_BYTE (c2); \
2882 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
2889 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2892 decode_coding_iso_2022 (coding
)
2893 struct coding_system
*coding
;
2895 const unsigned char *src
= coding
->source
+ coding
->consumed
;
2896 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
2897 const unsigned char *src_base
;
2898 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
2900 = coding
->charbuf
+ coding
->charbuf_size
- 4 - MAX_ANNOTATION_LENGTH
;
2901 int consumed_chars
= 0, consumed_chars_base
;
2902 int multibytep
= coding
->src_multibyte
;
2903 /* Charsets invoked to graphic plane 0 and 1 respectively. */
2904 int charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
2905 int charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
2906 struct charset
*charset
;
2908 /* For handling composition sequence. */
2909 #define COMPOSING_NO 0
2910 #define COMPOSING_CHAR 1
2911 #define COMPOSING_RULE 2
2912 #define COMPOSING_COMPONENT_CHAR 3
2913 #define COMPOSING_COMPONENT_RULE 4
2915 int composition_state
= COMPOSING_NO
;
2916 enum composition_method method
;
2917 int components
[MAX_COMPOSITION_COMPONENTS
* 2 + 1];
2920 Lisp_Object attrs
, charset_list
;
2921 int char_offset
= coding
->produced_char
;
2922 int last_offset
= char_offset
;
2923 int last_id
= charset_ascii
;
2925 CODING_GET_INFO (coding
, attrs
, charset_list
);
2926 setup_iso_safe_charsets (attrs
);
2933 consumed_chars_base
= consumed_chars
;
2935 if (charbuf
>= charbuf_end
)
2942 /* We produce at most one character. */
2943 switch (iso_code_class
[c1
])
2945 case ISO_0x20_or_0x7F
:
2946 if (composition_state
!= COMPOSING_NO
)
2948 if (composition_state
== COMPOSING_RULE
2949 || composition_state
== COMPOSING_COMPONENT_RULE
)
2951 DECODE_COMPOSITION_RULE (c1
);
2952 components
[component_idx
++] = c1
;
2953 composition_state
--;
2957 if (charset_id_0
< 0
2958 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0
)))
2959 /* This is SPACE or DEL. */
2960 charset
= CHARSET_FROM_ID (charset_ascii
);
2962 charset
= CHARSET_FROM_ID (charset_id_0
);
2965 case ISO_graphic_plane_0
:
2966 if (composition_state
!= COMPOSING_NO
)
2968 if (composition_state
== COMPOSING_RULE
2969 || composition_state
== COMPOSING_COMPONENT_RULE
)
2971 DECODE_COMPOSITION_RULE (c1
);
2972 components
[component_idx
++] = c1
;
2973 composition_state
--;
2977 charset
= CHARSET_FROM_ID (charset_id_0
);
2980 case ISO_0xA0_or_0xFF
:
2981 if (charset_id_1
< 0
2982 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1
))
2983 || CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SEVEN_BITS
)
2985 /* This is a graphic character, we fall down ... */
2987 case ISO_graphic_plane_1
:
2988 if (charset_id_1
< 0)
2990 charset
= CHARSET_FROM_ID (charset_id_1
);
2994 MAYBE_FINISH_COMPOSITION ();
2995 charset
= CHARSET_FROM_ID (charset_ascii
);
2999 MAYBE_FINISH_COMPOSITION ();
3003 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3004 || CODING_ISO_DESIGNATION (coding
, 1) < 0)
3006 CODING_ISO_INVOCATION (coding
, 0) = 1;
3007 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3011 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
))
3013 CODING_ISO_INVOCATION (coding
, 0) = 0;
3014 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3017 case ISO_single_shift_2_7
:
3018 case ISO_single_shift_2
:
3019 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
3021 /* SS2 is handled as an escape sequence of ESC 'N' */
3023 goto label_escape_sequence
;
3025 case ISO_single_shift_3
:
3026 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
))
3028 /* SS3 is handled as an escape sequence of ESC 'O' */
3030 goto label_escape_sequence
;
3032 case ISO_control_sequence_introducer
:
3033 /* CSI is handled as an escape sequence of ESC '[' ... */
3035 goto label_escape_sequence
;
3039 label_escape_sequence
:
3040 /* Escape sequences handled here are invocation,
3041 designation, direction specification, and character
3042 composition specification. */
3045 case '&': /* revision of following character set */
3047 if (!(c1
>= '@' && c1
<= '~'))
3050 if (c1
!= ISO_CODE_ESC
)
3053 goto label_escape_sequence
;
3055 case '$': /* designation of 2-byte character set */
3056 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3059 if (c1
>= '@' && c1
<= 'B')
3060 { /* designation of JISX0208.1978, GB2312.1980,
3062 DECODE_DESIGNATION (0, 2, 0, c1
);
3064 else if (c1
>= 0x28 && c1
<= 0x2B)
3065 { /* designation of DIMENSION2_CHARS94 character set */
3067 DECODE_DESIGNATION (c1
- 0x28, 2, 0, c2
);
3069 else if (c1
>= 0x2C && c1
<= 0x2F)
3070 { /* designation of DIMENSION2_CHARS96 character set */
3072 DECODE_DESIGNATION (c1
- 0x2C, 2, 1, c2
);
3076 /* We must update these variables now. */
3077 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3078 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3081 case 'n': /* invocation of locking-shift-2 */
3082 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3083 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3085 CODING_ISO_INVOCATION (coding
, 0) = 2;
3086 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3089 case 'o': /* invocation of locking-shift-3 */
3090 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_LOCKING_SHIFT
)
3091 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3093 CODING_ISO_INVOCATION (coding
, 0) = 3;
3094 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3097 case 'N': /* invocation of single-shift-2 */
3098 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3099 || CODING_ISO_DESIGNATION (coding
, 2) < 0)
3101 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 2));
3103 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3107 case 'O': /* invocation of single-shift-3 */
3108 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3109 || CODING_ISO_DESIGNATION (coding
, 3) < 0)
3111 charset
= CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding
, 3));
3113 if (c1
< 0x20 || (c1
>= 0x80 && c1
< 0xA0))
3117 case '0': case '2': case '3': case '4': /* start composition */
3118 if (! (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
))
3120 DECODE_COMPOSITION_START (c1
);
3123 case '1': /* end composition */
3124 if (composition_state
== COMPOSING_NO
)
3126 DECODE_COMPOSITION_END ();
3129 case '[': /* specification of direction */
/* Mask the flags word before negating it: `!' binds tighter than `&',
   so the unparenthesized form did not test the DIRECTION bit of the
   flags at all.  */
3130 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DIRECTION
))
3132 /* For the moment, nested direction is not supported.
3133 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3134 left-to-right, and nonzero means right-to-left. */
3138 case ']': /* end of the current direction */
3139 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3141 case '0': /* end of the current direction */
3142 case '1': /* start of left-to-right direction */
3145 coding
->mode
&= ~CODING_MODE_DIRECTION
;
3150 case '2': /* start of right-to-left direction */
3153 coding
->mode
|= CODING_MODE_DIRECTION
;
3167 /* CTEXT extended segment:
3168 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3169 We keep these bytes as is for the moment.
3170 They may be decoded by post-read-conversion. */
3174 ONE_MORE_BYTE (dim
);
3177 size
= ((M
- 128) * 128) + (L
- 128);
3178 if (charbuf
+ 8 + size
> charbuf_end
)
3180 *charbuf
++ = ISO_CODE_ESC
;
3184 *charbuf
++ = BYTE8_TO_CHAR (M
);
3185 *charbuf
++ = BYTE8_TO_CHAR (L
);
3189 *charbuf
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3194 /* XFree86 extension for embedding UTF-8 in CTEXT:
3195 ESC % G --UTF-8-BYTES-- ESC % @
3196 We keep these bytes as is for the moment.
3197 They may be decoded by post-read-conversion. */
3200 if (p
+ 6 > charbuf_end
)
3202 *p
++ = ISO_CODE_ESC
;
3205 while (p
< charbuf_end
)
3208 if (c1
== ISO_CODE_ESC
3209 && src
+ 1 < src_end
3213 *p
++ = ASCII_BYTE_P (c1
) ? c1
: BYTE8_TO_CHAR (c1
);
3215 if (p
+ 3 > charbuf_end
)
3217 *p
++ = ISO_CODE_ESC
;
3228 if (! (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATION
))
3230 if (c1
>= 0x28 && c1
<= 0x2B)
3231 { /* designation of DIMENSION1_CHARS94 character set */
3233 DECODE_DESIGNATION (c1
- 0x28, 1, 0, c2
);
3235 else if (c1
>= 0x2C && c1
<= 0x2F)
3236 { /* designation of DIMENSION1_CHARS96 character set */
3238 DECODE_DESIGNATION (c1
- 0x2C, 1, 1, c2
);
3242 /* We must update these variables now. */
3243 charset_id_0
= CODING_ISO_INVOKED_CHARSET (coding
, 0);
3244 charset_id_1
= CODING_ISO_INVOKED_CHARSET (coding
, 1);
3249 if (charset
->id
!= charset_ascii
3250 && last_id
!= charset
->id
)
3252 if (last_id
!= charset_ascii
)
3253 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
3254 last_id
= charset
->id
;
3255 last_offset
= char_offset
;
3258 /* Now we know CHARSET and 1st position code C1 of a character.
3259 Produce a decoded character while getting 2nd position code
3262 if (CHARSET_DIMENSION (charset
) > 1)
3265 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3266 /* C2 is not in a valid range. */
3268 c1
= (c1
<< 8) | (c2
& 0x7F);
3269 if (CHARSET_DIMENSION (charset
) > 2)
3272 if (c2
< 0x20 || (c2
>= 0x80 && c2
< 0xA0))
3273 /* C2 is not in a valid range. */
3275 c1
= (c1
<< 8) | (c2
& 0x7F);
3279 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c1
, c
);
3282 MAYBE_FINISH_COMPOSITION ();
3283 for (; src_base
< src
; src_base
++, char_offset
++)
3285 if (ASCII_BYTE_P (*src_base
))
3286 *charbuf
++ = *src_base
;
3288 *charbuf
++ = BYTE8_TO_CHAR (*src_base
);
3291 else if (composition_state
== COMPOSING_NO
)
3298 components
[component_idx
++] = c
;
3299 if (method
== COMPOSITION_WITH_RULE
3300 || (method
== COMPOSITION_WITH_RULE_ALTCHARS
3301 && composition_state
== COMPOSING_COMPONENT_CHAR
))
3302 composition_state
++;
3307 MAYBE_FINISH_COMPOSITION ();
3309 consumed_chars
= consumed_chars_base
;
3311 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
3321 if (last_id
!= charset_ascii
)
3322 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
3323 coding
->consumed_char
+= consumed_chars_base
;
3324 coding
->consumed
= src_base
- coding
->source
;
3325 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
3329 /* ISO2022 encoding stuff. */
3332 It is not enough to say just "ISO2022" on encoding, we have to
3333 specify more details. In Emacs, each coding system of ISO2022
3334 variant has the following specifications:
3335 1. Initial designation to G0 thru G3.
3336 2. Allows short-form designation?
3337 3. ASCII should be designated to G0 before control characters?
3338 4. ASCII should be designated to G0 at end of line?
3339 5. 7-bit environment or 8-bit environment?
3340 6. Use locking-shift?
3341 7. Use Single-shift?
3342 And the following two are only for Japanese:
3343 8. Use ASCII in place of JIS0201-1976-Roman?
3344 9. Use JISX0208-1983 in place of JISX0208-1978?
3345 These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3346 defined by macros CODING_ISO_FLAG_XXX. See `coding.h' for more
3350 /* Produce codes (escape sequence) for designating CHARSET to graphic
3351 register REG at DST, and increment DST. If <final-char> of CHARSET is
3352 '@', 'A', or 'B' and the coding system CODING allows, produce
3353 designation sequence of short-form. */
3355 #define ENCODE_DESIGNATION(charset, reg, coding) \
3357 unsigned char final_char = CHARSET_ISO_FINAL (charset); \
3358 char *intermediate_char_94 = "()*+"; \
3359 char *intermediate_char_96 = ",-./"; \
3360 int revision = -1; \
3363 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
3364 revision = CHARSET_ISO_REVISION (charset); \
3366 if (revision >= 0) \
3368 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&'); \
3369 EMIT_ONE_BYTE ('@' + revision); \
3371 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
3372 if (CHARSET_DIMENSION (charset) == 1) \
3374 if (! CHARSET_ISO_CHARS_96 (charset)) \
3375 c = intermediate_char_94[reg]; \
3377 c = intermediate_char_96[reg]; \
3378 EMIT_ONE_ASCII_BYTE (c); \
3382 EMIT_ONE_ASCII_BYTE ('$'); \
3383 if (! CHARSET_ISO_CHARS_96 (charset)) \
3385 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM \
3387 || final_char < '@' || final_char > 'B') \
3388 EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]); \
3391 EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]); \
3393 EMIT_ONE_ASCII_BYTE (final_char); \
3395 CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset); \
3399 /* The following two macros produce codes (control character or escape
3400 sequence) for ISO2022 single-shift functions (single-shift-2 and
3403 #define ENCODE_SINGLE_SHIFT_2 \
3405 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3406 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N'); \
3408 EMIT_ONE_BYTE (ISO_CODE_SS2); \
3409 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3413 #define ENCODE_SINGLE_SHIFT_3 \
3415 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3416 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O'); \
3418 EMIT_ONE_BYTE (ISO_CODE_SS3); \
3419 CODING_ISO_SINGLE_SHIFTING (coding) = 1; \
3423 /* The following four macros produce codes (control character or
3424 escape sequence) for ISO2022 locking-shift functions (shift-in,
3425 shift-out, locking-shift-2, and locking-shift-3). */
3427 #define ENCODE_SHIFT_IN \
3429 EMIT_ONE_ASCII_BYTE (ISO_CODE_SI); \
3430 CODING_ISO_INVOCATION (coding, 0) = 0; \
3434 #define ENCODE_SHIFT_OUT \
3436 EMIT_ONE_ASCII_BYTE (ISO_CODE_SO); \
3437 CODING_ISO_INVOCATION (coding, 0) = 1; \
3441 #define ENCODE_LOCKING_SHIFT_2 \
3443 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n'); \
3444 CODING_ISO_INVOCATION (coding, 0) = 2; \
/* Produce the escape sequence for locking-shift-3 (LS3: ESC 'o',
   invoke G3 into GL) and record that graphic register 3 is now
   invoked to graphic plane 0.  ESC 'n' is LS2, not LS3; the decoder
   likewise treats ESC 'o' as locking-shift-3.  A variable named
   `coding' must be in scope where this macro is used.  */
#define ENCODE_LOCKING_SHIFT_3				\
  do {							\
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');		\
    CODING_ISO_INVOCATION (coding, 0) = 3;		\
  } while (0)
3455 /* Produce codes for a DIMENSION1 character whose character set is
3456 CHARSET and whose position-code is C1. Designation and invocation
3457 sequences are also produced in advance if necessary. */
3459 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
3461 int id = CHARSET_ID (charset); \
3463 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3464 && id == charset_ascii) \
3466 id = charset_jisx0201_roman; \
3467 charset = CHARSET_FROM_ID (id); \
3470 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3472 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3473 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3475 EMIT_ONE_BYTE (c1 | 0x80); \
3476 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3479 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3481 EMIT_ONE_ASCII_BYTE (c1 & 0x7F); \
3484 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3486 EMIT_ONE_BYTE (c1 | 0x80); \
3490 /* Since CHARSET is not yet invoked to any graphic planes, we \
3491 must invoke it, or, at first, designate it to some graphic \
3492 register. Then repeat the loop to actually produce the \
3494 dst = encode_invocation_designation (charset, coding, dst, \
3499 /* Produce codes for a DIMENSION2 character whose character set is
3500 CHARSET and whose position-codes are C1 and C2. Designation and
3501 invocation codes are also produced in advance if necessary. */
3503 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
3505 int id = CHARSET_ID (charset); \
3507 if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3508 && id == charset_jisx0208) \
3510 id = charset_jisx0208_1978; \
3511 charset = CHARSET_FROM_ID (id); \
3514 if (CODING_ISO_SINGLE_SHIFTING (coding)) \
3516 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
3517 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3519 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3520 CODING_ISO_SINGLE_SHIFTING (coding) = 0; \
3523 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0)) \
3525 EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F); \
3528 else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1)) \
3530 EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80); \
3534 /* Since CHARSET is not yet invoked to any graphic planes, we \
3535 must invoke it, or, at first, designate it to some graphic \
3536 register. Then repeat the loop to actually produce the \
3538 dst = encode_invocation_designation (charset, coding, dst, \
3543 #define ENCODE_ISO_CHARACTER(charset, c) \
3545 int code = ENCODE_CHAR ((charset),(c)); \
3547 if (CHARSET_DIMENSION (charset) == 1) \
3548 ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \
3550 ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3554 /* Produce designation and invocation codes at a place pointed by DST
3555 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
3559 encode_invocation_designation (charset
, coding
, dst
, p_nchars
)
3560 struct charset
*charset
;
3561 struct coding_system
*coding
;
3565 int multibytep
= coding
->dst_multibyte
;
3566 int produced_chars
= *p_nchars
;
3567 int reg
; /* graphic register number */
3568 int id
= CHARSET_ID (charset
);
3570 /* At first, check designations. */
3571 for (reg
= 0; reg
< 4; reg
++)
3572 if (id
== CODING_ISO_DESIGNATION (coding
, reg
))
3577 /* CHARSET is not yet designated to any graphic registers. */
3578 /* At first check the requested designation. */
3579 reg
= CODING_ISO_REQUEST (coding
, id
);
3581 /* Since CHARSET requests no special designation, designate it
3582 to graphic register 0. */
3585 ENCODE_DESIGNATION (charset
, reg
, coding
);
3588 if (CODING_ISO_INVOCATION (coding
, 0) != reg
3589 && CODING_ISO_INVOCATION (coding
, 1) != reg
)
3591 /* Since the graphic register REG is not invoked to any graphic
3592 planes, invoke it to graphic plane 0. */
3595 case 0: /* graphic register 0 */
3599 case 1: /* graphic register 1 */
3603 case 2: /* graphic register 2 */
3604 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3605 ENCODE_SINGLE_SHIFT_2
;
3607 ENCODE_LOCKING_SHIFT_2
;
3610 case 3: /* graphic register 3 */
3611 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_SINGLE_SHIFT
)
3612 ENCODE_SINGLE_SHIFT_3
;
3614 ENCODE_LOCKING_SHIFT_3
;
3619 *p_nchars
= produced_chars
;
/* The following three macros produce codes for indicating the
   direction of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the control sequence
   introducer: the two-byte escape sequence ESC [ when the coding
   system has CODING_ISO_FLAG_SEVEN_BITS set, otherwise the single
   byte ISO_CODE_CSI.  The flag must be tested with `&' because
   CODING_ISO_FLAGS holds many independent bits; comparing the whole
   word with `==' is true only if no other flag is set.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R emit the introducer
   followed by "2]" and "0]" respectively.  The introducer macro is
   object-like, so it is invoked without an argument list.  */

#define ENCODE_CONTROL_SEQUENCE_INTRODUCER				\
  do {									\
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)		\
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');				\
    else								\
      EMIT_ONE_BYTE (ISO_CODE_CSI);					\
  } while (0)


#define ENCODE_DIRECTION_R2L()						\
  do {									\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;					\
    EMIT_TWO_ASCII_BYTES ('2', ']');					\
  } while (0)


#define ENCODE_DIRECTION_L2R()						\
  do {									\
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;					\
    EMIT_TWO_ASCII_BYTES ('0', ']');					\
  } while (0)
3648 /* Produce codes for designation and invocation to reset the graphic
3649 planes and registers to initial state. */
3650 #define ENCODE_RESET_PLANE_AND_REGISTER() \
3653 struct charset *charset; \
3655 if (CODING_ISO_INVOCATION (coding, 0) != 0) \
3657 for (reg = 0; reg < 4; reg++) \
3658 if (CODING_ISO_INITIAL (coding, reg) >= 0 \
3659 && (CODING_ISO_DESIGNATION (coding, reg) \
3660 != CODING_ISO_INITIAL (coding, reg))) \
3662 charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3663 ENCODE_DESIGNATION (charset, reg, coding); \
3668 /* Produce designation sequences of charsets in the line started from
3669 SRC to a place pointed by DST, and return updated DST.
3671 If the current block ends before any end-of-line, we may fail to
3672 find all the necessary designations. */
3674 static unsigned char *
3675 encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
)
3676 struct coding_system
*coding
;
3677 int *charbuf
, *charbuf_end
;
3680 struct charset
*charset
;
3681 /* Table of charsets to be designated to each graphic register. */
3683 int c
, found
= 0, reg
;
3684 int produced_chars
= 0;
3685 int multibytep
= coding
->dst_multibyte
;
3687 Lisp_Object charset_list
;
3689 attrs
= CODING_ID_ATTRS (coding
->id
);
3690 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
3691 if (EQ (charset_list
, Qiso_2022
))
3692 charset_list
= Viso_2022_charset_list
;
3694 for (reg
= 0; reg
< 4; reg
++)
3704 charset
= char_charset (c
, charset_list
, NULL
);
3705 id
= CHARSET_ID (charset
);
3706 reg
= CODING_ISO_REQUEST (coding
, id
);
3707 if (reg
>= 0 && r
[reg
] < 0)
3716 for (reg
= 0; reg
< 4; reg
++)
3718 && CODING_ISO_DESIGNATION (coding
, reg
) != r
[reg
])
3719 ENCODE_DESIGNATION (CHARSET_FROM_ID (r
[reg
]), reg
, coding
);
3725 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
3728 encode_coding_iso_2022 (coding
)
3729 struct coding_system
*coding
;
3731 int multibytep
= coding
->dst_multibyte
;
3732 int *charbuf
= coding
->charbuf
;
3733 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
3734 unsigned char *dst
= coding
->destination
+ coding
->produced
;
3735 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
3738 = (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3739 && CODING_ISO_BOL (coding
));
3740 int produced_chars
= 0;
3741 Lisp_Object attrs
, eol_type
, charset_list
;
3742 int ascii_compatible
;
3744 int preferred_charset_id
= -1;
3746 CODING_GET_INFO (coding
, attrs
, charset_list
);
3747 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
3748 if (VECTORP (eol_type
))
3751 setup_iso_safe_charsets (attrs
);
3752 /* Charset list may have been changed. */
3753 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
); \
3754 coding
->safe_charsets
= (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs
));
3756 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
3758 while (charbuf
< charbuf_end
)
3760 ASSURE_DESTINATION (safe_room
);
3762 if (bol_designation
)
3764 unsigned char *dst_prev
= dst
;
3766 /* We have to produce designation sequences if any now. */
3767 dst
= encode_designation_at_bol (coding
, charbuf
, charbuf_end
, dst
);
3768 bol_designation
= 0;
3769 /* We are sure that designation sequences are all ASCII bytes. */
3770 produced_chars
+= dst
- dst_prev
;
3777 /* Handle an annotation. */
3780 case CODING_ANNOTATE_COMPOSITION_MASK
:
3781 /* Not yet implemented. */
3783 case CODING_ANNOTATE_CHARSET_MASK
:
3784 preferred_charset_id
= charbuf
[3];
3785 if (preferred_charset_id
>= 0
3786 && NILP (Fmemq (make_number (preferred_charset_id
),
3788 preferred_charset_id
= -1;
3797 /* Now encode the character C. */
3798 if (c
< 0x20 || c
== 0x7F)
3801 || (c
== '\r' && EQ (eol_type
, Qmac
)))
3803 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3804 ENCODE_RESET_PLANE_AND_REGISTER ();
3805 if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_INIT_AT_BOL
)
3809 for (i
= 0; i
< 4; i
++)
3810 CODING_ISO_DESIGNATION (coding
, i
)
3811 = CODING_ISO_INITIAL (coding
, i
);
3814 = CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
;
3816 else if (CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_CNTL
)
3817 ENCODE_RESET_PLANE_AND_REGISTER ();
3818 EMIT_ONE_ASCII_BYTE (c
);
3820 else if (ASCII_CHAR_P (c
))
3822 if (ascii_compatible
)
3823 EMIT_ONE_ASCII_BYTE (c
);
3826 struct charset
*charset
= CHARSET_FROM_ID (charset_ascii
);
3827 ENCODE_ISO_CHARACTER (charset
, c
);
3830 else if (CHAR_BYTE8_P (c
))
3832 c
= CHAR_TO_BYTE8 (c
);
3837 struct charset
*charset
;
3839 if (preferred_charset_id
>= 0)
3841 charset
= CHARSET_FROM_ID (preferred_charset_id
);
3842 if (! CHAR_CHARSET_P (c
, charset
))
3843 charset
= char_charset (c
, charset_list
, NULL
);
3846 charset
= char_charset (c
, charset_list
, NULL
);
3849 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
3851 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
3852 charset
= CHARSET_FROM_ID (charset_ascii
);
3856 c
= coding
->default_char
;
3857 charset
= char_charset (c
, charset_list
, NULL
);
3860 ENCODE_ISO_CHARACTER (charset
, c
);
3864 if (coding
->mode
& CODING_MODE_LAST_BLOCK
3865 && CODING_ISO_FLAGS (coding
) & CODING_ISO_FLAG_RESET_AT_EOL
)
3867 ASSURE_DESTINATION (safe_room
);
3868 ENCODE_RESET_PLANE_AND_REGISTER ();
3870 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
3871 CODING_ISO_BOL (coding
) = bol_designation
;
3872 coding
->produced_char
+= produced_chars
;
3873 coding
->produced
= dst
- coding
->destination
;
3878 /*** 8. SJIS and BIG5 handlers ***/
3880 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3881 quite widely. So, for the moment, Emacs supports them in the bare
3882 C code. But, in the future, they may be supported only by CCL. */
3884 /* SJIS is a coding system encoding three character sets: ASCII, right
3885 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded
3886 as is. A character of charset katakana-jisx0201 is encoded by
3887 "position-code + 0x80". A character of charset japanese-jisx0208
3888 is encoded in two bytes, but the two position-codes are divided and
3889 shifted so that they fit in the ranges below.
3891 --- CODE RANGE of SJIS ---
3892 (character set) (range)
3894 KATAKANA-JISX0201 0xA0 .. 0xDF
3895 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
3896 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
3897 -------------------------------
3901 /* BIG5 is a coding system encoding two character sets: ASCII and
3902 Big5. An ASCII character is encoded as is. Big5 is a two-byte
3903 character set and is encoded in two-byte.
3905 --- CODE RANGE of BIG5 ---
3906 (character set) (range)
3908 Big5 (1st byte) 0xA1 .. 0xFE
3909 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
3910 --------------------------
3914 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3915 Check if a text is encoded in SJIS. If it is, return
3916 CATEGORY_MASK_SJIS, else return 0. */
3919 detect_coding_sjis (coding
, detect_info
)
3920 struct coding_system
*coding
;
3921 struct coding_detection_info
*detect_info
;
3923 const unsigned char *src
= coding
->source
, *src_base
;
3924 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3925 int multibytep
= coding
->src_multibyte
;
3926 int consumed_chars
= 0;
3930 detect_info
->checked
|= CATEGORY_MASK_SJIS
;
3931 /* A coding system of this category is always ASCII compatible. */
3932 src
+= coding
->head_ascii
;
3940 if ((c
>= 0x81 && c
<= 0x9F) || (c
>= 0xE0 && c
<= 0xEF))
3943 if (c
< 0x40 || c
== 0x7F || c
> 0xFC)
3945 found
= CATEGORY_MASK_SJIS
;
3947 else if (c
>= 0xA0 && c
< 0xE0)
3948 found
= CATEGORY_MASK_SJIS
;
3952 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3956 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
3958 detect_info
->rejected
|= CATEGORY_MASK_SJIS
;
3961 detect_info
->found
|= found
;
3965 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3966 Check if a text is encoded in BIG5. If it is, return
3967 CATEGORY_MASK_BIG5, else return 0. */
3970 detect_coding_big5 (coding
, detect_info
)
3971 struct coding_system
*coding
;
3972 struct coding_detection_info
*detect_info
;
3974 const unsigned char *src
= coding
->source
, *src_base
;
3975 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
3976 int multibytep
= coding
->src_multibyte
;
3977 int consumed_chars
= 0;
3981 detect_info
->checked
|= CATEGORY_MASK_BIG5
;
3982 /* A coding system of this category is always ASCII compatible. */
3983 src
+= coding
->head_ascii
;
3994 if (c
< 0x40 || (c
>= 0x7F && c
<= 0xA0))
3996 found
= CATEGORY_MASK_BIG5
;
4001 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
4005 if (src_base
< src
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4007 detect_info
->rejected
|= CATEGORY_MASK_BIG5
;
4010 detect_info
->found
|= found
;
4014 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4015 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */
4018 decode_coding_sjis (coding
)
4019 struct coding_system
*coding
;
4021 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4022 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4023 const unsigned char *src_base
;
4024 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
4026 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4027 int consumed_chars
= 0, consumed_chars_base
;
4028 int multibytep
= coding
->src_multibyte
;
4029 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4030 struct charset
*charset_kanji2
;
4031 Lisp_Object attrs
, charset_list
, val
;
4032 int char_offset
= coding
->produced_char
;
4033 int last_offset
= char_offset
;
4034 int last_id
= charset_ascii
;
4036 CODING_GET_INFO (coding
, attrs
, charset_list
);
4039 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4040 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4041 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4042 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4047 struct charset
*charset
;
4050 consumed_chars_base
= consumed_chars
;
4052 if (charbuf
>= charbuf_end
)
4059 charset
= charset_roman
;
4060 else if (c
== 0x80 || c
== 0xA0)
4062 else if (c
>= 0xA1 && c
<= 0xDF)
4064 /* SJIS -> JISX0201-Kana */
4066 charset
= charset_kana
;
4070 /* SJIS -> JISX0208 */
4072 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4076 charset
= charset_kanji
;
4078 else if (c
<= 0xFC && charset_kanji2
)
4080 /* SJIS -> JISX0213-2 */
4082 if (c1
< 0x40 || c1
== 0x7F || c1
> 0xFC)
4086 charset
= charset_kanji2
;
4090 if (charset
->id
!= charset_ascii
4091 && last_id
!= charset
->id
)
4093 if (last_id
!= charset_ascii
)
4094 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4095 last_id
= charset
->id
;
4096 last_offset
= char_offset
;
4098 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4105 consumed_chars
= consumed_chars_base
;
4107 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
4113 if (last_id
!= charset_ascii
)
4114 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4115 coding
->consumed_char
+= consumed_chars_base
;
4116 coding
->consumed
= src_base
- coding
->source
;
4117 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4121 decode_coding_big5 (coding
)
4122 struct coding_system
*coding
;
4124 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4125 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4126 const unsigned char *src_base
;
4127 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
4129 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4130 int consumed_chars
= 0, consumed_chars_base
;
4131 int multibytep
= coding
->src_multibyte
;
4132 struct charset
*charset_roman
, *charset_big5
;
4133 Lisp_Object attrs
, charset_list
, val
;
4134 int char_offset
= coding
->produced_char
;
4135 int last_offset
= char_offset
;
4136 int last_id
= charset_ascii
;
4138 CODING_GET_INFO (coding
, attrs
, charset_list
);
4140 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4141 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4146 struct charset
*charset
;
4149 consumed_chars_base
= consumed_chars
;
4151 if (charbuf
>= charbuf_end
)
4159 charset
= charset_roman
;
4163 if (c
< 0xA1 || c
> 0xFE)
4166 if (c1
< 0x40 || (c1
> 0x7E && c1
< 0xA1) || c1
> 0xFE)
4169 charset
= charset_big5
;
4171 if (charset
->id
!= charset_ascii
4172 && last_id
!= charset
->id
)
4174 if (last_id
!= charset_ascii
)
4175 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4176 last_id
= charset
->id
;
4177 last_offset
= char_offset
;
4179 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
, charset
, c
, c
);
4186 consumed_chars
= consumed_chars_base
;
4188 *charbuf
++ = c
< 0 ? -c
: BYTE8_TO_CHAR (c
);
4194 if (last_id
!= charset_ascii
)
4195 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4196 coding
->consumed_char
+= consumed_chars_base
;
4197 coding
->consumed
= src_base
- coding
->source
;
4198 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4201 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4202 This function can encode charsets `ascii', `katakana-jisx0201',
4203 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4204 are sure that all these charsets are registered as official charset
4205 (i.e. do not have extended leading-codes). Characters of other
4206 charsets are produced without any encoding. If SJIS_P is 1, encode
4207 SJIS text, else encode BIG5 text. */
4210 encode_coding_sjis (coding
)
4211 struct coding_system
*coding
;
4213 int multibytep
= coding
->dst_multibyte
;
4214 int *charbuf
= coding
->charbuf
;
4215 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4216 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4217 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4219 int produced_chars
= 0;
4220 Lisp_Object attrs
, charset_list
, val
;
4221 int ascii_compatible
;
4222 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
;
4223 struct charset
*charset_kanji2
;
4226 CODING_GET_INFO (coding
, attrs
, charset_list
);
4228 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4229 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4230 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4231 charset_kanji2
= NILP (val
) ? NULL
: CHARSET_FROM_ID (XINT (XCAR (val
)));
4233 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4235 while (charbuf
< charbuf_end
)
4237 ASSURE_DESTINATION (safe_room
);
4239 /* Now encode the character C. */
4240 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4241 EMIT_ONE_ASCII_BYTE (c
);
4242 else if (CHAR_BYTE8_P (c
))
4244 c
= CHAR_TO_BYTE8 (c
);
4250 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4254 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4256 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4257 charset
= CHARSET_FROM_ID (charset_ascii
);
4261 c
= coding
->default_char
;
4262 charset
= char_charset (c
, charset_list
, &code
);
4265 if (code
== CHARSET_INVALID_CODE (charset
))
4267 if (charset
== charset_kanji
)
4271 c1
= code
>> 8, c2
= code
& 0xFF;
4272 EMIT_TWO_BYTES (c1
, c2
);
4274 else if (charset
== charset_kana
)
4275 EMIT_ONE_BYTE (code
| 0x80);
4276 else if (charset_kanji2
&& charset
== charset_kanji2
)
4281 if (c1
== 0x21 || (c1
>= 0x23 && c1
< 0x25)
4282 || (c1
>= 0x2C && c1
<= 0x2F) || c1
>= 0x6E)
4284 JIS_TO_SJIS2 (code
);
4285 c1
= code
>> 8, c2
= code
& 0xFF;
4286 EMIT_TWO_BYTES (c1
, c2
);
4289 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4292 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4295 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4296 coding
->produced_char
+= produced_chars
;
4297 coding
->produced
= dst
- coding
->destination
;
4302 encode_coding_big5 (coding
)
4303 struct coding_system
*coding
;
4305 int multibytep
= coding
->dst_multibyte
;
4306 int *charbuf
= coding
->charbuf
;
4307 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4308 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4309 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4311 int produced_chars
= 0;
4312 Lisp_Object attrs
, charset_list
, val
;
4313 int ascii_compatible
;
4314 struct charset
*charset_roman
, *charset_big5
;
4317 CODING_GET_INFO (coding
, attrs
, charset_list
);
4319 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
4320 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
4321 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4323 while (charbuf
< charbuf_end
)
4325 ASSURE_DESTINATION (safe_room
);
4327 /* Now encode the character C. */
4328 if (ASCII_CHAR_P (c
) && ascii_compatible
)
4329 EMIT_ONE_ASCII_BYTE (c
);
4330 else if (CHAR_BYTE8_P (c
))
4332 c
= CHAR_TO_BYTE8 (c
);
4338 struct charset
*charset
= char_charset (c
, charset_list
, &code
);
4342 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4344 code
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4345 charset
= CHARSET_FROM_ID (charset_ascii
);
4349 c
= coding
->default_char
;
4350 charset
= char_charset (c
, charset_list
, &code
);
4353 if (code
== CHARSET_INVALID_CODE (charset
))
4355 if (charset
== charset_big5
)
4359 c1
= code
>> 8, c2
= code
& 0xFF;
4360 EMIT_TWO_BYTES (c1
, c2
);
4363 EMIT_ONE_ASCII_BYTE (code
& 0x7F);
4366 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4367 coding
->produced_char
+= produced_chars
;
4368 coding
->produced
= dst
- coding
->destination
;
4373 /*** 10. CCL handlers ***/
4375 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4376 Check if a text is encoded in a coding system of which
4377 encoder/decoder are written in CCL program. If it is, return
4378 CATEGORY_MASK_CCL, else return 0. */
4381 detect_coding_ccl (coding
, detect_info
)
4382 struct coding_system
*coding
;
4383 struct coding_detection_info
*detect_info
;
4385 const unsigned char *src
= coding
->source
, *src_base
;
4386 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4387 int multibytep
= coding
->src_multibyte
;
4388 int consumed_chars
= 0;
4390 unsigned char *valids
;
4391 int head_ascii
= coding
->head_ascii
;
4394 detect_info
->checked
|= CATEGORY_MASK_CCL
;
4396 coding
= &coding_categories
[coding_category_ccl
];
4397 valids
= CODING_CCL_VALIDS (coding
);
4398 attrs
= CODING_ID_ATTRS (coding
->id
);
4399 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4408 if (c
< 0 || ! valids
[c
])
4410 if ((valids
[c
] > 1))
4411 found
= CATEGORY_MASK_CCL
;
4413 detect_info
->rejected
|= CATEGORY_MASK_CCL
;
4417 detect_info
->found
|= found
;
4422 decode_coding_ccl (coding
)
4423 struct coding_system
*coding
;
4425 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4426 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4427 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
4428 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_size
;
4429 int consumed_chars
= 0;
4430 int multibytep
= coding
->src_multibyte
;
4431 struct ccl_program ccl
;
4432 int source_charbuf
[1024];
4433 int source_byteidx
[1024];
4434 Lisp_Object attrs
, charset_list
;
4436 CODING_GET_INFO (coding
, attrs
, charset_list
);
4437 setup_ccl_program (&ccl
, CODING_CCL_DECODER (coding
));
4439 while (src
< src_end
)
4441 const unsigned char *p
= src
;
4442 int *source
, *source_end
;
4446 while (i
< 1024 && p
< src_end
)
4448 source_byteidx
[i
] = p
- src
;
4449 source_charbuf
[i
++] = STRING_CHAR_ADVANCE (p
);
4452 while (i
< 1024 && p
< src_end
)
4453 source_charbuf
[i
++] = *p
++;
4455 if (p
== src_end
&& coding
->mode
& CODING_MODE_LAST_BLOCK
)
4458 source
= source_charbuf
;
4459 source_end
= source
+ i
;
4460 while (source
< source_end
)
4462 ccl_driver (&ccl
, source
, charbuf
,
4463 source_end
- source
, charbuf_end
- charbuf
,
4465 source
+= ccl
.consumed
;
4466 charbuf
+= ccl
.produced
;
4467 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_DST
)
4470 if (source
< source_end
)
4471 src
+= source_byteidx
[source
- source_charbuf
];
4474 consumed_chars
+= source
- source_charbuf
;
4476 if (ccl
.status
!= CCL_STAT_SUSPEND_BY_SRC
4477 && ccl
.status
!= CODING_RESULT_INSUFFICIENT_SRC
)
4483 case CCL_STAT_SUSPEND_BY_SRC
:
4484 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4486 case CCL_STAT_SUSPEND_BY_DST
:
4489 case CCL_STAT_INVALID_CMD
:
4490 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4493 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4496 coding
->consumed_char
+= consumed_chars
;
4497 coding
->consumed
= src
- coding
->source
;
4498 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4502 encode_coding_ccl (coding
)
4503 struct coding_system
*coding
;
4505 struct ccl_program ccl
;
4506 int multibytep
= coding
->dst_multibyte
;
4507 int *charbuf
= coding
->charbuf
;
4508 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4509 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4510 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4511 unsigned char *adjusted_dst_end
= dst_end
- 1;
4512 int destination_charbuf
[1024];
4513 int i
, produced_chars
= 0;
4514 Lisp_Object attrs
, charset_list
;
4516 CODING_GET_INFO (coding
, attrs
, charset_list
);
4517 setup_ccl_program (&ccl
, CODING_CCL_ENCODER (coding
));
4519 ccl
.last_block
= coding
->mode
& CODING_MODE_LAST_BLOCK
;
4520 ccl
.dst_multibyte
= coding
->dst_multibyte
;
4522 while (charbuf
< charbuf_end
&& dst
< adjusted_dst_end
)
4524 int dst_bytes
= dst_end
- dst
;
4525 if (dst_bytes
> 1024)
4528 ccl_driver (&ccl
, charbuf
, destination_charbuf
,
4529 charbuf_end
- charbuf
, dst_bytes
, charset_list
);
4530 charbuf
+= ccl
.consumed
;
4532 for (i
= 0; i
< ccl
.produced
; i
++)
4533 EMIT_ONE_BYTE (destination_charbuf
[i
] & 0xFF);
4536 for (i
= 0; i
< ccl
.produced
; i
++)
4537 *dst
++ = destination_charbuf
[i
] & 0xFF;
4538 produced_chars
+= ccl
.produced
;
4544 case CCL_STAT_SUSPEND_BY_SRC
:
4545 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
4547 case CCL_STAT_SUSPEND_BY_DST
:
4548 record_conversion_result (coding
, CODING_RESULT_INSUFFICIENT_DST
);
4551 case CCL_STAT_INVALID_CMD
:
4552 record_conversion_result (coding
, CODING_RESULT_INTERRUPT
);
4555 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4559 coding
->produced_char
+= produced_chars
;
4560 coding
->produced
= dst
- coding
->destination
;
4566 /*** 10, 11. no-conversion handlers ***/
4568 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
/* Decoder for raw text.  No bytes are converted here: the function
   only sets CODING->chars_at_source, resets the consumed byte and
   character counts of CODING to zero, and records
   CODING_RESULT_SUCCESS as the conversion result.
   NOTE(review): chars_at_source presumably tells the caller to take
   the characters directly from the source instead of from the
   charbuf -- confirm against the code that consumes this flag.  */
4571 decode_coding_raw_text (coding
)
4572 struct coding_system
*coding
;
4574 coding
->chars_at_source
= 1;
/* Nothing has been consumed by this function itself.  */
4575 coding
->consumed_char
= 0;
4576 coding
->consumed
= 0;
4577 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4581 encode_coding_raw_text (coding
)
4582 struct coding_system
*coding
;
4584 int multibytep
= coding
->dst_multibyte
;
4585 int *charbuf
= coding
->charbuf
;
4586 int *charbuf_end
= coding
->charbuf
+ coding
->charbuf_used
;
4587 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4588 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4589 int produced_chars
= 0;
4594 int safe_room
= MAX_MULTIBYTE_LENGTH
* 2;
4596 if (coding
->src_multibyte
)
4597 while (charbuf
< charbuf_end
)
4599 ASSURE_DESTINATION (safe_room
);
4601 if (ASCII_CHAR_P (c
))
4602 EMIT_ONE_ASCII_BYTE (c
);
4603 else if (CHAR_BYTE8_P (c
))
4605 c
= CHAR_TO_BYTE8 (c
);
4610 unsigned char str
[MAX_MULTIBYTE_LENGTH
], *p0
= str
, *p1
= str
;
4612 CHAR_STRING_ADVANCE (c
, p1
);
4615 EMIT_ONE_BYTE (*p0
);
4621 while (charbuf
< charbuf_end
)
4623 ASSURE_DESTINATION (safe_room
);
4630 if (coding
->src_multibyte
)
4632 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4634 while (charbuf
< charbuf_end
)
4636 ASSURE_DESTINATION (safe_room
);
4638 if (ASCII_CHAR_P (c
))
4640 else if (CHAR_BYTE8_P (c
))
4641 *dst
++ = CHAR_TO_BYTE8 (c
);
4643 CHAR_STRING_ADVANCE (c
, dst
);
4649 ASSURE_DESTINATION (charbuf_end
- charbuf
);
4650 while (charbuf
< charbuf_end
&& dst
< dst_end
)
4651 *dst
++ = *charbuf
++;
4652 produced_chars
= dst
- (coding
->destination
+ coding
->dst_bytes
);
4655 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4656 coding
->produced_char
+= produced_chars
;
4657 coding
->produced
= dst
- coding
->destination
;
4661 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4662 Check if a text is encoded in a charset-based coding system. If it
4663 is, return 1, else return 0. */
4666 detect_coding_charset (coding
, detect_info
)
4667 struct coding_system
*coding
;
4668 struct coding_detection_info
*detect_info
;
4670 const unsigned char *src
= coding
->source
, *src_base
;
4671 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4672 int multibytep
= coding
->src_multibyte
;
4673 int consumed_chars
= 0;
4674 Lisp_Object attrs
, valids
;
4677 detect_info
->checked
|= CATEGORY_MASK_CHARSET
;
4679 coding
= &coding_categories
[coding_category_charset
];
4680 attrs
= CODING_ID_ATTRS (coding
->id
);
4681 valids
= AREF (attrs
, coding_attr_charset_valids
);
4683 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
4684 src
+= coding
->head_ascii
;
4694 if (NILP (AREF (valids
, c
)))
4697 found
= CATEGORY_MASK_CHARSET
;
4699 detect_info
->rejected
|= CATEGORY_MASK_CHARSET
;
4703 detect_info
->found
|= found
;
4708 decode_coding_charset (coding
)
4709 struct coding_system
*coding
;
4711 const unsigned char *src
= coding
->source
+ coding
->consumed
;
4712 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
4713 const unsigned char *src_base
;
4714 int *charbuf
= coding
->charbuf
+ coding
->charbuf_used
;
4716 = coding
->charbuf
+ coding
->charbuf_size
- MAX_ANNOTATION_LENGTH
;
4717 int consumed_chars
= 0, consumed_chars_base
;
4718 int multibytep
= coding
->src_multibyte
;
4719 Lisp_Object attrs
, charset_list
, valids
;
4720 int char_offset
= coding
->produced_char
;
4721 int last_offset
= char_offset
;
4722 int last_id
= charset_ascii
;
4724 CODING_GET_INFO (coding
, attrs
, charset_list
);
4725 valids
= AREF (attrs
, coding_attr_charset_valids
);
4731 struct charset
*charset
;
4737 consumed_chars_base
= consumed_chars
;
4739 if (charbuf
>= charbuf_end
)
4747 val
= AREF (valids
, c
);
4752 charset
= CHARSET_FROM_ID (XFASTINT (val
));
4753 dim
= CHARSET_DIMENSION (charset
);
4757 code
= (code
<< 8) | c
;
4760 CODING_DECODE_CHAR (coding
, src
, src_base
, src_end
,
4765 /* VAL is a list of charset IDs. It is assured that the
4766 list is sorted by charset dimensions (smaller one
4770 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (val
)));
4771 dim
= CHARSET_DIMENSION (charset
);
4775 code
= (code
<< 8) | c
;
4778 CODING_DECODE_CHAR (coding
, src
, src_base
,
4779 src_end
, charset
, code
, c
);
4787 if (charset
->id
!= charset_ascii
4788 && last_id
!= charset
->id
)
4790 if (last_id
!= charset_ascii
)
4791 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4792 last_id
= charset
->id
;
4793 last_offset
= char_offset
;
4802 consumed_chars
= consumed_chars_base
;
4804 *charbuf
++ = c
< 0 ? -c
: ASCII_BYTE_P (c
) ? c
: BYTE8_TO_CHAR (c
);
4810 if (last_id
!= charset_ascii
)
4811 ADD_CHARSET_DATA (charbuf
, char_offset
- last_offset
, last_id
);
4812 coding
->consumed_char
+= consumed_chars_base
;
4813 coding
->consumed
= src_base
- coding
->source
;
4814 coding
->charbuf_used
= charbuf
- coding
->charbuf
;
4818 encode_coding_charset (coding
)
4819 struct coding_system
*coding
;
4821 int multibytep
= coding
->dst_multibyte
;
4822 int *charbuf
= coding
->charbuf
;
4823 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
4824 unsigned char *dst
= coding
->destination
+ coding
->produced
;
4825 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
4826 int safe_room
= MAX_MULTIBYTE_LENGTH
;
4827 int produced_chars
= 0;
4828 Lisp_Object attrs
, charset_list
;
4829 int ascii_compatible
;
4832 CODING_GET_INFO (coding
, attrs
, charset_list
);
4833 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
4835 while (charbuf
< charbuf_end
)
4837 struct charset
*charset
;
4840 ASSURE_DESTINATION (safe_room
);
4842 if (ascii_compatible
&& ASCII_CHAR_P (c
))
4843 EMIT_ONE_ASCII_BYTE (c
);
4844 else if (CHAR_BYTE8_P (c
))
4846 c
= CHAR_TO_BYTE8 (c
);
4851 charset
= char_charset (c
, charset_list
, &code
);
4854 if (CHARSET_DIMENSION (charset
) == 1)
4855 EMIT_ONE_BYTE (code
);
4856 else if (CHARSET_DIMENSION (charset
) == 2)
4857 EMIT_TWO_BYTES (code
>> 8, code
& 0xFF);
4858 else if (CHARSET_DIMENSION (charset
) == 3)
4859 EMIT_THREE_BYTES (code
>> 16, (code
>> 8) & 0xFF, code
& 0xFF);
4861 EMIT_FOUR_BYTES (code
>> 24, (code
>> 16) & 0xFF,
4862 (code
>> 8) & 0xFF, code
& 0xFF);
4866 if (coding
->mode
& CODING_MODE_SAFE_ENCODING
)
4867 c
= CODING_INHIBIT_CHARACTER_SUBSTITUTION
;
4869 c
= coding
->default_char
;
4875 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
4876 coding
->produced_char
+= produced_chars
;
4877 coding
->produced
= dst
- coding
->destination
;
4882 /*** 7. C library functions ***/
4884 /* Setup coding context CODING from information about CODING_SYSTEM.
4885 If CODING_SYSTEM is nil, `no-conversion' is assumed. If
4886 CODING_SYSTEM is invalid, signal an error. */
4889 setup_coding_system (coding_system
, coding
)
4890 Lisp_Object coding_system
;
4891 struct coding_system
*coding
;
4894 Lisp_Object eol_type
;
4895 Lisp_Object coding_type
;
4898 if (NILP (coding_system
))
4899 coding_system
= Qno_conversion
;
4901 CHECK_CODING_SYSTEM_GET_ID (coding_system
, coding
->id
);
4903 attrs
= CODING_ID_ATTRS (coding
->id
);
4904 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
4907 coding
->head_ascii
= -1;
4908 coding
->common_flags
4909 = (VECTORP (eol_type
) ? CODING_REQUIRE_DETECTION_MASK
: 0);
4910 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
4911 coding
->common_flags
|= CODING_REQUIRE_DECODING_MASK
;
4912 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
4913 coding
->common_flags
|= CODING_REQUIRE_ENCODING_MASK
;
4914 if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs
)))
4915 coding
->common_flags
|= CODING_FOR_UNIBYTE_MASK
;
4917 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4918 coding
->max_charset_id
= SCHARS (val
) - 1;
4919 coding
->safe_charsets
= (char *) SDATA (val
);
4920 coding
->default_char
= XINT (CODING_ATTR_DEFAULT_CHAR (attrs
));
4922 coding_type
= CODING_ATTR_TYPE (attrs
);
4923 if (EQ (coding_type
, Qundecided
))
4925 coding
->detector
= NULL
;
4926 coding
->decoder
= decode_coding_raw_text
;
4927 coding
->encoder
= encode_coding_raw_text
;
4928 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
4930 else if (EQ (coding_type
, Qiso_2022
))
4933 int flags
= XINT (AREF (attrs
, coding_attr_iso_flags
));
4935 /* Invoke graphic register 0 to plane 0. */
4936 CODING_ISO_INVOCATION (coding
, 0) = 0;
4937 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */
4938 CODING_ISO_INVOCATION (coding
, 1)
4939 = (flags
& CODING_ISO_FLAG_SEVEN_BITS
? -1 : 1);
4940 /* Setup the initial status of designation. */
4941 for (i
= 0; i
< 4; i
++)
4942 CODING_ISO_DESIGNATION (coding
, i
) = CODING_ISO_INITIAL (coding
, i
);
4943 /* Not single shifting initially. */
4944 CODING_ISO_SINGLE_SHIFTING (coding
) = 0;
4945 /* Beginning of buffer should also be regarded as bol. */
4946 CODING_ISO_BOL (coding
) = 1;
4947 coding
->detector
= detect_coding_iso_2022
;
4948 coding
->decoder
= decode_coding_iso_2022
;
4949 coding
->encoder
= encode_coding_iso_2022
;
4950 if (flags
& CODING_ISO_FLAG_SAFE
)
4951 coding
->mode
|= CODING_MODE_SAFE_ENCODING
;
4952 coding
->common_flags
4953 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
4954 | CODING_REQUIRE_FLUSHING_MASK
);
4955 if (flags
& CODING_ISO_FLAG_COMPOSITION
)
4956 coding
->common_flags
|= CODING_ANNOTATE_COMPOSITION_MASK
;
4957 if (flags
& CODING_ISO_FLAG_DESIGNATION
)
4958 coding
->common_flags
|= CODING_ANNOTATE_CHARSET_MASK
;
4959 if (flags
& CODING_ISO_FLAG_FULL_SUPPORT
)
4961 setup_iso_safe_charsets (attrs
);
4962 val
= CODING_ATTR_SAFE_CHARSETS (attrs
);
4963 coding
->max_charset_id
= SCHARS (val
) - 1;
4964 coding
->safe_charsets
= (char *) SDATA (val
);
4966 CODING_ISO_FLAGS (coding
) = flags
;
4968 else if (EQ (coding_type
, Qcharset
))
4970 coding
->detector
= detect_coding_charset
;
4971 coding
->decoder
= decode_coding_charset
;
4972 coding
->encoder
= encode_coding_charset
;
4973 coding
->common_flags
4974 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4976 else if (EQ (coding_type
, Qutf_8
))
4978 coding
->detector
= detect_coding_utf_8
;
4979 coding
->decoder
= decode_coding_utf_8
;
4980 coding
->encoder
= encode_coding_utf_8
;
4981 coding
->common_flags
4982 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4984 else if (EQ (coding_type
, Qutf_16
))
4986 val
= AREF (attrs
, coding_attr_utf_16_bom
);
4987 CODING_UTF_16_BOM (coding
) = (CONSP (val
) ? utf_16_detect_bom
4988 : EQ (val
, Qt
) ? utf_16_with_bom
4989 : utf_16_without_bom
);
4990 val
= AREF (attrs
, coding_attr_utf_16_endian
);
4991 CODING_UTF_16_ENDIAN (coding
) = (EQ (val
, Qbig
) ? utf_16_big_endian
4992 : utf_16_little_endian
);
4993 CODING_UTF_16_SURROGATE (coding
) = 0;
4994 coding
->detector
= detect_coding_utf_16
;
4995 coding
->decoder
= decode_coding_utf_16
;
4996 coding
->encoder
= encode_coding_utf_16
;
4997 coding
->common_flags
4998 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
4999 if (CODING_UTF_16_BOM (coding
) == utf_16_detect_bom
)
5000 coding
->common_flags
|= CODING_REQUIRE_DETECTION_MASK
;
5002 else if (EQ (coding_type
, Qccl
))
5004 coding
->detector
= detect_coding_ccl
;
5005 coding
->decoder
= decode_coding_ccl
;
5006 coding
->encoder
= encode_coding_ccl
;
5007 coding
->common_flags
5008 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
5009 | CODING_REQUIRE_FLUSHING_MASK
);
5011 else if (EQ (coding_type
, Qemacs_mule
))
5013 coding
->detector
= detect_coding_emacs_mule
;
5014 coding
->decoder
= decode_coding_emacs_mule
;
5015 coding
->encoder
= encode_coding_emacs_mule
;
5016 coding
->common_flags
5017 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5018 if (! NILP (AREF (attrs
, coding_attr_emacs_mule_full
))
5019 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs
), Vemacs_mule_charset_list
))
5021 Lisp_Object tail
, safe_charsets
;
5022 int max_charset_id
= 0;
5024 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
5026 if (max_charset_id
< XFASTINT (XCAR (tail
)))
5027 max_charset_id
= XFASTINT (XCAR (tail
));
5028 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
5030 for (tail
= Vemacs_mule_charset_list
; CONSP (tail
);
5032 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
5033 coding
->max_charset_id
= max_charset_id
;
5034 coding
->safe_charsets
= (char *) SDATA (safe_charsets
);
5037 else if (EQ (coding_type
, Qshift_jis
))
5039 coding
->detector
= detect_coding_sjis
;
5040 coding
->decoder
= decode_coding_sjis
;
5041 coding
->encoder
= encode_coding_sjis
;
5042 coding
->common_flags
5043 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5045 else if (EQ (coding_type
, Qbig5
))
5047 coding
->detector
= detect_coding_big5
;
5048 coding
->decoder
= decode_coding_big5
;
5049 coding
->encoder
= encode_coding_big5
;
5050 coding
->common_flags
5051 |= (CODING_REQUIRE_DECODING_MASK
| CODING_REQUIRE_ENCODING_MASK
);
5053 else /* EQ (coding_type, Qraw_text) */
5055 coding
->detector
= NULL
;
5056 coding
->decoder
= decode_coding_raw_text
;
5057 coding
->encoder
= encode_coding_raw_text
;
5063 /* Return raw-text or one of its subsidiaries that has the same
5064 eol_type as CODING-SYSTEM. */
5067 raw_text_coding_system (coding_system
)
5068 Lisp_Object coding_system
;
5070 Lisp_Object spec
, attrs
;
5071 Lisp_Object eol_type
, raw_text_eol_type
;
5073 if (NILP (coding_system
))
5075 spec
= CODING_SYSTEM_SPEC (coding_system
);
5076 attrs
= AREF (spec
, 0);
5078 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
5079 return coding_system
;
5081 eol_type
= AREF (spec
, 2);
5082 if (VECTORP (eol_type
))
5084 spec
= CODING_SYSTEM_SPEC (Qraw_text
);
5085 raw_text_eol_type
= AREF (spec
, 2);
5086 return (EQ (eol_type
, Qunix
) ? AREF (raw_text_eol_type
, 0)
5087 : EQ (eol_type
, Qdos
) ? AREF (raw_text_eol_type
, 1)
5088 : AREF (raw_text_eol_type
, 2));
5092 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5093 does, return the subsidiary of CODING_SYSTEM that has the same eol-spec as
5094 PARENT. Otherwise, return CODING_SYSTEM. */
5097 coding_inherit_eol_type (coding_system
, parent
)
5098 Lisp_Object coding_system
, parent
;
5100 Lisp_Object spec
, eol_type
;
5102 if (NILP (coding_system
))
5103 coding_system
= Qraw_text
;
5104 spec
= CODING_SYSTEM_SPEC (coding_system
);
5105 eol_type
= AREF (spec
, 2);
5106 if (VECTORP (eol_type
)
5109 Lisp_Object parent_spec
;
5110 Lisp_Object parent_eol_type
;
5113 = CODING_SYSTEM_SPEC (buffer_defaults
.buffer_file_coding_system
);
5114 parent_eol_type
= AREF (parent_spec
, 2);
5115 if (EQ (parent_eol_type
, Qunix
))
5116 coding_system
= AREF (eol_type
, 0);
5117 else if (EQ (parent_eol_type
, Qdos
))
5118 coding_system
= AREF (eol_type
, 1);
5119 else if (EQ (parent_eol_type
, Qmac
))
5120 coding_system
= AREF (eol_type
, 2);
5122 return coding_system
;
5125 /* Emacs has a mechanism to automatically detect a coding system if it
5126 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
5127 it's impossible to distinguish some coding systems accurately
5128 because they use the same range of codes. So, at first, coding
5129 systems are categorized into the following categories:
5131 o coding-category-emacs-mule
5133 The category for a coding system which has the same code range
5134 as Emacs' internal format. Assigned the coding-system (Lisp
5135 symbol) `emacs-mule' by default.
5137 o coding-category-sjis
5139 The category for a coding system which has the same code range
5140 as SJIS. Assigned the coding-system (Lisp
5141 symbol) `japanese-shift-jis' by default.
5143 o coding-category-iso-7
5145 The category for a coding system which has the same code range
5146 as ISO2022 of 7-bit environment. This doesn't use any locking
5147 shift and single shift functions. This can encode/decode all
5148 charsets. Assigned the coding-system (Lisp symbol)
5149 `iso-2022-7bit' by default.
5151 o coding-category-iso-7-tight
5153 Same as coding-category-iso-7 except that this can
5154 encode/decode only the specified charsets.
5156 o coding-category-iso-8-1
5158 The category for a coding system which has the same code range
5159 as ISO2022 of 8-bit environment and graphic plane 1 used only
5160 for DIMENSION1 charset. This doesn't use any locking shift
5161 and single shift functions. Assigned the coding-system (Lisp
5162 symbol) `iso-latin-1' by default.
5164 o coding-category-iso-8-2
5166 The category for a coding system which has the same code range
5167 as ISO2022 of 8-bit environment and graphic plane 1 used only
5168 for DIMENSION2 charset. This doesn't use any locking shift
5169 and single shift functions. Assigned the coding-system (Lisp
5170 symbol) `japanese-iso-8bit' by default.
5172 o coding-category-iso-7-else
5174 The category for a coding system which has the same code range
5175 as ISO2022 of 7-bit environment but uses locking shift or
5176 single shift functions. Assigned the coding-system (Lisp
5177 symbol) `iso-2022-7bit-lock' by default.
5179 o coding-category-iso-8-else
5181 The category for a coding system which has the same code range
5182 as ISO2022 of 8-bit environment but uses locking shift or
5183 single shift functions. Assigned the coding-system (Lisp
5184 symbol) `iso-2022-8bit-ss2' by default.
5186 o coding-category-big5
5188 The category for a coding system which has the same code range
5189 as BIG5. Assigned the coding-system (Lisp symbol)
5190 `cn-big5' by default.
5192 o coding-category-utf-8
5194 The category for a coding system which has the same code range
5195 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
5196 symbol) `utf-8' by default.
5198 o coding-category-utf-16-be
5200 The category for a coding system in which a text has an
5201 Unicode signature (cf. Unicode Standard) in the order of BIG
5202 endian at the head. Assigned the coding-system (Lisp symbol)
5203 `utf-16-be' by default.
5205 o coding-category-utf-16-le
5207 The category for a coding system in which a text has an
5208 Unicode signature (cf. Unicode Standard) in the order of
5209 LITTLE endian at the head. Assigned the coding-system (Lisp
5210 symbol) `utf-16-le' by default.
5212 o coding-category-ccl
5214 The category for a coding system of which encoder/decoder is
5215 written in CCL programs. The default value is nil, i.e., no
5216 coding system is assigned.
5218 o coding-category-binary
5220 The category for a coding system not categorized in any of the
5221 above. Assigned the coding-system (Lisp symbol)
5222 `no-conversion' by default.
5224 Each of them is a Lisp symbol and the value is an actual
5225 `coding-system' (this is also a Lisp symbol) assigned by a user.
5226 What Emacs does actually is to detect a category of coding system.
5227 Then, it uses a `coding-system' assigned to it. If Emacs can't
5228 decide only one possible category, it selects a category of the
5229 highest priority. Priorities of categories are also specified by a
5230 user in a Lisp variable `coding-category-list'.
5234 #define EOL_SEEN_NONE 0
5235 #define EOL_SEEN_LF 1
5236 #define EOL_SEEN_CR 2
5237 #define EOL_SEEN_CRLF 4
5239 /* Detect how the end-of-line of a text of length SRC_BYTES pointed to by
5240 SOURCE is encoded. If CATEGORY is one of
5241 coding_category_utf_16_XXXX, assume that CR and LF are each encoded in
5242 two bytes; otherwise they are each encoded in one byte.
5244 Return one of EOL_SEEN_XXX. */
5246 #define MAX_EOL_CHECK_COUNT 3
5249 detect_eol (source
, src_bytes
, category
)
5250 const unsigned char *source
;
5251 EMACS_INT src_bytes
;
5252 enum coding_category category
;
5254 const unsigned char *src
= source
, *src_end
= src
+ src_bytes
;
5257 int eol_seen
= EOL_SEEN_NONE
;
5259 if ((1 << category
) & CATEGORY_MASK_UTF_16
)
5263 msb
= category
== (coding_category_utf_16_le
5264 | coding_category_utf_16_le_nosig
);
5267 while (src
+ 1 < src_end
)
5270 if (src
[msb
] == 0 && (c
== '\n' || c
== '\r'))
5275 this_eol
= EOL_SEEN_LF
;
5276 else if (src
+ 3 >= src_end
5277 || src
[msb
+ 2] != 0
5278 || src
[lsb
+ 2] != '\n')
5279 this_eol
= EOL_SEEN_CR
;
5281 this_eol
= EOL_SEEN_CRLF
;
5283 if (eol_seen
== EOL_SEEN_NONE
)
5284 /* This is the first end-of-line. */
5285 eol_seen
= this_eol
;
5286 else if (eol_seen
!= this_eol
)
5288 /* The found type is different from what found before. */
5289 eol_seen
= EOL_SEEN_LF
;
5292 if (++total
== MAX_EOL_CHECK_COUNT
)
5300 while (src
< src_end
)
5303 if (c
== '\n' || c
== '\r')
5308 this_eol
= EOL_SEEN_LF
;
5309 else if (src
>= src_end
|| *src
!= '\n')
5310 this_eol
= EOL_SEEN_CR
;
5312 this_eol
= EOL_SEEN_CRLF
, src
++;
5314 if (eol_seen
== EOL_SEEN_NONE
)
5315 /* This is the first end-of-line. */
5316 eol_seen
= this_eol
;
5317 else if (eol_seen
!= this_eol
)
5319 /* The found type is different from what found before. */
5320 eol_seen
= EOL_SEEN_LF
;
5323 if (++total
== MAX_EOL_CHECK_COUNT
)
5333 adjust_coding_eol_type (coding
, eol_seen
)
5334 struct coding_system
*coding
;
5337 Lisp_Object eol_type
;
5339 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5340 if (eol_seen
& EOL_SEEN_LF
)
5342 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 0));
5345 else if (eol_seen
& EOL_SEEN_CRLF
)
5347 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 1));
5350 else if (eol_seen
& EOL_SEEN_CR
)
5352 coding
->id
= CODING_SYSTEM_ID (AREF (eol_type
, 2));
5358 /* Detect how a text specified in CODING is encoded. If a coding
5359 system is detected, update fields of CODING by the detected coding
5363 detect_coding (coding
)
5364 struct coding_system
*coding
;
5366 const unsigned char *src
, *src_end
;
5367 Lisp_Object attrs
, coding_type
;
5369 coding
->consumed
= coding
->consumed_char
= 0;
5370 coding
->produced
= coding
->produced_char
= 0;
5371 coding_set_source (coding
);
5373 src_end
= coding
->source
+ coding
->src_bytes
;
5375 /* If we have not yet decided the text encoding type, detect it
5377 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding
->id
)), Qundecided
))
5381 for (i
= 0, src
= coding
->source
; src
< src_end
; i
++, src
++)
5384 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
5386 || c
== ISO_CODE_SO
)))
5389 coding
->head_ascii
= src
- (coding
->source
+ coding
->consumed
);
5391 if (coding
->head_ascii
< coding
->src_bytes
)
5393 struct coding_detection_info detect_info
;
5394 enum coding_category category
;
5395 struct coding_system
*this;
5397 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
5398 for (i
= 0; i
< coding_category_raw_text
; i
++)
5400 category
= coding_priorities
[i
];
5401 this = coding_categories
+ category
;
5404 /* No coding system of this category is defined. */
5405 detect_info
.rejected
|= (1 << category
);
5407 else if (category
>= coding_category_raw_text
)
5409 else if (detect_info
.checked
& (1 << category
))
5411 if (detect_info
.found
& (1 << category
))
5414 else if ((*(this->detector
)) (coding
, &detect_info
)
5415 && detect_info
.found
& (1 << category
))
5417 if (category
== coding_category_utf_16_auto
)
5419 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5420 category
= coding_category_utf_16_le
;
5422 category
= coding_category_utf_16_be
;
5427 if (i
< coding_category_raw_text
)
5428 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5429 else if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
5430 setup_coding_system (Qraw_text
, coding
);
5431 else if (detect_info
.rejected
)
5432 for (i
= 0; i
< coding_category_raw_text
; i
++)
5433 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
5435 this = coding_categories
+ coding_priorities
[i
];
5436 setup_coding_system (CODING_ID_NAME (this->id
), coding
);
5441 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding
->id
)))
5442 == coding_category_utf_16_auto
)
5444 Lisp_Object coding_systems
;
5445 struct coding_detection_info detect_info
;
5448 = AREF (CODING_ID_ATTRS (coding
->id
), coding_attr_utf_16_bom
);
5449 detect_info
.found
= detect_info
.rejected
= 0;
5450 if (CONSP (coding_systems
)
5451 && detect_coding_utf_16 (coding
, &detect_info
))
5453 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
5454 setup_coding_system (XCAR (coding_systems
), coding
);
5455 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
5456 setup_coding_system (XCDR (coding_systems
), coding
);
5464 struct coding_system
*coding
;
5466 Lisp_Object eol_type
;
5467 unsigned char *p
, *pbeg
, *pend
;
5469 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5470 if (EQ (eol_type
, Qunix
))
5473 if (NILP (coding
->dst_object
))
5474 pbeg
= coding
->destination
;
5476 pbeg
= BYTE_POS_ADDR (coding
->dst_pos_byte
);
5477 pend
= pbeg
+ coding
->produced
;
5479 if (VECTORP (eol_type
))
5481 int eol_seen
= EOL_SEEN_NONE
;
5483 for (p
= pbeg
; p
< pend
; p
++)
5486 eol_seen
|= EOL_SEEN_LF
;
5487 else if (*p
== '\r')
5489 if (p
+ 1 < pend
&& *(p
+ 1) == '\n')
5491 eol_seen
|= EOL_SEEN_CRLF
;
5495 eol_seen
|= EOL_SEEN_CR
;
5498 if (eol_seen
!= EOL_SEEN_NONE
5499 && eol_seen
!= EOL_SEEN_LF
5500 && eol_seen
!= EOL_SEEN_CRLF
5501 && eol_seen
!= EOL_SEEN_CR
)
5502 eol_seen
= EOL_SEEN_LF
;
5503 if (eol_seen
!= EOL_SEEN_NONE
)
5504 eol_type
= adjust_coding_eol_type (coding
, eol_seen
);
5507 if (EQ (eol_type
, Qmac
))
5509 for (p
= pbeg
; p
< pend
; p
++)
5513 else if (EQ (eol_type
, Qdos
))
5517 if (NILP (coding
->dst_object
))
5519 for (p
= pend
- 2; p
>= pbeg
; p
--)
5522 safe_bcopy ((char *) (p
+ 1), (char *) p
, pend
-- - p
- 1);
5528 for (p
= pend
- 2; p
>= pbeg
; p
--)
5531 int pos_byte
= coding
->dst_pos_byte
+ (p
- pbeg
);
5532 int pos
= BYTE_TO_CHAR (pos_byte
);
5534 del_range_2 (pos
, pos_byte
, pos
+ 1, pos_byte
+ 1, 0);
5538 coding
->produced
-= n
;
5539 coding
->produced_char
-= n
;
5544 /* Return a translation table (or list of them) from coding system
5545 attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5546 decoding (ENCODEP is zero). */
5549 get_translation_table (attrs
, encodep
, max_lookup
)
5551 int encodep
, *max_lookup
;
5553 Lisp_Object standard
, translation_table
;
5557 translation_table
= CODING_ATTR_ENCODE_TBL (attrs
),
5558 standard
= Vstandard_translation_table_for_encode
;
5560 translation_table
= CODING_ATTR_DECODE_TBL (attrs
),
5561 standard
= Vstandard_translation_table_for_decode
;
5562 if (NILP (translation_table
))
5563 translation_table
= standard
;
5566 if (SYMBOLP (translation_table
))
5567 translation_table
= Fget (translation_table
, Qtranslation_table
);
5568 else if (CONSP (translation_table
))
5570 translation_table
= Fcopy_sequence (translation_table
);
5571 for (val
= translation_table
; CONSP (val
); val
= XCDR (val
))
5572 if (SYMBOLP (XCAR (val
)))
5573 XSETCAR (val
, Fget (XCAR (val
), Qtranslation_table
));
5575 if (CHAR_TABLE_P (standard
))
5577 if (CONSP (translation_table
))
5578 translation_table
= nconc2 (translation_table
,
5579 Fcons (standard
, Qnil
));
5581 translation_table
= Fcons (translation_table
,
5582 Fcons (standard
, Qnil
));
5589 if (CHAR_TABLE_P (translation_table
)
5590 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table
)) > 1)
5592 val
= XCHAR_TABLE (translation_table
)->extras
[1];
5593 if (NATNUMP (val
) && *max_lookup
< XFASTINT (val
))
5594 *max_lookup
= XFASTINT (val
);
5596 else if (CONSP (translation_table
))
5598 Lisp_Object tail
, val
;
5600 for (tail
= translation_table
; CONSP (tail
); tail
= XCDR (tail
))
5601 if (CHAR_TABLE_P (XCAR (tail
))
5602 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail
))) > 1)
5604 val
= XCHAR_TABLE (XCAR (tail
))->extras
[1];
5605 if (NATNUMP (val
) && *max_lookup
< XFASTINT (val
))
5606 *max_lookup
= XFASTINT (val
);
5610 return translation_table
;
5613 #define LOOKUP_TRANSLATION_TABLE(table, c, trans) \
5616 if (CHAR_TABLE_P (table)) \
5618 trans = CHAR_TABLE_REF (table, c); \
5619 if (CHARACTERP (trans)) \
5620 c = XFASTINT (trans), trans = Qnil; \
5622 else if (CONSP (table)) \
5626 for (tail = table; CONSP (tail); tail = XCDR (tail)) \
5627 if (CHAR_TABLE_P (XCAR (tail))) \
5629 trans = CHAR_TABLE_REF (XCAR (tail), c); \
5630 if (CHARACTERP (trans)) \
5631 c = XFASTINT (trans), trans = Qnil; \
5632 else if (! NILP (trans)) \
5640 get_translation (val
, buf
, buf_end
, last_block
, from_nchars
, to_nchars
)
5644 int *from_nchars
, *to_nchars
;
5646 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or
5650 Lisp_Object from
, tail
;
5653 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
5658 for (i
= 0; i
< len
; i
++)
5660 if (buf
+ i
== buf_end
)
5666 if (XINT (AREF (from
, i
)) != buf
[i
])
5680 *buf
= XINT (AREF (val
, 0)), *to_nchars
= ASIZE (val
);
5688 produce_chars (coding
, translation_table
, last_block
)
5689 struct coding_system
*coding
;
5690 Lisp_Object translation_table
;
5693 unsigned char *dst
= coding
->destination
+ coding
->produced
;
5694 unsigned char *dst_end
= coding
->destination
+ coding
->dst_bytes
;
5696 int produced_chars
= 0;
5699 if (! coding
->chars_at_source
)
5701 /* Characters are in coding->charbuf. */
5702 int *buf
= coding
->charbuf
;
5703 int *buf_end
= buf
+ coding
->charbuf_used
;
5705 if (BUFFERP (coding
->src_object
)
5706 && EQ (coding
->src_object
, coding
->dst_object
))
5707 dst_end
= ((unsigned char *) coding
->source
) + coding
->consumed
;
5709 while (buf
< buf_end
)
5715 int from_nchars
= 1, to_nchars
= 1;
5716 Lisp_Object trans
= Qnil
;
5718 LOOKUP_TRANSLATION_TABLE (translation_table
, c
, trans
);
5721 trans
= get_translation (trans
, buf
, buf_end
, last_block
,
5722 &from_nchars
, &to_nchars
);
5728 if (dst
+ MAX_MULTIBYTE_LENGTH
* to_nchars
> dst_end
)
5730 dst
= alloc_destination (coding
,
5732 + MAX_MULTIBYTE_LENGTH
* to_nchars
,
5734 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5737 for (i
= 0; i
< to_nchars
; i
++)
5740 c
= XINT (AREF (trans
, i
));
5741 if (coding
->dst_multibyte
5742 || ! CHAR_BYTE8_P (c
))
5743 CHAR_STRING_ADVANCE (c
, dst
);
5745 *dst
++ = CHAR_TO_BYTE8 (c
);
5747 produced_chars
+= to_nchars
;
5749 while (--from_nchars
> 0)
5753 /* This is an annotation datum. (-C) is the length. */
5756 carryover
= buf_end
- buf
;
5760 const unsigned char *src
= coding
->source
;
5761 const unsigned char *src_end
= src
+ coding
->src_bytes
;
5762 Lisp_Object eol_type
;
5764 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
5766 if (coding
->src_multibyte
!= coding
->dst_multibyte
)
5768 if (coding
->src_multibyte
)
5775 const unsigned char *src_base
= src
;
5781 if (EQ (eol_type
, Qdos
))
5785 record_conversion_result
5786 (coding
, CODING_RESULT_INSUFFICIENT_SRC
);
5787 goto no_more_source
;
5792 else if (EQ (eol_type
, Qmac
))
5797 coding
->consumed
= src
- coding
->source
;
5799 if (EQ (coding
->src_object
, coding
->dst_object
))
5800 dst_end
= (unsigned char *) src
;
5803 dst
= alloc_destination (coding
, src_end
- src
+ 1,
5805 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5806 coding_set_source (coding
);
5807 src
= coding
->source
+ coding
->consumed
;
5808 src_end
= coding
->source
+ coding
->src_bytes
;
5818 while (src
< src_end
)
5825 if (EQ (eol_type
, Qdos
))
5831 else if (EQ (eol_type
, Qmac
))
5834 if (dst
>= dst_end
- 1)
5836 coding
->consumed
= src
- coding
->source
;
5838 if (EQ (coding
->src_object
, coding
->dst_object
))
5839 dst_end
= (unsigned char *) src
;
5840 if (dst
>= dst_end
- 1)
5842 dst
= alloc_destination (coding
, src_end
- src
+ 2,
5844 dst_end
= coding
->destination
+ coding
->dst_bytes
;
5845 coding_set_source (coding
);
5846 src
= coding
->source
+ coding
->consumed
;
5847 src_end
= coding
->source
+ coding
->src_bytes
;
5855 if (!EQ (coding
->src_object
, coding
->dst_object
))
5857 int require
= coding
->src_bytes
- coding
->dst_bytes
;
5861 EMACS_INT offset
= src
- coding
->source
;
5863 dst
= alloc_destination (coding
, require
, dst
);
5864 coding_set_source (coding
);
5865 src
= coding
->source
+ offset
;
5866 src_end
= coding
->source
+ coding
->src_bytes
;
5869 produced_chars
= coding
->src_chars
;
5870 while (src
< src_end
)
5876 if (EQ (eol_type
, Qdos
))
5883 else if (EQ (eol_type
, Qmac
))
5889 coding
->consumed
= coding
->src_bytes
;
5890 coding
->consumed_char
= coding
->src_chars
;
5893 produced
= dst
- (coding
->destination
+ coding
->produced
);
5894 if (BUFFERP (coding
->dst_object
))
5895 insert_from_gap (produced_chars
, produced
);
5896 coding
->produced
+= produced
;
5897 coding
->produced_char
+= produced_chars
;
5901 /* Compose text in CODING->object according to the annotation data at
5902 CHARBUF. CHARBUF is an array:
5903 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5907 produce_composition (coding
, charbuf
, pos
)
5908 struct coding_system
*coding
;
5914 enum composition_method method
;
5915 Lisp_Object components
;
5918 to
= pos
+ charbuf
[2];
5919 method
= (enum composition_method
) (charbuf
[3]);
5921 if (method
== COMPOSITION_RELATIVE
)
5925 Lisp_Object args
[MAX_COMPOSITION_COMPONENTS
* 2 - 1];
5930 for (i
= 0; i
< len
; i
++)
5931 args
[i
] = make_number (charbuf
[i
]);
5932 components
= (method
== COMPOSITION_WITH_ALTCHARS
5933 ? Fstring (len
, args
) : Fvector (len
, args
));
5935 compose_text (pos
, to
, components
, Qnil
, coding
->dst_object
);
5939 /* Put `charset' property on text in CODING->object according to
5940 the annotation data at CHARBUF. CHARBUF is an array:
5941 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
5945 produce_charset (coding
, charbuf
, pos
)
5946 struct coding_system
*coding
;
5950 EMACS_INT from
= pos
- charbuf
[2];
5951 struct charset
*charset
= CHARSET_FROM_ID (charbuf
[3]);
5953 Fput_text_property (make_number (from
), make_number (pos
),
5954 Qcharset
, CHARSET_NAME (charset
),
5955 coding
->dst_object
);
5959 #define CHARBUF_SIZE 0x4000
5961 #define ALLOC_CONVERSION_WORK_AREA(coding) \
5963 int size = CHARBUF_SIZE;; \
5965 coding->charbuf = NULL; \
5966 while (size > 1024) \
5968 coding->charbuf = (int *) alloca (sizeof (int) * size); \
5969 if (coding->charbuf) \
5973 if (! coding->charbuf) \
5975 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5976 return coding->result; \
5978 coding->charbuf_size = size; \
5983 produce_annotation (coding
, pos
)
5984 struct coding_system
*coding
;
5987 int *charbuf
= coding
->charbuf
;
5988 int *charbuf_end
= charbuf
+ coding
->charbuf_used
;
5990 if (NILP (coding
->dst_object
))
5993 while (charbuf
< charbuf_end
)
5999 int len
= -*charbuf
;
6002 case CODING_ANNOTATE_COMPOSITION_MASK
:
6003 produce_composition (coding
, charbuf
, pos
);
6005 case CODING_ANNOTATE_CHARSET_MASK
:
6006 produce_charset (coding
, charbuf
, pos
);
6016 /* Decode the data at CODING->src_object into CODING->dst_object.
6017 CODING->src_object is a buffer, a string, or nil.
6018 CODING->dst_object is a buffer.
6020 If CODING->src_object is a buffer, it must be the current buffer.
6021 In this case, if CODING->src_pos is positive, it is a position of
6022 the source text in the buffer, otherwise, the source text is in the
6023 gap area of the buffer, and CODING->src_pos specifies the offset of
6024 the text from GPT (which must be the same as PT). If this is the
6025 same buffer as CODING->dst_object, CODING->src_pos must be
6028 If CODING->src_object is a string, CODING->src_pos is an index to
6031 If CODING->src_object is nil, CODING->source must already point to
6032 the non-relocatable memory area. In this case, CODING->src_pos is
6033 an offset from CODING->source.
6035 The decoded data is inserted at the current point of the buffer
6040 decode_coding (coding
)
6041 struct coding_system
*coding
;
6044 Lisp_Object undo_list
;
6045 Lisp_Object translation_table
;
6049 if (BUFFERP (coding
->src_object
)
6050 && coding
->src_pos
> 0
6051 && coding
->src_pos
< GPT
6052 && coding
->src_pos
+ coding
->src_chars
> GPT
)
6053 move_gap_both (coding
->src_pos
, coding
->src_pos_byte
);
6056 if (BUFFERP (coding
->dst_object
))
6058 if (current_buffer
!= XBUFFER (coding
->dst_object
))
6059 set_buffer_internal (XBUFFER (coding
->dst_object
));
6061 move_gap_both (PT
, PT_BYTE
);
6062 undo_list
= current_buffer
->undo_list
;
6063 current_buffer
->undo_list
= Qt
;
6066 coding
->consumed
= coding
->consumed_char
= 0;
6067 coding
->produced
= coding
->produced_char
= 0;
6068 coding
->chars_at_source
= 0;
6069 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
6072 ALLOC_CONVERSION_WORK_AREA (coding
);
6074 attrs
= CODING_ID_ATTRS (coding
->id
);
6075 translation_table
= get_translation_table (attrs
, 0, NULL
);
6080 EMACS_INT pos
= coding
->dst_pos
+ coding
->produced_char
;
6082 coding_set_source (coding
);
6083 coding
->annotated
= 0;
6084 coding
->charbuf_used
= carryover
;
6085 (*(coding
->decoder
)) (coding
);
6086 coding_set_destination (coding
);
6087 carryover
= produce_chars (coding
, translation_table
, 0);
6088 if (coding
->annotated
)
6089 produce_annotation (coding
, pos
);
6090 for (i
= 0; i
< carryover
; i
++)
6092 = coding
->charbuf
[coding
->charbuf_used
- carryover
+ i
];
6094 while (coding
->consumed
< coding
->src_bytes
6095 && ! coding
->result
);
6099 coding_set_destination (coding
);
6100 coding
->charbuf_used
= carryover
;
6101 produce_chars (coding
, translation_table
, 1);
6104 coding
->carryover_bytes
= 0;
6105 if (coding
->consumed
< coding
->src_bytes
)
6107 int nbytes
= coding
->src_bytes
- coding
->consumed
;
6108 const unsigned char *src
;
6110 coding_set_source (coding
);
6111 coding_set_destination (coding
);
6112 src
= coding
->source
+ coding
->consumed
;
6114 if (coding
->mode
& CODING_MODE_LAST_BLOCK
)
6116 /* Flush out unprocessed data as binary chars. We are sure
6117 that the number of data is less than the size of
6119 coding
->charbuf_used
= 0;
6120 while (nbytes
-- > 0)
6124 coding
->charbuf
[coding
->charbuf_used
++] = (c
& 0x80 ? - c
: c
);
6126 produce_chars (coding
, Qnil
, 1);
6130 /* Record unprocessed bytes in coding->carryover. We are
6131 sure that the number of data is less than the size of
6132 coding->carryover. */
6133 unsigned char *p
= coding
->carryover
;
6135 coding
->carryover_bytes
= nbytes
;
6136 while (nbytes
-- > 0)
6139 coding
->consumed
= coding
->src_bytes
;
6142 if (BUFFERP (coding
->dst_object
))
6144 current_buffer
->undo_list
= undo_list
;
6145 record_insert (coding
->dst_pos
, coding
->produced_char
);
6147 if (! EQ (CODING_ID_EOL_TYPE (coding
->id
), Qunix
))
6148 decode_eol (coding
);
6149 return coding
->result
;
6153 /* Extract an annotation datum from a composition starting at POS and
6154 ending before LIMIT of CODING->src_object (buffer or string), store
6155 the data in BUF, set *STOP to a starting position of the next
6156 composition (if any) or to LIMIT, and return the address of the
6157 next element of BUF.
6159 If such an annotation is not found, set *STOP to a starting
6160 position of a composition after POS (if any) or to LIMIT, and
6164 handle_composition_annotation (pos
, limit
, coding
, buf
, stop
)
6165 EMACS_INT pos
, limit
;
6166 struct coding_system
*coding
;
6170 EMACS_INT start
, end
;
6173 if (! find_composition (pos
, limit
, &start
, &end
, &prop
, coding
->src_object
)
6176 else if (start
> pos
)
6182 /* We found a composition. Store the corresponding
6183 annotation data in BUF. */
6185 enum composition_method method
= COMPOSITION_METHOD (prop
);
6186 int nchars
= COMPOSITION_LENGTH (prop
);
6188 ADD_COMPOSITION_DATA (buf
, nchars
, method
);
6189 if (method
!= COMPOSITION_RELATIVE
)
6191 Lisp_Object components
;
6194 components
= COMPOSITION_COMPONENTS (prop
);
6195 if (VECTORP (components
))
6197 len
= XVECTOR (components
)->size
;
6198 for (i
= 0; i
< len
; i
++)
6199 *buf
++ = XINT (AREF (components
, i
));
6201 else if (STRINGP (components
))
6203 len
= SCHARS (components
);
6207 FETCH_STRING_CHAR_ADVANCE (*buf
, components
, i
, i_byte
);
6211 else if (INTEGERP (components
))
6214 *buf
++ = XINT (components
);
6216 else if (CONSP (components
))
6218 for (len
= 0; CONSP (components
);
6219 len
++, components
= XCDR (components
))
6220 *buf
++ = XINT (XCAR (components
));
6228 if (find_composition (end
, limit
, &start
, &end
, &prop
,
6239 /* Extract an annotation datum from a text property `charset' at POS of
6240 CODING->src_object (buffer or string), store the data in BUF, set
6241 *STOP to the position where the value of `charset' property changes
6242 (limiting by LIMIT), and return the address of the next element of
6245 If the property value is nil, set *STOP to the position where the
6246 property value is non-nil (limiting by LIMIT), and return BUF. */
6249 handle_charset_annotation (pos
, limit
, coding
, buf
, stop
)
6250 EMACS_INT pos
, limit
;
6251 struct coding_system
*coding
;
6255 Lisp_Object val
, next
;
6258 val
= Fget_text_property (make_number (pos
), Qcharset
, coding
->src_object
);
6259 if (! NILP (val
) && CHARSETP (val
))
6260 id
= XINT (CHARSET_SYMBOL_ID (val
));
6263 ADD_CHARSET_DATA (buf
, 0, id
);
6264 next
= Fnext_single_property_change (make_number (pos
), Qcharset
,
6266 make_number (limit
));
6267 *stop
= XINT (next
);
6273 consume_chars (coding
, translation_table
, max_lookup
)
6274 struct coding_system
*coding
;
6275 Lisp_Object translation_table
;
6278 int *buf
= coding
->charbuf
;
6279 int *buf_end
= coding
->charbuf
+ coding
->charbuf_size
;
6280 const unsigned char *src
= coding
->source
+ coding
->consumed
;
6281 const unsigned char *src_end
= coding
->source
+ coding
->src_bytes
;
6282 EMACS_INT pos
= coding
->src_pos
+ coding
->consumed_char
;
6283 EMACS_INT end_pos
= coding
->src_pos
+ coding
->src_chars
;
6284 int multibytep
= coding
->src_multibyte
;
6285 Lisp_Object eol_type
;
6287 EMACS_INT stop
, stop_composition
, stop_charset
;
6288 int *lookup_buf
= NULL
;
6290 if (! NILP (translation_table
))
6291 lookup_buf
= alloca (sizeof (int) * max_lookup
);
6293 eol_type
= CODING_ID_EOL_TYPE (coding
->id
);
6294 if (VECTORP (eol_type
))
6297 /* Note: composition handling is not yet implemented. */
6298 coding
->common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
6300 if (NILP (coding
->src_object
))
6301 stop
= stop_composition
= stop_charset
= end_pos
;
6304 if (coding
->common_flags
& CODING_ANNOTATE_COMPOSITION_MASK
)
6305 stop
= stop_composition
= pos
;
6307 stop
= stop_composition
= end_pos
;
6308 if (coding
->common_flags
& CODING_ANNOTATE_CHARSET_MASK
)
6309 stop
= stop_charset
= pos
;
6311 stop_charset
= end_pos
;
6314 /* Compensate for CRLF and conversion. */
6315 buf_end
-= 1 + MAX_ANNOTATION_LENGTH
;
6316 while (buf
< buf_end
)
6324 if (pos
== stop_composition
)
6325 buf
= handle_composition_annotation (pos
, end_pos
, coding
,
6326 buf
, &stop_composition
);
6327 if (pos
== stop_charset
)
6328 buf
= handle_charset_annotation (pos
, end_pos
, coding
,
6329 buf
, &stop_charset
);
6330 stop
= (stop_composition
< stop_charset
6331 ? stop_composition
: stop_charset
);
6338 if (! CODING_FOR_UNIBYTE (coding
)
6339 && (bytes
= MULTIBYTE_LENGTH (src
, src_end
)) > 0)
6340 c
= STRING_CHAR_ADVANCE (src
), pos
+= bytes
;
6345 c
= STRING_CHAR_ADVANCE (src
), pos
++;
6346 if ((c
== '\r') && (coding
->mode
& CODING_MODE_SELECTIVE_DISPLAY
))
6348 if (! EQ (eol_type
, Qunix
))
6352 if (EQ (eol_type
, Qdos
))
6360 LOOKUP_TRANSLATION_TABLE (translation_table
, c
, trans
);
6365 int from_nchars
= 1, to_nchars
= 1;
6366 int *lookup_buf_end
;
6367 const unsigned char *p
= src
;
6371 for (i
= 1; i
< max_lookup
&& p
< src_end
; i
++)
6372 lookup_buf
[i
] = STRING_CHAR_ADVANCE (p
);
6373 lookup_buf_end
= lookup_buf
+ i
;
6374 trans
= get_translation (trans
, lookup_buf
, lookup_buf_end
, 1,
6375 &from_nchars
, &to_nchars
);
6377 || buf
+ to_nchars
> buf_end
)
6379 *buf
++ = *lookup_buf
;
6380 for (i
= 1; i
< to_nchars
; i
++)
6381 *buf
++ = XINT (AREF (trans
, i
));
6382 for (i
= 1; i
< from_nchars
; i
++, pos
++)
6383 src
+= MULTIBYTE_LENGTH_NO_CHECK (src
);
6387 coding
->consumed
= src
- coding
->source
;
6388 coding
->consumed_char
= pos
- coding
->src_pos
;
6389 coding
->charbuf_used
= buf
- coding
->charbuf
;
6390 coding
->chars_at_source
= 0;
6394 /* Encode the text at CODING->src_object into CODING->dst_object.
6395 CODING->src_object is a buffer or a string.
6396 CODING->dst_object is a buffer or nil.
6398 If CODING->src_object is a buffer, it must be the current buffer.
6399 In this case, if CODING->src_pos is positive, it is a position of
6400 the source text in the buffer, otherwise, the source text is in the
6401 gap area of the buffer, and coding->src_pos specifies the offset of
6402 the text from GPT (which must be the same as PT). If this is the
6403 same buffer as CODING->dst_object, CODING->src_pos must be
6404 negative and CODING should not have `pre-write-conversion'.
6406 If CODING->src_object is a string, CODING should not have
6407 `pre-write-conversion'.
6409 If CODING->dst_object is a buffer, the encoded data is inserted at
6410 the current point of that buffer.
6412 If CODING->dst_object is nil, the encoded data is placed at the
6413 memory area specified by CODING->destination. */
6416 encode_coding (coding
)
6417 struct coding_system
*coding
;
6420 Lisp_Object translation_table
;
6423 attrs
= CODING_ID_ATTRS (coding
->id
);
6424 translation_table
= get_translation_table (attrs
, 1, &max_lookup
);
6426 if (BUFFERP (coding
->dst_object
))
6428 set_buffer_internal (XBUFFER (coding
->dst_object
));
6429 coding
->dst_multibyte
6430 = ! NILP (current_buffer
->enable_multibyte_characters
);
6433 coding
->consumed
= coding
->consumed_char
= 0;
6434 coding
->produced
= coding
->produced_char
= 0;
6435 record_conversion_result (coding
, CODING_RESULT_SUCCESS
);
6438 ALLOC_CONVERSION_WORK_AREA (coding
);
6441 coding_set_source (coding
);
6442 consume_chars (coding
, translation_table
, max_lookup
);
6443 coding_set_destination (coding
);
6444 (*(coding
->encoder
)) (coding
);
6445 } while (coding
->consumed_char
< coding
->src_chars
);
6447 if (BUFFERP (coding
->dst_object
))
6448 insert_from_gap (coding
->produced_char
, coding
->produced
);
6450 return (coding
->result
);
6454 /* Name (or base name) of work buffer for code conversion. */
6455 static Lisp_Object Vcode_conversion_workbuf_name
;
6457 /* A working buffer used by the top level conversion. Once it is
6458 created, it is never destroyed. It has the name
6459 Vcode_conversion_workbuf_name. The other working buffers are
6460 destroyed after the use is finished, and their names are modified
6461 versions of Vcode_conversion_workbuf_name. */
6462 static Lisp_Object Vcode_conversion_reused_workbuf
;
6464 /* 1 iff Vcode_conversion_reused_workbuf is already in use. */
6465 static int reused_workbuf_in_use
;
6468 /* Return a working buffer for code conversion. MULTIBYTE specifies the
6469 multibyteness of the returned buffer. */
/* Obtain a work buffer and configure it: undo recording is turned off
   and enable_multibyte_characters is set to Qt or Qnil according to
   MULTIBYTE.  The buffer that was current on entry is made current
   again before the end of the visible code.  */
6472 make_conversion_work_buffer (multibyte
)
6475 Lisp_Object name
, workbuf
;
6476 struct buffer
*current
;
/* reused_workbuf_in_use is tested and then incremented.  When it was
   already nonzero, a buffer with a freshly generated name (derived
   from Vcode_conversion_workbuf_name) is obtained.  */
6478 if (reused_workbuf_in_use
++)
6480 name
= Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name
, Qnil
);
6481 workbuf
= Fget_buffer_create (name
);
/* Otherwise the buffer named Vcode_conversion_workbuf_name itself is
   obtained and, if none was recorded yet, remembered in
   Vcode_conversion_reused_workbuf.
   NOTE(review): the `else' and brace lines are not visible in this
   excerpt -- confirm the branch structure against the full file.  */
6485 name
= Vcode_conversion_workbuf_name
;
6486 workbuf
= Fget_buffer_create (name
);
6487 if (NILP (Vcode_conversion_reused_workbuf
))
6488 Vcode_conversion_reused_workbuf
= workbuf
;
/* Temporarily make the work buffer current so that its per-buffer
   settings can be assigned through current_buffer, then switch back.  */
6490 current
= current_buffer
;
6491 set_buffer_internal (XBUFFER (workbuf
));
6493 current_buffer
->undo_list
= Qt
;
6494 current_buffer
->enable_multibyte_characters
= multibyte
? Qt
: Qnil
;
6495 set_buffer_internal (current
);
/* Unwind handler registered by code_conversion_save.  ARG is a cons
   (CURRENT . WORKBUF): CURRENT is the buffer to make current again,
   WORKBUF is a work buffer to release, or nil if there is none.  */
6501 code_conversion_restore (arg
)
6504 Lisp_Object current
, workbuf
;
6506 current
= XCAR (arg
);
6507 workbuf
= XCDR (arg
);
6508 if (! NILP (workbuf
))
/* The shared work buffer is only marked as free again; any other
   work buffer is killed if it is still live.  */
6510 if (EQ (workbuf
, Vcode_conversion_reused_workbuf
))
6511 reused_workbuf_in_use
= 0;
6512 else if (! NILP (Fbuffer_live_p (workbuf
)))
6513 Fkill_buffer (workbuf
);
/* Restore the buffer recorded in ARG as the current buffer.  */
6515 set_buffer_internal (XBUFFER (current
));
/* Record an unwind-protect entry that runs code_conversion_restore
   with the cons (current buffer . WORKBUF), where WORKBUF is either
   nil or a work buffer made by make_conversion_work_buffer with the
   given MULTIBYTE flag.
   NOTE(review): the test of WITH_WORK_BUF that presumably guards the
   work-buffer creation, and the return statement, are not visible in
   this excerpt; callers assign the result to CODING->dst_object or
   CODING->src_object, so it presumably returns WORKBUF -- confirm.  */
6520 code_conversion_save (with_work_buf
, multibyte
)
6521 int with_work_buf
, multibyte
;
6523 Lisp_Object workbuf
= Qnil
;
6526 workbuf
= make_conversion_work_buffer (multibyte
);
6527 record_unwind_protect (code_conversion_restore
,
6528 Fcons (Fcurrent_buffer (), workbuf
));
/* Decode CHARS characters (BYTES bytes) of source text belonging to
   the current buffer, using CODING, with the current buffer as both
   source and destination.  The source position is given as the
   negative values -CHARS / -BYTES and the destination is point.
   NOTE(review): presumably negative positions mean "text in the gap
   area", as documented for encode_coding -- confirm for decoding.
   Returns CODING->result.  */
6533 decode_coding_gap (coding
, chars
, bytes
)
6534 struct coding_system
*coding
;
6535 EMACS_INT chars
, bytes
;
/* Remember the specpdl depth so the unwind entry pushed by
   code_conversion_save can be popped by unbind_to below.  */
6537 int count
= specpdl_ptr
- specpdl
;
6540 code_conversion_save (0, 0);
6542 coding
->src_object
= Fcurrent_buffer ();
6543 coding
->src_chars
= chars
;
6544 coding
->src_bytes
= bytes
;
6545 coding
->src_pos
= -chars
;
6546 coding
->src_pos_byte
= -bytes
;
/* More bytes than characters means the source is treated as multibyte.  */
6547 coding
->src_multibyte
= chars
< bytes
;
6548 coding
->dst_object
= coding
->src_object
;
6549 coding
->dst_pos
= PT
;
6550 coding
->dst_pos_byte
= PT_BYTE
;
6551 coding
->dst_multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
/* The whole source is handed over at once, so mark it as the last block.  */
6552 coding
->mode
|= CODING_MODE_LAST_BLOCK
;
6554 if (CODING_REQUIRE_DETECTION (coding
))
6555 detect_coding (coding
);
6557 decode_coding (coding
);
/* If the coding system has a post-read function, call it with point at
   the start of the decoded text and the number of produced characters
   as argument, then adjust the produced counts by however much the
   buffer size (Z / Z_BYTE) changed during the call.  */
6559 attrs
= CODING_ID_ATTRS (coding
->id
);
6560 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6562 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6565 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6566 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6567 make_number (coding
->produced_char
));
6569 coding
->produced_char
+= Z
- prev_Z
;
6570 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6573 unbind_to (count
, Qnil
);
6574 return coding
->result
;
/* Encode CHARS characters (BYTES bytes) of source text belonging to
   the current buffer, using CODING, with the current buffer as both
   source and destination.  The source position is given as the
   negative values -CHARS / -BYTES, which encode_coding documents as
   "text in the gap area, offset from GPT"; the destination is point.
   Returns CODING->result.  */
6578 encode_coding_gap (coding
, chars
, bytes
)
6579 struct coding_system
*coding
;
6580 EMACS_INT chars
, bytes
;
/* Remember the specpdl depth so the unwind entry pushed by
   code_conversion_save can be popped by unbind_to below.  */
6582 int count
= specpdl_ptr
- specpdl
;
6584 code_conversion_save (0, 0);
6586 coding
->src_object
= Fcurrent_buffer ();
6587 coding
->src_chars
= chars
;
6588 coding
->src_bytes
= bytes
;
6589 coding
->src_pos
= -chars
;
6590 coding
->src_pos_byte
= -bytes
;
/* More bytes than characters means the source is treated as multibyte.  */
6591 coding
->src_multibyte
= chars
< bytes
;
6592 coding
->dst_object
= coding
->src_object
;
6593 coding
->dst_pos
= PT
;
6594 coding
->dst_pos_byte
= PT_BYTE
;
6596 encode_coding (coding
);
6598 unbind_to (count
, Qnil
);
6599 return coding
->result
;
6603 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6604 SRC_OBJECT into DST_OBJECT by coding context CODING.
6606 SRC_OBJECT is a buffer, a string, or Qnil.
6608 If it is a buffer, the text is at point of the buffer. FROM and TO
6609 are positions in the buffer.
6611 If it is a string, the text is at the beginning of the string.
6612 FROM and TO are indices to the string.
6614 If it is nil, the text is at coding->source. FROM and TO are
6615 indices to coding->source.
6617 DST_OBJECT is a buffer, Qt, or Qnil.
6619 If it is a buffer, the decoded text is inserted at point of the
6620 buffer. If the buffer is the same as SRC_OBJECT, the source text
6623 If it is Qt, a string is made from the decoded text, and
6624 set in CODING->dst_object.
6626 If it is Qnil, the decoded text is stored at CODING->destination.
6627 The caller must allocate CODING->dst_bytes bytes at
6628 CODING->destination by xmalloc. If the decoded text is longer than
6629 CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6633 decode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6635 struct coding_system
*coding
;
6636 Lisp_Object src_object
;
6637 EMACS_INT from
, from_byte
, to
, to_byte
;
6638 Lisp_Object dst_object
;
6640 int count
= specpdl_ptr
- specpdl
;
6641 unsigned char *destination
;
6642 EMACS_INT dst_bytes
;
6643 EMACS_INT chars
= to
- from
;
6644 EMACS_INT bytes
= to_byte
- from_byte
;
6647 int saved_pt
= -1, saved_pt_byte
;
6649 buffer
= Fcurrent_buffer ();
6651 if (NILP (dst_object
))
6653 destination
= coding
->destination
;
6654 dst_bytes
= coding
->dst_bytes
;
6657 coding
->src_object
= src_object
;
6658 coding
->src_chars
= chars
;
6659 coding
->src_bytes
= bytes
;
6660 coding
->src_multibyte
= chars
< bytes
;
6662 if (STRINGP (src_object
))
6664 coding
->src_pos
= from
;
6665 coding
->src_pos_byte
= from_byte
;
6667 else if (BUFFERP (src_object
))
6669 set_buffer_internal (XBUFFER (src_object
));
6671 move_gap_both (from
, from_byte
);
6672 if (EQ (src_object
, dst_object
))
6674 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6675 TEMP_SET_PT_BOTH (from
, from_byte
);
6676 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6677 coding
->src_pos
= -chars
;
6678 coding
->src_pos_byte
= -bytes
;
6682 coding
->src_pos
= from
;
6683 coding
->src_pos_byte
= from_byte
;
6687 if (CODING_REQUIRE_DETECTION (coding
))
6688 detect_coding (coding
);
6689 attrs
= CODING_ID_ATTRS (coding
->id
);
6691 if (EQ (dst_object
, Qt
)
6692 || (! NILP (CODING_ATTR_POST_READ (attrs
))
6693 && NILP (dst_object
)))
6695 coding
->dst_object
= code_conversion_save (1, 1);
6696 coding
->dst_pos
= BEG
;
6697 coding
->dst_pos_byte
= BEG_BYTE
;
6698 coding
->dst_multibyte
= 1;
6700 else if (BUFFERP (dst_object
))
6702 code_conversion_save (0, 0);
6703 coding
->dst_object
= dst_object
;
6704 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6705 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6706 coding
->dst_multibyte
6707 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6711 code_conversion_save (0, 0);
6712 coding
->dst_object
= Qnil
;
6713 coding
->dst_multibyte
= 1;
6716 decode_coding (coding
);
6718 if (BUFFERP (coding
->dst_object
))
6719 set_buffer_internal (XBUFFER (coding
->dst_object
));
6721 if (! NILP (CODING_ATTR_POST_READ (attrs
)))
6723 struct gcpro gcpro1
, gcpro2
;
6724 EMACS_INT prev_Z
= Z
, prev_Z_BYTE
= Z_BYTE
;
6727 TEMP_SET_PT_BOTH (coding
->dst_pos
, coding
->dst_pos_byte
);
6728 GCPRO2 (coding
->src_object
, coding
->dst_object
);
6729 val
= call1 (CODING_ATTR_POST_READ (attrs
),
6730 make_number (coding
->produced_char
));
6733 coding
->produced_char
+= Z
- prev_Z
;
6734 coding
->produced
+= Z_BYTE
- prev_Z_BYTE
;
6737 if (EQ (dst_object
, Qt
))
6739 coding
->dst_object
= Fbuffer_string ();
6741 else if (NILP (dst_object
) && BUFFERP (coding
->dst_object
))
6743 set_buffer_internal (XBUFFER (coding
->dst_object
));
6744 if (dst_bytes
< coding
->produced
)
6747 = (unsigned char *) xrealloc (destination
, coding
->produced
);
6750 record_conversion_result (coding
,
6751 CODING_RESULT_INSUFFICIENT_DST
);
6752 unbind_to (count
, Qnil
);
6755 if (BEGV
< GPT
&& GPT
< BEGV
+ coding
->produced_char
)
6756 move_gap_both (BEGV
, BEGV_BYTE
);
6757 bcopy (BEGV_ADDR
, destination
, coding
->produced
);
6758 coding
->destination
= destination
;
6764 /* This is the case of:
6765 (BUFFERP (src_object) && EQ (src_object, dst_object))
6766 As we have moved PT while replacing the original buffer
6767 contents, we must recover it now. */
6768 set_buffer_internal (XBUFFER (src_object
));
6769 if (saved_pt
< from
)
6770 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
6771 else if (saved_pt
< from
+ chars
)
6772 TEMP_SET_PT_BOTH (from
, from_byte
);
6773 else if (! NILP (current_buffer
->enable_multibyte_characters
))
6774 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
6775 saved_pt_byte
+ (coding
->produced
- bytes
));
6777 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
6778 saved_pt_byte
+ (coding
->produced
- bytes
));
6781 unbind_to (count
, coding
->dst_object
);
6786 encode_coding_object (coding
, src_object
, from
, from_byte
, to
, to_byte
,
6788 struct coding_system
*coding
;
6789 Lisp_Object src_object
;
6790 EMACS_INT from
, from_byte
, to
, to_byte
;
6791 Lisp_Object dst_object
;
6793 int count
= specpdl_ptr
- specpdl
;
6794 EMACS_INT chars
= to
- from
;
6795 EMACS_INT bytes
= to_byte
- from_byte
;
6798 int saved_pt
= -1, saved_pt_byte
;
6800 buffer
= Fcurrent_buffer ();
6802 coding
->src_object
= src_object
;
6803 coding
->src_chars
= chars
;
6804 coding
->src_bytes
= bytes
;
6805 coding
->src_multibyte
= chars
< bytes
;
6807 attrs
= CODING_ID_ATTRS (coding
->id
);
6809 if (! NILP (CODING_ATTR_PRE_WRITE (attrs
)))
6811 coding
->src_object
= code_conversion_save (1, coding
->src_multibyte
);
6812 set_buffer_internal (XBUFFER (coding
->src_object
));
6813 if (STRINGP (src_object
))
6814 insert_from_string (src_object
, from
, from_byte
, chars
, bytes
, 0);
6815 else if (BUFFERP (src_object
))
6816 insert_from_buffer (XBUFFER (src_object
), from
, chars
, 0);
6818 insert_1_both (coding
->source
+ from
, chars
, bytes
, 0, 0, 0);
6820 if (EQ (src_object
, dst_object
))
6822 set_buffer_internal (XBUFFER (src_object
));
6823 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6824 del_range_both (from
, from_byte
, to
, to_byte
, 1);
6825 set_buffer_internal (XBUFFER (coding
->src_object
));
6828 call2 (CODING_ATTR_PRE_WRITE (attrs
),
6829 make_number (BEG
), make_number (Z
));
6830 coding
->src_object
= Fcurrent_buffer ();
6832 move_gap_both (BEG
, BEG_BYTE
);
6833 coding
->src_chars
= Z
- BEG
;
6834 coding
->src_bytes
= Z_BYTE
- BEG_BYTE
;
6835 coding
->src_pos
= BEG
;
6836 coding
->src_pos_byte
= BEG_BYTE
;
6837 coding
->src_multibyte
= Z
< Z_BYTE
;
6839 else if (STRINGP (src_object
))
6841 code_conversion_save (0, 0);
6842 coding
->src_pos
= from
;
6843 coding
->src_pos_byte
= from_byte
;
6845 else if (BUFFERP (src_object
))
6847 code_conversion_save (0, 0);
6848 set_buffer_internal (XBUFFER (src_object
));
6849 if (EQ (src_object
, dst_object
))
6851 saved_pt
= PT
, saved_pt_byte
= PT_BYTE
;
6852 coding
->src_object
= del_range_1 (from
, to
, 1, 1);
6853 coding
->src_pos
= 0;
6854 coding
->src_pos_byte
= 0;
6858 if (from
< GPT
&& to
>= GPT
)
6859 move_gap_both (from
, from_byte
);
6860 coding
->src_pos
= from
;
6861 coding
->src_pos_byte
= from_byte
;
6865 code_conversion_save (0, 0);
6867 if (BUFFERP (dst_object
))
6869 coding
->dst_object
= dst_object
;
6870 if (EQ (src_object
, dst_object
))
6872 coding
->dst_pos
= from
;
6873 coding
->dst_pos_byte
= from_byte
;
6877 coding
->dst_pos
= BUF_PT (XBUFFER (dst_object
));
6878 coding
->dst_pos_byte
= BUF_PT_BYTE (XBUFFER (dst_object
));
6880 coding
->dst_multibyte
6881 = ! NILP (XBUFFER (dst_object
)->enable_multibyte_characters
);
6883 else if (EQ (dst_object
, Qt
))
6885 coding
->dst_object
= Qnil
;
6886 coding
->dst_bytes
= coding
->src_chars
;
6887 if (coding
->dst_bytes
== 0)
6888 coding
->dst_bytes
= 1;
6889 coding
->destination
= (unsigned char *) xmalloc (coding
->dst_bytes
);
6890 coding
->dst_multibyte
= 0;
6894 coding
->dst_object
= Qnil
;
6895 coding
->dst_multibyte
= 0;
6898 encode_coding (coding
);
6900 if (EQ (dst_object
, Qt
))
6902 if (BUFFERP (coding
->dst_object
))
6903 coding
->dst_object
= Fbuffer_string ();
6907 = make_unibyte_string ((char *) coding
->destination
,
6909 xfree (coding
->destination
);
6915 /* This is the case of:
6916 (BUFFERP (src_object) && EQ (src_object, dst_object))
6917 As we have moved PT while replacing the original buffer
6918 contents, we must recover it now. */
6919 set_buffer_internal (XBUFFER (src_object
));
6920 if (saved_pt
< from
)
6921 TEMP_SET_PT_BOTH (saved_pt
, saved_pt_byte
);
6922 else if (saved_pt
< from
+ chars
)
6923 TEMP_SET_PT_BOTH (from
, from_byte
);
6924 else if (! NILP (current_buffer
->enable_multibyte_characters
))
6925 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced_char
- chars
),
6926 saved_pt_byte
+ (coding
->produced
- bytes
));
6928 TEMP_SET_PT_BOTH (saved_pt
+ (coding
->produced
- bytes
),
6929 saved_pt_byte
+ (coding
->produced
- bytes
));
6932 unbind_to (count
, Qnil
);
/* Return the name (CODING_ID_NAME) of the coding system recorded for
   the category stored in the first slot of coding_priorities --
   presumably the category of highest priority (verify against the
   code that maintains coding_priorities).  */
6937 preferred_coding_system ()
/* Look up the coding system id registered for that category.  */
6939 int id
= coding_categories
[coding_priorities
[0]].id
;
6941 return CODING_ID_NAME (id
);
6946 /*** 8. Emacs Lisp library functions ***/
6948 DEFUN ("coding-system-p", Fcoding_system_p
, Scoding_system_p
, 1, 1, 0,
6949 doc
: /* Return t if OBJECT is nil or a coding-system.
6950 See the documentation of `define-coding-system' for information
6951 about coding-system objects. */)
6955 return ((NILP (obj
) || CODING_SYSTEM_P (obj
)) ? Qt
: Qnil
);
6958 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system
,
6959 Sread_non_nil_coding_system
, 1, 1, 0,
6960 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT. */)
6967 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6968 Qt
, Qnil
, Qcoding_system_history
, Qnil
, Qnil
);
6970 while (SCHARS (val
) == 0);
6971 return (Fintern (val
, Qnil
));
6974 DEFUN ("read-coding-system", Fread_coding_system
, Sread_coding_system
, 1, 2, 0,
6975 doc
: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6976 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */)
6977 (prompt
, default_coding_system
)
6978 Lisp_Object prompt
, default_coding_system
;
6981 if (SYMBOLP (default_coding_system
))
6982 XSETSTRING (default_coding_system
, XPNTR (SYMBOL_NAME (default_coding_system
)));
6983 val
= Fcompleting_read (prompt
, Vcoding_system_alist
, Qnil
,
6984 Qt
, Qnil
, Qcoding_system_history
,
6985 default_coding_system
, Qnil
);
6986 return (SCHARS (val
) == 0 ? Qnil
: Fintern (val
, Qnil
));
6989 DEFUN ("check-coding-system", Fcheck_coding_system
, Scheck_coding_system
,
6991 doc
: /* Check validity of CODING-SYSTEM.
6992 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
6994 Lisp_Object coding_system
;
6996 CHECK_SYMBOL (coding_system
);
6997 if (!NILP (Fcoding_system_p (coding_system
)))
6998 return coding_system
;
7000 Fsignal (Qcoding_system_error
, Fcons (coding_system
, Qnil
));
7004 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
7005 HIGHEST is nonzero, return the coding system of the highest
7006 priority among the detected coding systems. Otherwise return a
7007 list of detected coding systems sorted by their priorities. If
7008 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7009 multibyte form but contains only ASCII and eight-bit chars.
7010 Otherwise, the bytes are raw bytes.
7012 CODING-SYSTEM controls the detection as below:
7014 If it is nil, detect both text-format and eol-format. If the
7015 text-format part of CODING-SYSTEM is already specified
7016 (e.g. `iso-latin-1'), detect only eol-format. If the eol-format
7017 part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7018 detect only text-format. */
7021 detect_coding_system (src
, src_chars
, src_bytes
, highest
, multibytep
,
7023 const unsigned char *src
;
7024 int src_chars
, src_bytes
, highest
;
7026 Lisp_Object coding_system
;
7028 const unsigned char *src_end
= src
+ src_bytes
;
7029 Lisp_Object attrs
, eol_type
;
7031 struct coding_system coding
;
7033 struct coding_detection_info detect_info
;
7034 enum coding_category base_category
;
7036 if (NILP (coding_system
))
7037 coding_system
= Qundecided
;
7038 setup_coding_system (coding_system
, &coding
);
7039 attrs
= CODING_ID_ATTRS (coding
.id
);
7040 eol_type
= CODING_ID_EOL_TYPE (coding
.id
);
7041 coding_system
= CODING_ATTR_BASE_NAME (attrs
);
7043 coding
.source
= src
;
7044 coding
.src_chars
= src_chars
;
7045 coding
.src_bytes
= src_bytes
;
7046 coding
.src_multibyte
= multibytep
;
7047 coding
.consumed
= 0;
7048 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7050 detect_info
.checked
= detect_info
.found
= detect_info
.rejected
= 0;
7052 /* At first, detect text-format if necessary. */
7053 base_category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7054 if (base_category
== coding_category_undecided
)
7056 enum coding_category category
;
7057 struct coding_system
*this;
7060 /* Skip all ASCII bytes except for a few ISO2022 controls. */
7061 for (i
= 0; src
< src_end
; i
++, src
++)
7064 if (c
& 0x80 || (c
< 0x20 && (c
== ISO_CODE_ESC
7066 || c
== ISO_CODE_SO
)))
7069 coding
.head_ascii
= src
- coding
.source
;
7072 for (i
= 0; i
< coding_category_raw_text
; i
++)
7074 category
= coding_priorities
[i
];
7075 this = coding_categories
+ category
;
7079 /* No coding system of this category is defined. */
7080 detect_info
.rejected
|= (1 << category
);
7082 else if (category
>= coding_category_raw_text
)
7084 else if (detect_info
.checked
& (1 << category
))
7087 && (detect_info
.found
& (1 << category
)))
7092 if ((*(this->detector
)) (&coding
, &detect_info
)
7094 && (detect_info
.found
& (1 << category
)))
7096 if (category
== coding_category_utf_16_auto
)
7098 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
7099 category
= coding_category_utf_16_le
;
7101 category
= coding_category_utf_16_be
;
7108 if (detect_info
.rejected
== CATEGORY_MASK_ANY
)
7110 detect_info
.found
= CATEGORY_MASK_RAW_TEXT
;
7111 id
= coding_categories
[coding_category_raw_text
].id
;
7112 val
= Fcons (make_number (id
), Qnil
);
7114 else if (! detect_info
.rejected
&& ! detect_info
.found
)
7116 detect_info
.found
= CATEGORY_MASK_ANY
;
7117 id
= coding_categories
[coding_category_undecided
].id
;
7118 val
= Fcons (make_number (id
), Qnil
);
7122 if (detect_info
.found
)
7124 detect_info
.found
= 1 << category
;
7125 val
= Fcons (make_number (this->id
), Qnil
);
7128 for (i
= 0; i
< coding_category_raw_text
; i
++)
7129 if (! (detect_info
.rejected
& (1 << coding_priorities
[i
])))
7131 detect_info
.found
= 1 << coding_priorities
[i
];
7132 id
= coding_categories
[coding_priorities
[i
]].id
;
7133 val
= Fcons (make_number (id
), Qnil
);
7139 int mask
= detect_info
.rejected
| detect_info
.found
;
7143 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
7145 category
= coding_priorities
[i
];
7146 if (! (mask
& (1 << category
)))
7148 found
|= 1 << category
;
7149 id
= coding_categories
[category
].id
;
7150 val
= Fcons (make_number (id
), val
);
7153 for (i
= coding_category_raw_text
- 1; i
>= 0; i
--)
7155 category
= coding_priorities
[i
];
7156 if (detect_info
.found
& (1 << category
))
7158 id
= coding_categories
[category
].id
;
7159 val
= Fcons (make_number (id
), val
);
7162 detect_info
.found
|= found
;
7165 else if (base_category
== coding_category_utf_16_auto
)
7167 if (detect_coding_utf_16 (&coding
, &detect_info
))
7169 enum coding_category category
;
7170 struct coding_system
*this;
7172 if (detect_info
.found
& CATEGORY_MASK_UTF_16_LE
)
7173 this = coding_categories
+ coding_category_utf_16_le
;
7174 else if (detect_info
.found
& CATEGORY_MASK_UTF_16_BE
)
7175 this = coding_categories
+ coding_category_utf_16_be
;
7176 else if (detect_info
.rejected
& CATEGORY_MASK_UTF_16_LE_NOSIG
)
7177 this = coding_categories
+ coding_category_utf_16_be_nosig
;
7179 this = coding_categories
+ coding_category_utf_16_le_nosig
;
7180 val
= Fcons (make_number (this->id
), Qnil
);
7185 detect_info
.found
= 1 << XINT (CODING_ATTR_CATEGORY (attrs
));
7186 val
= Fcons (make_number (coding
.id
), Qnil
);
7189 /* Then, detect eol-format if necessary. */
7191 int normal_eol
= -1, utf_16_be_eol
= -1, utf_16_le_eol
;
7194 if (VECTORP (eol_type
))
7196 if (detect_info
.found
& ~CATEGORY_MASK_UTF_16
)
7197 normal_eol
= detect_eol (coding
.source
, src_bytes
,
7198 coding_category_raw_text
);
7199 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_BE
7200 | CATEGORY_MASK_UTF_16_BE_NOSIG
))
7201 utf_16_be_eol
= detect_eol (coding
.source
, src_bytes
,
7202 coding_category_utf_16_be
);
7203 if (detect_info
.found
& (CATEGORY_MASK_UTF_16_LE
7204 | CATEGORY_MASK_UTF_16_LE_NOSIG
))
7205 utf_16_le_eol
= detect_eol (coding
.source
, src_bytes
,
7206 coding_category_utf_16_le
);
7210 if (EQ (eol_type
, Qunix
))
7211 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_LF
;
7212 else if (EQ (eol_type
, Qdos
))
7213 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CRLF
;
7215 normal_eol
= utf_16_be_eol
= utf_16_le_eol
= EOL_SEEN_CR
;
7218 for (tail
= val
; CONSP (tail
); tail
= XCDR (tail
))
7220 enum coding_category category
;
7223 id
= XINT (XCAR (tail
));
7224 attrs
= CODING_ID_ATTRS (id
);
7225 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
7226 eol_type
= CODING_ID_EOL_TYPE (id
);
7227 if (VECTORP (eol_type
))
7229 if (category
== coding_category_utf_16_be
7230 || category
== coding_category_utf_16_be_nosig
)
7231 this_eol
= utf_16_be_eol
;
7232 else if (category
== coding_category_utf_16_le
7233 || category
== coding_category_utf_16_le_nosig
)
7234 this_eol
= utf_16_le_eol
;
7236 this_eol
= normal_eol
;
7238 if (this_eol
== EOL_SEEN_LF
)
7239 XSETCAR (tail
, AREF (eol_type
, 0));
7240 else if (this_eol
== EOL_SEEN_CRLF
)
7241 XSETCAR (tail
, AREF (eol_type
, 1));
7242 else if (this_eol
== EOL_SEEN_CR
)
7243 XSETCAR (tail
, AREF (eol_type
, 2));
7245 XSETCAR (tail
, CODING_ID_NAME (id
));
7248 XSETCAR (tail
, CODING_ID_NAME (id
));
7252 return (highest
? XCAR (val
) : val
);
7256 DEFUN ("detect-coding-region", Fdetect_coding_region
, Sdetect_coding_region
,
7258 doc
: /* Detect coding system of the text in the region between START and END.
7259 Return a list of possible coding systems ordered by priority.
7261 If only ASCII characters are found, it returns a list of single element
7262 `undecided' or its subsidiary coding system according to a detected
7265 If optional argument HIGHEST is non-nil, return the coding system of
7266 highest priority. */)
7267 (start
, end
, highest
)
7268 Lisp_Object start
, end
, highest
;
7271 int from_byte
, to_byte
;
7273 CHECK_NUMBER_COERCE_MARKER (start
);
7274 CHECK_NUMBER_COERCE_MARKER (end
);
7276 validate_region (&start
, &end
);
7277 from
= XINT (start
), to
= XINT (end
);
7278 from_byte
= CHAR_TO_BYTE (from
);
7279 to_byte
= CHAR_TO_BYTE (to
);
7281 if (from
< GPT
&& to
>= GPT
)
7282 move_gap_both (to
, to_byte
);
7284 return detect_coding_system (BYTE_POS_ADDR (from_byte
),
7285 to
- from
, to_byte
- from_byte
,
7287 !NILP (current_buffer
7288 ->enable_multibyte_characters
),
/* Lisp primitive `detect-coding-string'.  After checking that STRING
   is a string, pass its data, character count, byte count, the
   HIGHEST flag and its multibyteness to detect_coding_system.  */
7292 DEFUN ("detect-coding-string", Fdetect_coding_string
, Sdetect_coding_string
,
7294 doc
: /* Detect coding system of the text in STRING.
7295 Return a list of possible coding systems ordered by priority.
7297 If only ASCII characters are found, it returns a list of single element
7298 `undecided' or its subsidiary coding system according to a detected
7301 If optional argument HIGHEST is non-nil, return the coding system of
7302 highest priority. */)
7304 Lisp_Object string
, highest
;
7306 CHECK_STRING (string
);
7308 return detect_coding_system (SDATA (string
),
7309 SCHARS (string
), SBYTES (string
),
7310 !NILP (highest
), STRING_MULTIBYTE (string
),
/* Return non-zero if character C belongs to one of the charsets in
   the charset list of the coding-system attribute vector ATTRS.
   If ATTRS holds a non-nil translation table, C is translated through
   it before the charsets are consulted.  */
7316 char_encodable_p (c
, attrs
)
7321 struct charset
*charset
;
7322 Lisp_Object translation_table
;
7324 translation_table
= CODING_ATTR_TRANS_TBL (attrs
);
7325 if (! NILP (translation_table
))
7326 c
= translate_char (translation_table
, c
);
/* Walk the charset-ID list; TAIL is left non-nil only if the walk
   stops early (presumably via a break on the first charset that
   contains C -- that line is not present in this copy).  */
7327 for (tail
= CODING_ATTR_CHARSET_LIST (attrs
);
7328 CONSP (tail
); tail
= XCDR (tail
))
7330 charset
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
7331 if (CHAR_CHARSET_P (c
, charset
))
7334 return (! NILP (tail
));
7338 /* Return a list of coding systems that safely encode the text between
7339 START and END. If EXCLUDE is non-nil, it is a list of coding
7340 systems not to check. The returned list doesn't contain any such
7341 coding systems. In any case, if the text contains only ASCII or is
7342 unibyte, return t. */
7344 DEFUN ("find-coding-systems-region-internal",
7345 Ffind_coding_systems_region_internal
,
7346 Sfind_coding_systems_region_internal
, 2, 3, 0,
7347 doc
: /* Internal use only. */)
7348 (start
, end
, exclude
)
7349 Lisp_Object start
, end
, exclude
;
7351 Lisp_Object coding_attrs_list
, safe_codings
;
7352 EMACS_INT start_byte
, end_byte
;
7353 const unsigned char *p
, *pbeg
, *pend
;
7355 Lisp_Object tail
, elt
;
/* START may be a string, in which case the whole string is examined
   (END_BYTE is its byte length).  The test below detects a unibyte
   string or one whose character and byte counts agree.  */
7357 if (STRINGP (start
))
7359 if (!STRING_MULTIBYTE (start
)
7360 || SCHARS (start
) == SBYTES (start
))
7363 end_byte
= SBYTES (start
);
/* Otherwise START and END are buffer positions (markers allowed).  */
7367 CHECK_NUMBER_COERCE_MARKER (start
);
7368 CHECK_NUMBER_COERCE_MARKER (end
);
7369 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7370 args_out_of_range (start
, end
);
7371 if (NILP (current_buffer
->enable_multibyte_characters
))
7373 start_byte
= CHAR_TO_BYTE (XINT (start
));
7374 end_byte
= CHAR_TO_BYTE (XINT (end
));
7375 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
/* If the gap falls strictly inside the region, move it to whichever
   end of the region is nearer.  */
7378 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7380 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7381 move_gap_both (XINT (start
), start_byte
);
7383 move_gap_both (XINT (end
), end_byte
);
/* Collect the attribute vectors of the candidate coding systems:
   entries of Vcoding_system_list that are their own base name and
   whose type is not `undecided'.  Each vector's coding_attr_trans_tbl
   slot is refreshed from get_translation_table (attrs, 1, NULL).  */
7387 coding_attrs_list
= Qnil
;
7388 for (tail
= Vcoding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7390 || NILP (Fmemq (XCAR (tail
), exclude
)))
7394 attrs
= AREF (CODING_SYSTEM_SPEC (XCAR (tail
)), 0);
7395 if (EQ (XCAR (tail
), CODING_ATTR_BASE_NAME (attrs
))
7396 && ! EQ (CODING_ATTR_TYPE (attrs
), Qundecided
))
7398 ASET (attrs
, coding_attr_trans_tbl
,
7399 get_translation_table (attrs
, 1, NULL
));
7400 coding_attrs_list
= Fcons (attrs
, coding_attrs_list
);
7404 if (STRINGP (start
))
7405 p
= pbeg
= SDATA (start
);
7407 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7408 pend
= p
+ (end_byte
- start_byte
);
/* Trim leading and trailing ASCII bytes from the span to scan.  */
7410 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++;
7411 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7415 if (ASCII_BYTE_P (*p
))
7419 c
= STRING_CHAR_ADVANCE (p
);
/* Check character C against every remaining candidate.  A candidate
   that fails is removed from the list in place: the following cell
   is copied over the current one, or, for the last cell, its car is
   set to nil.  */
7421 charset_map_loaded
= 0;
7422 for (tail
= coding_attrs_list
; CONSP (tail
);)
7427 else if (char_encodable_p (c
, elt
))
7429 else if (CONSP (XCDR (tail
)))
7431 XSETCAR (tail
, XCAR (XCDR (tail
)));
7432 XSETCDR (tail
, XCDR (XCDR (tail
)));
7436 XSETCAR (tail
, Qnil
);
/* If charset_map_loaded became non-zero during the checks, recompute
   the text pointers from their saved offsets (presumably because
   loading a charset map can relocate the text -- confirm).  */
7440 if (charset_map_loaded
)
7442 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7444 if (STRINGP (start
))
7445 pbeg
= SDATA (start
);
7447 pbeg
= BYTE_POS_ADDR (start_byte
);
7448 p
= pbeg
+ p_offset
;
7449 pend
= pbeg
+ pend_offset
;
/* The result is the base names of the candidates that survived.  */
7454 safe_codings
= Qnil
;
7455 for (tail
= coding_attrs_list
; CONSP (tail
); tail
= XCDR (tail
))
7456 if (! NILP (XCAR (tail
)))
7457 safe_codings
= Fcons (CODING_ATTR_BASE_NAME (XCAR (tail
)), safe_codings
);
7459 return safe_codings
;
/* Lisp primitive `unencodable-char-position'.  Scans either the
   buffer region START..END or, when STRING is given, that part of
   STRING, and collects the positions of characters that are not in
   CODING-SYSTEM's charset list after translation.
   NOTE(review): the `doc:' comment delimiters around the doc string
   below are not present in this copy of the file.  */
7463 DEFUN ("unencodable-char-position", Funencodable_char_position
,
7464 Sunencodable_char_position
, 3, 5, 0,
7466 Return position of first un-encodable character in a region.
7467 START and END specfiy the region and CODING-SYSTEM specifies the
7468 encoding to check. Return nil if CODING-SYSTEM does encode the region.
7470 If optional 4th argument COUNT is non-nil, it specifies at most how
7471 many un-encodable characters to search. In this case, the value is a
7474 If optional 5th argument STRING is non-nil, it is a string to search
7475 for un-encodable characters. In that case, START and END are indexes
7477 (start
, end
, coding_system
, count
, string
)
7478 Lisp_Object start
, end
, coding_system
, count
, string
;
7481 struct coding_system coding
;
7482 Lisp_Object attrs
, charset_list
, translation_table
;
7483 Lisp_Object positions
;
7485 const unsigned char *p
, *stop
, *pend
;
7486 int ascii_compatible
;
7488 setup_coding_system (Fcheck_coding_system (coding_system
), &coding
);
7489 attrs
= CODING_ID_ATTRS (coding
.id
);
7490 if (EQ (CODING_ATTR_TYPE (attrs
), Qraw_text
))
7492 ascii_compatible
= ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
));
7493 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7494 translation_table
= get_translation_table (attrs
, 1, NULL
);
/* Buffer case: START and END are buffer positions.  */
7498 validate_region (&start
, &end
);
7499 from
= XINT (start
);
/* Detects a unibyte buffer, or an ASCII-compatible coding system with
   a region whose character and byte lengths agree.  */
7501 if (NILP (current_buffer
->enable_multibyte_characters
)
7502 || (ascii_compatible
7503 && (to
- from
) == (CHAR_TO_BYTE (to
) - (CHAR_TO_BYTE (from
)))))
7505 p
= CHAR_POS_ADDR (from
);
7506 pend
= CHAR_POS_ADDR (to
);
7507 if (from
< GPT
&& to
>= GPT
)
/* String case: START and END are character indexes into STRING.  */
7514 CHECK_STRING (string
);
7515 CHECK_NATNUM (start
);
7517 from
= XINT (start
);
7520 || to
> SCHARS (string
))
7521 args_out_of_range_3 (string
, start
, end
);
7522 if (! STRING_MULTIBYTE (string
))
7524 p
= SDATA (string
) + string_char_to_byte (string
, from
);
7525 stop
= pend
= SDATA (string
) + string_char_to_byte (string
, to
);
7526 if (ascii_compatible
&& (to
- from
) == (pend
- p
))
7534 CHECK_NATNUM (count
);
/* For an ASCII-compatible coding system, runs of ASCII bytes are
   skipped without decoding them.  */
7543 if (ascii_compatible
)
7544 while (p
< stop
&& ASCII_BYTE_P (*p
))
/* A character is recorded when it is not an ASCII character of an
   ASCII-compatible coding system and char_charset finds no charset
   in CHARSET_LIST for its translated value.  */
7554 c
= STRING_CHAR_ADVANCE (p
);
7555 if (! (ASCII_CHAR_P (c
) && ascii_compatible
)
7556 && ! char_charset (translate_char (translation_table
, c
),
7557 charset_list
, NULL
))
7559 positions
= Fcons (make_number (from
), positions
);
/* Without COUNT the value is the car of POSITIONS; with COUNT it is
   the list of positions in scan order.  */
7568 return (NILP (count
) ? Fcar (positions
) : Fnreverse (positions
));
/* Lisp primitive `check-coding-systems-region'.  For each coding
   system in CODING-SYSTEM-LIST, record the positions of the characters
   in the region (or in the string START) that char_encodable_p rejects
   for that coding system.  */
7572 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region
,
7573 Scheck_coding_systems_region
, 3, 3, 0,
7574 doc
: /* Check if the region is encodable by coding systems.
7576 START and END are buffer positions specifying the region.
7577 CODING-SYSTEM-LIST is a list of coding systems to check.
7579 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7580 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
7581 whole region, POS0, POS1, ... are buffer positions where non-encodable
7582 characters are found.
7584 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7587 START may be a string. In that case, check if the string is
7588 encodable, and the value contains indices to the string instead of
7589 buffer positions. END is ignored. */)
7590 (start
, end
, coding_system_list
)
7591 Lisp_Object start
, end
, coding_system_list
;
7594 EMACS_INT start_byte
, end_byte
;
7596 const unsigned char *p
, *pbeg
, *pend
;
7598 Lisp_Object tail
, elt
, attrs
;
7600 if (STRINGP (start
))
/* Shortcut for a unibyte string or a string whose character and byte
   counts agree (ASCII only).  The previous test,
   `!STRING_MULTIBYTE && SCHARS != SBYTES', could never be true because
   a unibyte string always has equal character and byte counts; the
   test now mirrors the buffer branch below and
   find-coding-systems-region-internal.  */
7602 if (!STRING_MULTIBYTE (start
)
7603 || SCHARS (start
) == SBYTES (start
))
7606 end_byte
= SBYTES (start
);
/* Otherwise START and END are buffer positions (markers allowed).  */
7611 CHECK_NUMBER_COERCE_MARKER (start
);
7612 CHECK_NUMBER_COERCE_MARKER (end
);
7613 if (XINT (start
) < BEG
|| XINT (end
) > Z
|| XINT (start
) > XINT (end
))
7614 args_out_of_range (start
, end
);
7615 if (NILP (current_buffer
->enable_multibyte_characters
))
7617 start_byte
= CHAR_TO_BYTE (XINT (start
));
7618 end_byte
= CHAR_TO_BYTE (XINT (end
));
7619 if (XINT (end
) - XINT (start
) == end_byte
- start_byte
)
/* If the gap falls strictly inside the region, move it to whichever
   end of the region is nearer.  */
7622 if (XINT (start
) < GPT
&& XINT (end
) > GPT
)
7624 if ((GPT
- XINT (start
)) < (XINT (end
) - GPT
))
7625 move_gap_both (XINT (start
), start_byte
);
7627 move_gap_both (XINT (end
), end_byte
);
/* Build LIST with one entry (CODING-SYSTEM ATTRS) per requested coding
   system; unencodable positions are later pushed after ATTRS.  */
7633 for (tail
= coding_system_list
; CONSP (tail
); tail
= XCDR (tail
))
7636 attrs
= AREF (CODING_SYSTEM_SPEC (elt
), 0);
7637 ASET (attrs
, coding_attr_trans_tbl
,
7638 get_translation_table (attrs
, 1, NULL
));
7639 list
= Fcons (Fcons (elt
, Fcons (attrs
, Qnil
)), list
);
7642 if (STRINGP (start
))
7643 p
= pbeg
= SDATA (start
);
7645 p
= pbeg
= BYTE_POS_ADDR (start_byte
);
7646 pend
= p
+ (end_byte
- start_byte
);
/* Trim leading and trailing ASCII bytes; POS tracks the position of
   the character at P.  */
7648 while (p
< pend
&& ASCII_BYTE_P (*p
)) p
++, pos
++;
7649 while (p
< pend
&& ASCII_BYTE_P (*(pend
- 1))) pend
--;
7653 if (ASCII_BYTE_P (*p
))
7657 c
= STRING_CHAR_ADVANCE (p
);
7659 charset_map_loaded
= 0;
7660 for (tail
= list
; CONSP (tail
); tail
= XCDR (tail
))
7662 elt
= XCDR (XCAR (tail
));
7663 if (! char_encodable_p (c
, XCAR (elt
)))
7664 XSETCDR (elt
, Fcons (make_number (pos
), XCDR (elt
)));
/* If charset_map_loaded became non-zero during the checks, recompute
   the text pointers from their saved offsets (presumably because
   loading a charset map can relocate the text -- confirm).  */
7666 if (charset_map_loaded
)
7668 EMACS_INT p_offset
= p
- pbeg
, pend_offset
= pend
- pbeg
;
7670 if (STRINGP (start
))
7671 pbeg
= SDATA (start
);
7673 pbeg
= BYTE_POS_ADDR (start_byte
);
7674 p
= pbeg
+ p_offset
;
7675 pend
= pbeg
+ pend_offset
;
/* Keep only the coding systems for which at least one position was
   recorded, with the positions put back in ascending order.  */
7683 for (; CONSP (tail
); tail
= XCDR (tail
))
7686 if (CONSP (XCDR (XCDR (elt
))))
7687 list
= Fcons (Fcons (XCAR (elt
), Fnreverse (XCDR (XCDR (elt
)))),
/* Encode (ENCODEP non-zero) or decode the text of the current buffer
   between START and END with CODING_SYSTEM (nil means
   `no-conversion').  DST_OBJECT selects the destination: nil means
   the current buffer itself, t is passed through to the converter,
   and anything else must be a buffer.  Returns the number of produced
   characters when the destination is a buffer, otherwise the
   converter's destination object.  */
7696 code_convert_region (start
, end
, coding_system
, dst_object
, encodep
, norecord
)
7697 Lisp_Object start
, end
, coding_system
, dst_object
;
7698 int encodep
, norecord
;
7700 struct coding_system coding
;
7701 EMACS_INT from
, from_byte
, to
, to_byte
;
7702 Lisp_Object src_object
;
7704 CHECK_NUMBER_COERCE_MARKER (start
);
7705 CHECK_NUMBER_COERCE_MARKER (end
);
7706 if (NILP (coding_system
))
7707 coding_system
= Qno_conversion
;
7709 CHECK_CODING_SYSTEM (coding_system
);
7710 src_object
= Fcurrent_buffer ();
7711 if (NILP (dst_object
))
7712 dst_object
= src_object
;
7713 else if (! EQ (dst_object
, Qt
))
7714 CHECK_BUFFER (dst_object
);
7716 validate_region (&start
, &end
);
7717 from
= XFASTINT (start
);
7718 from_byte
= CHAR_TO_BYTE (from
);
7719 to
= XFASTINT (end
);
7720 to_byte
= CHAR_TO_BYTE (to
);
/* The whole region is converted in one call, so it is flagged as the
   last block.  */
7722 setup_coding_system (coding_system
, &coding
);
7723 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7726 encode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7729 decode_coding_object (&coding
, src_object
, from
, from_byte
, to
, to_byte
,
7732 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7734 return (BUFFERP (dst_object
)
7735 ? make_number (coding
.produced_char
)
7736 : coding
.dst_object
);
/* Lisp primitive `decode-coding-region': a thin wrapper that calls
   code_convert_region with ENCODEP = 0 and NORECORD = 0.  */
7740 DEFUN ("decode-coding-region", Fdecode_coding_region
, Sdecode_coding_region
,
7741 3, 4, "r\nzCoding system: ",
7742 doc
: /* Decode the current region from the specified coding system.
7743 When called from a program, takes four arguments:
7744 START, END, CODING-SYSTEM, and DESTINATION.
7745 START and END are buffer positions.
7747 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7748 If nil, the region between START and END is replaced by the decoded text.
7749 If buffer, the decoded text is inserted in the buffer.
7750 If t, the decoded text is returned.
7752 This function sets `last-coding-system-used' to the precise coding system
7753 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7754 not fully specified.)
7755 It returns the length of the decoded text. */)
7756 (start
, end
, coding_system
, destination
)
7757 Lisp_Object start
, end
, coding_system
, destination
;
7759 return code_convert_region (start
, end
, coding_system
, destination
, 0, 0);
/* Lisp primitive `encode-coding-region': a thin wrapper that calls
   code_convert_region with ENCODEP = 1 and NORECORD = 0.  */
7762 DEFUN ("encode-coding-region", Fencode_coding_region
, Sencode_coding_region
,
7763 3, 4, "r\nzCoding system: ",
7764 doc
: /* Encode the current region by specified coding system.
7765 When called from a program, takes three arguments:
7766 START, END, and CODING-SYSTEM. START and END are buffer positions.
7768 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7769 If nil, the region between START and END is replaced by the encoded text.
7770 If buffer, the encoded text is inserted in the buffer.
7771 If t, the encoded text is returned.
7773 This function sets `last-coding-system-used' to the precise coding system
7774 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7775 not fully specified.)
7776 It returns the length of the encoded text. */)
7777 (start
, end
, coding_system
, destination
)
7778 Lisp_Object start
, end
, coding_system
, destination
;
7780 return code_convert_region (start
, end
, coding_system
, destination
, 1, 0);
/* Encode (ENCODEP non-zero) or decode STRING with CODING_SYSTEM (nil
   means `no-conversion').  DST_OBJECT is nil, t, or a buffer that
   receives the converted text.  NOCOPY non-zero means the caller
   accepts STRING itself as the result when no conversion is needed.
   Returns the number of produced characters when DST_OBJECT is a
   buffer, otherwise the converter's destination object.  */
7784 code_convert_string (string
, coding_system
, dst_object
,
7785 encodep
, nocopy
, norecord
)
7786 Lisp_Object string
, coding_system
, dst_object
;
7787 int encodep
, nocopy
, norecord
;
7789 struct coding_system coding
;
7790 EMACS_INT chars
, bytes
;
7792 CHECK_STRING (string
);
7793 if (NILP (coding_system
))
7796 Vlast_coding_system_used
= Qno_conversion
;
7797 if (NILP (dst_object
))
/* Trivial conversion: hand back STRING itself only when the caller
   allowed it with NOCOPY, otherwise a fresh copy.  The two arms were
   previously swapped, so callers that did not pass NOCOPY got the
   original string object back.  */
7798 return (nocopy
? string
: Fcopy_sequence (string
));
7801 if (NILP (coding_system
))
7802 coding_system
= Qno_conversion
;
7804 CHECK_CODING_SYSTEM (coding_system
);
7805 if (NILP (dst_object
))
7807 else if (! EQ (dst_object
, Qt
))
7808 CHECK_BUFFER (dst_object
);
/* The whole string is converted in one call, so it is flagged as the
   last block.  */
7810 setup_coding_system (coding_system
, &coding
);
7811 coding
.mode
|= CODING_MODE_LAST_BLOCK
;
7812 chars
= SCHARS (string
);
7813 bytes
= SBYTES (string
);
7815 encode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7817 decode_coding_object (&coding
, string
, 0, 0, chars
, bytes
, dst_object
);
7819 Vlast_coding_system_used
= CODING_ID_NAME (coding
.id
);
7821 return (BUFFERP (dst_object
)
7822 ? make_number (coding
.produced_char
)
7823 : coding
.dst_object
);
7827 /* Encode or decode STRING according to CODING_SYSTEM.
7828 Do not set Vlast_coding_system_used.
7830 This function is called only from macros DECODE_FILE and
7831 ENCODE_FILE, thus we ignore character composition. */
/* Implemented as code_convert_string with DST_OBJECT = t, NOCOPY = 0
   and NORECORD = 1.  */
7834 code_convert_string_norecord (string
, coding_system
, encodep
)
7835 Lisp_Object string
, coding_system
;
7838 return code_convert_string (string
, coding_system
, Qt
, encodep
, 0, 1);
/* Lisp primitive `decode-coding-string': calls code_convert_string
   with BUFFER as the destination, ENCODEP = 0, NOCOPY converted to a
   C boolean, and NORECORD = 0.  */
7842 DEFUN ("decode-coding-string", Fdecode_coding_string
, Sdecode_coding_string
,
7844 doc
: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7846 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7847 if the decoding operation is trivial.
7849 Optional fourth arg BUFFER non-nil means that the decoded text is
7850 inserted in BUFFER instead of returned as a string. In this case,
7851 the return value is the length of the decoded text.
7853 This function sets `last-coding-system-used' to the precise coding system
7854 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7855 not fully specified. */)
7856 (string
, coding_system
, nocopy
, buffer
)
7857 Lisp_Object string
, coding_system
, nocopy
, buffer
;
7859 return code_convert_string (string
, coding_system
, buffer
,
7860 0, ! NILP (nocopy
), 0);
/* Lisp primitive `encode-coding-string': calls code_convert_string
   with BUFFER as the destination, ENCODEP = 1, NOCOPY converted to a
   C boolean, and NORECORD = 0.  */
7863 DEFUN ("encode-coding-string", Fencode_coding_string
, Sencode_coding_string
,
7865 doc
: /* Encode STRING to CODING-SYSTEM, and return the result.
7867 Optional third arg NOCOPY non-nil means it is OK to return STRING
7868 itself if the encoding operation is trivial.
7870 Optional fourth arg BUFFER non-nil means that the encoded text is
7871 inserted in BUFFER instead of returned as a string. In this case,
7872 the return value is the length of the encoded text.
7874 This function sets `last-coding-system-used' to the precise coding system
7875 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7876 not fully specified.) */)
7877 (string
, coding_system
, nocopy
, buffer
)
7878 Lisp_Object string
, coding_system
, nocopy
, buffer
;
/* NORECORD must be 0 here: the doc string promises that
   `last-coding-system-used' is set, and decode-coding-string passes 0
   as well.  It was previously 1, the value the norecord variant uses
   to suppress that.  */
7880 return code_convert_string (string
, coding_system
, buffer
,
7881 1, ! NILP (nocopy
), 0);
7885 DEFUN ("decode-sjis-char", Fdecode_sjis_char
, Sdecode_sjis_char
, 1, 1, 0,
7886 doc
: /* Decode a Japanese character which has CODE in shift_jis encoding.
7887 Return the corresponding character. */)
7891 Lisp_Object spec
, attrs
, val
;
7892 struct charset
*charset_roman
, *charset_kanji
, *charset_kana
, *charset
;
7895 CHECK_NATNUM (code
);
7896 c
= XFASTINT (code
);
7897 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7898 attrs
= AREF (spec
, 0);
7900 if (ASCII_BYTE_P (c
)
7901 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7904 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7905 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7906 charset_kana
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7907 charset_kanji
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7910 charset
= charset_roman
;
7911 else if (c
>= 0xA0 && c
< 0xDF)
7913 charset
= charset_kana
;
7918 int s1
= c
>> 8, s2
= c
& 0xFF;
7920 if (s1
< 0x81 || (s1
> 0x9F && s1
< 0xE0) || s1
> 0xEF
7921 || s2
< 0x40 || s2
== 0x7F || s2
> 0xFC)
7922 error ("Invalid code: %d", code
);
7924 charset
= charset_kanji
;
7926 c
= DECODE_CHAR (charset
, c
);
7928 error ("Invalid code: %d", code
);
7929 return make_number (c
);
/* Lisp primitive `encode-sjis-char'.  Looks the character up in the
   charset list of Vsjis_coding_system with char_charset and returns
   the resulting code as a Lisp integer; signals an error when the
   code is the charset's invalid-code marker.  */
7933 DEFUN ("encode-sjis-char", Fencode_sjis_char
, Sencode_sjis_char
, 1, 1, 0,
7934 doc
: /* Encode a Japanese character CHAR to shift_jis encoding.
7935 Return the corresponding code in SJIS. */)
7939 Lisp_Object spec
, attrs
, charset_list
;
7941 struct charset
*charset
;
7944 CHECK_CHARACTER (ch
);
7946 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system
, spec
);
7947 attrs
= AREF (spec
, 0);
7949 if (ASCII_CHAR_P (c
)
7950 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7953 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
7954 charset
= char_charset (c
, charset_list
, &code
);
7955 if (code
== CHARSET_INVALID_CODE (charset
))
7956 error ("Can't encode by shift_jis encoding: %d", c
);
7959 return make_number (code
);
7962 DEFUN ("decode-big5-char", Fdecode_big5_char
, Sdecode_big5_char
, 1, 1, 0,
7963 doc
: /* Decode a Big5 character which has CODE in BIG5 coding system.
7964 Return the corresponding character. */)
7968 Lisp_Object spec
, attrs
, val
;
7969 struct charset
*charset_roman
, *charset_big5
, *charset
;
7972 CHECK_NATNUM (code
);
7973 c
= XFASTINT (code
);
7974 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
7975 attrs
= AREF (spec
, 0);
7977 if (ASCII_BYTE_P (c
)
7978 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
7981 val
= CODING_ATTR_CHARSET_LIST (attrs
);
7982 charset_roman
= CHARSET_FROM_ID (XINT (XCAR (val
))), val
= XCDR (val
);
7983 charset_big5
= CHARSET_FROM_ID (XINT (XCAR (val
)));
7986 charset
= charset_roman
;
7989 int b1
= c
>> 8, b2
= c
& 0x7F;
7990 if (b1
< 0xA1 || b1
> 0xFE
7991 || b2
< 0x40 || (b2
> 0x7E && b2
< 0xA1) || b2
> 0xFE)
7992 error ("Invalid code: %d", code
);
7993 charset
= charset_big5
;
7995 c
= DECODE_CHAR (charset
, (unsigned )c
);
7997 error ("Invalid code: %d", code
);
7998 return make_number (c
);
/* Lisp primitive `encode-big5-char'.  Looks the character up in the
   charset list of Vbig5_coding_system with char_charset and returns
   the resulting code as a Lisp integer; signals an error when the
   code is the charset's invalid-code marker.  */
8001 DEFUN ("encode-big5-char", Fencode_big5_char
, Sencode_big5_char
, 1, 1, 0,
8002 doc
: /* Encode the Big5 character CHAR to BIG5 coding system.
8003 Return the corresponding character code in Big5. */)
8007 Lisp_Object spec
, attrs
, charset_list
;
8008 struct charset
*charset
;
8012 CHECK_CHARACTER (ch
);
8014 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system
, spec
);
8015 attrs
= AREF (spec
, 0);
8016 if (ASCII_CHAR_P (c
)
8017 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs
)))
8020 charset_list
= CODING_ATTR_CHARSET_LIST (attrs
);
8021 charset
= char_charset (c
, charset_list
, &code
);
8022 if (code
== CHARSET_INVALID_CODE (charset
))
8023 error ("Can't encode by Big5 encoding: %d", c
);
8025 return make_number (code
);
/* Lisp primitive `set-terminal-coding-system-internal'.  Validates
   CODING-SYSTEM, sets up a coding structure from it, and then adjusts
   the global `terminal_coding': safe-encoding mode on, composition
   annotation off, multibyte source, unibyte destination.  */
8029 DEFUN ("set-terminal-coding-system-internal",
8030 Fset_terminal_coding_system_internal
,
8031 Sset_terminal_coding_system_internal
, 1, 1, 0,
8032 doc
: /* Internal use only. */)
8034 Lisp_Object coding_system
;
8036 CHECK_SYMBOL (coding_system
);
8037 setup_coding_system (Fcheck_coding_system (coding_system
),
8040 /* We had better not send unsafe characters to terminal. */
8041 terminal_coding
.mode
|= CODING_MODE_SAFE_ENCODING
;
8042 /* Character composition should be disabled. */
8043 terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
8044 terminal_coding
.src_multibyte
= 1;
8045 terminal_coding
.dst_multibyte
= 0;
/* Lisp primitive `set-safe-terminal-coding-system-internal'.  Sets up
   the global `safe_terminal_coding' from CODING-SYSTEM, then turns
   composition annotation off and marks the source as multibyte and
   the destination as unibyte.  */
8049 DEFUN ("set-safe-terminal-coding-system-internal",
8050 Fset_safe_terminal_coding_system_internal
,
8051 Sset_safe_terminal_coding_system_internal
, 1, 1, 0,
8052 doc
: /* Internal use only. */)
8054 Lisp_Object coding_system
;
8056 CHECK_SYMBOL (coding_system
);
8057 setup_coding_system (Fcheck_coding_system (coding_system
),
8058 &safe_terminal_coding
);
8059 /* Character composition should be disabled. */
8060 safe_terminal_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
8061 safe_terminal_coding
.src_multibyte
= 1;
8062 safe_terminal_coding
.dst_multibyte
= 0;
/* Lisp primitive `terminal-coding-system': returns the name recorded
   for the id of the global `terminal_coding'.  */
8066 DEFUN ("terminal-coding-system",
8067 Fterminal_coding_system
, Sterminal_coding_system
, 0, 0, 0,
8068 doc
: /* Return coding system specified for terminal output. */)
8071 return CODING_ID_NAME (terminal_coding
.id
);
/* Lisp primitive `set-keyboard-coding-system-internal'.  Validates
   CODING-SYSTEM, sets up a coding structure from it, and turns
   composition annotation off in the global `keyboard_coding'.  */
8074 DEFUN ("set-keyboard-coding-system-internal",
8075 Fset_keyboard_coding_system_internal
,
8076 Sset_keyboard_coding_system_internal
, 1, 1, 0,
8077 doc
: /* Internal use only. */)
8079 Lisp_Object coding_system
;
8081 CHECK_SYMBOL (coding_system
);
8082 setup_coding_system (Fcheck_coding_system (coding_system
),
8084 /* Character composition should be disabled. */
8085 keyboard_coding
.common_flags
&= ~CODING_ANNOTATE_COMPOSITION_MASK
;
/* Lisp primitive `keyboard-coding-system': returns the name recorded
   for the id of the global `keyboard_coding'.  */
8089 DEFUN ("keyboard-coding-system",
8090 Fkeyboard_coding_system
, Skeyboard_coding_system
, 0, 0, 0,
8091 doc
: /* Return coding system specified for decoding keyboard input. */)
8094 return CODING_ID_NAME (keyboard_coding
.id
);
/* Lisp primitive `find-operation-coding-system'.  ARGS[0] is the
   operation symbol; its Qtarget_idx property gives the index (counted
   after the operation) of the argument used as TARGET.  TARGET is
   then matched against the alist selected by the operation.  */
8098 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system
,
8099 Sfind_operation_coding_system
, 1, MANY
, 0,
8100 doc
: /* Choose a coding system for an operation based on the target name.
8101 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8102 DECODING-SYSTEM is the coding system to use for decoding
8103 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8104 for encoding (in case OPERATION does encoding).
8106 The first argument OPERATION specifies an I/O primitive:
8107 For file I/O, `insert-file-contents' or `write-region'.
8108 For process I/O, `call-process', `call-process-region', or `start-process'.
8109 For network I/O, `open-network-stream'.
8111 The remaining arguments should be the same arguments that were passed
8112 to the primitive. Depending on which primitive, one of those arguments
8113 is selected as the TARGET. For example, if OPERATION does file I/O,
8114 whichever argument specifies the file name is TARGET.
8116 TARGET has a meaning which depends on OPERATION:
8117 For file I/O, TARGET is a file name.
8118 For process I/O, TARGET is a process name.
8119 For network I/O, TARGET is a service name or a port number
8121 This function looks up what specified for TARGET in,
8122 `file-coding-system-alist', `process-coding-system-alist',
8123 or `network-coding-system-alist' depending on OPERATION.
8124 They may specify a coding system, a cons of coding systems,
8125 or a function symbol to call.
8126 In the last case, we call the function with one argument,
8127 which is a list of all the arguments given to this function.
8129 usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */)
8134 Lisp_Object operation
, target_idx
, target
, val
;
8135 register Lisp_Object chain
;
8138 error ("Too few arguments");
8139 operation
= args
[0];
8140 if (!SYMBOLP (operation
)
8141 || !INTEGERP (target_idx
= Fget (operation
, Qtarget_idx
)))
8142 error ("Invalid first arguement");
/* TARGET is read from args[target_idx + 1], so that index must be
   strictly less than NARGS.  The former `<' test let
   nargs == target_idx + 1 through and read one element past the end
   of ARGS.  */
8143 if (nargs
<= 1 + XINT (target_idx
))
8144 error ("Too few arguments for operation: %s",
8145 SDATA (SYMBOL_NAME (operation
)));
8146 target
= args
[XINT (target_idx
) + 1];
/* TARGET must be a string, except that open-network-stream also
   accepts an integer.  */
8147 if (!(STRINGP (target
)
8148 || (EQ (operation
, Qopen_network_stream
) && INTEGERP (target
))))
8149 error ("Invalid %dth argument", XINT (target_idx
) + 1);
/* Select the alist by operation: file I/O, network I/O, or (for
   everything else) process I/O.  */
8151 chain
= ((EQ (operation
, Qinsert_file_contents
)
8152 || EQ (operation
, Qwrite_region
))
8153 ? Vfile_coding_system_alist
8154 : (EQ (operation
, Qopen_network_stream
)
8155 ? Vnetwork_coding_system_alist
8156 : Vprocess_coding_system_alist
));
/* An entry matches when its car is a regexp string matching a string
   TARGET, or is EQ to an integer TARGET.  */
8160 for (; CONSP (chain
); chain
= XCDR (chain
))
8166 && ((STRINGP (target
)
8167 && STRINGP (XCAR (elt
))
8168 && fast_string_match (XCAR (elt
), target
) >= 0)
8169 || (INTEGERP (target
) && EQ (target
, XCAR (elt
)))))
8172 /* Here, if VAL is both a valid coding system and a valid
8173 function symbol, we return VAL as a coding system. */
8176 if (! SYMBOLP (val
))
8178 if (! NILP (Fcoding_system_p (val
)))
8179 return Fcons (val
, val
);
8180 if (! NILP (Ffboundp (val
)))
8182 val
= call1 (val
, Flist (nargs
, args
));
8185 if (SYMBOLP (val
) && ! NILP (Fcoding_system_p (val
)))
8186 return Fcons (val
, val
);
/* Lisp primitive `set-coding-system-priority'.  Moves the categories
   of the given coding systems to the front of coding_priorities (in
   argument order), keeps the remaining categories in their previous
   relative order, and rebuilds Vcoding_category_list to match.  */
8194 DEFUN ("set-coding-system-priority", Fset_coding_system_priority
,
8195 Sset_coding_system_priority
, 0, MANY
, 0,
8196 doc
: /* Assign higher priority to the coding systems given as arguments.
8197 If multiple coding systems belongs to the same category,
8198 all but the first one are ignored.
8200 usage: (set-coding-system-priority ...) */)
/* CHANGED[cat] is set once category CAT has been given a new
   priority; PRIORITIES is the new order being built.  */
8206 int changed
[coding_category_max
];
8207 enum coding_category priorities
[coding_category_max
];
8209 bzero (changed
, sizeof changed
);
8211 for (i
= j
= 0; i
< nargs
; i
++)
8213 enum coding_category category
;
8214 Lisp_Object spec
, attrs
;
8216 CHECK_CODING_SYSTEM_GET_SPEC (args
[i
], spec
);
8217 attrs
= AREF (spec
, 0);
8218 category
= XINT (CODING_ATTR_CATEGORY (attrs
));
8219 if (changed
[category
])
8220 /* Ignore this coding system because a coding system of the
8221 same category already had a higher priority. */
8223 changed
[category
] = 1;
8224 priorities
[j
++] = category
;
/* Make this coding system the representative of its category when the
   category is set up with a different one.  */
8225 if (coding_categories
[category
].id
>= 0
8226 && ! EQ (args
[i
], CODING_ID_NAME (coding_categories
[category
].id
)))
8227 setup_coding_system (args
[i
], &coding_categories
[category
]);
8228 Fset (AREF (Vcoding_category_table
, category
), args
[i
]);
8231 /* Now we have decided top J priorities. Reflect the order of the
8232 original priorities to the remaining priorities. */
8234 for (i
= j
, j
= 0; i
< coding_category_max
; i
++, j
++)
8236 while (j
< coding_category_max
8237 && changed
[coding_priorities
[j
]])
8239 if (j
== coding_category_max
)
8241 priorities
[i
] = coding_priorities
[j
];
8244 bcopy (priorities
, coding_priorities
, sizeof priorities
);
8246 /* Update `coding-category-list'. */
8247 Vcoding_category_list
= Qnil
;
8248 for (i
= coding_category_max
- 1; i
>= 0; i
--)
8249 Vcoding_category_list
8250 = Fcons (AREF (Vcoding_category_table
, priorities
[i
]),
8251 Vcoding_category_list
);
/* Lisp primitive `coding-system-priority-list'.  Walks
   coding_priorities from highest to lowest and collects the base name
   of each category's coding system; with HIGHESTP non-nil the first
   base name reached is returned immediately.  */
8256 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list
,
8257 Scoding_system_priority_list
, 0, 1, 0,
8258 doc
: /* Return a list of coding systems ordered by their priorities.
8259 HIGHESTP non-nil means just return the highest priority one. */)
8261 Lisp_Object highestp
;
8266 for (i
= 0, val
= Qnil
; i
< coding_category_max
; i
++)
8268 enum coding_category category
= coding_priorities
[i
];
8269 int id
= coding_categories
[category
].id
;
8274 attrs
= CODING_ID_ATTRS (id
);
8275 if (! NILP (highestp
))
8276 return CODING_ATTR_BASE_NAME (attrs
);
8277 val
= Fcons (CODING_ATTR_BASE_NAME (attrs
), val
);
/* VAL was built by consing at the front, so reverse it to restore
   priority order.  */
8279 return Fnreverse (val
);
/* Suffixes appended to a base coding system name to form the names of
   its three subsidiary coding systems.  */
8282 static char *suffixes
[] = { "-unix", "-dos", "-mac" };
/* Return a vector of three symbols whose names are the name of the
   symbol BASE followed by each entry of `suffixes', in that order.  */
8285 make_subsidiaries (base
)
8288 Lisp_Object subsidiaries
;
8289 int base_name_len
= SBYTES (SYMBOL_NAME (base
));
/* Room for the base name plus the longest suffix ("-unix", five
   bytes) and its terminating NUL.  */
8290 char *buf
= (char *) alloca (base_name_len
+ 6);
8293 bcopy (SDATA (SYMBOL_NAME (base
)), buf
, base_name_len
);
8294 subsidiaries
= Fmake_vector (make_number (3), Qnil
);
8295 for (i
= 0; i
< 3; i
++)
/* Overwrite the tail of BUF with the suffix, including its NUL, and
   intern the resulting name.  */
8297 bcopy (suffixes
[i
], buf
+ base_name_len
, strlen (suffixes
[i
]) + 1);
8298 ASET (subsidiaries
, i
, intern (buf
));
8300 return subsidiaries
;
8304 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal
,
8305 Sdefine_coding_system_internal
, coding_arg_max
, MANY
, 0,
8306 doc
: /* For internal use only.
8307 usage: (define-coding-system-internal ...) */)
8313 Lisp_Object spec_vec
; /* [ ATTRS ALIASE EOL_TYPE ] */
8314 Lisp_Object attrs
; /* Vector of attributes. */
8315 Lisp_Object eol_type
;
8316 Lisp_Object aliases
;
8317 Lisp_Object coding_type
, charset_list
, safe_charsets
;
8318 enum coding_category category
;
8319 Lisp_Object tail
, val
;
8320 int max_charset_id
= 0;
8323 if (nargs
< coding_arg_max
)
8326 attrs
= Fmake_vector (make_number (coding_attr_last_index
), Qnil
);
8328 name
= args
[coding_arg_name
];
8329 CHECK_SYMBOL (name
);
8330 CODING_ATTR_BASE_NAME (attrs
) = name
;
8332 val
= args
[coding_arg_mnemonic
];
8333 if (! STRINGP (val
))
8334 CHECK_CHARACTER (val
);
8335 CODING_ATTR_MNEMONIC (attrs
) = val
;
8337 coding_type
= args
[coding_arg_coding_type
];
8338 CHECK_SYMBOL (coding_type
);
8339 CODING_ATTR_TYPE (attrs
) = coding_type
;
8341 charset_list
= args
[coding_arg_charset_list
];
8342 if (SYMBOLP (charset_list
))
8344 if (EQ (charset_list
, Qiso_2022
))
8346 if (! EQ (coding_type
, Qiso_2022
))
8347 error ("Invalid charset-list");
8348 charset_list
= Viso_2022_charset_list
;
8350 else if (EQ (charset_list
, Qemacs_mule
))
8352 if (! EQ (coding_type
, Qemacs_mule
))
8353 error ("Invalid charset-list");
8354 charset_list
= Vemacs_mule_charset_list
;
8356 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8357 if (max_charset_id
< XFASTINT (XCAR (tail
)))
8358 max_charset_id
= XFASTINT (XCAR (tail
));
8362 charset_list
= Fcopy_sequence (charset_list
);
8363 for (tail
= charset_list
; !NILP (tail
); tail
= Fcdr (tail
))
8365 struct charset
*charset
;
8368 CHECK_CHARSET_GET_CHARSET (val
, charset
);
8369 if (EQ (coding_type
, Qiso_2022
)
8370 ? CHARSET_ISO_FINAL (charset
) < 0
8371 : EQ (coding_type
, Qemacs_mule
)
8372 ? CHARSET_EMACS_MULE_ID (charset
) < 0
8374 error ("Can't handle charset `%s'",
8375 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8377 XSETCAR (tail
, make_number (charset
->id
));
8378 if (max_charset_id
< charset
->id
)
8379 max_charset_id
= charset
->id
;
8382 CODING_ATTR_CHARSET_LIST (attrs
) = charset_list
;
8384 safe_charsets
= Fmake_string (make_number (max_charset_id
+ 1),
8386 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8387 SSET (safe_charsets
, XFASTINT (XCAR (tail
)), 0);
8388 CODING_ATTR_SAFE_CHARSETS (attrs
) = safe_charsets
;
8390 CODING_ATTR_ASCII_COMPAT (attrs
) = args
[coding_arg_ascii_compatible_p
];
8392 val
= args
[coding_arg_decode_translation_table
];
8393 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8395 CODING_ATTR_DECODE_TBL (attrs
) = val
;
8397 val
= args
[coding_arg_encode_translation_table
];
8398 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8400 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
8402 val
= args
[coding_arg_post_read_conversion
];
8404 CODING_ATTR_POST_READ (attrs
) = val
;
8406 val
= args
[coding_arg_pre_write_conversion
];
8408 CODING_ATTR_PRE_WRITE (attrs
) = val
;
8410 val
= args
[coding_arg_default_char
];
8412 CODING_ATTR_DEFAULT_CHAR (attrs
) = make_number (' ');
8415 CHECK_CHARACTER (val
);
8416 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
8419 val
= args
[coding_arg_for_unibyte
];
8420 CODING_ATTR_FOR_UNIBYTE (attrs
) = NILP (val
) ? Qnil
: Qt
;
8422 val
= args
[coding_arg_plist
];
8424 CODING_ATTR_PLIST (attrs
) = val
;
8426 if (EQ (coding_type
, Qcharset
))
8428 /* Generate a lisp vector of 256 elements. Each element is nil,
8429 integer, or a list of charset IDs.
8431 If Nth element is nil, the byte code N is invalid in this
8434 If Nth element is a number NUM, N is the first byte of a
8435 charset whose ID is NUM.
8437 If Nth element is a list of charset IDs, N is the first byte
8438 of one of them. The list is sorted by dimensions of the
8439 charsets. A charset of smaller dimension comes first. */
8440 val
= Fmake_vector (make_number (256), Qnil
);
8442 for (tail
= charset_list
; CONSP (tail
); tail
= XCDR (tail
))
8444 struct charset
*charset
= CHARSET_FROM_ID (XFASTINT (XCAR (tail
)));
8445 int dim
= CHARSET_DIMENSION (charset
);
8446 int idx
= (dim
- 1) * 4;
8448 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8449 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8451 for (i
= charset
->code_space
[idx
];
8452 i
<= charset
->code_space
[idx
+ 1]; i
++)
8454 Lisp_Object tmp
, tmp2
;
8457 tmp
= AREF (val
, i
);
8460 else if (NUMBERP (tmp
))
8462 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp
)));
8464 tmp
= Fcons (XCAR (tail
), Fcons (tmp
, Qnil
));
8466 tmp
= Fcons (tmp
, Fcons (XCAR (tail
), Qnil
));
8470 for (tmp2
= tmp
; CONSP (tmp2
); tmp2
= XCDR (tmp2
))
8472 dim2
= CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2
))));
8477 tmp
= nconc2 (tmp
, Fcons (XCAR (tail
), Qnil
));
8480 XSETCDR (tmp2
, Fcons (XCAR (tmp2
), XCDR (tmp2
)));
8481 XSETCAR (tmp2
, XCAR (tail
));
8487 ASET (attrs
, coding_attr_charset_valids
, val
);
8488 category
= coding_category_charset
;
8490 else if (EQ (coding_type
, Qccl
))
8494 if (nargs
< coding_arg_ccl_max
)
8497 val
= args
[coding_arg_ccl_decoder
];
8498 CHECK_CCL_PROGRAM (val
);
8500 val
= Fcopy_sequence (val
);
8501 ASET (attrs
, coding_attr_ccl_decoder
, val
);
8503 val
= args
[coding_arg_ccl_encoder
];
8504 CHECK_CCL_PROGRAM (val
);
8506 val
= Fcopy_sequence (val
);
8507 ASET (attrs
, coding_attr_ccl_encoder
, val
);
8509 val
= args
[coding_arg_ccl_valids
];
8510 valids
= Fmake_string (make_number (256), make_number (0));
8511 for (tail
= val
; !NILP (tail
); tail
= Fcdr (tail
))
8518 from
= to
= XINT (val
);
8519 if (from
< 0 || from
> 255)
8520 args_out_of_range_3 (val
, make_number (0), make_number (255));
8525 CHECK_NATNUM_CAR (val
);
8526 CHECK_NATNUM_CDR (val
);
8527 from
= XINT (XCAR (val
));
8529 args_out_of_range_3 (XCAR (val
),
8530 make_number (0), make_number (255));
8531 to
= XINT (XCDR (val
));
8532 if (to
< from
|| to
> 255)
8533 args_out_of_range_3 (XCDR (val
),
8534 XCAR (val
), make_number (255));
8536 for (i
= from
; i
<= to
; i
++)
8537 SSET (valids
, i
, 1);
8539 ASET (attrs
, coding_attr_ccl_valids
, valids
);
8541 category
= coding_category_ccl
;
8543 else if (EQ (coding_type
, Qutf_16
))
8545 Lisp_Object bom
, endian
;
8547 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8549 if (nargs
< coding_arg_utf16_max
)
8552 bom
= args
[coding_arg_utf16_bom
];
8553 if (! NILP (bom
) && ! EQ (bom
, Qt
))
8557 CHECK_CODING_SYSTEM (val
);
8559 CHECK_CODING_SYSTEM (val
);
8561 ASET (attrs
, coding_attr_utf_16_bom
, bom
);
8563 endian
= args
[coding_arg_utf16_endian
];
8564 CHECK_SYMBOL (endian
);
8567 else if (! EQ (endian
, Qbig
) && ! EQ (endian
, Qlittle
))
8568 error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian
)));
8569 ASET (attrs
, coding_attr_utf_16_endian
, endian
);
8571 category
= (CONSP (bom
)
8572 ? coding_category_utf_16_auto
8574 ? (EQ (endian
, Qbig
)
8575 ? coding_category_utf_16_be_nosig
8576 : coding_category_utf_16_le_nosig
)
8577 : (EQ (endian
, Qbig
)
8578 ? coding_category_utf_16_be
8579 : coding_category_utf_16_le
));
8581 else if (EQ (coding_type
, Qiso_2022
))
8583 Lisp_Object initial
, reg_usage
, request
, flags
;
8586 if (nargs
< coding_arg_iso2022_max
)
8589 initial
= Fcopy_sequence (args
[coding_arg_iso2022_initial
]);
8590 CHECK_VECTOR (initial
);
8591 for (i
= 0; i
< 4; i
++)
8593 val
= Faref (initial
, make_number (i
));
8596 struct charset
*charset
;
8598 CHECK_CHARSET_GET_CHARSET (val
, charset
);
8599 ASET (initial
, i
, make_number (CHARSET_ID (charset
)));
8600 if (i
== 0 && CHARSET_ASCII_COMPATIBLE_P (charset
))
8601 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8604 ASET (initial
, i
, make_number (-1));
8607 reg_usage
= args
[coding_arg_iso2022_reg_usage
];
8608 CHECK_CONS (reg_usage
);
8609 CHECK_NUMBER_CAR (reg_usage
);
8610 CHECK_NUMBER_CDR (reg_usage
);
8612 request
= Fcopy_sequence (args
[coding_arg_iso2022_request
]);
8613 for (tail
= request
; ! NILP (tail
); tail
= Fcdr (tail
))
8621 CHECK_CHARSET_GET_ID (tmp
, id
);
8622 CHECK_NATNUM_CDR (val
);
8623 if (XINT (XCDR (val
)) >= 4)
8624 error ("Invalid graphic register number: %d", XINT (XCDR (val
)));
8625 XSETCAR (val
, make_number (id
));
8628 flags
= args
[coding_arg_iso2022_flags
];
8629 CHECK_NATNUM (flags
);
8631 if (EQ (args
[coding_arg_charset_list
], Qiso_2022
))
8632 flags
= make_number (i
| CODING_ISO_FLAG_FULL_SUPPORT
);
8634 ASET (attrs
, coding_attr_iso_initial
, initial
);
8635 ASET (attrs
, coding_attr_iso_usage
, reg_usage
);
8636 ASET (attrs
, coding_attr_iso_request
, request
);
8637 ASET (attrs
, coding_attr_iso_flags
, flags
);
8638 setup_iso_safe_charsets (attrs
);
8640 if (i
& CODING_ISO_FLAG_SEVEN_BITS
)
8641 category
= ((i
& (CODING_ISO_FLAG_LOCKING_SHIFT
8642 | CODING_ISO_FLAG_SINGLE_SHIFT
))
8643 ? coding_category_iso_7_else
8644 : EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8645 ? coding_category_iso_7
8646 : coding_category_iso_7_tight
);
8649 int id
= XINT (AREF (initial
, 1));
8651 category
= (((i
& CODING_ISO_FLAG_LOCKING_SHIFT
)
8652 || EQ (args
[coding_arg_charset_list
], Qiso_2022
)
8654 ? coding_category_iso_8_else
8655 : (CHARSET_DIMENSION (CHARSET_FROM_ID (id
)) == 1)
8656 ? coding_category_iso_8_1
8657 : coding_category_iso_8_2
);
8659 if (category
!= coding_category_iso_8_1
8660 && category
!= coding_category_iso_8_2
)
8661 CODING_ATTR_ASCII_COMPAT (attrs
) = Qnil
;
8663 else if (EQ (coding_type
, Qemacs_mule
))
8665 if (EQ (args
[coding_arg_charset_list
], Qemacs_mule
))
8666 ASET (attrs
, coding_attr_emacs_mule_full
, Qt
);
8667 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8668 category
= coding_category_emacs_mule
;
8670 else if (EQ (coding_type
, Qshift_jis
))
8673 struct charset
*charset
;
8675 if (XINT (Flength (charset_list
)) != 3
8676 && XINT (Flength (charset_list
)) != 4)
8677 error ("There should be three or four charsets");
8679 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8680 if (CHARSET_DIMENSION (charset
) != 1)
8681 error ("Dimension of charset %s is not one",
8682 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8683 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8684 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8686 charset_list
= XCDR (charset_list
);
8687 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8688 if (CHARSET_DIMENSION (charset
) != 1)
8689 error ("Dimension of charset %s is not one",
8690 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8692 charset_list
= XCDR (charset_list
);
8693 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8694 if (CHARSET_DIMENSION (charset
) != 2)
8695 error ("Dimension of charset %s is not two",
8696 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8698 charset_list
= XCDR (charset_list
);
8699 if (! NILP (charset_list
))
8701 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8702 if (CHARSET_DIMENSION (charset
) != 2)
8703 error ("Dimension of charset %s is not two",
8704 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8707 category
= coding_category_sjis
;
8708 Vsjis_coding_system
= name
;
8710 else if (EQ (coding_type
, Qbig5
))
8712 struct charset
*charset
;
8714 if (XINT (Flength (charset_list
)) != 2)
8715 error ("There should be just two charsets");
8717 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8718 if (CHARSET_DIMENSION (charset
) != 1)
8719 error ("Dimension of charset %s is not one",
8720 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8721 if (CHARSET_ASCII_COMPATIBLE_P (charset
))
8722 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8724 charset_list
= XCDR (charset_list
);
8725 charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
8726 if (CHARSET_DIMENSION (charset
) != 2)
8727 error ("Dimension of charset %s is not two",
8728 SDATA (SYMBOL_NAME (CHARSET_NAME (charset
))));
8730 category
= coding_category_big5
;
8731 Vbig5_coding_system
= name
;
8733 else if (EQ (coding_type
, Qraw_text
))
8735 category
= coding_category_raw_text
;
8736 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8738 else if (EQ (coding_type
, Qutf_8
))
8740 category
= coding_category_utf_8
;
8741 CODING_ATTR_ASCII_COMPAT (attrs
) = Qt
;
8743 else if (EQ (coding_type
, Qundecided
))
8744 category
= coding_category_undecided
;
8746 error ("Invalid coding system type: %s",
8747 SDATA (SYMBOL_NAME (coding_type
)));
8749 CODING_ATTR_CATEGORY (attrs
) = make_number (category
);
8750 CODING_ATTR_PLIST (attrs
)
8751 = Fcons (QCcategory
, Fcons (AREF (Vcoding_category_table
, category
),
8752 CODING_ATTR_PLIST (attrs
)));
8754 eol_type
= args
[coding_arg_eol_type
];
8755 if (! NILP (eol_type
)
8756 && ! EQ (eol_type
, Qunix
)
8757 && ! EQ (eol_type
, Qdos
)
8758 && ! EQ (eol_type
, Qmac
))
8759 error ("Invalid eol-type");
8761 aliases
= Fcons (name
, Qnil
);
8763 if (NILP (eol_type
))
8765 eol_type
= make_subsidiaries (name
);
8766 for (i
= 0; i
< 3; i
++)
8768 Lisp_Object this_spec
, this_name
, this_aliases
, this_eol_type
;
8770 this_name
= AREF (eol_type
, i
);
8771 this_aliases
= Fcons (this_name
, Qnil
);
8772 this_eol_type
= (i
== 0 ? Qunix
: i
== 1 ? Qdos
: Qmac
);
8773 this_spec
= Fmake_vector (make_number (3), attrs
);
8774 ASET (this_spec
, 1, this_aliases
);
8775 ASET (this_spec
, 2, this_eol_type
);
8776 Fputhash (this_name
, this_spec
, Vcoding_system_hash_table
);
8777 Vcoding_system_list
= Fcons (this_name
, Vcoding_system_list
);
8778 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (this_name
), Qnil
),
8779 Vcoding_system_alist
);
8783 spec_vec
= Fmake_vector (make_number (3), attrs
);
8784 ASET (spec_vec
, 1, aliases
);
8785 ASET (spec_vec
, 2, eol_type
);
8787 Fputhash (name
, spec_vec
, Vcoding_system_hash_table
);
8788 Vcoding_system_list
= Fcons (name
, Vcoding_system_list
);
8789 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (name
), Qnil
),
8790 Vcoding_system_alist
);
8793 int id
= coding_categories
[category
].id
;
8795 if (id
< 0 || EQ (name
, CODING_ID_NAME (id
)))
8796 setup_coding_system (name
, &coding_categories
[category
]);
8802 return Fsignal (Qwrong_number_of_arguments
,
8803 Fcons (intern ("define-coding-system-internal"),
8804 make_number (nargs
)));
8808 DEFUN ("coding-system-put", Fcoding_system_put
, Scoding_system_put
,
8810 doc
: /* Change value in CODING-SYSTEM's property list PROP to VAL. */)
8811 (coding_system
, prop
, val
)
8812 Lisp_Object coding_system
, prop
, val
;
/* Implementation overview: a handful of properties are mirrored in
   dedicated slots of the attribute vector, so for those the slot is
   updated first; afterwards the PROP/VAL pair is stored in the
   attribute plist with Fplist_put.
   NOTE(review): `plist' is declared below but no visible statement
   refers to it.  */
8814 Lisp_Object spec
, attrs
, plist
;
/* Fetch the spec of CODING_SYSTEM; element 0 of the spec is the
   attribute vector that the branches below modify.  Presumably the
   macro signals an error for an unknown coding system -- confirm
   against its definition.  */
8816 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8817 attrs
= AREF (spec
, 0);
8818 if (EQ (prop
, QCmnemonic
))
/* A mnemonic may be a string; any other value has to pass
   CHECK_CHARACTER.  */
8820 if (! STRINGP (val
))
8821 CHECK_CHARACTER (val
);
8822 CODING_ATTR_MNEMONIC (attrs
) = val
;
/* The symbol variable is spelled `QCdefalut_char' throughout this file;
   its Lisp name is ":default-char".  */
8824 else if (EQ (prop
, QCdefalut_char
))
/* NOTE(review): original lines 8826 and 8828 are missing from this
   listing, so the condition choosing between the space default and
   the character check is not visible.  Presumably a nil VAL selects
   the space character -- confirm against the upstream source.  */
8827 val
= make_number (' ');
8829 CHECK_CHARACTER (val
);
8830 CODING_ATTR_DEFAULT_CHAR (attrs
) = val
;
8832 else if (EQ (prop
, QCdecode_translation_table
))
/* Char-tables and conses are accepted as they are.
   NOTE(review): the statement guarded by this test (original line
   8835) is missing here; presumably it is a type check on VAL --
   confirm.  */
8834 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8836 CODING_ATTR_DECODE_TBL (attrs
) = val
;
8838 else if (EQ (prop
, QCencode_translation_table
))
/* Same acceptance test as for the decode table; the guarded
   statement (original line 8841) is likewise missing.  */
8840 if (! CHAR_TABLE_P (val
) && ! CONSP (val
))
8842 CODING_ATTR_ENCODE_TBL (attrs
) = val
;
8844 else if (EQ (prop
, QCpost_read_conversion
))
/* Mirror VAL in the post-read slot of the attribute vector.  */
8847 CODING_ATTR_POST_READ (attrs
) = val
;
8849 else if (EQ (prop
, QCpre_write_conversion
))
/* Mirror VAL in the pre-write slot of the attribute vector.  */
8852 CODING_ATTR_PRE_WRITE (attrs
) = val
;
/* Record PROP/VAL in the attribute plist.  No visible `else' guards
   this statement, so it appears to run for every PROP, including
   those handled by a branch above.  */
8855 CODING_ATTR_PLIST (attrs
)
8856 = Fplist_put (CODING_ATTR_PLIST (attrs
), prop
, val
);
8861 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias
,
8862 Sdefine_coding_system_alias
, 2, 2, 0,
8863 doc
: /* Define ALIAS as an alias for CODING-SYSTEM. */)
8864 (alias
, coding_system
)
8865 Lisp_Object alias
, coding_system
;
/* Implementation overview, in the order of the visible statements:
   1. Walk to the last cons of the alias list (element 1 of the spec)
      and append ALIAS there with XSETCDR.
   2. If the eol-type (element 2 of the spec) is a vector, build three
      subsidiary names for ALIAS with make_subsidiaries and call this
      function recursively to alias each of them to the matching
      element of the eol-type vector.
   3. Enter ALIAS in Vcoding_system_hash_table with the very same spec
      object as CODING-SYSTEM, and push it onto Vcoding_system_list and
      Vcoding_system_alist.  */
8867 Lisp_Object spec
, aliases
, eol_type
;
/* ALIAS has to be a symbol.  The spec of CODING_SYSTEM is fetched by
   the macro below; presumably it signals an error for an unknown
   coding system -- confirm against its definition.  */
8869 CHECK_SYMBOL (alias
);
8870 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8871 aliases
= AREF (spec
, 1);
/* NOTE(review): the comment that follows has lost its final line
   (original line 8874) in this listing, so it is left unterminated
   here, and it misspells ALIASES as "ALISES".  Restore the closing
   line and fix the spelling from the upstream source.  */
8872 /* ALISES should be a list of length more than zero, and the first
8873 element is a base coding system. Append ALIAS at the tail of the
8875 while (!NILP (XCDR (aliases
)))
8876 aliases
= XCDR (aliases
);
8877 XSETCDR (aliases
, Fcons (alias
, Qnil
));
8879 eol_type
= AREF (spec
, 2);
8880 if (VECTORP (eol_type
))
8882 Lisp_Object subsidiaries
;
8885 subsidiaries
= make_subsidiaries (alias
);
8886 for (i
= 0; i
< 3; i
++)
8887 Fdefine_coding_system_alias (AREF (subsidiaries
, i
),
8888 AREF (eol_type
, i
));
8891 Fputhash (alias
, spec
, Vcoding_system_hash_table
);
8892 Vcoding_system_list
= Fcons (alias
, Vcoding_system_list
);
8893 Vcoding_system_alist
= Fcons (Fcons (Fsymbol_name (alias
), Qnil
),
8894 Vcoding_system_alist
);
8899 DEFUN ("coding-system-base", Fcoding_system_base
, Scoding_system_base
,
8901 doc
: /* Return the base of CODING-SYSTEM.
8902 Any alias or subsidiary coding system is not a base coding system. */)
8904 Lisp_Object coding_system
;
8906 Lisp_Object spec
, attrs
;
/* A nil argument short-circuits: the symbol `no-conversion' is
   returned directly, without consulting any spec.  */
8908 if (NILP (coding_system
))
8909 return (Qno_conversion
);
/* Otherwise fetch the spec and read the base name from the attribute
   vector stored in element 0 of the spec.  */
8910 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8911 attrs
= AREF (spec
, 0);
8912 return CODING_ATTR_BASE_NAME (attrs
);
8915 DEFUN ("coding-system-plist", Fcoding_system_plist
, Scoding_system_plist
,
/* NOTE(review): the documentation below is written as a string
   literal, whereas the neighbouring DEFUNs use the doc-comment form;
   confirm that this is intended.  */
8917 doc
: "Return the property list of CODING-SYSTEM.")
8919 Lisp_Object coding_system
;
8921 Lisp_Object spec
, attrs
;
/* A nil argument is treated as the coding system `no-conversion'.  */
8923 if (NILP (coding_system
))
8924 coding_system
= Qno_conversion
;
/* Fetch the spec; element 0 is the attribute vector.  The value
   returned is the plist stored in that vector (no copy is made by
   any visible statement).  */
8925 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8926 attrs
= AREF (spec
, 0);
8927 return CODING_ATTR_PLIST (attrs
);
8931 DEFUN ("coding-system-aliases", Fcoding_system_aliases
, Scoding_system_aliases
,
8933 doc
: /* Return the list of aliases of CODING-SYSTEM. */)
8935 Lisp_Object coding_system
;
/* NOTE(review): the declaration of `spec' (original line 8937) is
   missing from this listing.  */
/* A nil argument is treated as the coding system `no-conversion'.  */
8939 if (NILP (coding_system
))
8940 coding_system
= Qno_conversion
;
/* Element 1 of the spec is the alias list.  It is returned as stored,
   not copied, so the caller receives the same list object that
   define-coding-system-alias extends.  */
8941 CHECK_CODING_SYSTEM_GET_SPEC (coding_system
, spec
);
8942 return AREF (spec
, 1);
8945 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type
,
8946 Scoding_system_eol_type
, 1, 1, 0,
8947 doc
: /* Return eol-type of CODING-SYSTEM.
8948 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8950 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8951 and CR respectively.
8953 A vector value indicates that a format of end-of-line should be
8954 detected automatically. Nth element of the vector is the subsidiary
8955 coding system whose eol-type is N. */)
8957 Lisp_Object coding_system
;
/* NOTE(review): the declaration of `n' (original line 8960) is
   missing from this listing.  */
8959 Lisp_Object spec
, eol_type
;
/* A nil argument is treated as the coding system `no-conversion'.  */
8962 if (NILP (coding_system
))
8963 coding_system
= Qno_conversion
;
/* NOTE(review): the statement guarded by this test (original line
   8965) is missing here; presumably the function returns early for a
   value that is not a coding system -- confirm.  */
8964 if (! CODING_SYSTEM_P (coding_system
))
/* Element 2 of the spec holds the eol-type.  */
8966 spec
= CODING_SYSTEM_SPEC (coding_system
);
8967 eol_type
= AREF (spec
, 2);
/* A vector is handed back as a fresh copy, so the caller cannot
   modify the vector stored in the spec.  */
8968 if (VECTORP (eol_type
))
8969 return Fcopy_sequence (eol_type
);
/* Otherwise translate the stored symbol to the documented integer:
   `unix' gives 0, `dos' gives 1, and any other value gives 2.  */
8970 n
= EQ (eol_type
, Qunix
) ? 0 : EQ (eol_type
, Qdos
) ? 1 : 2;
8971 return make_number (n
);
8977 /*** 9. Post-amble ***/
8984 for (i
= 0; i
< coding_category_max
; i
++)
/* Reset every coding category: give it the id -1 and make the
   priority table the identity ordering (entry I holds I).
   NOTE(review): the enclosing function header and the braces of this
   loop are missing from this listing.  */
8986 coding_categories
[i
].id
= -1;
8987 coding_priorities
[i
] = i
;
8990 /* ISO 2022 specific initialization: assign a class to each of the
   256 byte values.  The four range loops deliberately skip 0x20,
   0x7F, 0xA0 and 0xFF, which receive their own classes right after
   the loops.  */
8991 for (i
= 0; i
< 0x20; i
++)
8992 iso_code_class
[i
] = ISO_control_0
;
8993 for (i
= 0x21; i
< 0x7F; i
++)
8994 iso_code_class
[i
] = ISO_graphic_plane_0
;
8995 for (i
= 0x80; i
< 0xA0; i
++)
8996 iso_code_class
[i
] = ISO_control_1
;
8997 for (i
= 0xA1; i
< 0xFF; i
++)
8998 iso_code_class
[i
] = ISO_graphic_plane_1
;
8999 iso_code_class
[0x20] = iso_code_class
[0x7F] = ISO_0x20_or_0x7F
;
9000 iso_code_class
[0xA0] = iso_code_class
[0xFF] = ISO_0xA0_or_0xFF
;
/* The individual codes below are assigned after the range loops, so
   each assignment replaces whatever class the loops gave that
   byte.  */
9001 iso_code_class
[ISO_CODE_SO
] = ISO_shift_out
;
9002 iso_code_class
[ISO_CODE_SI
] = ISO_shift_in
;
9003 iso_code_class
[ISO_CODE_SS2_7
] = ISO_single_shift_2_7
;
9004 iso_code_class
[ISO_CODE_ESC
] = ISO_escape
;
9005 iso_code_class
[ISO_CODE_SS2
] = ISO_single_shift_2
;
9006 iso_code_class
[ISO_CODE_SS3
] = ISO_single_shift_3
;
9007 iso_code_class
[ISO_CODE_CSI
] = ISO_control_sequence_introducer
;
/* emacs-mule byte-count table: every entry is first set to 1, then
   the four private leading codes are overridden with 3 or 4.
   NOTE(review): presumably the value is the length in bytes of a
   sequence introduced by that leading byte -- confirm with the
   emacs-mule decoder.  */
9009 for (i
= 0; i
< 256; i
++)
9011 emacs_mule_bytes
[i
] = 1;
9013 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_11
] = 3;
9014 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_12
] = 3;
9015 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_21
] = 4;
9016 emacs_mule_bytes
[EMACS_MULE_LEADING_CODE_PRIVATE_22
] = 4;
9024 staticpro (&Vcoding_system_hash_table
);
9026 Lisp_Object args
[2];
9029 Vcoding_system_hash_table
= Fmake_hash_table (2, args
);
9032 staticpro (&Vsjis_coding_system
);
9033 Vsjis_coding_system
= Qnil
;
9035 staticpro (&Vbig5_coding_system
);
9036 Vbig5_coding_system
= Qnil
;
9038 staticpro (&Vcode_conversion_reused_workbuf
);
9039 Vcode_conversion_reused_workbuf
= Qnil
;
9041 staticpro (&Vcode_conversion_workbuf_name
);
9042 Vcode_conversion_workbuf_name
= build_string (" *code-conversion-work*");
9044 reused_workbuf_in_use
= 0;
9046 DEFSYM (Qcharset
, "charset");
9047 DEFSYM (Qtarget_idx
, "target-idx");
9048 DEFSYM (Qcoding_system_history
, "coding-system-history");
9049 Fset (Qcoding_system_history
, Qnil
);
9051 /* Target FILENAME is the first argument. */
9052 Fput (Qinsert_file_contents
, Qtarget_idx
, make_number (0));
9053 /* Target FILENAME is the third argument. */
9054 Fput (Qwrite_region
, Qtarget_idx
, make_number (2));
9056 DEFSYM (Qcall_process
, "call-process");
9057 /* Target PROGRAM is the first argument. */
9058 Fput (Qcall_process
, Qtarget_idx
, make_number (0));
9060 DEFSYM (Qcall_process_region
, "call-process-region");
9061 /* Target PROGRAM is the third argument. */
9062 Fput (Qcall_process_region
, Qtarget_idx
, make_number (2));
9064 DEFSYM (Qstart_process
, "start-process");
9065 /* Target PROGRAM is the third argument. */
9066 Fput (Qstart_process
, Qtarget_idx
, make_number (2));
9068 DEFSYM (Qopen_network_stream
, "open-network-stream");
9069 /* Target SERVICE is the fourth argument. */
9070 Fput (Qopen_network_stream
, Qtarget_idx
, make_number (3));
9072 DEFSYM (Qcoding_system
, "coding-system");
9073 DEFSYM (Qcoding_aliases
, "coding-aliases");
9075 DEFSYM (Qeol_type
, "eol-type");
9076 DEFSYM (Qunix
, "unix");
9077 DEFSYM (Qdos
, "dos");
9079 DEFSYM (Qbuffer_file_coding_system
, "buffer-file-coding-system");
9080 DEFSYM (Qpost_read_conversion
, "post-read-conversion");
9081 DEFSYM (Qpre_write_conversion
, "pre-write-conversion");
9082 DEFSYM (Qdefault_char
, "default-char");
9083 DEFSYM (Qundecided
, "undecided");
9084 DEFSYM (Qno_conversion
, "no-conversion");
9085 DEFSYM (Qraw_text
, "raw-text");
9087 DEFSYM (Qiso_2022
, "iso-2022");
9089 DEFSYM (Qutf_8
, "utf-8");
9090 DEFSYM (Qutf_8_emacs
, "utf-8-emacs");
9092 DEFSYM (Qutf_16
, "utf-16");
9093 DEFSYM (Qbig
, "big");
9094 DEFSYM (Qlittle
, "little");
9096 DEFSYM (Qshift_jis
, "shift-jis");
9097 DEFSYM (Qbig5
, "big5");
9099 DEFSYM (Qcoding_system_p
, "coding-system-p");
9101 DEFSYM (Qcoding_system_error
, "coding-system-error");
9102 Fput (Qcoding_system_error
, Qerror_conditions
,
9103 Fcons (Qcoding_system_error
, Fcons (Qerror
, Qnil
)));
9104 Fput (Qcoding_system_error
, Qerror_message
,
9105 build_string ("Invalid coding system"));
9107 /* Intern this now in case it isn't already done.
9108 Setting this variable twice is harmless.
9109 But don't staticpro it here--that is done in alloc.c. */
9110 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
9112 DEFSYM (Qtranslation_table
, "translation-table");
9113 Fput (Qtranslation_table
, Qchar_table_extra_slots
, make_number (2));
9114 DEFSYM (Qtranslation_table_id
, "translation-table-id");
9115 DEFSYM (Qtranslation_table_for_decode
, "translation-table-for-decode");
9116 DEFSYM (Qtranslation_table_for_encode
, "translation-table-for-encode");
9118 DEFSYM (Qvalid_codes
, "valid-codes");
9120 DEFSYM (Qemacs_mule
, "emacs-mule");
9122 DEFSYM (QCcategory
, ":category");
9123 DEFSYM (QCmnemonic
, ":mnemonic");
9124 DEFSYM (QCdefalut_char
, ":default-char");
9125 DEFSYM (QCdecode_translation_table
, ":decode-translation-table");
9126 DEFSYM (QCencode_translation_table
, ":encode-translation-table");
9127 DEFSYM (QCpost_read_conversion
, ":post-read-conversion");
9128 DEFSYM (QCpre_write_conversion
, ":pre-write-conversion");
9130 Vcoding_category_table
9131 = Fmake_vector (make_number (coding_category_max
), Qnil
);
9132 staticpro (&Vcoding_category_table
);
9133 /* The following categories are targets of code detection. */
9134 ASET (Vcoding_category_table
, coding_category_iso_7
,
9135 intern ("coding-category-iso-7"));
9136 ASET (Vcoding_category_table
, coding_category_iso_7_tight
,
9137 intern ("coding-category-iso-7-tight"));
9138 ASET (Vcoding_category_table
, coding_category_iso_8_1
,
9139 intern ("coding-category-iso-8-1"));
9140 ASET (Vcoding_category_table
, coding_category_iso_8_2
,
9141 intern ("coding-category-iso-8-2"));
9142 ASET (Vcoding_category_table
, coding_category_iso_7_else
,
9143 intern ("coding-category-iso-7-else"));
9144 ASET (Vcoding_category_table
, coding_category_iso_8_else
,
9145 intern ("coding-category-iso-8-else"));
9146 ASET (Vcoding_category_table
, coding_category_utf_8
,
9147 intern ("coding-category-utf-8"));
9148 ASET (Vcoding_category_table
, coding_category_utf_16_be
,
9149 intern ("coding-category-utf-16-be"));
9150 ASET (Vcoding_category_table
, coding_category_utf_16_auto
,
9151 intern ("coding-category-utf-16-auto"));
9152 ASET (Vcoding_category_table
, coding_category_utf_16_le
,
9153 intern ("coding-category-utf-16-le"));
9154 ASET (Vcoding_category_table
, coding_category_utf_16_be_nosig
,
9155 intern ("coding-category-utf-16-be-nosig"));
9156 ASET (Vcoding_category_table
, coding_category_utf_16_le_nosig
,
9157 intern ("coding-category-utf-16-le-nosig"));
9158 ASET (Vcoding_category_table
, coding_category_charset
,
9159 intern ("coding-category-charset"));
9160 ASET (Vcoding_category_table
, coding_category_sjis
,
9161 intern ("coding-category-sjis"));
9162 ASET (Vcoding_category_table
, coding_category_big5
,
9163 intern ("coding-category-big5"));
9164 ASET (Vcoding_category_table
, coding_category_ccl
,
9165 intern ("coding-category-ccl"));
9166 ASET (Vcoding_category_table
, coding_category_emacs_mule
,
9167 intern ("coding-category-emacs-mule"));
9168 /* The following categories are NOT targets of code detection. */
9169 ASET (Vcoding_category_table
, coding_category_raw_text
,
9170 intern ("coding-category-raw-text"));
9171 ASET (Vcoding_category_table
, coding_category_undecided
,
9172 intern ("coding-category-undecided"));
9174 DEFSYM (Qinsufficient_source
, "insufficient-source");
9175 DEFSYM (Qinconsistent_eol
, "inconsistent-eol");
9176 DEFSYM (Qinvalid_source
, "invalid-source");
9177 DEFSYM (Qinterrupted
, "interrupted");
9178 DEFSYM (Qinsufficient_memory
, "insufficient-memory");
9180 defsubr (&Scoding_system_p
);
9181 defsubr (&Sread_coding_system
);
9182 defsubr (&Sread_non_nil_coding_system
);
9183 defsubr (&Scheck_coding_system
);
9184 defsubr (&Sdetect_coding_region
);
9185 defsubr (&Sdetect_coding_string
);
9186 defsubr (&Sfind_coding_systems_region_internal
);
9187 defsubr (&Sunencodable_char_position
);
9188 defsubr (&Scheck_coding_systems_region
);
9189 defsubr (&Sdecode_coding_region
);
9190 defsubr (&Sencode_coding_region
);
9191 defsubr (&Sdecode_coding_string
);
9192 defsubr (&Sencode_coding_string
);
9193 defsubr (&Sdecode_sjis_char
);
9194 defsubr (&Sencode_sjis_char
);
9195 defsubr (&Sdecode_big5_char
);
9196 defsubr (&Sencode_big5_char
);
9197 defsubr (&Sset_terminal_coding_system_internal
);
9198 defsubr (&Sset_safe_terminal_coding_system_internal
);
9199 defsubr (&Sterminal_coding_system
);
9200 defsubr (&Sset_keyboard_coding_system_internal
);
9201 defsubr (&Skeyboard_coding_system
);
9202 defsubr (&Sfind_operation_coding_system
);
9203 defsubr (&Sset_coding_system_priority
);
9204 defsubr (&Sdefine_coding_system_internal
);
9205 defsubr (&Sdefine_coding_system_alias
);
9206 defsubr (&Scoding_system_put
);
9207 defsubr (&Scoding_system_base
);
9208 defsubr (&Scoding_system_plist
);
9209 defsubr (&Scoding_system_aliases
);
9210 defsubr (&Scoding_system_eol_type
);
9211 defsubr (&Scoding_system_priority_list
);
9213 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list
,
9214 doc
: /* List of coding systems.
9216 Do not alter the value of this variable manually. This variable should be
9217 updated by the functions `define-coding-system' and
9218 `define-coding-system-alias'. */);
9219 Vcoding_system_list
= Qnil
;
9221 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist
,
9222 doc
: /* Alist of coding system names.
9223 Each element is one element list of coding system name.
9224 This variable is given to `completing-read' as TABLE argument.
9226 Do not alter the value of this variable manually. This variable should be
9227 updated by the functions `make-coding-system' and
9228 `define-coding-system-alias'. */);
9229 Vcoding_system_alist
= Qnil
;
9231 DEFVAR_LISP ("coding-category-list", &Vcoding_category_list
,
9232 doc
: /* List of coding-categories (symbols) ordered by priority.
9234 On detecting a coding system, Emacs tries code detection algorithms
9235 associated with each coding-category one by one in this order. When
9236 one algorithm agrees with a byte sequence of source text, the coding
9237 system bound to the corresponding coding-category is selected. */);
9241 Vcoding_category_list
= Qnil
;
9242 for (i
= coding_category_max
- 1; i
>= 0; i
--)
9243 Vcoding_category_list
9244 = Fcons (XVECTOR (Vcoding_category_table
)->contents
[i
],
9245 Vcoding_category_list
);
9248 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read
,
9249 doc
: /* Specify the coding system for read operations.
9250 It is useful to bind this variable with `let', but do not set it globally.
9251 If the value is a coding system, it is used for decoding on read operation.
9252 If not, an appropriate element is used from one of the coding system alists:
9253 There are three such tables, `file-coding-system-alist',
9254 `process-coding-system-alist', and `network-coding-system-alist'. */);
9255 Vcoding_system_for_read
= Qnil
;
9257 DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write
,
9258 doc
: /* Specify the coding system for write operations.
9259 Programs bind this variable with `let', but you should not set it globally.
9260 If the value is a coding system, it is used for encoding of output,
9261 when writing it to a file and when sending it to a file or subprocess.
9263 If this does not specify a coding system, an appropriate element
9264 is used from one of the coding system alists:
9265 There are three such tables, `file-coding-system-alist',
9266 `process-coding-system-alist', and `network-coding-system-alist'.
9267 For output to files, if the above procedure does not specify a coding system,
9268 the value of `buffer-file-coding-system' is used. */);
9269 Vcoding_system_for_write
= Qnil
;
9271 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used
,
9273 Coding system used in the latest file or process I/O. */);
9274 Vlast_coding_system_used
= Qnil
;
9276 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error
,
9278 Error status of the last code conversion.
9280 When an error was detected in the last code conversion, this variable
9281 is set to one of the following symbols.
9282 `insufficient-source'
9286 `insufficient-memory'
9287 When no error was detected, the value doesn't change. So, to check
9288 the error status of a code conversion by this variable, you must
9289 explicitly set this variable to nil before performing code
9291 Vlast_code_conversion_error
= Qnil
;
9293 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion
,
9295 *Non-nil means always inhibit code conversion of end-of-line format.
9296 See info node `Coding Systems' and info node `Text and Binary' concerning
9297 such conversion. */);
9298 inhibit_eol_conversion
= 0;
9300 DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system
,
9302 Non-nil means process buffer inherits coding system of process output.
9303 Bind it to t if the process output is to be treated as if it were a file
9304 read from some filesystem. */);
9305 inherit_process_coding_system
= 0;
9307 DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist
,
9309 Alist to decide a coding system to use for a file I/O operation.
9310 The format is ((PATTERN . VAL) ...),
9311 where PATTERN is a regular expression matching a file name,
9312 VAL is a coding system, a cons of coding systems, or a function symbol.
9313 If VAL is a coding system, it is used for both decoding and encoding
9315 If VAL is a cons of coding systems, the car part is used for decoding,
9316 and the cdr part is used for encoding.
9317 If VAL is a function symbol, the function must return a coding system
9318 or a cons of coding systems which are used as above. The function gets
9319 the arguments with which `find-operation-coding-system' was called.
9321 See also the function `find-operation-coding-system'
9322 and the variable `auto-coding-alist'. */);
9323 Vfile_coding_system_alist
= Qnil
;
9325 DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist
,
9327 Alist to decide a coding system to use for a process I/O operation.
9328 The format is ((PATTERN . VAL) ...),
9329 where PATTERN is a regular expression matching a program name,
9330 VAL is a coding system, a cons of coding systems, or a function symbol.
9331 If VAL is a coding system, it is used for both decoding what received
9332 from the program and encoding what sent to the program.
9333 If VAL is a cons of coding systems, the car part is used for decoding,
9334 and the cdr part is used for encoding.
9335 If VAL is a function symbol, the function must return a coding system
9336 or a cons of coding systems which are used as above.
9338 See also the function `find-operation-coding-system'. */);
9339 Vprocess_coding_system_alist
= Qnil
;
9341 DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist
,
9343 Alist to decide a coding system to use for a network I/O operation.
9344 The format is ((PATTERN . VAL) ...),
9345 where PATTERN is a regular expression matching a network service name
9346 or is a port number to connect to,
9347 VAL is a coding system, a cons of coding systems, or a function symbol.
9348 If VAL is a coding system, it is used for both decoding what received
9349 from the network stream and encoding what sent to the network stream.
9350 If VAL is a cons of coding systems, the car part is used for decoding,
9351 and the cdr part is used for encoding.
9352 If VAL is a function symbol, the function must return a coding system
9353 or a cons of coding systems which are used as above.
9355 See also the function `find-operation-coding-system'. */);
9356 Vnetwork_coding_system_alist
= Qnil
;
9358 DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system
,
9359 doc
: /* Coding system to use with system messages.
9360 Also used for decoding keyboard input on X Window system. */);
9361 Vlocale_coding_system
= Qnil
;
9363 /* The eol mnemonics are reset in startup.el system-dependently. */
9364 DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix
,
9366 *String displayed in mode line for UNIX-like (LF) end-of-line format. */);
9367 eol_mnemonic_unix
= build_string (":");
9369 DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos
,
9371 *String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
9372 eol_mnemonic_dos
= build_string ("\\");
9374 DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac
,
9376 *String displayed in mode line for MAC-like (CR) end-of-line format. */);
9377 eol_mnemonic_mac
= build_string ("/");
9379 DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided
,
9381 *String displayed in mode line when end-of-line format is not yet determined. */);
9382 eol_mnemonic_undecided
= build_string (":");
9384 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation
,
9386 *Non-nil enables character translation while encoding and decoding. */);
9387 Venable_character_translation
= Qt
;
9389 DEFVAR_LISP ("standard-translation-table-for-decode",
9390 &Vstandard_translation_table_for_decode
,
9391 doc
: /* Table for translating characters while decoding. */);
9392 Vstandard_translation_table_for_decode
= Qnil
;
9394 DEFVAR_LISP ("standard-translation-table-for-encode",
9395 &Vstandard_translation_table_for_encode
,
9396 doc
: /* Table for translating characters while encoding. */);
9397 Vstandard_translation_table_for_encode
= Qnil
;
9399 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table
,
9400 doc
: /* Alist of charsets vs revision numbers.
9401 While encoding, if a charset (car part of an element) is found,
9402 designate it with the escape sequence identifying revision (cdr part
9403 of the element). */);
9404 Vcharset_revision_table
= Qnil
;
9406 DEFVAR_LISP ("default-process-coding-system",
9407 &Vdefault_process_coding_system
,
9408 doc
: /* Cons of coding systems used for process I/O by default.
9409 The car part is used for decoding a process output,
9410 the cdr part is used for encoding a text to be sent to a process. */);
9411 Vdefault_process_coding_system
= Qnil
;
9413 DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table
,
9415 Table of extra Latin codes in the range 128..159 (inclusive).
9416 This is a vector of length 256.
9417 If Nth element is non-nil, the existence of code N in a file
9418 \(or output of subprocess) doesn't prevent it from being detected as
9419 a coding system of ISO 2022 variant which has a flag
9420 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9421 or reading output of a subprocess.
9422 Only the 128th through 159th elements have a meaning. */);
9423 Vlatin_extra_code_table
= Fmake_vector (make_number (256), Qnil
);
9425 DEFVAR_LISP ("select-safe-coding-system-function",
9426 &Vselect_safe_coding_system_function
,
9428 Function to call to select safe coding system for encoding a text.
9430 If set, this function is called to force a user to select a proper
9431 coding system which can encode the text in the case that a default
9432 coding system used in each operation can't encode the text.
9434 The default value is `select-safe-coding-system' (which see). */);
9435 Vselect_safe_coding_system_function
= Qnil
;
9437 DEFVAR_BOOL ("coding-system-require-warning",
9438 &coding_system_require_warning
,
9439 doc
: /* Internal use only.
9440 If non-nil, on writing a file, `select-safe-coding-system-function' is
9441 called even if `coding-system-for-write' is non-nil. The command
9442 `universal-coding-system-argument' binds this variable to t temporarily. */);
9443 coding_system_require_warning
= 0;
9446 DEFVAR_BOOL ("inhibit-iso-escape-detection",
9447 &inhibit_iso_escape_detection
,
9449 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9451 By default, on reading a file, Emacs tries to detect how the text is
9452 encoded. This code detection is sensitive to escape sequences. If
9453 the sequence is valid as ISO2022, the code is determined as one of
9454 the ISO2022 encodings, and the file is decoded by the corresponding
9455 coding system (e.g. `iso-2022-7bit').
9457 However, there may be a case that you want to read escape sequences in
9458 a file as is. In such a case, you can set this variable to non-nil.
9459 Then, as the code detection ignores any escape sequences, no file is
9460 detected as encoded in some ISO2022 encoding. The result is that all
9461 escape sequences become visible in a buffer.
9463 The default value is nil, and it is strongly recommended not to change
9464 it. That is because many Emacs Lisp source files that contain
9465 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9466 in Emacs's distribution, and they won't be decoded correctly on
9467 reading if you suppress escape sequence detection.
9469 The other way to read escape sequences in a file without decoding is
9470 to explicitly specify some coding system that doesn't use ISO2022's
9471 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
9472 inhibit_iso_escape_detection
= 0;
9474 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input
,
9475 doc
: /* Char table for translating self-inserting characters.
9476 This is applied to the result of input methods, not their input. See also
9477 `keyboard-translate-table'. */);
9478 Vtranslation_table_for_input
= Qnil
;
9481 Lisp_Object args
[coding_arg_max
];
9482 Lisp_Object plist
[16];
9485 for (i
= 0; i
< coding_arg_max
; i
++)
9488 plist
[0] = intern (":name");
9489 plist
[1] = args
[coding_arg_name
] = Qno_conversion
;
9490 plist
[2] = intern (":mnemonic");
9491 plist
[3] = args
[coding_arg_mnemonic
] = make_number ('=');
9492 plist
[4] = intern (":coding-type");
9493 plist
[5] = args
[coding_arg_coding_type
] = Qraw_text
;
9494 plist
[6] = intern (":ascii-compatible-p");
9495 plist
[7] = args
[coding_arg_ascii_compatible_p
] = Qt
;
9496 plist
[8] = intern (":default-char");
9497 plist
[9] = args
[coding_arg_default_char
] = make_number (0);
9498 plist
[10] = intern (":for-unibyte");
9499 plist
[11] = args
[coding_arg_for_unibyte
] = Qt
;
9500 plist
[12] = intern (":docstring");
9501 plist
[13] = build_string ("Do no conversion.\n\
9503 When you visit a file with this coding, the file is read into a\n\
9504 unibyte buffer as is, thus each byte of a file is treated as a\n\
9506 plist
[14] = intern (":eol-type");
9507 plist
[15] = args
[coding_arg_eol_type
] = Qunix
;
9508 args
[coding_arg_plist
] = Flist (16, plist
);
9509 Fdefine_coding_system_internal (coding_arg_max
, args
);
9512 setup_coding_system (Qno_conversion
, &keyboard_coding
);
9513 setup_coding_system (Qno_conversion
, &terminal_coding
);
9514 setup_coding_system (Qno_conversion
, &safe_terminal_coding
);
9519 for (i
= 0; i
< coding_category_max
; i
++)
9520 Fset (AREF (Vcoding_category_table
, i
), Qno_conversion
);
9525 emacs_strerror (error_number
)
9530 synchronize_system_messages_locale ();
9531 str
= strerror (error_number
);
9533 if (! NILP (Vlocale_coding_system
))
9535 Lisp_Object dec
= code_convert_string_norecord (build_string (str
),
9536 Vlocale_coding_system
,
9538 str
= (char *) SDATA (dec
);