src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009, 2010
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from coding system symbol to attributes vector is done by looking up
  63   Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we set up a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0 /* Template only (never compiled): the shape of a detect_coding_XXX function.  */
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;   /* Category bits to be merged into DETECT_INFO->found.  */
 166   ...;             /* E.g. `c' and `src_base', which ONE_MORE_BYTE uses.  */
 167
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the source is exhausted, jump
 171          to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (__C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source was exhausted without meeting an invalid byte.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size;
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0 /* Template only (never compiled): the shape of a decode_coding_XXX function.  */
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source code, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219   /* A real function also declares `c' and `consumed_chars' for ONE_MORE_BYTE.  */
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf >= charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that source area and destination area are
 258   overlapped, which means that we can produce an encoded text until it
 259   reaches the head of the not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0 /* Template only (never compiled): the shape of an encode_coding_XXX function.  */
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = coding->charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274   /* Stop early enough that one more iteration cannot write past DST_END.  */
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292 #include <setjmp.h>
 293
 294 #include "lisp.h"
 295 #include "buffer.h"
 296 #include "character.h"
 297 #include "charset.h"
 298 #include "ccl.h"
 299 #include "composite.h"
 300 #include "coding.h"
 301 #include "window.h"
 302 #include "frame.h"
 303 #include "termhooks.h"
 304
 305 Lisp_Object Vcoding_system_hash_table;  /* NOTE(review): presumably maps a coding system symbol to its attribute vector -- confirm.  */
 306 /* NOTE(review): the Q.../QC... objects below are presumably Lisp symbols and keywords; they are initialized outside this view.  */
 307 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 308 Lisp_Object Qunix, Qdos;
 309 extern Lisp_Object Qmac;        /* frame.c */
 310 Lisp_Object Qbuffer_file_coding_system;
 311 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 312 Lisp_Object Qdefault_char;
 313 Lisp_Object Qno_conversion, Qundecided;
 314 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 315 Lisp_Object Qbig, Qlittle;
 316 Lisp_Object Qcoding_system_history;
 317 Lisp_Object Qvalid_codes;
 318 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 319 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 320 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 321 Lisp_Object QCascii_compatible_p;
 322 /* NOTE(review): presumably symbols naming the I/O operations that select a coding system -- confirm.  */
 323 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 324 Lisp_Object Qcall_process, Qcall_process_region;
 325 Lisp_Object Qstart_process, Qopen_network_stream;
 326 Lisp_Object Qtarget_idx;
 327 /* NOTE(review): presumably symbols describing how a conversion ended (cf. CODING_RESULT_XXX) -- confirm.  */
 328 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 329 Lisp_Object Qinterrupted, Qinsufficient_memory;
 330
 331 extern Lisp_Object Qcompletion_ignore_case;
 332
 333 /* If a symbol has this property, evaluate the value to define the
 334    symbol as a coding system.  */
 335 static Lisp_Object Qcoding_system_define_form;
 336
 337 int coding_system_require_warning;
 338
 339 Lisp_Object Vselect_safe_coding_system_function;
 340
 341 /* Mnemonic string for each format of end-of-line.  */
 342 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 343 /* Mnemonic string to indicate that the format of end-of-line is not
 344    yet decided.  */
 345 Lisp_Object eol_mnemonic_undecided;
 346
 347 /* Format of end-of-line decided by system.  This is Qunix on
 348    Unix and Mac, Qdos on DOS/Windows.
 349    This has an effect only for external encoding (i.e. for output to
 350    file and process), not for in-buffer or Lisp string encoding.  */
 351 static Lisp_Object system_eol_type;
 352
 353 #ifdef emacs
 354 /* Everything down to the matching #endif is compiled only when `emacs' is defined.  */
 355 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 356
 357 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 358
 359 /* Coding system emacs-mule and raw-text are for converting only
 360    end-of-line format.  */
 361 Lisp_Object Qemacs_mule, Qraw_text;
 362 Lisp_Object Qutf_8_emacs;
 363
 364 /* Coding-systems are handed between Emacs Lisp programs and C internal
 365    routines by the following three variables.  */
 366 /* Coding-system for reading files and receiving data from process.  */
 367 Lisp_Object Vcoding_system_for_read;
 368 /* Coding-system for writing files and sending data to process.  */
 369 Lisp_Object Vcoding_system_for_write;
 370 /* Coding-system actually used in the latest I/O.  */
 371 Lisp_Object Vlast_coding_system_used;
 372 /* Set to non-nil when an error is detected during code conversion.  */
 373 Lisp_Object Vlast_code_conversion_error;
 374 /* A vector of length 256 which contains information about special
 375    Latin codes (especially for dealing with Microsoft codes).  */
 376 Lisp_Object Vlatin_extra_code_table;
 377
 378 /* Flag to inhibit code conversion of end-of-line format.  */
 379 int inhibit_eol_conversion;
 380
 381 /* Flag to inhibit ISO2022 escape sequence detection.  */
 382 int inhibit_iso_escape_detection;
 383
 384 /* Flag to inhibit detection of binary files through null bytes.  */
 385 int inhibit_null_byte_detection;
 386
 387 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 388 int inherit_process_coding_system;
 389
 390 /* Coding system to be used to encode text for terminal display when
 391    terminal coding system is nil.  */
 392 struct coding_system safe_terminal_coding;
 393 /* NOTE(review): presumably alists choosing coding systems for file, process and network I/O -- confirm.  */
 394 Lisp_Object Vfile_coding_system_alist;
 395 Lisp_Object Vprocess_coding_system_alist;
 396 Lisp_Object Vnetwork_coding_system_alist;
 397
 398 Lisp_Object Vlocale_coding_system;
 399
 400 #endif /* emacs */
 401
 402 /* Flag to tell whether translation tables are looked up during
 403    character code conversion.  */
 404 Lisp_Object Venable_character_translation;
 405 /* Standard translation table to look up on decoding (reading).  */
 406 Lisp_Object Vstandard_translation_table_for_decode;
 407 /* Standard translation table to look up on encoding (writing).  */
 408 Lisp_Object Vstandard_translation_table_for_encode;
 409 /* NOTE(review): presumably symbols related to translation tables -- confirm.  */
 410 Lisp_Object Qtranslation_table;
 411 Lisp_Object Qtranslation_table_id;
 412 Lisp_Object Qtranslation_table_for_decode;
 413 Lisp_Object Qtranslation_table_for_encode;
 414
 415 /* Alist of charsets vs revision number.  */
 416 static Lisp_Object Vcharset_revision_table;
 417
 418 /* Default coding systems used for process I/O.  */
 419 Lisp_Object Vdefault_process_coding_system;
 420
 421 /* Char table for translating Quail and self-inserting input.  */
 422 Lisp_Object Vtranslation_table_for_input;
 423
 424 /* Two special coding systems (presumably for Shift-JIS and BIG5; see section 8).  */
 425 Lisp_Object Vsjis_coding_system;
 426 Lisp_Object Vbig5_coding_system;
 427
 428 /* ISO2022 section: accessors for the ISO2022 attributes and state of a `struct coding_system'.  */
 429 /* Integer stored at index REG of the `coding_attr_iso_initial' element of CODING's attribute vector.  */
 430 #define CODING_ISO_INITIAL(coding, reg)                 \
 431   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 432                      coding_attr_iso_initial),          \
 433                reg)))
 434
 435 /* CODING->safe_charsets[CHARSET_ID], or -1 if CHARSET_ID exceeds CODING->max_charset_id or that entry is 255.  */
 436 #define CODING_ISO_REQUEST(coding, charset_id)          \
 437   (((charset_id) <= (coding)->max_charset_id            \
 438     ? ((coding)->safe_charsets[charset_id] != 255       \
 439        ? (coding)->safe_charsets[charset_id]            \
 440        : -1)                                            \
 441     : -1))
 442 /* The macros below access the members of CODING->spec.iso_2022.  */
 443 /* CODING_ISO_INVOKED_CHARSET indexes current_designation by the value of current_invocation[PLANE].  */
 444 #define CODING_ISO_FLAGS(coding)        \
 445   ((coding)->spec.iso_2022.flags)
 446 #define CODING_ISO_DESIGNATION(coding, reg)     \
 447   ((coding)->spec.iso_2022.current_designation[reg])
 448 #define CODING_ISO_INVOCATION(coding, plane)    \
 449   ((coding)->spec.iso_2022.current_invocation[plane])
 450 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 451   ((coding)->spec.iso_2022.single_shifting)
 452 #define CODING_ISO_BOL(coding)  \
 453   ((coding)->spec.iso_2022.bol)
 454 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 455   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 456 #define CODING_ISO_CMP_STATUS(coding)   \
 457   (&(coding)->spec.iso_2022.cmp_status)
 458 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 459   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 460 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 461   ((coding)->spec.iso_2022.embedded_utf_8)
 462
 463 /* Byte values of the control characters of ISO2022 that are named in this file.  */
 464                         /* code */      /* function */
 465 #define ISO_CODE_LF     0x0A            /* line-feed */
 466 #define ISO_CODE_CR     0x0D            /* carriage-return */
 467 #define ISO_CODE_SO     0x0E            /* shift-out */
 468 #define ISO_CODE_SI     0x0F            /* shift-in */
 469 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 470 #define ISO_CODE_ESC    0x1B            /* escape */
 471 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 472 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 473 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 474
 475 /* Every 1-byte code of ISO2022 is classified into one of the
 476    following classes.  */
 477 enum iso_code_class_type
 478   {
 479     ISO_control_0,              /* Control codes in the range
 480                                    0x00..0x1F and 0x7F, except for the
 481                                    following 4 codes.  */
 482     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 483     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 484     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 485     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 486     ISO_control_1,              /* Control codes in the range
 487                                    0x80..0x9F, except for the
 488                                    following 3 codes.  */
 489     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 490     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 491     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 492     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 493     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 494     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 495     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 496   };
 497
 498 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
 499     `iso-flags' attribute of an iso2022 coding system.  */
 500
 501 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 502    instead of the correct short-form sequence (e.g. ESC $ A).  */
 503 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 504
 505 /* If set, reset graphic planes and registers at end-of-line to the
 506    initial state.  */
 507 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 508
 509 /* If set, reset graphic planes and registers before any control
 510    characters to the initial state.  */
 511 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 512
 513 /* If set, encode by 7-bit environment.  */
 514 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 515
 516 /* If set, use locking-shift function.  */
 517 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 518
 519 /* If set, use single-shift function.  Overrides
 520    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 521 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 522
 523 /* If set, use designation escape sequence.  */
 524 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 525
 526 /* If set, produce revision number sequence.  */
 527 #define CODING_ISO_FLAG_REVISION        0x0080
 528
 529 /* If set, produce ISO6429's direction specifying sequence.  */
 530 #define CODING_ISO_FLAG_DIRECTION       0x0100
 531
 532 /* If set, assume designation states are reset at beginning of line on
 533    output.  */
 534 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 535
 536 /* If set, designation sequence should be placed at beginning of line
 537    on output.  */
 538 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 539
 540 /* If set, do not encode unsafe characters on output.  */
 541 #define CODING_ISO_FLAG_SAFE            0x0800
 542
 543 /* If set, extra latin codes (128..159) are accepted as a valid code
 544    on input.  */
 545 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 546 /* NOTE(review): undocumented in the original; presumably enables handling of composition sequences -- confirm.  */
 547 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 548 /* NOTE(review): undocumented in the original; the name suggests an EUC-TW specific shift -- confirm.  */
 549 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 550 /* NOTE(review): undocumented in the original; meaning not visible here -- confirm against the users of this flag.  */
 551 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 552 /* NOTE(review): undocumented in the original; meaning not visible here -- confirm against the users of this flag.  */
 553 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 554 /* Undocumented in the original.  Note the value: bits 0x20000..0x80000 are not assigned by any flag above.  */
 555 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 556
 557 /* A character to be produced on output if encoding of the original
 558    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 559 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 560
 561 /* UTF-8 section: accessor for CODING->spec.utf_8_bom.  */
 562 #define CODING_UTF_8_BOM(coding)        \
 563   ((coding)->spec.utf_8_bom)
 564
 565 /* UTF-16 section: accessors for the members of CODING->spec.utf_16.  */
 566 #define CODING_UTF_16_BOM(coding)       \
 567   ((coding)->spec.utf_16.bom)
 568
 569 #define CODING_UTF_16_ENDIAN(coding)    \
 570   ((coding)->spec.utf_16.endian)
 571
 572 #define CODING_UTF_16_SURROGATE(coding) \
 573   ((coding)->spec.utf_16.surrogate)
 574
 575
 576 /* CCL section: fetch the coding_attr_ccl_decoder, coding_attr_ccl_encoder and coding_attr_ccl_valids elements of CODING's attribute vector; CODING_CCL_VALIDS applies SDATA to its element.  */
 577 #define CODING_CCL_DECODER(coding)      \
 578   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 579 #define CODING_CCL_ENCODER(coding)      \
 580   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 581 #define CODING_CCL_VALIDS(coding)                                          \
 582   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 583
 584 /* Index for each coding category in `coding_categories'.  Each value is also the bit position of
 585    the matching CATEGORY_MASK_XXX; coding_category_max sizes `coding_priorities' and `coding_categories'.  */
 586 enum coding_category
 587   {
 588     coding_category_iso_7,
 589     coding_category_iso_7_tight,
 590     coding_category_iso_8_1,
 591     coding_category_iso_8_2,
 592     coding_category_iso_7_else,
 593     coding_category_iso_8_else,
 594     coding_category_utf_8_auto,
 595     coding_category_utf_8_nosig,
 596     coding_category_utf_8_sig,
 597     coding_category_utf_16_auto,
 598     coding_category_utf_16_be,
 599     coding_category_utf_16_le,
 600     coding_category_utf_16_be_nosig,
 601     coding_category_utf_16_le_nosig,
 602     coding_category_charset,
 603     coding_category_sjis,
 604     coding_category_big5,
 605     coding_category_ccl,
 606     coding_category_emacs_mule,
 607     /* All above are targets of code detection.  */
 608     coding_category_raw_text,
 609     coding_category_undecided,
 610     coding_category_max
 611   };
 612
 613 /* Definitions of flag bits used in detect_coding_XXXX: each mask is the bit at the position given by the matching `enum coding_category' value.  */
 614 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 615 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 616 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 617 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 618 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 619 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 620 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 621 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 622 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 623 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 624 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 625 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 626 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 627 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 628 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 629 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 630 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 631 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 632 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 633 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 634 /* The union of every mask above except CATEGORY_MASK_RAW_TEXT.
 635    This value is returned if detect_coding_mask () finds nothing other
 636    than ASCII characters.  */
 637 #define CATEGORY_MASK_ANY               \
 638   (CATEGORY_MASK_ISO_7                  \
 639    | CATEGORY_MASK_ISO_7_TIGHT          \
 640    | CATEGORY_MASK_ISO_8_1              \
 641    | CATEGORY_MASK_ISO_8_2              \
 642    | CATEGORY_MASK_ISO_7_ELSE           \
 643    | CATEGORY_MASK_ISO_8_ELSE           \
 644    | CATEGORY_MASK_UTF_8_AUTO           \
 645    | CATEGORY_MASK_UTF_8_NOSIG          \
 646    | CATEGORY_MASK_UTF_8_SIG            \
 647    | CATEGORY_MASK_UTF_16_AUTO          \
 648    | CATEGORY_MASK_UTF_16_BE            \
 649    | CATEGORY_MASK_UTF_16_LE            \
 650    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 651    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 652    | CATEGORY_MASK_CHARSET              \
 653    | CATEGORY_MASK_SJIS                 \
 654    | CATEGORY_MASK_BIG5                 \
 655    | CATEGORY_MASK_CCL                  \
 656    | CATEGORY_MASK_EMACS_MULE)
 657 /* The remaining macros are unions of related category masks.  */
 658
 659 #define CATEGORY_MASK_ISO_7BIT \
 660   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 661
 662 #define CATEGORY_MASK_ISO_8BIT \
 663   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 664
 665 #define CATEGORY_MASK_ISO_ELSE \
 666   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 667 /* NOTE(review): presumably the ISO categories that are recognized through escape sequences -- confirm.  */
 668 #define CATEGORY_MASK_ISO_ESCAPE        \
 669   (CATEGORY_MASK_ISO_7                  \
 670    | CATEGORY_MASK_ISO_7_TIGHT          \
 671    | CATEGORY_MASK_ISO_7_ELSE           \
 672    | CATEGORY_MASK_ISO_8_ELSE)
 673 /* All six ISO categories.  */
 674 #define CATEGORY_MASK_ISO       \
 675   (  CATEGORY_MASK_ISO_7BIT     \
 676      | CATEGORY_MASK_ISO_8BIT   \
 677      | CATEGORY_MASK_ISO_ELSE)
 678 /* All five UTF-16 categories.  */
 679 #define CATEGORY_MASK_UTF_16            \
 680   (CATEGORY_MASK_UTF_16_AUTO            \
 681    | CATEGORY_MASK_UTF_16_BE            \
 682    | CATEGORY_MASK_UTF_16_LE            \
 683    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 684    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 685 /* All three UTF-8 categories.  */
 686 #define CATEGORY_MASK_UTF_8     \
 687   (CATEGORY_MASK_UTF_8_AUTO     \
 688    | CATEGORY_MASK_UTF_8_NOSIG  \
 689    | CATEGORY_MASK_UTF_8_SIG)
 690
 691 /* List of symbols `coding-category-xxx' ordered by priority.  This
 692    variable is exposed to Emacs Lisp.  */
 693 static Lisp_Object Vcoding_category_list;
 694
 695 /* Table of coding categories (Lisp symbols).  This variable is for
 696    internal use only.  */
 697 static Lisp_Object Vcoding_category_table;
 698
 699 /* Table of coding-categories ordered by priority.  */
 700 static enum coding_category coding_priorities[coding_category_max];
 701
 702 /* Nth element is a coding context for the coding system bound to the
 703    Nth coding category.  */
 704 static struct coding_system coding_categories[coding_category_max];
 705
 706 /*** Commonly used macros and functions ***/
 707 /* Smaller/larger of A and B, defined only if not already provided.  The selected argument is evaluated twice, so avoid side effects.  */
 708 #ifndef min
 709 #define min(a, b) ((a) < (b) ? (a) : (b))
 710 #endif
 711 #ifndef max
 712 #define max(a, b) ((a) > (b) ? (a) : (b))
 713 #endif
 714 /* Set ATTRS to CODING_ID_ATTRS of CODING's id, and CHARSET_LIST to CODING_ATTR_CHARSET_LIST of ATTRS.  */
 715 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 716   do {                                                  \
 717     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 718     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 719   } while (0)
 720
 721
 722 /* Safely get one byte from the source text pointed by SRC which ends
 723    at SRC_END, set C to that byte, and increment CONSUMED_CHARS.  If no
 724    byte is left, jump to `no_more_source', first recording
 725    CODING_RESULT_INSUFFICIENT_SRC when SRC_BASE < SRC.  If multibytep is
 726    nonzero and the byte is 0xC0 or 0xC1, C becomes ((C & 1) << 6) | next
 727    byte (read without checking SRC_END); for any other byte >= 0x80, C is
 728    the negated result of string_char and CODING_RESULT_INVALID_SRC is recorded.
 729    The caller must provide: coding, src, src_base, src_end, multibytep, consumed_chars */
 730 #define ONE_MORE_BYTE(c)                                \
 731   do {                                                  \
 732     if (src == src_end)                                 \
 733       {                                                 \
 734         if (src_base < src)                             \
 735           record_conversion_result                      \
 736             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 737         goto no_more_source;                            \
 738       }                                                 \
 739     c = *src++;                                         \
 740     if (multibytep && (c & 0x80))                       \
 741       {                                                 \
 742         if ((c & 0xFE) == 0xC0)                         \
 743           c = ((c & 1) << 6) | *src++;                  \
 744         else                                            \
 745           {                                             \
 746             src--;                                      \
 747             c = - string_char (src, &src, NULL);        \
 748             record_conversion_result                    \
 749               (coding, CODING_RESULT_INVALID_SRC);      \
 750           }                                             \
 751       }                                                 \
 752     consumed_chars++;                                   \
 753   } while (0)
 754
 755 /* Safely get two bytes from the source text pointed by SRC which ends
 756    at SRC_END, and set C1 and C2 to those bytes while skipping the
 757    heading multibyte characters.  If there are not enough bytes in the
 758    source, it jumps to `no_more_source' (no conversion result is
 759    recorded).  If multibytep is nonzero and the byte read for C2 is
 760    >= 0x80 but is neither 0xC0 nor 0xC1, set C2 to -1; the bytes after
 761    it are not skipped.  The caller should declare
 762    and set these variables appropriately in advance:
 763         src, src_end, multibytep
 764    It is intended that this macro is used in detect_coding_utf_16.  */
 765 #define TWO_MORE_BYTES(c1, c2)                          \
 766   do {                                                  \
 767     do {                                                \
 768       if (src == src_end)                               \
 769         goto no_more_source;                            \
 770       c1 = *src++;                                      \
 771       if (multibytep && (c1 & 0x80))                    \
 772         {                                               \
 773           if ((c1 & 0xFE) == 0xC0)                      \
 774             c1 = ((c1 & 1) << 6) | *src++;              \
 775           else                                          \
 776             {                                           \
 777               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 778               c1 = -1;                                  \
 779             }                                           \
 780         }                                               \
 781     } while (c1 < 0);                                   \
 782     if (src == src_end)                                 \
 783       goto no_more_source;                              \
 784     c2 = *src++;                                        \
 785     if (multibytep && (c2 & 0x80))                      \
 786       {                                                 \
 787         if ((c2 & 0xFE) == 0xC0)                        \
 788           c2 = ((c2 & 1) << 6) | *src++;                \
 789         else                                            \
 790           c2 = -1;                                      \
 791       }                                                 \
 792   } while (0)
 793
 794
     /* Get one byte from the source text pointed by SRC into C and
        advance SRC, without checking whether SRC has reached the end of
        the source.  If multibytep is nonzero and the byte has its high
        bit set, a two-byte sequence led by 0xC0 or 0xC1 is folded back
        into the single byte value it carries; for any other multibyte
        character C is set to the negative value of the character code
        returned by string_char, SRC is moved by string_char, and
        CODING_RESULT_INVALID_SRC is recorded.  CONSUMED_CHARS is
        incremented by one.  The caller should declare and set these
        variables appropriately in advance:
             src, multibytep, coding, consumed_chars  */
 795 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 796   do {                                                  \
 797     c = *src++;                                         \
 798     if (multibytep && (c & 0x80))                       \
 799       {                                                 \
 800         if ((c & 0xFE) == 0xC0)                         \
 801           c = ((c & 1) << 6) | *src++;                  \
 802         else                                            \
 803           {                                             \
 804             src--;                                      \
 805             c = - string_char (src, &src, NULL);        \
 806             record_conversion_result                    \
 807               (coding, CODING_RESULT_INVALID_SRC);      \
 808           }                                             \
 809       }                                                 \
 810     consumed_chars++;                                   \
 811   } while (0)
 812
 813
 814 /* Store a byte C in the place pointed by DST and increment DST to the
 815    next free point, and increment PRODUCED_CHARS.  The caller should
 816    assure that C is 0..127, and declare and set the variables `dst'
 817    and `produced_chars' appropriately in advance.  This macro itself
 818    does not check that there is room at DST.  */
 819
 820
 821 #define EMIT_ONE_ASCII_BYTE(c)  \
 822   do {                          \
 823     produced_chars++;           \
 824     *dst++ = (c);               \
 825   } while (0)
 826
 827
 828 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2.  */
 829
 830 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 831   do {                                  \
 832     produced_chars += 2;                \
 833     *dst++ = (c1), *dst++ = (c2);       \
 834   } while (0)
 835
 836
 837 /* Store a byte C in the place pointed by DST and increment DST to the
 838    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 839    nonzero, store in an appropriate multibyte form (a byte >= 0x80 is
 840    first converted with BYTE8_TO_CHAR).  The caller should declare and
 841    set the variables `dst' and `multibytep' appropriately in advance.  */
 842
 843 #define EMIT_ONE_BYTE(c)                \
 844   do {                                  \
 845     produced_chars++;                   \
 846     if (multibytep)                     \
 847       {                                 \
 848         int ch = (c);                   \
 849         if (ch >= 0x80)                 \
 850           ch = BYTE8_TO_CHAR (ch);      \
 851         CHAR_STRING_ADVANCE (ch, dst);  \
 852       }                                 \
 853     else                                \
 854       *dst++ = (c);                     \
 855   } while (0)
 856
 857
 858 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2, in that order.
        PRODUCED_CHARS is incremented by two.  */
 859
 860 #define EMIT_TWO_BYTES(c1, c2)          \
 861   do {                                  \
 862     produced_chars += 2;                \
 863     if (multibytep)                     \
 864       {                                 \
 865         int ch;                         \
 866                                         \
 867         ch = (c1);                      \
 868         if (ch >= 0x80)                 \
 869           ch = BYTE8_TO_CHAR (ch);      \
 870         CHAR_STRING_ADVANCE (ch, dst);  \
 871         ch = (c2);                      \
 872         if (ch >= 0x80)                 \
 873           ch = BYTE8_TO_CHAR (ch);      \
 874         CHAR_STRING_ADVANCE (ch, dst);  \
 875       }                                 \
 876     else                                \
 877       {                                 \
 878         *dst++ = (c1);                  \
 879         *dst++ = (c2);                  \
 880       }                                 \
 881   } while (0)
 882
 883
     /* Emit three bytes C1, C2 and C3 in that order, as one
        EMIT_ONE_BYTE followed by one EMIT_TWO_BYTES.  The same caller
        variables as for those macros are required.  */
 884 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 885   do {                                  \
 886     EMIT_ONE_BYTE (c1);                 \
 887     EMIT_TWO_BYTES (c2, c3);            \
 888   } while (0)
 889
 890
     /* Emit four bytes C1, C2, C3 and C4 in that order, as two
        successive EMIT_TWO_BYTES.  The same caller variables as for
        that macro are required.  */
 891 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 892   do {                                          \
 893     EMIT_TWO_BYTES (c1, c2);                    \
 894     EMIT_TWO_BYTES (c3, c4);                    \
 895   } while (0)
 896
 897
 898 /* Prototypes for static functions.  */
 899 static void record_conversion_result (struct coding_system *coding,
 900                                       enum coding_result_code result);
     /* One detect/decode/encode group per kind of coding system.  The
        raw-text group at the end has no detector.  */
 901 static int detect_coding_utf_8 (struct coding_system *,
 902                                 struct coding_detection_info *info);
 903 static void decode_coding_utf_8 (struct coding_system *);
 904 static int encode_coding_utf_8 (struct coding_system *);
 905
 906 static int detect_coding_utf_16 (struct coding_system *,
 907                                  struct coding_detection_info *info);
 908 static void decode_coding_utf_16 (struct coding_system *);
 909 static int encode_coding_utf_16 (struct coding_system *);
 910
 911 static int detect_coding_iso_2022 (struct coding_system *,
 912                                    struct coding_detection_info *info);
 913 static void decode_coding_iso_2022 (struct coding_system *);
 914 static int encode_coding_iso_2022 (struct coding_system *);
 915
 916 static int detect_coding_emacs_mule (struct coding_system *,
 917                                      struct coding_detection_info *info);
 918 static void decode_coding_emacs_mule (struct coding_system *);
 919 static int encode_coding_emacs_mule (struct coding_system *);
 920
 921 static int detect_coding_sjis (struct coding_system *,
 922                                struct coding_detection_info *info);
 923 static void decode_coding_sjis (struct coding_system *);
 924 static int encode_coding_sjis (struct coding_system *);
 925
 926 static int detect_coding_big5 (struct coding_system *,
 927                                struct coding_detection_info *info);
 928 static void decode_coding_big5 (struct coding_system *);
 929 static int encode_coding_big5 (struct coding_system *);
 930
 931 static int detect_coding_ccl (struct coding_system *,
 932                               struct coding_detection_info *info);
 933 static void decode_coding_ccl (struct coding_system *);
 934 static int encode_coding_ccl (struct coding_system *);
 935
 936 static void decode_coding_raw_text (struct coding_system *);
 937 static int encode_coding_raw_text (struct coding_system *);
 938
     /* Maintenance of the source and destination pointers, and growth
        of the destination.  */
 939 static void coding_set_source (struct coding_system *);
 940 static void coding_set_destination (struct coding_system *);
 941 static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
 942 static void coding_alloc_by_making_gap (struct coding_system *,
 943                                         EMACS_INT, EMACS_INT);
 944 static unsigned char *alloc_destination (struct coding_system *,
 945                                          EMACS_INT, unsigned char *);
     /* Remaining helpers, defined later in this file.  */
 946 static void setup_iso_safe_charsets (Lisp_Object);
 947 static unsigned char *encode_designation_at_bol (struct coding_system *,
 948                                                  int *, int *,
 949                                                  unsigned char *);
 950 static int detect_eol (const unsigned char *,
 951                        EMACS_INT, enum coding_category);
 952 static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
 953 static void decode_eol (struct coding_system *);
 954 static Lisp_Object get_translation_table (Lisp_Object, int, int *);
 955 static Lisp_Object get_translation (Lisp_Object, int *, int *);
 956 static int produce_chars (struct coding_system *, Lisp_Object, int);
 957 static INLINE void produce_charset (struct coding_system *, int *,
 958                                     EMACS_INT);
 959 static void produce_annotation (struct coding_system *, EMACS_INT);
 960 static int decode_coding (struct coding_system *);
 961 static INLINE int *handle_composition_annotation (EMACS_INT, EMACS_INT,
 962                                                   struct coding_system *,
 963                                                   int *, EMACS_INT *);
 964 static INLINE int *handle_charset_annotation (EMACS_INT, EMACS_INT,
 965                                               struct coding_system *,
 966                                               int *, EMACS_INT *);
 967 static void consume_chars (struct coding_system *, Lisp_Object, int);
 968 static int encode_coding (struct coding_system *);
 969 static Lisp_Object make_conversion_work_buffer (int);
 970 static Lisp_Object code_conversion_restore (Lisp_Object);
 971 static INLINE int char_encodable_p (int, Lisp_Object);
 972 static Lisp_Object make_subsidiaries (Lisp_Object);
 973
     /* Record RESULT as the outcome of a conversion step: store it in
        CODING->result and, for the error results, set
        Vlast_code_conversion_error to the matching symbol.  */
 974 static void
 975 record_conversion_result (struct coding_system *coding,
 976                           enum coding_result_code result)
 977 {
 978   coding->result = result;
 979
 980   /* Success leaves the last error alone.  So does a full
 981      destination: we don't record that error because it happens just
 982      temporarily and is resolved when the whole conversion is
 983      finished.  */
 984   if (result == CODING_RESULT_SUCCESS
 985       || result == CODING_RESULT_INSUFFICIENT_DST)
 986     return;
 987
 988   if (result == CODING_RESULT_INSUFFICIENT_SRC)
 989     Vlast_code_conversion_error = Qinsufficient_source;
 990   else if (result == CODING_RESULT_INCONSISTENT_EOL)
 991     Vlast_code_conversion_error = Qinconsistent_eol;
 992   else if (result == CODING_RESULT_INVALID_SRC)
 993     Vlast_code_conversion_error = Qinvalid_source;
 994   else if (result == CODING_RESULT_INTERRUPT)
 995     Vlast_code_conversion_error = Qinterrupted;
 996   else if (result == CODING_RESULT_INSUFFICIENT_MEM)
 997     Vlast_code_conversion_error = Qinsufficient_memory;
 998   else
 999     /* Any result code not handled above.  */
1000     Vlast_code_conversion_error = intern ("Unknown error");
1001 }
1007
1008 /* This wrapper macro is used to preserve validity of pointers into
1009    buffer text across calls to decode_char, which could cause
1010    relocation of buffers if it loads a charset map, because loading a
1011    charset map allocates large structures.
        C is set to DECODE_CHAR (CHARSET, CODE).  If charset_map_loaded
        is nonzero afterwards, CODING->source is recomputed by
        coding_set_source and SRC, SRC_BASE and SRC_END are all moved by
        the distance CODING->source moved.  */
1012 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
1013   do {                                                                       \
1014     charset_map_loaded = 0;                                                  \
1015     c = DECODE_CHAR (charset, code);                                         \
1016     if (charset_map_loaded)                                                  \
1017       {                                                                      \
1018         const unsigned char *orig = coding->source;                          \
1019         EMACS_INT offset;                                                    \
1020                                                                              \
1021         coding_set_source (coding);                                          \
1022         offset = coding->source - orig;                                      \
1023         src += offset;                                                       \
1024         src_base += offset;                                                  \
1025         src_end += offset;                                                   \
1026       }                                                                      \
1027   } while (0)
1028
1029
1030 /* If there are not at least BYTES length of room at dst, allocate
1031    memory for coding->destination and update dst and dst_end.  We
1032    don't have to take care of coding->source which will be relocated.
1033    It is handled by calling coding_set_source in encode_coding.
        The caller should declare and set these variables in advance:
             coding, charbuf, charbuf_end, dst, dst_end
        The amount requested is the number of elements left in charbuf
        plus BYTES.  It is computed as EMACS_INT, the type
        alloc_destination takes, so the pointer difference is not
        narrowed to int on the way.  */
1034
1035 #define ASSURE_DESTINATION(bytes)                               \
1036   do {                                                          \
1037     if (dst + (bytes) >= dst_end)                               \
1038       {                                                         \
1039         EMACS_INT more_bytes = charbuf_end - charbuf + (bytes); \
1040                                                                 \
1041         dst = alloc_destination (coding, more_bytes, dst);      \
1042         dst_end = coding->destination + coding->dst_bytes;      \
1043       }                                                         \
1044   } while (0)
1045
1046
1047 /* Store multibyte form of the character C in P, and advance P to the
1048    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
1049    never calls MAYBE_UNIFY_CHAR.
        The form is one to five bytes long depending on which of
        MAX_1_BYTE_CHAR ... MAX_5_BYTE_CHAR bounds C; a larger C is
        stored by BYTE8_STRING after subtracting 0x3FFF80.
        C and P are evaluated several times, so neither may have side
        effects.  Every use of C is parenthesized so that an expression
        passed as C is grouped correctly.  */
1050
1051 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
1052   do {                                          \
1053     if ((c) <= MAX_1_BYTE_CHAR)                 \
1054       *(p)++ = (c);                             \
1055     else if ((c) <= MAX_2_BYTE_CHAR)            \
1056       *(p)++ = (0xC0 | ((c) >> 6)),             \
1057         *(p)++ = (0x80 | ((c) & 0x3F));         \
1058     else if ((c) <= MAX_3_BYTE_CHAR)            \
1059       *(p)++ = (0xE0 | ((c) >> 12)),            \
1060         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1061         *(p)++ = (0x80 | ((c) & 0x3F));         \
1062     else if ((c) <= MAX_4_BYTE_CHAR)            \
1063       *(p)++ = (0xF0 | ((c) >> 18)),            \
1064         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)), \
1065         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1066         *(p)++ = (0x80 | ((c) & 0x3F));         \
1067     else if ((c) <= MAX_5_BYTE_CHAR)            \
1068       *(p)++ = 0xF8,                            \
1069         *(p)++ = (0x80 | (((c) >> 18) & 0x0F)), \
1070         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)), \
1071         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1072         *(p)++ = (0x80 | ((c) & 0x3F));         \
1073     else                                        \
1074       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1075   } while (0)
1076
1077
1078 /* Return the character code of character whose multibyte form is at
1079    P, and advance P to the end of the multibyte form.  This is like
1080    STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.
        The length of the form (one to five bytes) is chosen from the
        high bits of the first byte.  A two-byte form whose first byte is
        below 0xC2 gets 0x3FFF80 ORed into the result; that is the same
        offset CHAR_STRING_ADVANCE_NO_UNIFY subtracts before calling
        BYTE8_STRING.  For the five-byte form only the four trailing
        bytes contribute to the result.
        P is evaluated several times, so it must not have side effects,
        and the bytes are not checked for validity.  */
1081
1082 #define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
1083   (!((p)[0] & 0x80)                                             \
1084    ? *(p)++                                                     \
1085    : ! ((p)[0] & 0x20)                                          \
1086    ? ((p) += 2,                                                 \
1087       ((((p)[-2] & 0x1F) << 6)                                  \
1088        | ((p)[-1] & 0x3F)                                       \
1089        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
1090    : ! ((p)[0] & 0x10)                                          \
1091    ? ((p) += 3,                                                 \
1092       ((((p)[-3] & 0x0F) << 12)                                 \
1093        | (((p)[-2] & 0x3F) << 6)                                \
1094        | ((p)[-1] & 0x3F)))                                     \
1095    : ! ((p)[0] & 0x08)                                          \
1096    ? ((p) += 4,                                                 \
1097       ((((p)[-4] & 0xF) << 18)                                  \
1098        | (((p)[-3] & 0x3F) << 12)                               \
1099        | (((p)[-2] & 0x3F) << 6)                                \
1100        | ((p)[-1] & 0x3F)))                                     \
1101    : ((p) += 5,                                                 \
1102       ((((p)[-4] & 0x3F) << 18)                                 \
1103        | (((p)[-3] & 0x3F) << 12)                               \
1104        | (((p)[-2] & 0x3F) << 6)                                \
1105        | ((p)[-1] & 0x3F))))
1106
1107
     /* Recompute CODING->source from CODING->src_object and
        CODING->src_pos_byte when the source is a buffer or a string.  */
1108 static void
1109 coding_set_source (struct coding_system *coding)
1110 {
1111   Lisp_Object object = coding->src_object;
1112
1113   if (STRINGP (object))
1114     coding->source = SDATA (object) + coding->src_pos_byte;
1115   else if (BUFFERP (object))
1116     {
1117       struct buffer *b = XBUFFER (object);
1118
1119       /* A negative src_pos selects an address counted from the end
1120          of the buffer's gap instead of a byte position.  */
1121       coding->source = (coding->src_pos < 0
1122                         ? BUF_GAP_END_ADDR (b) + coding->src_pos_byte
1123                         : BUF_BYTE_ADDRESS (b, coding->src_pos_byte));
1124     }
1125
1126   /* Otherwise, the source is C string and is never relocated
1127      automatically.  Thus we don't have to update anything.  */
1128 }
1129
     /* Recompute CODING->destination and CODING->dst_bytes when the
        destination is a buffer.  */
1130 static void
1131 coding_set_destination (struct coding_system *coding)
1132 {
1133   if (! BUFFERP (coding->dst_object))
1134     /* Otherwise, the destination is C string and is never relocated
1135        automatically.  Thus we don't have to update anything.  */
1136     return;
1137
1138   if (coding->src_pos >= 0)
1139     {
1140       /* We are sure that coding->dst_pos_byte is before the gap
1141          of the buffer. */
1142       struct buffer *b = XBUFFER (coding->dst_object);
1143
1144       coding->destination = BUF_BEG_ADDR (b) + coding->dst_pos_byte - BEG_BYTE;
1145       coding->dst_bytes = BUF_GAP_END_ADDR (b) - coding->destination;
1146     }
1147   else
1148     {
1149       /* Addresses are taken from the current buffer here; the room
1150          stops short of the gap end by the unconsumed source bytes.  */
1151       coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1152       coding->dst_bytes = (GAP_END_ADDR
1153                            - (coding->src_bytes - coding->consumed)
1154                            - coding->destination);
1155     }
1156 }
1157
1158
     /* Enlarge the memory block at CODING->destination by BYTES with
        xrealloc.  CODING->dst_bytes is updated only after xrealloc has
        returned.  */
1159 static void
1160 coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
1161 {
1162   EMACS_INT size = coding->dst_bytes + bytes;
1163   coding->destination = (unsigned char *) xrealloc (coding->destination, size);
1164   coding->dst_bytes = size;
1165 }
1166
     /* Enlarge the gap of the destination buffer of CODING by BYTES
        using make_gap.  GAP_HEAD_USED is the number of bytes at the
        head of the gap that already hold produced data
        (alloc_destination passes the distance from the gap start to the
        current write pointer).  It is only used when the source and the
        destination are the same object.  */
1167 static void
1168 coding_alloc_by_making_gap (struct coding_system *coding, EMACS_INT gap_head_used, EMACS_INT bytes)
1169 {
1170   if (EQ (coding->src_object, coding->dst_object))
1171     {
1172       /* The gap may contain the produced data at the head and not-yet
1173          consumed data at the tail.  To preserve those data, we at
1174          first make the gap size to zero, then increase the gap
1175          size.  */
1176       EMACS_INT add = GAP_SIZE;
1177
           /* Count the produced bytes and the whole old gap as buffer
              text while make_gap runs, then undo exactly the same
              adjustments in reverse.  */
1178       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1179       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1180       make_gap (bytes);
1181       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1182       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1183     }
1184   else
1185     {
1186       Lisp_Object this_buffer;
1187
           /* make_gap is called with the destination buffer made
              current; the previously current buffer is restored
              afterwards.  */
1188       this_buffer = Fcurrent_buffer ();
1189       set_buffer_internal (XBUFFER (coding->dst_object));
1190       make_gap (bytes);
1191       set_buffer_internal (XBUFFER (this_buffer));
1192     }
1193 }
1194
1195
     /* Make NBYTES more bytes of room in the destination of CODING,
        either by enlarging the gap of the destination buffer or by
        reallocating the destination memory.  DST points into the
        current destination; return the pointer at the same offset from
        the (possibly changed) CODING->destination.  */
1196 static unsigned char *
1197 alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1198                    unsigned char *dst)
1199 {
1200   /* Remember DST as an offset; coding->destination may change.  */
1201   EMACS_INT used = dst - coding->destination;
1202
1203   if (! BUFFERP (coding->dst_object))
1204     coding_alloc_by_realloc (coding, nbytes);
1205   else
1206     coding_alloc_by_making_gap
1207       (coding, dst - BUF_GPT_ADDR (XBUFFER (coding->dst_object)), nbytes);
1208
1209   /* Refresh coding->destination and coding->dst_bytes.  */
1210   coding_set_destination (coding);
1211   return coding->destination + used;
1212 }
1213
1214 /** Macros for annotations.  */
1215
1216 /* An annotation data is stored in the array coding->charbuf in this
1217    format:
1218      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1219    LENGTH is the number of elements in the annotation.
1220    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1221    NCHARS is the number of characters in the text annotated.
1222
1223    The format of the following elements depend on ANNOTATION_MASK.
1224
1225    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1226    follow:
1227      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1228
1229    NBYTES is the number of bytes specified in the header part of
1230    old-style emacs-mule encoding, or 0 for the other kind of
1231    composition.
1232
1233    METHOD is one of enum composition_method.
1234
1235    Optional COMPOSITION-COMPONENTS are characters and composition
1236    rules.
1237
1238    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1239    follows.
1240
1241    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1242    recover from an invalid annotation, and should be skipped by
1243    produce_annotation.  */
1244
1245 /* Maximum length of the header of annotation data.  The longest
        header is the one written by ADD_COMPOSITION_DATA: the three
        common elements followed by NBYTES and METHOD.  */
1246 #define MAX_ANNOTATION_LENGTH 5
1247
     /* Store the three common annotation elements -LEN, MASK and NCHARS
        at BUF, advance BUF past them, and set coding->annotated.  The
        caller should declare and set the variable `coding' in advance.
        The expansion deliberately does not end with a semicolon, so
        that `ADD_ANNOTATION_DATA (...);' is exactly one statement and
        can be used in an unbraced if/else.  */
1248 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1249   do {                                                  \
1250     *(buf)++ = -(len);                                  \
1251     *(buf)++ = (mask);                                  \
1252     *(buf)++ = (nchars);                                \
1253     coding->annotated = 1;                              \
1254   } while (0)
1255
     /* Store a composition annotation header at BUF and advance BUF past
        it: the common elements (with length 5 and
        CODING_ANNOTATE_COMPOSITION_MASK) followed by NBYTES and METHOD.
        The arguments are parenthesized so that expressions passed for
        BUF, NBYTES or METHOD are grouped correctly.  */
1256 #define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
1257   do {                                                                      \
1258     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1259     *(buf)++ = (nbytes);                                                    \
1260     *(buf)++ = (method);                                                    \
1261   } while (0)
1262
1263
     /* Store a charset annotation at BUF and advance BUF past it: the
        common elements (with length 4 and CODING_ANNOTATE_CHARSET_MASK)
        followed by the charset ID.  The arguments are parenthesized so
        that expressions passed for BUF or ID are grouped correctly.  */
1264 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1265   do {                                                                  \
1266     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1267     *(buf)++ = (id);                                                    \
1268   } while (0)
1269
1270 \f
1271 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1272
1273
1274
1275 \f
1276 /*** 3. UTF-8 ***/
1277
1278 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1279    Check if a text is encoded in UTF-8.  If it is, return 1, else
1280    return 0.  */
1281
     /* Classification of a single octet C by its high bits: a one-octet
        character (below 0x80), an extra (trailing) octet of the form
        10xxxxxx, or the leading octet of a 2-, 3-, 4- or 5-octet
        sequence.  The first test is a plain comparison, so it is also
        true for a negative C.  */
1282 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1283 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1284 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1285 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1286 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1287 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1288
     /* The byte order mark character, and the three octets that encode
        it in UTF-8.  */
1289 #define UTF_BOM 0xFEFF
1290 #define UTF_8_BOM_1 0xEF
1291 #define UTF_8_BOM_2 0xBB
1292 #define UTF_8_BOM_3 0xBF
1293
     /* Check if the source text of CODING is encoded in UTF-8.
        Return 0, with CATEGORY_MASK_UTF_8 added to DETECT_INFO->rejected,
        if an invalid sequence is seen or if the last block ends in the
        middle of a sequence.  Otherwise return 1 when the source is
        exhausted; DETECT_INFO->found and ->rejected then tell the
        with-signature and without-signature categories apart.  */
1294 static int
1295 detect_coding_utf_8 (struct coding_system *coding, struct coding_detection_info *detect_info)
1296 {
1297   const unsigned char *src = coding->source, *src_base;
1298   const unsigned char *src_end = coding->source + coding->src_bytes;
1299   int multibytep = coding->src_multibyte;
1300   int consumed_chars = 0;
1301   int bom_found = 0;
1302   int found = 0;
1303
1304   detect_info->checked |= CATEGORY_MASK_UTF_8;
1305   /* A coding system of this category is always ASCII compatible.  */
1306   src += coding->head_ascii;
1307
1308   while (1)
1309     {
1310       int c, c1, c2, c3, c4;
1311
           /* SRC_BASE marks the start of the sequence being examined.
              ONE_MORE_BYTE jumps to `no_more_source' at the end of the
              source.  */
1312       src_base = src;
1313       ONE_MORE_BYTE (c);
           /* Skip one-octet characters.  A negative C (a multibyte
              character found by ONE_MORE_BYTE) is skipped as well.  */
1314       if (c < 0 || UTF_8_1_OCTET_P (c))
1315         continue;
           /* Every following octet must be an extra octet; leaving the
              loop with `break' rejects UTF-8.  */
1316       ONE_MORE_BYTE (c1);
1317       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1318         break;
1319       if (UTF_8_2_OCTET_LEADING_P (c))
1320         {
1321           found = 1;
1322           continue;
1323         }
1324       ONE_MORE_BYTE (c2);
1325       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1326         break;
1327       if (UTF_8_3_OCTET_LEADING_P (c))
1328         {
1329           found = 1;
               /* The BOM octets count as a signature only at the very
                  beginning of the source.  */
1330           if (src_base == coding->source
1331               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1332             bom_found = 1;
1333           continue;
1334         }
1335       ONE_MORE_BYTE (c3);
1336       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1337         break;
1338       if (UTF_8_4_OCTET_LEADING_P (c))
1339         {
1340           found = 1;
1341           continue;
1342         }
1343       ONE_MORE_BYTE (c4);
1344       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1345         break;
1346       if (UTF_8_5_OCTET_LEADING_P (c))
1347         {
1348           found = 1;
1349           continue;
1350         }
1351       break;
1352     }
       /* Reached only by `break': an invalid sequence was seen.  */
1353   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1354   return 0;
1355
1356  no_more_source:
       /* A sequence cut short at the end of the last block is invalid.  */
1357   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1358     {
1359       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1360       return 0;
1361     }
1362   if (bom_found)
1363     {
1364       /* The first character 0xFEFF doesn't necessarily mean a BOM.  */
1365       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1366     }
1367   else
1368     {
1369       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
           /* FOUND is set once at least one multi-octet sequence has
              been accepted.  */
1370       if (found)
1371         detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1372     }
1373   return 1;
1374 }
1375
1376
     /* Decode the UTF-8 source text of CODING, starting at
        CODING->consumed bytes into CODING->source, and store the
        resulting character codes in CODING->charbuf after the
        CODING->charbuf_used elements already there.  Decoding stops when
        the source is exhausted or charbuf is full.  On return
        CODING->consumed, CODING->consumed_char and CODING->charbuf_used
        are updated.  For an invalid sequence only its first byte is
        taken: it is stored as itself if ASCII, else as BYTE8_TO_CHAR of
        it, and CODING->errors is incremented.  */
1377 static void
1378 decode_coding_utf_8 (struct coding_system *coding)
1379 {
1380   const unsigned char *src = coding->source + coding->consumed;
1381   const unsigned char *src_end = coding->source + coding->src_bytes;
1382   const unsigned char *src_base;
1383   int *charbuf = coding->charbuf + coding->charbuf_used;
1384   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1385   int consumed_chars = 0, consumed_chars_base = 0;
1386   int multibytep = coding->src_multibyte;
1387   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1388   Lisp_Object attr, charset_list;
1389   int eol_crlf =
1390     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* When EOL_CRLF, the byte read right after a CR is kept here until
          the next iteration of the main loop; -1 means none pending.  */
1391   int byte_after_cr = -1;
1392
1393   CODING_GET_INFO (coding, attr, charset_list);
1394
       /* Skip a leading BOM.  If the first three bytes are not exactly
          the BOM octets, SRC is put back to SRC_BASE so that they are
          decoded normally.
          NOTE(review): SRC is rewound here but consumed_chars keeps what
          ONE_MORE_BYTE added -- confirm that this is intended.  */
1395   if (bom != utf_without_bom)
1396     {
1397       int c1, c2, c3;
1398
1399       src_base = src;
1400       ONE_MORE_BYTE (c1);
1401       if (! UTF_8_3_OCTET_LEADING_P (c1))
1402         src = src_base;
1403       else
1404         {
1405           ONE_MORE_BYTE (c2);
1406           if (! UTF_8_EXTRA_OCTET_P (c2))
1407             src = src_base;
1408           else
1409             {
1410               ONE_MORE_BYTE (c3);
1411               if (! UTF_8_EXTRA_OCTET_P (c3))
1412                 src = src_base;
1413               else
1414                 {
1415                   if ((c1 != UTF_8_BOM_1)
1416                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1417                     src = src_base;
1418                   else
                         /* NOTE(review): redundant with the unconditional
                            assignment just after this block.  */
1419                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1420                 }
1421             }
1422         }
1423     }
       /* Whatever was found, do not look for a BOM again on later calls
          with this CODING.  */
1424   CODING_UTF_8_BOM (coding) = utf_without_bom;
1425
1426
1427
1428   while (1)
1429     {
1430       int c, c1, c2, c3, c4, c5;
1431
           /* Remember where this character starts, so that an invalid or
              incomplete sequence can be undone.  */
1432       src_base = src;
1433       consumed_chars_base = consumed_chars;
1434
1435       if (charbuf >= charbuf_end)
1436         {
               /* Give back the byte read ahead after a CR.  */
1437           if (byte_after_cr >= 0)
1438             src_base--;
1439           break;
1440         }
1441
1442       if (byte_after_cr >= 0)
1443         c1 = byte_after_cr, byte_after_cr = -1;
1444       else
1445         ONE_MORE_BYTE (c1);
1446       if (c1 < 0)
1447         {
               /* ONE_MORE_BYTE found a multibyte character and returned
                  its negated code.  */
1448           c = - c1;
1449         }
1450       else if (UTF_8_1_OCTET_P (c1))
1451         {
               /* After a CR, read one byte ahead into BYTE_AFTER_CR; the
                  CR itself is stored below.  */
1452           if (eol_crlf && c1 == '\r')
1453             ONE_MORE_BYTE (byte_after_cr);
1454           c = c1;
1455         }
1456       else
1457         {
1458           ONE_MORE_BYTE (c2);
1459           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1460             goto invalid_code;
1461           if (UTF_8_2_OCTET_LEADING_P (c1))
1462             {
1463               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1464               /* Reject overlong sequences here and below.  Encoders
1465                  producing them are incorrect, they can be misleading,
1466                  and they mess up read/write invariance.  */
1467               if (c < 128)
1468                 goto invalid_code;
1469             }
1470           else
1471             {
1472               ONE_MORE_BYTE (c3);
1473               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1474                 goto invalid_code;
1475               if (UTF_8_3_OCTET_LEADING_P (c1))
1476                 {
1477                   c = (((c1 & 0xF) << 12)
1478                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1479                   if (c < 0x800
1480                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1481                     goto invalid_code;
1482                 }
1483               else
1484                 {
1485                   ONE_MORE_BYTE (c4);
1486                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1487                     goto invalid_code;
1488                   if (UTF_8_4_OCTET_LEADING_P (c1))
1489                     {
1490                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1491                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1492                     if (c < 0x10000)
1493                       goto invalid_code;
1494                     }
1495                   else
1496                     {
1497                       ONE_MORE_BYTE (c5);
1498                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1499                         goto invalid_code;
1500                       if (UTF_8_5_OCTET_LEADING_P (c1))
1501                         {
1502                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1503                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1504                                | (c5 & 0x3F));
                               /* Besides overlong forms, reject codes
                                  above MAX_CHAR.  */
1505                           if ((c > MAX_CHAR) || (c < 0x200000))
1506                             goto invalid_code;
1507                         }
1508                       else
1509                         goto invalid_code;
1510                     }
1511                 }
1512             }
1513         }
1514
1515       *charbuf++ = c;
1516       continue;
1517
1518     invalid_code:
           /* Go back to the start of the bad sequence and take only its
              first byte.  */
1519       src = src_base;
1520       consumed_chars = consumed_chars_base;
1521       ONE_MORE_BYTE (c);
1522       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1523       coding->errors++;
1524     }
1525
1526  no_more_source:
       /* Report progress only up to the start of the character that was
          being read.  */
1527   coding->consumed_char += consumed_chars_base;
1528   coding->consumed = src_base - coding->source;
1529   coding->charbuf_used = charbuf - coding->charbuf;
1530 }
1531
1532
1533 static int
1534 encode_coding_utf_8 (struct coding_system *coding)
1535 {
       /* Encode the characters in CODING->charbuf as UTF-8 and store the
          result at CODING->destination + CODING->produced.  A UTF-8 BOM
          is written first when the coding system asks for one.
          Characters for which CHAR_BYTE8_P holds are written back as
          their single raw byte.  On exit CODING->produced and
          CODING->produced_char are updated and the conversion result is
          recorded as success.  The return value is always 0.  */
1536   int multibytep = coding->dst_multibyte;
1537   int *charbuf = coding->charbuf;
1538   int *charbuf_end = charbuf + coding->charbuf_used;
1539   unsigned char *dst = coding->destination + coding->produced;
1540   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1541   int produced_chars = 0;
1542   int c;
1543
1544   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1545     {
1546       ASSURE_DESTINATION (3);
1547       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
           /* Switch the BOM setting off so that a later call for the
              same coding system does not write the BOM again.  */
1548       CODING_UTF_8_BOM (coding) = utf_without_bom;
1549     }
1550
1551   if (multibytep)
1552     {
1553       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1554
1555       while (charbuf < charbuf_end)
1556         {
1557           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1558
1559           ASSURE_DESTINATION (safe_room);
1560           c = *charbuf++;
1561           if (CHAR_BYTE8_P (c))
1562             {
1563               c = CHAR_TO_BYTE8 (c);
1564               EMIT_ONE_BYTE (c);
1565             }
1566           else
1567             {
                   /* Build the byte sequence in STR first, then hand
                      each byte to EMIT_ONE_BYTE on its own.  */
1568               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1569               for (p = str; p < pend; p++)
1570                 EMIT_ONE_BYTE (*p);
1571             }
1572         }
1573     }
1574   else
1575     {
1576       int safe_room = MAX_MULTIBYTE_LENGTH;
1577
1578       while (charbuf < charbuf_end)
1579         {
1580           ASSURE_DESTINATION (safe_room);
1581           c = *charbuf++;
1582           if (CHAR_BYTE8_P (c))
1583             *dst++ = CHAR_TO_BYTE8 (c);
1584           else
1585             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1587         }
           /* Bytes are stored directly here, and one source character
              may occupy several of them.  Count what was really
              written, including any BOM bytes stored above, instead of
              counting one per source character.  */
           produced_chars = dst - (coding->destination + coding->produced);
1588     }
1589   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1590   coding->produced_char += produced_chars;
1591   coding->produced = dst - coding->destination;
1592   return 0;
1593 }
1594
1595
1596 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1597    Check if a text is encoded in one of UTF-16 based coding systems.
1598    If it is, return 1, else return 0.
        (This note describes detect_coding_utf_16, which is defined
        after the three helper macros below.)  */
1599
     /* Nonzero if VAL, taken as a 16-bit code unit, is a high (leading)
        surrogate, i.e. is in the range 0xD800..0xDBFF.  Only bits
        10..15 of VAL are examined.  */
1600 #define UTF_16_HIGH_SURROGATE_P(val) \
1601   (((val) & 0xFC00) == 0xD800)
1602
     /* Nonzero if VAL, taken as a 16-bit code unit, is a low (trailing)
        surrogate, i.e. is in the range 0xDC00..0xDFFF.  Only bits
        10..15 of VAL are examined.  */
1603 #define UTF_16_LOW_SURROGATE_P(val) \
1604   (((val) & 0xFC00) == 0xDC00)
1605
     /* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  VAL may be
        evaluated up to three times, so it must not have side effects.  */
1606 #define UTF_16_INVALID_P(val)   \
1607   (((val) == 0xFFFE)            \
1608    || ((val) == 0xFFFF)         \
1609    || UTF_16_LOW_SURROGATE_P (val))
1610
1611
1612 static int
1613 detect_coding_utf_16 (struct coding_system *coding, struct coding_detection_info *detect_info)
1614 {
       /* Examine CODING->source for UTF-16 and record the outcome in
          DETECT_INFO (the `checked', `found' and `rejected' masks).
          Return 1 when the first two bytes are a BOM, or when the source
          ends while a byte pair is being read; return 0 on the other
          paths.  TWO_MORE_BYTES is defined elsewhere; it presumably
          jumps to no_more_source when the source is exhausted -- confirm
          against its definition.  */
1615   const unsigned char *src = coding->source, *src_base = src;
1616   const unsigned char *src_end = coding->source + coding->src_bytes;
1617   int multibytep = coding->src_multibyte;
1618   int consumed_chars = 0;
1619   int c1, c2;
1620
1621   detect_info->checked |= CATEGORY_MASK_UTF_16;
       /* A final block with an odd CODING->src_chars is rejected for
          every UTF-16 category.  */
1622   if (coding->mode & CODING_MODE_LAST_BLOCK
1623       && (coding->src_chars & 1))
1624     {
1625       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1626       return 0;
1627     }
1628
1629   TWO_MORE_BYTES (c1, c2);
1630   if ((c1 == 0xFF) && (c2 == 0xFE))
1631     {
           /* Little-endian BOM: LE and AUTO are found, the big-endian
              and signature-less categories are ruled out.  */
1632       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1633                              | CATEGORY_MASK_UTF_16_AUTO);
1634       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1635                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1636                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1637     }
1638   else if ((c1 == 0xFE) && (c2 == 0xFF))
1639     {
           /* Big-endian BOM: BE and AUTO are found, the little-endian
              and signature-less categories are ruled out.  */
1640       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1641                              | CATEGORY_MASK_UTF_16_AUTO);
1642       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1643                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1644                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1645     }
1646   else if (c2 < 0)
1647     {
           /* A negative value is not a plain byte; reject UTF-16.  */
1648       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1649       return 0;
1650     }
1651   else
1652     {
1653       /* No BOM.  Check the dispersion of the bytes at even offsets
1654          (E) and at odd offsets (O): E[B] / O[B] is set once byte value
              B has been seen at such an offset, and E_NUM / O_NUM count
              the distinct values seen.  When a count reaches 128 the
              data is treated as too random for the corresponding
              signature-less category.  */
1655       unsigned char e[256], o[256];
1656       unsigned e_num = 1, o_num = 1;
1657
1658       memset (e, 0, 256);
1659       memset (o, 0, 256);
1660       e[c1] = 1;
1661       o[c2] = 1;
1662
           /* Without a BOM the categories that require one are out.  */
1663       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1664                                 |CATEGORY_MASK_UTF_16_BE
1665                                 | CATEGORY_MASK_UTF_16_LE);
1666
           /* Keep scanning until every UTF-16 category is rejected, a
              non-byte value shows up, or the source runs out.  */
1667       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1668              != CATEGORY_MASK_UTF_16)
1669         {
1670           TWO_MORE_BYTES (c1, c2);
1671           if (c2 < 0)
1672             break;
1673           if (! e[c1])
1674             {
1675               e[c1] = 1;
1676               e_num++;
1677               if (e_num >= 128)
1678                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1679             }
1680           if (! o[c2])
1681             {
1682               o[c2] = 1;
1683               o_num++;
1684               if (o_num >= 128)
1685                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1686             }
1687         }
1688       return 0;
1689     }
1690
1691  no_more_source:
1692   return 1;
1693 }
1694
1695 static void
1696 decode_coding_utf_16 (struct coding_system *coding)
1697 {
       /* Decode the UTF-16 byte sequence that starts at CODING->source
          + CODING->consumed and append the resulting characters to
          CODING->charbuf.  An expected BOM is skipped; surrogate pairs
          are combined, with a pending high surrogate kept in the coding
          system (CODING_UTF_16_SURROGATE) between iterations and calls.
          On exit CODING->consumed, CODING->consumed_char and
          CODING->charbuf_used are updated.  */
1698   const unsigned char *src = coding->source + coding->consumed;
1699   const unsigned char *src_end = coding->source + coding->src_bytes;
1700   const unsigned char *src_base;
1701   int *charbuf = coding->charbuf + coding->charbuf_used;
1702   /* We may produce at most 3 chars in one loop.  */
1703   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1704   int consumed_chars = 0, consumed_chars_base = 0;
1705   int multibytep = coding->src_multibyte;
1706   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1707   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1708   int surrogate = CODING_UTF_16_SURROGATE (coding);
1709   Lisp_Object attr, charset_list;
1710   int eol_crlf =
1711     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The two bytes read ahead after a CR, or -1 when there are
          none pending.  */
1712   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1713
1714   CODING_GET_INFO (coding, attr, charset_list);
1715
1716   if (bom == utf_with_bom)
1717     {
1718       int c, c1, c2;
1719
1720       src_base = src;
1721       ONE_MORE_BYTE (c1);
1722       ONE_MORE_BYTE (c2);
           /* The main loop below shows that ONE_MORE_BYTE can yield a
              negative value.  Such a value cannot be part of a BOM, and
              it must not be left-shifted.  */
1723       c = (c1 < 0 || c2 < 0) ? -1 : ((c1 << 8) | c2);
1724
1725       if (endian == utf_16_big_endian
1726           ? c != 0xFEFF : c != 0xFFFE)
1727         {
1728           /* The first two bytes are not BOM.  Treat them as bytes
1729              for a normal character.  */
1730           src = src_base;
               /* Rewind the character count together with SRC, so the
                  two units are not counted twice when the loop reads
                  them again.  */
               consumed_chars = consumed_chars_base;
1731           coding->errors++;
1732         }
1733       CODING_UTF_16_BOM (coding) = utf_without_bom;
1734     }
1735   else if (bom == utf_detect_bom)
1736     {
1737       /* We have already tried to detect BOM and failed in
1738          detect_coding.  */
1739       CODING_UTF_16_BOM (coding) = utf_without_bom;
1740     }
1741
1742   while (1)
1743     {
1744       int c, c1, c2;
1745
1746       src_base = src;
1747       consumed_chars_base = consumed_chars;
1748
1749       if (charbuf >= charbuf_end)
1750         {
               /* Give back the two bytes read ahead after a CR so that
                  they are decoded on the next call.
                  NOTE(review): consumed_chars_base is not adjusted for
                  them here -- confirm whether it should be.  */
1751           if (byte_after_cr1 >= 0)
1752             src_base -= 2;
1753           break;
1754         }
1755
           /* Take the first byte of the unit, preferring a byte that
              was read ahead after a CR.  */
1756       if (byte_after_cr1 >= 0)
1757         c1 = byte_after_cr1, byte_after_cr1 = -1;
1758       else
1759         ONE_MORE_BYTE (c1);
1760       if (c1 < 0)
1761         {
               /* Not a plain byte: store the negated value as is.  */
1762           *charbuf++ = -c1;
1763           continue;
1764         }
1765       if (byte_after_cr2 >= 0)
1766         c2 = byte_after_cr2, byte_after_cr2 = -1;
1767       else
1768         ONE_MORE_BYTE (c2);
1769       if (c2 < 0)
1770         {
               /* The unit is broken: keep the first byte as a raw byte
                  and store the negated second value as is.  */
1771           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1772           *charbuf++ = -c2;
1773           continue;
1774         }
1775       c = (endian == utf_16_big_endian
1776            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1777
1778       if (surrogate)
1779         {
               /* A high surrogate is pending.  */
1780           if (! UTF_16_LOW_SURROGATE_P (c))
1781             {
                   /* It is not followed by a low surrogate.  Output
                      the two bytes of the pending surrogate in source
                      order and count an error.  */
1782               if (endian == utf_16_big_endian)
1783                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1784               else
1785                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1786               *charbuf++ = c1;
1787               *charbuf++ = c2;
1788               coding->errors++;
                   /* The current unit either becomes the new pending
                      high surrogate or is output as a character.  */
1789               if (UTF_16_HIGH_SURROGATE_P (c))
1790                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1791               else
1792                 *charbuf++ = c;
1793             }
1794           else
1795             {
                   /* Combine the pair into a character at or above
                      0x10000.  */
1796               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1797               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1798               *charbuf++ = 0x10000 + c;
1799             }
1800         }
1801       else
1802         {
1803           if (UTF_16_HIGH_SURROGATE_P (c))
1804             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1805           else
1806             {
1807               if (eol_crlf && c == '\r')
1808                 {
                       /* Read the next unit ahead before storing the
                          CR; if the source ends here, the CR is left
                          unconsumed as well.  */
1809                   ONE_MORE_BYTE (byte_after_cr1);
1810                   ONE_MORE_BYTE (byte_after_cr2);
1811                 }
1812               *charbuf++ = c;
1813             }
1814         }
1815     }
1816
1817  no_more_source:
1818   coding->consumed_char += consumed_chars_base;
1819   coding->consumed = src_base - coding->source;
1820   coding->charbuf_used = charbuf - coding->charbuf;
1821 }
1822
1823 static int
1824 encode_coding_utf_16 (struct coding_system *coding)
1825 {
       /* Encode the characters in CODING->charbuf as UTF-16 in the byte
          order of the coding system and store the result at
          CODING->destination + CODING->produced.  A BOM is written
          first unless the coding system is set to utf_without_bom.
          On exit CODING->produced and CODING->produced_char are updated
          and the conversion result is recorded as success.  The return
          value is always 0.  */
1826   int multibytep = coding->dst_multibyte;
1827   int *charbuf = coding->charbuf;
1828   int *charbuf_end = charbuf + coding->charbuf_used;
1829   unsigned char *dst = coding->destination + coding->produced;
1830   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Room requested from ASSURE_DESTINATION before each emission;
          at most four byte values are emitted per character below.  */
1831   int safe_room = 8;
1832   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1833   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1834   int produced_chars = 0;
1835   Lisp_Object attrs, charset_list;
1836   int c;
1837
1838   CODING_GET_INFO (coding, attrs, charset_list);
1839
1840   if (bom != utf_without_bom)
1841     {
1842       ASSURE_DESTINATION (safe_room);
1843       if (big_endian)
1844         EMIT_TWO_BYTES (0xFE, 0xFF);
1845       else
1846         EMIT_TWO_BYTES (0xFF, 0xFE);
           /* Switch the BOM setting off so that a later call for the
              same coding system does not write the BOM again.  */
1847       CODING_UTF_16_BOM (coding) = utf_without_bom;
1848     }
1849
1850   while (charbuf < charbuf_end)
1851     {
1852       ASSURE_DESTINATION (safe_room);
1853       c = *charbuf++;
           /* Characters beyond the Unicode range are replaced by the
              default character of the coding system.  */
1854       if (c > MAX_UNICODE_CHAR)
1855         c = coding->default_char;
1856
1857       if (c < 0x10000)
1858         {
               /* One 16-bit unit.  */
1859           if (big_endian)
1860             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1861           else
1862             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1863         }
1864       else
1865         {
               /* A surrogate pair: C1 is the high surrogate built from
                  the upper bits of C - 0x10000, C2 the low surrogate
                  built from its lower 10 bits.  */
1866           int c1, c2;
1867
1868           c -= 0x10000;
1869           c1 = (c >> 10) + 0xD800;
1870           c2 = (c & 0x3FF) + 0xDC00;
1871           if (big_endian)
1872             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1873           else
1874             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1875         }
1876     }
1877   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1878   coding->produced = dst - coding->destination;
1879   coding->produced_char += produced_chars;
1880   return 0;
1881 }
1882
1883 \f
1884 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1885
1886 /* Emacs' internal format for representation of multiple character
1887    sets is a kind of multi-byte encoding, i.e. characters are
1888    represented by variable-length sequences of one-byte codes.
1889
1890    ASCII characters and control characters (e.g. `tab', `newline') are
1891    represented by one-byte sequences which are their ASCII codes, in
1892    the range 0x00 through 0x7F.
1893
1894    8-bit characters of the range 0x80..0x9F are represented by
1895    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1896    code + 0x20).
1897
1898    8-bit characters of the range 0xA0..0xFF are represented by
1899    one-byte sequences which are their 8-bit code.
1900
1901    The other characters are represented by a sequence of `base
1902    leading-code', optional `extended leading-code', and one or two
1903    `position-code's.  The length of the sequence is determined by the
1904    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1905    whereas extended leading-code and position-code take the range 0xA0
1906    through 0xFF.  See `charset.h' for more details about leading-code
1907    and position-code.
1908
1909    --- CODE RANGE of Emacs' internal format ---
1910    character set        range
1911    -------------        -----
1912    ascii                0x00..0x7F
1913    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1914    eight-bit-graphic    0xA0..0xBF
1915    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1916    ---------------------------------------------
1917
1918    As this is the internal character representation, the format is
1919    usually not used externally (i.e. in a file or in a data sent to a
1920    process).  But, it is possible to have a text externally in this
1921    format (i.e. by encoding by the coding system `emacs-mule').
1922
1923    In that case, a sequence of one-byte codes has a slightly different
1924    form.
1925
1926    At first, all characters in eight-bit-control are represented by
1927    one-byte sequences which are their 8-bit code.
1928
1929    Next, character composition data are represented by the byte
1930    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1931    where,
1932         METHOD is 0xF2 plus one of composition method (enum
1933         composition_method),
1934
1935         BYTES is 0xA0 plus a byte length of this composition data,
1936
1937         CHARS is 0xA0 plus a number of characters composed by this
1938         data,
1939
1940         COMPONENTs are characters in multibyte form or composition
1941         rules encoded by two bytes of ASCII codes.
1942
1943    In addition, for backward compatibility, the following formats are
1944    also recognized as composition data on decoding.
1945
1946    0x80 MSEQ ...
1947    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1948
1949    Here,
1950         MSEQ is a multibyte form, but in one of these special formats:
1951           ASCII: 0xA0 ASCII_CODE+0x80,
1952           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1953         RULE is a one byte code of the range 0xA0..0xF0 that
1954         represents a composition rule.
1955   */
1956
1957 char emacs_mule_bytes[256];  /* Indexed by a leading byte value; the
                                     code in this file reads the entry as
                                     the byte length (1 to 4) of the
                                     emacs-mule sequence that starts with
                                     that byte.  */
1958
1959
1960 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1961    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1962    else return 0.
        The category is always marked as checked in DETECT_INFO.  When 0
        is returned the category is also marked as rejected; when 1 is
        returned, DETECT_INFO->found gains the category only if at least
        one non-ASCII sequence was recognized.  */
1963
1964 static int
1965 detect_coding_emacs_mule (struct coding_system *coding, struct coding_detection_info *detect_info)
1966 {
1967   const unsigned char *src = coding->source, *src_base;
1968   const unsigned char *src_end = coding->source + coding->src_bytes;
1969   int multibytep = coding->src_multibyte;
1970   int consumed_chars = 0;
1971   int c;
1972   int found = 0;
1973
1974   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1975   /* A coding system of this category is always ASCII compatible.  */
1976   src += coding->head_ascii;
1977
1978   while (1)
1979     {
1980       src_base = src;
1981       ONE_MORE_BYTE (c);
           /* A negative value is not a plain byte; skip it.  */
1982       if (c < 0)
1983         continue;
1984       if (c == 0x80)
1985         {
1986           /* Perhaps the start of composite character.  We simply skip
1987              it because analyzing it is too heavy for detecting.  But,
1988              at least, we check that the composite character
1989              constitutes of more than 4 bytes.  */
               /* SRC_START has its own name so that SRC_BASE keeps
                  pointing at the start of the whole sequence, which is
                  what the code at no_more_source tests.  */
1990           const unsigned char *src_start;
1991
1992         repeat:
1993           src_start = src;
1994           do
1995             {
1996               ONE_MORE_BYTE (c);
1997             }
1998           while (c >= 0xA0);
1999
2000           if (src - src_start <= 4)
2001             break;
2002           found = CATEGORY_MASK_EMACS_MULE;
2003           if (c == 0x80)
2004             goto repeat;
2005         }
2006
2007       if (c < 0x80)
2008         {
               /* ESC, SI and SO rule out emacs-mule.  */
2009           if (c < 0x20
2010               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2011             break;
2012         }
2013       else
2014         {
               /* Look the length up by the leading byte just read.  C
                  is in 0x80..0xFF here.  The byte at SRC_BASE can
                  differ from it, e.g. after a composition was skipped
                  above.  */
2015           int more_bytes = emacs_mule_bytes[c] - 1;
2016
2017           while (more_bytes > 0)
2018             {
2019               ONE_MORE_BYTE (c);
2020               if (c < 0xA0)
2021                 {
2022                   src--;        /* Unread the last byte.  */
2023                   break;
2024                 }
2025               more_bytes--;
2026             }
               /* A sequence cut short by a byte below 0xA0 is invalid.  */
2027           if (more_bytes != 0)
2028             break;
2029           found = CATEGORY_MASK_EMACS_MULE;
2030         }
2031     }
2032   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2033   return 0;
2034
2035  no_more_source:
       /* The source ended in the middle of a sequence.  That is an
          error only when this is the last block.  */
2036   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2037     {
2038       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2039       return 0;
2040     }
2041   detect_info->found |= found;
2042   return 1;
2043 }
2044
2045
2046 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2047    character.  If CMP_STATUS indicates that we must expect MSEQ or
2048    RULE described above, decode it and return the negative value of
2049    the decoded character or rule.  If an invalid byte is found, return
2050    -1.  If SRC is too short, return -2.
        On the non-error paths, *NBYTES is set to the number of bytes
        taken from SRC and *NCHARS to the number of characters consumed.
        When a character was decoded and ID is non-NULL, *ID is set to
        the id of its charset; *ID is left alone when a rule byte is
        returned.  */
2051
2052 int
2053 emacs_mule_char (struct coding_system *coding, const unsigned char *src, int *nbytes, int *nchars, int *id, struct composition_status *cmp_status)
2054 {
2055   const unsigned char *src_end = coding->source + coding->src_bytes;
2056   const unsigned char *src_base = src;
2057   int multibytep = coding->src_multibyte;
2058   struct charset *charset;
2059   unsigned code;
2060   int c;
2061   int consumed_chars = 0;
       /* Nonzero when the sequence was read in the old-form MSEQ
          format; the result is then returned negated.  */
2062   int mseq_found = 0;
2063
2064   ONE_MORE_BYTE (c);
2065   if (c < 0)
2066     {
           /* Not a plain byte: return the negated value as the
              character, with the first emacs-mule charset.  */
2067       c = -c;
2068       charset = emacs_mule_charset[0];
2069     }
2070   else
2071     {
2072       if (c >= 0xA0)
2073         {
               /* Bytes at or above 0xA0 can start a sequence only
                  inside an old-form composition.  */
2074           if (cmp_status->state != COMPOSING_NO
2075               && cmp_status->old_form)
2076             {
2077               if (cmp_status->state == COMPOSING_CHAR)
2078                 {
                       /* MSEQ: either 0xA0 followed by ASCII_CODE+0x80,
                          or LEADING_CODE+0x20 followed by the rest.
                          Undo the offset and go on with the normal
                          decoding below.  */
2079                   if (c == 0xA0)
2080                     {
2081                       ONE_MORE_BYTE (c);
2082                       c -= 0x80;
2083                       if (c < 0)
2084                         goto invalid_code;
2085                     }
2086                   else
2087                     c -= 0x20;
2088                   mseq_found = 1;
2089                 }
2090               else
2091                 {
                       /* A rule byte is expected here: return it
                          negated without decoding a character.  */
2092                   *nbytes = src - src_base;
2093                   *nchars = consumed_chars;
2094                   return -c;
2095                 }
2096             }
2097           else
2098             goto invalid_code;
2099         }
2100
           /* Dispatch on the sequence length given by the leading
              byte.  Every following byte must be 0xA0 or above, and its
              low 7 bits contribute to CODE.  */
2101       switch (emacs_mule_bytes[c])
2102         {
2103         case 2:
               /* Leading code and one position code.  */
2104           if (! (charset = emacs_mule_charset[c]))
2105             goto invalid_code;
2106           ONE_MORE_BYTE (c);
2107           if (c < 0xA0)
2108             goto invalid_code;
2109           code = c & 0x7F;
2110           break;
2111
2112         case 3:
2113           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2114               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2115             {
                   /* Private leading code, then the byte that selects
                      the charset, then one position code.  */
2116               ONE_MORE_BYTE (c);
2117               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
2118                 goto invalid_code;
2119               ONE_MORE_BYTE (c);
2120               if (c < 0xA0)
2121                 goto invalid_code;
2122               code = c & 0x7F;
2123             }
2124           else
2125             {
                   /* Leading code and two position codes.  */
2126               if (! (charset = emacs_mule_charset[c]))
2127                 goto invalid_code;
2128               ONE_MORE_BYTE (c);
2129               if (c < 0xA0)
2130                 goto invalid_code;
2131               code = (c & 0x7F) << 8;
2132               ONE_MORE_BYTE (c);
2133               if (c < 0xA0)
2134                 goto invalid_code;
2135               code |= c & 0x7F;
2136             }
2137           break;
2138
2139         case 4:
               /* The second byte selects the charset; two position
                  codes follow.  */
2140           ONE_MORE_BYTE (c);
2141           if (c < 0 || ! (charset = emacs_mule_charset[c]))
2142             goto invalid_code;
2143           ONE_MORE_BYTE (c);
2144           if (c < 0xA0)
2145             goto invalid_code;
2146           code = (c & 0x7F) << 8;
2147           ONE_MORE_BYTE (c);
2148           if (c < 0xA0)
2149             goto invalid_code;
2150           code |= c & 0x7F;
2151           break;
2152
2153         case 1:
               /* A single byte: ASCII, or else an eight-bit byte.  */
2154           code = c;
2155           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2156                                      ? charset_ascii : charset_eight_bit);
2157           break;
2158
2159         default:
               /* Any other table entry is treated as a fatal error.  */
2160           abort ();
2161         }
           /* Map (CHARSET, CODE) to a character, stored in C; a
              negative C is treated as a decoding failure.  */
2162       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, code, c);
2163       if (c < 0)
2164         goto invalid_code;
2165     }
2166   *nbytes = src - src_base;
2167   *nchars = consumed_chars;
2168   if (id)
2169     *id = charset->id;
2170   return (mseq_found ? -c : c);
2171
2172  no_more_source:
2173   return -2;
2174
2175  invalid_code:
2176   return -1;
2177 }
2178
2179
2180 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2181
2182 /* Handle these composition sequences ('|': the end of header elements,
2183    BYTES and CHARS >= 0xA0):
2184
2185    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2186    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2187    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2188
2189    and these old forms:
2190
2191    (4) relative composition: 0x80 | MSEQ ... MSEQ
2192    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2193
2194    When the starter 0x80 and the following header elements are found,
2195    this annotation header is produced.
2196
2197         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2198
2199    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2200    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2201
2202    Then, upon reading the following elements, these codes are produced
2203    until the composition end is found:
2204
2205    (1) CHAR ... CHAR
2206    (2) ALT ... ALT CHAR ... CHAR
2207    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2208    (4) CHAR ... CHAR
2209    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2210
2211    When the composition end is found, LENGTH and NCHARS in the
2212    annotation header are updated as below:
2213
2214    (1) LENGTH: unchanged, NCHARS: unchanged
2215    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2216    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2217    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2218    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2219
2220    If an error is found while composing, the annotation header is
2221    changed to the original composition header (plus filler -1s) as
2222    below:
2223
2224    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2225    (5)          [ 0x80 0xFF -1 -1- -1 ]
2226
2227    and the sequence [ -2 DECODED-RULE ] is changed to the original
2228    byte sequence as below:
2229         o the original byte sequence is B: [ B -1 ]
2230         o the original byte sequence is B1 B2: [ B1 B2 ]
2231
2232    Most of the routines are implemented by macros because many
2233    variables and labels in the caller decode_coding_emacs_mule must be
2234    accessible, and they are usually called just once (and thus don't
2235    increase the size of the compiled object).  */
2236
2237 /* Decode a composition rule represented by C as a component of
2238    composition sequence of Emacs 20 style.  Set RULE to the decoded
2239    rule.
        C must be a variable: 0xA0 is subtracted from it in place.  The
        result must be in 0..80 and is split into GREF = C / 9 and
        NREF = C % 9; a value of 4 in either is replaced by 10 before
        the two are packed with COMPOSITION_ENCODE_RULE.  An
        out-of-range C jumps to the label invalid_code of the function
        in which the macro is expanded.  */
2240
2241 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
2242   do {                                                  \
2243     int gref, nref;                                     \
2244                                                         \
2245     c -= 0xA0;                                          \
2246     if (c < 0 || c >= 81)                               \
2247       goto invalid_code;                                \
2248     gref = c / 9, nref = c % 9;                         \
2249     if (gref == 4) gref = 10;                           \
2250     if (nref == 4) nref = 10;                           \
2251     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2252   } while (0)
2253
2254
2255 /* Decode a composition rule represented by C and the following byte
2256    at SRC as a component of composition sequence of Emacs 21 style.
2257    Set RULE to the decoded rule.
        GREF is C - 0x20.  The next byte is then read into C with
        ONE_MORE_BYTE (so C must be a variable and is overwritten), and
        NREF is that byte - 0x20.  Each must be in 0..80, otherwise
        control jumps to the label invalid_code of the function in which
        the macro is expanded.  The two are packed with
        COMPOSITION_ENCODE_RULE.  */
2258
2259 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
2260   do {                                                  \
2261     int gref, nref;                                     \
2262                                                         \
2263     gref = c - 0x20;                                    \
2264     if (gref < 0 || gref >= 81)                         \
2265       goto invalid_code;                                \
2266     ONE_MORE_BYTE (c);                                  \
2267     nref = c - 0x20;                                    \
2268     if (nref < 0 || nref >= 81)                         \
2269       goto invalid_code;                                \
2270     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2271   } while (0)
2272
2273
2274 /* Start of Emacs 21 style format.  On entry the variable C holds the
2275    METHOD byte (0xF2 plus the composition method).  The next two
2276    bytes at SRC are BYTES (0xA0 plus the byte length of this
2277    composition information) and CHARS (0xA0 plus the number of
        characters composed by this composition).
        The macro checks that the byte length is at least 3 (and exactly
        4 for a relative composition) and that the character count is
        positive and below MAX_COMPOSITION_COMPONENTS, jumping to the
        label invalid_code otherwise.  It then initializes CMP_STATUS
        and writes the annotation header with ADD_COMPOSITION_DATA.
        The local CHARBUF_BASE is not referenced within this macro.  */
2278
2279 #define DECODE_EMACS_MULE_21_COMPOSITION()                              \
2280   do {                                                                  \
2281     enum composition_method method = c - 0xF2;                          \
2282     int *charbuf_base = charbuf;                                        \
2283     int nbytes, nchars;                                                 \
2284                                                                         \
2285     ONE_MORE_BYTE (c);                                                  \
2286     if (c < 0)                                                          \
2287       goto invalid_code;                                                \
2288     nbytes = c - 0xA0;                                                  \
2289     if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4))  \
2290       goto invalid_code;                                                \
2291     ONE_MORE_BYTE (c);                                                  \
2292     nchars = c - 0xA0;                                                  \
2293     if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
2294       goto invalid_code;                                                \
2295     cmp_status->old_form = 0;                                           \
2296     cmp_status->method = method;                                        \
2297     if (method == COMPOSITION_RELATIVE)                                 \
2298       cmp_status->state = COMPOSING_CHAR;                               \
2299     else                                                                \
2300       cmp_status->state = COMPOSING_COMPONENT_CHAR;                     \
2301     cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
2302     cmp_status->nchars = nchars;                                        \
2303     cmp_status->ncomps = nbytes - 4;                                    \
2304     ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
2305   } while (0)
2306
2307
2308 /* Start of Emacs 20 style format for relative composition.
        Mark CMP_STATUS as an old-form relative composition that expects
        a character next, with both counters reset to zero, and write an
        annotation header whose NCHARS and NBYTES are 0 with
        ADD_COMPOSITION_DATA.  */
2309
2310 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
2311   do {                                                          \
2312     cmp_status->old_form = 1;                                   \
2313     cmp_status->method = COMPOSITION_RELATIVE;                  \
2314     cmp_status->state = COMPOSING_CHAR;                         \
2315     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2316     cmp_status->nchars = cmp_status->ncomps = 0;                \
2317     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2318   } while (0)
2319
2320
2321 /* Start of Emacs 20 style format for rule-base composition.
        Same as the relative case except that the method recorded in
        CMP_STATUS and in the annotation header is
        COMPOSITION_WITH_RULE.  */
2322
2323 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
2324   do {                                                          \
2325     cmp_status->old_form = 1;                                   \
2326     cmp_status->method = COMPOSITION_WITH_RULE;                 \
2327     cmp_status->state = COMPOSING_CHAR;                         \
2328     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2329     cmp_status->nchars = cmp_status->ncomps = 0;                \
2330     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2331   } while (0)
2332
2333
2334 #define DECODE_EMACS_MULE_COMPOSITION_START()           \
2335   do { /* Read the next byte into C and pick the composition format.  */ \
2336     const unsigned char *current_src = src;             \
2337                                                         \
2338     ONE_MORE_BYTE (c);                                  \
2339     if (c < 0)                                          \
2340       goto invalid_code;                                \
2341     if (c - 0xF2 >= COMPOSITION_RELATIVE                \
2342         && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
2343       DECODE_EMACS_MULE_21_COMPOSITION (); /* C is a METHOD byte.  */ \
2344     else if (c < 0xA0)                                  \
2345       goto invalid_code;                                \
2346     else if (c < 0xC0) /* Old form: C already starts the first MSEQ.  */ \
2347       {                                                 \
2348         DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
2349         /* Re-read C as a composition component.  */    \
2350         src = current_src;                              \
2351       }                                                 \
2352     else if (c == 0xFF) /* Old form with rules.  */     \
2353       DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();     \
2354     else                                                \
2355       goto invalid_code;                                \
2356   } while (0)
2357
/* Close the composition currently being decoded.

   Expands in a scope that provides `charbuf' (the output cursor) and
   `cmp_status'.  The composition data occupies the
   cmp_status->length elements that end just before `charbuf'.  For
   an old-form composition the element at offset 2 of that data
   receives cmp_status->nchars.  Otherwise, when the method is beyond
   COMPOSITION_RELATIVE, the element at offset 0 is set to the element
   at offset 2 minus cmp_status->length.  In every case the state
   becomes COMPOSING_NO.  */
#define EMACS_MULE_COMPOSITION_END()                                    \
  do {                                                                  \
    const int first = - cmp_status->length;                             \
                                                                        \
    if (! cmp_status->old_form)                                         \
      {                                                                 \
        if (cmp_status->method > COMPOSITION_RELATIVE)                  \
          charbuf[first] = charbuf[first + 2] - cmp_status->length;     \
      }                                                                 \
    else                                                                \
      charbuf[first + 2] = cmp_status->nchars;                          \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
2368
2369
2370 static int
2371 emacs_mule_finish_composition (int *charbuf, struct composition_status *cmp_status)
2372 {
2373   int idx = - cmp_status->length;
2374   int new_chars;
2375
2376   if (cmp_status->old_form && cmp_status->nchars > 0)
2377     {
2378       charbuf[idx + 2] = cmp_status->nchars;
2379       new_chars = 0;
2380       if (cmp_status->method == COMPOSITION_WITH_RULE
2381           && cmp_status->state == COMPOSING_CHAR)
2382         {
2383           /* The last rule was invalid.  */
2384           int rule = charbuf[-1] + 0xA0;
2385
2386           charbuf[-2] = BYTE8_TO_CHAR (rule);
2387           charbuf[-1] = -1;
2388           new_chars = 1;
2389         }
2390     }
2391   else
2392     {
2393       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2394
2395       if (cmp_status->method == COMPOSITION_WITH_RULE)
2396         {
2397           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2398           charbuf[idx++] = -3;
2399           charbuf[idx++] = 0;
2400           new_chars = 1;
2401         }
2402       else
2403         {
2404           int nchars = charbuf[idx + 1] + 0xA0;
2405           int nbytes = charbuf[idx + 2] + 0xA0;
2406
2407           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2408           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2409           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2410           charbuf[idx++] = -1;
2411           new_chars = 4;
2412         }
2413     }
2414   cmp_status->state = COMPOSING_NO;
2415   return new_chars;
2416 }
2417
/* If a composition is in progress, terminate it with
   emacs_mule_finish_composition and add the value that function
   returns to `char_offset'.  Does nothing when the state is already
   COMPOSING_NO.  Expands in a scope that provides `cmp_status',
   `charbuf' and `char_offset'.  */
#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
  do {                                                                    \
    if (cmp_status->state == COMPOSING_NO)                                \
      break;                                                              \
    char_offset += emacs_mule_finish_composition (charbuf, cmp_status);   \
  } while (0)
2423
2424
/* Decode emacs-mule encoded source bytes of CODING into integer
   elements appended to CODING->charbuf.

   Input is read from CODING->source starting at CODING->consumed up to
   CODING->src_bytes.  Output starts at CODING->charbuf_used.  On
   return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated; only input up to the start of the
   last fully handled unit (`src_base') is reported as consumed.

   Besides plain characters, the function emits charset annotations
   (via ADD_CHARSET_DATA) whenever the charset of consecutive
   characters changes, and tracks compositions through
   CODING->spec.emacs_mule.cmp_status.  A composition that is still
   open when the source runs out is either finished (last block) or
   saved in cmp_status->carryover and restored on the next call.

   ONE_MORE_BYTE, ADD_CHARSET_DATA, emacs_mule_char and the
   DECODE_EMACS_MULE_COMPOSITION_RULE_* macros are defined elsewhere.
   NOTE(review): the label `no_more_source' is presumably the jump
   target of ONE_MORE_BYTE when the source is exhausted -- confirm.  */
2425 static void
2426 decode_coding_emacs_mule (struct coding_system *coding)
2427 {
2428   const unsigned char *src = coding->source + coding->consumed;
2429   const unsigned char *src_end = coding->source + coding->src_bytes;
2430   const unsigned char *src_base;
2431   int *charbuf = coding->charbuf + coding->charbuf_used;
2432   /* We may produce two annotations (charset and composition) in one
2433      loop and one more charset annotation at the end.  */
2434   int *charbuf_end
2435     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2436   int consumed_chars = 0, consumed_chars_base;
       /* Not referenced directly below; presumably read by
          ONE_MORE_BYTE -- confirm.  */
2437   int multibytep = coding->src_multibyte;
2438   Lisp_Object attrs, charset_list;
2439   int char_offset = coding->produced_char;
2440   int last_offset = char_offset;
2441   int last_id = charset_ascii;
2442   int eol_crlf =
2443     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte pre-read after a '\r' when EOL_CRLF is set, or -1 if none
          is pending.  */
2444   int byte_after_cr = -1;
2445   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2446
2447   CODING_GET_INFO (coding, attrs, charset_list);
2448
       /* A composition was left open by the previous call: put its saved
          elements back at the output cursor so decoding can continue
          where it stopped.  */
2449   if (cmp_status->state != COMPOSING_NO)
2450     {
2451       int i;
2452
2453       for (i = 0; i < cmp_status->length; i++)
2454         *charbuf++ = cmp_status->carryover[i];
2455       coding->annotated = 1;
2456     }
2457
2458   while (1)
2459     {
2460       int c, id;
2461
           /* Remember where this unit starts so it can be rewound or
              reported as unconsumed.  */
2462       src_base = src;
2463       consumed_chars_base = consumed_chars;
2464
2465       if (charbuf >= charbuf_end)
2466         {
               /* Output is full.  A byte pre-read after '\r' has not
                  been processed, so do not count it as consumed.  */
2467           if (byte_after_cr >= 0)
2468             src_base--;
2469           break;
2470         }
2471
2472       if (byte_after_cr >= 0)
2473         c = byte_after_cr, byte_after_cr = -1;
2474       else
2475         ONE_MORE_BYTE (c);
2476
           /* A negative C is stored negated as a character; 0x80
              introduces a composition.  Either one ends any composition
              still in progress.  */
2477       if (c < 0 || c == 0x80)
2478         {
2479           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2480           if (c < 0)
2481             {
2482               *charbuf++ = -c;
2483               char_offset++;
2484             }
2485           else
2486             DECODE_EMACS_MULE_COMPOSITION_START ();
2487           continue;
2488         }
2489
2490       if (c < 0x80)
2491         {
               /* Look one byte past a '\r' now; the byte is handled on
                  the next iteration.  */
2492           if (eol_crlf && c == '\r')
2493             ONE_MORE_BYTE (byte_after_cr);
2494           id = charset_ascii;
2495           if (cmp_status->state != COMPOSING_NO)
2496             {
2497               if (cmp_status->old_form)
2498                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2499               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2500                 cmp_status->ncomps--;
2501             }
2502         }
2503       else
2504         {
2505           int nchars, nbytes;
2506           /* emacs_mule_char can load a charset map from a file, which
2507              allocates a large structure and might cause buffer text
2508              to be relocated as result.  Thus, we need to remember the
2509              original pointer to buffer text, and fixup all related
2510              pointers after the call.  */
2511           const unsigned char *orig = coding->source;
2512           EMACS_INT offset;
2513
2514           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2515                                cmp_status);
2516           offset = coding->source - orig;
2517           if (offset)
2518             {
2519               src += offset;
2520               src_base += offset;
2521               src_end += offset;
2522             }
               /* -1 is treated as an invalid sequence; -2 stops decoding
                  (presumably the sequence is truncated -- confirm against
                  emacs_mule_char).  Other negative values fall through.  */
2523           if (c < 0)
2524             {
2525               if (c == -1)
2526                 goto invalid_code;
2527               if (c == -2)
2528                 break;
2529             }
2530           src = src_base + nbytes;
2531           consumed_chars = consumed_chars_base + nchars;
2532           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2533             cmp_status->ncomps -= nchars;
2534         }
2535
2536       /* Now if C >= 0, we found a normally encoded character, if C <
2537          0, we found an old-style composition component character or
2538          rule.  */
2539
2540       if (cmp_status->state == COMPOSING_NO)
2541         {
               /* Plain character.  When the charset changes, close the
                  run of the previous non-ASCII charset with an
                  annotation.  */
2542           if (last_id != id)
2543             {
2544               if (last_id != charset_ascii)
2545                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2546                                   last_id);
2547               last_id = id;
2548               last_offset = char_offset;
2549             }
2550           *charbuf++ = c;
2551           char_offset++;
2552         }
2553       else if (cmp_status->state == COMPOSING_CHAR)
2554         {
2555           if (cmp_status->old_form)
2556             {
2557               if (c >= 0)
2558                 {
                       /* A normally encoded character ends an old-form
                          composition.  */
2559                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2560                   *charbuf++ = c;
2561                   char_offset++;
2562                 }
2563               else
2564                 {
                       /* Old-form component: stored negated and
                          counted.  */
2565                   *charbuf++ = -c;
2566                   cmp_status->nchars++;
2567                   cmp_status->length++;
2568                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2569                     EMACS_MULE_COMPOSITION_END ();
2570                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2571                     cmp_status->state = COMPOSING_RULE;
2572                 }
2573             }
2574           else
2575             {
                   /* New-form composition: nchars counts down the
                      characters still expected.  */
2576               *charbuf++ = c;
2577               cmp_status->length++;
2578               cmp_status->nchars--;
2579               if (cmp_status->nchars == 0)
2580                 EMACS_MULE_COMPOSITION_END ();
2581             }
2582         }
2583       else if (cmp_status->state == COMPOSING_RULE)
2584         {
2585           int rule;
2586
2587           if (c >= 0)
2588             {
2589               EMACS_MULE_COMPOSITION_END ();
2590               *charbuf++ = c;
2591               char_offset++;
2592             }
2593           else
2594             {
2595               c = -c;
2596               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2597               if (rule < 0)
2598                 goto invalid_code;
                   /* A rule is stored as the pair (-2, RULE).  */
2599               *charbuf++ = -2;
2600               *charbuf++ = rule;
2601               cmp_status->length += 2;
2602               cmp_status->state = COMPOSING_CHAR;
2603             }
2604         }
2605       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2606         {
2607           *charbuf++ = c;
2608           cmp_status->length++;
               /* ncomps was already decremented above for this
                  character; a negative value ends the composition.  */
2609           if (cmp_status->ncomps == 0)
2610             cmp_status->state = COMPOSING_CHAR;
2611           else if (cmp_status->ncomps > 0)
2612             {
2613               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2614                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2615             }
2616           else
2617             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2618         }
2619       else                      /* COMPOSING_COMPONENT_RULE */
2620         {
2621           int rule;
2622
2623           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2624           if (rule < 0)
2625             goto invalid_code;
2626           *charbuf++ = -2;
2627           *charbuf++ = rule;
2628           cmp_status->length += 2;
2629           cmp_status->ncomps--;
2630           if (cmp_status->ncomps > 0)
2631             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2632           else
2633             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2634         }
2635       continue;
2636
         /* NOTE(review): no `goto retry' appears in this function body;
            the label may be a target of macros defined elsewhere --
            confirm before removing.  */
2637     retry:
2638       src = src_base;
2639       consumed_chars = consumed_chars_base;
2640       continue;
2641
         /* Invalid sequence: end any composition, rewind to the start of
            the unit, and emit its first byte as is (ASCII) or as a
            byte8 character, counting one error.  */
2642     invalid_code:
2643       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2644       src = src_base;
2645       consumed_chars = consumed_chars_base;
2646       ONE_MORE_BYTE (c);
2647       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2648       char_offset++;
2649       coding->errors++;
2650     }
2651
2652  no_more_source:
2653   if (cmp_status->state != COMPOSING_NO)
2654     {
2655       if (coding->mode & CODING_MODE_LAST_BLOCK)
2656         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2657       else
2658         {
2659           int i;
2660
               /* More input will follow: take the unfinished composition
                  out of the output and keep it for the next call.  */
2661           charbuf -= cmp_status->length;
2662           for (i = 0; i < cmp_status->length; i++)
2663             cmp_status->carryover[i] = charbuf[i];
2664         }
2665     }
2666   if (last_id != charset_ascii)
2667     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2668   coding->consumed_char += consumed_chars_base;
2669   coding->consumed = src_base - coding->source;
2670   coding->charbuf_used = charbuf - coding->charbuf;
2671 }
2672
2673
/* Store in CODES[0] and CODES[1] the leading code(s) that introduce a
   character of the charset whose emacs-mule id is ID.

     ID < 0xA0         CODES = { ID,   0  }   (single leading code)
     0xA0 <= ID < 0xE0 CODES = { 0x9A, ID }
     0xE0 <= ID < 0xF0 CODES = { 0x9B, ID }
     0xF0 <= ID < 0xF5 CODES = { 0x9C, ID }
     otherwise         CODES = { 0x9D, ID }

   ID is evaluated exactly once and both arguments are parenthesized,
   so arbitrary expressions may be passed.  The expansion has no
   trailing semicolon: write `EMACS_MULE_LEADING_CODES (id, codes);'
   like a function call, which keeps the macro usable as the body of
   an `if' that has an `else'.  */
#define EMACS_MULE_LEADING_CODES(id, codes)                     \
  do {                                                          \
    const int emacs_mule_lc_id_ = (id);                         \
                                                                \
    if (emacs_mule_lc_id_ < 0xA0)                               \
      (codes)[0] = emacs_mule_lc_id_, (codes)[1] = 0;           \
    else if (emacs_mule_lc_id_ < 0xE0)                          \
      (codes)[0] = 0x9A, (codes)[1] = emacs_mule_lc_id_;        \
    else if (emacs_mule_lc_id_ < 0xF0)                          \
      (codes)[0] = 0x9B, (codes)[1] = emacs_mule_lc_id_;        \
    else if (emacs_mule_lc_id_ < 0xF5)                          \
      (codes)[0] = 0x9C, (codes)[1] = emacs_mule_lc_id_;        \
    else                                                        \
      (codes)[0] = 0x9D, (codes)[1] = emacs_mule_lc_id_;        \
  } while (0)
2687
2688
/* Encode the elements of CODING->charbuf (CODING->charbuf_used of
   them) in emacs-mule format, appending the bytes to
   CODING->destination after CODING->produced.

   ASCII characters and byte8 characters are emitted as a single byte.
   Any other character is emitted as one or two leading codes derived
   from the emacs-mule id of its charset, followed by one code byte
   (dimension 1) or two code bytes (otherwise), each with the high bit
   set.  A character that belongs to no charset in the list is
   replaced by CODING->default_char.

   On completion the result CODING_RESULT_SUCCESS is recorded,
   CODING->produced_char and CODING->produced are updated, and 0 is
   returned.  ASSURE_DESTINATION, EMIT_ONE_BYTE and
   EMIT_ONE_ASCII_BYTE are defined elsewhere; they presumably use the
   locals `dst', `dst_end', `produced_chars' and `multibytep' and may
   leave the function early -- confirm.  */
2689 static int
2690 encode_coding_emacs_mule (struct coding_system *coding)
2691 {
2692   int multibytep = coding->dst_multibyte;
2693   int *charbuf = coding->charbuf;
2694   int *charbuf_end = charbuf + coding->charbuf_used;
2695   unsigned char *dst = coding->destination + coding->produced;
2696   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Room requested from ASSURE_DESTINATION before each element.  */
2697   int safe_room = 8;
2698   int produced_chars = 0;
2699   Lisp_Object attrs, charset_list;
2700   int c;
       /* Charset id taken from the latest charset annotation, or -1.  */
2701   int preferred_charset_id = -1;
2702
2703   CODING_GET_INFO (coding, attrs, charset_list);
       /* Always encode against Vemacs_mule_charset_list, updating the
          coding system attributes if they name another list.  */
2704   if (! EQ (charset_list, Vemacs_mule_charset_list))
2705     {
2706       CODING_ATTR_CHARSET_LIST (attrs)
2707         = charset_list = Vemacs_mule_charset_list;
2708     }
2709
2710   while (charbuf < charbuf_end)
2711     {
2712       ASSURE_DESTINATION (safe_room);
2713       c = *charbuf++;
2714
2715       if (c < 0)
2716         {
2717           /* Handle an annotation.  */
               /* C is the negated annotation length; CHARBUF now points
                  at the element after it, which selects the annotation
                  type.  */
2718           switch (*charbuf)
2719             {
2720             case CODING_ANNOTATE_COMPOSITION_MASK:
2721               /* Not yet implemented.  */
2722               break;
2723             case CODING_ANNOTATE_CHARSET_MASK:
                   /* NOTE(review): confirm that index 3 here matches the
                      element in which ADD_CHARSET_DATA (defined
                      elsewhere) stores the charset id.  */
2724               preferred_charset_id = charbuf[3];
2725               if (preferred_charset_id >= 0
2726                   && NILP (Fmemq (make_number (preferred_charset_id),
2727                                   charset_list)))
2728                 preferred_charset_id = -1;
2729               break;
2730             default:
2731               abort ();
2732             }
               /* Skip the rest of the annotation (its first element was
                  already consumed).  */
2733           charbuf += -c - 1;
2734           continue;
2735         }
2736
2737       if (ASCII_CHAR_P (c))
2738         EMIT_ONE_ASCII_BYTE (c);
2739       else if (CHAR_BYTE8_P (c))
2740         {
2741           c = CHAR_TO_BYTE8 (c);
2742           EMIT_ONE_BYTE (c);
2743         }
2744       else
2745         {
2746           struct charset *charset;
2747           unsigned code;
2748           int dimension;
2749           int emacs_mule_id;
2750           unsigned char leading_codes[2];
2751
               /* Try the annotated charset first, then fall back to a
                  search of the whole list.  */
2752           if (preferred_charset_id >= 0)
2753             {
2754               charset = CHARSET_FROM_ID (preferred_charset_id);
2755               if (CHAR_CHARSET_P (c, charset))
2756                 code = ENCODE_CHAR (charset, c);
2757               else
2758                 charset = char_charset (c, charset_list, &code);
2759             }
2760           else
2761             charset = char_charset (c, charset_list, &code);
2762           if (! charset)
2763             {
2764               c = coding->default_char;
2765               if (ASCII_CHAR_P (c))
2766                 {
2767                   EMIT_ONE_ASCII_BYTE (c);
2768                   continue;
2769                 }
                   /* NOTE(review): the result is used below without a
                      null check; this relies on the default character
                      always being in CHARSET_LIST -- confirm.  */
2770               charset = char_charset (c, charset_list, &code);
2771             }
2772           dimension = CHARSET_DIMENSION (charset);
2773           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2774           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2775           EMIT_ONE_BYTE (leading_codes[0]);
               /* The second leading code is 0 when only one is needed.  */
2776           if (leading_codes[1])
2777             EMIT_ONE_BYTE (leading_codes[1]);
2778           if (dimension == 1)
2779             EMIT_ONE_BYTE (code | 0x80);
2780           else
2781             {
2782               code |= 0x8080;
2783               EMIT_ONE_BYTE (code >> 8);
2784               EMIT_ONE_BYTE (code & 0xFF);
2785             }
2786         }
2787     }
2788   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2789   coding->produced_char += produced_chars;
2790   coding->produced = dst - coding->destination;
2791   return 0;
2792 }
2793
2794 \f
2795 /*** 7. ISO2022 handlers ***/
2796
2797 /* The following note describes the coding system ISO2022 briefly.
2798    Since the intention of this note is to help understand the
2799    functions in this file, some parts are NOT ACCURATE or are OVERLY
2800    SIMPLIFIED.  For thorough understanding, please refer to the
2801    original document of ISO2022.  This is equivalent to the standard
2802    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2803
2804    ISO2022 provides many mechanisms to encode several character sets
2805    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2806    is encoded using bytes less than 128.  This may make the encoded
2807    text a little bit longer, but the text passes more easily through
2808    several types of gateway, some of which strip off the MSB (Most
2809    Significant Bit).
2810
2811    There are two kinds of character sets: control character sets and
2812    graphic character sets.  The former contain control characters such
2813    as `newline' and `escape' to provide control functions (control
2814    functions are also provided by escape sequences).  The latter
2815    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2816    two control character sets and many graphic character sets.
2817
2818    Graphic character sets are classified into one of the following
2819    four classes, according to the number of bytes (DIMENSION) and
2820    number of characters in one dimension (CHARS) of the set:
2821    - DIMENSION1_CHARS94
2822    - DIMENSION1_CHARS96
2823    - DIMENSION2_CHARS94
2824    - DIMENSION2_CHARS96
2825
2826    In addition, each character set is assigned an identification tag,
2827    unique for each set, called the "final character" (denoted as <F>
2828    hereafter).  The <F> of each character set is decided by ECMA(*)
2829    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2830    (0x30..0x3F are for private use only).
2831
2832    Note (*): ECMA = European Computer Manufacturers Association
2833
2834    Here are examples of graphic character sets [NAME(<F>)]:
2835         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2836         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2837         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2838         o DIMENSION2_CHARS96 -- none for the moment
2839
2840    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2841         C0 [0x00..0x1F] -- control character plane 0
2842         GL [0x20..0x7F] -- graphic character plane 0
2843         C1 [0x80..0x9F] -- control character plane 1
2844         GR [0xA0..0xFF] -- graphic character plane 1
2845
2846    A control character set is directly designated and invoked to C0 or
2847    C1 by an escape sequence.  The most common case is that:
2848    - ISO646's  control character set is designated/invoked to C0, and
2849    - ISO6429's control character set is designated/invoked to C1,
2850    and usually these designations/invocations are omitted in encoded
2851    text.  In a 7-bit environment, only C0 can be used, and a control
2852    character for C1 is encoded by an appropriate escape sequence to
2853    fit into the environment.  All control characters for C1 are
2854    defined to have corresponding escape sequences.
2855
2856    A graphic character set is at first designated to one of four
2857    graphic registers (G0 through G3), then these graphic registers are
2858    invoked to GL or GR.  These designations and invocations can be
2859    done independently.  The most common case is that G0 is invoked to
2860    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2861    these invocations and designations are omitted in encoded text.
2862    In a 7-bit environment, only GL can be used.
2863
2864    When a graphic character set of CHARS94 is invoked to GL, codes
2865    0x20 and 0x7F of the GL area work as control characters SPACE and
2866    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2867    be used.
2868
2869    There are two ways of invocation: locking-shift and single-shift.
2870    With locking-shift, the invocation lasts until the next different
2871    invocation, whereas with single-shift, the invocation affects the
2872    following character only and doesn't affect the locking-shift
2873    state.  Invocations are done by the following control characters or
2874    escape sequences:
2875
2876    ----------------------------------------------------------------------
2877    abbrev  function                  cntrl escape seq   description
2878    ----------------------------------------------------------------------
2879    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2880    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2881    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2882    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2883    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2884    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2885    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2886    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2887    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2888    ----------------------------------------------------------------------
2889    (*) These are not used by any known coding system.
2890
2891    Control characters for these functions are defined by macros
2892    ISO_CODE_XXX in `coding.h'.
2893
2894    Designations are done by the following escape sequences:
2895    ----------------------------------------------------------------------
2896    escape sequence      description
2897    ----------------------------------------------------------------------
2898    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2899    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2900    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2901    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2902    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2903    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2904    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2905    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2906    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2907    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2908    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2909    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2910    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2911    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2912    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2913    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2914    ----------------------------------------------------------------------
2915
2916    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2917    of dimension 1, chars 94, and final character <F>, etc...
2918
2919    Note (*): Although these designations are not allowed in ISO2022,
2920    Emacs accepts them on decoding, and produces them on encoding
2921    CHARS96 character sets in a coding system which is characterized as
2922    7-bit environment, non-locking-shift, and non-single-shift.
2923
2924    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2925    '(' must be omitted.  We refer to this as "short-form" hereafter.
2926
2927    Now you may notice that there are a lot of ways of encoding the
2928    same multilingual text in ISO2022.  Actually, there exist many
2929    coding systems such as Compound Text (used in X11's inter-client
2930    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2931    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2932    localized platforms), and all of these are variants of ISO2022.
2933
2934    In addition to the above, Emacs handles two more kinds of escape
2935    sequences: ISO6429's direction specification and Emacs' private
2936    sequence for specifying character composition.
2937
2938    ISO6429's direction specification takes the following form:
2939         o CSI ']'      -- end of the current direction
2940         o CSI '0' ']'  -- end of the current direction
2941         o CSI '1' ']'  -- start of left-to-right text
2942         o CSI '2' ']'  -- start of right-to-left text
2943    The control character CSI (0x9B: control sequence introducer) is
2944    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2945
2946    Character composition specification takes the following form:
2947         o ESC '0' -- start relative composition
2948         o ESC '1' -- end composition
2949         o ESC '2' -- start rule-base composition (*)
2950         o ESC '3' -- start relative composition with alternate chars  (**)
2951         o ESC '4' -- start rule-base composition with alternate chars  (**)
2952   Since these are not standard escape sequences of any ISO standard,
2953   the use of them with these meanings is restricted to Emacs only.
2954
2955   (*) This form is used only in Emacs 20.7 and older versions,
2956   but newer versions can safely decode it.
2957   (**) This form is used only in Emacs 21.1 and newer versions,
2958   and older versions can't decode it.
2959
2960   Here's a list of example usages of these composition escape
2961   sequences (categorized by `enum composition_method').
2962
2963   COMPOSITION_RELATIVE:
2964         ESC 0 CHAR [ CHAR ] ESC 1
2965   COMPOSITION_WITH_RULE:
2966         ESC 2 CHAR [ RULE CHAR ] ESC 1
2967   COMPOSITION_WITH_ALTCHARS:
2968         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2969   COMPOSITION_WITH_RULE_ALTCHARS:
2970         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2971
/* Table of 256 `enum iso_code_class_type' values.
   NOTE(review): presumably indexed by byte value to classify each
   code of the ISO-2022 code space -- confirm against the code that
   fills it.  */
2972 enum iso_code_class_type iso_code_class[256];
2973
/* Return nonzero if the charset with id ID is usable with CODING:
   ID must lie in the range 0 .. CODING->max_charset_id and the byte
   CODING->safe_charsets[ID] must differ from 255, the filler value
   for charsets that have no entry.

   A negative ID (charset lookup tables use negative values for
   "no such charset") yields zero instead of reading memory in front
   of CODING->safe_charsets.  ID is evaluated more than once, so it
   must not have side effects.  */
#define SAFE_CHARSET_P(coding, id)      \
  ((id) >= 0                            \
   && (id) <= (coding)->max_charset_id  \
   && (coding)->safe_charsets[id] != 255)
2977
2978
/* Return nonzero unless CODING_ISO_INITIAL for element 1 of the coding
   system registered under CATEGORY in `coding_categories' is
   negative.
   NOTE(review): element 1 is presumably graphic register G1, i.e. the
   test is "some charset is initially designated to G1" -- confirm.  */
#define SHIFT_OUT_OK(category)                                          \
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
2981
2982 static void
2983 setup_iso_safe_charsets (Lisp_Object attrs)
2984 {
2985   Lisp_Object charset_list, safe_charsets;
2986   Lisp_Object request;
2987   Lisp_Object reg_usage;
2988   Lisp_Object tail;
2989   int reg94, reg96;
2990   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2991   int max_charset_id;
2992
2993   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2994   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2995       && ! EQ (charset_list, Viso_2022_charset_list))
2996     {
2997       CODING_ATTR_CHARSET_LIST (attrs)
2998         = charset_list = Viso_2022_charset_list;
2999       ASET (attrs, coding_attr_safe_charsets, Qnil);
3000     }
3001
3002   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3003     return;
3004
3005   max_charset_id = 0;
3006   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3007     {
3008       int id = XINT (XCAR (tail));
3009       if (max_charset_id < id)
3010         max_charset_id = id;
3011     }
3012
3013   safe_charsets = make_uninit_string (max_charset_id + 1);
3014   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
3015   request = AREF (attrs, coding_attr_iso_request);
3016   reg_usage = AREF (attrs, coding_attr_iso_usage);
3017   reg94 = XINT (XCAR (reg_usage));
3018   reg96 = XINT (XCDR (reg_usage));
3019
3020   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3021     {
3022       Lisp_Object id;
3023       Lisp_Object reg;
3024       struct charset *charset;
3025
3026       id = XCAR (tail);
3027       charset = CHARSET_FROM_ID (XINT (id));
3028       reg = Fcdr (Fassq (id, request));
3029       if (! NILP (reg))
3030         SSET (safe_charsets, XINT (id), XINT (reg));
3031       else if (charset->iso_chars_96)
3032         {
3033           if (reg96 < 4)
3034             SSET (safe_charsets, XINT (id), reg96);
3035         }
3036       else
3037         {
3038           if (reg94 < 4)
3039             SSET (safe_charsets, XINT (id), reg94);
3040         }
3041     }
3042   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3043 }
3044
3045
3046 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3047    Check if a text is encoded in one of ISO-2022 based coding systems.
3048    If it is, return 1, else return 0.  */
3049
3050 static int
3051 detect_coding_iso_2022 (struct coding_system *coding, struct coding_detection_info *detect_info)
3052 {
3053   const unsigned char *src = coding->source, *src_base = src;
3054   const unsigned char *src_end = coding->source + coding->src_bytes;
3055   int multibytep = coding->src_multibyte;
3056   int single_shifting = 0;
3057   int id;
3058   int c, c1;
3059   int consumed_chars = 0;
3060   int i;
3061   int rejected = 0;
3062   int found = 0;
3063   int composition_count = -1;
3064
3065   detect_info->checked |= CATEGORY_MASK_ISO;
3066
3067   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3068     {
3069       struct coding_system *this = &(coding_categories[i]);
3070       Lisp_Object attrs, val;
3071
3072       if (this->id < 0)
3073         continue;
3074       attrs = CODING_ID_ATTRS (this->id);
3075       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3076           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3077         setup_iso_safe_charsets (attrs);
3078       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3079       this->max_charset_id = SCHARS (val) - 1;
3080       this->safe_charsets = SDATA (val);
3081     }
3082
3083   /* A coding system of this category is always ASCII compatible.  */
3084   src += coding->head_ascii;
3085
3086   while (rejected != CATEGORY_MASK_ISO)
3087     {
3088       src_base = src;
3089       ONE_MORE_BYTE (c);
3090       switch (c)
3091         {
3092         case ISO_CODE_ESC:
3093           if (inhibit_iso_escape_detection)
3094             break;
3095           single_shifting = 0;
3096           ONE_MORE_BYTE (c);
3097           if (c >= '(' && c <= '/')
3098             {
3099               /* Designation sequence for a charset of dimension 1.  */
3100               ONE_MORE_BYTE (c1);
3101               if (c1 < ' ' || c1 >= 0x80
3102                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3103                 /* Invalid designation sequence.  Just ignore.  */
3104                 break;
3105             }
3106           else if (c == '$')
3107             {
3108               /* Designation sequence for a charset of dimension 2.  */
3109               ONE_MORE_BYTE (c);
3110               if (c >= '@' && c <= 'B')
3111                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3112                 id = iso_charset_table[1][0][c];
3113               else if (c >= '(' && c <= '/')
3114                 {
3115                   ONE_MORE_BYTE (c1);
3116                   if (c1 < ' ' || c1 >= 0x80
3117                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3118                     /* Invalid designation sequence.  Just ignore.  */
3119                     break;
3120                 }
3121               else
3122                 /* Invalid designation sequence.  Just ignore it.  */
3123                 break;
3124             }
3125           else if (c == 'N' || c == 'O')
3126             {
3127               /* ESC <Fe> for SS2 or SS3.  */
3128               single_shifting = 1;
3129               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3130               break;
3131             }
3132           else if (c == '1')
3133             {
3134               /* End of composition.  */
3135               if (composition_count < 0
3136                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3137                 /* Invalid */
3138                 break;
3139               composition_count = -1;
3140               found |= CATEGORY_MASK_ISO;
3141             }
3142           else if (c >= '0' && c <= '4')
3143             {
3144               /* ESC <Fp> for start/end composition.  */
3145               composition_count = 0;
3146               break;
3147             }
3148           else
3149             {
3150               /* Invalid escape sequence.  Just ignore it.  */
3151               break;
3152             }
3153
3154           /* We found a valid designation sequence for CHARSET.  */
3155           rejected |= CATEGORY_MASK_ISO_8BIT;
3156           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3157                               id))
3158             found |= CATEGORY_MASK_ISO_7;
3159           else
3160             rejected |= CATEGORY_MASK_ISO_7;
3161           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3162                               id))
3163             found |= CATEGORY_MASK_ISO_7_TIGHT;
3164           else
3165             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3166           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3167                               id))
3168             found |= CATEGORY_MASK_ISO_7_ELSE;
3169           else
3170             rejected |= CATEGORY_MASK_ISO_7_ELSE;
3171           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3172                               id))
3173             found |= CATEGORY_MASK_ISO_8_ELSE;
3174           else
3175             rejected |= CATEGORY_MASK_ISO_8_ELSE;
3176           break;
3177
3178         case ISO_CODE_SO:
3179         case ISO_CODE_SI:
3180           /* Locking shift out/in.  */
3181           if (inhibit_iso_escape_detection)
3182             break;
3183           single_shifting = 0;
3184           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3185           break;
3186
3187         case ISO_CODE_CSI:
3188           /* Control sequence introducer.  */
3189           single_shifting = 0;
3190           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3191           found |= CATEGORY_MASK_ISO_8_ELSE;
3192           goto check_extra_latin;
3193
3194         case ISO_CODE_SS2:
3195         case ISO_CODE_SS3:
3196           /* Single shift.   */
3197           if (inhibit_iso_escape_detection)
3198             break;
3199           single_shifting = 0;
3200           rejected |= CATEGORY_MASK_ISO_7BIT;
3201           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3202               & CODING_ISO_FLAG_SINGLE_SHIFT)
3203             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
3204           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3205               & CODING_ISO_FLAG_SINGLE_SHIFT)
3206             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3207           if (single_shifting)
3208             break;
3209           goto check_extra_latin;
3210
3211         default:
3212           if (c < 0)
3213             continue;
3214           if (c < 0x80)
3215             {
3216               if (composition_count >= 0)
3217                 composition_count++;
3218               single_shifting = 0;
3219               break;
3220             }
3221           if (c >= 0xA0)
3222             {
3223               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3224               found |= CATEGORY_MASK_ISO_8_1;
3225               /* Check the length of succeeding codes of the range
3226                  0xA0..0xFF.  If the byte length is even, we include
3227                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3228                  only when we are not single shifting.  */
3229               if (! single_shifting
3230                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3231                 {
3232                   int i = 1;
3233                   while (src < src_end)
3234                     {
3235                       src_base = src;
3236                       ONE_MORE_BYTE (c);
3237                       if (c < 0xA0)
3238                         {
3239                           src = src_base;
3240                           break;
3241                         }
3242                       i++;
3243                     }
3244
3245                   if (i & 1 && src < src_end)
3246                     {
3247                       rejected |= CATEGORY_MASK_ISO_8_2;
3248                       if (composition_count >= 0)
3249                         composition_count += i;
3250                     }
3251                   else
3252                     {
3253                       found |= CATEGORY_MASK_ISO_8_2;
3254                       if (composition_count >= 0)
3255                         composition_count += i / 2;
3256                     }
3257                 }
3258               break;
3259             }
3260         check_extra_latin:
3261           single_shifting = 0;
3262           if (! VECTORP (Vlatin_extra_code_table)
3263               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3264             {
3265               rejected = CATEGORY_MASK_ISO;
3266               break;
3267             }
3268           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3269               & CODING_ISO_FLAG_LATIN_EXTRA)
3270             found |= CATEGORY_MASK_ISO_8_1;
3271           else
3272             rejected |= CATEGORY_MASK_ISO_8_1;
3273           rejected |= CATEGORY_MASK_ISO_8_2;
3274         }
3275     }
3276   detect_info->rejected |= CATEGORY_MASK_ISO;
3277   return 0;
3278
3279  no_more_source:
3280   detect_info->rejected |= rejected;
3281   detect_info->found |= (found & ~rejected);
3282   return 1;
3283 }
3284
3285
3286 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
3287    escape sequence should be kept.

        The charset is looked up with ISO_CHARSET_TABLE (DIM, CHARS_96,
        FINAL) and recorded as the designation of graphic register REG
        of `coding', a variable that must be in scope where the macro is
        expanded (it is not a parameter).  CHARS_96 must be an lvalue
        because it doubles as the output flag described above.

        If FINAL is below '0' or not below 128, if the table yields a
        negative id, or if the charset is not safe for `coding', the
        designation of REG is set to -2, CHARS_96 is set to -1, and the
        macro stops there.  Otherwise charset_jisx0201_roman is replaced
        by charset_ascii when CODING_ISO_FLAG_USE_ROMAN is set, and
        charset_jisx0208_1978 by charset_jisx0208 when
        CODING_ISO_FLAG_USE_OLDJIS is set, before the id is stored.  */
3288 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3289   do {                                                                  \
3290     int id, prev;                                                       \
3291                                                                         \
3292     if (final < '0' || final >= 128                                     \
3293         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
3294         || !SAFE_CHARSET_P (coding, id))                                \
3295       {                                                                 \
3296         CODING_ISO_DESIGNATION (coding, reg) = -2; /* remember as bad */ \
3297         chars_96 = -1;                                                  \
3298         break;                                                          \
3299       }                                                                 \
3300     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
3301     if (id == charset_jisx0201_roman)                                   \
3302       {                                                                 \
3303         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
3304           id = charset_ascii;                                           \
3305       }                                                                 \
3306     else if (id == charset_jisx0208_1978)                               \
3307       {                                                                 \
3308         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3309           id = charset_jisx0208;                                        \
3310       }                                                                 \
3311     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3312     /* If there was an invalid designation to REG previously, and this  \
3313        designation is ASCII to REG, we should keep this designation     \
3314        sequence.  */                                                    \
3315     if (prev == -2 && id == charset_ascii)                              \
3316       chars_96 = -1;                                                    \
3317   } while (0)
3318
3319
3320 /* Handle these composition sequences (ALT: alternate char):
3321
3322    (1) relative composition: ESC 0 CHAR ... ESC 1
3323    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3324    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3325    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3326
3327    When the start sequence (ESC 0/2/3/4) is found, this annotation
3328    header is produced.
3329
3330         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3331
3332    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3333    produced until the end sequence (ESC 1) is found:
3334
3335    (1) CHAR ... CHAR
3336    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3337    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3338    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3339
3340    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3341    annotation header are updated as below:
3342
3343    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3344    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3345    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3346    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3347
3348    If an error is found while composing, the annotation header is
3349    changed to:
3350
3351         [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3352
3353    and the sequence [ -2 DECODED-RULE ] is changed to the original
3354    byte sequence as below:
3355         o the original byte sequence is B: [ B -1 ]
3356         o the original byte sequence is B1 B2: [ B1 B2 ]
3357    and the sequence [ -1 -1 ] is changed to the original byte
3358    sequence:
3359         [ ESC '0' ]
3360 */
3361
3362 /* Decode a composition rule C1 and maybe one more byte from the
3363    source, and set RULE to the encoded composition rule, NBYTES to the
3364    length of the composition rule.  If the rule is invalid, set RULE
3365    to some negative value.

        C1 is not a parameter: it is the variable of that name at the
        point of expansion.  RULE and NBYTES must be lvalues.

        A first byte below 32 makes RULE negative and the macro stops
        at once, leaving NBYTES untouched.  Otherwise C1 - 32 selects
        the format: values below 81 are the one-byte old format, split
        into GREF = value / 9 and NREF = value % 9 with 4 remapped to
        10; larger values are the two-byte new format, where GREF is
        value - 81 and NREF is the next source byte minus 32.  In both
        cases the pair is packed with COMPOSITION_ENCODE_RULE; a
        non-negative new-format result gets 0x100 added.

        NOTE(review): the second byte is fetched with ONE_MORE_BYTE,
        which is defined elsewhere; it presumably leaves the enclosing
        function when the source is exhausted -- confirm.  */
3366
3367 #define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
3368   do {                                                                  \
3369     rule = c1 - 32;                                                     \
3370     if (rule < 0)                                                       \
3371       break;                                                            \
3372     if (rule < 81)              /* old format (before ver.21) */        \
3373       {                                                                 \
3374         int gref = (rule) / 9;                                          \
3375         int nref = (rule) % 9;                                          \
3376         if (gref == 4) gref = 10;                                       \
3377         if (nref == 4) nref = 10;                                       \
3378         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
3379         nbytes = 1;                                                     \
3380       }                                                                 \
3381     else                        /* new format (after ver.21) */         \
3382       {                                                                 \
3383         int c;                                                          \
3384                                                                         \
3385         ONE_MORE_BYTE (c);                                              \
3386         rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32);             \
3387         if (rule >= 0)                                                  \
3388           rule += 0x100;   /* to distinguish it from the old format */  \
3389         nbytes = 2;                                                     \
3390       }                                                                 \
3391   } while (0)
3392
/* Turn the decoded composition rule RULE back into its source byte
   form, storing the result in charbuf[idx] and charbuf[idx + 1] and
   adding the number of bytes restored to new_chars.  `charbuf', `idx'
   and `new_chars' are not parameters; they are the variables of those
   names at the point of expansion.

   GREF and NREF are taken from the low part of RULE as
   (RULE % 0x100) / 12 and (RULE % 0x100) % 12.  A RULE below 0x100 is
   in the old format: 10 is mapped back to 4 and the single byte
   32 + GREF * 9 + NREF is stored, followed by -1.  Any other RULE is
   in the new format and yields the two bytes 32 + 81 + GREF and
   32 + NREF.

   RULE is expanded several times and without parentheses, so it must
   be a simple expression without side effects.  All reads of RULE
   happen before either slot is written, so RULE may be
   charbuf[idx + 1] itself.  */
3393 #define ENCODE_COMPOSITION_RULE(rule)                           \
3394   do {                                                          \
3395     int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3396                                                                 \
3397     if (rule < 0x100)           /* old format */                \
3398       {                                                         \
3399         if (gref == 10) gref = 4;                               \
3400         if (nref == 10) nref = 4;                               \
3401         charbuf[idx] = 32 + gref * 9 + nref;                    \
3402         charbuf[idx + 1] = -1;                                  \
3403         new_chars++;                                            \
3404       }                                                         \
3405     else                                /* new format */        \
3406       {                                                         \
3407         charbuf[idx] = 32 + 81 + gref;                          \
3408         charbuf[idx + 1] = 32 + nref;                           \
3409         new_chars += 2;                                         \
3410       }                                                         \
3411   } while (0)
3412
3413 /* Finish the current composition as invalid.

        CMP_STATUS->length entries before CHARBUF belong to the
        composition, so its annotation header starts at
        CHARBUF[-length].  The first five of those entries are
        overwritten with ESC, the digit of the original start sequence
        ('0', '2', '3' or '4' according to CMP_STATUS->method), -2, 0
        and -1.  For methods at or above COMPOSITION_WITH_RULE the
        remaining entries are then scanned: each [ -2 RULE ] pair is
        turned back into rule bytes by ENCODE_COMPOSITION_RULE and each
        [ -1 -1 ] pair into ESC '0'.

        CMP_STATUS->state is reset to COMPOSING_NO.  The return value is
        CMP_STATUS->nchars plus what those conversions added (1 or 2
        per rule, 2 per ESC '0'); MAYBE_FINISH_COMPOSITION adds it to
        char_offset.

        The local names `idx' and `new_chars' are used by name inside
        ENCODE_COMPOSITION_RULE and must not be renamed.  */
3414
3415 static int finish_composition (int *, struct composition_status *);
3416
3417 static int
3418 finish_composition (int *charbuf, struct composition_status *cmp_status)
3419 {
3420   int idx = - cmp_status->length;    /* offset of the header from CHARBUF */
3421   int new_chars;
3422
3423   /* Recover the original ESC sequence */
3424   charbuf[idx++] = ISO_CODE_ESC;
3425   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3426                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3427                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3428                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3429                     : '4');
3430   charbuf[idx++] = -2;
3431   charbuf[idx++] = 0;
3432   charbuf[idx++] = -1;
3433   new_chars = cmp_status->nchars;
3434   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3435     for (; idx < 0; idx++)
3436       {
3437         int elt = charbuf[idx];
3438
3439         if (elt == -2)                 /* rule marker; rule is the next entry */
3440           {
3441             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3442             idx++;                     /* skip the second slot just written */
3443           }
3444         else if (elt == -1)            /* first half of a [ -1 -1 ] marker */
3445           {
3446             charbuf[idx++] = ISO_CODE_ESC;
3447             charbuf[idx] = '0';
3448             new_chars += 2;
3449           }
3450       }
3451   cmp_status->state = COMPOSING_NO;
3452   return new_chars;
3453 }
3454
3455 /* If characters are under composition, finish the composition.
        The composition is finished as invalid by finish_composition and
        the count it returns is added to char_offset.  `cmp_status',
        `charbuf' and `char_offset' are not parameters; they are the
        variables of those names at the point of expansion.  */
3456 #define MAYBE_FINISH_COMPOSITION()                              \
3457   do {                                                          \
3458     if (cmp_status->state != COMPOSING_NO)                      \
3459       char_offset += finish_composition (charbuf, cmp_status);  \
3460   } while (0)
3461
3462 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3463
3464    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3465    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3466    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3467    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3468
3469    Produce this annotation sequence now:
3470
3471    [ -LENGTH(==-4) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) METHOD ]

        NOTE(review): the header is emitted by ADD_COMPOSITION_DATA
        (charbuf, 0, 0, METHOD) and its length is recorded below as
        MAX_ANNOTATION_LENGTH; neither is defined in this part of the
        file, and finish_composition rewrites five header entries, so
        confirm that "==-4" and the four elements shown above are still
        accurate.

        C1 is the byte that followed ESC.  `cmp_status', `charbuf' and
        `coding' are the variables of those names at the point of
        expansion.

        When C1 is '0' and the state is COMPOSING_COMPONENT_CHAR under
        COMPOSITION_WITH_ALTCHARS, or COMPOSING_COMPONENT_RULE under
        COMPOSITION_WITH_RULE_ALTCHARS, the ESC 0 is the separator
        inside an ESC 3 / ESC 4 sequence: the marker [ -1 -1 ] is
        stored, the state becomes COMPOSING_CHAR and the length grows
        by 2.  In every other case any composition still in progress is
        finished as invalid and a new one is started: the method is
        chosen from C1, the state is COMPOSING_CHAR for '0' and '2' and
        COMPOSING_COMPONENT_CHAR for '3' and '4', the counters are
        reset, and coding->annotated is set.
3472 */
3473
3474 #define DECODE_COMPOSITION_START(c1)                                       \
3475   do {                                                                     \
3476     if (c1 == '0'                                                          \
3477         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
3478              && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
3479             || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
3480                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3481       {                                                                    \
3482         *charbuf++ = -1;                                                   \
3483         *charbuf++= -1;                                                    \
3484         cmp_status->state = COMPOSING_CHAR;                                \
3485         cmp_status->length += 2;                                           \
3486       }                                                                    \
3487     else                                                                   \
3488       {                                                                    \
3489         MAYBE_FINISH_COMPOSITION ();                                       \
3490         cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE             \
3491                               : c1 == '2' ? COMPOSITION_WITH_RULE          \
3492                               : c1 == '3' ? COMPOSITION_WITH_ALTCHARS      \
3493                               : COMPOSITION_WITH_RULE_ALTCHARS);           \
3494         cmp_status->state                                                  \
3495           = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);       \
3496         ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
3497         cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
3498         cmp_status->nchars = cmp_status->ncomps = 0;                       \
3499         coding->annotated = 1;                                             \
3500       }                                                                    \
3501   } while (0)
3502
3503
3504 /* Handle composition end sequence ESC 1.

        `cmp_status', `charbuf' and `char_offset' are the variables of
        those names at the point of expansion, which must also provide
        the label `invalid_code'.

        The sequence is rejected when no composed character has been
        stored, or when the state does not fit the method: a
        COMPOSITION_WITH_RULE composition must not end in state
        COMPOSING_CHAR, and every other method must end in state
        COMPOSING_CHAR.  On rejection the composition is finished as
        invalid and control jumps to `invalid_code'.

        Otherwise the header at charbuf[-length] is completed: its
        first entry is decreased by ncomps + 2 for
        COMPOSITION_WITH_ALTCHARS or by ncomps * 3 for
        COMPOSITION_WITH_RULE_ALTCHARS, its third entry receives
        nchars, char_offset advances by nchars, and the state returns
        to COMPOSING_NO.  */
3505
3506 #define DECODE_COMPOSITION_END()                                        \
3507   do {                                                                  \
3508     if (cmp_status->nchars == 0                                         \
3509         || ((cmp_status->state == COMPOSING_CHAR)                       \
3510             == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
3511       {                                                                 \
3512         MAYBE_FINISH_COMPOSITION ();                                    \
3513         goto invalid_code;                                              \
3514       }                                                                 \
3515     if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
3516       charbuf[- cmp_status->length] -= cmp_status->ncomps + 2;          \
3517     else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
3518       charbuf[- cmp_status->length] -= cmp_status->ncomps * 3;          \
3519     charbuf[- cmp_status->length + 2] = cmp_status->nchars;             \
3520     char_offset += cmp_status->nchars;                                  \
3521     cmp_status->state = COMPOSING_NO;                                   \
3522   } while (0)
3523
3524 /* Store a composition rule RULE in charbuf, and update cmp_status.

        The rule is stored as the pair [ -2 RULE ], the recorded length
        grows by 2, and the state is decremented.  `charbuf' and
        `cmp_status' are the variables of those names at the point of
        expansion.

        NOTE(review): the decrement presumably moves COMPOSING_RULE to
        COMPOSING_CHAR and COMPOSING_COMPONENT_RULE to
        COMPOSING_COMPONENT_CHAR, which depends on the order of the
        state enumeration defined elsewhere -- confirm.  */
3525
3526 #define STORE_COMPOSITION_RULE(rule)    \
3527   do {                                  \
3528     *charbuf++ = -2;                    \
3529     *charbuf++ = rule;                  \
3530     cmp_status->length += 2;            \
3531     cmp_status->state--;                \
3532   } while (0)
3533
3534 /* Store a composed char or a component char C in charbuf, and update
3535    cmp_status.

        The recorded length grows by 1.  C counts as a composed char
        (nchars) when the state is COMPOSING_CHAR and as a component
        (ncomps) otherwise.  The state is then incremented when the
        method is COMPOSITION_WITH_RULE, or when it is
        COMPOSITION_WITH_RULE_ALTCHARS and a component char was just
        stored.  `charbuf' and `cmp_status' are the variables of those
        names at the point of expansion.

        NOTE(review): the increment presumably selects the matching
        "rule expected" state, which depends on the order of the state
        enumeration defined elsewhere -- confirm.  */
3536
3537 #define STORE_COMPOSITION_CHAR(c)                                       \
3538   do {                                                                  \
3539     *charbuf++ = (c);                                                   \
3540     cmp_status->length++;                                               \
3541     if (cmp_status->state == COMPOSING_CHAR)                            \
3542       cmp_status->nchars++;                                             \
3543     else                                                                \
3544       cmp_status->ncomps++;                                             \
3545     if (cmp_status->method == COMPOSITION_WITH_RULE                     \
3546         || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS        \
3547             && cmp_status->state == COMPOSING_COMPONENT_CHAR))          \
3548       cmp_status->state++;                                              \
3549   } while (0)
3550
3551
3552 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3553
3554 static void
3555 decode_coding_iso_2022 (struct coding_system *coding)
3556 {
3557   const unsigned char *src = coding->source + coding->consumed;
3558   const unsigned char *src_end = coding->source + coding->src_bytes;
3559   const unsigned char *src_base;
3560   int *charbuf = coding->charbuf + coding->charbuf_used;
3561   /* We may produce two annotations (charset and composition) in one
3562      loop and one more charset annotation at the end.  */
3563   int *charbuf_end
3564     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3565   int consumed_chars = 0, consumed_chars_base;
3566   int multibytep = coding->src_multibyte;
3567   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3568   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3569   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3570   int charset_id_2, charset_id_3;
3571   struct charset *charset;
3572   int c;
3573   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3574   Lisp_Object attrs, charset_list;
3575   int char_offset = coding->produced_char;
3576   int last_offset = char_offset;
3577   int last_id = charset_ascii;
3578   int eol_crlf =
3579     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3580   int byte_after_cr = -1;
3581   int i;
3582
3583   CODING_GET_INFO (coding, attrs, charset_list);
3584   setup_iso_safe_charsets (attrs);
3585   /* Charset list may have been changed.  */
3586   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3587   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3588
3589   if (cmp_status->state != COMPOSING_NO)
3590     {
3591       for (i = 0; i < cmp_status->length; i++)
3592         *charbuf++ = cmp_status->carryover[i];
3593       coding->annotated = 1;
3594     }
3595
3596   while (1)
3597     {
3598       int c1, c2, c3;
3599
3600       src_base = src;
3601       consumed_chars_base = consumed_chars;
3602
3603       if (charbuf >= charbuf_end)
3604         {
3605           if (byte_after_cr >= 0)
3606             src_base--;
3607           break;
3608         }
3609
3610       if (byte_after_cr >= 0)
3611         c1 = byte_after_cr, byte_after_cr = -1;
3612       else
3613         ONE_MORE_BYTE (c1);
3614       if (c1 < 0)
3615         goto invalid_code;
3616
3617       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3618         {
3619           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3620           char_offset++;
3621           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3622           continue;
3623         }
3624
3625       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3626         {
3627           if (c1 == ISO_CODE_ESC)
3628             {
3629               if (src + 1 >= src_end)
3630                 goto no_more_source;
3631               *charbuf++ = ISO_CODE_ESC;
3632               char_offset++;
3633               if (src[0] == '%' && src[1] == '@')
3634                 {
3635                   src += 2;
3636                   consumed_chars += 2;
3637                   char_offset += 2;
3638                   /* We are sure charbuf can contain two more chars. */
3639                   *charbuf++ = '%';
3640                   *charbuf++ = '@';
3641                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3642                 }
3643             }
3644           else
3645             {
3646               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3647               char_offset++;
3648             }
3649           continue;
3650         }
3651
3652       if ((cmp_status->state == COMPOSING_RULE
3653            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3654           && c1 != ISO_CODE_ESC)
3655         {
3656           int rule, nbytes;
3657
3658           DECODE_COMPOSITION_RULE (rule, nbytes);
3659           if (rule < 0)
3660             goto invalid_code;
3661           STORE_COMPOSITION_RULE (rule);
3662           continue;
3663         }
3664
3665       /* We produce at most one character.  */
3666       switch (iso_code_class [c1])
3667         {
3668         case ISO_0x20_or_0x7F:
3669           if (charset_id_0 < 0
3670               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3671             /* This is SPACE or DEL.  */
3672             charset = CHARSET_FROM_ID (charset_ascii);
3673           else
3674             charset = CHARSET_FROM_ID (charset_id_0);
3675           break;
3676
3677         case ISO_graphic_plane_0:
3678           if (charset_id_0 < 0)
3679             charset = CHARSET_FROM_ID (charset_ascii);
3680           else
3681             charset = CHARSET_FROM_ID (charset_id_0);
3682           break;
3683
3684         case ISO_0xA0_or_0xFF:
3685           if (charset_id_1 < 0
3686               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3687               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3688             goto invalid_code;
3689           /* This is a graphic character, we fall down ... */
3690
3691         case ISO_graphic_plane_1:
3692           if (charset_id_1 < 0)
3693             goto invalid_code;
3694           charset = CHARSET_FROM_ID (charset_id_1);
3695           break;
3696
3697         case ISO_control_0:
3698           if (eol_crlf && c1 == '\r')
3699             ONE_MORE_BYTE (byte_after_cr);
3700           MAYBE_FINISH_COMPOSITION ();
3701           charset = CHARSET_FROM_ID (charset_ascii);
3702           break;
3703
3704         case ISO_control_1:
3705           goto invalid_code;
3706
3707         case ISO_shift_out:
3708           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3709               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3710             goto invalid_code;
3711           CODING_ISO_INVOCATION (coding, 0) = 1;
3712           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3713           continue;
3714
3715         case ISO_shift_in:
3716           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3717             goto invalid_code;
3718           CODING_ISO_INVOCATION (coding, 0) = 0;
3719           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3720           continue;
3721
3722         case ISO_single_shift_2_7:
3723           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3724             goto invalid_code;
3725         case ISO_single_shift_2:
3726           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3727             goto invalid_code;
3728           /* SS2 is handled as an escape sequence of ESC 'N' */
3729           c1 = 'N';
3730           goto label_escape_sequence;
3731
3732         case ISO_single_shift_3:
3733           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3734             goto invalid_code;
3735           /* SS3 is handled as an escape sequence of ESC 'O' */
3736           c1 = 'O';
3737           goto label_escape_sequence;
3738
3739         case ISO_control_sequence_introducer:
3740           /* CSI is handled as an escape sequence of ESC '[' ...  */
3741           c1 = '[';
3742           goto label_escape_sequence;
3743
3744         case ISO_escape:
3745           ONE_MORE_BYTE (c1);
3746         label_escape_sequence:
3747           /* Escape sequences handled here are invocation,
3748              designation, direction specification, and character
3749              composition specification.  */
3750           switch (c1)
3751             {
3752             case '&':           /* revision of following character set */
3753               ONE_MORE_BYTE (c1);
3754               if (!(c1 >= '@' && c1 <= '~'))
3755                 goto invalid_code;
3756               ONE_MORE_BYTE (c1);
3757               if (c1 != ISO_CODE_ESC)
3758                 goto invalid_code;
3759               ONE_MORE_BYTE (c1);
3760               goto label_escape_sequence;
3761
3762             case '$':           /* designation of 2-byte character set */
3763               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3764                 goto invalid_code;
3765               {
3766                 int reg, chars96;
3767
3768                 ONE_MORE_BYTE (c1);
3769                 if (c1 >= '@' && c1 <= 'B')
3770                   {     /* designation of JISX0208.1978, GB2312.1980,
3771                            or JISX0208.1980 */
3772                     reg = 0, chars96 = 0;
3773                   }
3774                 else if (c1 >= 0x28 && c1 <= 0x2B)
3775                   { /* designation of DIMENSION2_CHARS94 character set */
3776                     reg = c1 - 0x28, chars96 = 0;
3777                     ONE_MORE_BYTE (c1);
3778                   }
3779                 else if (c1 >= 0x2C && c1 <= 0x2F)
3780                   { /* designation of DIMENSION2_CHARS96 character set */
3781                     reg = c1 - 0x2C, chars96 = 1;
3782                     ONE_MORE_BYTE (c1);
3783                   }
3784                 else
3785                   goto invalid_code;
3786                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3787                 /* We must update these variables now.  */
3788                 if (reg == 0)
3789                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3790                 else if (reg == 1)
3791                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3792                 if (chars96 < 0)
3793                   goto invalid_code;
3794               }
3795               continue;
3796
3797             case 'n':           /* invocation of locking-shift-2 */
3798               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3799                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3800                 goto invalid_code;
3801               CODING_ISO_INVOCATION (coding, 0) = 2;
3802               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3803               continue;
3804
3805             case 'o':           /* invocation of locking-shift-3 */
3806               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3807                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3808                 goto invalid_code;
3809               CODING_ISO_INVOCATION (coding, 0) = 3;
3810               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3811               continue;
3812
3813             case 'N':           /* invocation of single-shift-2 */
3814               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3815                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3816                 goto invalid_code;
3817               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3818               if (charset_id_2 < 0)
3819                 charset = CHARSET_FROM_ID (charset_ascii);
3820               else
3821                 charset = CHARSET_FROM_ID (charset_id_2);
3822               ONE_MORE_BYTE (c1);
3823               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3824                 goto invalid_code;
3825               break;
3826
3827             case 'O':           /* invocation of single-shift-3 */
3828               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3829                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3830                 goto invalid_code;
3831               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3832               if (charset_id_3 < 0)
3833                 charset = CHARSET_FROM_ID (charset_ascii);
3834               else
3835                 charset = CHARSET_FROM_ID (charset_id_3);
3836               ONE_MORE_BYTE (c1);
3837               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3838                 goto invalid_code;
3839               break;
3840
3841             case '0': case '2': case '3': case '4': /* start composition */
3842               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3843                 goto invalid_code;
3844               if (last_id != charset_ascii)
3845                 {
3846                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3847                   last_id = charset_ascii;
3848                   last_offset = char_offset;
3849                 }
3850               DECODE_COMPOSITION_START (c1);
3851               continue;
3852
3853             case '1':           /* end composition */
3854               if (cmp_status->state == COMPOSING_NO)
3855                 goto invalid_code;
3856               DECODE_COMPOSITION_END ();
3857               continue;
3858
3859             case '[':           /* specification of direction */
3860               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3861                 goto invalid_code;
3862               /* For the moment, nested direction is not supported.
3863                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3864                  left-to-right, and nozero means right-to-left.  */
3865               ONE_MORE_BYTE (c1);
3866               switch (c1)
3867                 {
3868                 case ']':       /* end of the current direction */
3869                   coding->mode &= ~CODING_MODE_DIRECTION;
3870
3871                 case '0':       /* end of the current direction */
3872                 case '1':       /* start of left-to-right direction */
3873                   ONE_MORE_BYTE (c1);
3874                   if (c1 == ']')
3875                     coding->mode &= ~CODING_MODE_DIRECTION;
3876                   else
3877                     goto invalid_code;
3878                   break;
3879
3880                 case '2':       /* start of right-to-left direction */
3881                   ONE_MORE_BYTE (c1);
3882                   if (c1 == ']')
3883                     coding->mode |= CODING_MODE_DIRECTION;
3884                   else
3885                     goto invalid_code;
3886                   break;
3887
3888                 default:
3889                   goto invalid_code;
3890                 }
3891               continue;
3892
3893             case '%':
3894               ONE_MORE_BYTE (c1);
3895               if (c1 == '/')
3896                 {
3897                   /* CTEXT extended segment:
3898                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3899                      We keep these bytes as is for the moment.
3900                      They may be decoded by post-read-conversion.  */
3901                   int dim, M, L;
3902                   int size;
3903
3904                   ONE_MORE_BYTE (dim);
3905                   if (dim < 0 || dim > 4)
3906                     goto invalid_code;
3907                   ONE_MORE_BYTE (M);
3908                   if (M < 128)
3909                     goto invalid_code;
3910                   ONE_MORE_BYTE (L);
3911                   if (L < 128)
3912                     goto invalid_code;
3913                   size = ((M - 128) * 128) + (L - 128);
3914                   if (charbuf + 6 > charbuf_end)
3915                     goto break_loop;
3916                   *charbuf++ = ISO_CODE_ESC;
3917                   *charbuf++ = '%';
3918                   *charbuf++ = '/';
3919                   *charbuf++ = dim;
3920                   *charbuf++ = BYTE8_TO_CHAR (M);
3921                   *charbuf++ = BYTE8_TO_CHAR (L);
3922                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3923                 }
3924               else if (c1 == 'G')
3925                 {
3926                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3927                      ESC % G --UTF-8-BYTES-- ESC % @
3928                      We keep these bytes as is for the moment.
3929                      They may be decoded by post-read-conversion.  */
3930                   if (charbuf + 3 > charbuf_end)
3931                     goto break_loop;
3932                   *charbuf++ = ISO_CODE_ESC;
3933                   *charbuf++ = '%';
3934                   *charbuf++ = 'G';
3935                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3936                 }
3937               else
3938                 goto invalid_code;
3939               continue;
3940               break;
3941
3942             default:
3943               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3944                 goto invalid_code;
3945               {
3946                 int reg, chars96;
3947
3948                 if (c1 >= 0x28 && c1 <= 0x2B)
3949                   { /* designation of DIMENSION1_CHARS94 character set */
3950                     reg = c1 - 0x28, chars96 = 0;
3951                     ONE_MORE_BYTE (c1);
3952                   }
3953                 else if (c1 >= 0x2C && c1 <= 0x2F)
3954                   { /* designation of DIMENSION1_CHARS96 character set */
3955                     reg = c1 - 0x2C, chars96 = 1;
3956                     ONE_MORE_BYTE (c1);
3957                   }
3958                 else
3959                   goto invalid_code;
3960                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3961                 /* We must update these variables now.  */
3962                 if (reg == 0)
3963                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3964                 else if (reg == 1)
3965                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3966                 if (chars96 < 0)
3967                   goto invalid_code;
3968               }
3969               continue;
3970             }
3971         }
3972
3973       if (cmp_status->state == COMPOSING_NO
3974           && charset->id != charset_ascii
3975           && last_id != charset->id)
3976         {
3977           if (last_id != charset_ascii)
3978             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3979           last_id = charset->id;
3980           last_offset = char_offset;
3981         }
3982
3983       /* Now we know CHARSET and 1st position code C1 of a character.
3984          Produce a decoded character while getting 2nd and 3rd
3985          position codes C2, C3 if necessary.  */
3986       if (CHARSET_DIMENSION (charset) > 1)
3987         {
3988           ONE_MORE_BYTE (c2);
3989           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3990               || ((c1 & 0x80) != (c2 & 0x80)))
3991             /* C2 is not in a valid range.  */
3992             goto invalid_code;
3993           if (CHARSET_DIMENSION (charset) == 2)
3994             c1 = (c1 << 8) | c2;
3995           else
3996             {
3997               ONE_MORE_BYTE (c3);
3998               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3999                   || ((c1 & 0x80) != (c3 & 0x80)))
4000                 /* C3 is not in a valid range.  */
4001                 goto invalid_code;
4002               c1 = (c1 << 16) | (c2 << 8) | c2;
4003             }
4004         }
4005       c1 &= 0x7F7F7F;
4006       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4007       if (c < 0)
4008         {
4009           MAYBE_FINISH_COMPOSITION ();
4010           for (; src_base < src; src_base++, char_offset++)
4011             {
4012               if (ASCII_BYTE_P (*src_base))
4013                 *charbuf++ = *src_base;
4014               else
4015                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4016             }
4017         }
4018       else if (cmp_status->state == COMPOSING_NO)
4019         {
4020           *charbuf++ = c;
4021           char_offset++;
4022         }
4023       else if ((cmp_status->state == COMPOSING_CHAR
4024                 ? cmp_status->nchars
4025                 : cmp_status->ncomps)
4026                >= MAX_COMPOSITION_COMPONENTS)
4027         {
4028           /* Too long composition.  */
4029           MAYBE_FINISH_COMPOSITION ();
4030           *charbuf++ = c;
4031           char_offset++;
4032         }
4033       else
4034         STORE_COMPOSITION_CHAR (c);
4035       continue;
4036
4037     invalid_code:
4038       MAYBE_FINISH_COMPOSITION ();
4039       src = src_base;
4040       consumed_chars = consumed_chars_base;
4041       ONE_MORE_BYTE (c);
4042       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4043       char_offset++;
4044       coding->errors++;
4045       continue;
4046
4047     break_loop:
4048       break;
4049     }
4050
4051  no_more_source:
4052   if (cmp_status->state != COMPOSING_NO)
4053     {
4054       if (coding->mode & CODING_MODE_LAST_BLOCK)
4055         MAYBE_FINISH_COMPOSITION ();
4056       else
4057         {
4058           charbuf -= cmp_status->length;
4059           for (i = 0; i < cmp_status->length; i++)
4060             cmp_status->carryover[i] = charbuf[i];
4061         }
4062     }
4063   else if (last_id != charset_ascii)
4064     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4065   coding->consumed_char += consumed_chars_base;
4066   coding->consumed = src_base - coding->source;
4067   coding->charbuf_used = charbuf - coding->charbuf;
4068 }
4069
4070
4071 /* ISO2022 encoding stuff.  */
4072
4073 /*
4074    It is not enough to say just "ISO2022" on encoding, we have to
4075    specify more details.  In Emacs, each coding system of ISO2022
4076    variant has the following specifications:
4077         1. Initial designation to G0 thru G3.
4078         2. Allows short-form designation?
4079         3. ASCII should be designated to G0 before control characters?
4080         4. ASCII should be designated to G0 at end of line?
4081         5. 7-bit environment or 8-bit environment?
4082         6. Use locking-shift?
4083         7. Use Single-shift?
4084    And the following two are only for Japanese:
4085         8. Use ASCII in place of JIS0201-1976-Roman?
4086         9. Use JISX0208-1983 in place of JISX0208-1978?
4087    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4088    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4089    details.
4090 */
4091
/* Emit at DST (advancing DST) the escape sequence that designates
   CHARSET to graphic register REG, then record that designation in
   CODING.  The sequence is, in order:
     - `ESC & <'@' + revision>', only when CODING has
       CODING_ISO_FLAG_REVISION set and CHARSET's revision is >= 0;
     - `ESC';
     - `$', when CHARSET's dimension is not 1;
     - an intermediate byte selecting REG (one of "()*+" for a 94-set,
       one of ",-./" for a 96-set);
     - CHARSET's final byte.
   The intermediate byte is omitted (short form) for a 94-set whose
   dimension is not 1, going to register 0, whose final byte is '@',
   'A', or 'B', unless CODING has CODING_ISO_FLAG_LONG_FORM set.
   CHARSET, REG and CODING are evaluated more than once.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    /* Intermediate bytes, indexed by graphic register number.  */     \
    const char *const intermediate_char_94 = "()*+";                    \
    const char *const intermediate_char_96 = ",-./";                    \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    int revision                                                        \
      = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)         \
         ? CHARSET_ISO_REVISION (charset) : -1);                        \
    /* Intermediate byte to emit, or -1 for the short form.  */        \
    int c = -1;                                                         \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      EMIT_ONE_ASCII_BYTE ('$');                                        \
                                                                        \
    if (CHARSET_ISO_CHARS_96 (charset))                                 \
      c = intermediate_char_96[reg];                                    \
    else if (CHARSET_DIMENSION (charset) == 1                           \
             || (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM) \
             || reg != 0                                                \
             || final_char < '@' || final_char > 'B')                   \
      c = intermediate_char_94[reg];                                    \
    if (c >= 0)                                                         \
      EMIT_ONE_ASCII_BYTE (c);                                          \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4139
4140
/* The following two macros produce codes (a control character or an
   escape sequence) for the ISO2022 single-shift functions
   (single-shift-2 and single-shift-3), and then set
   CODING_ISO_SINGLE_SHIFTING (coding) to 1.  Both expect a variable
   named `coding' to be in scope.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    /* 8-bit form: the SS2 control byte; 7-bit form: ESC N.  */        \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4153
4154
/* Emit the single-shift-3 function (the SS3 control byte in an 8-bit
   environment, `ESC O' when CODING_ISO_FLAG_SEVEN_BITS is set) and
   set CODING_ISO_SINGLE_SHIFTING (coding) to 1.  Expects a variable
   named `coding' to be in scope.  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4163
4164
/* The following four macros produce codes (a control character or an
   escape sequence) for the ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one also
   records in CODING_ISO_INVOCATION (coding, 0) which graphic register
   is now invoked to graphic plane 0.  They expect a variable named
   `coding' to be in scope.  */

#define ENCODE_SHIFT_IN                                 \
  do                                                    \
    {                                                   \
      /* SI: graphic register 0 goes to plane 0.  */    \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                \
      CODING_ISO_INVOCATION (coding, 0) = 0;            \
    }                                                   \
  while (0)
4174
4175
/* Emit shift-out (ISO_CODE_SO) and record that graphic register 1 is
   now invoked to graphic plane 0.  */
#define ENCODE_SHIFT_OUT                                \
  do                                                    \
    {                                                   \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                \
      CODING_ISO_INVOCATION (coding, 0) = 1;            \
    }                                                   \
  while (0)
4181
4182
/* Emit locking-shift-2 (`ESC n') and record that graphic register 2
   is now invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do                                                    \
    {                                                   \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');         \
      CODING_ISO_INVOCATION (coding, 0) = 2;            \
    }                                                   \
  while (0)
4188
4189
/* Emit locking-shift-3 (`ESC o') and record that graphic register 3
   is now invoked to graphic plane 0.  The final byte must be 'o':
   `ESC n' is locking-shift-2, and the decoder in this file treats
   `ESC o' as the invocation of locking-shift-3.  Expects a variable
   named `coding' to be in scope.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
4195
4196
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be a modifiable lvalue: when CODING_ISO_FLAG_USE_ROMAN
   is set and CHARSET is ASCII, it is replaced by JISX0201-Roman.
   C1 may be any integer expression; it is parenthesized at every
   use.  The macro expects `coding', `dst' and `produced_chars' to be
   in scope.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single-shift was just emitted; this byte completes it     \
           regardless of which charset is invoked.  */                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4239
4240
/* Emit a two-byte (DIMENSION2) character of CHARSET whose position
   codes are C1 and C2, first emitting any designation and invocation
   codes needed to make CHARSET usable.

   CHARSET must be a modifiable lvalue: when CODING_ISO_FLAG_USE_OLDJIS
   is set and CHARSET is JISX0208, it is replaced by JISX0208-1978.
   The macro expects `coding', `dst' and `produced_chars' to be in
   scope.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (! CODING_ISO_SINGLE_SHIFTING (coding)                           \
        && id != CODING_ISO_INVOKED_CHARSET (coding, 0)                 \
        && id != CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                 \
        /* CHARSET is not invoked to either graphic plane yet.         \
           Invoke it (designating it to a graphic register first if    \
           needed), then go around the loop again to emit the          \
           character itself.  */                                       \
        dst = encode_invocation_designation (charset, coding, dst,      \
                                             &produced_chars);          \
        continue;                                                       \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        else                                                            \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                  \
    else                                                                \
      /* Only plane 1 is left: emit with the high bits set.  */        \
      EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                        \
    break;                                                              \
  } while (1)
4283
4284
/* Encode character C in CHARSET: compute its code with ENCODE_CHAR,
   then hand it to the DIMENSION1 macro when CHARSET's dimension is 1,
   or otherwise to the DIMENSION2 macro with the code split into
   `code >> 8' and `code & 0xFF'.  CHARSET is passed on to those
   macros, which may assign to it.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset), (c));                               \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
4294
4295
4296 /* Produce designation and invocation codes at a place pointed by DST
4297    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4298    Return new DST.  */
4299
4300 unsigned char *
4301 encode_invocation_designation (struct charset *charset, struct coding_system *coding, unsigned char *dst, int *p_nchars)
4302 {
4303   int multibytep = coding->dst_multibyte;
4304   int produced_chars = *p_nchars;
4305   int reg;                      /* graphic register number */
4306   int id = CHARSET_ID (charset);
4307
4308   /* At first, check designations.  */
4309   for (reg = 0; reg < 4; reg++)
4310     if (id == CODING_ISO_DESIGNATION (coding, reg))
4311       break;
4312
4313   if (reg >= 4)
4314     {
4315       /* CHARSET is not yet designated to any graphic registers.  */
4316       /* At first check the requested designation.  */
4317       reg = CODING_ISO_REQUEST (coding, id);
4318       if (reg < 0)
4319         /* Since CHARSET requests no special designation, designate it
4320            to graphic register 0.  */
4321         reg = 0;
4322
4323       ENCODE_DESIGNATION (charset, reg, coding);
4324     }
4325
4326   if (CODING_ISO_INVOCATION (coding, 0) != reg
4327       && CODING_ISO_INVOCATION (coding, 1) != reg)
4328     {
4329       /* Since the graphic register REG is not invoked to any graphic
4330          planes, invoke it to graphic plane 0.  */
4331       switch (reg)
4332         {
4333         case 0:                 /* graphic register 0 */
4334           ENCODE_SHIFT_IN;
4335           break;
4336
4337         case 1:                 /* graphic register 1 */
4338           ENCODE_SHIFT_OUT;
4339           break;
4340
4341         case 2:                 /* graphic register 2 */
4342           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4343             ENCODE_SINGLE_SHIFT_2;
4344           else
4345             ENCODE_LOCKING_SHIFT_2;
4346           break;
4347
4348         case 3:                 /* graphic register 3 */
4349           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4350             ENCODE_SINGLE_SHIFT_3;
4351           else
4352             ENCODE_LOCKING_SHIFT_3;
4353           break;
4354         }
4355     }
4356
4357   *p_nchars = produced_chars;
4358   return dst;
4359 }
4360
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits a Control Sequence
   Introducer: `ESC [' when CODING_ISO_FLAG_SEVEN_BITS is set, the
   single byte ISO_CODE_CSI otherwise.  The flag is tested as a bit,
   like every other CODING_ISO_FLAG_* test in this file, so it still
   takes effect when other flags are set too.  This is an object-like
   macro: write it without an argument list.  Expects a variable
   named `coding' to be in scope.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
4370
4371
/* Emit `CSI 2 ]', which the decoder in this file treats as the start
   of right-to-left direction.  ENCODE_CONTROL_SEQUENCE_INTRODUCER is
   an object-like macro, so it is written without an argument list;
   with one, the expansion would not be a valid statement.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
4377
4378
/* Emit `CSI 0 ]', which the decoder in this file treats as the end of
   the current direction (it clears CODING_MODE_DIRECTION, i.e. goes
   back to left-to-right).  ENCODE_CONTROL_SEQUENCE_INTRODUCER is an
   object-like macro, so it is written without an argument list; with
   one, the expansion would not be a valid statement.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4384
4385
/* Produce the codes that bring the graphic planes and registers back
   to their initial state: a shift-in when graphic plane 0 does not
   currently hold register 0, then one designation for every register
   that has a non-negative initial charset different from its current
   designation.  Expects a variable named `coding' to be in scope.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
                                                                        \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        int initial = CODING_ISO_INITIAL (coding, reg);                 \
                                                                        \
        /* Nothing to do for a register without an initial charset,    \
           or one that already holds it.  */                           \
        if (initial < 0                                                 \
            || CODING_ISO_DESIGNATION (coding, reg) == initial)         \
          continue;                                                     \
        charset = CHARSET_FROM_ID (initial);                            \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
4404
4405
4406 /* Produce designation sequences of charsets in the line started from
4407    SRC to a place pointed by DST, and return updated DST.
4408
4409    If the current block ends before any end-of-line, we may fail to
4410    find all the necessary designations.  */
4411
4412 static unsigned char *
4413 encode_designation_at_bol (struct coding_system *coding, int *charbuf, int *charbuf_end, unsigned char *dst)
4414 {
4415   struct charset *charset;
4416   /* Table of charsets to be designated to each graphic register.  */
4417   int r[4];
4418   int c, found = 0, reg;
4419   int produced_chars = 0;
4420   int multibytep = coding->dst_multibyte;
4421   Lisp_Object attrs;
4422   Lisp_Object charset_list;
4423
4424   attrs = CODING_ID_ATTRS (coding->id);
4425   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4426   if (EQ (charset_list, Qiso_2022))
4427     charset_list = Viso_2022_charset_list;
4428
4429   for (reg = 0; reg < 4; reg++)
4430     r[reg] = -1;
4431
4432   while (found < 4)
4433     {
4434       int id;
4435
4436       c = *charbuf++;
4437       if (c == '\n')
4438         break;
4439       charset = char_charset (c, charset_list, NULL);
4440       id = CHARSET_ID (charset);
4441       reg = CODING_ISO_REQUEST (coding, id);
4442       if (reg >= 0 && r[reg] < 0)
4443         {
4444           found++;
4445           r[reg] = id;
4446         }
4447     }
4448
4449   if (found)
4450     {
4451       for (reg = 0; reg < 4; reg++)
4452         if (r[reg] >= 0
4453             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4454           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4455     }
4456
4457   return dst;
4458 }
4459
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".

   Encode the elements of CODING->charbuf as ISO 2022 and store the
   resulting bytes from CODING->destination + CODING->produced on.
   A negative element of the charbuf starts an annotation; a charset
   annotation selects the charset to prefer for the characters that
   follow it.  On return, CODING->produced and CODING->produced_char
   are updated and the beginning-of-line state is stored back with
   CODING_ISO_BOL.  Always return 0.  */

static int
encode_coding_iso_2022 (struct coding_system *coding)
{
  /* NOTE(review): several of these locals (MULTIBYTEP, DST, DST_END,
     PRODUCED_CHARS, ...) are presumably referred to by name from the
     ASSURE_DESTINATION, EMIT_* and ENCODE_* macros -- confirm before
     renaming or removing any of them.  */
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Amount of destination room requested on each iteration.  */
  int safe_room = 16;
  /* Nonzero if designation sequences have to be produced before the
     next character is encoded.  */
  int bol_designation
    = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
       && CODING_ISO_BOL (coding));
  int produced_chars = 0;
  Lisp_Object attrs, eol_type, charset_list;
  int ascii_compatible;
  int c;
  /* ID of the charset requested by the latest charset annotation, or
     -1 if there is no usable one.  */
  int preferred_charset_id = -1;

  CODING_GET_INFO (coding, attrs, charset_list);
  eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
  /* An EOL type that is a vector is handled as Qunix here.  */
  if (VECTORP (eol_type))
    eol_type = Qunix;

  setup_iso_safe_charsets (attrs);
  /* Charset list may have been changed.  */
  charset_list = CODING_ATTR_CHARSET_LIST (attrs);
  coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));

  ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));

  while (charbuf < charbuf_end)
    {
      ASSURE_DESTINATION (safe_room);

      if (bol_designation)
        {
          unsigned char *dst_prev = dst;

          /* We have to produce designation sequences if any now.  */
          dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
          bol_designation = 0;
          /* We are sure that designation sequences are all ASCII bytes.  */
          produced_chars += dst - dst_prev;
        }

      c = *charbuf++;

      if (c < 0)
        {
          /* Handle an annotation.  The element following C tells its
             kind.  */
          switch (*charbuf)
            {
            case CODING_ANNOTATE_COMPOSITION_MASK:
              /* Not yet implemented.  */
              break;
            case CODING_ANNOTATE_CHARSET_MASK:
              /* The charset ID is found two elements after the kind.
                 Ignore it unless it is a member of the charset list
                 of this coding system.  */
              preferred_charset_id = charbuf[2];
              if (preferred_charset_id >= 0
                  && NILP (Fmemq (make_number (preferred_charset_id),
                                  charset_list)))
                preferred_charset_id = -1;
              break;
            default:
              abort ();
            }
          /* Skip the remaining -C - 1 elements of the annotation.  */
          charbuf += -c - 1;
          continue;
        }

      /* Now encode the character C.  */
      if (c < 0x20 || c == 0x7F)
        {
          /* A control character.  */
          if (c == '\n'
              || (c == '\r' && EQ (eol_type, Qmac)))
            {
              /* End of line: reset and/or reinitialize the designation
                 state as the flags of the coding system request, and
                 remember whether designations must be produced at the
                 beginning of the next line.  */
              if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
                ENCODE_RESET_PLANE_AND_REGISTER ();
              if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
                {
                  int i;

                  for (i = 0; i < 4; i++)
                    CODING_ISO_DESIGNATION (coding, i)
                      = CODING_ISO_INITIAL (coding, i);
                }
              bol_designation
                = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
            }
          else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
            ENCODE_RESET_PLANE_AND_REGISTER ();
          EMIT_ONE_ASCII_BYTE (c);
        }
      else if (ASCII_CHAR_P (c))
        {
          /* An ASCII character is produced as is when the coding
             system is ASCII compatible, otherwise it is encoded as a
             character of charset ascii.  */
          if (ascii_compatible)
            EMIT_ONE_ASCII_BYTE (c);
          else
            {
              struct charset *charset = CHARSET_FROM_ID (charset_ascii);
              ENCODE_ISO_CHARACTER (charset, c);
            }
        }
      else if (CHAR_BYTE8_P (c))
        {
          /* A raw 8-bit byte is produced as that byte.  */
          c = CHAR_TO_BYTE8 (c);
          EMIT_ONE_BYTE (c);
        }
      else
        {
          struct charset *charset;

          /* Use the preferred charset if it contains C, otherwise
             look C up in the charset list.  */
          if (preferred_charset_id >= 0)
            {
              charset = CHARSET_FROM_ID (preferred_charset_id);
              if (! CHAR_CHARSET_P (c, charset))
                charset = char_charset (c, charset_list, NULL);
            }
          else
            charset = char_charset (c, charset_list, NULL);
          if (!charset)
            {
              /* C can't be encoded.  Substitute either the
                 inhibit-substitution code (in safe encoding mode) or
                 the default character of the coding system.  */
              if (coding->mode & CODING_MODE_SAFE_ENCODING)
                {
                  c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
                  charset = CHARSET_FROM_ID (charset_ascii);
                }
              else
                {
                  c = coding->default_char;
                  charset = char_charset (c, charset_list, NULL);
                }
            }
          ENCODE_ISO_CHARACTER (charset, c);
        }
    }

  /* At the end of the last block, reset the state if the coding
     system resets it at end of line.  */
  if (coding->mode & CODING_MODE_LAST_BLOCK
      && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
    {
      ASSURE_DESTINATION (safe_room);
      ENCODE_RESET_PLANE_AND_REGISTER ();
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  CODING_ISO_BOL (coding) = bol_designation;
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
4610
4611 \f
/*** 8. SJIS and BIG5 handlers ***/
4613
/* Although SJIS and BIG5 are not ISO coding systems, they are used
   quite widely.  So, for the moment, Emacs supports them in bare
   C code.  But, in the future, they may be supported only by CCL.  */
4617
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, with its two position-codes divided and
   shifted so that they fit in the ranges below.
4624
4625    --- CODE RANGE of SJIS ---
4626    (character set)      (range)
4627    ASCII                0x00 .. 0x7F
4628    KATAKANA-JISX0201    0xA0 .. 0xDF
4629    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4630             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4631    -------------------------------
4632
4633 */
4634
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and each of its characters is encoded in two bytes.
4638
4639    --- CODE RANGE of BIG5 ---
4640    (character set)      (range)
4641    ASCII                0x00 .. 0x7F
4642    Big5 (1st byte)      0xA1 .. 0xFE
4643         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4644    --------------------------
4645
4646   */
4647
4648 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4649    Check if a text is encoded in SJIS.  If it is, return
4650    CATEGORY_MASK_SJIS, else return 0.  */
4651
4652 static int
4653 detect_coding_sjis (struct coding_system *coding, struct coding_detection_info *detect_info)
4654 {
4655   const unsigned char *src = coding->source, *src_base;
4656   const unsigned char *src_end = coding->source + coding->src_bytes;
4657   int multibytep = coding->src_multibyte;
4658   int consumed_chars = 0;
4659   int found = 0;
4660   int c;
4661   Lisp_Object attrs, charset_list;
4662   int max_first_byte_of_2_byte_code;
4663
4664   CODING_GET_INFO (coding, attrs, charset_list);
4665   max_first_byte_of_2_byte_code
4666     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4667
4668   detect_info->checked |= CATEGORY_MASK_SJIS;
4669   /* A coding system of this category is always ASCII compatible.  */
4670   src += coding->head_ascii;
4671
4672   while (1)
4673     {
4674       src_base = src;
4675       ONE_MORE_BYTE (c);
4676       if (c < 0x80)
4677         continue;
4678       if ((c >= 0x81 && c <= 0x9F)
4679           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4680         {
4681           ONE_MORE_BYTE (c);
4682           if (c < 0x40 || c == 0x7F || c > 0xFC)
4683             break;
4684           found = CATEGORY_MASK_SJIS;
4685         }
4686       else if (c >= 0xA0 && c < 0xE0)
4687         found = CATEGORY_MASK_SJIS;
4688       else
4689         break;
4690     }
4691   detect_info->rejected |= CATEGORY_MASK_SJIS;
4692   return 0;
4693
4694  no_more_source:
4695   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4696     {
4697       detect_info->rejected |= CATEGORY_MASK_SJIS;
4698       return 0;
4699     }
4700   detect_info->found |= found;
4701   return 1;
4702 }
4703
4704 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4705    Check if a text is encoded in BIG5.  If it is, return
4706    CATEGORY_MASK_BIG5, else return 0.  */
4707
4708 static int
4709 detect_coding_big5 (struct coding_system *coding, struct coding_detection_info *detect_info)
4710 {
4711   const unsigned char *src = coding->source, *src_base;
4712   const unsigned char *src_end = coding->source + coding->src_bytes;
4713   int multibytep = coding->src_multibyte;
4714   int consumed_chars = 0;
4715   int found = 0;
4716   int c;
4717
4718   detect_info->checked |= CATEGORY_MASK_BIG5;
4719   /* A coding system of this category is always ASCII compatible.  */
4720   src += coding->head_ascii;
4721
4722   while (1)
4723     {
4724       src_base = src;
4725       ONE_MORE_BYTE (c);
4726       if (c < 0x80)
4727         continue;
4728       if (c >= 0xA1)
4729         {
4730           ONE_MORE_BYTE (c);
4731           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4732             return 0;
4733           found = CATEGORY_MASK_BIG5;
4734         }
4735       else
4736         break;
4737     }
4738   detect_info->rejected |= CATEGORY_MASK_BIG5;
4739   return 0;
4740
4741  no_more_source:
4742   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4743     {
4744       detect_info->rejected |= CATEGORY_MASK_BIG5;
4745       return 0;
4746     }
4747   detect_info->found |= found;
4748   return 1;
4749 }
4750
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   Decode the SJIS text in the source of CODING, starting at
   CODING->consumed, and store the resulting characters (and charset
   annotations) in CODING->charbuf.  A byte that does not form a valid
   code is stored as a raw byte and counted in CODING->errors.  On
   return, CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated.  */

static void
decode_coding_sjis (struct coding_system *coding)
{
  /* NOTE(review): SRC, SRC_BASE, SRC_END, MULTIBYTEP and
     CONSUMED_CHARS are presumably referred to by name from
     ONE_MORE_BYTE -- confirm before renaming any of them.  */
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* We may produce one charset annotation in one loop and one more at
     the end.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
  int consumed_chars = 0, consumed_chars_base;
  int multibytep = coding->src_multibyte;
  struct charset *charset_roman, *charset_kanji, *charset_kana;
  struct charset *charset_kanji2;
  Lisp_Object attrs, charset_list, val;
  /* CHAR_OFFSET counts the characters produced so far.  LAST_ID is
     the ID of the charset of the current run of characters and
     LAST_OFFSET is the value CHAR_OFFSET had where that run began.  */
  int char_offset = coding->produced_char;
  int last_offset = char_offset;
  int last_id = charset_ascii;
  int eol_crlf =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR when EOL_CRLF is nonzero, or -1 if
     there is none.  It is used as the next C.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attrs, charset_list);

  /* The charset list holds, in this order, the roman, kana and kanji
     charsets, optionally followed by a second kanji charset.  */
  val = charset_list;
  charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));

  while (1)
    {
      int c, c1;
      struct charset *charset;

      /* Remember where this code starts so that we can back up to it
         on an invalid code or when the source runs out.  */
      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* No more room in the charbuf.  Give back the byte that was
             read ahead, if any.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        c = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c);
      if (c < 0)
        goto invalid_code;
      if (c < 0x80)
        {
          /* Read the byte following a CR ahead, so that a CR is never
             the last byte consumed in this call.  */
          if (eol_crlf && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          charset = charset_roman;
        }
      else if (c == 0x80 || c == 0xA0)
        goto invalid_code;
      else if (c >= 0xA1 && c <= 0xDF)
        {
          /* SJIS -> JISX0201-Kana */
          c &= 0x7F;
          charset = charset_kana;
        }
      else if (c <= 0xEF)
        {
          /* SJIS -> JISX0208 */
          /* Here C is in 0x81..0x9F or 0xE0..0xEF.  */
          ONE_MORE_BYTE (c1);
          if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
            goto invalid_code;
          c = (c << 8) | c1;
          SJIS_TO_JIS (c);
          charset = charset_kanji;
        }
      else if (c <= 0xFC && charset_kanji2)
        {
          /* SJIS -> JISX0213-2 */
          ONE_MORE_BYTE (c1);
          if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
            goto invalid_code;
          c = (c << 8) | c1;
          SJIS_TO_JIS2 (c);
          charset = charset_kanji2;
        }
      else
        goto invalid_code;
      /* When a run of a different non-ASCII charset starts, produce
         the annotation for the previous run unless it was ASCII.  */
      if (charset->id != charset_ascii
          && last_id != charset->id)
        {
          if (last_id != charset_ascii)
            ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
          last_id = charset->id;
          last_offset = char_offset;
        }
      CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
      *charbuf++ = c;
      char_offset++;
      continue;

    invalid_code:
      /* Back up to the start of this code, read a single byte again,
         and store it as a raw byte (or as the character -C if
         ONE_MORE_BYTE gave a negative value).
         NOTE(review): when C came from BYTE_AFTER_CR, SRC_BASE already
         points past that byte, so the byte read again here appears to
         be the following one -- confirm against ONE_MORE_BYTE.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
      char_offset++;
      coding->errors++;
    }

 no_more_source:
  /* Produce the annotation for the last run, and report how much of
     the source has been consumed up to the last complete code.  */
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
4871
/* Decode the BIG5 text in the source of CODING, starting at
   CODING->consumed, and store the resulting characters (and charset
   annotations) in CODING->charbuf.  A byte that does not form a valid
   code is stored as a raw byte and counted in CODING->errors.  On
   return, CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated.  */

static void
decode_coding_big5 (struct coding_system *coding)
{
  /* NOTE(review): SRC, SRC_BASE, SRC_END, MULTIBYTEP and
     CONSUMED_CHARS are presumably referred to by name from
     ONE_MORE_BYTE -- confirm before renaming any of them.  */
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* We may produce one charset annotation in one loop and one more at
     the end.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
  int consumed_chars = 0, consumed_chars_base;
  int multibytep = coding->src_multibyte;
  struct charset *charset_roman, *charset_big5;
  Lisp_Object attrs, charset_list, val;
  /* CHAR_OFFSET counts the characters produced so far.  LAST_ID is
     the ID of the charset of the current run of characters and
     LAST_OFFSET is the value CHAR_OFFSET had where that run began.  */
  int char_offset = coding->produced_char;
  int last_offset = char_offset;
  int last_id = charset_ascii;
  int eol_crlf =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR when EOL_CRLF is nonzero, or -1 if
     there is none.  It is used as the next C.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attrs, charset_list);
  /* The charset list holds the roman charset followed by the Big5
     charset.  */
  val = charset_list;
  charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));

  while (1)
    {
      int c, c1;
      struct charset *charset;

      /* Remember where this code starts so that we can back up to it
         on an invalid code or when the source runs out.  */
      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* No more room in the charbuf.  Give back the byte that was
             read ahead, if any.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        c = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c);

      if (c < 0)
        goto invalid_code;
      if (c < 0x80)
        {
          /* Read the byte following a CR ahead, so that a CR is never
             the last byte consumed in this call.  */
          if (eol_crlf && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          charset = charset_roman;
        }
      else
        {
          /* BIG5 -> Big5 */
          if (c < 0xA1 || c > 0xFE)
            goto invalid_code;
          ONE_MORE_BYTE (c1);
          if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
            goto invalid_code;
          c = c << 8 | c1;
          charset = charset_big5;
        }
      /* When a run of a different non-ASCII charset starts, produce
         the annotation for the previous run unless it was ASCII.  */
      if (charset->id != charset_ascii
          && last_id != charset->id)
        {
          if (last_id != charset_ascii)
            ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
          last_id = charset->id;
          last_offset = char_offset;
        }
      CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
      *charbuf++ = c;
      char_offset++;
      continue;

    invalid_code:
      /* Back up to the start of this code, read a single byte again,
         and store it as a raw byte (or as the character -C if
         ONE_MORE_BYTE gave a negative value).
         NOTE(review): when C came from BYTE_AFTER_CR, SRC_BASE already
         points past that byte, so the byte read again here appears to
         be the following one -- confirm against ONE_MORE_BYTE.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
      char_offset++;
      coding->errors++;
    }

 no_more_source:
  /* Produce the annotation for the last run, and report how much of
     the source has been consumed up to the last complete code.  */
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
4967
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
   Encode the characters in CODING->charbuf as SJIS and store the
   resulting bytes from CODING->destination + CODING->produced on.
   The charset list of CODING gives, in this order, the roman, kana
   and kanji charsets, optionally followed by a second kanji charset.
   A character of the kanji charsets is produced as two bytes and one
   of the kana charset as one byte with the 8th bit set.  For any
   other charset, the low 7 bits of the code are produced as an ASCII
   byte.  Always return 0.  */

static int
encode_coding_sjis (struct coding_system *coding)
{
  /* NOTE(review): MULTIBYTEP, CHARBUF, CHARBUF_END, DST, DST_END and
     PRODUCED_CHARS are presumably referred to by name from the
     ASSURE_DESTINATION and EMIT_* macros -- confirm before renaming
     or removing any of them.  */
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Amount of destination room requested on each iteration.  */
  int safe_room = 4;
  int produced_chars = 0;
  Lisp_Object attrs, charset_list, val;
  int ascii_compatible;
  struct charset *charset_roman, *charset_kanji, *charset_kana;
  struct charset *charset_kanji2;
  int c;

  CODING_GET_INFO (coding, attrs, charset_list);
  /* CHARSET_ROMAN is set here but not used below.  */
  val = charset_list;
  charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));

  ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));

  while (charbuf < charbuf_end)
    {
      ASSURE_DESTINATION (safe_room);
      c = *charbuf++;
      /* Now encode the character C.  */
      if (ASCII_CHAR_P (c) && ascii_compatible)
        EMIT_ONE_ASCII_BYTE (c);
      else if (CHAR_BYTE8_P (c))
        {
          /* A raw 8-bit byte is produced as that byte.  */
          c = CHAR_TO_BYTE8 (c);
          EMIT_ONE_BYTE (c);
        }
      else
        {
          unsigned code;
          struct charset *charset = char_charset (c, charset_list, &code);

          if (!charset)
            {
              /* C can't be encoded.  Substitute either the
                 inhibit-substitution code (in safe encoding mode) or
                 the default character of the coding system.  */
              if (coding->mode & CODING_MODE_SAFE_ENCODING)
                {
                  code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
                  charset = CHARSET_FROM_ID (charset_ascii);
                }
              else
                {
                  c = coding->default_char;
                  charset = char_charset (c, charset_list, &code);
                }
            }
          if (code == CHARSET_INVALID_CODE (charset))
            abort ();
          if (charset == charset_kanji)
            {
              int c1, c2;
              JIS_TO_SJIS (code);
              c1 = code >> 8, c2 = code & 0xFF;
              EMIT_TWO_BYTES (c1, c2);
            }
          else if (charset == charset_kana)
            EMIT_ONE_BYTE (code | 0x80);
          else if (charset_kanji2 && charset == charset_kanji2)
            {
              int c1, c2;

              /* Only codes whose upper byte passes the test below are
                 converted with JIS_TO_SJIS2; for the others the low 7
                 bits of the code are produced as an ASCII byte.  */
              c1 = code >> 8;
              if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
                  || c1 == 0x28
                  || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
                {
                  JIS_TO_SJIS2 (code);
                  c1 = code >> 8, c2 = code & 0xFF;
                  EMIT_TWO_BYTES (c1, c2);
                }
              else
                EMIT_ONE_ASCII_BYTE (code & 0x7F);
            }
          else
            EMIT_ONE_ASCII_BYTE (code & 0x7F);
        }
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
5067
/* Encode the characters in CODING->charbuf as BIG5 and store the
   resulting bytes from CODING->destination + CODING->produced on.
   The charset list of CODING gives the roman charset followed by the
   Big5 charset.  A character of the Big5 charset is produced as the
   two bytes of its code.  For any other charset, the low 7 bits of
   the code are produced as an ASCII byte.  Always return 0.  */

static int
encode_coding_big5 (struct coding_system *coding)
{
  /* NOTE(review): MULTIBYTEP, CHARBUF, CHARBUF_END, DST, DST_END and
     PRODUCED_CHARS are presumably referred to by name from the
     ASSURE_DESTINATION and EMIT_* macros -- confirm before renaming
     or removing any of them.  */
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Amount of destination room requested on each iteration.  */
  int safe_room = 4;
  int produced_chars = 0;
  Lisp_Object attrs, charset_list, val;
  int ascii_compatible;
  struct charset *charset_roman, *charset_big5;
  int c;

  CODING_GET_INFO (coding, attrs, charset_list);
  /* CHARSET_ROMAN is set here but not used below.  */
  val = charset_list;
  charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
  ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));

  while (charbuf < charbuf_end)
    {
      ASSURE_DESTINATION (safe_room);
      c = *charbuf++;
      /* Now encode the character C.  */
      if (ASCII_CHAR_P (c) && ascii_compatible)
        EMIT_ONE_ASCII_BYTE (c);
      else if (CHAR_BYTE8_P (c))
        {
          /* A raw 8-bit byte is produced as that byte.  */
          c = CHAR_TO_BYTE8 (c);
          EMIT_ONE_BYTE (c);
        }
      else
        {
          unsigned code;
          struct charset *charset = char_charset (c, charset_list, &code);

          if (! charset)
            {
              /* C can't be encoded.  Substitute either the
                 inhibit-substitution code (in safe encoding mode) or
                 the default character of the coding system.  */
              if (coding->mode & CODING_MODE_SAFE_ENCODING)
                {
                  code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
                  charset = CHARSET_FROM_ID (charset_ascii);
                }
              else
                {
                  c = coding->default_char;
                  charset = char_charset (c, charset_list, &code);
                }
            }
          if (code == CHARSET_INVALID_CODE (charset))
            abort ();
          if (charset == charset_big5)
            {
              int c1, c2;

              c1 = code >> 8, c2 = code & 0xFF;
              EMIT_TWO_BYTES (c1, c2);
            }
          else
            EMIT_ONE_ASCII_BYTE (code & 0x7F);
        }
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
5137
5138 \f
/*** 9. CCL handlers ***/
5140
5141 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5142    Check if a text is encoded in a coding system of which
5143    encoder/decoder are written in CCL program.  If it is, return
5144    CATEGORY_MASK_CCL, else return 0.  */
5145
5146 static int
5147 detect_coding_ccl (struct coding_system *coding, struct coding_detection_info *detect_info)
5148 {
5149   const unsigned char *src = coding->source, *src_base;
5150   const unsigned char *src_end = coding->source + coding->src_bytes;
5151   int multibytep = coding->src_multibyte;
5152   int consumed_chars = 0;
5153   int found = 0;
5154   unsigned char *valids;
5155   int head_ascii = coding->head_ascii;
5156   Lisp_Object attrs;
5157
5158   detect_info->checked |= CATEGORY_MASK_CCL;
5159
5160   coding = &coding_categories[coding_category_ccl];
5161   valids = CODING_CCL_VALIDS (coding);
5162   attrs = CODING_ID_ATTRS (coding->id);
5163   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5164     src += head_ascii;
5165
5166   while (1)
5167     {
5168       int c;
5169
5170       src_base = src;
5171       ONE_MORE_BYTE (c);
5172       if (c < 0 || ! valids[c])
5173         break;
5174       if ((valids[c] > 1))
5175         found = CATEGORY_MASK_CCL;
5176     }
5177   detect_info->rejected |= CATEGORY_MASK_CCL;
5178   return 0;
5179
5180  no_more_source:
5181   detect_info->found |= found;
5182   return 1;
5183 }
5184
5185 static void
5186 decode_coding_ccl (struct coding_system *coding)
5187 {
5188   const unsigned char *src = coding->source + coding->consumed;
5189   const unsigned char *src_end = coding->source + coding->src_bytes;
5190   int *charbuf = coding->charbuf + coding->charbuf_used;
5191   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5192   int consumed_chars = 0;
5193   int multibytep = coding->src_multibyte;
5194   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5195   int source_charbuf[1024];
5196   int source_byteidx[1025];
5197   Lisp_Object attrs, charset_list;
5198
5199   CODING_GET_INFO (coding, attrs, charset_list);
5200
5201   while (1)
5202     {
5203       const unsigned char *p = src;
5204       int i = 0;
5205
5206       if (multibytep)
5207         {
5208           while (i < 1024 && p < src_end)
5209             {
5210               source_byteidx[i] = p - src;
5211               source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5212             }
5213           source_byteidx[i] = p - src;
5214         }
5215       else
5216         while (i < 1024 && p < src_end)
5217           source_charbuf[i++] = *p++;
5218
5219       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5220         ccl->last_block = 1;
5221       ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5222                   charset_list);
5223       charbuf += ccl->produced;
5224       if (multibytep)
5225         src += source_byteidx[ccl->consumed];
5226       else
5227         src += ccl->consumed;
5228       consumed_chars += ccl->consumed;
5229       if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5230         break;
5231     }
5232
5233   switch (ccl->status)
5234     {
5235     case CCL_STAT_SUSPEND_BY_SRC:
5236       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5237       break;
5238     case CCL_STAT_SUSPEND_BY_DST:
5239       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5240       break;
5241     case CCL_STAT_QUIT:
5242     case CCL_STAT_INVALID_CMD:
5243       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5244       break;
5245     default:
5246       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5247       break;
5248     }
5249   coding->consumed_char += consumed_chars;
5250   coding->consumed = src - coding->source;
5251   coding->charbuf_used = charbuf - coding->charbuf;
5252 }
5253
/* Encode the elements of CODING->charbuf by running the CCL program
   of CODING, and store the low 8 bits of each element it produces as
   one byte from CODING->destination + CODING->produced on.  On
   return, the conversion result that corresponds to the final status
   of the CCL program is recorded, and CODING->produced and
   CODING->produced_char are updated.  Always return 0.  */

static int
encode_coding_ccl (struct coding_system *coding)
{
  /* NOTE(review): MULTIBYTEP, CHARBUF, CHARBUF_END, DST, DST_END and
     PRODUCED_CHARS are presumably referred to by name from the
     ASSURE_DESTINATION and EMIT_ONE_BYTE macros -- confirm before
     renaming any of them.  */
  struct ccl_program *ccl = &coding->spec.ccl->ccl;
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Buffer receiving the output of one run of the CCL program.  */
  int destination_charbuf[1024];
  int i, produced_chars = 0;
  Lisp_Object attrs, charset_list;

  CODING_GET_INFO (coding, attrs, charset_list);
  /* Tell the CCL program if the whole source has been consumed and
     this is the last block.  */
  if (coding->consumed_char == coding->src_chars
      && coding->mode & CODING_MODE_LAST_BLOCK)
    ccl->last_block = 1;

  while (charbuf < charbuf_end)
    {
      ccl_driver (ccl, charbuf, destination_charbuf,
                  charbuf_end - charbuf, 1024, charset_list);
      if (multibytep)
        {
          /* Twice as much room as elements produced is requested for
             a multibyte destination.  */
          ASSURE_DESTINATION (ccl->produced * 2);
          for (i = 0; i < ccl->produced; i++)
            EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
        }
      else
        {
          ASSURE_DESTINATION (ccl->produced);
          for (i = 0; i < ccl->produced; i++)
            *dst++ = destination_charbuf[i] & 0xFF;
          produced_chars += ccl->produced;
        }
      charbuf += ccl->consumed;
      /* Stop if the CCL program quit or met an invalid command.  */
      if (ccl->status == CCL_STAT_QUIT
          || ccl->status == CCL_STAT_INVALID_CMD)
        break;
    }

  /* Translate the final status of the CCL program into a conversion
     result.  */
  switch (ccl->status)
    {
    case CCL_STAT_SUSPEND_BY_SRC:
      record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
      break;
    case CCL_STAT_SUSPEND_BY_DST:
      record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
      break;
    case CCL_STAT_QUIT:
    case CCL_STAT_INVALID_CMD:
      record_conversion_result (coding, CODING_RESULT_INTERRUPT);
      break;
    default:
      record_conversion_result (coding, CODING_RESULT_SUCCESS);
      break;
    }

  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
5316
5317
5318 \f
5319 /*** 10, 11. no-conversion handlers ***/
5320
5321 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
5322
5323 static void
5324 decode_coding_raw_text (struct coding_system *coding)
5325 {
5326   int eol_crlf =
5327     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5328
5329   coding->chars_at_source = 1;
5330   coding->consumed_char = coding->src_chars;
5331   coding->consumed = coding->src_bytes;
5332   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
5333     {
5334       coding->consumed_char--;
5335       coding->consumed--;
5336       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5337     }
5338   else
5339     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5340 }
5341
/* Encode the elements of CODING->charbuf as raw text and store the
   resulting bytes from CODING->destination + CODING->produced on.
   There are four cases, depending on whether the destination and the
   source are multibyte.  On return, CODING->produced and
   CODING->produced_char are updated.  Always return 0.  */

static int
encode_coding_raw_text (struct coding_system *coding)
{
  /* NOTE(review): MULTIBYTEP, CHARBUF, CHARBUF_END, DST, DST_END and
     PRODUCED_CHARS are presumably referred to by name from the
     ASSURE_DESTINATION and EMIT_* macros -- confirm before renaming
     any of them.  */
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = coding->charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  int produced_chars = 0;
  int c;

  if (multibytep)
    {
      /* Multibyte destination: everything goes through the EMIT_*
         macros, which also count the produced characters.  */
      int safe_room = MAX_MULTIBYTE_LENGTH * 2;

      if (coding->src_multibyte)
        while (charbuf < charbuf_end)
          {
            ASSURE_DESTINATION (safe_room);
            c = *charbuf++;
            if (ASCII_CHAR_P (c))
              EMIT_ONE_ASCII_BYTE (c);
            else if (CHAR_BYTE8_P (c))
              {
                c = CHAR_TO_BYTE8 (c);
                EMIT_ONE_BYTE (c);
              }
            else
              {
                /* Any other character: emit each byte of its
                   multibyte form separately.  */
                unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;

                CHAR_STRING_ADVANCE (c, p1);
                while (p0 < p1)
                  {
                    EMIT_ONE_BYTE (*p0);
                    p0++;
                  }
              }
          }
      else
        while (charbuf < charbuf_end)
          {
            ASSURE_DESTINATION (safe_room);
            c = *charbuf++;
            EMIT_ONE_BYTE (c);
          }
    }
  else
    {
      /* Unibyte destination: bytes are stored directly, and the
         number of produced characters is the number of bytes stored.  */
      if (coding->src_multibyte)
        {
          int safe_room = MAX_MULTIBYTE_LENGTH;

          while (charbuf < charbuf_end)
            {
              ASSURE_DESTINATION (safe_room);
              c = *charbuf++;
              if (ASCII_CHAR_P (c))
                *dst++ = c;
              else if (CHAR_BYTE8_P (c))
                *dst++ = CHAR_TO_BYTE8 (c);
              else
                CHAR_STRING_ADVANCE (c, dst);
            }
        }
      else
        {
          /* Each element is one byte; ask for room for all of them
             at once and copy.  */
          ASSURE_DESTINATION (charbuf_end - charbuf);
          while (charbuf < charbuf_end && dst < dst_end)
            *dst++ = *charbuf++;
        }
      produced_chars = dst - (coding->destination + coding->produced);
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
5420
5421 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5422    Check if a text is encoded in a charset-based coding system.  If it
5423    is, return 1, else return 0.  */
5424
5425 static int
5426 detect_coding_charset (struct coding_system *coding, struct coding_detection_info *detect_info)
5427 {
5428   const unsigned char *src = coding->source, *src_base;
5429   const unsigned char *src_end = coding->source + coding->src_bytes;
5430   int multibytep = coding->src_multibyte;
5431   int consumed_chars = 0;
5432   Lisp_Object attrs, valids, name;
5433   int found = 0;
5434   int head_ascii = coding->head_ascii;
5435   int check_latin_extra = 0;
5436
5437   detect_info->checked |= CATEGORY_MASK_CHARSET;
5438
5439   coding = &coding_categories[coding_category_charset];
5440   attrs = CODING_ID_ATTRS (coding->id);
5441   valids = AREF (attrs, coding_attr_charset_valids);
5442   name = CODING_ID_NAME (coding->id);
5443   if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5444                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5445       || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5446                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5447     check_latin_extra = 1;
5448
5449   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5450     src += head_ascii;
5451
5452   while (1)
5453     {
5454       int c;
5455       Lisp_Object val;
5456       struct charset *charset;
5457       int dim, idx;
5458
5459       src_base = src;
5460       ONE_MORE_BYTE (c);
5461       if (c < 0)
5462         continue;
5463       val = AREF (valids, c);
5464       if (NILP (val))
5465         break;
5466       if (c >= 0x80)
5467         {
5468           if (c < 0xA0
5469               && check_latin_extra
5470               && (!VECTORP (Vlatin_extra_code_table)
5471                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5472             break;
5473           found = CATEGORY_MASK_CHARSET;
5474         }
5475       if (INTEGERP (val))
5476         {
5477           charset = CHARSET_FROM_ID (XFASTINT (val));
5478           dim = CHARSET_DIMENSION (charset);
5479           for (idx = 1; idx < dim; idx++)
5480             {
5481               if (src == src_end)
5482                 goto too_short;
5483               ONE_MORE_BYTE (c);
5484               if (c < charset->code_space[(dim - 1 - idx) * 2]
5485                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5486                 break;
5487             }
5488           if (idx < dim)
5489             break;
5490         }
5491       else
5492         {
5493           idx = 1;
5494           for (; CONSP (val); val = XCDR (val))
5495             {
5496               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5497               dim = CHARSET_DIMENSION (charset);
5498               while (idx < dim)
5499                 {
5500                   if (src == src_end)
5501                     goto too_short;
5502                   ONE_MORE_BYTE (c);
5503                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5504                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5505                     break;
5506                   idx++;
5507                 }
5508               if (idx == dim)
5509                 {
5510                   val = Qnil;
5511                   break;
5512                 }
5513             }
5514           if (CONSP (val))
5515             break;
5516         }
5517     }
5518  too_short:
5519   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5520   return 0;
5521
5522  no_more_source:
5523   detect_info->found |= found;
5524   return 1;
5525 }
5526
/* Decode the bytes at CODING->source + CODING->consumed (up to
   CODING->source + CODING->src_bytes) according to the charset table
   of CODING, appending the decoded characters to CODING->charbuf.

   Each leading byte is looked up in the coding system's
   `charset_valids' table, which yields either one charset ID or a
   list of candidate charset IDs.  A byte sequence that cannot be
   decoded is handled one byte at a time under `invalid_code', and
   CODING->errors is incremented.

   Runs of characters belonging to a non-ASCII charset are recorded
   with ADD_CHARSET_DATA.  On exit CODING->consumed_char,
   CODING->consumed and CODING->charbuf_used are updated.

   NOTE(review): ONE_MORE_BYTE is defined elsewhere; it presumably
   uses SRC, SRC_END, MULTIBYTEP and CONSUMED_CHARS by name and jumps
   to `no_more_source' when the source is exhausted -- confirm before
   refactoring.  */

static void
decode_coding_charset (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* We may produce one charset annotation in one loop and one more at
     the end.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
  int consumed_chars = 0, consumed_chars_base;
  int multibytep = coding->src_multibyte;
  Lisp_Object attrs, charset_list, valids;
  int char_offset = coding->produced_char;
  int last_offset = char_offset;
  int last_id = charset_ascii;
  /* Nonzero if the coding system has DOS end-of-line type and
     end-of-line conversion is not inhibited.  */
  int eol_crlf =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR, or -1 if there is none pending.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attrs, charset_list);
  valids = AREF (attrs, coding_attr_charset_valids);

  while (1)
    {
      int c;
      Lisp_Object val;
      struct charset *charset;
      int dim;
      int len = 1;
      unsigned code;

      /* Remember where this character starts so that the position
         can be restored at `invalid_code' and reported at
         `no_more_source'.  */
      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* CHARBUF is full.  A byte read ahead after a CR has not
             been decoded yet, so give it back.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        {
          /* Use the byte that was read ahead after the previous CR.  */
          c = byte_after_cr;
          byte_after_cr = -1;
        }
      else
        {
          ONE_MORE_BYTE (c);
          /* With DOS end-of-line type, read the byte following a CR
             right away; it becomes C on the next iteration.  */
          if (eol_crlf && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
        }
      if (c < 0)
        goto invalid_code;
      code = c;

      /* VAL is a charset ID, a list of charset IDs, or something else
         for a byte that cannot start a character.  */
      val = AREF (valids, c);
      if (! INTEGERP (val) && ! CONSP (val))
        goto invalid_code;
      if (INTEGERP (val))
        {
          /* A single charset: collect the remaining bytes of the code
             point, most significant byte first, then decode it.  */
          charset = CHARSET_FROM_ID (XFASTINT (val));
          dim = CHARSET_DIMENSION (charset);
          while (len < dim)
            {
              ONE_MORE_BYTE (c);
              code = (code << 8) | c;
              len++;
            }
          CODING_DECODE_CHAR (coding, src, src_base, src_end,
                              charset, code, c);
        }
      else
        {
          /* VAL is a list of charset IDs.  It is assured that the
             list is sorted by charset dimensions (smaller one
             comes first).  */
          while (CONSP (val))
            {
              charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
              dim = CHARSET_DIMENSION (charset);
              /* LEN and CODE carry over between candidates, so only
                 the additional bytes a larger charset needs are
                 read.  */
              while (len < dim)
                {
                  ONE_MORE_BYTE (c);
                  code = (code << 8) | c;
                  len++;
                }
              CODING_DECODE_CHAR (coding, src, src_base,
                                  src_end, charset, code, c);
              /* Stop at the first charset that yields a non-negative
                 character.  */
              if (c >= 0)
                break;
              val = XCDR (val);
            }
        }
      if (c < 0)
        goto invalid_code;
      if (charset->id != charset_ascii
          && last_id != charset->id)
        {
          /* The charset changed: close the run of the previous
             non-ASCII charset and start a new one here.  */
          if (last_id != charset_ascii)
            ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
          last_id = charset->id;
          last_offset = char_offset;
        }

      *charbuf++ = c;
      char_offset++;
      continue;

    invalid_code:
      /* Go back to the start of the offending sequence and pass
         through just its first byte: a negative value is negated, an
         ASCII byte is kept, and any other byte is converted with
         BYTE8_TO_CHAR.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
      char_offset++;
      coding->errors++;
    }

 no_more_source:
  /* Close the pending charset run, then report how far we got; the
     *_BASE values refer to the start of the last character that was
     begun.  */
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
5654
/* Encode the characters buffered in CODING->charbuf with the charsets
   of CODING's charset list and append the resulting bytes at
   CODING->destination + CODING->produced.

   ASCII characters are emitted as they are when the coding system is
   ASCII-compatible, and raw 8-bit characters (CHAR_BYTE8_P) are
   emitted as the byte they stand for.  For any other character,
   char_charset selects a charset from the list and the code point is
   emitted in one to four bytes, most significant byte first.  A
   character that no charset in the list covers is replaced by
   CODING_INHIBIT_CHARACTER_SUBSTITUTION when CODING->mode has
   CODING_MODE_SAFE_ENCODING set, and by CODING->default_char
   otherwise.

   At the end CODING_RESULT_SUCCESS is recorded and
   CODING->produced_char and CODING->produced are updated.  The
   explicit return value is always 0.

   NOTE(review): ASSURE_DESTINATION and the EMIT_* macros are defined
   elsewhere; they presumably use DST, DST_END, CHARBUF, CHARBUF_END,
   MULTIBYTEP and PRODUCED_CHARS by name -- confirm before
   refactoring.  */

static int
encode_coding_charset (struct coding_system *coding)
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  int safe_room = MAX_MULTIBYTE_LENGTH;
  int produced_chars = 0;
  Lisp_Object attrs, charset_list;
  int ascii_compatible;
  int c;

  CODING_GET_INFO (coding, attrs, charset_list);
  ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));

  while (charbuf < charbuf_end)
    {
      struct charset *charset;
      unsigned code;

      ASSURE_DESTINATION (safe_room);
      c = *charbuf++;
      if (ascii_compatible && ASCII_CHAR_P (c))
        EMIT_ONE_ASCII_BYTE (c);
      else if (CHAR_BYTE8_P (c))
        {
          /* A raw 8-bit character: emit the byte it stands for.  */
          c = CHAR_TO_BYTE8 (c);
          EMIT_ONE_BYTE (c);
        }
      else
        {
          /* Find a charset in CHARSET_LIST that contains C; CODE
             receives the code point of C in that charset.  */
          charset = char_charset (c, charset_list, &code);
          if (charset)
            {
              /* Emit CODE in as many bytes as the charset has
                 dimensions, most significant byte first.  */
              if (CHARSET_DIMENSION (charset) == 1)
                EMIT_ONE_BYTE (code);
              else if (CHARSET_DIMENSION (charset) == 2)
                EMIT_TWO_BYTES (code >> 8, code & 0xFF);
              else if (CHARSET_DIMENSION (charset) == 3)
                EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
              else
                EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
                                 (code >> 8) & 0xFF, code & 0xFF);
            }
          else
            {
              /* No charset covers C: emit a substitute instead.  */
              if (coding->mode & CODING_MODE_SAFE_ENCODING)
                c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
              else
                c = coding->default_char;
              EMIT_ONE_BYTE (c);
            }
        }
    }

  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
5717
5718 \f
/*** 10. C library functions ***/
5720
5721 /* Setup coding context CODING from information about CODING_SYSTEM.
5722    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5723    CODING_SYSTEM is invalid, signal an error.  */
5724
5725 void
5726 setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5727 {
5728   Lisp_Object attrs;
5729   Lisp_Object eol_type;
5730   Lisp_Object coding_type;
5731   Lisp_Object val;
5732
5733   if (NILP (coding_system))
5734     coding_system = Qundecided;
5735
5736   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5737
5738   attrs = CODING_ID_ATTRS (coding->id);
5739   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5740
5741   coding->mode = 0;
5742   coding->head_ascii = -1;
5743   if (VECTORP (eol_type))
5744     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5745                             | CODING_REQUIRE_DETECTION_MASK);
5746   else if (! EQ (eol_type, Qunix))
5747     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5748                             | CODING_REQUIRE_ENCODING_MASK);
5749   else
5750     coding->common_flags = 0;
5751   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5752     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5753   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5754     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5755   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5756     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5757
5758   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5759   coding->max_charset_id = SCHARS (val) - 1;
5760   coding->safe_charsets = SDATA (val);
5761   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5762   coding->carryover_bytes = 0;
5763
5764   coding_type = CODING_ATTR_TYPE (attrs);
5765   if (EQ (coding_type, Qundecided))
5766     {
5767       coding->detector = NULL;
5768       coding->decoder = decode_coding_raw_text;
5769       coding->encoder = encode_coding_raw_text;
5770       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5771     }
5772   else if (EQ (coding_type, Qiso_2022))
5773     {
5774       int i;
5775       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5776
5777       /* Invoke graphic register 0 to plane 0.  */
5778       CODING_ISO_INVOCATION (coding, 0) = 0;
5779       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5780       CODING_ISO_INVOCATION (coding, 1)
5781         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5782       /* Setup the initial status of designation.  */
5783       for (i = 0; i < 4; i++)
5784         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5785       /* Not single shifting initially.  */
5786       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5787       /* Beginning of buffer should also be regarded as bol. */
5788       CODING_ISO_BOL (coding) = 1;
5789       coding->detector = detect_coding_iso_2022;
5790       coding->decoder = decode_coding_iso_2022;
5791       coding->encoder = encode_coding_iso_2022;
5792       if (flags & CODING_ISO_FLAG_SAFE)
5793         coding->mode |= CODING_MODE_SAFE_ENCODING;
5794       coding->common_flags
5795         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5796             | CODING_REQUIRE_FLUSHING_MASK);
5797       if (flags & CODING_ISO_FLAG_COMPOSITION)
5798         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5799       if (flags & CODING_ISO_FLAG_DESIGNATION)
5800         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5801       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5802         {
5803           setup_iso_safe_charsets (attrs);
5804           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5805           coding->max_charset_id = SCHARS (val) - 1;
5806           coding->safe_charsets = SDATA (val);
5807         }
5808       CODING_ISO_FLAGS (coding) = flags;
5809       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5810       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5811       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5812       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5813     }
5814   else if (EQ (coding_type, Qcharset))
5815     {
5816       coding->detector = detect_coding_charset;
5817       coding->decoder = decode_coding_charset;
5818       coding->encoder = encode_coding_charset;
5819       coding->common_flags
5820         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5821     }
5822   else if (EQ (coding_type, Qutf_8))
5823     {
5824       val = AREF (attrs, coding_attr_utf_bom);
5825       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5826                                    : EQ (val, Qt) ? utf_with_bom
5827                                    : utf_without_bom);
5828       coding->detector = detect_coding_utf_8;
5829       coding->decoder = decode_coding_utf_8;
5830       coding->encoder = encode_coding_utf_8;
5831       coding->common_flags
5832         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5833       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5834         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5835     }
5836   else if (EQ (coding_type, Qutf_16))
5837     {
5838       val = AREF (attrs, coding_attr_utf_bom);
5839       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5840                                     : EQ (val, Qt) ? utf_with_bom
5841                                     : utf_without_bom);
5842       val = AREF (attrs, coding_attr_utf_16_endian);
5843       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5844                                        : utf_16_little_endian);
5845       CODING_UTF_16_SURROGATE (coding) = 0;
5846       coding->detector = detect_coding_utf_16;
5847       coding->decoder = decode_coding_utf_16;
5848       coding->encoder = encode_coding_utf_16;
5849       coding->common_flags
5850         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5851       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5852         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5853     }
5854   else if (EQ (coding_type, Qccl))
5855     {
5856       coding->detector = detect_coding_ccl;
5857       coding->decoder = decode_coding_ccl;
5858       coding->encoder = encode_coding_ccl;
5859       coding->common_flags
5860         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5861             | CODING_REQUIRE_FLUSHING_MASK);
5862     }
5863   else if (EQ (coding_type, Qemacs_mule))
5864     {
5865       coding->detector = detect_coding_emacs_mule;
5866       coding->decoder = decode_coding_emacs_mule;
5867       coding->encoder = encode_coding_emacs_mule;
5868       coding->common_flags
5869         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5870       coding->spec.emacs_mule.full_support = 1;
5871       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5872           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5873         {
5874           Lisp_Object tail, safe_charsets;
5875           int max_charset_id = 0;
5876
5877           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5878                tail = XCDR (tail))
5879             if (max_charset_id < XFASTINT (XCAR (tail)))
5880               max_charset_id = XFASTINT (XCAR (tail));
5881           safe_charsets = make_uninit_string (max_charset_id + 1);
5882           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5883           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5884                tail = XCDR (tail))
5885             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5886           coding->max_charset_id = max_charset_id;
5887           coding->safe_charsets = SDATA (safe_charsets);
5888           coding->spec.emacs_mule.full_support = 1;
5889         }
5890       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5891       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5892     }
5893   else if (EQ (coding_type, Qshift_jis))
5894     {
5895       coding->detector = detect_coding_sjis;
5896       coding->decoder = decode_coding_sjis;
5897       coding->encoder = encode_coding_sjis;
5898       coding->common_flags
5899         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5900     }
5901   else if (EQ (coding_type, Qbig5))
5902     {
5903       coding->detector = detect_coding_big5;
5904       coding->decoder = decode_coding_big5;
5905       coding->encoder = encode_coding_big5;
5906       coding->common_flags
5907         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5908     }
5909   else                          /* EQ (coding_type, Qraw_text) */
5910     {
5911       coding->detector = NULL;
5912       coding->decoder = decode_coding_raw_text;
5913       coding->encoder = encode_coding_raw_text;
5914       if (! EQ (eol_type, Qunix))
5915         {
5916           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5917           if (! VECTORP (eol_type))
5918             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5919         }
5920
5921     }
5922
5923   return;
5924 }
5925
5926 /* Return a list of charsets supported by CODING.  */
5927
5928 Lisp_Object
5929 coding_charset_list (struct coding_system *coding)
5930 {
5931   Lisp_Object attrs, charset_list;
5932
5933   CODING_GET_INFO (coding, attrs, charset_list);
5934   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5935     {
5936       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5937
5938       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5939         charset_list = Viso_2022_charset_list;
5940     }
5941   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5942     {
5943       charset_list = Vemacs_mule_charset_list;
5944     }
5945   return charset_list;
5946 }
5947
5948
5949 /* Return a list of charsets supported by CODING-SYSTEM.  */
5950
5951 Lisp_Object
5952 coding_system_charset_list (Lisp_Object coding_system)
5953 {
5954   int id;
5955   Lisp_Object attrs, charset_list;
5956
5957   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5958   attrs = CODING_ID_ATTRS (id);
5959
5960   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5961     {
5962       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5963
5964       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5965         charset_list = Viso_2022_charset_list;
5966       else
5967         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5968     }
5969   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5970     {
5971       charset_list = Vemacs_mule_charset_list;
5972     }
5973   else
5974     {
5975       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5976     }
5977   return charset_list;
5978 }
5979
5980
5981 /* Return raw-text or one of its subsidiaries that has the same
5982    eol_type as CODING-SYSTEM.  */
5983
5984 Lisp_Object
5985 raw_text_coding_system (Lisp_Object coding_system)
5986 {
5987   Lisp_Object spec, attrs;
5988   Lisp_Object eol_type, raw_text_eol_type;
5989
5990   if (NILP (coding_system))
5991     return Qraw_text;
5992   spec = CODING_SYSTEM_SPEC (coding_system);
5993   attrs = AREF (spec, 0);
5994
5995   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5996     return coding_system;
5997
5998   eol_type = AREF (spec, 2);
5999   if (VECTORP (eol_type))
6000     return Qraw_text;
6001   spec = CODING_SYSTEM_SPEC (Qraw_text);
6002   raw_text_eol_type = AREF (spec, 2);
6003   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6004           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6005           : AREF (raw_text_eol_type, 2));
6006 }
6007
6008
6009 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6010    does, return one of the subsidiary that has the same eol-spec as
6011    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
6012    inherit end-of-line format from the system's setting
6013    (system_eol_type).  */
6014
6015 Lisp_Object
6016 coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
6017 {
6018   Lisp_Object spec, eol_type;
6019
6020   if (NILP (coding_system))
6021     coding_system = Qraw_text;
6022   spec = CODING_SYSTEM_SPEC (coding_system);
6023   eol_type = AREF (spec, 2);
6024   if (VECTORP (eol_type))
6025     {
6026       Lisp_Object parent_eol_type;
6027
6028       if (! NILP (parent))
6029         {
6030           Lisp_Object parent_spec;
6031
6032           parent_spec = CODING_SYSTEM_SPEC (parent);
6033           parent_eol_type = AREF (parent_spec, 2);
6034         }
6035       else
6036         parent_eol_type = system_eol_type;
6037       if (EQ (parent_eol_type, Qunix))
6038         coding_system = AREF (eol_type, 0);
6039       else if (EQ (parent_eol_type, Qdos))
6040         coding_system = AREF (eol_type, 1);
6041       else if (EQ (parent_eol_type, Qmac))
6042         coding_system = AREF (eol_type, 2);
6043     }
6044   return coding_system;
6045 }
6046
/* Emacs has a mechanism to automatically detect a coding system if it
   is one of Emacs' internal format, ISO2022, SJIS, or BIG5.  However,
   it is impossible to distinguish some coding systems accurately
   because they use the same range of codes.  So, coding systems are
   first grouped into the following categories:
6052
6053    o coding-category-emacs-mule
6054
6055         The category for a coding system which has the same code range
6056         as Emacs' internal format.  Assigned the coding-system (Lisp
6057         symbol) `emacs-mule' by default.
6058
6059    o coding-category-sjis
6060
6061         The category for a coding system which has the same code range
6062         as SJIS.  Assigned the coding-system (Lisp
6063         symbol) `japanese-shift-jis' by default.
6064
6065    o coding-category-iso-7
6066
6067         The category for a coding system which has the same code range
6068         as ISO2022 of 7-bit environment.  This doesn't use any locking
6069         shift and single shift functions.  This can encode/decode all
6070         charsets.  Assigned the coding-system (Lisp symbol)
6071         `iso-2022-7bit' by default.
6072
6073    o coding-category-iso-7-tight
6074
6075         Same as coding-category-iso-7 except that this can
6076         encode/decode only the specified charsets.
6077
6078    o coding-category-iso-8-1
6079
6080         The category for a coding system which has the same code range
6081         as ISO2022 of 8-bit environment and graphic plane 1 used only
6082         for DIMENSION1 charset.  This doesn't use any locking shift
6083         and single shift functions.  Assigned the coding-system (Lisp
6084         symbol) `iso-latin-1' by default.
6085
6086    o coding-category-iso-8-2
6087
6088         The category for a coding system which has the same code range
6089         as ISO2022 of 8-bit environment and graphic plane 1 used only
6090         for DIMENSION2 charset.  This doesn't use any locking shift
6091         and single shift functions.  Assigned the coding-system (Lisp
6092         symbol) `japanese-iso-8bit' by default.
6093
6094    o coding-category-iso-7-else
6095
6096         The category for a coding system which has the same code range
        as ISO2022 of 7-bit environment but uses locking shift or
6098         single shift functions.  Assigned the coding-system (Lisp
6099         symbol) `iso-2022-7bit-lock' by default.
6100
6101    o coding-category-iso-8-else
6102
6103         The category for a coding system which has the same code range
        as ISO2022 of 8-bit environment but uses locking shift or
6105         single shift functions.  Assigned the coding-system (Lisp
6106         symbol) `iso-2022-8bit-ss2' by default.
6107
6108    o coding-category-big5
6109
6110         The category for a coding system which has the same code range
6111         as BIG5.  Assigned the coding-system (Lisp symbol)
6112         `cn-big5' by default.
6113
6114    o coding-category-utf-8
6115
6116         The category for a coding system which has the same code range
6117         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6118         symbol) `utf-8' by default.
6119
6120    o coding-category-utf-16-be
6121
        The category for a coding system in which a text has a
        Unicode signature (cf. Unicode Standard) in the order of BIG
6124         endian at the head.  Assigned the coding-system (Lisp symbol)
6125         `utf-16-be' by default.
6126
6127    o coding-category-utf-16-le
6128
        The category for a coding system in which a text has a
        Unicode signature (cf. Unicode Standard) in the order of
6131         LITTLE endian at the head.  Assigned the coding-system (Lisp
6132         symbol) `utf-16-le' by default.
6133
6134    o coding-category-ccl
6135
6136         The category for a coding system of which encoder/decoder is
6137         written in CCL programs.  The default value is nil, i.e., no
6138         coding system is assigned.
6139
6140    o coding-category-binary
6141
6142         The category for a coding system not categorized in any of the
6143         above.  Assigned the coding-system (Lisp symbol)
6144         `no-conversion' by default.
6145
6146    Each of them is a Lisp symbol and the value is an actual
6147    `coding-system's (this is also a Lisp symbol) assigned by a user.
6148    What Emacs does actually is to detect a category of coding system.
6149    Then, it uses a `coding-system' assigned to it.  If Emacs can't
6150    decide only one possible category, it selects a category of the
6151    highest priority.  Priorities of categories are also specified by a
6152    user in a Lisp variable `coding-category-list'.
6153
6154 */
6155
6156 #define EOL_SEEN_NONE   0
6157 #define EOL_SEEN_LF     1
6158 #define EOL_SEEN_CR     2
6159 #define EOL_SEEN_CRLF   4
6160
6161 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6162    SOURCE is encoded.  If CATEGORY is one of
6163    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6164    two-byte, else they are encoded by one-byte.
6165
6166    Return one of EOL_SEEN_XXX.  */
6167
6168 #define MAX_EOL_CHECK_COUNT 3
6169
6170 static int
6171 detect_eol (const unsigned char *source, EMACS_INT src_bytes, enum coding_category category)
6172 {
6173   const unsigned char *src = source, *src_end = src + src_bytes;
6174   unsigned char c;
6175   int total  = 0;
6176   int eol_seen = EOL_SEEN_NONE;
6177
6178   if ((1 << category) & CATEGORY_MASK_UTF_16)
6179     {
6180       int msb, lsb;
6181
6182       msb = category == (coding_category_utf_16_le
6183                          | coding_category_utf_16_le_nosig);
6184       lsb = 1 - msb;
6185
6186       while (src + 1 < src_end)
6187         {
6188           c = src[lsb];
6189           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6190             {
6191               int this_eol;
6192
6193               if (c == '\n')
6194                 this_eol = EOL_SEEN_LF;
6195               else if (src + 3 >= src_end
6196                        || src[msb + 2] != 0
6197                        || src[lsb + 2] != '\n')
6198                 this_eol = EOL_SEEN_CR;
6199               else
6200                 {
6201                   this_eol = EOL_SEEN_CRLF;
6202                   src += 2;
6203                 }
6204
6205               if (eol_seen == EOL_SEEN_NONE)
6206                 /* This is the first end-of-line.  */
6207                 eol_seen = this_eol;
6208               else if (eol_seen != this_eol)
6209                 {
6210                   /* The found type is different from what found before.
6211                      Allow for stray ^M characters in DOS EOL files.  */
6212                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6213                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6214                     eol_seen = EOL_SEEN_CRLF;
6215                   else
6216                     {
6217                       eol_seen = EOL_SEEN_LF;
6218                       break;
6219                     }
6220                 }
6221               if (++total == MAX_EOL_CHECK_COUNT)
6222                 break;
6223             }
6224           src += 2;
6225         }
6226     }
6227   else
6228     {
6229       while (src < src_end)
6230         {
6231           c = *src++;
6232           if (c == '\n' || c == '\r')
6233             {
6234               int this_eol;
6235
6236               if (c == '\n')
6237                 this_eol = EOL_SEEN_LF;
6238               else if (src >= src_end || *src != '\n')
6239                 this_eol = EOL_SEEN_CR;
6240               else
6241                 this_eol = EOL_SEEN_CRLF, src++;
6242
6243               if (eol_seen == EOL_SEEN_NONE)
6244                 /* This is the first end-of-line.  */
6245                 eol_seen = this_eol;
6246               else if (eol_seen != this_eol)
6247                 {
6248                   /* The found type is different from what found before.
6249                      Allow for stray ^M characters in DOS EOL files.  */
6250                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6251                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6252                     eol_seen = EOL_SEEN_CRLF;
6253                   else
6254                     {
6255                       eol_seen = EOL_SEEN_LF;
6256                       break;
6257                     }
6258                 }
6259               if (++total == MAX_EOL_CHECK_COUNT)
6260                 break;
6261             }
6262         }
6263     }
6264   return eol_seen;
6265 }
6266
6267
6268 static Lisp_Object
6269 adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6270 {
6271   Lisp_Object eol_type;
6272
6273   eol_type = CODING_ID_EOL_TYPE (coding->id);
6274   if (eol_seen & EOL_SEEN_LF)
6275     {
6276       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6277       eol_type = Qunix;
6278     }
6279   else if (eol_seen & EOL_SEEN_CRLF)
6280     {
6281       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6282       eol_type = Qdos;
6283     }
6284   else if (eol_seen & EOL_SEEN_CR)
6285     {
6286       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6287       eol_type = Qmac;
6288     }
6289   return eol_type;
6290 }
6291
6292 /* Detect how a text specified in CODING is encoded.  If a coding
6293    system is detected, update fields of CODING by the detected coding
6294    system.  */
6295
6296 void
6297 detect_coding (struct coding_system *coding)
6298 {
6299   const unsigned char *src, *src_end;
6300   int saved_mode = coding->mode;
6301
6302   coding->consumed = coding->consumed_char = 0;
6303   coding->produced = coding->produced_char = 0;
6304   coding_set_source (coding);
6305
6306   src_end = coding->source + coding->src_bytes;
6307   coding->head_ascii = 0;
6308
6309   /* If we have not yet decided the text encoding type, detect it
6310      now.  */
6311   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6312     {
6313       int c, i;
6314       struct coding_detection_info detect_info;
6315       int null_byte_found = 0, eight_bit_found = 0;
6316
6317       detect_info.checked = detect_info.found = detect_info.rejected = 0;
6318       for (src = coding->source; src < src_end; src++)
6319         {
6320           c = *src;
6321           if (c & 0x80)
6322             {
6323               eight_bit_found = 1;
6324               if (null_byte_found)
6325                 break;
6326             }
6327           else if (c < 0x20)
6328             {
6329               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6330                   && ! inhibit_iso_escape_detection
6331                   && ! detect_info.checked)
6332                 {
6333                   if (detect_coding_iso_2022 (coding, &detect_info))
6334                     {
6335                       /* We have scanned the whole data.  */
6336                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6337                         {
6338                           /* We didn't find an 8-bit code.  We may
6339                              have found a null-byte, but it's very
6340                              rare that a binary file conforms to
6341                              ISO-2022.  */
6342                           src = src_end;
6343                           coding->head_ascii = src - coding->source;
6344                         }
6345                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6346                       break;
6347                     }
6348                 }
6349               else if (! c && !inhibit_null_byte_detection)
6350                 {
6351                   null_byte_found = 1;
6352                   if (eight_bit_found)
6353                     break;
6354                 }
6355               if (! eight_bit_found)
6356                 coding->head_ascii++;
6357             }
6358           else if (! eight_bit_found)
6359             coding->head_ascii++;
6360         }
6361
6362       if (null_byte_found || eight_bit_found
6363           || coding->head_ascii < coding->src_bytes
6364           || detect_info.found)
6365         {
6366           enum coding_category category;
6367           struct coding_system *this;
6368
6369           if (coding->head_ascii == coding->src_bytes)
6370             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6371             for (i = 0; i < coding_category_raw_text; i++)
6372               {
6373                 category = coding_priorities[i];
6374                 this = coding_categories + category;
6375                 if (detect_info.found & (1 << category))
6376                   break;
6377               }
6378           else
6379             {
6380               if (null_byte_found)
6381                 {
6382                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6383                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6384                 }
6385               for (i = 0; i < coding_category_raw_text; i++)
6386                 {
6387                   category = coding_priorities[i];
6388                   this = coding_categories + category;
6389                   if (this->id < 0)
6390                     {
6391                       /* No coding system of this category is defined.  */
6392                       detect_info.rejected |= (1 << category);
6393                     }
6394                   else if (category >= coding_category_raw_text)
6395                     continue;
6396                   else if (detect_info.checked & (1 << category))
6397                     {
6398                       if (detect_info.found & (1 << category))
6399                         break;
6400                     }
6401                   else if ((*(this->detector)) (coding, &detect_info)
6402                            && detect_info.found & (1 << category))
6403                     {
6404                       if (category == coding_category_utf_16_auto)
6405                         {
6406                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6407                             category = coding_category_utf_16_le;
6408                           else
6409                             category = coding_category_utf_16_be;
6410                         }
6411                       break;
6412                     }
6413                 }
6414             }
6415
6416           if (i < coding_category_raw_text)
6417             setup_coding_system (CODING_ID_NAME (this->id), coding);
6418           else if (null_byte_found)
6419             setup_coding_system (Qno_conversion, coding);
6420           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6421                    == CATEGORY_MASK_ANY)
6422             setup_coding_system (Qraw_text, coding);
6423           else if (detect_info.rejected)
6424             for (i = 0; i < coding_category_raw_text; i++)
6425               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6426                 {
6427                   this = coding_categories + coding_priorities[i];
6428                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6429                   break;
6430                 }
6431         }
6432     }
6433   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6434            == coding_category_utf_8_auto)
6435     {
6436       Lisp_Object coding_systems;
6437       struct coding_detection_info detect_info;
6438
6439       coding_systems
6440         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6441       detect_info.found = detect_info.rejected = 0;
6442       coding->head_ascii = 0;
6443       if (CONSP (coding_systems)
6444           && detect_coding_utf_8 (coding, &detect_info))
6445         {
6446           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6447             setup_coding_system (XCAR (coding_systems), coding);
6448           else
6449             setup_coding_system (XCDR (coding_systems), coding);
6450         }
6451     }
6452   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6453            == coding_category_utf_16_auto)
6454     {
6455       Lisp_Object coding_systems;
6456       struct coding_detection_info detect_info;
6457
6458       coding_systems
6459         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6460       detect_info.found = detect_info.rejected = 0;
6461       coding->head_ascii = 0;
6462       if (CONSP (coding_systems)
6463           && detect_coding_utf_16 (coding, &detect_info))
6464         {
6465           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6466             setup_coding_system (XCAR (coding_systems), coding);
6467           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6468             setup_coding_system (XCDR (coding_systems), coding);
6469         }
6470     }
6471   coding->mode = saved_mode;
6472 }
6473
6474
6475 static void
6476 decode_eol (struct coding_system *coding)
6477 {
6478   Lisp_Object eol_type;
6479   unsigned char *p, *pbeg, *pend;
6480
6481   eol_type = CODING_ID_EOL_TYPE (coding->id);
6482   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6483     return;
6484
6485   if (NILP (coding->dst_object))
6486     pbeg = coding->destination;
6487   else
6488     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6489   pend = pbeg + coding->produced;
6490
6491   if (VECTORP (eol_type))
6492     {
6493       int eol_seen = EOL_SEEN_NONE;
6494
6495       for (p = pbeg; p < pend; p++)
6496         {
6497           if (*p == '\n')
6498             eol_seen |= EOL_SEEN_LF;
6499           else if (*p == '\r')
6500             {
6501               if (p + 1 < pend && *(p + 1) == '\n')
6502                 {
6503                   eol_seen |= EOL_SEEN_CRLF;
6504                   p++;
6505                 }
6506               else
6507                 eol_seen |= EOL_SEEN_CR;
6508             }
6509         }
6510       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6511       if ((eol_seen & EOL_SEEN_CRLF) != 0
6512           && (eol_seen & EOL_SEEN_CR) != 0
6513           && (eol_seen & EOL_SEEN_LF) == 0)
6514         eol_seen = EOL_SEEN_CRLF;
6515       else if (eol_seen != EOL_SEEN_NONE
6516           && eol_seen != EOL_SEEN_LF
6517           && eol_seen != EOL_SEEN_CRLF
6518           && eol_seen != EOL_SEEN_CR)
6519         eol_seen = EOL_SEEN_LF;
6520       if (eol_seen != EOL_SEEN_NONE)
6521         eol_type = adjust_coding_eol_type (coding, eol_seen);
6522     }
6523
6524   if (EQ (eol_type, Qmac))
6525     {
6526       for (p = pbeg; p < pend; p++)
6527         if (*p == '\r')
6528           *p = '\n';
6529     }
6530   else if (EQ (eol_type, Qdos))
6531     {
6532       int n = 0;
6533
6534       if (NILP (coding->dst_object))
6535         {
6536           /* Start deleting '\r' from the tail to minimize the memory
6537              movement.  */
6538           for (p = pend - 2; p >= pbeg; p--)
6539             if (*p == '\r')
6540               {
6541                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6542                 n++;
6543               }
6544         }
6545       else
6546         {
6547           int pos_byte = coding->dst_pos_byte;
6548           int pos = coding->dst_pos;
6549           int pos_end = pos + coding->produced_char - 1;
6550
6551           while (pos < pos_end)
6552             {
6553               p = BYTE_POS_ADDR (pos_byte);
6554               if (*p == '\r' && p[1] == '\n')
6555                 {
6556                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6557                   n++;
6558                   pos_end--;
6559                 }
6560               pos++;
6561               if (coding->dst_multibyte)
6562                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6563               else
6564                 pos_byte++;
6565             }
6566         }
6567       coding->produced -= n;
6568       coding->produced_char -= n;
6569     }
6570 }
6571
6572
6573 /* Return a translation table (or list of them) from coding system
6574    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6575    decoding (ENCODEP is zero). */
6576
6577 static Lisp_Object
6578 get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6579 {
6580   Lisp_Object standard, translation_table;
6581   Lisp_Object val;
6582
6583   if (NILP (Venable_character_translation))
6584     {
6585       if (max_lookup)
6586         *max_lookup = 0;
6587       return Qnil;
6588     }
6589   if (encodep)
6590     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6591       standard = Vstandard_translation_table_for_encode;
6592   else
6593     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6594       standard = Vstandard_translation_table_for_decode;
6595   if (NILP (translation_table))
6596     translation_table = standard;
6597   else
6598     {
6599       if (SYMBOLP (translation_table))
6600         translation_table = Fget (translation_table, Qtranslation_table);
6601       else if (CONSP (translation_table))
6602         {
6603           translation_table = Fcopy_sequence (translation_table);
6604           for (val = translation_table; CONSP (val); val = XCDR (val))
6605             if (SYMBOLP (XCAR (val)))
6606               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6607         }
6608       if (CHAR_TABLE_P (standard))
6609         {
6610           if (CONSP (translation_table))
6611             translation_table = nconc2 (translation_table,
6612                                         Fcons (standard, Qnil));
6613           else
6614             translation_table = Fcons (translation_table,
6615                                        Fcons (standard, Qnil));
6616         }
6617     }
6618
6619   if (max_lookup)
6620     {
6621       *max_lookup = 1;
6622       if (CHAR_TABLE_P (translation_table)
6623           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6624         {
6625           val = XCHAR_TABLE (translation_table)->extras[1];
6626           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6627             *max_lookup = XFASTINT (val);
6628         }
6629       else if (CONSP (translation_table))
6630         {
6631           Lisp_Object tail, val;
6632
6633           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6634             if (CHAR_TABLE_P (XCAR (tail))
6635                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6636               {
6637                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6638                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6639                   *max_lookup = XFASTINT (val);
6640               }
6641         }
6642     }
6643   return translation_table;
6644 }
6645
/* Look up character C in TABLE, which is a char-table or a list
   whose char-table elements are tried in order.

   When a lookup yields a character, C is replaced by it and TRANS is
   set to Qnil; with a list, the search then continues in the
   following tables using the new C.  When a lookup yields any other
   non-nil value, that value is left in TRANS (and, with a list, the
   search stops).  TRANS is Qnil when nothing of that kind is found.

   C and TRANS must be lvalues; all arguments are evaluated more than
   once.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
  do {                                                          \
    trans = Qnil;                                               \
    if (CHAR_TABLE_P (table))                                   \
      {                                                         \
        trans = CHAR_TABLE_REF (table, c);                      \
        if (CHARACTERP (trans))                                 \
          {                                                     \
            c = XFASTINT (trans);                               \
            trans = Qnil;                                       \
          }                                                     \
      }                                                         \
    else if (CONSP (table))                                     \
      {                                                         \
        Lisp_Object tail;                                       \
                                                                \
        for (tail = table; CONSP (tail); tail = XCDR (tail))    \
          {                                                     \
            if (! CHAR_TABLE_P (XCAR (tail)))                   \
              continue;                                         \
            trans = CHAR_TABLE_REF (XCAR (tail), c);            \
            if (CHARACTERP (trans))                             \
              {                                                 \
                c = XFASTINT (trans);                           \
                trans = Qnil;                                   \
              }                                                 \
            else if (! NILP (trans))                            \
              break;                                            \
          }                                                     \
      }                                                         \
  } while (0)
6670
6671
6672 /* Return a translation of character(s) at BUF according to TRANS.
6673    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6674    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6675    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6676    translation is found, and Qnil if not found..
6677    If BUF is too short to lookup characters in FROM, return Qt.  */
6678
6679 static Lisp_Object
6680 get_translation (Lisp_Object trans, int *buf, int *buf_end)
6681 {
6682
6683   if (INTEGERP (trans))
6684     return trans;
6685   for (; CONSP (trans); trans = XCDR (trans))
6686     {
6687       Lisp_Object val = XCAR (trans);
6688       Lisp_Object from = XCAR (val);
6689       int len = ASIZE (from);
6690       int i;
6691
6692       for (i = 0; i < len; i++)
6693         {
6694           if (buf + i == buf_end)
6695             return Qt;
6696           if (XINT (AREF (from, i)) != buf[i])
6697             break;
6698         }
6699       if (i == len)
6700         return val;
6701     }
6702   return Qnil;
6703 }
6704
6705
6706 static int
6707 produce_chars (struct coding_system *coding, Lisp_Object translation_table, int last_block)
6708 {
6709   unsigned char *dst = coding->destination + coding->produced;
6710   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6711   EMACS_INT produced;
6712   EMACS_INT produced_chars = 0;
6713   int carryover = 0;
6714
6715   if (! coding->chars_at_source)
6716     {
6717       /* Source characters are in coding->charbuf.  */
6718       int *buf = coding->charbuf;
6719       int *buf_end = buf + coding->charbuf_used;
6720
6721       if (EQ (coding->src_object, coding->dst_object))
6722         {
6723           coding_set_source (coding);
6724           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6725         }
6726
6727       while (buf < buf_end)
6728         {
6729           int c = *buf, i;
6730
6731           if (c >= 0)
6732             {
6733               int from_nchars = 1, to_nchars = 1;
6734               Lisp_Object trans = Qnil;
6735
6736               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6737               if (! NILP (trans))
6738                 {
6739                   trans = get_translation (trans, buf, buf_end);
6740                   if (INTEGERP (trans))
6741                     c = XINT (trans);
6742                   else if (CONSP (trans))
6743                     {
6744                       from_nchars = ASIZE (XCAR (trans));
6745                       trans = XCDR (trans);
6746                       if (INTEGERP (trans))
6747                         c = XINT (trans);
6748                       else
6749                         {
6750                           to_nchars = ASIZE (trans);
6751                           c = XINT (AREF (trans, 0));
6752                         }
6753                     }
6754                   else if (EQ (trans, Qt) && ! last_block)
6755                     break;
6756                 }
6757
6758               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6759                 {
6760                   dst = alloc_destination (coding,
6761                                            buf_end - buf
6762                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6763                                            dst);
6764                   if (EQ (coding->src_object, coding->dst_object))
6765                     {
6766                       coding_set_source (coding);
6767                       dst_end = (((unsigned char *) coding->source)
6768                                  + coding->consumed);
6769                     }
6770                   else
6771                     dst_end = coding->destination + coding->dst_bytes;
6772                 }
6773
6774               for (i = 0; i < to_nchars; i++)
6775                 {
6776                   if (i > 0)
6777                     c = XINT (AREF (trans, i));
6778                   if (coding->dst_multibyte
6779                       || ! CHAR_BYTE8_P (c))
6780                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6781                   else
6782                     *dst++ = CHAR_TO_BYTE8 (c);
6783                 }
6784               produced_chars += to_nchars;
6785               buf += from_nchars;
6786             }
6787           else
6788             /* This is an annotation datum.  (-C) is the length.  */
6789             buf += -c;
6790         }
6791       carryover = buf_end - buf;
6792     }
6793   else
6794     {
6795       /* Source characters are at coding->source.  */
6796       const unsigned char *src = coding->source;
6797       const unsigned char *src_end = src + coding->consumed;
6798
6799       if (EQ (coding->dst_object, coding->src_object))
6800         dst_end = (unsigned char *) src;
6801       if (coding->src_multibyte != coding->dst_multibyte)
6802         {
6803           if (coding->src_multibyte)
6804             {
6805               int multibytep = 1;
6806               EMACS_INT consumed_chars = 0;
6807
6808               while (1)
6809                 {
6810                   const unsigned char *src_base = src;
6811                   int c;
6812
6813                   ONE_MORE_BYTE (c);
6814                   if (dst == dst_end)
6815                     {
6816                       if (EQ (coding->src_object, coding->dst_object))
6817                         dst_end = (unsigned char *) src;
6818                       if (dst == dst_end)
6819                         {
6820                           EMACS_INT offset = src - coding->source;
6821
6822                           dst = alloc_destination (coding, src_end - src + 1,
6823                                                    dst);
6824                           dst_end = coding->destination + coding->dst_bytes;
6825                           coding_set_source (coding);
6826                           src = coding->source + offset;
6827                           src_end = coding->source + coding->src_bytes;
6828                           if (EQ (coding->src_object, coding->dst_object))
6829                             dst_end = (unsigned char *) src;
6830                         }
6831                     }
6832                   *dst++ = c;
6833                   produced_chars++;
6834                 }
6835             no_more_source:
6836               ;
6837             }
6838           else
6839             while (src < src_end)
6840               {
6841                 int multibytep = 1;
6842                 int c = *src++;
6843
6844                 if (dst >= dst_end - 1)
6845                   {
6846                     if (EQ (coding->src_object, coding->dst_object))
6847                       dst_end = (unsigned char *) src;
6848                     if (dst >= dst_end - 1)
6849                       {
6850                         EMACS_INT offset = src - coding->source;
6851                         EMACS_INT more_bytes;
6852
6853                         if (EQ (coding->src_object, coding->dst_object))
6854                           more_bytes = ((src_end - src) / 2) + 2;
6855                         else
6856                           more_bytes = src_end - src + 2;
6857                         dst = alloc_destination (coding, more_bytes, dst);
6858                         dst_end = coding->destination + coding->dst_bytes;
6859                         coding_set_source (coding);
6860                         src = coding->source + offset;
6861                         src_end = coding->source + coding->src_bytes;
6862                         if (EQ (coding->src_object, coding->dst_object))
6863                           dst_end = (unsigned char *) src;
6864                       }
6865                   }
6866                 EMIT_ONE_BYTE (c);
6867               }
6868         }
6869       else
6870         {
6871           if (!EQ (coding->src_object, coding->dst_object))
6872             {
6873               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6874
6875               if (require > 0)
6876                 {
6877                   EMACS_INT offset = src - coding->source;
6878
6879                   dst = alloc_destination (coding, require, dst);
6880                   coding_set_source (coding);
6881                   src = coding->source + offset;
6882                   src_end = coding->source + coding->src_bytes;
6883                 }
6884             }
6885           produced_chars = coding->consumed_char;
6886           while (src < src_end)
6887             *dst++ = *src++;
6888         }
6889     }
6890
6891   produced = dst - (coding->destination + coding->produced);
6892   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6893     insert_from_gap (produced_chars, produced);
6894   coding->produced += produced;
6895   coding->produced_char += produced_chars;
6896   return carryover;
6897 }
6898
6899 /* Compose text in CODING->object according to the annotation data at
6900    CHARBUF.  CHARBUF is an array:
6901      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6902  */
6903
6904 static INLINE void
6905 produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6906 {
6907   int len;
6908   EMACS_INT to;
6909   enum composition_method method;
6910   Lisp_Object components;
6911
6912   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6913   to = pos + charbuf[2];
6914   method = (enum composition_method) (charbuf[4]);
6915
6916   if (method == COMPOSITION_RELATIVE)
6917     components = Qnil;
6918   else
6919     {
6920       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6921       int i, j;
6922
6923       if (method == COMPOSITION_WITH_RULE)
6924         len = charbuf[2] * 3 - 2;
6925       charbuf += MAX_ANNOTATION_LENGTH;
6926       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6927       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6928         {
6929           if (charbuf[i] >= 0)
6930             args[j] = make_number (charbuf[i]);
6931           else
6932             {
6933               i++;
6934               args[j] = make_number (charbuf[i] % 0x100);
6935             }
6936         }
6937       components = (i == j ? Fstring (j, args) : Fvector (j, args));
6938     }
6939   compose_text (pos, to, components, Qnil, coding->dst_object);
6940 }
6941
6942
6943 /* Put `charset' property on text in CODING->object according to
6944    the annotation data at CHARBUF.  CHARBUF is an array:
6945      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6946  */
6947
6948 static INLINE void
6949 produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6950 {
6951   EMACS_INT from = pos - charbuf[2];
6952   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6953
6954   Fput_text_property (make_number (from), make_number (pos),
6955                       Qcharset, CHARSET_NAME (charset),
6956                       coding->dst_object);
6957 }
6958
6959
/* Number of `int' elements requested first for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca and record its size (in
   elements) in CODING->charbuf_size.

   CHARBUF_SIZE elements are requested first; each time alloca yields
   NULL the request is halved, as long as it stays above 1024
   elements.  If no request succeeds, CODING_RESULT_INSUFFICIENT_MEM
   is recorded and the *calling function* returns CODING->result, so
   this macro may only be used in a function returning such a value.
   Since the memory comes from alloca, the macro must be expanded
   directly in the function that uses the work area.  */

#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size;                                                           \
                                                                        \
    coding->charbuf = NULL;                                             \
    for (size = CHARBUF_SIZE; size > 1024; size >>= 1)                  \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * size);         \
        if (coding->charbuf)                                            \
          break;                                                        \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = size;                                        \
  } while (0)
6981
6982
6983 static void
6984 produce_annotation (struct coding_system *coding, EMACS_INT pos)
6985 {
6986   int *charbuf = coding->charbuf;
6987   int *charbuf_end = charbuf + coding->charbuf_used;
6988
6989   if (NILP (coding->dst_object))
6990     return;
6991
6992   while (charbuf < charbuf_end)
6993     {
6994       if (*charbuf >= 0)
6995         pos++, charbuf++;
6996       else
6997         {
6998           int len = -*charbuf;
6999
7000           if (len > 2)
7001             switch (charbuf[1])
7002               {
7003               case CODING_ANNOTATE_COMPOSITION_MASK:
7004                 produce_composition (coding, charbuf, pos);
7005                 break;
7006               case CODING_ANNOTATE_CHARSET_MASK:
7007                 produce_charset (coding, charbuf, pos);
7008                 break;
7009               }
7010           charbuf += len;
7011         }
7012     }
7013 }
7014
7015 /* Decode the data at CODING->src_object into CODING->dst_object.
7016    CODING->src_object is a buffer, a string, or nil.
7017    CODING->dst_object is a buffer.
7018
7019    If CODING->src_object is a buffer, it must be the current buffer.
7020    In this case, if CODING->src_pos is positive, it is a position of
7021    the source text in the buffer, otherwise, the source text is in the
7022    gap area of the buffer, and CODING->src_pos specifies the offset of
7023    the text from GPT (which must be the same as PT).  If this is the
7024    same buffer as CODING->dst_object, CODING->src_pos must be
7025    negative.
7026
7027    If CODING->src_object is a string, CODING->src_pos is an index to
7028    that string.
7029
7030    If CODING->src_object is nil, CODING->source must already point to
7031    the non-relocatable memory area.  In this case, CODING->src_pos is
7032    an offset from CODING->source.
7033
7034    The decoded data is inserted at the current point of the buffer
7035    CODING->dst_object.
7036 */
7037
7038 static int
7039 decode_coding (struct coding_system *coding)
7040 {
7041   Lisp_Object attrs;
7042   Lisp_Object undo_list;
7043   Lisp_Object translation_table;
7044   struct ccl_spec cclspec;
7045   int carryover;
7046   int i;
7047
7048   if (BUFFERP (coding->src_object)
7049       && coding->src_pos > 0
7050       && coding->src_pos < GPT
7051       && coding->src_pos + coding->src_chars > GPT)
7052     move_gap_both (coding->src_pos, coding->src_pos_byte);
7053
7054   undo_list = Qt;
7055   if (BUFFERP (coding->dst_object))
7056     {
7057       if (current_buffer != XBUFFER (coding->dst_object))
7058         set_buffer_internal (XBUFFER (coding->dst_object));
7059       if (GPT != PT)
7060         move_gap_both (PT, PT_BYTE);
7061       undo_list = current_buffer->undo_list;
7062       current_buffer->undo_list = Qt;
7063     }
7064
7065   coding->consumed = coding->consumed_char = 0;
7066   coding->produced = coding->produced_char = 0;
7067   coding->chars_at_source = 0;
7068   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7069   coding->errors = 0;
7070
7071   ALLOC_CONVERSION_WORK_AREA (coding);
7072
7073   attrs = CODING_ID_ATTRS (coding->id);
7074   translation_table = get_translation_table (attrs, 0, NULL);
7075
7076   carryover = 0;
7077   if (coding->decoder == decode_coding_ccl)
7078     {
7079       coding->spec.ccl = &cclspec;
7080       setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7081     }
7082   do
7083     {
7084       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7085
7086       coding_set_source (coding);
7087       coding->annotated = 0;
7088       coding->charbuf_used = carryover;
7089       (*(coding->decoder)) (coding);
7090       coding_set_destination (coding);
7091       carryover = produce_chars (coding, translation_table, 0);
7092       if (coding->annotated)
7093         produce_annotation (coding, pos);
7094       for (i = 0; i < carryover; i++)
7095         coding->charbuf[i]
7096           = coding->charbuf[coding->charbuf_used - carryover + i];
7097     }
7098   while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7099          || (coding->consumed < coding->src_bytes
7100              && (coding->result == CODING_RESULT_SUCCESS
7101                  || coding->result == CODING_RESULT_INVALID_SRC)));
7102
7103   if (carryover > 0)
7104     {
7105       coding_set_destination (coding);
7106       coding->charbuf_used = carryover;
7107       produce_chars (coding, translation_table, 1);
7108     }
7109
7110   coding->carryover_bytes = 0;
7111   if (coding->consumed < coding->src_bytes)
7112     {
7113       int nbytes = coding->src_bytes - coding->consumed;
7114       const unsigned char *src;
7115
7116       coding_set_source (coding);
7117       coding_set_destination (coding);
7118       src = coding->source + coding->consumed;
7119
7120       if (coding->mode & CODING_MODE_LAST_BLOCK)
7121         {
7122           /* Flush out unprocessed data as binary chars.  We are sure
7123              that the number of data is less than the size of
7124              coding->charbuf.  */
7125           coding->charbuf_used = 0;
7126           coding->chars_at_source = 0;
7127
7128           while (nbytes-- > 0)
7129             {
7130               int c = *src++;
7131
7132               if (c & 0x80)
7133                 c = BYTE8_TO_CHAR (c);
7134               coding->charbuf[coding->charbuf_used++] = c;
7135             }
7136           produce_chars (coding, Qnil, 1);
7137         }
7138       else
7139         {
7140           /* Record unprocessed bytes in coding->carryover.  We are
7141              sure that the number of data is less than the size of
7142              coding->carryover.  */
7143           unsigned char *p = coding->carryover;
7144
7145           if (nbytes > sizeof coding->carryover)
7146             nbytes = sizeof coding->carryover;
7147           coding->carryover_bytes = nbytes;
7148           while (nbytes-- > 0)
7149             *p++ = *src++;
7150         }
7151       coding->consumed = coding->src_bytes;
7152     }
7153
7154   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7155       && !inhibit_eol_conversion)
7156     decode_eol (coding);
7157   if (BUFFERP (coding->dst_object))
7158     {
7159       current_buffer->undo_list = undo_list;
7160       record_insert (coding->dst_pos, coding->produced_char);
7161     }
7162   return coding->result;
7163 }
7164
7165
7166 /* Extract an annotation datum from a composition starting at POS and
7167    ending before LIMIT of CODING->src_object (buffer or string), store
7168    the data in BUF, set *STOP to a starting position of the next
7169    composition (if any) or to LIMIT, and return the address of the
7170    next element of BUF.
7171
7172    If such an annotation is not found, set *STOP to a starting
7173    position of a composition after POS (if any) or to LIMIT, and
7174    return BUF.  */
7175
7176 static INLINE int *
7177 handle_composition_annotation (EMACS_INT pos, EMACS_INT limit, struct coding_system *coding, int *buf, EMACS_INT *stop)
7178 {
7179   EMACS_INT start, end;
7180   Lisp_Object prop;
7181
7182   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7183       || end > limit)
7184     *stop = limit;
7185   else if (start > pos)
7186     *stop = start;
7187   else
7188     {
7189       if (start == pos)
7190         {
7191           /* We found a composition.  Store the corresponding
7192              annotation data in BUF.  */
7193           int *head = buf;
7194           enum composition_method method = COMPOSITION_METHOD (prop);
7195           int nchars = COMPOSITION_LENGTH (prop);
7196
7197           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7198           if (method != COMPOSITION_RELATIVE)
7199             {
7200               Lisp_Object components;
7201               int len, i, i_byte;
7202
7203               components = COMPOSITION_COMPONENTS (prop);
7204               if (VECTORP (components))
7205                 {
7206                   len = XVECTOR (components)->size;
7207                   for (i = 0; i < len; i++)
7208                     *buf++ = XINT (AREF (components, i));
7209                 }
7210               else if (STRINGP (components))
7211                 {
7212                   len = SCHARS (components);
7213                   i = i_byte = 0;
7214                   while (i < len)
7215                     {
7216                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7217                       buf++;
7218                     }
7219                 }
7220               else if (INTEGERP (components))
7221                 {
7222                   len = 1;
7223                   *buf++ = XINT (components);
7224                 }
7225               else if (CONSP (components))
7226                 {
7227                   for (len = 0; CONSP (components);
7228                        len++, components = XCDR (components))
7229                     *buf++ = XINT (XCAR (components));
7230                 }
7231               else
7232                 abort ();
7233               *head -= len;
7234             }
7235         }
7236
7237       if (find_composition (end, limit, &start, &end, &prop,
7238                             coding->src_object)
7239           && end <= limit)
7240         *stop = start;
7241       else
7242         *stop = limit;
7243     }
7244   return buf;
7245 }
7246
7247
7248 /* Extract an annotation datum from a text property `charset' at POS of
7249    CODING->src_object (buffer of string), store the data in BUF, set
7250    *STOP to the position where the value of `charset' property changes
7251    (limiting by LIMIT), and return the address of the next element of
7252    BUF.
7253
7254    If the property value is nil, set *STOP to the position where the
7255    property value is non-nil (limiting by LIMIT), and return BUF.  */
7256
7257 static INLINE int *
7258 handle_charset_annotation (EMACS_INT pos, EMACS_INT limit, struct coding_system *coding, int *buf, EMACS_INT *stop)
7259 {
7260   Lisp_Object val, next;
7261   int id;
7262
7263   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7264   if (! NILP (val) && CHARSETP (val))
7265     id = XINT (CHARSET_SYMBOL_ID (val));
7266   else
7267     id = -1;
7268   ADD_CHARSET_DATA (buf, 0, id);
7269   next = Fnext_single_property_change (make_number (pos), Qcharset,
7270                                        coding->src_object,
7271                                        make_number (limit));
7272   *stop = XINT (next);
7273   return buf;
7274 }
7275
7276
7277 static void
7278 consume_chars (struct coding_system *coding, Lisp_Object translation_table, int max_lookup)
7279 {
7280   int *buf = coding->charbuf;
7281   int *buf_end = coding->charbuf + coding->charbuf_size;
7282   const unsigned char *src = coding->source + coding->consumed;
7283   const unsigned char *src_end = coding->source + coding->src_bytes;
7284   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7285   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7286   int multibytep = coding->src_multibyte;
7287   Lisp_Object eol_type;
7288   int c;
7289   EMACS_INT stop, stop_composition, stop_charset;
7290   int *lookup_buf = NULL;
7291
7292   if (! NILP (translation_table))
7293     lookup_buf = alloca (sizeof (int) * max_lookup);
7294
7295   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7296   if (VECTORP (eol_type))
7297     eol_type = Qunix;
7298
7299   /* Note: composition handling is not yet implemented.  */
7300   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7301
7302   if (NILP (coding->src_object))
7303     stop = stop_composition = stop_charset = end_pos;
7304   else
7305     {
7306       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7307         stop = stop_composition = pos;
7308       else
7309         stop = stop_composition = end_pos;
7310       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7311         stop = stop_charset = pos;
7312       else
7313         stop_charset = end_pos;
7314     }
7315
7316   /* Compensate for CRLF and conversion.  */
7317   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7318   while (buf < buf_end)
7319     {
7320       Lisp_Object trans;
7321
7322       if (pos == stop)
7323         {
7324           if (pos == end_pos)
7325             break;
7326           if (pos == stop_composition)
7327             buf = handle_composition_annotation (pos, end_pos, coding,
7328                                                  buf, &stop_composition);
7329           if (pos == stop_charset)
7330             buf = handle_charset_annotation (pos, end_pos, coding,
7331                                              buf, &stop_charset);
7332           stop = (stop_composition < stop_charset
7333                   ? stop_composition : stop_charset);
7334         }
7335
7336       if (! multibytep)
7337         {
7338           EMACS_INT bytes;
7339
7340           if (coding->encoder == encode_coding_raw_text
7341               || coding->encoder == encode_coding_ccl)
7342             c = *src++, pos++;
7343           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7344             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7345           else
7346             c = BYTE8_TO_CHAR (*src), src++, pos++;
7347         }
7348       else
7349         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7350       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7351         c = '\n';
7352       if (! EQ (eol_type, Qunix))
7353         {
7354           if (c == '\n')
7355             {
7356               if (EQ (eol_type, Qdos))
7357                 *buf++ = '\r';
7358               else
7359                 c = '\r';
7360             }
7361         }
7362
7363       trans = Qnil;
7364       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7365       if (NILP (trans))
7366         *buf++ = c;
7367       else
7368         {
7369           int from_nchars = 1, to_nchars = 1;
7370           int *lookup_buf_end;
7371           const unsigned char *p = src;
7372           int i;
7373
7374           lookup_buf[0] = c;
7375           for (i = 1; i < max_lookup && p < src_end; i++)
7376             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7377           lookup_buf_end = lookup_buf + i;
7378           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7379           if (INTEGERP (trans))
7380             c = XINT (trans);
7381           else if (CONSP (trans))
7382             {
7383               from_nchars = ASIZE (XCAR (trans));
7384               trans = XCDR (trans);
7385               if (INTEGERP (trans))
7386                 c = XINT (trans);
7387               else
7388                 {
7389                   to_nchars = ASIZE (trans);
7390                   if (buf + to_nchars > buf_end)
7391                     break;
7392                   c = XINT (AREF (trans, 0));
7393                 }
7394             }
7395           else
7396             break;
7397           *buf++ = c;
7398           for (i = 1; i < to_nchars; i++)
7399             *buf++ = XINT (AREF (trans, i));
7400           for (i = 1; i < from_nchars; i++, pos++)
7401             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7402         }
7403     }
7404
7405   coding->consumed = src - coding->source;
7406   coding->consumed_char = pos - coding->src_pos;
7407   coding->charbuf_used = buf - coding->charbuf;
7408   coding->chars_at_source = 0;
7409 }
7410
7411
7412 /* Encode the text at CODING->src_object into CODING->dst_object.
7413    CODING->src_object is a buffer or a string.
7414    CODING->dst_object is a buffer or nil.
7415
7416    If CODING->src_object is a buffer, it must be the current buffer.
7417    In this case, if CODING->src_pos is positive, it is a position of
7418    the source text in the buffer, otherwise. the source text is in the
7419    gap area of the buffer, and coding->src_pos specifies the offset of
7420    the text from GPT (which must be the same as PT).  If this is the
7421    same buffer as CODING->dst_object, CODING->src_pos must be
7422    negative and CODING should not have `pre-write-conversion'.
7423
7424    If CODING->src_object is a string, CODING should not have
7425    `pre-write-conversion'.
7426
7427    If CODING->dst_object is a buffer, the encoded data is inserted at
7428    the current point of that buffer.
7429
7430    If CODING->dst_object is nil, the encoded data is placed at the
7431    memory area specified by CODING->destination.  */
7432
7433 static int
7434 encode_coding (struct coding_system *coding)
7435 {
7436   Lisp_Object attrs;
7437   Lisp_Object translation_table;
7438   int max_lookup;
7439   struct ccl_spec cclspec;
7440
7441   attrs = CODING_ID_ATTRS (coding->id);
7442   if (coding->encoder == encode_coding_raw_text)
7443     translation_table = Qnil, max_lookup = 0;
7444   else
7445     translation_table = get_translation_table (attrs, 1, &max_lookup);
7446
7447   if (BUFFERP (coding->dst_object))
7448     {
7449       set_buffer_internal (XBUFFER (coding->dst_object));
7450       coding->dst_multibyte
7451         = ! NILP (current_buffer->enable_multibyte_characters);
7452     }
7453
7454   coding->consumed = coding->consumed_char = 0;
7455   coding->produced = coding->produced_char = 0;
7456   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7457   coding->errors = 0;
7458
7459   ALLOC_CONVERSION_WORK_AREA (coding);
7460
7461   if (coding->encoder == encode_coding_ccl)
7462     {
7463       coding->spec.ccl = &cclspec;
7464       setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7465     }
7466   do {
7467     coding_set_source (coding);
7468     consume_chars (coding, translation_table, max_lookup);
7469     coding_set_destination (coding);
7470     (*(coding->encoder)) (coding);
7471   } while (coding->consumed_char < coding->src_chars);
7472
7473   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7474     insert_from_gap (coding->produced_char, coding->produced);
7475
7476   return (coding->result);
7477 }
7478
7479
7480 /* Name (or base name) of work buffer for code conversion.  */
7481 static Lisp_Object Vcode_conversion_workbuf_name;
7482
7483 /* The work buffer used by the outermost conversion.  It is named
7484    Vcode_conversion_workbuf_name and is kept for reuse rather than
7485    killed after use (it is recreated if found dead).  Work buffers
7486    made while it is busy get names derived from
7487    Vcode_conversion_workbuf_name and are killed once released.  */
7488 static Lisp_Object Vcode_conversion_reused_workbuf;
7489
7490 /* Nonzero while the reused work buffer is in use (bumped per request).  */
7491 static int reused_workbuf_in_use;
7492
7493
7494 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7495    multibyteness of returning buffer.  */
7496
7497 static Lisp_Object
7498 make_conversion_work_buffer (int multibyte)
7499 {
7500   Lisp_Object name, workbuf;
7501   struct buffer *current;
7502
7503   if (reused_workbuf_in_use++)
7504     {
7505       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7506       workbuf = Fget_buffer_create (name);
7507     }
7508   else
7509     {
7510       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7511         Vcode_conversion_reused_workbuf
7512           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7513       workbuf = Vcode_conversion_reused_workbuf;
7514     }
7515   current = current_buffer;
7516   set_buffer_internal (XBUFFER (workbuf));
7517   /* We can't allow modification hooks to run in the work buffer.  For
7518      instance, directory_files_internal assumes that file decoding
7519      doesn't compile new regexps.  */
7520   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7521   Ferase_buffer ();
7522   current_buffer->undo_list = Qt;
7523   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7524   set_buffer_internal (current);
7525   return workbuf;
7526 }
7527
7528
7529 static Lisp_Object
7530 code_conversion_restore (Lisp_Object arg)
7531 {
7532   Lisp_Object current, workbuf;
7533   struct gcpro gcpro1;
7534
7535   GCPRO1 (arg);
7536   current = XCAR (arg);
7537   workbuf = XCDR (arg);
7538   if (! NILP (workbuf))
7539     {
7540       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7541         reused_workbuf_in_use = 0;
7542       else if (! NILP (Fbuffer_live_p (workbuf)))
7543         Fkill_buffer (workbuf);
7544     }
7545   set_buffer_internal (XBUFFER (current));
7546   UNGCPRO;
7547   return Qnil;
7548 }
7549
7550 Lisp_Object
7551 code_conversion_save (int with_work_buf, int multibyte)
7552 {
7553   Lisp_Object workbuf = Qnil;
7554
7555   if (with_work_buf)
7556     workbuf = make_conversion_work_buffer (multibyte);
7557   record_unwind_protect (code_conversion_restore,
7558                          Fcons (Fcurrent_buffer (), workbuf));
7559   return workbuf;
7560 }
7561
7562 int
7563 decode_coding_gap (struct coding_system *coding, EMACS_INT chars, EMACS_INT bytes)
7564 {
7565   int count = SPECPDL_INDEX ();
7566   Lisp_Object attrs;
7567
7568   code_conversion_save (0, 0);
7569
7570   coding->src_object = Fcurrent_buffer ();
7571   coding->src_chars = chars;
7572   coding->src_bytes = bytes;
7573   coding->src_pos = -chars;
7574   coding->src_pos_byte = -bytes;
7575   coding->src_multibyte = chars < bytes;
7576   coding->dst_object = coding->src_object;
7577   coding->dst_pos = PT;
7578   coding->dst_pos_byte = PT_BYTE;
7579   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7580
7581   if (CODING_REQUIRE_DETECTION (coding))
7582     detect_coding (coding);
7583
7584   coding->mode |= CODING_MODE_LAST_BLOCK;
7585   current_buffer->text->inhibit_shrinking = 1;
7586   decode_coding (coding);
7587   current_buffer->text->inhibit_shrinking = 0;
7588
7589   attrs = CODING_ID_ATTRS (coding->id);
7590   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7591     {
7592       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7593       Lisp_Object val;
7594
7595       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7596       val = call1 (CODING_ATTR_POST_READ (attrs),
7597                    make_number (coding->produced_char));
7598       CHECK_NATNUM (val);
7599       coding->produced_char += Z - prev_Z;
7600       coding->produced += Z_BYTE - prev_Z_BYTE;
7601     }
7602
7603   unbind_to (count, Qnil);
7604   return coding->result;
7605 }
7606
7607 int
7608 encode_coding_gap (struct coding_system *coding, EMACS_INT chars, EMACS_INT bytes)
7609 {
7610   int count = SPECPDL_INDEX ();
7611
7612   code_conversion_save (0, 0);
7613
7614   coding->src_object = Fcurrent_buffer ();
7615   coding->src_chars = chars;
7616   coding->src_bytes = bytes;
7617   coding->src_pos = -chars;
7618   coding->src_pos_byte = -bytes;
7619   coding->src_multibyte = chars < bytes;
7620   coding->dst_object = coding->src_object;
7621   coding->dst_pos = PT;
7622   coding->dst_pos_byte = PT_BYTE;
7623
7624   encode_coding (coding);
7625
7626   unbind_to (count, Qnil);
7627   return coding->result;
7628 }
7629
7630
7631 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7632    SRC_OBJECT into DST_OBJECT by coding context CODING.
7633
7634    SRC_OBJECT is a buffer, a string, or Qnil.
7635
7636    If it is a buffer, the text is at point of the buffer.  FROM and TO
7637    are positions in the buffer.
7638
7639    If it is a string, the text is at the beginning of the string.
7640    FROM and TO are indices to the string.
7641
7642    If it is nil, the text is at coding->source.  FROM and TO are
7643    indices to coding->source.
7644
7645    DST_OBJECT is a buffer, Qt, or Qnil.
7646
7647    If it is a buffer, the decoded text is inserted at point of the
7648    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7649    is deleted.
7650
7651    If it is Qt, a string is made from the decoded text, and
7652    set in CODING->dst_object.
7653
7654    If it is Qnil, the decoded text is stored at CODING->destination.
7655    The caller must allocate CODING->dst_bytes bytes at
7656    CODING->destination by xmalloc.  If the decoded text is longer than
7657    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7658  */
7659
7660 void
7661 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7662                       dst_object)
7663      struct coding_system *coding;
7664      Lisp_Object src_object;
7665      EMACS_INT from, from_byte, to, to_byte;
7666      Lisp_Object dst_object;
7667 {
7668   int count = SPECPDL_INDEX ();
7669   unsigned char *destination;
7670   EMACS_INT dst_bytes;
7671   EMACS_INT chars = to - from;
7672   EMACS_INT bytes = to_byte - from_byte;
7673   Lisp_Object attrs;
7674   int saved_pt = -1, saved_pt_byte;
7675   int need_marker_adjustment = 0;
7676   Lisp_Object old_deactivate_mark;
7677
7678   old_deactivate_mark = Vdeactivate_mark;
7679
7680   if (NILP (dst_object))
7681     {
7682       destination = coding->destination;
7683       dst_bytes = coding->dst_bytes;
7684     }
7685
7686   coding->src_object = src_object;
7687   coding->src_chars = chars;
7688   coding->src_bytes = bytes;
7689   coding->src_multibyte = chars < bytes;
7690
7691   if (STRINGP (src_object))
7692     {
7693       coding->src_pos = from;
7694       coding->src_pos_byte = from_byte;
7695     }
7696   else if (BUFFERP (src_object))
7697     {
7698       set_buffer_internal (XBUFFER (src_object));
7699       if (from != GPT)
7700         move_gap_both (from, from_byte);
7701       if (EQ (src_object, dst_object))
7702         {
7703           struct Lisp_Marker *tail;
7704
7705           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7706             {
7707               tail->need_adjustment
7708                 = tail->charpos == (tail->insertion_type ? from : to);
7709               need_marker_adjustment |= tail->need_adjustment;
7710             }
7711           saved_pt = PT, saved_pt_byte = PT_BYTE;
7712           TEMP_SET_PT_BOTH (from, from_byte);
7713           current_buffer->text->inhibit_shrinking = 1;
7714           del_range_both (from, from_byte, to, to_byte, 1);
7715           coding->src_pos = -chars;
7716           coding->src_pos_byte = -bytes;
7717         }
7718       else
7719         {
7720           coding->src_pos = from;
7721           coding->src_pos_byte = from_byte;
7722         }
7723     }
7724
7725   if (CODING_REQUIRE_DETECTION (coding))
7726     detect_coding (coding);
7727   attrs = CODING_ID_ATTRS (coding->id);
7728
7729   if (EQ (dst_object, Qt)
7730       || (! NILP (CODING_ATTR_POST_READ (attrs))
7731           && NILP (dst_object)))
7732     {
7733       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7734       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7735       coding->dst_pos = BEG;
7736       coding->dst_pos_byte = BEG_BYTE;
7737     }
7738   else if (BUFFERP (dst_object))
7739     {
7740       code_conversion_save (0, 0);
7741       coding->dst_object = dst_object;
7742       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7743       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7744       coding->dst_multibyte
7745         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7746     }
7747   else
7748     {
7749       code_conversion_save (0, 0);
7750       coding->dst_object = Qnil;
7751       /* Most callers presume this will return a multibyte result, and they
7752          won't use `binary' or `raw-text' anyway, so let's not worry about
7753          CODING_FOR_UNIBYTE.  */
7754       coding->dst_multibyte = 1;
7755     }
7756
7757   decode_coding (coding);
7758
7759   if (BUFFERP (coding->dst_object))
7760     set_buffer_internal (XBUFFER (coding->dst_object));
7761
7762   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7763     {
7764       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7765       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7766       Lisp_Object val;
7767
7768       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7769       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7770               old_deactivate_mark);
7771       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7772                         make_number (coding->produced_char));
7773       UNGCPRO;
7774       CHECK_NATNUM (val);
7775       coding->produced_char += Z - prev_Z;
7776       coding->produced += Z_BYTE - prev_Z_BYTE;
7777     }
7778
7779   if (EQ (dst_object, Qt))
7780     {
7781       coding->dst_object = Fbuffer_string ();
7782     }
7783   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7784     {
7785       set_buffer_internal (XBUFFER (coding->dst_object));
7786       if (dst_bytes < coding->produced)
7787         {
7788           destination = xrealloc (destination, coding->produced);
7789           if (! destination)
7790             {
7791               record_conversion_result (coding,
7792                                         CODING_RESULT_INSUFFICIENT_MEM);
7793               unbind_to (count, Qnil);
7794               return;
7795             }
7796           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7797             move_gap_both (BEGV, BEGV_BYTE);
7798           bcopy (BEGV_ADDR, destination, coding->produced);
7799           coding->destination = destination;
7800         }
7801     }
7802
7803   if (saved_pt >= 0)
7804     {
7805       /* This is the case of:
7806          (BUFFERP (src_object) && EQ (src_object, dst_object))
7807          As we have moved PT while replacing the original buffer
7808          contents, we must recover it now.  */
7809       set_buffer_internal (XBUFFER (src_object));
7810       current_buffer->text->inhibit_shrinking = 0;
7811       if (saved_pt < from)
7812         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7813       else if (saved_pt < from + chars)
7814         TEMP_SET_PT_BOTH (from, from_byte);
7815       else if (! NILP (current_buffer->enable_multibyte_characters))
7816         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7817                           saved_pt_byte + (coding->produced - bytes));
7818       else
7819         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7820                           saved_pt_byte + (coding->produced - bytes));
7821
7822       if (need_marker_adjustment)
7823         {
7824           struct Lisp_Marker *tail;
7825
7826           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7827             if (tail->need_adjustment)
7828               {
7829                 tail->need_adjustment = 0;
7830                 if (tail->insertion_type)
7831                   {
7832                     tail->bytepos = from_byte;
7833                     tail->charpos = from;
7834                   }
7835                 else
7836                   {
7837                     tail->bytepos = from_byte + coding->produced;
7838                     tail->charpos
7839                       = (NILP (current_buffer->enable_multibyte_characters)
7840                          ? tail->bytepos : from + coding->produced_char);
7841                   }
7842               }
7843         }
7844     }
7845
7846   Vdeactivate_mark = old_deactivate_mark;
7847   unbind_to (count, coding->dst_object);
7848 }
7849
7850
7851 void
7852 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7853                       dst_object)
7854      struct coding_system *coding;
7855      Lisp_Object src_object;
7856      EMACS_INT from, from_byte, to, to_byte;
7857      Lisp_Object dst_object;
7858 {
7859   int count = SPECPDL_INDEX ();
7860   EMACS_INT chars = to - from;
7861   EMACS_INT bytes = to_byte - from_byte;
7862   Lisp_Object attrs;
7863   int saved_pt = -1, saved_pt_byte;
7864   int need_marker_adjustment = 0;
7865   int kill_src_buffer = 0;
7866   Lisp_Object old_deactivate_mark;
7867
7868   old_deactivate_mark = Vdeactivate_mark;
7869
7870   coding->src_object = src_object;
7871   coding->src_chars = chars;
7872   coding->src_bytes = bytes;
7873   coding->src_multibyte = chars < bytes;
7874
7875   attrs = CODING_ID_ATTRS (coding->id);
7876
7877   if (EQ (src_object, dst_object))
7878     {
7879       struct Lisp_Marker *tail;
7880
7881       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7882         {
7883           tail->need_adjustment
7884             = tail->charpos == (tail->insertion_type ? from : to);
7885           need_marker_adjustment |= tail->need_adjustment;
7886         }
7887     }
7888
7889   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7890     {
7891       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7892       set_buffer_internal (XBUFFER (coding->src_object));
7893       if (STRINGP (src_object))
7894         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7895       else if (BUFFERP (src_object))
7896         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7897       else
7898         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7899
7900       if (EQ (src_object, dst_object))
7901         {
7902           set_buffer_internal (XBUFFER (src_object));
7903           saved_pt = PT, saved_pt_byte = PT_BYTE;
7904           del_range_both (from, from_byte, to, to_byte, 1);
7905           set_buffer_internal (XBUFFER (coding->src_object));
7906         }
7907
7908       {
7909         Lisp_Object args[3];
7910         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7911
7912         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7913                 old_deactivate_mark);
7914         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7915         args[1] = make_number (BEG);
7916         args[2] = make_number (Z);
7917         safe_call (3, args);
7918         UNGCPRO;
7919       }
7920       if (XBUFFER (coding->src_object) != current_buffer)
7921         kill_src_buffer = 1;
7922       coding->src_object = Fcurrent_buffer ();
7923       if (BEG != GPT)
7924         move_gap_both (BEG, BEG_BYTE);
7925       coding->src_chars = Z - BEG;
7926       coding->src_bytes = Z_BYTE - BEG_BYTE;
7927       coding->src_pos = BEG;
7928       coding->src_pos_byte = BEG_BYTE;
7929       coding->src_multibyte = Z < Z_BYTE;
7930     }
7931   else if (STRINGP (src_object))
7932     {
7933       code_conversion_save (0, 0);
7934       coding->src_pos = from;
7935       coding->src_pos_byte = from_byte;
7936     }
7937   else if (BUFFERP (src_object))
7938     {
7939       code_conversion_save (0, 0);
7940       set_buffer_internal (XBUFFER (src_object));
7941       if (EQ (src_object, dst_object))
7942         {
7943           saved_pt = PT, saved_pt_byte = PT_BYTE;
7944           coding->src_object = del_range_1 (from, to, 1, 1);
7945           coding->src_pos = 0;
7946           coding->src_pos_byte = 0;
7947         }
7948       else
7949         {
7950           if (from < GPT && to >= GPT)
7951             move_gap_both (from, from_byte);
7952           coding->src_pos = from;
7953           coding->src_pos_byte = from_byte;
7954         }
7955     }
7956   else
7957     code_conversion_save (0, 0);
7958
7959   if (BUFFERP (dst_object))
7960     {
7961       coding->dst_object = dst_object;
7962       if (EQ (src_object, dst_object))
7963         {
7964           coding->dst_pos = from;
7965           coding->dst_pos_byte = from_byte;
7966         }
7967       else
7968         {
7969           struct buffer *current = current_buffer;
7970
7971           set_buffer_temp (XBUFFER (dst_object));
7972           coding->dst_pos = PT;
7973           coding->dst_pos_byte = PT_BYTE;
7974           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7975           set_buffer_temp (current);
7976         }
7977       coding->dst_multibyte
7978         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7979     }
7980   else if (EQ (dst_object, Qt))
7981     {
7982       coding->dst_object = Qnil;
7983       coding->dst_bytes = coding->src_chars;
7984       if (coding->dst_bytes == 0)
7985         coding->dst_bytes = 1;
7986       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7987       coding->dst_multibyte = 0;
7988     }
7989   else
7990     {
7991       coding->dst_object = Qnil;
7992       coding->dst_multibyte = 0;
7993     }
7994
7995   encode_coding (coding);
7996
7997   if (EQ (dst_object, Qt))
7998     {
7999       if (BUFFERP (coding->dst_object))
8000         coding->dst_object = Fbuffer_string ();
8001       else
8002         {
8003           coding->dst_object
8004             = make_unibyte_string ((char *) coding->destination,
8005                                    coding->produced);
8006           xfree (coding->destination);
8007         }
8008     }
8009
8010   if (saved_pt >= 0)
8011     {
8012       /* This is the case of:
8013          (BUFFERP (src_object) && EQ (src_object, dst_object))
8014          As we have moved PT while replacing the original buffer
8015          contents, we must recover it now.  */
8016       set_buffer_internal (XBUFFER (src_object));
8017       if (saved_pt < from)
8018         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8019       else if (saved_pt < from + chars)
8020         TEMP_SET_PT_BOTH (from, from_byte);
8021       else if (! NILP (current_buffer->enable_multibyte_characters))
8022         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8023                           saved_pt_byte + (coding->produced - bytes));
8024       else
8025         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8026                           saved_pt_byte + (coding->produced - bytes));
8027
8028       if (need_marker_adjustment)
8029         {
8030           struct Lisp_Marker *tail;
8031
8032           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8033             if (tail->need_adjustment)
8034               {
8035                 tail->need_adjustment = 0;
8036                 if (tail->insertion_type)
8037                   {
8038                     tail->bytepos = from_byte;
8039                     tail->charpos = from;
8040                   }
8041                 else
8042                   {
8043                     tail->bytepos = from_byte + coding->produced;
8044                     tail->charpos
8045                       = (NILP (current_buffer->enable_multibyte_characters)
8046                          ? tail->bytepos : from + coding->produced_char);
8047                   }
8048               }
8049         }
8050     }
8051
8052   if (kill_src_buffer)
8053     Fkill_buffer (coding->src_object);
8054
8055   Vdeactivate_mark = old_deactivate_mark;
8056   unbind_to (count, Qnil);
8057 }
8058
8059
8060 Lisp_Object
8061 preferred_coding_system (void)
8062 {
       /* Return the name of the coding system whose id is recorded for the
          category that comes first in coding_priorities.  */
8063   int id = coding_categories[coding_priorities[0]].id;
8064
8065   return CODING_ID_NAME (id);
8066 }
8067
8068 \f
8069 #ifdef emacs
8070 /*** 11. Emacs Lisp library functions ***/
8071
8072 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8073        doc: /* Return t if OBJECT is nil or a coding-system.
8074 See the documentation of `define-coding-system' for information
8075 about coding-system objects.  */)
8076      (object)
8077      Lisp_Object object;
8078 {
       /* nil is accepted, and so is anything for which CODING_SYSTEM_ID
          yields a non-negative id.  */
8079   if (NILP (object)
8080       || CODING_SYSTEM_ID (object) >= 0)
8081     return Qt;
       /* Otherwise OBJECT qualifies only if it is a symbol carrying a
          non-nil Qcoding_system_define_form property (the form that
          Fcheck_coding_system evaluates on demand).  */
8082   if (! SYMBOLP (object)
8083       || NILP (Fget (object, Qcoding_system_define_form)))
8084     return Qnil;
8085   return Qt;
8086 }
8087
8088 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8089        Sread_non_nil_coding_system, 1, 1, 0,
8090        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8091      (prompt)
8092      Lisp_Object prompt;
8093 {
8094   Lisp_Object val;
       /* Ask again for as long as the string that comes back is empty, so
          the caller never receives the symbol interned from "".
          Candidates come from Vcoding_system_alist and the input history
          is Qcoding_system_history.  */
8095   do
8096     {
8097       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8098                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8099     }
8100   while (SCHARS (val) == 0);
       /* The value read is a string; return the symbol of that name.  */
8101   return (Fintern (val, Qnil));
8102 }
8103
8104 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8105        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8106 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8107 Ignores case when completing coding systems (all Emacs coding systems
8108 are lower-case).  */)
8109      (prompt, default_coding_system)
8110      Lisp_Object prompt, default_coding_system;
8111 {
8112   Lisp_Object val;
       /* Remember the current depth of the binding stack so that the
          specbind below can be undone with unbind_to.  */
8113   int count = SPECPDL_INDEX ();
8114
       /* The default is handed to Fcompleting_read as a string, so a
          symbol is replaced by its name first.  */
8115   if (SYMBOLP (default_coding_system))
8116     default_coding_system = SYMBOL_NAME (default_coding_system);
       /* Bind Qcompletion_ignore_case to t only for the duration of the
          read.  */
8117   specbind (Qcompletion_ignore_case, Qt);
8118   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8119                           Qt, Qnil, Qcoding_system_history,
8120                           default_coding_system, Qnil);
8121   unbind_to (count, Qnil);
       /* An empty string yields nil; anything else is interned.
          NOTE(review): substituting the default for null input is
          presumably done inside Fcompleting_read -- confirm.  */
8122   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8123 }
8124
8125 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8126        1, 1, 0,
8127        doc: /* Check validity of CODING-SYSTEM.
8128 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8129 It is valid if it is nil or a symbol defined as a coding system by the
8130 function `define-coding-system'.  */)
8131   (coding_system)
8132      Lisp_Object coding_system;
8133 {
8134   Lisp_Object define_form;
8135
       /* If CODING_SYSTEM carries a pending definition form, evaluate it
          now.  The property is reset to nil before the evaluation, so the
          form is no longer attached to the symbol while it runs or
          afterwards.  */
8136   define_form = Fget (coding_system, Qcoding_system_define_form);
8137   if (! NILP (define_form))
8138     {
8139       Fput (coding_system, Qcoding_system_define_form, Qnil);
8140       safe_eval (define_form);
8141     }
8142   if (!NILP (Fcoding_system_p (coding_system)))
8143     return coding_system;
       /* Not a coding system: signal Qcoding_system_error with the
          offending object.  No return statement follows, so xsignal1 is
          presumably non-returning -- confirm against its declaration.  */
8144   xsignal1 (Qcoding_system_error, coding_system);
8145 }
8146
8147 \f
8148 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8149    HIGHEST is nonzero, return the coding system of the highest
8150    priority among the detected coding systems.  Otherwise return a
8151    list of detected coding systems sorted by their priorities.  If
8152    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8153    multibyte form but contain only ASCII and eight-bit chars.
8154    Otherwise, the bytes are raw bytes.
8155
8156    CODING-SYSTEM controls the detection as below:
8157
8158    If it is nil, detect both text-format and eol-format.  If the
8159    text-format part of CODING-SYSTEM is already specified
8160    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8161    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8162    detect only text-format.  */
8163
8164 Lisp_Object
8165 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
8166                       coding_system)
8167      const unsigned char *src;
8168      EMACS_INT src_chars, src_bytes;
8169      int highest;
8170      int multibytep;
8171      Lisp_Object coding_system;
8172 {
8173   const unsigned char *src_end = src + src_bytes;
8174   Lisp_Object attrs, eol_type;
8175   Lisp_Object val = Qnil;
8176   struct coding_system coding;
8177   int id;
8178   struct coding_detection_info detect_info;
8179   enum coding_category base_category;
8180   int null_byte_found = 0, eight_bit_found = 0;
8181
       /* A nil CODING_SYSTEM is treated as Qundecided.  After the setup,
          CODING_SYSTEM is replaced by the base name recorded in the
          attributes, and EOL_TYPE holds the eol type of the coding system
          that was actually passed in.  */
8182   if (NILP (coding_system))
8183     coding_system = Qundecided;
8184   setup_coding_system (coding_system, &coding);
8185   attrs = CODING_ID_ATTRS (coding.id);
8186   eol_type = CODING_ID_EOL_TYPE (coding.id);
8187   coding_system = CODING_ATTR_BASE_NAME (attrs);
8188
       /* Describe the source to the detectors.  The whole input is handed
          over as one final block.  */
8189   coding.source = src;
8190   coding.src_chars = src_chars;
8191   coding.src_bytes = src_bytes;
8192   coding.src_multibyte = multibytep;
8193   coding.consumed = 0;
8194   coding.mode |= CODING_MODE_LAST_BLOCK;
8195   coding.head_ascii = 0;
8196
       /* No category has been checked, found or rejected yet.  */
8197   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8198
8199   /* At first, detect text-format if necessary.  */
8200   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8201   if (base_category == coding_category_undecided)
8202     {
8203       enum coding_category category;
8204       struct coding_system *this;
8205       int c, i;
8206
8207       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
           /* While scanning, coding.head_ascii counts the bytes that
              precede the first eight-bit byte.  The scan stops early once
              both a null byte and an eight-bit byte have been seen, or
              once the ISO-2022 detector reports a result.  */
8208       for (; src < src_end; src++)
8209         {
8210           c = *src;
8211           if (c & 0x80)
8212             {
8213               eight_bit_found = 1;
8214               if (null_byte_found)
8215                 break;
8216             }
8217           else if (c < 0x20)
8218             {
                   /* ESC, SI and SO trigger the ISO-2022 detector, at
                      most once (it is skipped as soon as
                      detect_info.checked is nonzero) and only when such
                      detection is not inhibited.  */
8219               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8220                   && ! inhibit_iso_escape_detection
8221                   && ! detect_info.checked)
8222                 {
8223                   if (detect_coding_iso_2022 (&coding, &detect_info))
8224                     {
8225                       /* We have scanned the whole data.  */
8226                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8227                         {
8228                           /* We didn't find an 8-bit code.  We may
8229                              have found a null-byte, but it's very
8230                              rare that a binary file conforms to
8231                              ISO-2022.  */
8232                           src = src_end;
8233                           coding.head_ascii = src - coding.source;
8234                         }
                           /* Every category outside
                              CATEGORY_MASK_ISO_ESCAPE is now ruled
                              out.  */
8235                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8236                       break;
8237                     }
8238                 }
8239               else if (! c && !inhibit_null_byte_detection)
8240                 {
8241                   null_byte_found = 1;
8242                   if (eight_bit_found)
8243                     break;
8244                 }
8245               if (! eight_bit_found)
8246                 coding.head_ascii++;
8247             }
8248           else if (! eight_bit_found)
8249             coding.head_ascii++;
8250         }
8251
           /* Consult the per-category detectors only if the scan saw
              something other than plain ASCII, stopped early, or the
              ISO-2022 detector already found a category.  */
8252       if (null_byte_found || eight_bit_found
8253           || coding.head_ascii < coding.src_bytes
8254           || detect_info.found)
8255         {
8256           if (coding.head_ascii == coding.src_bytes)
8257             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
                 /* No detector is called here: the loop only positions
                    CATEGORY and THIS on the first category, in priority
                    order, whose bit is set in detect_info.found.  */
8258             for (i = 0; i < coding_category_raw_text; i++)
8259               {
8260                 category = coding_priorities[i];
8261                 this = coding_categories + category;
8262                 if (detect_info.found & (1 << category))
8263                   break;
8264               }
8265           else
8266             {
                   /* With a null byte present, everything except the
                      UTF-16 categories is marked as already checked and
                      rejected, so only the UTF-16 detectors can still
                      run below.  */
8267               if (null_byte_found)
8268                 {
8269                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8270                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8271                 }
                   /* Walk the categories in priority order.  When HIGHEST
                      is nonzero the loop stops at the first category that
                      is found, leaving CATEGORY and THIS set to it.  */
8272               for (i = 0; i < coding_category_raw_text; i++)
8273                 {
8274                   category = coding_priorities[i];
8275                   this = coding_categories + category;
8276
8277                   if (this->id < 0)
8278                     {
8279                       /* No coding system of this category is defined.  */
8280                       detect_info.rejected |= (1 << category);
8281                     }
8282                   else if (category >= coding_category_raw_text)
8283                     continue;
8284                   else if (detect_info.checked & (1 << category))
8285                     {
                           /* Already examined by an earlier detector
                              call; just look at the recorded result.  */
8286                       if (highest
8287                           && (detect_info.found & (1 << category)))
8288                         break;
8289                     }
8290                   else if ((*(this->detector)) (&coding, &detect_info)
8291                            && highest
8292                            && (detect_info.found & (1 << category)))
8293                     {
                           /* For the automatic UTF-16 category, report
                              the concrete byte order that was found.
                              Only CATEGORY is changed here; THIS keeps
                              pointing at the entry of the automatic
                              category.  */
8294                       if (category == coding_category_utf_16_auto)
8295                         {
8296                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8297                             category = coding_category_utf_16_le;
8298                           else
8299                             category = coding_category_utf_16_be;
8300                         }
8301                       break;
8302                     }
8303                 }
8304             }
8305         }
8306
           /* Turn the detection result into VAL, a list of coding system
              ids (they are replaced by names in the eol step below).  */
           /* Case 1: every category is rejected, or a null byte was
              seen: answer with the id of Qno_conversion.  */
8307       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8308           || null_byte_found)
8309         {
8310           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8311           id = CODING_SYSTEM_ID (Qno_conversion);
8312           val = Fcons (make_number (id), Qnil);
8313         }
           /* Case 2: nothing was rejected and nothing was found: answer
              with the coding system of the undecided category.  */
8314       else if (! detect_info.rejected && ! detect_info.found)
8315         {
8316           detect_info.found = CATEGORY_MASK_ANY;
8317           id = coding_categories[coding_category_undecided].id;
8318           val = Fcons (make_number (id), Qnil);
8319         }
           /* Case 3: only the highest-priority candidate is wanted.  */
8320       else if (highest)
8321         {
8322           if (detect_info.found)
8323             {
                   /* CATEGORY and THIS still hold what the detection
                      loops above last assigned to them.  */
8324               detect_info.found = 1 << category;
8325               val = Fcons (make_number (this->id), Qnil);
8326             }
8327           else
                 /* Nothing was positively found: take the first category
                    in priority order that has not been rejected.  */
8328             for (i = 0; i < coding_category_raw_text; i++)
8329               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8330                 {
8331                   detect_info.found = 1 << coding_priorities[i];
8332                   id = coding_categories[coding_priorities[i]].id;
8333                   val = Fcons (make_number (id), Qnil);
8334                   break;
8335                 }
8336         }
           /* Case 4: build the full list.  Both loops run from the lowest
              priority to the highest and prepend to VAL, so each group
              ends up in priority order, and the found categories (added
              last) come before the merely not-rejected ones.  */
8337       else
8338         {
8339           int mask = detect_info.rejected | detect_info.found;
8340           int found = 0;
8341
               /* Categories that were neither rejected nor found.  */
8342           for (i = coding_category_raw_text - 1; i >= 0; i--)
8343             {
8344               category = coding_priorities[i];
8345               if (! (mask & (1 << category)))
8346                 {
8347                   found |= 1 << category;
8348                   id = coding_categories[category].id;
8349                   if (id >= 0)
8350                     val = Fcons (make_number (id), val);
8351                 }
8352             }
               /* Categories that were positively found.  */
8353           for (i = coding_category_raw_text - 1; i >= 0; i--)
8354             {
8355               category = coding_priorities[i];
8356               if (detect_info.found & (1 << category))
8357                 {
8358                   id = coding_categories[category].id;
8359                   val = Fcons (make_number (id), val);
8360                 }
8361             }
8362           detect_info.found |= found;
8363         }
8364     }
       /* The given coding system is of the automatic UTF-8 category: only
          decide between the variants with and without signature.  VAL
          stays nil if the detector returns zero.  */
8365   else if (base_category == coding_category_utf_8_auto)
8366     {
8367       if (detect_coding_utf_8 (&coding, &detect_info))
8368         {
8369           struct coding_system *this;
8370
8371           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8372             this = coding_categories + coding_category_utf_8_sig;
8373           else
8374             this = coding_categories + coding_category_utf_8_nosig;
8375           val = Fcons (make_number (this->id), Qnil);
8376         }
8377     }
       /* Likewise for the automatic UTF-16 category: pick LE or BE if
          one of them was found, otherwise one of the no-signature
          variants depending on which one was rejected.  */
8378   else if (base_category == coding_category_utf_16_auto)
8379     {
8380       if (detect_coding_utf_16 (&coding, &detect_info))
8381         {
8382           struct coding_system *this;
8383
8384           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8385             this = coding_categories + coding_category_utf_16_le;
8386           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8387             this = coding_categories + coding_category_utf_16_be;
8388           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8389             this = coding_categories + coding_category_utf_16_be_nosig;
8390           else
8391             this = coding_categories + coding_category_utf_16_le_nosig;
8392           val = Fcons (make_number (this->id), Qnil);
8393         }
8394     }
       /* The text format is already fixed by the given coding system:
          record its category as found and keep its id.  */
8395   else
8396     {
8397       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8398       val = Fcons (make_number (coding.id), Qnil);
8399     }
8400
8401   /* Then, detect eol-format if necessary.  */
8402   {
8403     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8404     Lisp_Object tail;
8405
         /* A vector EOL_TYPE is the case in which the eol format is
            detected from the data: once for the non-UTF-16 categories and
            once for each UTF-16 byte order that was found.  Values left
            at -1 mean "not determined".  */
8406     if (VECTORP (eol_type))
8407       {
8408         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8409           {
                 /* With a null byte in the data no eol scan is done; LF
                    is assumed.  */
8410             if (null_byte_found)
8411               normal_eol = EOL_SEEN_LF;
8412             else
8413               normal_eol = detect_eol (coding.source, src_bytes,
8414                                        coding_category_raw_text);
8415           }
8416         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8417                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8418           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8419                                       coding_category_utf_16_be);
8420         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8421                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8422           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8423                                       coding_category_utf_16_le);
8424       }
         /* Otherwise the eol format is dictated by the given coding
            system: Qunix maps to LF, Qdos to CRLF, anything else to
            CR.  */
8425     else
8426       {
8427         if (EQ (eol_type, Qunix))
8428           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8429         else if (EQ (eol_type, Qdos))
8430           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8431         else
8432           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8433       }
8434
         /* Replace each id in VAL, in place, by a coding system name.  If
            the coding system's own eol type is a vector, element 0, 1 or
            2 is used for LF, CRLF or CR respectively, and the plain name
            when the eol format is not determined.  */
8435     for (tail = val; CONSP (tail); tail = XCDR (tail))
8436       {
8437         enum coding_category category;
8438         int this_eol;
8439
8440         id = XINT (XCAR (tail));
8441         attrs = CODING_ID_ATTRS (id);
8442         category = XINT (CODING_ATTR_CATEGORY (attrs));
8443         eol_type = CODING_ID_EOL_TYPE (id);
8444         if (VECTORP (eol_type))
8445           {
8446             if (category == coding_category_utf_16_be
8447                 || category == coding_category_utf_16_be_nosig)
8448               this_eol = utf_16_be_eol;
8449             else if (category == coding_category_utf_16_le
8450                      || category == coding_category_utf_16_le_nosig)
8451               this_eol = utf_16_le_eol;
8452             else
8453               this_eol = normal_eol;
8454
8455             if (this_eol == EOL_SEEN_LF)
8456               XSETCAR (tail, AREF (eol_type, 0));
8457             else if (this_eol == EOL_SEEN_CRLF)
8458               XSETCAR (tail, AREF (eol_type, 1));
8459             else if (this_eol == EOL_SEEN_CR)
8460               XSETCAR (tail, AREF (eol_type, 2));
8461             else
8462               XSETCAR (tail, CODING_ID_NAME (id));
8463           }
8464         else
8465           XSETCAR (tail, CODING_ID_NAME (id));
8466       }
8467   }
8468
       /* With HIGHEST, return just the first element (or nil when the list
          is empty); otherwise return the whole list.  */
8469   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8470 }
8471
8472
8473 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8474        2, 3, 0,
8475        doc: /* Detect coding system of the text in the region between START and END.
8476 Return a list of possible coding systems ordered by priority.
8477 The coding systems to try and their priorities follows what
8478 the function `coding-system-priority-list' (which see) returns.
8479
8480 If only ASCII characters are found (except for such ISO-2022 control
8481 characters as ESC), it returns a list of single element `undecided'
8482 or its subsidiary coding system according to a detected end-of-line
8483 format.
8484
8485 If optional argument HIGHEST is non-nil, return the coding system of
8486 highest priority.  */)
8487      (start, end, highest)
8488      Lisp_Object start, end, highest;
8489 {
8490   int from, to;
8491   int from_byte, to_byte;
8492
       /* START and END may be markers; turn them into numbers and
          validate them against the current buffer.  */
8493   CHECK_NUMBER_COERCE_MARKER (start);
8494   CHECK_NUMBER_COERCE_MARKER (end);
8495
8496   validate_region (&start, &end);
8497   from = XINT (start), to = XINT (end);
8498   from_byte = CHAR_TO_BYTE (from);
8499   to_byte = CHAR_TO_BYTE (to);
8500
       /* The detector receives a single pointer and a byte length, so when
          FROM is before the gap and TO is at or after it, the gap is moved
          to TO -- presumably so that the region is contiguous in
          memory.  */
8501   if (from < GPT && to >= GPT)
8502     move_gap_both (to, to_byte);
8503
       /* The final Qnil asks for detection of both text format and eol
          format.  */
8504   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8505                                to - from, to_byte - from_byte,
8506                                !NILP (highest),
8507                                !NILP (current_buffer
8508                                       ->enable_multibyte_characters),
8509                                Qnil);
8510 }
8511
8512 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8513        1, 2, 0,
8514        doc: /* Detect coding system of the text in STRING.
8515 Return a list of possible coding systems ordered by priority.
8516 The coding systems to try and their priorities follows what
8517 the function `coding-system-priority-list' (which see) returns.
8518
8519 If only ASCII characters are found (except for such ISO-2022 control
8520 characters as ESC), it returns a list of single element `undecided'
8521 or its subsidiary coding system according to a detected end-of-line
8522 format.
8523
8524 If optional argument HIGHEST is non-nil, return the coding system of
8525 highest priority.  */)
8526      (string, highest)
8527      Lisp_Object string, highest;
8528 {
8529   CHECK_STRING (string);
8530
       /* Run the detector over the string's bytes, passing its character
          and byte counts and its multibyteness.  The final Qnil asks for
          detection of both text format and eol format.  */
8531   return detect_coding_system (SDATA (string),
8532                                SCHARS (string), SBYTES (string),
8533                                !NILP (highest), STRING_MULTIBYTE (string),
8534                                Qnil);
8535 }
8536
8537
8538 static INLINE int
8539 char_encodable_p (int c, Lisp_Object attrs)
8540 {
       /* Return nonzero if character C, after being mapped through the
          translation table stored in ATTRS (when there is one), belongs to
          at least one charset of the charset list in ATTRS.  */
8541   Lisp_Object tail;
8542   struct charset *charset;
8543   Lisp_Object translation_table;
8544
8545   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8546   if (! NILP (translation_table))
8547     c = translate_char (translation_table, c);
       /* The list elements are charset ids.  Stop at the first charset
          that contains C.  */
8548   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8549        CONSP (tail); tail = XCDR (tail))
8550     {
8551       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8552       if (CHAR_CHARSET_P (c, charset))
8553         break;
8554     }
       /* TAIL is non-nil exactly when the loop was left through the
          break, i.e. when a matching charset was found.  */
8555   return (! NILP (tail));
8556 }
8557
8558
8559 /* Return a list of coding systems that safely encode the text between
8560    START and END.  If EXCLUDE is non-nil, it is a list of coding
8561    systems not to check.  The returned list doesn't contain any such
8562    coding systems.  In any case, if the text contains only ASCII or is
8563    unibyte, return t.  START may also be a string, in which case the
8564    whole string is examined and END is not used.  */
8564
8565 DEFUN ("find-coding-systems-region-internal",
8566        Ffind_coding_systems_region_internal,
8567        Sfind_coding_systems_region_internal, 2, 3, 0,
8568        doc: /* Internal use only.  */)
8569      (start, end, exclude)
8570      Lisp_Object start, end, exclude;
8571 {
8572   Lisp_Object coding_attrs_list, safe_codings;
8573   EMACS_INT start_byte, end_byte;
8574   const unsigned char *p, *pbeg, *pend;
8575   int c;
8576   Lisp_Object tail, elt, work_table;
8577
       /* Shortcuts: a unibyte string or buffer, or text whose character
          count equals its byte count (no multibyte sequence at all),
          gives t at once.  */
8578   if (STRINGP (start))
8579     {
8580       if (!STRING_MULTIBYTE (start)
8581           || SCHARS (start) == SBYTES (start))
8582         return Qt;
8583       start_byte = 0;
8584       end_byte = SBYTES (start);
8585     }
8586   else
8587     {
8588       CHECK_NUMBER_COERCE_MARKER (start);
8589       CHECK_NUMBER_COERCE_MARKER (end);
8590       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8591         args_out_of_range (start, end);
8592       if (NILP (current_buffer->enable_multibyte_characters))
8593         return Qt;
8594       start_byte = CHAR_TO_BYTE (XINT (start));
8595       end_byte = CHAR_TO_BYTE (XINT (end));
8596       if (XINT (end) - XINT (start) == end_byte - start_byte)
8597         return Qt;
8598
           /* The region is scanned below through plain pointers, so when
              the gap lies strictly inside it, the gap is moved to
              whichever end of the region is nearer.  */
8599       if (XINT (start) < GPT && XINT (end) > GPT)
8600         {
8601           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8602             move_gap_both (XINT (start), start_byte);
8603           else
8604             move_gap_both (XINT (end), end_byte);
8605         }
8606     }
8607
       /* Collect the attribute vectors of the candidates: every element
          of Vcoding_system_list that is not in EXCLUDE, is its own base
          name, and is not of type Qundecided.  The result of
          get_translation_table is stored into slot coding_attr_trans_tbl
          of each attribute vector (presumably the slot that
          char_encodable_p reads back through CODING_ATTR_TRANS_TBL --
          confirm).  */
8608   coding_attrs_list = Qnil;
8609   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8610     if (NILP (exclude)
8611         || NILP (Fmemq (XCAR (tail), exclude)))
8612       {
8613         Lisp_Object attrs;
8614
8615         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8616         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8617             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8618           {
8619             ASET (attrs, coding_attr_trans_tbl,
8620                   get_translation_table (attrs, 1, NULL));
8621             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8622           }
8623       }
8624
8625   if (STRINGP (start))
8626     p = pbeg = SDATA (start);
8627   else
8628     p = pbeg = BYTE_POS_ADDR (start_byte);
8629   pend = p + (end_byte - start_byte);
8630
       /* Trim the ASCII bytes at both ends of the text.  */
8631   while (p < pend && ASCII_BYTE_P (*p)) p++;
8632   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8633
       /* WORK_TABLE remembers the characters that have already been
          tested, so that each distinct character is tested once.  */
8634   work_table = Fmake_char_table (Qnil, Qnil);
8635   while (p < pend)
8636     {
8637       if (ASCII_BYTE_P (*p))
8638         p++;
8639       else
8640         {
8641           c = STRING_CHAR_ADVANCE (p);
8642           if (!NILP (char_table_ref (work_table, c)))
8643             /* This character was already checked.  Ignore it.  */
8644             continue;
8645
               /* Drop from the candidate list every coding system that
                  cannot encode C.  The list is edited in place: a cell
                  that has a successor takes over the successor's car and
                  cdr (and is then examined again, as TAIL does not
                  advance); the last cell just gets its car set to nil,
                  which later steps skip.  */
8646           charset_map_loaded = 0;
8647           for (tail = coding_attrs_list; CONSP (tail);)
8648             {
8649               elt = XCAR (tail);
8650               if (NILP (elt))
8651                 tail = XCDR (tail);
8652               else if (char_encodable_p (c, elt))
8653                 tail = XCDR (tail);
8654               else if (CONSP (XCDR (tail)))
8655                 {
8656                   XSETCAR (tail, XCAR (XCDR (tail)));
8657                   XSETCDR (tail, XCDR (XCDR (tail)));
8658                 }
8659               else
8660                 {
8661                   XSETCAR (tail, Qnil);
8662                   tail = XCDR (tail);
8663                 }
8664             }
               /* If charset_map_loaded became nonzero during the checks,
                  recompute PBEG and rebase P and PEND on it at the same
                  offsets -- presumably because loading a charset map may
                  relocate the text being scanned (confirm).  */
8665           if (charset_map_loaded)
8666             {
8667               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8668
8669               if (STRINGP (start))
8670                 pbeg = SDATA (start);
8671               else
8672                 pbeg = BYTE_POS_ADDR (start_byte);
8673               p = pbeg + p_offset;
8674               pend = pbeg + pend_offset;
8675             }
8676           char_table_set (work_table, c, Qt);
8677         }
8678     }
8679
       /* Qraw_text and Qno_conversion are always part of the answer; the
          base names of the surviving candidates are pushed in front of
          them.  */
8680   safe_codings = list2 (Qraw_text, Qno_conversion);
8681   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8682     if (! NILP (XCAR (tail)))
8683       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8684
8685   return safe_codings;
8686 }
8687
8688
8689 DEFUN ("unencodable-char-position", Funencodable_char_position,
8690        Sunencodable_char_position, 3, 5, 0,
8691        doc: /*
8692 Return position of first un-encodable character in a region.
8693 START and END specify the region and CODING-SYSTEM specifies the
8694 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8695
8696 If optional 4th argument COUNT is non-nil, it specifies at most how
8697 many un-encodable characters to search.  In this case, the value is a
8698 list of positions.
8699
8700 If optional 5th argument STRING is non-nil, it is a string to search
8701 for un-encodable characters.  In that case, START and END are indexes
8702 to the string.  */)
8703      (start, end, coding_system, count, string)
8704      Lisp_Object start, end, coding_system, count, string;
8705 {
8706   int n;
8707   struct coding_system coding;
8708   Lisp_Object attrs, charset_list, translation_table;
8709   Lisp_Object positions;
8710   int from, to;
8711   const unsigned char *p, *stop, *pend;
8712   int ascii_compatible;
8713
       /* Fcheck_coding_system signals an error for an invalid
          CODING-SYSTEM.  A coding system of type Qraw_text gives nil
          right away.  */
8714   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8715   attrs = CODING_ID_ATTRS (coding.id);
8716   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8717     return Qnil;
8718   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8719   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8720   translation_table = get_translation_table (attrs, 1, NULL);
8721
       /* Set up P (scan pointer), PEND (end of the text) and STOP (where
          the current contiguous run ends).  FROM is kept equal to the
          character position of P.  */
8722   if (NILP (string))
8723     {
8724       validate_region (&start, &end);
8725       from = XINT (start);
8726       to = XINT (end);
           /* Nothing to report for a unibyte buffer, or for an
              ASCII-compatible coding system when the region has as many
              bytes as characters.  */
8727       if (NILP (current_buffer->enable_multibyte_characters)
8728           || (ascii_compatible
8729               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8730         return Qnil;
8731       p = CHAR_POS_ADDR (from);
8732       pend = CHAR_POS_ADDR (to);
           /* If the region spans the gap, the first run ends at the gap;
              the loop below resumes after the gap.  */
8733       if (from < GPT && to >= GPT)
8734         stop = GPT_ADDR;
8735       else
8736         stop = pend;
8737     }
8738   else
8739     {
8740       CHECK_STRING (string);
8741       CHECK_NATNUM (start);
8742       CHECK_NATNUM (end);
8743       from = XINT (start);
8744       to = XINT (end);
8745       if (from > to
8746           || to > SCHARS (string))
8747         args_out_of_range_3 (string, start, end);
8748       if (! STRING_MULTIBYTE (string))
8749         return Qnil;
8750       p = SDATA (string) + string_char_to_byte (string, from);
8751       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8752       if (ascii_compatible && (to - from) == (pend - p))
8753         return Qnil;
8754     }
8755
       /* N is the number of positions still wanted: 1 when COUNT is nil.
          NOTE(review): with COUNT equal to 0, N becomes negative at the
          first hit and never equals 0, so every un-encodable position is
          collected -- confirm this is intended.  */
8756   if (NILP (count))
8757     n = 1;
8758   else
8759     {
8760       CHECK_NATNUM (count);
8761       n = XINT (count);
8762     }
8763
8764   positions = Qnil;
8765   while (1)
8766     {
8767       int c;
8768
           /* For an ASCII-compatible coding system, ASCII bytes are
              skipped without further checks.  */
8769       if (ascii_compatible)
8770         while (p < stop && ASCII_BYTE_P (*p))
8771           p++, from++;
8772       if (p >= stop)
8773         {
               /* Either the whole text has been scanned, or only the part
                  before the gap; in the latter case continue from the end
                  of the gap up to PEND.  */
8774           if (p >= pend)
8775             break;
8776           stop = pend;
8777           p = GAP_END_ADDR;
8778         }
8779
           /* Fetch the next character and test whether, after
              translation, it belongs to any charset in CHARSET_LIST.
              Hits are pushed on POSITIONS, most recent first.  */
8780       c = STRING_CHAR_ADVANCE (p);
8781       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8782           && ! char_charset (translate_char (translation_table, c),
8783                              charset_list, NULL))
8784         {
8785           positions = Fcons (make_number (from), positions);
8786           n--;
8787           if (n == 0)
8788             break;
8789         }
8790
8791       from++;
8792     }
8793
       /* Without COUNT, return the single position found (or nil); with
          COUNT, return the positions in the order they were found.  */
8794   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8795 }
8796
8797
8798 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8799        Scheck_coding_systems_region, 3, 3, 0,
8800        doc: /* Check if the region is encodable by coding systems.
8801
8802 START and END are buffer positions specifying the region.
8803 CODING-SYSTEM-LIST is a list of coding systems to check.
8804
8805 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8806 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8807 whole region, POS0, POS1, ... are buffer positions where non-encodable
8808 characters are found.
8809
8810 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8811 value is nil.
8812
8813 START may be a string.  In that case, check if the string is
8814 encodable, and the value contains indices to the string instead of
8815 buffer positions.  END is ignored.
8816
8817 If the current buffer (or START if it is a string) is unibyte, the value
8818 is nil.  */)
8819      (start, end, coding_system_list)
8820      Lisp_Object start, end, coding_system_list;
8821 {
8822   Lisp_Object list;
8823   EMACS_INT start_byte, end_byte;
8824   int pos;
8825   const unsigned char *p, *pbeg, *pend;
8826   int c;
8827   Lisp_Object tail, elt, attrs;
8828
8829   if (STRINGP (start))
8830     {
8831       if (!STRING_MULTIBYTE (start)
8832           || SCHARS (start) == SBYTES (start))
8833         return Qnil;
8834       start_byte = 0;
8835       end_byte = SBYTES (start);
8836       pos = 0;
8837     }
8838   else
8839     {
8840       CHECK_NUMBER_COERCE_MARKER (start);
8841       CHECK_NUMBER_COERCE_MARKER (end);
8842       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8843         args_out_of_range (start, end);
8844       if (NILP (current_buffer->enable_multibyte_characters))
8845         return Qnil;
8846       start_byte = CHAR_TO_BYTE (XINT (start));
8847       end_byte = CHAR_TO_BYTE (XINT (end));
8848       if (XINT (end) - XINT (start) == end_byte - start_byte)
8849         return Qnil;
8850
8851       if (XINT (start) < GPT && XINT (end) > GPT)
8852         {
8853           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8854             move_gap_both (XINT (start), start_byte);
8855           else
8856             move_gap_both (XINT (end), end_byte);
8857         }
8858       pos = XINT (start);
8859     }
8860
8861   list = Qnil;
8862   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8863     {
8864       elt = XCAR (tail);
8865       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8866       ASET (attrs, coding_attr_trans_tbl,
8867             get_translation_table (attrs, 1, NULL));
8868       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8869     }
8870
8871   if (STRINGP (start))
8872     p = pbeg = SDATA (start);
8873   else
8874     p = pbeg = BYTE_POS_ADDR (start_byte);
8875   pend = p + (end_byte - start_byte);
8876
8877   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8878   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8879
8880   while (p < pend)
8881     {
8882       if (ASCII_BYTE_P (*p))
8883         p++;
8884       else
8885         {
8886           c = STRING_CHAR_ADVANCE (p);
8887
8888           charset_map_loaded = 0;
8889           for (tail = list; CONSP (tail); tail = XCDR (tail))
8890             {
8891               elt = XCDR (XCAR (tail));
8892               if (! char_encodable_p (c, XCAR (elt)))
8893                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8894             }
8895           if (charset_map_loaded)
8896             {
8897               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8898
8899               if (STRINGP (start))
8900                 pbeg = SDATA (start);
8901               else
8902                 pbeg = BYTE_POS_ADDR (start_byte);
8903               p = pbeg + p_offset;
8904               pend = pbeg + pend_offset;
8905             }
8906         }
8907       pos++;
8908     }
8909
8910   tail = list;
8911   list = Qnil;
8912   for (; CONSP (tail); tail = XCDR (tail))
8913     {
8914       elt = XCAR (tail);
8915       if (CONSP (XCDR (XCDR (elt))))
8916         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8917                       list);
8918     }
8919
8920   return list;
8921 }
8922
8923
8924 Lisp_Object
8925 code_convert_region (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object dst_object, int encodep, int norecord)
8926 {
8927   struct coding_system coding;
8928   EMACS_INT from, from_byte, to, to_byte;
8929   Lisp_Object src_object;
8930
8931   CHECK_NUMBER_COERCE_MARKER (start);
8932   CHECK_NUMBER_COERCE_MARKER (end);
8933   if (NILP (coding_system))
8934     coding_system = Qno_conversion;
8935   else
8936     CHECK_CODING_SYSTEM (coding_system);
8937   src_object = Fcurrent_buffer ();
8938   if (NILP (dst_object))
8939     dst_object = src_object;
8940   else if (! EQ (dst_object, Qt))
8941     CHECK_BUFFER (dst_object);
8942
8943   validate_region (&start, &end);
8944   from = XFASTINT (start);
8945   from_byte = CHAR_TO_BYTE (from);
8946   to = XFASTINT (end);
8947   to_byte = CHAR_TO_BYTE (to);
8948
8949   setup_coding_system (coding_system, &coding);
8950   coding.mode |= CODING_MODE_LAST_BLOCK;
8951
8952   if (encodep)
8953     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8954                           dst_object);
8955   else
8956     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8957                           dst_object);
8958   if (! norecord)
8959     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8960
8961   return (BUFFERP (dst_object)
8962           ? make_number (coding.produced_char)
8963           : coding.dst_object);
8964 }
8965
8966
8967 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8968        3, 4, "r\nzCoding system: ",
8969        doc: /* Decode the current region from the specified coding system.
8970 When called from a program, takes four arguments:
8971         START, END, CODING-SYSTEM, and DESTINATION.
8972 START and END are buffer positions.
8973
8974 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8975 If nil, the region between START and END is replaced by the decoded text.
8976 If buffer, the decoded text is inserted in that buffer after point (point
8977 does not move).
8978 In those cases, the length of the decoded text is returned.
8979 If DESTINATION is t, the decoded text is returned.
8980
8981 This function sets `last-coding-system-used' to the precise coding system
8982 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8983 not fully specified.)  */)
8984      (start, end, coding_system, destination)
8985      Lisp_Object start, end, coding_system, destination;
8986 {
8987   return code_convert_region (start, end, coding_system, destination, 0, 0);
8988 }
8989
8990 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8991        3, 4, "r\nzCoding system: ",
8992        doc: /* Encode the current region by specified coding system.
8993 When called from a program, takes four arguments:
8994         START, END, CODING-SYSTEM and DESTINATION.
8995 START and END are buffer positions.
8996
8997 Optional 4th arguments DESTINATION specifies where the encoded text goes.
8998 If nil, the region between START and END is replace by the encoded text.
8999 If buffer, the encoded text is inserted in that buffer after point (point
9000 does not move).
9001 In those cases, the length of the encoded text is returned.
9002 If DESTINATION is t, the encoded text is returned.
9003
9004 This function sets `last-coding-system-used' to the precise coding system
9005 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9006 not fully specified.)  */)
9007   (start, end, coding_system, destination)
9008      Lisp_Object start, end, coding_system, destination;
9009 {
9010   return code_convert_region (start, end, coding_system, destination, 1, 0);
9011 }
9012
9013 Lisp_Object
9014 code_convert_string (string, coding_system, dst_object,
9015                      encodep, nocopy, norecord)
9016      Lisp_Object string, coding_system, dst_object;
9017      int encodep, nocopy, norecord;
9018 {
9019   struct coding_system coding;
9020   EMACS_INT chars, bytes;
9021
9022   CHECK_STRING (string);
9023   if (NILP (coding_system))
9024     {
9025       if (! norecord)
9026         Vlast_coding_system_used = Qno_conversion;
9027       if (NILP (dst_object))
9028         return (nocopy ? Fcopy_sequence (string) : string);
9029     }
9030
9031   if (NILP (coding_system))
9032     coding_system = Qno_conversion;
9033   else
9034     CHECK_CODING_SYSTEM (coding_system);
9035   if (NILP (dst_object))
9036     dst_object = Qt;
9037   else if (! EQ (dst_object, Qt))
9038     CHECK_BUFFER (dst_object);
9039
9040   setup_coding_system (coding_system, &coding);
9041   coding.mode |= CODING_MODE_LAST_BLOCK;
9042   chars = SCHARS (string);
9043   bytes = SBYTES (string);
9044   if (encodep)
9045     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9046   else
9047     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9048   if (! norecord)
9049     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9050
9051   return (BUFFERP (dst_object)
9052           ? make_number (coding.produced_char)
9053           : coding.dst_object);
9054 }
9055
9056
9057 /* Encode or decode STRING according to CODING_SYSTEM.
9058    Do not set Vlast_coding_system_used.
9059
9060    This function is called only from macros DECODE_FILE and
9061    ENCODE_FILE, thus we ignore character composition.  */
9062
9063 Lisp_Object
9064 code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system, int encodep)
9065 {
9066   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9067 }
9068
9069
9070 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9071        2, 4, 0,
9072        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9073
9074 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9075 if the decoding operation is trivial.
9076
9077 Optional fourth arg BUFFER non-nil means that the decoded text is
9078 inserted in that buffer after point (point does not move).  In this
9079 case, the return value is the length of the decoded text.
9080
9081 This function sets `last-coding-system-used' to the precise coding system
9082 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9083 not fully specified.)  */)
9084   (string, coding_system, nocopy, buffer)
9085      Lisp_Object string, coding_system, nocopy, buffer;
9086 {
9087   return code_convert_string (string, coding_system, buffer,
9088                               0, ! NILP (nocopy), 0);
9089 }
9090
9091 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9092        2, 4, 0,
9093        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9094
9095 Optional third arg NOCOPY non-nil means it is OK to return STRING
9096 itself if the encoding operation is trivial.
9097
9098 Optional fourth arg BUFFER non-nil means that the encoded text is
9099 inserted in that buffer after point (point does not move).  In this
9100 case, the return value is the length of the encoded text.
9101
9102 This function sets `last-coding-system-used' to the precise coding system
9103 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9104 not fully specified.)  */)
9105      (string, coding_system, nocopy, buffer)
9106      Lisp_Object string, coding_system, nocopy, buffer;
9107 {
9108   return code_convert_string (string, coding_system, buffer,
9109                               1, ! NILP (nocopy), 1);
9110 }
9111
9112 \f
9113 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9114        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9115 Return the corresponding character.  */)
9116      (code)
9117      Lisp_Object code;
9118 {
9119   Lisp_Object spec, attrs, val;
9120   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9121   int c;
9122
9123   CHECK_NATNUM (code);
9124   c = XFASTINT (code);
9125   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9126   attrs = AREF (spec, 0);
9127
9128   if (ASCII_BYTE_P (c)
9129       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9130     return code;
9131
9132   val = CODING_ATTR_CHARSET_LIST (attrs);
9133   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9134   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9135   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9136
9137   if (c <= 0x7F)
9138     charset = charset_roman;
9139   else if (c >= 0xA0 && c < 0xDF)
9140     {
9141       charset = charset_kana;
9142       c -= 0x80;
9143     }
9144   else
9145     {
9146       int s1 = c >> 8, s2 = c & 0xFF;
9147
9148       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9149           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9150         error ("Invalid code: %d", code);
9151       SJIS_TO_JIS (c);
9152       charset = charset_kanji;
9153     }
9154   c = DECODE_CHAR (charset, c);
9155   if (c < 0)
9156     error ("Invalid code: %d", code);
9157   return make_number (c);
9158 }
9159
9160
9161 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9162        doc: /* Encode a Japanese character CH to shift_jis encoding.
9163 Return the corresponding code in SJIS.  */)
9164      (ch)
9165     Lisp_Object ch;
9166 {
9167   Lisp_Object spec, attrs, charset_list;
9168   int c;
9169   struct charset *charset;
9170   unsigned code;
9171
9172   CHECK_CHARACTER (ch);
9173   c = XFASTINT (ch);
9174   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9175   attrs = AREF (spec, 0);
9176
9177   if (ASCII_CHAR_P (c)
9178       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9179     return ch;
9180
9181   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9182   charset = char_charset (c, charset_list, &code);
9183   if (code == CHARSET_INVALID_CODE (charset))
9184     error ("Can't encode by shift_jis encoding: %d", c);
9185   JIS_TO_SJIS (code);
9186
9187   return make_number (code);
9188 }
9189
9190 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9191        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9192 Return the corresponding character.  */)
9193      (code)
9194      Lisp_Object code;
9195 {
9196   Lisp_Object spec, attrs, val;
9197   struct charset *charset_roman, *charset_big5, *charset;
9198   int c;
9199
9200   CHECK_NATNUM (code);
9201   c = XFASTINT (code);
9202   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9203   attrs = AREF (spec, 0);
9204
9205   if (ASCII_BYTE_P (c)
9206       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9207     return code;
9208
9209   val = CODING_ATTR_CHARSET_LIST (attrs);
9210   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9211   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9212
9213   if (c <= 0x7F)
9214     charset = charset_roman;
9215   else
9216     {
9217       int b1 = c >> 8, b2 = c & 0x7F;
9218       if (b1 < 0xA1 || b1 > 0xFE
9219           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9220         error ("Invalid code: %d", code);
9221       charset = charset_big5;
9222     }
9223   c = DECODE_CHAR (charset, (unsigned )c);
9224   if (c < 0)
9225     error ("Invalid code: %d", code);
9226   return make_number (c);
9227 }
9228
9229 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9230        doc: /* Encode the Big5 character CH to BIG5 coding system.
9231 Return the corresponding character code in Big5.  */)
9232      (ch)
9233      Lisp_Object ch;
9234 {
9235   Lisp_Object spec, attrs, charset_list;
9236   struct charset *charset;
9237   int c;
9238   unsigned code;
9239
9240   CHECK_CHARACTER (ch);
9241   c = XFASTINT (ch);
9242   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9243   attrs = AREF (spec, 0);
9244   if (ASCII_CHAR_P (c)
9245       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9246     return ch;
9247
9248   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9249   charset = char_charset (c, charset_list, &code);
9250   if (code == CHARSET_INVALID_CODE (charset))
9251     error ("Can't encode by Big5 encoding: %d", c);
9252
9253   return make_number (code);
9254 }
9255
9256 \f
9257 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9258        Sset_terminal_coding_system_internal, 1, 2, 0,
9259        doc: /* Internal use only.  */)
9260      (coding_system, terminal)
9261      Lisp_Object coding_system;
9262      Lisp_Object terminal;
9263 {
9264   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9265   CHECK_SYMBOL (coding_system);
9266   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9267   /* We had better not send unsafe characters to terminal.  */
9268   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9269   /* Characer composition should be disabled.  */
9270   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9271   terminal_coding->src_multibyte = 1;
9272   terminal_coding->dst_multibyte = 0;
9273   return Qnil;
9274 }
9275
9276 DEFUN ("set-safe-terminal-coding-system-internal",
9277        Fset_safe_terminal_coding_system_internal,
9278        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9279        doc: /* Internal use only.  */)
9280      (coding_system)
9281      Lisp_Object coding_system;
9282 {
9283   CHECK_SYMBOL (coding_system);
9284   setup_coding_system (Fcheck_coding_system (coding_system),
9285                        &safe_terminal_coding);
9286   /* Characer composition should be disabled.  */
9287   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9288   safe_terminal_coding.src_multibyte = 1;
9289   safe_terminal_coding.dst_multibyte = 0;
9290   return Qnil;
9291 }
9292
9293 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9294        Sterminal_coding_system, 0, 1, 0,
9295        doc: /* Return coding system specified for terminal output on the given terminal.
9296 TERMINAL may be a terminal object, a frame, or nil for the selected
9297 frame's terminal device.  */)
9298      (terminal)
9299      Lisp_Object terminal;
9300 {
9301   struct coding_system *terminal_coding
9302     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9303   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9304
9305   /* For backward compatibility, return nil if it is `undecided'. */
9306   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9307 }
9308
9309 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9310        Sset_keyboard_coding_system_internal, 1, 2, 0,
9311        doc: /* Internal use only.  */)
9312      (coding_system, terminal)
9313      Lisp_Object coding_system;
9314      Lisp_Object terminal;
9315 {
9316   struct terminal *t = get_terminal (terminal, 1);
9317   CHECK_SYMBOL (coding_system);
9318   if (NILP (coding_system))
9319     coding_system = Qno_conversion;
9320   else
9321     Fcheck_coding_system (coding_system);
9322   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9323   /* Characer composition should be disabled.  */
9324   TERMINAL_KEYBOARD_CODING (t)->common_flags
9325     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9326   return Qnil;
9327 }
9328
9329 DEFUN ("keyboard-coding-system",
9330        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9331        doc: /* Return coding system specified for decoding keyboard input.  */)
9332      (terminal)
9333      Lisp_Object terminal;
9334 {
9335   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9336                          (get_terminal (terminal, 1))->id);
9337 }
9338
9339 \f
9340 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9341        Sfind_operation_coding_system,  1, MANY, 0,
9342        doc: /* Choose a coding system for an operation based on the target name.
9343 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9344 DECODING-SYSTEM is the coding system to use for decoding
9345 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9346 for encoding (in case OPERATION does encoding).
9347
9348 The first argument OPERATION specifies an I/O primitive:
9349   For file I/O, `insert-file-contents' or `write-region'.
9350   For process I/O, `call-process', `call-process-region', or `start-process'.
9351   For network I/O, `open-network-stream'.
9352
9353 The remaining arguments should be the same arguments that were passed
9354 to the primitive.  Depending on which primitive, one of those arguments
9355 is selected as the TARGET.  For example, if OPERATION does file I/O,
9356 whichever argument specifies the file name is TARGET.
9357
9358 TARGET has a meaning which depends on OPERATION:
9359   For file I/O, TARGET is a file name (except for the special case below).
9360   For process I/O, TARGET is a process name.
9361   For network I/O, TARGET is a service name or a port number.
9362
9363 This function looks up what is specified for TARGET in
9364 `file-coding-system-alist', `process-coding-system-alist',
9365 or `network-coding-system-alist' depending on OPERATION.
9366 They may specify a coding system, a cons of coding systems,
9367 or a function symbol to call.
9368 In the last case, we call the function with one argument,
9369 which is a list of all the arguments given to this function.
9370 If the function can't decide a coding system, it can return
9371 `undecided' so that the normal code-detection is performed.
9372
9373 If OPERATION is `insert-file-contents', the argument corresponding to
9374 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9375 file name to look up, and BUFFER is a buffer that contains the file's
9376 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9377 function to call for FILENAME, that function should examine the
9378 contents of BUFFER instead of reading the file.
9379
9380 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9381      (nargs, args)
9382      int nargs;
9383      Lisp_Object *args;
9384 {
9385   Lisp_Object operation, target_idx, target, val;
9386   register Lisp_Object chain;
9387
9388   if (nargs < 2)
9389     error ("Too few arguments");
9390   operation = args[0];
9391   if (!SYMBOLP (operation)
9392       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9393     error ("Invalid first argument");
9394   if (nargs < 1 + XINT (target_idx))
9395     error ("Too few arguments for operation: %s",
9396            SDATA (SYMBOL_NAME (operation)));
9397   target = args[XINT (target_idx) + 1];
9398   if (!(STRINGP (target)
9399         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9400             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9401         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9402     error ("Invalid %dth argument", XINT (target_idx) + 1);
9403   if (CONSP (target))
9404     target = XCAR (target);
9405
9406   chain = ((EQ (operation, Qinsert_file_contents)
9407             || EQ (operation, Qwrite_region))
9408            ? Vfile_coding_system_alist
9409            : (EQ (operation, Qopen_network_stream)
9410               ? Vnetwork_coding_system_alist
9411               : Vprocess_coding_system_alist));
9412   if (NILP (chain))
9413     return Qnil;
9414
9415   for (; CONSP (chain); chain = XCDR (chain))
9416     {
9417       Lisp_Object elt;
9418
9419       elt = XCAR (chain);
9420       if (CONSP (elt)
9421           && ((STRINGP (target)
9422                && STRINGP (XCAR (elt))
9423                && fast_string_match (XCAR (elt), target) >= 0)
9424               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9425         {
9426           val = XCDR (elt);
9427           /* Here, if VAL is both a valid coding system and a valid
9428              function symbol, we return VAL as a coding system.  */
9429           if (CONSP (val))
9430             return val;
9431           if (! SYMBOLP (val))
9432             return Qnil;
9433           if (! NILP (Fcoding_system_p (val)))
9434             return Fcons (val, val);
9435           if (! NILP (Ffboundp (val)))
9436             {
9437               /* We use call1 rather than safe_call1
9438                  so as to get bug reports about functions called here
9439                  which don't handle the current interface.  */
9440               val = call1 (val, Flist (nargs, args));
9441               if (CONSP (val))
9442                 return val;
9443               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9444                 return Fcons (val, val);
9445             }
9446           return Qnil;
9447         }
9448     }
9449   return Qnil;
9450 }
9451
9452 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9453        Sset_coding_system_priority, 0, MANY, 0,
9454        doc: /* Assign higher priority to the coding systems given as arguments.
9455 If multiple coding systems belong to the same category,
9456 all but the first one are ignored.
9457
9458 usage: (set-coding-system-priority &rest coding-systems)  */)
9459      (nargs, args)
9460      int nargs;
9461      Lisp_Object *args;
9462 {
9463   int i, j;
9464   int changed[coding_category_max];
9465   enum coding_category priorities[coding_category_max];
9466
9467   bzero (changed, sizeof changed);
9468
9469   for (i = j = 0; i < nargs; i++)
9470     {
9471       enum coding_category category;
9472       Lisp_Object spec, attrs;
9473
9474       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9475       attrs = AREF (spec, 0);
9476       category = XINT (CODING_ATTR_CATEGORY (attrs));
9477       if (changed[category])
9478         /* Ignore this coding system because a coding system of the
9479            same category already had a higher priority.  */
9480         continue;
9481       changed[category] = 1;
9482       priorities[j++] = category;
9483       if (coding_categories[category].id >= 0
9484           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9485         setup_coding_system (args[i], &coding_categories[category]);
9486       Fset (AREF (Vcoding_category_table, category), args[i]);
9487     }
9488
9489   /* Now we have decided top J priorities.  Reflect the order of the
9490      original priorities to the remaining priorities.  */
9491
9492   for (i = j, j = 0; i < coding_category_max; i++, j++)
9493     {
9494       while (j < coding_category_max
9495              && changed[coding_priorities[j]])
9496         j++;
9497       if (j == coding_category_max)
9498         abort ();
9499       priorities[i] = coding_priorities[j];
9500     }
9501
9502   bcopy (priorities, coding_priorities, sizeof priorities);
9503
9504   /* Update `coding-category-list'.  */
9505   Vcoding_category_list = Qnil;
9506   for (i = coding_category_max - 1; i >= 0; i--)
9507     Vcoding_category_list
9508       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9509                Vcoding_category_list);
9510
9511   return Qnil;
9512 }
9513
9514 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9515        Scoding_system_priority_list, 0, 1, 0,
9516        doc: /* Return a list of coding systems ordered by their priorities.
9517 The list contains a subset of coding systems; i.e. coding systems
9518 assigned to each coding category (see `coding-category-list').
9519
9520 HIGHESTP non-nil means just return the highest priority one.  */)
9521      (highestp)
9522      Lisp_Object highestp;
9523 {
9524   int i;
9525   Lisp_Object val;
9526
9527   for (i = 0, val = Qnil; i < coding_category_max; i++)
9528     {
9529       enum coding_category category = coding_priorities[i];
9530       int id = coding_categories[category].id;
9531       Lisp_Object attrs;
9532
9533       if (id < 0)
9534         continue;
9535       attrs = CODING_ID_ATTRS (id);
9536       if (! NILP (highestp))
9537         return CODING_ATTR_BASE_NAME (attrs);
9538       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9539     }
9540   return Fnreverse (val);
9541 }
9542
9543 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9544
9545 static Lisp_Object
9546 make_subsidiaries (Lisp_Object base)
9547 {
9548   Lisp_Object subsidiaries;
9549   int base_name_len = SBYTES (SYMBOL_NAME (base));
9550   char *buf = (char *) alloca (base_name_len + 6);
9551   int i;
9552
9553   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9554   subsidiaries = Fmake_vector (make_number (3), Qnil);
9555   for (i = 0; i < 3; i++)
9556     {
9557       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9558       ASET (subsidiaries, i, intern (buf));
9559     }
9560   return subsidiaries;
9561 }
9562
9563
9564 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9565        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9566        doc: /* For internal use only.
9567 usage: (define-coding-system-internal ...)  */)
9568      (nargs, args)
9569      int nargs;
9570      Lisp_Object *args;
9571 {
9572   Lisp_Object name;
9573   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9574   Lisp_Object attrs;            /* Vector of attributes.  */
9575   Lisp_Object eol_type;
9576   Lisp_Object aliases;
9577   Lisp_Object coding_type, charset_list, safe_charsets;
9578   enum coding_category category;
9579   Lisp_Object tail, val;
9580   int max_charset_id = 0;
9581   int i;
9582
9583   if (nargs < coding_arg_max)
9584     goto short_args;
9585
9586   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9587
9588   name = args[coding_arg_name];
9589   CHECK_SYMBOL (name);
9590   CODING_ATTR_BASE_NAME (attrs) = name;
9591
9592   val = args[coding_arg_mnemonic];
9593   if (! STRINGP (val))
9594     CHECK_CHARACTER (val);
9595   CODING_ATTR_MNEMONIC (attrs) = val;
9596
9597   coding_type = args[coding_arg_coding_type];
9598   CHECK_SYMBOL (coding_type);
9599   CODING_ATTR_TYPE (attrs) = coding_type;
9600
9601   charset_list = args[coding_arg_charset_list];
9602   if (SYMBOLP (charset_list))
9603     {
9604       if (EQ (charset_list, Qiso_2022))
9605         {
9606           if (! EQ (coding_type, Qiso_2022))
9607             error ("Invalid charset-list");
9608           charset_list = Viso_2022_charset_list;
9609         }
9610       else if (EQ (charset_list, Qemacs_mule))
9611         {
9612           if (! EQ (coding_type, Qemacs_mule))
9613             error ("Invalid charset-list");
9614           charset_list = Vemacs_mule_charset_list;
9615         }
9616       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9617         if (max_charset_id < XFASTINT (XCAR (tail)))
9618           max_charset_id = XFASTINT (XCAR (tail));
9619     }
9620   else
9621     {
9622       charset_list = Fcopy_sequence (charset_list);
9623       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9624         {
9625           struct charset *charset;
9626
9627           val = XCAR (tail);
9628           CHECK_CHARSET_GET_CHARSET (val, charset);
9629           if (EQ (coding_type, Qiso_2022)
9630               ? CHARSET_ISO_FINAL (charset) < 0
9631               : EQ (coding_type, Qemacs_mule)
9632               ? CHARSET_EMACS_MULE_ID (charset) < 0
9633               : 0)
9634             error ("Can't handle charset `%s'",
9635                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9636
9637           XSETCAR (tail, make_number (charset->id));
9638           if (max_charset_id < charset->id)
9639             max_charset_id = charset->id;
9640         }
9641     }
9642   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9643
9644   safe_charsets = make_uninit_string (max_charset_id + 1);
9645   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9646   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9647     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9648   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9649
9650   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9651
9652   val = args[coding_arg_decode_translation_table];
9653   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9654     CHECK_SYMBOL (val);
9655   CODING_ATTR_DECODE_TBL (attrs) = val;
9656
9657   val = args[coding_arg_encode_translation_table];
9658   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9659     CHECK_SYMBOL (val);
9660   CODING_ATTR_ENCODE_TBL (attrs) = val;
9661
9662   val = args[coding_arg_post_read_conversion];
9663   CHECK_SYMBOL (val);
9664   CODING_ATTR_POST_READ (attrs) = val;
9665
9666   val = args[coding_arg_pre_write_conversion];
9667   CHECK_SYMBOL (val);
9668   CODING_ATTR_PRE_WRITE (attrs) = val;
9669
9670   val = args[coding_arg_default_char];
9671   if (NILP (val))
9672     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9673   else
9674     {
9675       CHECK_CHARACTER (val);
9676       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9677     }
9678
9679   val = args[coding_arg_for_unibyte];
9680   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9681
9682   val = args[coding_arg_plist];
9683   CHECK_LIST (val);
9684   CODING_ATTR_PLIST (attrs) = val;
9685
9686   if (EQ (coding_type, Qcharset))
9687     {
9688       /* Generate a lisp vector of 256 elements.  Each element is nil,
9689          integer, or a list of charset IDs.
9690
9691          If Nth element is nil, the byte code N is invalid in this
9692          coding system.
9693
9694          If Nth element is a number NUM, N is the first byte of a
9695          charset whose ID is NUM.
9696
9697          If Nth element is a list of charset IDs, N is the first byte
9698          of one of them.  The list is sorted by dimensions of the
9699          charsets.  A charset of smaller dimension comes first.  */
9700       val = Fmake_vector (make_number (256), Qnil);
9701
9702       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9703         {
9704           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9705           int dim = CHARSET_DIMENSION (charset);
9706           int idx = (dim - 1) * 4;
9707
9708           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9709             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9710
9711           for (i = charset->code_space[idx];
9712                i <= charset->code_space[idx + 1]; i++)
9713             {
9714               Lisp_Object tmp, tmp2;
9715               int dim2;
9716
9717               tmp = AREF (val, i);
9718               if (NILP (tmp))
9719                 tmp = XCAR (tail);
9720               else if (NUMBERP (tmp))
9721                 {
9722                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9723                   if (dim < dim2)
9724                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9725                   else
9726                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9727                 }
9728               else
9729                 {
9730                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9731                     {
9732                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9733                       if (dim < dim2)
9734                         break;
9735                     }
9736                   if (NILP (tmp2))
9737                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9738                   else
9739                     {
9740                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9741                       XSETCAR (tmp2, XCAR (tail));
9742                     }
9743                 }
9744               ASET (val, i, tmp);
9745             }
9746         }
9747       ASET (attrs, coding_attr_charset_valids, val);
9748       category = coding_category_charset;
9749     }
9750   else if (EQ (coding_type, Qccl))
9751     {
9752       Lisp_Object valids;
9753
9754       if (nargs < coding_arg_ccl_max)
9755         goto short_args;
9756
9757       val = args[coding_arg_ccl_decoder];
9758       CHECK_CCL_PROGRAM (val);
9759       if (VECTORP (val))
9760         val = Fcopy_sequence (val);
9761       ASET (attrs, coding_attr_ccl_decoder, val);
9762
9763       val = args[coding_arg_ccl_encoder];
9764       CHECK_CCL_PROGRAM (val);
9765       if (VECTORP (val))
9766         val = Fcopy_sequence (val);
9767       ASET (attrs, coding_attr_ccl_encoder, val);
9768
9769       val = args[coding_arg_ccl_valids];
9770       valids = Fmake_string (make_number (256), make_number (0));
9771       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9772         {
9773           int from, to;
9774
9775           val = Fcar (tail);
9776           if (INTEGERP (val))
9777             {
9778               from = to = XINT (val);
9779               if (from < 0 || from > 255)
9780                 args_out_of_range_3 (val, make_number (0), make_number (255));
9781             }
9782           else
9783             {
9784               CHECK_CONS (val);
9785               CHECK_NATNUM_CAR (val);
9786               CHECK_NATNUM_CDR (val);
9787               from = XINT (XCAR (val));
9788               if (from > 255)
9789                 args_out_of_range_3 (XCAR (val),
9790                                      make_number (0), make_number (255));
9791               to = XINT (XCDR (val));
9792               if (to < from || to > 255)
9793                 args_out_of_range_3 (XCDR (val),
9794                                      XCAR (val), make_number (255));
9795             }
9796           for (i = from; i <= to; i++)
9797             SSET (valids, i, 1);
9798         }
9799       ASET (attrs, coding_attr_ccl_valids, valids);
9800
9801       category = coding_category_ccl;
9802     }
9803   else if (EQ (coding_type, Qutf_16))
9804     {
9805       Lisp_Object bom, endian;
9806
9807       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9808
9809       if (nargs < coding_arg_utf16_max)
9810         goto short_args;
9811
9812       bom = args[coding_arg_utf16_bom];
9813       if (! NILP (bom) && ! EQ (bom, Qt))
9814         {
9815           CHECK_CONS (bom);
9816           val = XCAR (bom);
9817           CHECK_CODING_SYSTEM (val);
9818           val = XCDR (bom);
9819           CHECK_CODING_SYSTEM (val);
9820         }
9821       ASET (attrs, coding_attr_utf_bom, bom);
9822
9823       endian = args[coding_arg_utf16_endian];
9824       CHECK_SYMBOL (endian);
9825       if (NILP (endian))
9826         endian = Qbig;
9827       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9828         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9829       ASET (attrs, coding_attr_utf_16_endian, endian);
9830
9831       category = (CONSP (bom)
9832                   ? coding_category_utf_16_auto
9833                   : NILP (bom)
9834                   ? (EQ (endian, Qbig)
9835                      ? coding_category_utf_16_be_nosig
9836                      : coding_category_utf_16_le_nosig)
9837                   : (EQ (endian, Qbig)
9838                      ? coding_category_utf_16_be
9839                      : coding_category_utf_16_le));
9840     }
9841   else if (EQ (coding_type, Qiso_2022))
9842     {
9843       Lisp_Object initial, reg_usage, request, flags;
9844       int i;
9845
9846       if (nargs < coding_arg_iso2022_max)
9847         goto short_args;
9848
9849       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9850       CHECK_VECTOR (initial);
9851       for (i = 0; i < 4; i++)
9852         {
9853           val = Faref (initial, make_number (i));
9854           if (! NILP (val))
9855             {
9856               struct charset *charset;
9857
9858               CHECK_CHARSET_GET_CHARSET (val, charset);
9859               ASET (initial, i, make_number (CHARSET_ID (charset)));
9860               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9861                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9862             }
9863           else
9864             ASET (initial, i, make_number (-1));
9865         }
9866
9867       reg_usage = args[coding_arg_iso2022_reg_usage];
9868       CHECK_CONS (reg_usage);
9869       CHECK_NUMBER_CAR (reg_usage);
9870       CHECK_NUMBER_CDR (reg_usage);
9871
9872       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9873       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9874         {
9875           int id;
9876           Lisp_Object tmp;
9877
9878           val = Fcar (tail);
9879           CHECK_CONS (val);
9880           tmp = XCAR (val);
9881           CHECK_CHARSET_GET_ID (tmp, id);
9882           CHECK_NATNUM_CDR (val);
9883           if (XINT (XCDR (val)) >= 4)
9884             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9885           XSETCAR (val, make_number (id));
9886         }
9887
9888       flags = args[coding_arg_iso2022_flags];
9889       CHECK_NATNUM (flags);
9890       i = XINT (flags);
9891       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9892         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9893
9894       ASET (attrs, coding_attr_iso_initial, initial);
9895       ASET (attrs, coding_attr_iso_usage, reg_usage);
9896       ASET (attrs, coding_attr_iso_request, request);
9897       ASET (attrs, coding_attr_iso_flags, flags);
9898       setup_iso_safe_charsets (attrs);
9899
9900       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9901         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9902                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9903                     ? coding_category_iso_7_else
9904                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9905                     ? coding_category_iso_7
9906                     : coding_category_iso_7_tight);
9907       else
9908         {
9909           int id = XINT (AREF (initial, 1));
9910
9911           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9912                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9913                        || id < 0)
9914                       ? coding_category_iso_8_else
9915                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9916                       ? coding_category_iso_8_1
9917                       : coding_category_iso_8_2);
9918         }
9919       if (category != coding_category_iso_8_1
9920           && category != coding_category_iso_8_2)
9921         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9922     }
9923   else if (EQ (coding_type, Qemacs_mule))
9924     {
9925       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9926         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9927       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9928       category = coding_category_emacs_mule;
9929     }
9930   else if (EQ (coding_type, Qshift_jis))
9931     {
9932
9933       struct charset *charset;
9934
9935       if (XINT (Flength (charset_list)) != 3
9936           && XINT (Flength (charset_list)) != 4)
9937         error ("There should be three or four charsets");
9938
9939       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9940       if (CHARSET_DIMENSION (charset) != 1)
9941         error ("Dimension of charset %s is not one",
9942                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9943       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9944         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9945
9946       charset_list = XCDR (charset_list);
9947       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9948       if (CHARSET_DIMENSION (charset) != 1)
9949         error ("Dimension of charset %s is not one",
9950                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9951
9952       charset_list = XCDR (charset_list);
9953       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9954       if (CHARSET_DIMENSION (charset) != 2)
9955         error ("Dimension of charset %s is not two",
9956                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9957
9958       charset_list = XCDR (charset_list);
9959       if (! NILP (charset_list))
9960         {
9961           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9962           if (CHARSET_DIMENSION (charset) != 2)
9963             error ("Dimension of charset %s is not two",
9964                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9965         }
9966
9967       category = coding_category_sjis;
9968       Vsjis_coding_system = name;
9969     }
9970   else if (EQ (coding_type, Qbig5))
9971     {
9972       struct charset *charset;
9973
9974       if (XINT (Flength (charset_list)) != 2)
9975         error ("There should be just two charsets");
9976
9977       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9978       if (CHARSET_DIMENSION (charset) != 1)
9979         error ("Dimension of charset %s is not one",
9980                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9981       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9982         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9983
9984       charset_list = XCDR (charset_list);
9985       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9986       if (CHARSET_DIMENSION (charset) != 2)
9987         error ("Dimension of charset %s is not two",
9988                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9989
9990       category = coding_category_big5;
9991       Vbig5_coding_system = name;
9992     }
9993   else if (EQ (coding_type, Qraw_text))
9994     {
9995       category = coding_category_raw_text;
9996       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9997     }
9998   else if (EQ (coding_type, Qutf_8))
9999     {
10000       Lisp_Object bom;
10001
10002       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10003
10004       if (nargs < coding_arg_utf8_max)
10005         goto short_args;
10006
10007       bom = args[coding_arg_utf8_bom];
10008       if (! NILP (bom) && ! EQ (bom, Qt))
10009         {
10010           CHECK_CONS (bom);
10011           val = XCAR (bom);
10012           CHECK_CODING_SYSTEM (val);
10013           val = XCDR (bom);
10014           CHECK_CODING_SYSTEM (val);
10015         }
10016       ASET (attrs, coding_attr_utf_bom, bom);
10017
10018       category = (CONSP (bom) ? coding_category_utf_8_auto
10019                   : NILP (bom) ? coding_category_utf_8_nosig
10020                   : coding_category_utf_8_sig);
10021     }
10022   else if (EQ (coding_type, Qundecided))
10023     category = coding_category_undecided;
10024   else
10025     error ("Invalid coding system type: %s",
10026            SDATA (SYMBOL_NAME (coding_type)));
10027
10028   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10029   CODING_ATTR_PLIST (attrs)
10030     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10031                                 CODING_ATTR_PLIST (attrs)));
10032   CODING_ATTR_PLIST (attrs)
10033     = Fcons (QCascii_compatible_p,
10034              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10035                     CODING_ATTR_PLIST (attrs)));
10036
10037   eol_type = args[coding_arg_eol_type];
10038   if (! NILP (eol_type)
10039       && ! EQ (eol_type, Qunix)
10040       && ! EQ (eol_type, Qdos)
10041       && ! EQ (eol_type, Qmac))
10042     error ("Invalid eol-type");
10043
10044   aliases = Fcons (name, Qnil);
10045
10046   if (NILP (eol_type))
10047     {
10048       eol_type = make_subsidiaries (name);
10049       for (i = 0; i < 3; i++)
10050         {
10051           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10052
10053           this_name = AREF (eol_type, i);
10054           this_aliases = Fcons (this_name, Qnil);
10055           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10056           this_spec = Fmake_vector (make_number (3), attrs);
10057           ASET (this_spec, 1, this_aliases);
10058           ASET (this_spec, 2, this_eol_type);
10059           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10060           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10061           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10062           if (NILP (val))
10063             Vcoding_system_alist
10064               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10065                        Vcoding_system_alist);
10066         }
10067     }
10068
10069   spec_vec = Fmake_vector (make_number (3), attrs);
10070   ASET (spec_vec, 1, aliases);
10071   ASET (spec_vec, 2, eol_type);
10072
10073   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10074   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10075   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10076   if (NILP (val))
10077     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10078                                   Vcoding_system_alist);
10079
10080   {
10081     int id = coding_categories[category].id;
10082
10083     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10084       setup_coding_system (name, &coding_categories[category]);
10085   }
10086
10087   return Qnil;
10088
10089  short_args:
10090   return Fsignal (Qwrong_number_of_arguments,
10091                   Fcons (intern ("define-coding-system-internal"),
10092                          make_number (nargs)));
10093 }
10094
10095
10096 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10097        3, 3, 0,
10098        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10099   (coding_system, prop, val)
10100      Lisp_Object coding_system, prop, val;
10101 {
10102   Lisp_Object spec_vec, attr_vec;
10103
10104   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec_vec);
10105   attr_vec = AREF (spec_vec, 0);
10106   /* Properties that are mirrored in the attribute vector are checked
10107      and stored there first; any other PROP only reaches the plist.  */
10108   if (EQ (prop, QCmnemonic))
10109     {
10110       if (! STRINGP (val))
10111         CHECK_CHARACTER (val);
10112       CODING_ATTR_MNEMONIC (attr_vec) = val;
10113     }
10114   else if (EQ (prop, QCdefault_char))
10115     {
10116       /* nil stands for a space; the substituted value is also what
10117          gets stored in the plist and returned below.  */
10118       if (! NILP (val))
10119         CHECK_CHARACTER (val);
10120       else
10121         val = make_number (' ');
10122       CODING_ATTR_DEFAULT_CHAR (attr_vec) = val;
10123     }
10124   else if (EQ (prop, QCdecode_translation_table)
10125            || EQ (prop, QCencode_translation_table))
10126     {
10127       /* Accept a char-table or a cons; anything else must be a symbol.  */
10128       if (! (CHAR_TABLE_P (val) || CONSP (val)))
10129         CHECK_SYMBOL (val);
10130       if (EQ (prop, QCdecode_translation_table))
10131         CODING_ATTR_DECODE_TBL (attr_vec) = val;
10132       else
10133         CODING_ATTR_ENCODE_TBL (attr_vec) = val;
10134     }
10135   else if (EQ (prop, QCpost_read_conversion)
10136            || EQ (prop, QCpre_write_conversion))
10137     {
10138       CHECK_SYMBOL (val);
10139       if (EQ (prop, QCpost_read_conversion))
10140         CODING_ATTR_POST_READ (attr_vec) = val;
10141       else
10142         CODING_ATTR_PRE_WRITE (attr_vec) = val;
10143     }
10144   else if (EQ (prop, QCascii_compatible_p))
10145     CODING_ATTR_ASCII_COMPAT (attr_vec) = val;
10146
10147   CODING_ATTR_PLIST (attr_vec)
10148     = Fplist_put (CODING_ATTR_PLIST (attr_vec), prop, val);
10149   return val;
10150 }
10151
10152
10153 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10154        Sdefine_coding_system_alias, 2, 2, 0,
10155        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10156      (alias, coding_system)
10157      Lisp_Object alias, coding_system;
10158 {
10159   Lisp_Object spec_vec, last_cell, eol, alias_name;
10160
10161   CHECK_SYMBOL (alias);
10162   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec_vec);
10163
10164   /* Slot 1 of the spec holds the alias list, which is expected to be
10165      non-empty and to start with the base coding system.  Walk to its
10166      last cons and hang ALIAS off the end.  */
10167   for (last_cell = AREF (spec_vec, 1); ! NILP (XCDR (last_cell)); )
10168     last_cell = XCDR (last_cell);
10169   XSETCDR (last_cell, Fcons (alias, Qnil));
10170
10171   /* When slot 2 is a vector, alias each of the three names that
10172      make_subsidiaries returns for ALIAS to the matching element.  */
10173   eol = AREF (spec_vec, 2);
10174   if (VECTORP (eol))
10175     {
10176       Lisp_Object sub_names = make_subsidiaries (alias);
10177       int k;
10178
10179       for (k = 0; k < 3; k++)
10180         Fdefine_coding_system_alias (AREF (sub_names, k), AREF (eol, k));
10181     }
10182
10183   /* Register ALIAS itself: hash table, name list, name-keyed alist.  */
10184   Fputhash (alias, spec_vec, Vcoding_system_hash_table);
10185   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10186   alias_name = Fsymbol_name (alias);
10187   if (NILP (Fassoc (alias_name, Vcoding_system_alist)))
10188     Vcoding_system_alist
10189       = Fcons (Fcons (alias_name, Qnil), Vcoding_system_alist);
10190   return Qnil;
10191 }
10192
10193 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10194        1, 1, 0,
10195        doc: /* Return the base of CODING-SYSTEM.
10196 Any alias or subsidiary coding system is not a base coding system.  */)
10197   (coding_system)
10198      Lisp_Object coding_system;
10199 {
10200   Lisp_Object spec_vec;
10201
10202   /* nil is answered directly with `no-conversion', without a lookup.  */
10203   if (NILP (coding_system))
10204     return Qno_conversion;
10205   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec_vec);
10206   return CODING_ATTR_BASE_NAME (AREF (spec_vec, 0));
10207 }
10208
10209 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10210        1, 1, 0,
10211        doc: /* Return the property list of CODING-SYSTEM.  */)
10212      (coding_system)
10213      Lisp_Object coding_system;
10214 {
10215   Lisp_Object spec;
10216
10217   /* A nil argument is looked up as `no-conversion'.  */
10218   if (NILP (coding_system))
10219     coding_system = Qno_conversion;
10220   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10221   return CODING_ATTR_PLIST (AREF (spec, 0));
10222 }
10223
10224
10225 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10226        1, 1, 0,
10227        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10228      (coding_system)
10229      Lisp_Object coding_system;
10230 {
10231   Lisp_Object spec_vec;
10232   Lisp_Object target = NILP (coding_system) ? Qno_conversion : coding_system;
10233
10234   /* Slot 1 of the spec is the alias list; it is returned as is, not copied.  */
10235   CHECK_CODING_SYSTEM_GET_SPEC (target, spec_vec);
10236   return AREF (spec_vec, 1);
10237 }
10238
10239 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10240        Scoding_system_eol_type, 1, 1, 0,
10241        doc: /* Return eol-type of CODING-SYSTEM.
10242 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10243
10244 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10245 and CR respectively.
10246
10247 A vector value indicates that a format of end-of-line should be
10248 detected automatically.  Nth element of the vector is the subsidiary
10249 coding system whose eol-type is N.  */)
10250      (coding_system)
10251      Lisp_Object coding_system;
10252 {
10253   Lisp_Object spec_vec, eol;
10254
10255   if (NILP (coding_system))
10256     coding_system = Qno_conversion;
10257   if (! CODING_SYSTEM_P (coding_system))
10258     return Qnil;                /* Not a coding system: no error, just nil.  */
10259   spec_vec = CODING_SYSTEM_SPEC (coding_system);
10260   eol = AREF (spec_vec, 2);
10261   if (VECTORP (eol))
10262     return Fcopy_sequence (eol);        /* A copy, not the stored vector.  */
10263   if (EQ (eol, Qunix))
10264     return make_number (0);
10265   return make_number (EQ (eol, Qdos) ? 1 : 2);
10266 }
10267
10268 #endif /* emacs */
10269
10270 \f
10271 /*** 12. Postamble ***/
10272
10273 void
10274 init_coding_once (void)
10275 {
10276   int code;
10277
10278   /* Reset each category's id to -1 and lay out the priorities in
10279      plain index order.  */
10280   for (code = 0; code < coding_category_max; code++)
10281     {
10282       coding_priorities[code] = code;
10283       coding_categories[code].id = -1;
10284     }
10285
10286   /* Default ISO 2022 class of every byte value, plus a default of 1
10287      in emacs_mule_bytes; the exceptions are patched in below.  */
10288   for (code = 0; code < 256; code++)
10289     {
10290       iso_code_class[code]
10291         = (code < 0x20 ? ISO_control_0
10292            : (code == 0x20 || code == 0x7F) ? ISO_0x20_or_0x7F
10293            : code < 0x7F ? ISO_graphic_plane_0
10294            : code < 0xA0 ? ISO_control_1
10295            : (code == 0xA0 || code == 0xFF) ? ISO_0xA0_or_0xFF
10296            : ISO_graphic_plane_1);
10297       emacs_mule_bytes[code] = 1;
10298     }
10299   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10300   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10301   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10302   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10303   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10304   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10305   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10306
10307   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10308   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10309   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10310   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10311 }
10312
10313 #ifdef emacs
10314
10315 void
10316 syms_of_coding (void)
10317 {
10318   staticpro (&Vcoding_system_hash_table);
10319   {
10320     Lisp_Object args[2];
10321     args[0] = QCtest;
10322     args[1] = Qeq;
10323     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10324   }
10325
10326   staticpro (&Vsjis_coding_system);
10327   Vsjis_coding_system = Qnil;
10328
10329   staticpro (&Vbig5_coding_system);
10330   Vbig5_coding_system = Qnil;
10331
10332   staticpro (&Vcode_conversion_reused_workbuf);
10333   Vcode_conversion_reused_workbuf = Qnil;
10334
10335   staticpro (&Vcode_conversion_workbuf_name);
10336   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10337
10338   reused_workbuf_in_use = 0;
10339
10340   DEFSYM (Qcharset, "charset");
10341   DEFSYM (Qtarget_idx, "target-idx");
10342   DEFSYM (Qcoding_system_history, "coding-system-history");
10343   Fset (Qcoding_system_history, Qnil);
10344
10345   /* Target FILENAME is the first argument.  */
10346   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10347   /* Target FILENAME is the third argument.  */
10348   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10349
10350   DEFSYM (Qcall_process, "call-process");
10351   /* Target PROGRAM is the first argument.  */
10352   Fput (Qcall_process, Qtarget_idx, make_number (0));
10353
10354   DEFSYM (Qcall_process_region, "call-process-region");
10355   /* Target PROGRAM is the third argument.  */
10356   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10357
10358   DEFSYM (Qstart_process, "start-process");
10359   /* Target PROGRAM is the third argument.  */
10360   Fput (Qstart_process, Qtarget_idx, make_number (2));
10361
10362   DEFSYM (Qopen_network_stream, "open-network-stream");
10363   /* Target SERVICE is the fourth argument.  */
10364   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10365
10366   DEFSYM (Qcoding_system, "coding-system");
10367   DEFSYM (Qcoding_aliases, "coding-aliases");
10368
10369   DEFSYM (Qeol_type, "eol-type");
10370   DEFSYM (Qunix, "unix");
10371   DEFSYM (Qdos, "dos");
10372
10373   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10374   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10375   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10376   DEFSYM (Qdefault_char, "default-char");
10377   DEFSYM (Qundecided, "undecided");
10378   DEFSYM (Qno_conversion, "no-conversion");
10379   DEFSYM (Qraw_text, "raw-text");
10380
10381   DEFSYM (Qiso_2022, "iso-2022");
10382
10383   DEFSYM (Qutf_8, "utf-8");
10384   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10385
10386   DEFSYM (Qutf_16, "utf-16");
10387   DEFSYM (Qbig, "big");
10388   DEFSYM (Qlittle, "little");
10389
10390   DEFSYM (Qshift_jis, "shift-jis");
10391   DEFSYM (Qbig5, "big5");
10392
10393   DEFSYM (Qcoding_system_p, "coding-system-p");
10394
10395   DEFSYM (Qcoding_system_error, "coding-system-error");
10396   Fput (Qcoding_system_error, Qerror_conditions,
10397         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10398   Fput (Qcoding_system_error, Qerror_message,
10399         make_pure_c_string ("Invalid coding system"));
10400
10401   /* Intern this now in case it isn't already done.
10402      Setting this variable twice is harmless.
10403      But don't staticpro it here--that is done in alloc.c.  */
10404   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10405
10406   DEFSYM (Qtranslation_table, "translation-table");
10407   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10408   DEFSYM (Qtranslation_table_id, "translation-table-id");
10409   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10410   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10411
10412   DEFSYM (Qvalid_codes, "valid-codes");
10413
10414   DEFSYM (Qemacs_mule, "emacs-mule");
10415
10416   DEFSYM (QCcategory, ":category");
10417   DEFSYM (QCmnemonic, ":mnemonic");
10418   DEFSYM (QCdefault_char, ":default-char");
10419   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10420   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10421   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10422   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10423   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10424
10425   Vcoding_category_table
10426     = Fmake_vector (make_number (coding_category_max), Qnil);
10427   staticpro (&Vcoding_category_table);
10428   /* The following are targets of code detection.  */
10429   ASET (Vcoding_category_table, coding_category_iso_7,
10430         intern_c_string ("coding-category-iso-7"));
10431   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10432         intern_c_string ("coding-category-iso-7-tight"));
10433   ASET (Vcoding_category_table, coding_category_iso_8_1,
10434         intern_c_string ("coding-category-iso-8-1"));
10435   ASET (Vcoding_category_table, coding_category_iso_8_2,
10436         intern_c_string ("coding-category-iso-8-2"));
10437   ASET (Vcoding_category_table, coding_category_iso_7_else,
10438         intern_c_string ("coding-category-iso-7-else"));
10439   ASET (Vcoding_category_table, coding_category_iso_8_else,
10440         intern_c_string ("coding-category-iso-8-else"));
10441   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10442         intern_c_string ("coding-category-utf-8-auto"));
10443   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10444         intern_c_string ("coding-category-utf-8"));
10445   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10446         intern_c_string ("coding-category-utf-8-sig"));
10447   ASET (Vcoding_category_table, coding_category_utf_16_be,
10448         intern_c_string ("coding-category-utf-16-be"));
10449   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10450         intern_c_string ("coding-category-utf-16-auto"));
10451   ASET (Vcoding_category_table, coding_category_utf_16_le,
10452         intern_c_string ("coding-category-utf-16-le"));
10453   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10454         intern_c_string ("coding-category-utf-16-be-nosig"));
10455   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10456         intern_c_string ("coding-category-utf-16-le-nosig"));
10457   ASET (Vcoding_category_table, coding_category_charset,
10458         intern_c_string ("coding-category-charset"));
10459   ASET (Vcoding_category_table, coding_category_sjis,
10460         intern_c_string ("coding-category-sjis"));
10461   ASET (Vcoding_category_table, coding_category_big5,
10462         intern_c_string ("coding-category-big5"));
10463   ASET (Vcoding_category_table, coding_category_ccl,
10464         intern_c_string ("coding-category-ccl"));
10465   ASET (Vcoding_category_table, coding_category_emacs_mule,
10466         intern_c_string ("coding-category-emacs-mule"));
10467   /* Followings are NOT target of code detection.  */
10468   ASET (Vcoding_category_table, coding_category_raw_text,
10469         intern_c_string ("coding-category-raw-text"));
10470   ASET (Vcoding_category_table, coding_category_undecided,
10471         intern_c_string ("coding-category-undecided"));
10472
10473   DEFSYM (Qinsufficient_source, "insufficient-source");
10474   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10475   DEFSYM (Qinvalid_source, "invalid-source");
10476   DEFSYM (Qinterrupted, "interrupted");
10477   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10478   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10479
10480   defsubr (&Scoding_system_p);
10481   defsubr (&Sread_coding_system);
10482   defsubr (&Sread_non_nil_coding_system);
10483   defsubr (&Scheck_coding_system);
10484   defsubr (&Sdetect_coding_region);
10485   defsubr (&Sdetect_coding_string);
10486   defsubr (&Sfind_coding_systems_region_internal);
10487   defsubr (&Sunencodable_char_position);
10488   defsubr (&Scheck_coding_systems_region);
10489   defsubr (&Sdecode_coding_region);
10490   defsubr (&Sencode_coding_region);
10491   defsubr (&Sdecode_coding_string);
10492   defsubr (&Sencode_coding_string);
10493   defsubr (&Sdecode_sjis_char);
10494   defsubr (&Sencode_sjis_char);
10495   defsubr (&Sdecode_big5_char);
10496   defsubr (&Sencode_big5_char);
10497   defsubr (&Sset_terminal_coding_system_internal);
10498   defsubr (&Sset_safe_terminal_coding_system_internal);
10499   defsubr (&Sterminal_coding_system);
10500   defsubr (&Sset_keyboard_coding_system_internal);
10501   defsubr (&Skeyboard_coding_system);
10502   defsubr (&Sfind_operation_coding_system);
10503   defsubr (&Sset_coding_system_priority);
10504   defsubr (&Sdefine_coding_system_internal);
10505   defsubr (&Sdefine_coding_system_alias);
10506   defsubr (&Scoding_system_put);
10507   defsubr (&Scoding_system_base);
10508   defsubr (&Scoding_system_plist);
10509   defsubr (&Scoding_system_aliases);
10510   defsubr (&Scoding_system_eol_type);
10511   defsubr (&Scoding_system_priority_list);
10512
10513   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10514                doc: /* List of coding systems.
10515
10516 Do not alter the value of this variable manually.  This variable should be
10517 updated by the functions `define-coding-system' and
10518 `define-coding-system-alias'.  */);
10519   Vcoding_system_list = Qnil;
10520
10521   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10522                doc: /* Alist of coding system names.
10523 Each element is one element list of coding system name.
10524 This variable is given to `completing-read' as COLLECTION argument.
10525
10526 Do not alter the value of this variable manually.  This variable should be
10527 updated by the functions `make-coding-system' and
10528 `define-coding-system-alias'.  */);
10529   Vcoding_system_alist = Qnil;
10530
10531   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10532                doc: /* List of coding-categories (symbols) ordered by priority.
10533
10534 On detecting a coding system, Emacs tries code detection algorithms
10535 associated with each coding-category one by one in this order.  When
10536 one algorithm agrees with a byte sequence of source text, the coding
10537 system bound to the corresponding coding-category is selected.
10538
10539 Don't modify this variable directly, but use `set-coding-priority'.  */);
10540   {
10541     int i;
10542
10543     Vcoding_category_list = Qnil;
10544     for (i = coding_category_max - 1; i >= 0; i--)
10545       Vcoding_category_list
10546         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10547                  Vcoding_category_list);
10548   }
10549
10550   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10551                doc: /* Specify the coding system for read operations.
10552 It is useful to bind this variable with `let', but do not set it globally.
10553 If the value is a coding system, it is used for decoding on read operation.
10554 If not, an appropriate element is used from one of the coding system alists.
10555 There are three such tables: `file-coding-system-alist',
10556 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10557   Vcoding_system_for_read = Qnil;
10558
10559   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10560                doc: /* Specify the coding system for write operations.
10561 Programs bind this variable with `let', but you should not set it globally.
10562 If the value is a coding system, it is used for encoding of output,
10563 when writing it to a file and when sending it to a file or subprocess.
10564
10565 If this does not specify a coding system, an appropriate element
10566 is used from one of the coding system alists.
10567 There are three such tables: `file-coding-system-alist',
10568 `process-coding-system-alist', and `network-coding-system-alist'.
10569 For output to files, if the above procedure does not specify a coding system,
10570 the value of `buffer-file-coding-system' is used.  */);
10571   Vcoding_system_for_write = Qnil;
10572
10573   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10574                doc: /*
10575 Coding system used in the latest file or process I/O.  */);
10576   Vlast_coding_system_used = Qnil;
10577
10578   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10579                doc: /*
10580 Error status of the last code conversion.
10581
10582 When an error was detected in the last code conversion, this variable
10583 is set to one of the following symbols.
10584   `insufficient-source'
10585   `inconsistent-eol'
10586   `invalid-source'
10587   `interrupted'
10588   `insufficient-memory'
10589 When no error was detected, the value doesn't change.  So, to check
10590 the error status of a code conversion by this variable, you must
10591 explicitly set this variable to nil before performing code
10592 conversion.  */);
10593   Vlast_code_conversion_error = Qnil;
10594
10595   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10596                doc: /*
10597 *Non-nil means always inhibit code conversion of end-of-line format.
10598 See info node `Coding Systems' and info node `Text and Binary' concerning
10599 such conversion.  */);
10600   inhibit_eol_conversion = 0;
10601
10602   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10603                doc: /*
10604 Non-nil means process buffer inherits coding system of process output.
10605 Bind it to t if the process output is to be treated as if it were a file
10606 read from some filesystem.  */);
10607   inherit_process_coding_system = 0;
10608
10609   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10610                doc: /*
10611 Alist to decide a coding system to use for a file I/O operation.
10612 The format is ((PATTERN . VAL) ...),
10613 where PATTERN is a regular expression matching a file name,
10614 VAL is a coding system, a cons of coding systems, or a function symbol.
10615 If VAL is a coding system, it is used for both decoding and encoding
10616 the file contents.
10617 If VAL is a cons of coding systems, the car part is used for decoding,
10618 and the cdr part is used for encoding.
10619 If VAL is a function symbol, the function must return a coding system
10620 or a cons of coding systems which are used as above.  The function is
10621 called with an argument that is a list of the arguments with which
10622 `find-operation-coding-system' was called.  If the function can't decide
10623 a coding system, it can return `undecided' so that the normal
10624 code-detection is performed.
10625
10626 See also the function `find-operation-coding-system'
10627 and the variable `auto-coding-alist'.  */);
10628   Vfile_coding_system_alist = Qnil;
10629
10630   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10631                doc: /*
10632 Alist to decide a coding system to use for a process I/O operation.
10633 The format is ((PATTERN . VAL) ...),
10634 where PATTERN is a regular expression matching a program name,
10635 VAL is a coding system, a cons of coding systems, or a function symbol.
10636 If VAL is a coding system, it is used for both decoding what received
10637 from the program and encoding what sent to the program.
10638 If VAL is a cons of coding systems, the car part is used for decoding,
10639 and the cdr part is used for encoding.
10640 If VAL is a function symbol, the function must return a coding system
10641 or a cons of coding systems which are used as above.
10642
10643 See also the function `find-operation-coding-system'.  */);
10644   Vprocess_coding_system_alist = Qnil;
10645
10646   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10647                doc: /*
10648 Alist to decide a coding system to use for a network I/O operation.
10649 The format is ((PATTERN . VAL) ...),
10650 where PATTERN is a regular expression matching a network service name
10651 or is a port number to connect to,
10652 VAL is a coding system, a cons of coding systems, or a function symbol.
10653 If VAL is a coding system, it is used for both decoding what received
10654 from the network stream and encoding what sent to the network stream.
10655 If VAL is a cons of coding systems, the car part is used for decoding,
10656 and the cdr part is used for encoding.
10657 If VAL is a function symbol, the function must return a coding system
10658 or a cons of coding systems which are used as above.
10659
10660 See also the function `find-operation-coding-system'.  */);
10661   Vnetwork_coding_system_alist = Qnil;
10662
10663   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10664                doc: /* Coding system to use with system messages.
10665 Also used for decoding keyboard input on X Window system.  */);
10666   Vlocale_coding_system = Qnil;
10667
10668   /* The eol mnemonics are reset in startup.el system-dependently.  */
10669   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10670                doc: /*
10671 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10672   eol_mnemonic_unix = make_pure_c_string (":");
10673
10674   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10675                doc: /*
10676 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10677   eol_mnemonic_dos = make_pure_c_string ("\\");
10678
10679   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10680                doc: /*
10681 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10682   eol_mnemonic_mac = make_pure_c_string ("/");
10683
10684   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10685                doc: /*
10686 *String displayed in mode line when end-of-line format is not yet determined.  */);
10687   eol_mnemonic_undecided = make_pure_c_string (":");
10688
10689   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10690                doc: /*
10691 *Non-nil enables character translation while encoding and decoding.  */);
10692   Venable_character_translation = Qt;
10693
10694   DEFVAR_LISP ("standard-translation-table-for-decode",
10695                &Vstandard_translation_table_for_decode,
10696                doc: /* Table for translating characters while decoding.  */);
10697   Vstandard_translation_table_for_decode = Qnil;
10698
10699   DEFVAR_LISP ("standard-translation-table-for-encode",
10700                &Vstandard_translation_table_for_encode,
10701                doc: /* Table for translating characters while encoding.  */);
10702   Vstandard_translation_table_for_encode = Qnil;
10703
10704   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10705                doc: /* Alist of charsets vs revision numbers.
10706 While encoding, if a charset (car part of an element) is found,
10707 designate it with the escape sequence identifying revision (cdr part
10708 of the element).  */);
10709   Vcharset_revision_table = Qnil;
10710
10711   DEFVAR_LISP ("default-process-coding-system",
10712                &Vdefault_process_coding_system,
10713                doc: /* Cons of coding systems used for process I/O by default.
10714 The car part is used for decoding a process output,
10715 the cdr part is used for encoding a text to be sent to a process.  */);
10716   Vdefault_process_coding_system = Qnil;
10717
10718   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10719                doc: /*
10720 Table of extra Latin codes in the range 128..159 (inclusive).
10721 This is a vector of length 256.
10722 If Nth element is non-nil, the existence of code N in a file
10723 \(or output of subprocess) doesn't prevent it to be detected as
10724 a coding system of ISO 2022 variant which has a flag
10725 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10726 or reading output of a subprocess.
10727 Only 128th through 159th elements have a meaning.  */);
10728   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10729
10730   DEFVAR_LISP ("select-safe-coding-system-function",
10731                &Vselect_safe_coding_system_function,
10732                doc: /*
10733 Function to call to select safe coding system for encoding a text.
10734
10735 If set, this function is called to force a user to select a proper
10736 coding system which can encode the text in the case that a default
10737 coding system used in each operation can't encode the text.  The
10738 function should take care that the buffer is not modified while
10739 the coding system is being selected.
10740
10741 The default value is `select-safe-coding-system' (which see).  */);
10742   Vselect_safe_coding_system_function = Qnil;
10743
10744   DEFVAR_BOOL ("coding-system-require-warning",
10745                &coding_system_require_warning,
10746                doc: /* Internal use only.
10747 If non-nil, on writing a file, `select-safe-coding-system-function' is
10748 called even if `coding-system-for-write' is non-nil.  The command
10749 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10750   coding_system_require_warning = 0;
10751
10752
10753   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10754                &inhibit_iso_escape_detection,
10755                doc: /*
10756 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10757
10758 When Emacs reads text, it tries to detect how the text is encoded.
10759 This code detection is sensitive to escape sequences.  If Emacs sees
10760 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10761 of the ISO2022 encodings, and decodes text by the corresponding coding
10762 system (e.g. `iso-2022-7bit').
10763
10764 However, there may be a case that you want to read escape sequences in
10765 a file as is.  In such a case, you can set this variable to non-nil.
10766 Then the code detection will ignore any escape sequences, and no text is
10767 detected as encoded in some ISO-2022 encoding.  The result is that all
10768 escape sequences become visible in a buffer.
10769
10770 The default value is nil, and it is strongly recommended not to change
10771 it.  That is because many Emacs Lisp source files that contain
10772 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10773 in Emacs's distribution, and they won't be decoded correctly on
10774 reading if you suppress escape sequence detection.
10775
10776 The other way to read escape sequences in a file without decoding is
10777 to explicitly specify some coding system that doesn't use ISO-2022
10778 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10779   inhibit_iso_escape_detection = 0;
10780
10781   DEFVAR_BOOL ("inhibit-null-byte-detection",
10782                &inhibit_null_byte_detection,
10783                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10784 By default, Emacs treats it as binary data, and does not attempt to
10785 decode it.  The effect is as if you specified `no-conversion' for
10786 reading that text.
10787
10788 Set this to non-nil when a regular text happens to include null bytes.
10789 Examples are Index nodes of Info files and null-byte delimited output
10790 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10791 decode text as usual.  */);
10792   inhibit_null_byte_detection = 0;
10793
10794   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10795                doc: /* Char table for translating self-inserting characters.
10796 This is applied to the result of input methods, not their input.
10797 See also `keyboard-translate-table'.
10798
10799 Use of this variable for character code unification was rendered
10800 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10801 internal character representation.  */);
10802     Vtranslation_table_for_input = Qnil;
10803
10804   {
10805     Lisp_Object args[coding_arg_max];
10806     Lisp_Object plist[16];
10807     int i;
10808
10809     for (i = 0; i < coding_arg_max; i++)
10810       args[i] = Qnil;
10811
10812     plist[0] = intern_c_string (":name");
10813     plist[1] = args[coding_arg_name] = Qno_conversion;
10814     plist[2] = intern_c_string (":mnemonic");
10815     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10816     plist[4] = intern_c_string (":coding-type");
10817     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10818     plist[6] = intern_c_string (":ascii-compatible-p");
10819     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10820     plist[8] = intern_c_string (":default-char");
10821     plist[9] = args[coding_arg_default_char] = make_number (0);
10822     plist[10] = intern_c_string (":for-unibyte");
10823     plist[11] = args[coding_arg_for_unibyte] = Qt;
10824     plist[12] = intern_c_string (":docstring");
10825     plist[13] = make_pure_c_string ("Do no conversion.\n\
10826 \n\
10827 When you visit a file with this coding, the file is read into a\n\
10828 unibyte buffer as is, thus each byte of a file is treated as a\n\
10829 character.");
10830     plist[14] = intern_c_string (":eol-type");
10831     plist[15] = args[coding_arg_eol_type] = Qunix;
10832     args[coding_arg_plist] = Flist (16, plist);
10833     Fdefine_coding_system_internal (coding_arg_max, args);
10834
10835     plist[1] = args[coding_arg_name] = Qundecided;
10836     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10837     plist[5] = args[coding_arg_coding_type] = Qundecided;
10838     /* This is already set.
10839        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10840     plist[8] = intern_c_string (":charset-list");
10841     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10842     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10843     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10844     plist[15] = args[coding_arg_eol_type] = Qnil;
10845     args[coding_arg_plist] = Flist (16, plist);
10846     Fdefine_coding_system_internal (coding_arg_max, args);
10847   }
10848
10849   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10850
10851   {
10852     int i;
10853
10854     for (i = 0; i < coding_category_max; i++)
10855       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10856   }
10857 #if defined (DOS_NT)
10858   system_eol_type = Qdos;
10859 #else
10860   system_eol_type = Qunix;
10861 #endif
10862   staticpro (&system_eol_type);
10863 }
10864
10865 char *
10866 emacs_strerror (int error_number)
10867 {
10868   char *str;
10869
10870   synchronize_system_messages_locale ();
10871   str = strerror (error_number);
10872
10873   if (! NILP (Vlocale_coding_system))
10874     {
10875       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10876                                                       Vlocale_coding_system,
10877                                                       0);
10878       str = (char *) SDATA (dec);
10879     }
10880
10881   return str;
10882 }
10883
10884 #endif /* emacs */
10885
10886 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10887    (do not change this comment) */