src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001-2012 Free Software Foundation, Inc.
   3    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4      2005, 2006, 2007, 2008, 2009, 2010, 2011
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H14PRO021
   7    Copyright (C) 2003
   8      National Institute of Advanced Industrial Science and Technology (AIST)
   9      Registration Number H13PRO009
  10
  11 This file is part of GNU Emacs.
  12
  13 GNU Emacs is free software: you can redistribute it and/or modify
  14 it under the terms of the GNU General Public License as published by
  15 the Free Software Foundation, either version 3 of the License, or
  16 (at your option) any later version.
  17
  18 GNU Emacs is distributed in the hope that it will be useful,
  19 but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 GNU General Public License for more details.
  22
  23 You should have received a copy of the GNU General Public License
  24 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  On
  59   the C level, a coding system is represented by a vector of attributes
   60   stored in the hash table Vcoding_system_hash_table.  The conversion
   61   from coding system symbol to attributes vector is done by looking up
   62   Vcoding_system_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (struct coding_system *coding,
 157                    struct coding_detection_info *detect_info)
 158 {
 159   const unsigned char *src = coding->source;
 160   const unsigned char *src_end = coding->source + coding->src_bytes;
 161   int multibytep = coding->src_multibyte;
 162   ptrdiff_t consumed_chars = 0;
 163   int found = 0;
 164   ...;
 165
 166   while (1)
 167     {
 168       /* Get one byte from the source.  If the source is exhausted, jump
 169          to no_more_source:.  */
 170       ONE_MORE_BYTE (c);
 171
 172       if (! __C_conforms_to_XXX___ (c))
 173         break;
 174       if (__C_strongly_suggests_XXX__ (c))
 175         found = CATEGORY_MASK_XXX;
 176     }
 177   /* The byte sequence is invalid for XXX.  */
 178   detect_info->rejected |= CATEGORY_MASK_XXX;
 179   return 0;
 180
 181  no_more_source:
 182   /* The source exhausted successfully.  */
 183   detect_info->found |= found;
 184   return 1;
 185 }
 186 #endif
 187
 188 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 189
 190   These functions decode a byte sequence specified as a source by
 191   CODING.  The resulting multibyte text goes to a place pointed to by
 192   CODING->charbuf, the length of which should not exceed
  193   CODING->charbuf_size.
 194
 195   These functions set the information of original and decoded texts in
 196   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 197   They also set CODING->result to one of CODING_RESULT_XXX indicating
 198   how the decoding is finished.
 199
 200   Below is the template of these functions.  */
 201
 202 #if 0
 203 static void
 204 decode_coding_XXXX (struct coding_system *coding)
 205 {
 206   const unsigned char *src = coding->source + coding->consumed;
 207   const unsigned char *src_end = coding->source + coding->src_bytes;
 208   /* SRC_BASE remembers the start position in source in each loop.
 209      The loop will be exited when there's not enough source code, or
 210      when there's no room in CHARBUF for a decoded character.  */
 211   const unsigned char *src_base;
 212   /* A buffer to produce decoded characters.  */
 213   int *charbuf = coding->charbuf + coding->charbuf_used;
 214   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 215   int multibytep = coding->src_multibyte;
 216
 217   while (1)
 218     {
 219       src_base = src;
 220       if (charbuf >= charbuf_end)
 221         /* No more room to produce a decoded character.  */
 222         break;
 223       ONE_MORE_BYTE (c);
 224       /* Decode it. */
 225     }
 226
 227  no_more_source:
 228   if (src_base < src_end
 229       && coding->mode & CODING_MODE_LAST_BLOCK)
 230     /* If the source ends by partial bytes to construct a character,
 231        treat them as eight-bit raw data.  */
 232     while (src_base < src_end && charbuf < charbuf_end)
 233       *charbuf++ = *src_base++;
 234   /* Remember how many bytes and characters we consumed.  If the
 235      source is multibyte, the bytes and chars are not identical.  */
 236   coding->consumed = coding->consumed_char = src_base - coding->source;
 237   /* Remember how many characters we produced.  */
 238   coding->charbuf_used = charbuf - coding->charbuf;
 239 }
 240 #endif
 241
 242 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 243
 244   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 245   internal multibyte format by CODING.  The resulting byte sequence
 246   goes to a place pointed to by DESTINATION, the length of which
 247   should not exceed DST_BYTES.
 248
 249   These functions set the information of original and encoded texts in
 250   the members produced, produced_char, consumed, and consumed_char of
 251   the structure *CODING.  They also set the member result to one of
 252   CODING_RESULT_XXX indicating how the encoding finished.
 253
 254   DST_BYTES zero means that source area and destination area are
  255   overlapped, which means that we can produce an encoded text until it
  256   reaches the head of not-yet-encoded source text.
 257
 258   Below is a template of these functions.  */
 259 #if 0
 260 static void
 261 encode_coding_XXX (struct coding_system *coding)
 262 {
 263   int multibytep = coding->dst_multibyte;
 264   int *charbuf = coding->charbuf;
 265   int *charbuf_end = charbuf + coding->charbuf_used;
 266   unsigned char *dst = coding->destination + coding->produced;
 267   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 268   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 269   ptrdiff_t produced_chars = 0;
 270
 271   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 272     {
 273       int c = *charbuf;
 274       /* Encode C into DST, and increment DST.  */
 275     }
 276  label_no_more_destination:
 277   /* How many chars and bytes we produced.  */
 278   coding->produced_char += produced_chars;
 279   coding->produced = dst - coding->destination;
 280 }
 281 #endif
 282
 283 \f
 284 /*** 1. Preamble ***/
 285
 286 #include <config.h>
 287 #include <stdio.h>
 288 #include <setjmp.h>
 289
 290 #include "lisp.h"
 291 #include "character.h"
 292 #include "buffer.h"
 293 #include "charset.h"
 294 #include "ccl.h"
 295 #include "composite.h"
 296 #include "coding.h"
 297 #include "window.h"
 298 #include "frame.h"
 299 #include "termhooks.h"
 300
 301 Lisp_Object Vcoding_system_hash_table; /* Presumably the coding system table described under "CODING SYSTEM" above -- TODO confirm.  */
 302
 303 static Lisp_Object Qcoding_system, Qeol_type;
 304 static Lisp_Object Qcoding_aliases;
 305 Lisp_Object Qunix, Qdos;
 306 Lisp_Object Qbuffer_file_coding_system;
 307 static Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 static Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qutf_8;
 311 static Lisp_Object Qiso_2022;
 312 static Lisp_Object Qutf_16, Qshift_jis, Qbig5;
 313 static Lisp_Object Qbig, Qlittle;
 314 static Lisp_Object Qcoding_system_history;
 315 static Lisp_Object Qvalid_codes;
 316 static Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 317 static Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 318 static Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 319 static Lisp_Object QCascii_compatible_p;
 320
 321 Lisp_Object Qcall_process, Qcall_process_region;
 322 Lisp_Object Qstart_process, Qopen_network_stream;
 323 static Lisp_Object Qtarget_idx;
 324 /* Presumably symbols naming conversion results (cf. CODING_RESULT_XXX) -- TODO confirm.  */
 325 static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 326 static Lisp_Object Qinterrupted, Qinsufficient_memory;
 327
 328 /* If a symbol has this property, evaluate the value to define the
 329    symbol as a coding system.  */
 330 static Lisp_Object Qcoding_system_define_form;
 331
 332 /* Format of end-of-line decided by system.  This is Qunix on
 333    Unix and Mac, Qdos on DOS/Windows.
 334    This has an effect only for external encoding (i.e. for output to
 335    file and process), not for in-buffer or Lisp string encoding.  */
 336 static Lisp_Object system_eol_type;
 337
 338 #ifdef emacs
 339
 340 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 341
 342 /* Coding system emacs-mule and raw-text are for converting only
 343    end-of-line format.  */
 344 Lisp_Object Qemacs_mule, Qraw_text;
 345 Lisp_Object Qutf_8_emacs;
 346
 347 /* Coding-systems are handed between Emacs Lisp programs and C internal
 348    routines by the following variable (only this one is declared here).  */
 349 /* Coding system to be used to encode text for terminal display when
 350    terminal coding system is nil.  */
 351 struct coding_system safe_terminal_coding;
 352
 353 #endif /* emacs */
 354
 355 Lisp_Object Qtranslation_table;
 356 Lisp_Object Qtranslation_table_id;
 357 static Lisp_Object Qtranslation_table_for_decode;
 358 static Lisp_Object Qtranslation_table_for_encode;
 359
 360 /* Two special coding systems (for SJIS and BIG5, judging by the names).  */
 361 static Lisp_Object Vsjis_coding_system;
 362 static Lisp_Object Vbig5_coding_system;
 363
 364 /* ISO2022 section */
 365 /* Integer at index REG of CODING's `coding_attr_iso_initial' attribute.  */
 366 #define CODING_ISO_INITIAL(coding, reg)                 \
 367   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 368                      coding_attr_iso_initial),          \
 369                reg)))
 370
 371 /* CODING->safe_charsets[CHARSET_ID]; -1 if that is 255 or ID is too large.  */
 372 #define CODING_ISO_REQUEST(coding, charset_id)          \
 373   (((charset_id) <= (coding)->max_charset_id            \
 374     ? ((coding)->safe_charsets[charset_id] != 255       \
 375        ? (coding)->safe_charsets[charset_id]            \
 376        : -1)                                            \
 377     : -1))
 378
 379 /* Accessors for the members of CODING->spec.iso_2022.  */
 380 #define CODING_ISO_FLAGS(coding)        \
 381   ((coding)->spec.iso_2022.flags)
 382 #define CODING_ISO_DESIGNATION(coding, reg)     \
 383   ((coding)->spec.iso_2022.current_designation[reg])
 384 #define CODING_ISO_INVOCATION(coding, plane)    \
 385   ((coding)->spec.iso_2022.current_invocation[plane])
 386 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 387   ((coding)->spec.iso_2022.single_shifting)
 388 #define CODING_ISO_BOL(coding)  \
 389   ((coding)->spec.iso_2022.bol)
 390 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 391   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 392 #define CODING_ISO_CMP_STATUS(coding)   \
 393   (&(coding)->spec.iso_2022.cmp_status)
 394 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 395   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 396 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 397   ((coding)->spec.iso_2022.embedded_utf_8)
 398
 399 /* Control characters of ISO2022.  */
 400                         /* code */      /* function */
 401 #define ISO_CODE_SO     0x0E            /* shift-out */
 402 #define ISO_CODE_SI     0x0F            /* shift-in */
 403 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 404 #define ISO_CODE_ESC    0x1B            /* escape */
 405 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 406 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 407 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 408
 409 /* Every 1-byte code of ISO2022 is classified into one of the
 410    following classes.  */
 411 enum iso_code_class_type
 412   {
 413     ISO_control_0,              /* Control codes in the range
 414                                    0x00..0x1F and 0x7F, except for the
 415                                    following 4 codes.  */
 416     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 417     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 418     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 419     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 420     ISO_control_1,              /* Control codes in the range
 421                                    0x80..0x9F, except for the
 422                                    following 3 codes.  */
 423     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 424     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 425     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 426     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 427     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 428     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 429     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 430   };
 431
 432 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
 433     `iso-flags' attribute of an iso2022 coding system.  */
 434
 435 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 436    instead of the correct short-form sequence (e.g. ESC $ A).  */
 437 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 438
 439 /* If set, reset graphic planes and registers at end-of-line to the
 440    initial state.  */
 441 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 442
 443 /* If set, reset graphic planes and registers before any control
 444    characters to the initial state.  */
 445 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 446
 447 /* If set, encode by 7-bit environment.  */
 448 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 449
 450 /* If set, use locking-shift function.  */
 451 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 452
 453 /* If set, use single-shift function.  Overwrite
 454    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 455 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 456
 457 /* If set, use designation escape sequence.  */
 458 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 459
 460 /* If set, produce revision number sequence.  */
 461 #define CODING_ISO_FLAG_REVISION        0x0080
 462
 463 /* If set, produce ISO6429's direction specifying sequence.  */
 464 #define CODING_ISO_FLAG_DIRECTION       0x0100
 465
 466 /* If set, assume designation states are reset at beginning of line on
 467    output.  */
 468 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 469
 470 /* If set, designation sequence should be placed at beginning of line
 471    on output.  */
 472 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 473
 474 /* If set, do not encode unsafe characters on output.  */
 475 #define CODING_ISO_FLAG_SAFE            0x0800
 476
 477 /* If set, extra latin codes (128..159) are accepted as a valid code
 478    on input.  */
 479 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 480
 481 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 482
 483 /* #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 */
 484
 485 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 486
 487 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 488 /* NOTE(review): bits 0x20000..0x80000 are not defined in this list.  */
 489 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 490
 491 /* A character to be produced on output if encoding of the original
 492    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 493 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 494
 495 /* UTF-8 section: accessor for the `utf_8_bom' member of CODING's spec.  */
 496 #define CODING_UTF_8_BOM(coding)        \
 497   ((coding)->spec.utf_8_bom)
 498
 499 /* UTF-16 section: accessors for the members of CODING's `utf_16' spec.  */
 500 #define CODING_UTF_16_BOM(coding)       \
 501   ((coding)->spec.utf_16.bom)
 502
 503 #define CODING_UTF_16_ENDIAN(coding)    \
 504   ((coding)->spec.utf_16.endian)
 505
 506 #define CODING_UTF_16_SURROGATE(coding) \
 507   ((coding)->spec.utf_16.surrogate)
 508
 509
 510 /* CCL section: accessors for the CCL-related attributes of CODING.  */
 511 #define CODING_CCL_DECODER(coding)      \
 512   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 513 #define CODING_CCL_ENCODER(coding)      \
 514   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 515 #define CODING_CCL_VALIDS(coding)                                          \
 516   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 517
 518 /* Index for each coding category in `coding_categories' */
 519 /* The enumerator values are also the bit positions of CATEGORY_MASK_XXX.  */
 520 enum coding_category
 521   {
 522     coding_category_iso_7,
 523     coding_category_iso_7_tight,
 524     coding_category_iso_8_1,
 525     coding_category_iso_8_2,
 526     coding_category_iso_7_else,
 527     coding_category_iso_8_else,
 528     coding_category_utf_8_auto,
 529     coding_category_utf_8_nosig,
 530     coding_category_utf_8_sig,
 531     coding_category_utf_16_auto,
 532     coding_category_utf_16_be,
 533     coding_category_utf_16_le,
 534     coding_category_utf_16_be_nosig,
 535     coding_category_utf_16_le_nosig,
 536     coding_category_charset,
 537     coding_category_sjis,
 538     coding_category_big5,
 539     coding_category_ccl,
 540     coding_category_emacs_mule,
 541     /* All above are targets of code detection.  */
 542     coding_category_raw_text,
 543     coding_category_undecided,
 544     coding_category_max         /* Number of categories; sizes the tables.  */
 545   };
 546
 547 /* Definitions of flag bits used in detect_coding_XXXX.  */
 548 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 549 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 550 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 551 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 552 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 553 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 554 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 555 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 556 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 557 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 558 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 559 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 560 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 561 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 562 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 563 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 564 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 565 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 566 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 567 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 568
 569 /* This value is returned if detect_coding_mask () finds nothing other
 570    than ASCII characters.  It does not contain CATEGORY_MASK_RAW_TEXT.  */
 571 #define CATEGORY_MASK_ANY               \
 572   (CATEGORY_MASK_ISO_7                  \
 573    | CATEGORY_MASK_ISO_7_TIGHT          \
 574    | CATEGORY_MASK_ISO_8_1              \
 575    | CATEGORY_MASK_ISO_8_2              \
 576    | CATEGORY_MASK_ISO_7_ELSE           \
 577    | CATEGORY_MASK_ISO_8_ELSE           \
 578    | CATEGORY_MASK_UTF_8_AUTO           \
 579    | CATEGORY_MASK_UTF_8_NOSIG          \
 580    | CATEGORY_MASK_UTF_8_SIG            \
 581    | CATEGORY_MASK_UTF_16_AUTO          \
 582    | CATEGORY_MASK_UTF_16_BE            \
 583    | CATEGORY_MASK_UTF_16_LE            \
 584    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 585    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 586    | CATEGORY_MASK_CHARSET              \
 587    | CATEGORY_MASK_SJIS                 \
 588    | CATEGORY_MASK_BIG5                 \
 589    | CATEGORY_MASK_CCL                  \
 590    | CATEGORY_MASK_EMACS_MULE)
 591
 592 /* Unions of the single-category masks above, grouped by family.  */
 593 #define CATEGORY_MASK_ISO_7BIT \
 594   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 595
 596 #define CATEGORY_MASK_ISO_8BIT \
 597   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 598
 599 #define CATEGORY_MASK_ISO_ELSE \
 600   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 601
 602 #define CATEGORY_MASK_ISO_ESCAPE        \
 603   (CATEGORY_MASK_ISO_7                  \
 604    | CATEGORY_MASK_ISO_7_TIGHT          \
 605    | CATEGORY_MASK_ISO_7_ELSE           \
 606    | CATEGORY_MASK_ISO_8_ELSE)
 607
 608 #define CATEGORY_MASK_ISO       \
 609   (  CATEGORY_MASK_ISO_7BIT     \
 610      | CATEGORY_MASK_ISO_8BIT   \
 611      | CATEGORY_MASK_ISO_ELSE)
 612
 613 #define CATEGORY_MASK_UTF_16            \
 614   (CATEGORY_MASK_UTF_16_AUTO            \
 615    | CATEGORY_MASK_UTF_16_BE            \
 616    | CATEGORY_MASK_UTF_16_LE            \
 617    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 618    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 619
 620 #define CATEGORY_MASK_UTF_8     \
 621   (CATEGORY_MASK_UTF_8_AUTO     \
 622    | CATEGORY_MASK_UTF_8_NOSIG  \
 623    | CATEGORY_MASK_UTF_8_SIG)
 624
 625 /* Table of coding categories (Lisp symbols).  This variable is for
 626    internal use only.  */
 627 static Lisp_Object Vcoding_category_table;
 628
 629 /* Coding categories ordered by priority (coding_category_max slots).  */
 630 static enum coding_category coding_priorities[coding_category_max];
 631
 632 /* Nth element is the coding context (a struct coding_system) for the
 633    coding system bound to the Nth coding category.  */
 634 static struct coding_system coding_categories[coding_category_max];
 635
 636 /*** Commonly used macros and functions ***/
 637 /* Note: min and max expand one of their arguments twice.  */
 638 #ifndef min
 639 #define min(a, b) ((a) < (b) ? (a) : (b))
 640 #endif
 641 #ifndef max
 642 #define max(a, b) ((a) > (b) ? (a) : (b))
 643 #endif
 644 /* Set ATTRS to CODING's attribute vector and CHARSET_LIST to its charset list.  */
 645 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 646   do {                                                  \
 647     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 648     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 649   } while (0)
 650
 651
 652 /* Safely get one byte from the source text pointed by SRC which ends
 653    at SRC_END, set C to it, and increment CONSUMED_CHARS.  If the source
 654    is exhausted, jump to `no_more_source', first recording
 655    CODING_RESULT_INSUFFICIENT_SRC when SRC_BASE < SRC.  If multibytep is
 656    nonzero, a 2-byte sequence headed by 0xC0 or 0xC1 is combined into a
 657    single value C; any other multibyte character sets C to the negative
 658    value of the character code and records CODING_RESULT_INVALID_SRC.  */
 659 /* Caller must declare: src, src_base, src_end, multibytep, coding, consumed_chars.  */
 660 #define ONE_MORE_BYTE(c)                                \
 661   do {                                                  \
 662     if (src == src_end)                                 \
 663       {                                                 \
 664         if (src_base < src)                             \
 665           record_conversion_result                      \
 666             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 667         goto no_more_source;                            \
 668       }                                                 \
 669     c = *src++;                                         \
 670     if (multibytep && (c & 0x80))                       \
 671       {                                                 \
 672         if ((c & 0xFE) == 0xC0)                         \
 673           c = ((c & 1) << 6) | *src++;                  \
 674         else                                            \
 675           {                                             \
 676             src--;                                      \
 677             c = - string_char (src, &src, NULL);        \
 678             record_conversion_result                    \
 679               (coding, CODING_RESULT_INVALID_SRC);      \
 680           }                                             \
 681       }                                                 \
 682     consumed_chars++;                                   \
 683   } while (0)
 684
 685 /* Safely get two bytes from the source text pointed by SRC which ends
 686    at SRC_END, and set C1 and C2 to those bytes while skipping the
 687    heading multibyte characters.  If there are not enough bytes in the
 688    source, it jumps to `no_more_source'.  If multibytep is nonzero and
 689    a multibyte character is found for C2, set C2 to -1 (not to the
 690    negated character code).  The caller should declare and set these
 691    variables appropriately in advance:
 692         src, src_end, multibytep
 693    It is intended that this macro is used in detect_coding_utf_16.  */
 694
 695 #define TWO_MORE_BYTES(c1, c2)                          \
 696   do {                                                  \
 697     do {                                                \
 698       if (src == src_end)                               \
 699         goto no_more_source;                            \
 700       c1 = *src++;                                      \
 701       if (multibytep && (c1 & 0x80))                    \
 702         {                                               \
 703           if ((c1 & 0xFE) == 0xC0)                      \
 704             c1 = ((c1 & 1) << 6) | *src++;              \
 705           else                                          \
 706             {                                           \
 707               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 708               c1 = -1;                                  \
 709             }                                           \
 710         }                                               \
 711     } while (c1 < 0);                                   \
 712     if (src == src_end)                                 \
 713       goto no_more_source;                              \
 714     c2 = *src++;                                        \
 715     if (multibytep && (c2 & 0x80))                      \
 716       {                                                 \
 717         if ((c2 & 0xFE) == 0xC0)                        \
 718           c2 = ((c2 & 1) << 6) | *src++;                \
 719         else                                            \
 720           c2 = -1;                                      \
 721       }                                                 \
 722   } while (0)
 723
 724
 725 /* Store a byte C in the place pointed by DST and increment DST to the
 726    next free point, and increment PRODUCED_CHARS.  The caller should
 727    assure that C is 0..127, and declare and set the variables `dst'
 728    and `produced_chars' appropriately in advance.
 729 */
 730
 731
 732 #define EMIT_ONE_ASCII_BYTE(c)  \
 733   do {                          \
 734     produced_chars++;           \
 735     *dst++ = (c);               \
 736   } while (0)
 737
 738
 739 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2.  */
 740
 741 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 742   do {                                  \
 743     produced_chars += 2;                \
 744     *dst++ = (c1), *dst++ = (c2);       \
 745   } while (0)
 746
 747
 748 /* Store a byte C in the place pointed by DST and increment DST to the
 749    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 750    nonzero, store in an appropriate multibyte form.  The caller should
 751    declare and set the variables `dst', `multibytep' and `produced_chars'
 752    appropriately in advance.  */
 753
 754 #define EMIT_ONE_BYTE(c)                \
 755   do {                                  \
 756     produced_chars++;                   \
 757     if (multibytep)                     \
 758       {                                 \
 759         unsigned ch = (c);              \
 760         if (ch >= 0x80)                 \
 761           ch = BYTE8_TO_CHAR (ch);      \
 762         CHAR_STRING_ADVANCE (ch, dst);  \
 763       }                                 \
 764     else                                \
 765       *dst++ = (c);                     \
 766   } while (0)
 767
 768
 769 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 770
 771 #define EMIT_TWO_BYTES(c1, c2)          \
 772   do {                                  \
 773     produced_chars += 2;                \
 774     if (multibytep)                     \
 775       {                                 \
 776         unsigned ch;                    \
 777                                         \
 778         ch = (c1);                      \
 779         if (ch >= 0x80)                 \
 780           ch = BYTE8_TO_CHAR (ch);      \
 781         CHAR_STRING_ADVANCE (ch, dst);  \
 782         ch = (c2);                      \
 783         if (ch >= 0x80)                 \
 784           ch = BYTE8_TO_CHAR (ch);      \
 785         CHAR_STRING_ADVANCE (ch, dst);  \
 786       }                                 \
 787     else                                \
 788       {                                 \
 789         *dst++ = (c1);                  \
 790         *dst++ = (c2);                  \
 791       }                                 \
 792   } while (0)
 793
 794
 795 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 796   do {                                  \
 797     EMIT_ONE_BYTE (c1);                 \
 798     EMIT_TWO_BYTES (c2, c3);            \
 799   } while (0)
 800
 801
 802 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 803   do {                                          \
 804     EMIT_TWO_BYTES (c1, c2);                    \
 805     EMIT_TWO_BYTES (c3, c4);                    \
 806   } while (0)
 807
 808
/* Prototypes for static functions.  */

/* Recording the result code of a conversion.  */
static void record_conversion_result (struct coding_system *coding,
                                      enum coding_result_code result);

/* Per-coding-system detectors, decoders and encoders.  */
static int detect_coding_utf_8 (struct coding_system *,
                                struct coding_detection_info *info);
static void decode_coding_utf_8 (struct coding_system *);
static int encode_coding_utf_8 (struct coding_system *);

static int detect_coding_utf_16 (struct coding_system *,
                                 struct coding_detection_info *info);
static void decode_coding_utf_16 (struct coding_system *);
static int encode_coding_utf_16 (struct coding_system *);

static int detect_coding_iso_2022 (struct coding_system *,
                                   struct coding_detection_info *info);
static void decode_coding_iso_2022 (struct coding_system *);
static int encode_coding_iso_2022 (struct coding_system *);

static int detect_coding_emacs_mule (struct coding_system *,
                                     struct coding_detection_info *info);
static void decode_coding_emacs_mule (struct coding_system *);
static int encode_coding_emacs_mule (struct coding_system *);

static int detect_coding_sjis (struct coding_system *,
                               struct coding_detection_info *info);
static void decode_coding_sjis (struct coding_system *);
static int encode_coding_sjis (struct coding_system *);

static int detect_coding_big5 (struct coding_system *,
                               struct coding_detection_info *info);
static void decode_coding_big5 (struct coding_system *);
static int encode_coding_big5 (struct coding_system *);

static int detect_coding_ccl (struct coding_system *,
                              struct coding_detection_info *info);
static void decode_coding_ccl (struct coding_system *);
static int encode_coding_ccl (struct coding_system *);

static void decode_coding_raw_text (struct coding_system *);
static int encode_coding_raw_text (struct coding_system *);

/* Keeping coding->source and coding->destination up to date, and
   enlarging the destination.  */
static void coding_set_source (struct coding_system *);
static ptrdiff_t coding_change_source (struct coding_system *);
static void coding_set_destination (struct coding_system *);
static ptrdiff_t coding_change_destination (struct coding_system *);
static void coding_alloc_by_realloc (struct coding_system *, ptrdiff_t);
static void coding_alloc_by_making_gap (struct coding_system *,
                                        ptrdiff_t, ptrdiff_t);
static unsigned char *alloc_destination (struct coding_system *,
                                         ptrdiff_t, unsigned char *);

/* Remaining helpers defined later in this file.  */
static void setup_iso_safe_charsets (Lisp_Object);
static ptrdiff_t encode_designation_at_bol (struct coding_system *,
                                      int *, int *, unsigned char *);
static int detect_eol (const unsigned char *,
                       ptrdiff_t, enum coding_category);
static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
static void decode_eol (struct coding_system *);
static Lisp_Object get_translation_table (Lisp_Object, int, int *);
static Lisp_Object get_translation (Lisp_Object, int *, int *);
static int produce_chars (struct coding_system *, Lisp_Object, int);
static inline void produce_charset (struct coding_system *, int *,
                                    ptrdiff_t);
static void produce_annotation (struct coding_system *, ptrdiff_t);
static int decode_coding (struct coding_system *);
static inline int *handle_composition_annotation (ptrdiff_t, ptrdiff_t,
                                                  struct coding_system *,
                                                  int *, ptrdiff_t *);
static inline int *handle_charset_annotation (ptrdiff_t, ptrdiff_t,
                                              struct coding_system *,
                                              int *, ptrdiff_t *);
static void consume_chars (struct coding_system *, Lisp_Object, int);
static int encode_coding (struct coding_system *);
static Lisp_Object make_conversion_work_buffer (int);
static Lisp_Object code_conversion_restore (Lisp_Object);
static inline int char_encodable_p (int, Lisp_Object);
static Lisp_Object make_subsidiaries (Lisp_Object);
 885
 886 static void
 887 record_conversion_result (struct coding_system *coding,
 888                           enum coding_result_code result)
 889 {
 890   coding->result = result;
 891   switch (result)
 892     {
 893     case CODING_RESULT_INSUFFICIENT_SRC:
 894       Vlast_code_conversion_error = Qinsufficient_source;
 895       break;
 896     case CODING_RESULT_INCONSISTENT_EOL:
 897       Vlast_code_conversion_error = Qinconsistent_eol;
 898       break;
 899     case CODING_RESULT_INVALID_SRC:
 900       Vlast_code_conversion_error = Qinvalid_source;
 901       break;
 902     case CODING_RESULT_INTERRUPT:
 903       Vlast_code_conversion_error = Qinterrupted;
 904       break;
 905     case CODING_RESULT_INSUFFICIENT_MEM:
 906       Vlast_code_conversion_error = Qinsufficient_memory;
 907       break;
 908     case CODING_RESULT_INSUFFICIENT_DST:
 909       /* Don't record this error in Vlast_code_conversion_error
 910          because it happens just temporarily and is resolved when the
 911          whole conversion is finished.  */
 912       break;
 913     case CODING_RESULT_SUCCESS:
 914       break;
 915     default:
 916       Vlast_code_conversion_error = intern ("Unknown error");
 917     }
 918 }
 919
/* These wrapper macros are used to preserve validity of pointers into
   buffer text across calls to decode_char, encode_char, etc, which
   could cause relocation of buffers if they load a charset map,
   because loading a charset map allocates large structures.  */
 924
 925 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 926   do {                                                                       \
 927     ptrdiff_t offset;                                                        \
 928                                                                              \
 929     charset_map_loaded = 0;                                                  \
 930     c = DECODE_CHAR (charset, code);                                         \
 931     if (charset_map_loaded                                                   \
 932         && (offset = coding_change_source (coding)))                         \
 933       {                                                                      \
 934         src += offset;                                                       \
 935         src_base += offset;                                                  \
 936         src_end += offset;                                                   \
 937       }                                                                      \
 938   } while (0)
 939
 940 #define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code)      \
 941   do {                                                                  \
 942     ptrdiff_t offset;                                                   \
 943                                                                         \
 944     charset_map_loaded = 0;                                             \
 945     code = ENCODE_CHAR (charset, c);                                    \
 946     if (charset_map_loaded                                              \
 947         && (offset = coding_change_destination (coding)))               \
 948       {                                                                 \
 949         dst += offset;                                                  \
 950         dst_end += offset;                                              \
 951       }                                                                 \
 952   } while (0)
 953
 954 #define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
 955   do {                                                                  \
 956     ptrdiff_t offset;                                                   \
 957                                                                         \
 958     charset_map_loaded = 0;                                             \
 959     charset = char_charset (c, charset_list, code_return);              \
 960     if (charset_map_loaded                                              \
 961         && (offset = coding_change_destination (coding)))               \
 962       {                                                                 \
 963         dst += offset;                                                  \
 964         dst_end += offset;                                              \
 965       }                                                                 \
 966   } while (0)
 967
 968 #define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \
 969   do {                                                                  \
 970     ptrdiff_t offset;                                                   \
 971                                                                         \
 972     charset_map_loaded = 0;                                             \
 973     result = CHAR_CHARSET_P (c, charset);                               \
 974     if (charset_map_loaded                                              \
 975         && (offset = coding_change_destination (coding)))               \
 976       {                                                                 \
 977         dst += offset;                                                  \
 978         dst_end += offset;                                              \
 979       }                                                                 \
 980   } while (0)
 981
 982
 983 /* If there are at least BYTES length of room at dst, allocate memory
 984    for coding->destination and update dst and dst_end.  We don't have
 985    to take care of coding->source which will be relocated.  It is
 986    handled by calling coding_set_source in encode_coding.  */
 987
 988 #define ASSURE_DESTINATION(bytes)                               \
 989   do {                                                          \
 990     if (dst + (bytes) >= dst_end)                               \
 991       {                                                         \
 992         ptrdiff_t more_bytes = charbuf_end - charbuf + (bytes); \
 993                                                                 \
 994         dst = alloc_destination (coding, more_bytes, dst);      \
 995         dst_end = coding->destination + coding->dst_bytes;      \
 996       }                                                         \
 997   } while (0)
 998
 999
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   C is evaluated several times, so it must not have side effects.
   Every use of C is parenthesized so that an expression argument
   (e.g. `base + n') is encoded correctly in all branches.  Characters
   above MAX_5_BYTE_CHAR are stored with BYTE8_STRING after
   subtracting 0x3FFF80.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1029
1030
/* Yield the character code whose multibyte form starts at P, and
   advance P past that form.  This is like STRING_CHAR_ADVANCE, but it
   never calls MAYBE_UNIFY_CHAR.

   The length (1 to 5 bytes) is chosen from the high bits of the first
   byte.  A 2-byte form whose first byte is below 0xC2 additionally
   gets 0x3FFF80 OR-ed into the result.  P is evaluated several
   times.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                                 \
  (((p)[0] & 0x80) == 0                                                 \
   ? *(p)++                                                             \
   : ((p)[0] & 0x20) == 0                                               \
   ? ((p) += 2,                                                         \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)                \
       | (((p)[-2] & 0x1F) << 6)                                        \
       | ((p)[-1] & 0x3F)))                                             \
   : ((p)[0] & 0x10) == 0                                               \
   ? ((p) += 3,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x0F) << 12)))                                     \
   : ((p)[0] & 0x08) == 0                                               \
   ? ((p) += 4,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0xF) << 18)))                                      \
   : ((p) += 5,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0x3F) << 18))))
1059
1060
1061 /* Set coding->source from coding->src_object.  */
1062
1063 static void
1064 coding_set_source (struct coding_system *coding)
1065 {
1066   if (BUFFERP (coding->src_object))
1067     {
1068       struct buffer *buf = XBUFFER (coding->src_object);
1069
1070       if (coding->src_pos < 0)
1071         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1072       else
1073         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1074     }
1075   else if (STRINGP (coding->src_object))
1076     {
1077       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1078     }
1079   else
1080     {
1081       /* Otherwise, the source is C string and is never relocated
1082          automatically.  Thus we don't have to update anything.  */
1083     }
1084 }
1085
1086
1087 /* Set coding->source from coding->src_object, and return how many
1088    bytes coding->source was changed.  */
1089
1090 static ptrdiff_t
1091 coding_change_source (struct coding_system *coding)
1092 {
1093   const unsigned char *orig = coding->source;
1094   coding_set_source (coding);
1095   return coding->source - orig;
1096 }
1097
1098
1099 /* Set coding->destination from coding->dst_object.  */
1100
1101 static void
1102 coding_set_destination (struct coding_system *coding)
1103 {
1104   if (BUFFERP (coding->dst_object))
1105     {
1106       if (BUFFERP (coding->src_object) && coding->src_pos < 0)
1107         {
1108           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1109           coding->dst_bytes = (GAP_END_ADDR
1110                                - (coding->src_bytes - coding->consumed)
1111                                - coding->destination);
1112         }
1113       else
1114         {
1115           /* We are sure that coding->dst_pos_byte is before the gap
1116              of the buffer. */
1117           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1118                                  + coding->dst_pos_byte - BEG_BYTE);
1119           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1120                                - coding->destination);
1121         }
1122     }
1123   else
1124     {
1125       /* Otherwise, the destination is C string and is never relocated
1126          automatically.  Thus we don't have to update anything.  */
1127     }
1128 }
1129
1130
1131 /* Set coding->destination from coding->dst_object, and return how
1132    many bytes coding->destination was changed.  */
1133
1134 static ptrdiff_t
1135 coding_change_destination (struct coding_system *coding)
1136 {
1137   const unsigned char *orig = coding->destination;
1138   coding_set_destination (coding);
1139   return coding->destination - orig;
1140 }
1141
1142
1143 static void
1144 coding_alloc_by_realloc (struct coding_system *coding, ptrdiff_t bytes)
1145 {
1146   if (STRING_BYTES_BOUND - coding->dst_bytes < bytes)
1147     string_overflow ();
1148   coding->destination = xrealloc (coding->destination,
1149                                   coding->dst_bytes + bytes);
1150   coding->dst_bytes += bytes;
1151 }
1152
1153 static void
1154 coding_alloc_by_making_gap (struct coding_system *coding,
1155                             ptrdiff_t gap_head_used, ptrdiff_t bytes)
1156 {
1157   if (EQ (coding->src_object, coding->dst_object))
1158     {
1159       /* The gap may contain the produced data at the head and not-yet
1160          consumed data at the tail.  To preserve those data, we at
1161          first make the gap size to zero, then increase the gap
1162          size.  */
1163       ptrdiff_t add = GAP_SIZE;
1164
1165       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1166       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1167       make_gap (bytes);
1168       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1169       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1170     }
1171   else
1172     {
1173       Lisp_Object this_buffer;
1174
1175       this_buffer = Fcurrent_buffer ();
1176       set_buffer_internal (XBUFFER (coding->dst_object));
1177       make_gap (bytes);
1178       set_buffer_internal (XBUFFER (this_buffer));
1179     }
1180 }
1181
1182
1183 static unsigned char *
1184 alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
1185                    unsigned char *dst)
1186 {
1187   ptrdiff_t offset = dst - coding->destination;
1188
1189   if (BUFFERP (coding->dst_object))
1190     {
1191       struct buffer *buf = XBUFFER (coding->dst_object);
1192
1193       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1194     }
1195   else
1196     coding_alloc_by_realloc (coding, nbytes);
1197   coding_set_destination (coding);
1198   dst = coding->destination + offset;
1199   return dst;
1200 }
1201
1202 /** Macros for annotations.  */
1203
1204 /* An annotation data is stored in the array coding->charbuf in this
1205    format:
1206      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1207    LENGTH is the number of elements in the annotation.
1208    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1209    NCHARS is the number of characters in the text annotated.
1210
   The format of the following elements depends on ANNOTATION_MASK.
1212
   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
1215      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1216
1217    NBYTES is the number of bytes specified in the header part of
1218    old-style emacs-mule encoding, or 0 for the other kind of
1219    composition.
1220
1221    METHOD is one of enum composition_method.
1222
1223    Optional COMPOSITION-COMPONENTS are characters and composition
1224    rules.
1225
1226    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1227    follows.
1228
1229    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1230    recover from an invalid annotation, and should be skipped by
1231    produce_annotation.  */
1232
/* Maximum length of the header of annotation data, counted in
   elements of the charbuf array.  The longest header written by the
   macros below is the one of ADD_COMPOSITION_DATA, which stores five
   elements.  */
#define MAX_ANNOTATION_LENGTH 5
1235
/* Store the three common header elements of an annotation at BUF,
   namely -LEN, MASK and NCHARS, advancing BUF past them, and set
   coding->annotated to 1.  The caller declares `coding' beforehand.

   The expansion deliberately does not end in a semicolon, so that
   `ADD_ANNOTATION_DATA (...);' is exactly one statement and can be
   used as the body of an `if' that has an `else'.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1243
/* Store the header of a composition annotation at BUF and advance BUF
   past it: the common header (length 5, mask
   CODING_ANNOTATE_COMPOSITION_MASK, NCHARS) followed by NBYTES and
   METHOD.  The arguments are parenthesized in the same way as in
   ADD_ANNOTATION_DATA so that expression arguments expand safely.  */

#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes);                                                    \
    *(buf)++ = (method);                                                    \
  } while (0)
1250
1251
/* Store a charset annotation at BUF and advance BUF past it: the
   common header (length 4, mask CODING_ANNOTATE_CHARSET_MASK, NCHARS)
   followed by the charset ID.  The arguments are parenthesized in the
   same way as in ADD_ANNOTATION_DATA so that expression arguments
   expand safely.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1257
1258 \f
1259 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1260
1261
1262
1263 \f
1264 /*** 3. UTF-8 ***/
1265
1266 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1267    Check if a text is encoded in UTF-8.  If it is, return 1, else
1268    return 0.  */
1269
/* Predicates classifying one octet C by its high bits.  */

/* C is below 0x80.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* C has the form 10xxxxxx.  */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
/* C has the form 110xxxxx.  */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* C has the form 1110xxxx.  */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* C has the form 11110xxx.  */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* C has the form 111110xx.  */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1276
/* The three octets that the UTF-8 functions below treat as a
   byte-order mark (signature) at the start of the text.  */
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1280
/* Check whether the source of CODING can be UTF-8 and record the
   verdict in DETECT_INFO.  Return 0 after adding CATEGORY_MASK_UTF_8
   to DETECT_INFO->rejected when a malformed sequence is seen, or when
   the last block ends in the middle of a sequence.  Otherwise return
   1 after updating DETECT_INFO->found and DETECT_INFO->rejected for
   the with-signature and without-signature categories.

   NOTE(review): ONE_MORE_BYTE is defined earlier in the file; it
   presumably jumps to `no_more_source' when the source is exhausted
   and may yield a negative value -- confirm against its
   definition.  */

static int
detect_coding_utf_8 (struct coding_system *coding,
                     struct coding_detection_info *detect_info)
{
  const unsigned char *src = coding->source, *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  ptrdiff_t consumed_chars = 0;
  int bom_found = 0;
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_UTF_8;
  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  /* Each iteration examines one sequence.  `continue' accepts it;
     `break' leaves the loop to reject the whole text.  */
  while (1)
    {
      int c, c1, c2, c3, c4;

      /* Remember where the current sequence starts.  */
      src_base = src;
      ONE_MORE_BYTE (c);
      if (c < 0 || UTF_8_1_OCTET_P (c))
        continue;
      ONE_MORE_BYTE (c1);
      if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
        break;
      if (UTF_8_2_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c2);
      if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
        break;
      if (UTF_8_3_OCTET_LEADING_P (c))
        {
          found = 1;
          /* Only a sequence at the very beginning of the source can
             be a signature.  */
          if (src_base == coding->source
              && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
            bom_found = 1;
          continue;
        }
      ONE_MORE_BYTE (c3);
      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
        break;
      if (UTF_8_4_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c4);
      if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
        break;
      if (UTF_8_5_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      /* C is not a valid leading octet of any accepted length.  */
      break;
    }
  detect_info->rejected |= CATEGORY_MASK_UTF_8;
  return 0;

 no_more_source:
  /* A sequence cut short at the end of the last block is invalid.  */
  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8;
      return 0;
    }
  if (bom_found)
    {
      /* A leading U+FEFF (octets 0xEF 0xBB 0xBF) doesn't necessarily
         mean a BOM, so the category without signature is reported as
         found too.  */
      detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
    }
  else
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
      /* Pure ASCII text leaves FOUND at zero and so reports nothing
         as found.  */
      if (found)
        detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
    }
  return 1;
}
1363
1364
/* Decode the UTF-8 source of CODING, starting at offset
   coding->consumed, into character codes appended to coding->charbuf.
   Decoding stops when the source is exhausted or charbuf is full.  On
   exit coding->consumed, coding->consumed_char and
   coding->charbuf_used are updated; input from SRC_BASE onward (an
   incomplete sequence, or a byte read ahead after a CR) is left
   unconsumed.

   A malformed, overlong, surrogate or out-of-range sequence is not
   dropped: its first byte is emitted alone (as is when
   ASCII_BYTE_P, otherwise through BYTE8_TO_CHAR), coding->errors
   is incremented, and decoding resumes at the following byte.

   NOTE(review): ONE_MORE_BYTE is defined earlier in the file; it
   presumably jumps to `no_more_source' when the source is exhausted,
   increments consumed_chars, and may yield a negative value --
   confirm against its definition.  */

static void
decode_coding_utf_8 (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  ptrdiff_t consumed_chars = 0, consumed_chars_base = 0;
  int multibytep = coding->src_multibyte;
  enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
  int eol_dos =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR when EOL_DOS is set, or -1 if
     there is none pending.  */
  int byte_after_cr = -1;

  /* Unless the coding system is known to have no BOM, skip a leading
     0xEF 0xBB 0xBF.  Anything else rewinds SRC to SRC_BASE so that it
     is decoded normally below.  */
  if (bom != utf_without_bom)
    {
      int c1, c2, c3;

      src_base = src;
      ONE_MORE_BYTE (c1);
      if (! UTF_8_3_OCTET_LEADING_P (c1))
        src = src_base;
      else
        {
          ONE_MORE_BYTE (c2);
          if (! UTF_8_EXTRA_OCTET_P (c2))
            src = src_base;
          else
            {
              ONE_MORE_BYTE (c3);
              if (! UTF_8_EXTRA_OCTET_P (c3))
                src = src_base;
              else
                {
                  if ((c1 != UTF_8_BOM_1)
                      || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
                    src = src_base;
                  else
                    CODING_UTF_8_BOM (coding) = utf_without_bom;
                }
            }
        }
    }
  /* From now on no BOM is looked for, whether or not one was found
     above.  */
  CODING_UTF_8_BOM (coding) = utf_without_bom;

  while (1)
    {
      int c, c1, c2, c3, c4, c5;

      /* Remember where this character starts so that an incomplete or
         invalid sequence can be rewound.  */
      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* The pending read-ahead byte has not been stored yet; give
             it back to the source.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        c1 = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c1);
      if (c1 < 0)
        {
          /* A negative value is stored with its sign flipped.  */
          c = - c1;
        }
      else if (UTF_8_1_OCTET_P (c1))
        {
          /* With DOS end-of-line handling, read one byte ahead after
             a CR and keep it for the next iteration.  */
          if (eol_dos && c1 == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          c = c1;
        }
      else
        {
          ONE_MORE_BYTE (c2);
          if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
            goto invalid_code;
          if (UTF_8_2_OCTET_LEADING_P (c1))
            {
              c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
              /* Reject overlong sequences here and below.  Encoders
                 producing them are incorrect, they can be misleading,
                 and they mess up read/write invariance.  */
              if (c < 128)
                goto invalid_code;
            }
          else
            {
              ONE_MORE_BYTE (c3);
              if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
                goto invalid_code;
              if (UTF_8_3_OCTET_LEADING_P (c1))
                {
                  c = (((c1 & 0xF) << 12)
                       | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
                  if (c < 0x800
                      || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
                    goto invalid_code;
                }
              else
                {
                  ONE_MORE_BYTE (c4);
                  if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
                    goto invalid_code;
                  if (UTF_8_4_OCTET_LEADING_P (c1))
                    {
                    c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
                         | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
                    if (c < 0x10000)
                      goto invalid_code;
                    }
                  else
                    {
                      ONE_MORE_BYTE (c5);
                      if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
                        goto invalid_code;
                      if (UTF_8_5_OCTET_LEADING_P (c1))
                        {
                          c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
                               | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
                               | (c5 & 0x3F));
                          /* Besides overlong forms, reject codes
                             above MAX_CHAR.  */
                          if ((c > MAX_CHAR) || (c < 0x200000))
                            goto invalid_code;
                        }
                      else
                        goto invalid_code;
                    }
                }
            }
        }

      *charbuf++ = c;
      continue;

    invalid_code:
      /* Rewind to the start of the bad sequence and emit only its
         first byte; the remaining bytes are decoded afresh by the
         following iterations.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
      coding->errors++;
    }

 no_more_source:
  /* Report only what was completely decoded: everything before
     SRC_BASE.  */
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
1514
1515
1516 static int
1517 encode_coding_utf_8 (struct coding_system *coding)
1518 {
1519   int multibytep = coding->dst_multibyte;
1520   int *charbuf = coding->charbuf;
1521   int *charbuf_end = charbuf + coding->charbuf_used;
1522   unsigned char *dst = coding->destination + coding->produced;
1523   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1524   ptrdiff_t produced_chars = 0;
1525   int c;
1526
1527   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1528     {
1529       ASSURE_DESTINATION (3);
1530       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1531       CODING_UTF_8_BOM (coding) = utf_without_bom;
1532     }
1533
1534   if (multibytep)
1535     {
1536       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1537
1538       while (charbuf < charbuf_end)
1539         {
1540           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1541
1542           ASSURE_DESTINATION (safe_room);
1543           c = *charbuf++;
1544           if (CHAR_BYTE8_P (c))
1545             {
1546               c = CHAR_TO_BYTE8 (c);
1547               EMIT_ONE_BYTE (c);
1548             }
1549           else
1550             {
1551               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1552               for (p = str; p < pend; p++)
1553                 EMIT_ONE_BYTE (*p);
1554             }
1555         }
1556     }
1557   else
1558     {
1559       int safe_room = MAX_MULTIBYTE_LENGTH;
1560
1561       while (charbuf < charbuf_end)
1562         {
1563           ASSURE_DESTINATION (safe_room);
1564           c = *charbuf++;
1565           if (CHAR_BYTE8_P (c))
1566             *dst++ = CHAR_TO_BYTE8 (c);
1567           else
1568             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1569           produced_chars++;
1570         }
1571     }
1572   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1573   coding->produced_char += produced_chars;
1574   coding->produced = dst - coding->destination;
1575   return 0;
1576 }
1577
1578
1579 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1580    Check if a text is encoded in one of UTF-16 based coding systems.
1581    If it is, return 1, else return 0.  */
1582
/* Nonzero if bits 10..15 of VAL are 110110, i.e. the low 16 bits of
   VAL lie in 0xD800..0xDBFF (a UTF-16 high surrogate).  Bits above
   bit 15 are not examined.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  ((((val) ^ 0xD800) & 0xFC00) == 0)

/* Nonzero if bits 10..15 of VAL are 110111, i.e. the low 16 bits of
   VAL lie in 0xDC00..0xDFFF (a UTF-16 low surrogate).  Bits above
   bit 15 are not examined.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  ((((val) ^ 0xDC00) & 0xFC00) == 0)
1588
1589
1590 static int
1591 detect_coding_utf_16 (struct coding_system *coding,
1592                       struct coding_detection_info *detect_info)
1593 {
1594   const unsigned char *src = coding->source;
1595   const unsigned char *src_end = coding->source + coding->src_bytes;
1596   int multibytep = coding->src_multibyte;
1597   int c1, c2;
1598
1599   detect_info->checked |= CATEGORY_MASK_UTF_16;
1600   if (coding->mode & CODING_MODE_LAST_BLOCK
1601       && (coding->src_chars & 1))
1602     {
1603       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1604       return 0;
1605     }
1606
1607   TWO_MORE_BYTES (c1, c2);
1608   if ((c1 == 0xFF) && (c2 == 0xFE))
1609     {
1610       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1611                              | CATEGORY_MASK_UTF_16_AUTO);
1612       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1613                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1614                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1615     }
1616   else if ((c1 == 0xFE) && (c2 == 0xFF))
1617     {
1618       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1619                              | CATEGORY_MASK_UTF_16_AUTO);
1620       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1621                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1622                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1623     }
1624   else if (c2 < 0)
1625     {
1626       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1627       return 0;
1628     }
1629   else
1630     {
1631       /* We check the dispersion of Eth and Oth bytes where E is even and
1632          O is odd.  If both are high, we assume binary data.*/
1633       unsigned char e[256], o[256];
1634       unsigned e_num = 1, o_num = 1;
1635
1636       memset (e, 0, 256);
1637       memset (o, 0, 256);
1638       e[c1] = 1;
1639       o[c2] = 1;
1640
1641       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1642                                 |CATEGORY_MASK_UTF_16_BE
1643                                 | CATEGORY_MASK_UTF_16_LE);
1644
1645       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1646              != CATEGORY_MASK_UTF_16)
1647         {
1648           TWO_MORE_BYTES (c1, c2);
1649           if (c2 < 0)
1650             break;
1651           if (! e[c1])
1652             {
1653               e[c1] = 1;
1654               e_num++;
1655               if (e_num >= 128)
1656                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1657             }
1658           if (! o[c2])
1659             {
1660               o[c2] = 1;
1661               o_num++;
1662               if (o_num >= 128)
1663                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1664             }
1665         }
1666       return 0;
1667     }
1668
1669  no_more_source:
1670   return 1;
1671 }
1672
1673 static void
1674 decode_coding_utf_16 (struct coding_system *coding)
1675 {
1676   const unsigned char *src = coding->source + coding->consumed;
1677   const unsigned char *src_end = coding->source + coding->src_bytes;
1678   const unsigned char *src_base;
1679   int *charbuf = coding->charbuf + coding->charbuf_used;
1680   /* We may produces at most 3 chars in one loop.  */
1681   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1682   ptrdiff_t consumed_chars = 0, consumed_chars_base = 0;
1683   int multibytep = coding->src_multibyte;
1684   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1685   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1686   int surrogate = CODING_UTF_16_SURROGATE (coding);
1687   int eol_dos =
1688     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1689   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1690
1691   if (bom == utf_with_bom)
1692     {
1693       int c, c1, c2;
1694
1695       src_base = src;
1696       ONE_MORE_BYTE (c1);
1697       ONE_MORE_BYTE (c2);
1698       c = (c1 << 8) | c2;
1699
1700       if (endian == utf_16_big_endian
1701           ? c != 0xFEFF : c != 0xFFFE)
1702         {
1703           /* The first two bytes are not BOM.  Treat them as bytes
1704              for a normal character.  */
1705           src = src_base;
1706           coding->errors++;
1707         }
1708       CODING_UTF_16_BOM (coding) = utf_without_bom;
1709     }
1710   else if (bom == utf_detect_bom)
1711     {
1712       /* We have already tried to detect BOM and failed in
1713          detect_coding.  */
1714       CODING_UTF_16_BOM (coding) = utf_without_bom;
1715     }
1716
1717   while (1)
1718     {
1719       int c, c1, c2;
1720
1721       src_base = src;
1722       consumed_chars_base = consumed_chars;
1723
1724       if (charbuf >= charbuf_end)
1725         {
1726           if (byte_after_cr1 >= 0)
1727             src_base -= 2;
1728           break;
1729         }
1730
1731       if (byte_after_cr1 >= 0)
1732         c1 = byte_after_cr1, byte_after_cr1 = -1;
1733       else
1734         ONE_MORE_BYTE (c1);
1735       if (c1 < 0)
1736         {
1737           *charbuf++ = -c1;
1738           continue;
1739         }
1740       if (byte_after_cr2 >= 0)
1741         c2 = byte_after_cr2, byte_after_cr2 = -1;
1742       else
1743         ONE_MORE_BYTE (c2);
1744       if (c2 < 0)
1745         {
1746           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1747           *charbuf++ = -c2;
1748           continue;
1749         }
1750       c = (endian == utf_16_big_endian
1751            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1752
1753       if (surrogate)
1754         {
1755           if (! UTF_16_LOW_SURROGATE_P (c))
1756             {
1757               if (endian == utf_16_big_endian)
1758                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1759               else
1760                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1761               *charbuf++ = c1;
1762               *charbuf++ = c2;
1763               coding->errors++;
1764               if (UTF_16_HIGH_SURROGATE_P (c))
1765                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1766               else
1767                 *charbuf++ = c;
1768             }
1769           else
1770             {
1771               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1772               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1773               *charbuf++ = 0x10000 + c;
1774             }
1775         }
1776       else
1777         {
1778           if (UTF_16_HIGH_SURROGATE_P (c))
1779             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1780           else
1781             {
1782               if (eol_dos && c == '\r')
1783                 {
1784                   ONE_MORE_BYTE (byte_after_cr1);
1785                   ONE_MORE_BYTE (byte_after_cr2);
1786                 }
1787               *charbuf++ = c;
1788             }
1789         }
1790     }
1791
1792  no_more_source:
1793   coding->consumed_char += consumed_chars_base;
1794   coding->consumed = src_base - coding->source;
1795   coding->charbuf_used = charbuf - coding->charbuf;
1796 }
1797
1798 static int
1799 encode_coding_utf_16 (struct coding_system *coding)
1800 {
1801   int multibytep = coding->dst_multibyte;
1802   int *charbuf = coding->charbuf;
1803   int *charbuf_end = charbuf + coding->charbuf_used;
1804   unsigned char *dst = coding->destination + coding->produced;
1805   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1806   int safe_room = 8;
1807   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1808   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1809   ptrdiff_t produced_chars = 0;
1810   int c;
1811
1812   if (bom != utf_without_bom)
1813     {
1814       ASSURE_DESTINATION (safe_room);
1815       if (big_endian)
1816         EMIT_TWO_BYTES (0xFE, 0xFF);
1817       else
1818         EMIT_TWO_BYTES (0xFF, 0xFE);
1819       CODING_UTF_16_BOM (coding) = utf_without_bom;
1820     }
1821
1822   while (charbuf < charbuf_end)
1823     {
1824       ASSURE_DESTINATION (safe_room);
1825       c = *charbuf++;
1826       if (c > MAX_UNICODE_CHAR)
1827         c = coding->default_char;
1828
1829       if (c < 0x10000)
1830         {
1831           if (big_endian)
1832             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1833           else
1834             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1835         }
1836       else
1837         {
1838           int c1, c2;
1839
1840           c -= 0x10000;
1841           c1 = (c >> 10) + 0xD800;
1842           c2 = (c & 0x3FF) + 0xDC00;
1843           if (big_endian)
1844             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1845           else
1846             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1847         }
1848     }
1849   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1850   coding->produced = dst - coding->destination;
1851   coding->produced_char += produced_chars;
1852   return 0;
1853 }
1854
1855 \f
1856 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1857
1858 /* Emacs' internal format for representation of multiple character
1859    sets is a kind of multi-byte encoding, i.e. characters are
1860    represented by variable-length sequences of one-byte codes.
1861
1862    ASCII characters and control characters (e.g. `tab', `newline') are
1863    represented by one-byte sequences which are their ASCII codes, in
1864    the range 0x00 through 0x7F.
1865
1866    8-bit characters of the range 0x80..0x9F are represented by
1867    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1868    code + 0x20).
1869
1870    8-bit characters of the range 0xA0..0xFF are represented by
1871    one-byte sequences which are their 8-bit code.
1872
1873    The other characters are represented by a sequence of `base
1874    leading-code', optional `extended leading-code', and one or two
1875    `position-code's.  The length of the sequence is determined by the
1876    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1877    whereas extended leading-code and position-code take the range 0xA0
1878    through 0xFF.  See `charset.h' for more details about leading-code
1879    and position-code.
1880
1881    --- CODE RANGE of Emacs' internal format ---
1882    character set        range
1883    -------------        -----
1884    ascii                0x00..0x7F
1885    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1886    eight-bit-graphic    0xA0..0xFF
1887    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1888    ---------------------------------------------
1889
1890    As this is the internal character representation, the format is
1891    usually not used externally (i.e. in a file or in data sent to a
1892    process).  However, it is possible to have text externally in this
1893    format (i.e. by encoding by the coding system `emacs-mule').
1894
1895    In that case, a sequence of one-byte codes has a slightly different
1896    form.
1897
1898    First, all characters in eight-bit-control are represented by
1899    one-byte sequences which are their 8-bit code.
1900
1901    Next, character composition data are represented by the byte
1902    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1903    where,
1904         METHOD is 0xF2 plus a composition method (a value of enum
1905         composition_method),
1906
1907         BYTES is 0xA0 plus a byte length of this composition data,
1908
1909         CHARS is 0xA0 plus a number of characters composed by this
1910         data,
1911
1912         COMPONENTs are characters of multibyte form or composition
1913         rules encoded as two bytes of ASCII codes.
1914
1915    In addition, for backward compatibility, the following formats are
1916    also recognized as composition data on decoding.
1917
1918    0x80 MSEQ ...
1919    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1920
1921    Here,
1922         MSEQ is a multibyte form, but in one of these special formats:
1923           ASCII: 0xA0 ASCII_CODE+0x80,
1924           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1925         RULE is a one byte code of the range 0xA0..0xF0 that
1926         represents a composition rule.
1927   */
1928
1929 char emacs_mule_bytes[256];
1930
1931
1932 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1933    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1934    else return 0.  */
1935
1936 static int
1937 detect_coding_emacs_mule (struct coding_system *coding,
1938                           struct coding_detection_info *detect_info)
1939 {
1940   const unsigned char *src = coding->source, *src_base;
1941   const unsigned char *src_end = coding->source + coding->src_bytes;
1942   int multibytep = coding->src_multibyte;
1943   ptrdiff_t consumed_chars = 0;
1944   int c;
1945   int found = 0;
1946
1947   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1948   /* A coding system of this category is always ASCII compatible.  */
1949   src += coding->head_ascii;
1950
1951   while (1)
1952     {
1953       src_base = src;
1954       ONE_MORE_BYTE (c);
1955       if (c < 0)
1956         continue;
1957       if (c == 0x80)
1958         {
1959           /* Perhaps the start of composite character.  We simply skip
1960              it because analyzing it is too heavy for detecting.  But,
1961              at least, we check that the composite character
1962              constitutes of more than 4 bytes.  */
1963           const unsigned char *src_start;
1964
1965         repeat:
1966           src_start = src;
1967           do
1968             {
1969               ONE_MORE_BYTE (c);
1970             }
1971           while (c >= 0xA0);
1972
1973           if (src - src_start <= 4)
1974             break;
1975           found = CATEGORY_MASK_EMACS_MULE;
1976           if (c == 0x80)
1977             goto repeat;
1978         }
1979
1980       if (c < 0x80)
1981         {
1982           if (c < 0x20
1983               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1984             break;
1985         }
1986       else
1987         {
1988           int more_bytes = emacs_mule_bytes[c] - 1;
1989
1990           while (more_bytes > 0)
1991             {
1992               ONE_MORE_BYTE (c);
1993               if (c < 0xA0)
1994                 {
1995                   src--;        /* Unread the last byte.  */
1996                   break;
1997                 }
1998               more_bytes--;
1999             }
2000           if (more_bytes != 0)
2001             break;
2002           found = CATEGORY_MASK_EMACS_MULE;
2003         }
2004     }
2005   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2006   return 0;
2007
2008  no_more_source:
2009   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2010     {
2011       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2012       return 0;
2013     }
2014   detect_info->found |= found;
2015   return 1;
2016 }
2017
2018
2019 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2020    character.  If CMP_STATUS indicates that we must expect MSEQ or
2021    RULE described above, decode it and return the negative value of
2022    the decoded character or rule.  If an invalid byte is found, return
2023    -1.  If SRC is too short, return -2.  */
2024
2025 static int
2026 emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2027                  int *nbytes, int *nchars, int *id,
2028                  struct composition_status *cmp_status)
2029 {
2030   const unsigned char *src_end = coding->source + coding->src_bytes;
2031   const unsigned char *src_base = src;
2032   int multibytep = coding->src_multibyte;
2033   int charset_ID;
2034   unsigned code;
2035   int c;
2036   int consumed_chars = 0;
2037   int mseq_found = 0;
2038
2039   ONE_MORE_BYTE (c);
2040   if (c < 0)
2041     {
2042       c = -c;
2043       charset_ID = emacs_mule_charset[0];
2044     }
2045   else
2046     {
2047       if (c >= 0xA0)
2048         {
2049           if (cmp_status->state != COMPOSING_NO
2050               && cmp_status->old_form)
2051             {
2052               if (cmp_status->state == COMPOSING_CHAR)
2053                 {
2054                   if (c == 0xA0)
2055                     {
2056                       ONE_MORE_BYTE (c);
2057                       c -= 0x80;
2058                       if (c < 0)
2059                         goto invalid_code;
2060                     }
2061                   else
2062                     c -= 0x20;
2063                   mseq_found = 1;
2064                 }
2065               else
2066                 {
2067                   *nbytes = src - src_base;
2068                   *nchars = consumed_chars;
2069                   return -c;
2070                 }
2071             }
2072           else
2073             goto invalid_code;
2074         }
2075
2076       switch (emacs_mule_bytes[c])
2077         {
2078         case 2:
2079           if ((charset_ID = emacs_mule_charset[c]) < 0)
2080             goto invalid_code;
2081           ONE_MORE_BYTE (c);
2082           if (c < 0xA0)
2083             goto invalid_code;
2084           code = c & 0x7F;
2085           break;
2086
2087         case 3:
2088           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2089               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2090             {
2091               ONE_MORE_BYTE (c);
2092               if (c < 0xA0 || (charset_ID = emacs_mule_charset[c]) < 0)
2093                 goto invalid_code;
2094               ONE_MORE_BYTE (c);
2095               if (c < 0xA0)
2096                 goto invalid_code;
2097               code = c & 0x7F;
2098             }
2099           else
2100             {
2101               if ((charset_ID = emacs_mule_charset[c]) < 0)
2102                 goto invalid_code;
2103               ONE_MORE_BYTE (c);
2104               if (c < 0xA0)
2105                 goto invalid_code;
2106               code = (c & 0x7F) << 8;
2107               ONE_MORE_BYTE (c);
2108               if (c < 0xA0)
2109                 goto invalid_code;
2110               code |= c & 0x7F;
2111             }
2112           break;
2113
2114         case 4:
2115           ONE_MORE_BYTE (c);
2116           if (c < 0 || (charset_ID = emacs_mule_charset[c]) < 0)
2117             goto invalid_code;
2118           ONE_MORE_BYTE (c);
2119           if (c < 0xA0)
2120             goto invalid_code;
2121           code = (c & 0x7F) << 8;
2122           ONE_MORE_BYTE (c);
2123           if (c < 0xA0)
2124             goto invalid_code;
2125           code |= c & 0x7F;
2126           break;
2127
2128         case 1:
2129           code = c;
2130           charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
2131           break;
2132
2133         default:
2134           abort ();
2135         }
2136       CODING_DECODE_CHAR (coding, src, src_base, src_end,
2137                           CHARSET_FROM_ID (charset_ID), code, c);
2138       if (c < 0)
2139         goto invalid_code;
2140     }
2141   *nbytes = src - src_base;
2142   *nchars = consumed_chars;
2143   if (id)
2144     *id = charset_ID;
2145   return (mseq_found ? -c : c);
2146
2147  no_more_source:
2148   return -2;
2149
2150  invalid_code:
2151   return -1;
2152 }
2153
2154
2155 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2156
2157 /* Handle these composition sequences ('|': the end of header elements,
2158    BYTES and CHARS >= 0xA0):
2159
2160    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2161    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2162    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2163
2164    and these old forms:
2165
2166    (4) relative composition: 0x80 | MSEQ ... MSEQ
2167    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2168
2169    When the starter 0x80 and the following header elements are found,
2170    this annotation header is produced.
2171
2172         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2173
2174    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2175    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2176
2177    Then, upon reading the following elements, these codes are produced
2178    until the composition end is found:
2179
2180    (1) CHAR ... CHAR
2181    (2) ALT ... ALT CHAR ... CHAR
2182    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2183    (4) CHAR ... CHAR
2184    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2185
2186    When the composition end is found, LENGTH and NCHARS in the
2187    annotation header are updated as below:
2188
2189    (1) LENGTH: unchanged, NCHARS: unchanged
2190    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2191    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2192    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2193    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2194
2195    If an error is found while composing, the annotation header is
2196    changed to the original composition header (plus filler -1s) as
2197    below:
2198
2199    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2200    (5)          [ 0x80 0xFF -1 -1- -1 ]
2201
2202    and the sequence [ -2 DECODED-RULE ] is changed to the original
2203    byte sequence as below:
2204         o the original byte sequence is B: [ B -1 ]
2205         o the original byte sequence is B1 B2: [ B1 B2 ]
2206
2207    Most of the routines are implemented by macros because many
2208    variables and labels in the caller decode_coding_emacs_mule must be
2209    accessible, and they are usually called just once (thus doesn't
2210    increase the size of compiled object).  */
2211
/* Decode C, a composition rule byte of an Emacs 20 style composition
   sequence, and set RULE to the decoded rule.  0xA0 is subtracted
   from C in place; if the result is not in 0..80, control jumps to
   the `invalid_code' label of the enclosing function.  The result is
   split into a quotient and a remainder by 9, and a value of 4 in
   either is replaced by 10 before the two are combined with
   COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    (c) -= 0xA0;                                        \
    if (! (0 <= (c) && (c) < 81))                       \
      goto invalid_code;                                \
    gref = (c) / 9;                                     \
    nref = (c) % 9;                                     \
    if (gref == 4)                                      \
      gref = 10;                                        \
    if (nref == 4)                                      \
      nref = 10;                                        \
    (rule) = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
2228
2229
/* Decode a composition rule of an Emacs 21 style composition
   sequence and set RULE to the decoded rule.  The rule takes two
   bytes: C, which has already been read, and the next byte, which is
   read into C with ONE_MORE_BYTE.  Each byte minus 0x20 must be in
   0..80; otherwise control jumps to the `invalid_code' label of the
   enclosing function.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    gref = (c) - 0x20;                                  \
    if (! (0 <= gref && gref < 81))                     \
      goto invalid_code;                                \
    ONE_MORE_BYTE (c);                                  \
    nref = (c) - 0x20;                                  \
    if (! (0 <= nref && nref < 81))                     \
      goto invalid_code;                                \
    (rule) = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
2247
2248
/* Start of Emacs 21 style format.  On entry the variable `c' of the
   enclosing function holds the METHOD byte (0xF2 plus the composition
   method).  The next two bytes are read from the source: BYTES (0xA0
   plus the byte length of this composition information) and CHARS
   (0xA0 plus the number of characters composed).

   Control jumps to `invalid_code' if the BYTES byte is negative, if
   the byte length is below 3, if the method is COMPOSITION_RELATIVE
   and the byte length is not 4, or if the number of characters is not
   in 1..MAX_COMPOSITION_COMPONENTS-1.  Otherwise *CMP_STATUS is set
   up for the new composition and the annotation header is added to
   `charbuf' with ADD_COMPOSITION_DATA.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    if (method == COMPOSITION_RELATIVE && nbytes != 4)                  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (! (0 < nchars && nchars < MAX_COMPOSITION_COMPONENTS))          \
      goto invalid_code;                                                \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    cmp_status->state = (method == COMPOSITION_RELATIVE                 \
                         ? COMPOSING_CHAR                               \
                         : COMPOSING_COMPONENT_CHAR);                   \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
2280
2281
/* Start of Emacs 20 style format for relative composition.  Mark
   *CMP_STATUS as an old-form COMPOSITION_RELATIVE composition that
   expects a character next, with no characters or components counted
   yet, and add an annotation header with zero NCHARS and NBYTES to
   `charbuf'.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  do {                                                          \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->method = COMPOSITION_RELATIVE;                  \
    cmp_status->old_form = 1;                                   \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2293
2294
/* Start of Emacs 20 style format for rule-base composition.  Mark
   *CMP_STATUS as an old-form COMPOSITION_WITH_RULE composition that
   expects a character next, with no characters or components counted
   yet, and add an annotation header with zero NCHARS and NBYTES to
   `charbuf'.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  do {                                                          \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->method = COMPOSITION_WITH_RULE;                 \
    cmp_status->old_form = 1;                                   \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2306
2307
/* Read the byte that follows a composition starter and begin the
   matching kind of composition:

     0xF2 + a composition method   Emacs 21 style header
     0xA0..0xBF                    Emacs 20 style relative composition;
                                   the byte is un-read so that it is
                                   read again as a component
     0xFF                          Emacs 20 style rule-base composition

   A negative byte or any other byte jumps to the `invalid_code' label
   of the enclosing function.  */

#define DECODE_EMACS_MULE_COMPOSITION_START()           \
  do {                                                  \
    const unsigned char *current_src = src;             \
                                                        \
    ONE_MORE_BYTE (c);                                  \
    if (c < 0)                                          \
      goto invalid_code;                                \
    if (c - 0xF2 >= COMPOSITION_RELATIVE                \
        && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
      {                                                 \
        DECODE_EMACS_MULE_21_COMPOSITION ();            \
      }                                                 \
    else if (c >= 0xA0 && c < 0xC0)                     \
      {                                                 \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
        /* Re-read C as a composition component.  */    \
        src = current_src;                              \
      }                                                 \
    else if (c == 0xFF)                                 \
      {                                                 \
        DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();   \
      }                                                 \
    else                                                \
      goto invalid_code;                                \
  } while (0)
2331
/* Finish the current composition normally.  The annotation header
   starts CMP_STATUS->length elements before `charbuf'.  For an old
   form composition, element 2 of the header is set to the number of
   characters counted in CMP_STATUS->nchars.  For a new form
   composition whose method is above COMPOSITION_RELATIVE, element 0
   of the header is set to element 2 minus CMP_STATUS->length.  In all
   cases the state becomes COMPOSING_NO.  */

#define EMACS_MULE_COMPOSITION_END()                            \
  do {                                                          \
    int idx = - cmp_status->length;                             \
                                                                \
    if (! cmp_status->old_form)                                 \
      {                                                         \
        if (cmp_status->method > COMPOSITION_RELATIVE)          \
          charbuf[idx] = charbuf[idx + 2] - cmp_status->length; \
      }                                                         \
    else                                                        \
      charbuf[idx + 2] = cmp_status->nchars;                    \
    cmp_status->state = COMPOSING_NO;                           \
  } while (0)
2342
2343
2344 static int
2345 emacs_mule_finish_composition (int *charbuf,
2346                                struct composition_status *cmp_status)
2347 {
2348   int idx = - cmp_status->length;
2349   int new_chars;
2350
2351   if (cmp_status->old_form && cmp_status->nchars > 0)
2352     {
2353       charbuf[idx + 2] = cmp_status->nchars;
2354       new_chars = 0;
2355       if (cmp_status->method == COMPOSITION_WITH_RULE
2356           && cmp_status->state == COMPOSING_CHAR)
2357         {
2358           /* The last rule was invalid.  */
2359           int rule = charbuf[-1] + 0xA0;
2360
2361           charbuf[-2] = BYTE8_TO_CHAR (rule);
2362           charbuf[-1] = -1;
2363           new_chars = 1;
2364         }
2365     }
2366   else
2367     {
2368       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2369
2370       if (cmp_status->method == COMPOSITION_WITH_RULE)
2371         {
2372           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2373           charbuf[idx++] = -3;
2374           charbuf[idx++] = 0;
2375           new_chars = 1;
2376         }
2377       else
2378         {
2379           int nchars = charbuf[idx + 1] + 0xA0;
2380           int nbytes = charbuf[idx + 2] + 0xA0;
2381
2382           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2383           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2384           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2385           charbuf[idx++] = -1;
2386           new_chars = 4;
2387         }
2388     }
2389   cmp_status->state = COMPOSING_NO;
2390   return new_chars;
2391 }
2392
/* If a composition is in progress, terminate it with
   emacs_mule_finish_composition and advance `char_offset' by the
   number of characters that call reports.  Does nothing in the
   COMPOSING_NO state.  Expects `charbuf', `cmp_status' and
   `char_offset' in the expanding scope.  */
#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                           \
  do {                                                                  \
    if (cmp_status->state != COMPOSING_NO)                              \
      {                                                                 \
        int finished_chars                                              \
          = emacs_mule_finish_composition (charbuf, cmp_status);        \
                                                                        \
        char_offset += finished_chars;                                  \
      }                                                                 \
  } while (0)
2398
2399
/* Decode emacs-mule encoded bytes of CODING.

   Input is read from CODING->source, starting CODING->consumed bytes
   in and ending at CODING->src_bytes.  Decoded characters and
   annotations are appended to CODING->charbuf after the first
   CODING->charbuf_used elements.

   Composition state lives in CODING->spec.emacs_mule.cmp_status.  A
   composition still open when the function stops is either finished
   (when CODING_MODE_LAST_BLOCK is set) or saved in its `carryover'
   array and replayed at the start of the next call.

   On exit CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated; CODING->errors is incremented for
   every invalid byte, which is passed through literally.  */
2400 static void
2401 decode_coding_emacs_mule (struct coding_system *coding)
2402 {
2403   const unsigned char *src = coding->source + coding->consumed;
2404   const unsigned char *src_end = coding->source + coding->src_bytes;
2405   const unsigned char *src_base;
2406   int *charbuf = coding->charbuf + coding->charbuf_used;
2407   /* We may produce two annotations (charset and composition) in one
2408      loop and one more charset annotation at the end.  */
2409   int *charbuf_end
2410     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3)
2411       /* We can produce up to 2 characters in a loop.  */
2412       - 1;
2413   ptrdiff_t consumed_chars = 0, consumed_chars_base;
2414   int multibytep = coding->src_multibyte;
2415   ptrdiff_t char_offset = coding->produced_char;
2416   ptrdiff_t last_offset = char_offset;
2417   int last_id = charset_ascii;
2418   int eol_dos =
2419     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2420   int byte_after_cr = -1;
2421   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2422
       /* Resume a composition left open by the previous call: replay the
          slots that were saved in the carryover array.  */
2423   if (cmp_status->state != COMPOSING_NO)
2424     {
2425       int i;
2426
2427       if (charbuf_end - charbuf < cmp_status->length)
2428         abort ();
2429       for (i = 0; i < cmp_status->length; i++)
2430         *charbuf++ = cmp_status->carryover[i];
2431       coding->annotated = 1;
2432     }
2433
2434   while (1)
2435     {
2436       int c, id IF_LINT (= 0);
2437
           /* Remember where this iteration started so that it can be
              undone (see `invalid_code' and the bookkeeping after the
              loop, which commits only up to SRC_BASE).  */
2438       src_base = src;
2439       consumed_chars_base = consumed_chars;
2440
           /* Output buffer exhausted.  A byte already fetched after a CR
              has not been processed, so give it back.  */
2441       if (charbuf >= charbuf_end)
2442         {
2443           if (byte_after_cr >= 0)
2444             src_base--;
2445           break;
2446         }
2447
2448       if (byte_after_cr >= 0)
2449         c = byte_after_cr, byte_after_cr = -1;
2450       else
2451         ONE_MORE_BYTE (c);
2452
           /* 0x80 introduces a composition.  A negative C is stored
              negated as a character.
              NOTE(review): presumably ONE_MORE_BYTE yields a negative
              value for a raw byte found in multibyte source -- confirm
              against its definition.  */
2453       if (c < 0 || c == 0x80)
2454         {
2455           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2456           if (c < 0)
2457             {
2458               *charbuf++ = -c;
2459               char_offset++;
2460             }
2461           else
2462             DECODE_EMACS_MULE_COMPOSITION_START ();
2463           continue;
2464         }
2465
2466       if (c < 0x80)
2467         {
               /* With DOS end-of-line conversion, pre-read the byte
                  following a CR; it is consumed by the next iteration.  */
2468           if (eol_dos && c == '\r')
2469             ONE_MORE_BYTE (byte_after_cr);
2470           id = charset_ascii;
2471           if (cmp_status->state != COMPOSING_NO)
2472             {
2473               if (cmp_status->old_form)
2474                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2475               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2476                 cmp_status->ncomps--;
2477             }
2478         }
2479       else
2480         {
2481           int nchars IF_LINT (= 0), nbytes IF_LINT (= 0);
2482           /* emacs_mule_char can load a charset map from a file, which
2483              allocates a large structure and might cause buffer text
2484              to be relocated as result.  Thus, we need to remember the
2485              original pointer to buffer text, and fix up all related
2486              pointers after the call.  */
2487           const unsigned char *orig = coding->source;
2488           ptrdiff_t offset;
2489
2490           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2491                                cmp_status);
2492           offset = coding->source - orig;
2493           if (offset)
2494             {
2495               src += offset;
2496               src_base += offset;
2497               src_end += offset;
2498             }
               /* -1 is treated as an invalid sequence and -2 stops
                  decoding; other negative values fall through (see the
                  comment below the branch).  */
2499           if (c < 0)
2500             {
2501               if (c == -1)
2502                 goto invalid_code;
2503               if (c == -2)
2504                 break;
2505             }
2506           src = src_base + nbytes;
2507           consumed_chars = consumed_chars_base + nchars;
2508           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2509             cmp_status->ncomps -= nchars;
2510         }
2511
2512       /* Now if C >= 0, we found a normally encoded character, if C <
2513          0, we found an old-style composition component character or
2514          rule.  */
2515
2516       if (cmp_status->state == COMPOSING_NO)
2517         {
               /* The charset changed: close the run of the previous
                  non-ASCII charset with a charset annotation.  */
2518           if (last_id != id)
2519             {
2520               if (last_id != charset_ascii)
2521                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2522                                   last_id);
2523               last_id = id;
2524               last_offset = char_offset;
2525             }
2526           *charbuf++ = c;
2527           char_offset++;
2528         }
2529       else if (cmp_status->state == COMPOSING_CHAR)
2530         {
2531           if (cmp_status->old_form)
2532             {
2533               if (c >= 0)
2534                 {
2535                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2536                   *charbuf++ = c;
2537                   char_offset++;
2538                 }
2539               else
2540                 {
2541                   *charbuf++ = -c;
2542                   cmp_status->nchars++;
2543                   cmp_status->length++;
2544                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2545                     EMACS_MULE_COMPOSITION_END ();
2546                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2547                     cmp_status->state = COMPOSING_RULE;
2548                 }
2549             }
2550           else
2551             {
                   /* New form: NCHARS counts down the characters still
                      expected; the composition ends when it reaches 0.  */
2552               *charbuf++ = c;
2553               cmp_status->length++;
2554               cmp_status->nchars--;
2555               if (cmp_status->nchars == 0)
2556                 EMACS_MULE_COMPOSITION_END ();
2557             }
2558         }
2559       else if (cmp_status->state == COMPOSING_RULE)
2560         {
2561           int rule;
2562
2563           if (c >= 0)
2564             {
2565               EMACS_MULE_COMPOSITION_END ();
2566               *charbuf++ = c;
2567               char_offset++;
2568             }
2569           else
2570             {
2571               c = -c;
2572               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2573               if (rule < 0)
2574                 goto invalid_code;
                   /* A rule occupies two slots: the marker -2 and the
                      decoded rule.  */
2575               *charbuf++ = -2;
2576               *charbuf++ = rule;
2577               cmp_status->length += 2;
2578               cmp_status->state = COMPOSING_CHAR;
2579             }
2580         }
2581       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2582         {
2583           *charbuf++ = c;
2584           cmp_status->length++;
2585           if (cmp_status->ncomps == 0)
2586             cmp_status->state = COMPOSING_CHAR;
2587           else if (cmp_status->ncomps > 0)
2588             {
2589               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2590                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2591             }
2592           else
2593             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2594         }
2595       else                      /* COMPOSING_COMPONENT_RULE */
2596         {
2597           int rule;
2598
2599           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2600           if (rule < 0)
2601             goto invalid_code;
2602           *charbuf++ = -2;
2603           *charbuf++ = rule;
2604           cmp_status->length += 2;
2605           cmp_status->ncomps--;
2606           if (cmp_status->ncomps > 0)
2607             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2608           else
2609             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2610         }
2611       continue;
2612
           /* Recovery path: close any open composition, rewind to the
              start of this iteration, and pass exactly one byte through
              (as ASCII or as a raw-byte character), counting an error.  */
2613     invalid_code:
2614       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2615       src = src_base;
2616       consumed_chars = consumed_chars_base;
2617       ONE_MORE_BYTE (c);
2618       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2619       char_offset++;
2620       coding->errors++;
2621     }
2622
       /* NOTE(review): nothing in this function jumps to the label
          below; presumably ONE_MORE_BYTE does when the source is
          exhausted -- confirm against its definition.  */
2623  no_more_source:
2624   if (cmp_status->state != COMPOSING_NO)
2625     {
2626       if (coding->mode & CODING_MODE_LAST_BLOCK)
2627         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2628       else
2629         {
2630           int i;
2631
               /* More input will follow: take the partial composition
                  back out of CHARBUF and keep it for the next call.  */
2632           charbuf -= cmp_status->length;
2633           for (i = 0; i < cmp_status->length; i++)
2634             cmp_status->carryover[i] = charbuf[i];
2635         }
2636     }
2637   if (last_id != charset_ascii)
2638     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
       /* Commit only what was fully processed, i.e. up to the start of
          the last (possibly incomplete) iteration.  */
2639   coding->consumed_char += consumed_chars_base;
2640   coding->consumed = src_base - coding->source;
2641   coding->charbuf_used = charbuf - coding->charbuf;
2642 }
2643
2644
/* Store in CODES (room for at least two bytes) the leading code(s)
   that precede the position codes of a character whose charset has
   the emacs-mule id ID:

     ID < 0xA0          CODES[0] = ID,   CODES[1] = 0 (no second code)
     0xA0 <= ID < 0xE0  CODES[0] = 0x9A, CODES[1] = ID
     0xE0 <= ID < 0xF0  CODES[0] = 0x9B, CODES[1] = ID
     0xF0 <= ID < 0xF5  CODES[0] = 0x9C, CODES[1] = ID
     0xF5 <= ID         CODES[0] = 0x9D, CODES[1] = ID

   Both arguments are evaluated more than once, so they must not have
   side effects.  The expansion deliberately does not end in a
   semicolon: the caller supplies it, which keeps the macro usable as
   the body of an unbraced `if' that is followed by `else'.  */
#define EMACS_MULE_LEADING_CODES(id, codes)     \
  do {                                          \
    if ((id) < 0xA0)                            \
      (codes)[0] = (id), (codes)[1] = 0;        \
    else if ((id) < 0xE0)                       \
      (codes)[0] = 0x9A, (codes)[1] = (id);     \
    else if ((id) < 0xF0)                       \
      (codes)[0] = 0x9B, (codes)[1] = (id);     \
    else if ((id) < 0xF5)                       \
      (codes)[0] = 0x9C, (codes)[1] = (id);     \
    else                                        \
      (codes)[0] = 0x9D, (codes)[1] = (id);     \
  } while (0)
2658
2659
2660 static int
2661 encode_coding_emacs_mule (struct coding_system *coding)
2662 {
2663   int multibytep = coding->dst_multibyte;
2664   int *charbuf = coding->charbuf;
2665   int *charbuf_end = charbuf + coding->charbuf_used;
2666   unsigned char *dst = coding->destination + coding->produced;
2667   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2668   int safe_room = 8;
2669   ptrdiff_t produced_chars = 0;
2670   Lisp_Object attrs, charset_list;
2671   int c;
2672   int preferred_charset_id = -1;
2673
2674   CODING_GET_INFO (coding, attrs, charset_list);
2675   if (! EQ (charset_list, Vemacs_mule_charset_list))
2676     {
2677       CODING_ATTR_CHARSET_LIST (attrs)
2678         = charset_list = Vemacs_mule_charset_list;
2679     }
2680
2681   while (charbuf < charbuf_end)
2682     {
2683       ASSURE_DESTINATION (safe_room);
2684       c = *charbuf++;
2685
2686       if (c < 0)
2687         {
2688           /* Handle an annotation.  */
2689           switch (*charbuf)
2690             {
2691             case CODING_ANNOTATE_COMPOSITION_MASK:
2692               /* Not yet implemented.  */
2693               break;
2694             case CODING_ANNOTATE_CHARSET_MASK:
2695               preferred_charset_id = charbuf[3];
2696               if (preferred_charset_id >= 0
2697                   && NILP (Fmemq (make_number (preferred_charset_id),
2698                                   charset_list)))
2699                 preferred_charset_id = -1;
2700               break;
2701             default:
2702               abort ();
2703             }
2704           charbuf += -c - 1;
2705           continue;
2706         }
2707
2708       if (ASCII_CHAR_P (c))
2709         EMIT_ONE_ASCII_BYTE (c);
2710       else if (CHAR_BYTE8_P (c))
2711         {
2712           c = CHAR_TO_BYTE8 (c);
2713           EMIT_ONE_BYTE (c);
2714         }
2715       else
2716         {
2717           struct charset *charset;
2718           unsigned code;
2719           int dimension;
2720           int emacs_mule_id;
2721           unsigned char leading_codes[2];
2722
2723           if (preferred_charset_id >= 0)
2724             {
2725               int result;
2726
2727               charset = CHARSET_FROM_ID (preferred_charset_id);
2728               CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
2729               if (result)
2730                 code = ENCODE_CHAR (charset, c);
2731               else
2732                 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2733                                      &code, charset);
2734             }
2735           else
2736             CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2737                                  &code, charset);
2738           if (! charset)
2739             {
2740               c = coding->default_char;
2741               if (ASCII_CHAR_P (c))
2742                 {
2743                   EMIT_ONE_ASCII_BYTE (c);
2744                   continue;
2745                 }
2746               CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2747                                    &code, charset);
2748             }
2749           dimension = CHARSET_DIMENSION (charset);
2750           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2751           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2752           EMIT_ONE_BYTE (leading_codes[0]);
2753           if (leading_codes[1])
2754             EMIT_ONE_BYTE (leading_codes[1]);
2755           if (dimension == 1)
2756             EMIT_ONE_BYTE (code | 0x80);
2757           else
2758             {
2759               code |= 0x8080;
2760               EMIT_ONE_BYTE (code >> 8);
2761               EMIT_ONE_BYTE (code & 0xFF);
2762             }
2763         }
2764     }
2765   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2766   coding->produced_char += produced_chars;
2767   coding->produced = dst - coding->destination;
2768   return 0;
2769 }
2770
2771 \f
2772 /*** 7. ISO2022 handlers ***/
2773
2774 /* The following note describes the coding system ISO2022 briefly.
2775    Since the intention of this note is to help understand the
2776    functions in this file, some parts are NOT ACCURATE or are OVERLY
2777    SIMPLIFIED.  For thorough understanding, please refer to the
2778    original document of ISO2022.  This is equivalent to the standard
2779    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2780
2781    ISO2022 provides many mechanisms to encode several character sets
2782    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2783    is encoded using bytes less than 128.  This may make the encoded
2784    text a little bit longer, but the text passes more easily through
2785    several types of gateway, some of which strip off the MSB (Most
2786    Significant Bit).
2787
2788    There are two kinds of character sets: control character sets and
2789    graphic character sets.  The former contain control characters such
2790    as `newline' and `escape' to provide control functions (control
2791    functions are also provided by escape sequences).  The latter
2792    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2793    two control character sets and many graphic character sets.
2794
2795    Graphic character sets are classified into one of the following
2796    four classes, according to the number of bytes (DIMENSION) and
2797    number of characters in one dimension (CHARS) of the set:
2798    - DIMENSION1_CHARS94
2799    - DIMENSION1_CHARS96
2800    - DIMENSION2_CHARS94
2801    - DIMENSION2_CHARS96
2802
2803    In addition, each character set is assigned an identification tag,
2804    unique for each set, called the "final character" (denoted as <F>
2805    hereafter).  The <F> of each character set is decided by ECMA(*)
2806    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2807    (0x30..0x3F are for private use only).
2808
2809    Note (*): ECMA = European Computer Manufacturers Association
2810
2811    Here are examples of graphic character sets [NAME(<F>)]:
2812         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2813         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2814         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2815         o DIMENSION2_CHARS96 -- none for the moment
2816
2817    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2818         C0 [0x00..0x1F] -- control character plane 0
2819         GL [0x20..0x7F] -- graphic character plane 0
2820         C1 [0x80..0x9F] -- control character plane 1
2821         GR [0xA0..0xFF] -- graphic character plane 1
2822
2823    A control character set is directly designated and invoked to C0 or
2824    C1 by an escape sequence.  The most common case is that:
2825    - ISO646's  control character set is designated/invoked to C0, and
2826    - ISO6429's control character set is designated/invoked to C1,
2827    and usually these designations/invocations are omitted in encoded
2828    text.  In a 7-bit environment, only C0 can be used, and a control
2829    character for C1 is encoded by an appropriate escape sequence to
2830    fit into the environment.  All control characters for C1 are
2831    defined to have corresponding escape sequences.
2832
2833    A graphic character set is at first designated to one of four
2834    graphic registers (G0 through G3), then these graphic registers are
2835    invoked to GL or GR.  These designations and invocations can be
2836    done independently.  The most common case is that G0 is invoked to
2837    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2838    these invocations and designations are omitted in encoded text.
2839    In a 7-bit environment, only GL can be used.
2840
2841    When a graphic character set of CHARS94 is invoked to GL, codes
2842    0x20 and 0x7F of the GL area work as control characters SPACE and
2843    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2844    be used.
2845
2846    There are two ways of invocation: locking-shift and single-shift.
2847    With locking-shift, the invocation lasts until the next different
2848    invocation, whereas with single-shift, the invocation affects the
2849    following character only and doesn't affect the locking-shift
2850    state.  Invocations are done by the following control characters or
2851    escape sequences:
2852
2853    ----------------------------------------------------------------------
2854    abbrev  function                  cntrl escape seq   description
2855    ----------------------------------------------------------------------
2856    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2857    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2858    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2859    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2860    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2861    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2862    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2863    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2864    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2865    ----------------------------------------------------------------------
2866    (*) These are not used by any known coding system.
2867
2868    Control characters for these functions are defined by macros
2869    ISO_CODE_XXX in `coding.h'.
2870
2871    Designations are done by the following escape sequences:
2872    ----------------------------------------------------------------------
2873    escape sequence      description
2874    ----------------------------------------------------------------------
2875    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2876    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2877    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2878    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2879    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2880    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2881    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2882    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2883    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2884    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2885    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2886    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2887    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2888    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2889    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2890    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2891    ----------------------------------------------------------------------
2892
2893    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2894    of dimension 1, chars 94, and final character <F>, etc...
2895
2896    Note (*): Although these designations are not allowed in ISO2022,
2897    Emacs accepts them on decoding, and produces them on encoding
2898    CHARS96 character sets in a coding system which is characterized as
2899    7-bit environment, non-locking-shift, and non-single-shift.
2900
2901    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2902    '(' must be omitted.  We refer to this as "short-form" hereafter.
2903
2904    Now you may notice that there are a lot of ways of encoding the
2905    same multilingual text in ISO2022.  Actually, there exist many
2906    coding systems such as Compound Text (used in X11's inter-client
2907    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2908    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2909    localized platforms), and all of these are variants of ISO2022.
2910
2911    In addition to the above, Emacs handles two more kinds of escape
2912    sequences: ISO6429's direction specification and Emacs' private
2913    sequence for specifying character composition.
2914
2915    ISO6429's direction specification takes the following form:
2916         o CSI ']'      -- end of the current direction
2917         o CSI '0' ']'  -- end of the current direction
2918         o CSI '1' ']'  -- start of left-to-right text
2919         o CSI '2' ']'  -- start of right-to-left text
2920    The control character CSI (0x9B: control sequence introducer) is
2921    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2922
2923    Character composition specification takes the following form:
2924         o ESC '0' -- start relative composition
2925         o ESC '1' -- end composition
2926         o ESC '2' -- start rule-base composition (*)
2927         o ESC '3' -- start relative composition with alternate chars  (**)
2928         o ESC '4' -- start rule-base composition with alternate chars  (**)
2929   Since these are not standard escape sequences of any ISO standard,
2930   the use of them with these meanings is restricted to Emacs only.
2931
2932   (*) This form is used only in Emacs 20.7 and older versions,
2933   but newer versions can safely decode it.
2934   (**) This form is used only in Emacs 21.1 and newer versions,
2935   and older versions can't decode it.
2936
2937   Here's a list of example usages of these composition escape
2938   sequences (categorized by `enum composition_method').
2939
2940   COMPOSITION_RELATIVE:
2941         ESC 0 CHAR [ CHAR ] ESC 1
2942   COMPOSITION_WITH_RULE:
2943         ESC 2 CHAR [ RULE CHAR ] ESC 1
2944   COMPOSITION_WITH_ALTCHARS:
2945         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2946   COMPOSITION_WITH_RULE_ALTCHARS:
2947         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2948
/* Table of 256 `enum iso_code_class_type' values.
   NOTE(review): presumably indexed by byte value, giving the ISO2022
   class of each byte; the table is neither filled nor read in this
   part of the file -- confirm against its users.  */
2949 static enum iso_code_class_type iso_code_class[256];
2950
/* Nonzero if the charset whose id is ID is safe for CODING, i.e. ID
   lies within 0 .. CODING->max_charset_id and its entry in
   CODING->safe_charsets is not 255, the value that marks an unsafe
   charset.  A negative ID (such as an unassigned table entry) is
   rejected rather than used to index before the start of the table.
   Both arguments are evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id)      \
  ((id) >= 0                            \
   && (id) <= (coding)->max_charset_id  \
   && (coding)->safe_charsets[id] != 255)
2954
2955 static void
2956 setup_iso_safe_charsets (Lisp_Object attrs)
2957 {
2958   Lisp_Object charset_list, safe_charsets;
2959   Lisp_Object request;
2960   Lisp_Object reg_usage;
2961   Lisp_Object tail;
2962   EMACS_INT reg94, reg96;
2963   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2964   int max_charset_id;
2965
2966   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2967   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2968       && ! EQ (charset_list, Viso_2022_charset_list))
2969     {
2970       CODING_ATTR_CHARSET_LIST (attrs)
2971         = charset_list = Viso_2022_charset_list;
2972       ASET (attrs, coding_attr_safe_charsets, Qnil);
2973     }
2974
2975   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2976     return;
2977
2978   max_charset_id = 0;
2979   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2980     {
2981       int id = XINT (XCAR (tail));
2982       if (max_charset_id < id)
2983         max_charset_id = id;
2984     }
2985
2986   safe_charsets = make_uninit_string (max_charset_id + 1);
2987   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
2988   request = AREF (attrs, coding_attr_iso_request);
2989   reg_usage = AREF (attrs, coding_attr_iso_usage);
2990   reg94 = XINT (XCAR (reg_usage));
2991   reg96 = XINT (XCDR (reg_usage));
2992
2993   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2994     {
2995       Lisp_Object id;
2996       Lisp_Object reg;
2997       struct charset *charset;
2998
2999       id = XCAR (tail);
3000       charset = CHARSET_FROM_ID (XINT (id));
3001       reg = Fcdr (Fassq (id, request));
3002       if (! NILP (reg))
3003         SSET (safe_charsets, XINT (id), XINT (reg));
3004       else if (charset->iso_chars_96)
3005         {
3006           if (reg96 < 4)
3007             SSET (safe_charsets, XINT (id), reg96);
3008         }
3009       else
3010         {
3011           if (reg94 < 4)
3012             SSET (safe_charsets, XINT (id), reg94);
3013         }
3014     }
3015   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3016 }
3017
3018
3019 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3020    Check if a text is encoded in one of ISO-2022 based coding systems.
3021    If it is, return 1, else return 0.  */
3022
3023 static int
3024 detect_coding_iso_2022 (struct coding_system *coding,
3025                         struct coding_detection_info *detect_info)
3026 {
3027   const unsigned char *src = coding->source, *src_base = src;
3028   const unsigned char *src_end = coding->source + coding->src_bytes;
3029   int multibytep = coding->src_multibyte;
3030   int single_shifting = 0;
3031   int id;
3032   int c, c1;
3033   ptrdiff_t consumed_chars = 0;
3034   int i;
3035   int rejected = 0;
3036   int found = 0;
3037   int composition_count = -1;
3038
3039   detect_info->checked |= CATEGORY_MASK_ISO;
3040
3041   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3042     {
3043       struct coding_system *this = &(coding_categories[i]);
3044       Lisp_Object attrs, val;
3045
3046       if (this->id < 0)
3047         continue;
3048       attrs = CODING_ID_ATTRS (this->id);
3049       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3050           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3051         setup_iso_safe_charsets (attrs);
3052       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3053       this->max_charset_id = SCHARS (val) - 1;
3054       this->safe_charsets = SDATA (val);
3055     }
3056
3057   /* A coding system of this category is always ASCII compatible.  */
3058   src += coding->head_ascii;
3059
3060   while (rejected != CATEGORY_MASK_ISO)
3061     {
3062       src_base = src;
3063       ONE_MORE_BYTE (c);
3064       switch (c)
3065         {
3066         case ISO_CODE_ESC:
3067           if (inhibit_iso_escape_detection)
3068             break;
3069           single_shifting = 0;
3070           ONE_MORE_BYTE (c);
3071           if (c == 'N' || c == 'O')
3072             {
3073               /* ESC <Fe> for SS2 or SS3.  */
3074               single_shifting = 1;
3075               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3076             }
3077           else if (c == '1')
3078             {
3079               /* End of composition.  */
3080               if (composition_count < 0
3081                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3082                 /* Invalid */
3083                 break;
3084               composition_count = -1;
3085               found |= CATEGORY_MASK_ISO;
3086             }
3087           else if (c >= '0' && c <= '4')
3088             {
3089               /* ESC <Fp> for start/end composition.  */
3090               composition_count = 0;
3091             }
3092           else
3093             {
3094               if (c >= '(' && c <= '/')
3095                 {
3096                   /* Designation sequence for a charset of dimension 1.  */
3097                   ONE_MORE_BYTE (c1);
3098                   if (c1 < ' ' || c1 >= 0x80
3099                       || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3100                     /* Invalid designation sequence.  Just ignore.  */
3101                     break;
3102                 }
3103               else if (c == '$')
3104                 {
3105                   /* Designation sequence for a charset of dimension 2.  */
3106                   ONE_MORE_BYTE (c);
3107                   if (c >= '@' && c <= 'B')
3108                     /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3109                     id = iso_charset_table[1][0][c];
3110                   else if (c >= '(' && c <= '/')
3111                     {
3112                       ONE_MORE_BYTE (c1);
3113                       if (c1 < ' ' || c1 >= 0x80
3114                           || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3115                         /* Invalid designation sequence.  Just ignore.  */
3116                         break;
3117                     }
3118                   else
3119                     /* Invalid designation sequence.  Just ignore it.  */
3120                     break;
3121                 }
3122               else
3123                 {
3124                   /* Invalid escape sequence.  Just ignore it.  */
3125                   break;
3126                 }
3127
3128               /* We found a valid designation sequence for CHARSET.  */
3129               rejected |= CATEGORY_MASK_ISO_8BIT;
3130               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3131                                   id))
3132                 found |= CATEGORY_MASK_ISO_7;
3133               else
3134                 rejected |= CATEGORY_MASK_ISO_7;
3135               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3136                                   id))
3137                 found |= CATEGORY_MASK_ISO_7_TIGHT;
3138               else
3139                 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3140               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3141                                   id))
3142                 found |= CATEGORY_MASK_ISO_7_ELSE;
3143               else
3144                 rejected |= CATEGORY_MASK_ISO_7_ELSE;
3145               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3146                                   id))
3147                 found |= CATEGORY_MASK_ISO_8_ELSE;
3148               else
3149                 rejected |= CATEGORY_MASK_ISO_8_ELSE;
3150             }
3151           break;
3152
3153         case ISO_CODE_SO:
3154         case ISO_CODE_SI:
3155           /* Locking shift out/in.  */
3156           if (inhibit_iso_escape_detection)
3157             break;
3158           single_shifting = 0;
3159           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3160           break;
3161
3162         case ISO_CODE_CSI:
3163           /* Control sequence introducer.  */
3164           single_shifting = 0;
3165           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3166           found |= CATEGORY_MASK_ISO_8_ELSE;
3167           goto check_extra_latin;
3168
3169         case ISO_CODE_SS2:
3170         case ISO_CODE_SS3:
3171           /* Single shift.   */
3172           if (inhibit_iso_escape_detection)
3173             break;
3174           single_shifting = 0;
3175           rejected |= CATEGORY_MASK_ISO_7BIT;
3176           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3177               & CODING_ISO_FLAG_SINGLE_SHIFT)
3178             {
3179               found |= CATEGORY_MASK_ISO_8_1;
3180               single_shifting = 1;
3181             }
3182           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3183               & CODING_ISO_FLAG_SINGLE_SHIFT)
3184             {
3185               found |= CATEGORY_MASK_ISO_8_2;
3186               single_shifting = 1;
3187             }
3188           if (single_shifting)
3189             break;
3190         check_extra_latin:
3191           if (! VECTORP (Vlatin_extra_code_table)
3192               || NILP (AREF (Vlatin_extra_code_table, c)))
3193             {
3194               rejected = CATEGORY_MASK_ISO;
3195               break;
3196             }
3197           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3198               & CODING_ISO_FLAG_LATIN_EXTRA)
3199             found |= CATEGORY_MASK_ISO_8_1;
3200           else
3201             rejected |= CATEGORY_MASK_ISO_8_1;
3202           rejected |= CATEGORY_MASK_ISO_8_2;
3203           break;
3204
3205         default:
3206           if (c < 0)
3207             continue;
3208           if (c < 0x80)
3209             {
3210               if (composition_count >= 0)
3211                 composition_count++;
3212               single_shifting = 0;
3213               break;
3214             }
3215           if (c >= 0xA0)
3216             {
3217               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3218               found |= CATEGORY_MASK_ISO_8_1;
3219               /* Check the length of succeeding codes of the range
3220                  0xA0..0FF.  If the byte length is even, we include
3221                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3222                  only when we are not single shifting.  */
3223               if (! single_shifting
3224                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3225                 {
3226                   int len = 1;
3227                   while (src < src_end)
3228                     {
3229                       src_base = src;
3230                       ONE_MORE_BYTE (c);
3231                       if (c < 0xA0)
3232                         {
3233                           src = src_base;
3234                           break;
3235                         }
3236                       len++;
3237                     }
3238
3239                   if (len & 1 && src < src_end)
3240                     {
3241                       rejected |= CATEGORY_MASK_ISO_8_2;
3242                       if (composition_count >= 0)
3243                         composition_count += len;
3244                     }
3245                   else
3246                     {
3247                       found |= CATEGORY_MASK_ISO_8_2;
3248                       if (composition_count >= 0)
3249                         composition_count += len / 2;
3250                     }
3251                 }
3252               break;
3253             }
3254         }
3255     }
3256   detect_info->rejected |= CATEGORY_MASK_ISO;
3257   return 0;
3258
3259  no_more_source:
3260   detect_info->rejected |= rejected;
3261   detect_info->found |= (found & ~rejected);
3262   return 1;
3263 }
3264
3265
/* Designate to register REG of `coding' the charset that
   ISO_CHARSET_TABLE yields for DIM, CHARS_96 and the final byte FINAL.

   The designation is refused when FINAL is below '0' or not below
   128, when the table lookup yields a negative id, or when the
   charset is not safe for `coding'.  In that case register REG is
   marked with -2 and CHARS_96 is set to -1, which tells the caller
   that the escape sequence should be kept.

   Otherwise the id is stored in register REG, after replacing
   charset_jisx0201_roman by charset_ascii when
   CODING_ISO_FLAG_USE_ROMAN is set, or charset_jisx0208_1978 by
   charset_jisx0208 when CODING_ISO_FLAG_USE_OLDJIS is set.  If the
   register held the -2 mark and the stored id is charset_ascii,
   CHARS_96 is set to -1 as well so that this sequence is kept too.

   CHARS_96 must be a modifiable lvalue; `coding' is taken from the
   expansion context.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id, prev;                                                       \
                                                                        \
    if (final >= '0' && final < 128                                     \
        && (id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0         \
        && SAFE_CHARSET_P (coding, id))                                 \
      {                                                                 \
        /* Remember what was designated before we overwrite it.  */     \
        prev = CODING_ISO_DESIGNATION (coding, reg);                    \
        if (id == charset_jisx0201_roman)                               \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)  \
              id = charset_ascii;                                       \
          }                                                             \
        else if (id == charset_jisx0208_1978)                           \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
              id = charset_jisx0208;                                    \
          }                                                             \
        CODING_ISO_DESIGNATION (coding, reg) = id;                      \
        /* An ASCII designation that follows an invalid designation     \
           to the same register must be kept in the output.  */         \
        if (prev == -2 && id == charset_ascii)                          \
          chars_96 = -1;                                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Invalid or unsafe designation: mark the register.  */        \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
      }                                                                 \
  } while (0)
3298
3299
/* Handle these composition sequences (ALT: alternate char):
3301
3302    (1) relative composition: ESC 0 CHAR ... ESC 1
3303    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3304    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3305    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3306
3307    When the start sequence (ESC 0/2/3/4) is found, this annotation
3308    header is produced.
3309
3310         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3311
3312    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3313    produced until the end sequence (ESC 1) is found:
3314
3315    (1) CHAR ... CHAR
3316    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3317    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3318    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3319
   When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
   annotation header are updated as below:
3322
3323    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3324    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3325    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3326    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3327
   If an error is found while composing, the five elements of the
   annotation header are changed to:

        [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3332
3333    and the sequence [ -2 DECODED-RULE ] is changed to the original
3334    byte sequence as below:
3335         o the original byte sequence is B: [ B -1 ]
3336         o the original byte sequence is B1 B2: [ B1 B2 ]
3337    and the sequence [ -1 -1 ] is changed to the original byte
3338    sequence:
3339         [ ESC '0' ]
3340 */
3341
/* Decode the composition rule whose first byte is C1 (taken from the
   expansion context) and store the encoded rule in RULE, which must
   be an int lvalue.

   Let N be C1 - 32:

     N < 0     The rule is invalid; control jumps to invalid_code.
     N >= 81   New format (after ver.21).  One more byte B is read
               with ONE_MORE_BYTE; N - 81 and B - 32 are checked with
               COMPOSITION_ENCODE_RULE_VALID (jumping to invalid_code
               on failure) and encoded, and 0x100 is added to
               distinguish the result from the old format.
     otherwise Old format (before ver.21).  N / 9 and N % 9 are
               encoded, each with the value 4 replaced by 10.  */

#define DECODE_COMPOSITION_RULE(rule)                                   \
  do {                                                                  \
    rule = c1 - 32;                                                     \
    if (rule < 0)                                                       \
      goto invalid_code;                                                \
    if (rule >= 81)             /* new format (after ver.21) */         \
      {                                                                 \
        int rule_byte2, gref, nref;                                     \
                                                                        \
        ONE_MORE_BYTE (rule_byte2);                                     \
        gref = rule - 81;                                               \
        nref = rule_byte2 - 32;                                         \
        if (! COMPOSITION_ENCODE_RULE_VALID (gref, nref))               \
          goto invalid_code;                                            \
        rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
        rule += 0x100;   /* Distinguish it from the old format.  */     \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (rule) / 9;                                          \
        int nref = (rule) % 9;                                          \
                                                                        \
        gref = (gref == 4 ? 10 : gref);                                 \
        nref = (nref == 4 ? 10 : nref);                                 \
        rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
      }                                                                 \
  } while (0)
3370
/* Turn the encoded composition rule RULE back into the byte(s) it was
   decoded from, storing them in charbuf[idx] and charbuf[idx + 1] and
   adding the number of recovered bytes to new_chars.  `charbuf', `idx'
   and `new_chars' are taken from the expansion context.

   RULE below 0x100 is the old one-byte format: the byte
   32 + GREF * 9 + NREF is stored (with the value 10 of GREF or NREF
   mapped back to 4), followed by -1, and new_chars grows by 1.
   Otherwise RULE is the new two-byte format: 32 + 81 + GREF and
   32 + NREF are stored, and new_chars grows by 2.  In both cases
   GREF and NREF are (RULE % 0x100) / 12 and (RULE % 0x100) % 12.

   RULE is evaluated exactly once, before anything is stored, so it
   may be any int expression, including charbuf[idx + 1] itself.  */
#define ENCODE_COMPOSITION_RULE(rule)                                   \
  do {                                                                  \
    /* Capture the argument first: it usually names one of the slots    \
       that are overwritten below.  */                                  \
    int ecr_rule = (rule);                                              \
    int gref = (ecr_rule % 0x100) / 12;                                 \
    int nref = (ecr_rule % 0x100) % 12;                                 \
                                                                        \
    if (ecr_rule < 0x100)       /* old format */                        \
      {                                                                 \
        if (gref == 10) gref = 4;                                       \
        if (nref == 10) nref = 4;                                       \
        charbuf[idx] = 32 + gref * 9 + nref;                            \
        charbuf[idx + 1] = -1;                                          \
        new_chars++;                                                    \
      }                                                                 \
    else                        /* new format */                        \
      {                                                                 \
        charbuf[idx] = 32 + 81 + gref;                                  \
        charbuf[idx + 1] = 32 + nref;                                   \
        new_chars += 2;                                                 \
      }                                                                 \
  } while (0)
3390
3391 /* Finish the current composition as invalid.  */
3392
3393 static int finish_composition (int *, struct composition_status *);
3394
3395 static int
3396 finish_composition (int *charbuf, struct composition_status *cmp_status)
3397 {
3398   int idx = - cmp_status->length;
3399   int new_chars;
3400
3401   /* Recover the original ESC sequence */
3402   charbuf[idx++] = ISO_CODE_ESC;
3403   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3404                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3405                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3406                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3407                     : '4');
3408   charbuf[idx++] = -2;
3409   charbuf[idx++] = 0;
3410   charbuf[idx++] = -1;
3411   new_chars = cmp_status->nchars;
3412   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3413     for (; idx < 0; idx++)
3414       {
3415         int elt = charbuf[idx];
3416
3417         if (elt == -2)
3418           {
3419             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3420             idx++;
3421           }
3422         else if (elt == -1)
3423           {
3424             charbuf[idx++] = ISO_CODE_ESC;
3425             charbuf[idx] = '0';
3426             new_chars += 2;
3427           }
3428       }
3429   cmp_status->state = COMPOSING_NO;
3430   return new_chars;
3431 }
3432
/* If a composition is in progress (cmp_status->state is not
   COMPOSING_NO), finish it as invalid with finish_composition and add
   the count it returns to char_offset.  `cmp_status', `charbuf' and
   `char_offset' are taken from the expansion context.  */
#define MAYBE_FINISH_COMPOSITION()                                      \
  do {                                                                  \
    if (cmp_status->state == COMPOSING_NO)                              \
      break;                                                            \
    char_offset += finish_composition (charbuf, cmp_status);            \
  } while (0)
3439
/* Handle the composition start sequence ESC 0, ESC 2, ESC 3, or
   ESC 4, whose final byte is C1.

   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   An ESC 0 that arrives while the component part of an ESC 3 or
   ESC 4 composition is being read (state COMPOSING_COMPONENT_CHAR
   with COMPOSITION_WITH_ALTCHARS, or COMPOSING_COMPONENT_RULE with
   COMPOSITION_WITH_RULE_ALTCHARS) only ends that part: [ -1 -1 ] is
   stored and the state becomes COMPOSING_CHAR.

   In every other case a composition in progress is first finished as
   invalid, and then a new annotation header is produced with
   ADD_COMPOSITION_DATA, cmp_status->length being reset to
   MAX_ANNOTATION_LENGTH.  The format overview earlier in this file
   gives the header as:

   [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
*/

#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    if (c1 == '0'                                                          \
        && ((cmp_status->method == COMPOSITION_WITH_ALTCHARS               \
             && cmp_status->state == COMPOSING_COMPONENT_CHAR)             \
            || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS       \
                && cmp_status->state == COMPOSING_COMPONENT_RULE)))        \
      {                                                                    \
        /* End of the alternate chars; composed chars follow.  */          \
        charbuf[0] = -1;                                                   \
        charbuf[1] = -1;                                                   \
        charbuf += 2;                                                      \
        cmp_status->length += 2;                                           \
        cmp_status->state = COMPOSING_CHAR;                                \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        MAYBE_FINISH_COMPOSITION ();                                       \
        if (c1 == '0')                                                     \
          cmp_status->method = COMPOSITION_RELATIVE;                       \
        else if (c1 == '2')                                                \
          cmp_status->method = COMPOSITION_WITH_RULE;                      \
        else if (c1 == '3')                                                \
          cmp_status->method = COMPOSITION_WITH_ALTCHARS;                  \
        else                                                               \
          cmp_status->method = COMPOSITION_WITH_RULE_ALTCHARS;             \
        /* ESC 3 and ESC 4 begin with the component part.  */              \
        if (c1 <= '2')                                                     \
          cmp_status->state = COMPOSING_CHAR;                              \
        else                                                               \
          cmp_status->state = COMPOSING_COMPONENT_CHAR;                    \
        ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
        cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
        cmp_status->ncomps = 0;                                            \
        cmp_status->nchars = 0;                                            \
        coding->annotated = 1;                                             \
      }                                                                    \
  } while (0)
3480
3481
/* Handle the composition end sequence ESC 1.

   The sequence is rejected when no CHAR has been stored
   (cmp_status->nchars is 0), when the state is COMPOSING_CHAR under
   COMPOSITION_WITH_RULE, or when the state is not COMPOSING_CHAR
   under any other method.  The composition is then finished as
   invalid and control jumps to invalid_code.

   Otherwise the annotation header, which starts cmp_status->length
   elements before charbuf, is completed: its first element is
   decreased by NCOMPS + 2 for COMPOSITION_WITH_ALTCHARS or by
   NCOMPS * 3 for COMPOSITION_WITH_RULE_ALTCHARS, and its third
   element is set to NCHARS.  char_offset advances by NCHARS and the
   state returns to COMPOSING_NO.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int *cmp_header;                                                    \
                                                                        \
    if (cmp_status->nchars == 0                                         \
        || ((cmp_status->method == COMPOSITION_WITH_RULE)               \
            == (cmp_status->state == COMPOSING_CHAR)))                  \
      {                                                                 \
        MAYBE_FINISH_COMPOSITION ();                                    \
        goto invalid_code;                                              \
      }                                                                 \
    cmp_header = charbuf - cmp_status->length;                          \
    if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
      cmp_header[0] -= cmp_status->ncomps + 2;                          \
    else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
      cmp_header[0] -= cmp_status->ncomps * 3;                          \
    cmp_header[2] = cmp_status->nchars;                                 \
    char_offset += cmp_status->nchars;                                  \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
3501
/* Append the pair [ -2 RULE ] to charbuf for the composition rule
   RULE, and update cmp_status: its length grows by 2 and its state is
   decremented by one.  `charbuf' and `cmp_status' are taken from the
   expansion context.  */

#define STORE_COMPOSITION_RULE(rule)    \
  do {                                  \
    charbuf[0] = -2;                    \
    charbuf[1] = rule;                  \
    charbuf += 2;                       \
    cmp_status->state--;                \
    cmp_status->length += 2;            \
  } while (0)
3511
/* Append C, a composed char or a component char, to charbuf and
   update cmp_status.  The length grows by 1.  NCHARS is incremented
   when the state is COMPOSING_CHAR, NCOMPS otherwise.  The state is
   then advanced by one when the method is COMPOSITION_WITH_RULE, or
   when it is COMPOSITION_WITH_RULE_ALTCHARS and the state is
   COMPOSING_COMPONENT_CHAR.  `charbuf' and `cmp_status' are taken
   from the expansion context.  */

#define STORE_COMPOSITION_CHAR(c)                                       \
  do {                                                                  \
    *charbuf = (c);                                                     \
    charbuf++;                                                          \
    cmp_status->length += 1;                                            \
    if (cmp_status->state != COMPOSING_CHAR)                            \
      cmp_status->ncomps++;                                             \
    else                                                                \
      cmp_status->nchars++;                                             \
    if (cmp_status->method == COMPOSITION_WITH_RULE)                    \
      cmp_status->state++;                                              \
    else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS       \
             && cmp_status->state == COMPOSING_COMPONENT_CHAR)          \
      cmp_status->state++;                                              \
  } while (0)
3528
3529
3530 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3531
3532 static void
3533 decode_coding_iso_2022 (struct coding_system *coding)
3534 {
3535   const unsigned char *src = coding->source + coding->consumed;
3536   const unsigned char *src_end = coding->source + coding->src_bytes;
3537   const unsigned char *src_base;
3538   int *charbuf = coding->charbuf + coding->charbuf_used;
3539   /* We may produce two annotations (charset and composition) in one
3540      loop and one more charset annotation at the end.  */
3541   int *charbuf_end
3542     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3543   ptrdiff_t consumed_chars = 0, consumed_chars_base;
3544   int multibytep = coding->src_multibyte;
3545   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3546   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3547   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3548   int charset_id_2, charset_id_3;
3549   struct charset *charset;
3550   int c;
3551   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3552   Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
3553   ptrdiff_t char_offset = coding->produced_char;
3554   ptrdiff_t last_offset = char_offset;
3555   int last_id = charset_ascii;
3556   int eol_dos =
3557     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3558   int byte_after_cr = -1;
3559   int i;
3560
3561   setup_iso_safe_charsets (attrs);
3562   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3563
3564   if (cmp_status->state != COMPOSING_NO)
3565     {
3566       if (charbuf_end - charbuf < cmp_status->length)
3567         abort ();
3568       for (i = 0; i < cmp_status->length; i++)
3569         *charbuf++ = cmp_status->carryover[i];
3570       coding->annotated = 1;
3571     }
3572
3573   while (1)
3574     {
3575       int c1, c2, c3;
3576
3577       src_base = src;
3578       consumed_chars_base = consumed_chars;
3579
3580       if (charbuf >= charbuf_end)
3581         {
3582           if (byte_after_cr >= 0)
3583             src_base--;
3584           break;
3585         }
3586
3587       if (byte_after_cr >= 0)
3588         c1 = byte_after_cr, byte_after_cr = -1;
3589       else
3590         ONE_MORE_BYTE (c1);
3591       if (c1 < 0)
3592         goto invalid_code;
3593
3594       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3595         {
3596           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3597           char_offset++;
3598           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3599           continue;
3600         }
3601
3602       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3603         {
3604           if (c1 == ISO_CODE_ESC)
3605             {
3606               if (src + 1 >= src_end)
3607                 goto no_more_source;
3608               *charbuf++ = ISO_CODE_ESC;
3609               char_offset++;
3610               if (src[0] == '%' && src[1] == '@')
3611                 {
3612                   src += 2;
3613                   consumed_chars += 2;
3614                   char_offset += 2;
3615                   /* We are sure charbuf can contain two more chars. */
3616                   *charbuf++ = '%';
3617                   *charbuf++ = '@';
3618                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3619                 }
3620             }
3621           else
3622             {
3623               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3624               char_offset++;
3625             }
3626           continue;
3627         }
3628
3629       if ((cmp_status->state == COMPOSING_RULE
3630            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3631           && c1 != ISO_CODE_ESC)
3632         {
3633           int rule;
3634
3635           DECODE_COMPOSITION_RULE (rule);
3636           STORE_COMPOSITION_RULE (rule);
3637           continue;
3638         }
3639
3640       /* We produce at most one character.  */
3641       switch (iso_code_class [c1])
3642         {
3643         case ISO_0x20_or_0x7F:
3644           if (charset_id_0 < 0
3645               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3646             /* This is SPACE or DEL.  */
3647             charset = CHARSET_FROM_ID (charset_ascii);
3648           else
3649             charset = CHARSET_FROM_ID (charset_id_0);
3650           break;
3651
3652         case ISO_graphic_plane_0:
3653           if (charset_id_0 < 0)
3654             charset = CHARSET_FROM_ID (charset_ascii);
3655           else
3656             charset = CHARSET_FROM_ID (charset_id_0);
3657           break;
3658
3659         case ISO_0xA0_or_0xFF:
3660           if (charset_id_1 < 0
3661               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3662               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3663             goto invalid_code;
3664           /* This is a graphic character, we fall down ... */
3665
3666         case ISO_graphic_plane_1:
3667           if (charset_id_1 < 0)
3668             goto invalid_code;
3669           charset = CHARSET_FROM_ID (charset_id_1);
3670           break;
3671
3672         case ISO_control_0:
3673           if (eol_dos && c1 == '\r')
3674             ONE_MORE_BYTE (byte_after_cr);
3675           MAYBE_FINISH_COMPOSITION ();
3676           charset = CHARSET_FROM_ID (charset_ascii);
3677           break;
3678
3679         case ISO_control_1:
3680           goto invalid_code;
3681
3682         case ISO_shift_out:
3683           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3684               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3685             goto invalid_code;
3686           CODING_ISO_INVOCATION (coding, 0) = 1;
3687           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3688           continue;
3689
3690         case ISO_shift_in:
3691           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3692             goto invalid_code;
3693           CODING_ISO_INVOCATION (coding, 0) = 0;
3694           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3695           continue;
3696
3697         case ISO_single_shift_2_7:
3698           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3699             goto invalid_code;
3700         case ISO_single_shift_2:
3701           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3702             goto invalid_code;
3703           /* SS2 is handled as an escape sequence of ESC 'N' */
3704           c1 = 'N';
3705           goto label_escape_sequence;
3706
3707         case ISO_single_shift_3:
3708           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3709             goto invalid_code;
3710           /* SS2 is handled as an escape sequence of ESC 'O' */
3711           c1 = 'O';
3712           goto label_escape_sequence;
3713
3714         case ISO_control_sequence_introducer:
3715           /* CSI is handled as an escape sequence of ESC '[' ...  */
3716           c1 = '[';
3717           goto label_escape_sequence;
3718
3719         case ISO_escape:
3720           ONE_MORE_BYTE (c1);
3721         label_escape_sequence:
3722           /* Escape sequences handled here are invocation,
3723              designation, direction specification, and character
3724              composition specification.  */
3725           switch (c1)
3726             {
3727             case '&':           /* revision of following character set */
3728               ONE_MORE_BYTE (c1);
3729               if (!(c1 >= '@' && c1 <= '~'))
3730                 goto invalid_code;
3731               ONE_MORE_BYTE (c1);
3732               if (c1 != ISO_CODE_ESC)
3733                 goto invalid_code;
3734               ONE_MORE_BYTE (c1);
3735               goto label_escape_sequence;
3736
3737             case '$':           /* designation of 2-byte character set */
3738               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3739                 goto invalid_code;
3740               {
3741                 int reg, chars96;
3742
3743                 ONE_MORE_BYTE (c1);
3744                 if (c1 >= '@' && c1 <= 'B')
3745                   {     /* designation of JISX0208.1978, GB2312.1980,
3746                            or JISX0208.1980 */
3747                     reg = 0, chars96 = 0;
3748                   }
3749                 else if (c1 >= 0x28 && c1 <= 0x2B)
3750                   { /* designation of DIMENSION2_CHARS94 character set */
3751                     reg = c1 - 0x28, chars96 = 0;
3752                     ONE_MORE_BYTE (c1);
3753                   }
3754                 else if (c1 >= 0x2C && c1 <= 0x2F)
3755                   { /* designation of DIMENSION2_CHARS96 character set */
3756                     reg = c1 - 0x2C, chars96 = 1;
3757                     ONE_MORE_BYTE (c1);
3758                   }
3759                 else
3760                   goto invalid_code;
3761                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3762                 /* We must update these variables now.  */
3763                 if (reg == 0)
3764                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3765                 else if (reg == 1)
3766                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3767                 if (chars96 < 0)
3768                   goto invalid_code;
3769               }
3770               continue;
3771
3772             case 'n':           /* invocation of locking-shift-2 */
3773               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3774                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3775                 goto invalid_code;
3776               CODING_ISO_INVOCATION (coding, 0) = 2;
3777               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3778               continue;
3779
3780             case 'o':           /* invocation of locking-shift-3 */
3781               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3782                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3783                 goto invalid_code;
3784               CODING_ISO_INVOCATION (coding, 0) = 3;
3785               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3786               continue;
3787
3788             case 'N':           /* invocation of single-shift-2 */
3789               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3790                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3791                 goto invalid_code;
3792               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3793               if (charset_id_2 < 0)
3794                 charset = CHARSET_FROM_ID (charset_ascii);
3795               else
3796                 charset = CHARSET_FROM_ID (charset_id_2);
3797               ONE_MORE_BYTE (c1);
3798               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3799                 goto invalid_code;
3800               break;
3801
3802             case 'O':           /* invocation of single-shift-3 */
3803               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3804                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3805                 goto invalid_code;
3806               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3807               if (charset_id_3 < 0)
3808                 charset = CHARSET_FROM_ID (charset_ascii);
3809               else
3810                 charset = CHARSET_FROM_ID (charset_id_3);
3811               ONE_MORE_BYTE (c1);
3812               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3813                 goto invalid_code;
3814               break;
3815
3816             case '0': case '2': case '3': case '4': /* start composition */
3817               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3818                 goto invalid_code;
3819               if (last_id != charset_ascii)
3820                 {
3821                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3822                   last_id = charset_ascii;
3823                   last_offset = char_offset;
3824                 }
3825               DECODE_COMPOSITION_START (c1);
3826               continue;
3827
3828             case '1':           /* end composition */
3829               if (cmp_status->state == COMPOSING_NO)
3830                 goto invalid_code;
3831               DECODE_COMPOSITION_END ();
3832               continue;
3833
3834             case '[':           /* specification of direction */
3835               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3836                 goto invalid_code;
3837               /* For the moment, nested direction is not supported.
3838                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3839                  left-to-right, and nonzero means right-to-left.  */
3840               ONE_MORE_BYTE (c1);
3841               switch (c1)
3842                 {
3843                 case ']':       /* end of the current direction */
3844                   coding->mode &= ~CODING_MODE_DIRECTION;
3845
3846                 case '0':       /* end of the current direction */
3847                 case '1':       /* start of left-to-right direction */
3848                   ONE_MORE_BYTE (c1);
3849                   if (c1 == ']')
3850                     coding->mode &= ~CODING_MODE_DIRECTION;
3851                   else
3852                     goto invalid_code;
3853                   break;
3854
3855                 case '2':       /* start of right-to-left direction */
3856                   ONE_MORE_BYTE (c1);
3857                   if (c1 == ']')
3858                     coding->mode |= CODING_MODE_DIRECTION;
3859                   else
3860                     goto invalid_code;
3861                   break;
3862
3863                 default:
3864                   goto invalid_code;
3865                 }
3866               continue;
3867
3868             case '%':
3869               ONE_MORE_BYTE (c1);
3870               if (c1 == '/')
3871                 {
3872                   /* CTEXT extended segment:
3873                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3874                      We keep these bytes as is for the moment.
3875                      They may be decoded by post-read-conversion.  */
3876                   int dim, M, L;
3877                   int size;
3878
3879                   ONE_MORE_BYTE (dim);
3880                   if (dim < '0' || dim > '4')
3881                     goto invalid_code;
3882                   ONE_MORE_BYTE (M);
3883                   if (M < 128)
3884                     goto invalid_code;
3885                   ONE_MORE_BYTE (L);
3886                   if (L < 128)
3887                     goto invalid_code;
3888                   size = ((M - 128) * 128) + (L - 128);
3889                   if (charbuf + 6 > charbuf_end)
3890                     goto break_loop;
3891                   *charbuf++ = ISO_CODE_ESC;
3892                   *charbuf++ = '%';
3893                   *charbuf++ = '/';
3894                   *charbuf++ = dim;
3895                   *charbuf++ = BYTE8_TO_CHAR (M);
3896                   *charbuf++ = BYTE8_TO_CHAR (L);
3897                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3898                 }
3899               else if (c1 == 'G')
3900                 {
3901                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3902                      ESC % G --UTF-8-BYTES-- ESC % @
3903                      We keep these bytes as is for the moment.
3904                      They may be decoded by post-read-conversion.  */
3905                   if (charbuf + 3 > charbuf_end)
3906                     goto break_loop;
3907                   *charbuf++ = ISO_CODE_ESC;
3908                   *charbuf++ = '%';
3909                   *charbuf++ = 'G';
3910                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3911                 }
3912               else
3913                 goto invalid_code;
3914               continue;
3915               break;
3916
3917             default:
3918               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3919                 goto invalid_code;
3920               {
3921                 int reg, chars96;
3922
3923                 if (c1 >= 0x28 && c1 <= 0x2B)
3924                   { /* designation of DIMENSION1_CHARS94 character set */
3925                     reg = c1 - 0x28, chars96 = 0;
3926                     ONE_MORE_BYTE (c1);
3927                   }
3928                 else if (c1 >= 0x2C && c1 <= 0x2F)
3929                   { /* designation of DIMENSION1_CHARS96 character set */
3930                     reg = c1 - 0x2C, chars96 = 1;
3931                     ONE_MORE_BYTE (c1);
3932                   }
3933                 else
3934                   goto invalid_code;
3935                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3936                 /* We must update these variables now.  */
3937                 if (reg == 0)
3938                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3939                 else if (reg == 1)
3940                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3941                 if (chars96 < 0)
3942                   goto invalid_code;
3943               }
3944               continue;
3945             }
3946           break;
3947
3948         default:
3949           abort ();
3950         }
3951
3952       if (cmp_status->state == COMPOSING_NO
3953           && charset->id != charset_ascii
3954           && last_id != charset->id)
3955         {
3956           if (last_id != charset_ascii)
3957             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3958           last_id = charset->id;
3959           last_offset = char_offset;
3960         }
3961
3962       /* Now we know CHARSET and 1st position code C1 of a character.
3963          Produce a decoded character while getting 2nd and 3rd
3964          position codes C2, C3 if necessary.  */
3965       if (CHARSET_DIMENSION (charset) > 1)
3966         {
3967           ONE_MORE_BYTE (c2);
3968           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3969               || ((c1 & 0x80) != (c2 & 0x80)))
3970             /* C2 is not in a valid range.  */
3971             goto invalid_code;
3972           if (CHARSET_DIMENSION (charset) == 2)
3973             c1 = (c1 << 8) | c2;
3974           else
3975             {
3976               ONE_MORE_BYTE (c3);
3977               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3978                   || ((c1 & 0x80) != (c3 & 0x80)))
3979                 /* C3 is not in a valid range.  */
3980                 goto invalid_code;
3981               c1 = (c1 << 16) | (c2 << 8) | c2;
3982             }
3983         }
3984       c1 &= 0x7F7F7F;
3985       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3986       if (c < 0)
3987         {
3988           MAYBE_FINISH_COMPOSITION ();
3989           for (; src_base < src; src_base++, char_offset++)
3990             {
3991               if (ASCII_BYTE_P (*src_base))
3992                 *charbuf++ = *src_base;
3993               else
3994                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3995             }
3996         }
3997       else if (cmp_status->state == COMPOSING_NO)
3998         {
3999           *charbuf++ = c;
4000           char_offset++;
4001         }
4002       else if ((cmp_status->state == COMPOSING_CHAR
4003                 ? cmp_status->nchars
4004                 : cmp_status->ncomps)
4005                >= MAX_COMPOSITION_COMPONENTS)
4006         {
4007           /* Too long composition.  */
4008           MAYBE_FINISH_COMPOSITION ();
4009           *charbuf++ = c;
4010           char_offset++;
4011         }
4012       else
4013         STORE_COMPOSITION_CHAR (c);
4014       continue;
4015
4016     invalid_code:
4017       MAYBE_FINISH_COMPOSITION ();
4018       src = src_base;
4019       consumed_chars = consumed_chars_base;
4020       ONE_MORE_BYTE (c);
4021       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4022       char_offset++;
4023       coding->errors++;
4024       continue;
4025
4026     break_loop:
4027       break;
4028     }
4029
4030  no_more_source:
4031   if (cmp_status->state != COMPOSING_NO)
4032     {
4033       if (coding->mode & CODING_MODE_LAST_BLOCK)
4034         MAYBE_FINISH_COMPOSITION ();
4035       else
4036         {
4037           charbuf -= cmp_status->length;
4038           for (i = 0; i < cmp_status->length; i++)
4039             cmp_status->carryover[i] = charbuf[i];
4040         }
4041     }
4042   else if (last_id != charset_ascii)
4043     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4044   coding->consumed_char += consumed_chars_base;
4045   coding->consumed = src_base - coding->source;
4046   coding->charbuf_used = charbuf - coding->charbuf;
4047 }
4048
4049
4050 /* ISO2022 encoding stuff.  */
4051
/*
   Saying just "ISO2022" is not enough to describe an encoding; more
   details have to be specified.  In Emacs, each coding system of an
   ISO2022 variant has the following specifications:
        1. Initial designation to G0 through G3.
        2. Allows short-form designation?
        3. ASCII should be designated to G0 before control characters?
        4. ASCII should be designated to G0 at end of line?
        5. 7-bit environment or 8-bit environment?
        6. Use locking-shift?
        7. Use single-shift?
   And the following two are only for Japanese:
        8. Use JISX0201-Roman in place of ASCII?
        9. Use JISX0208-1978 in place of JISX0208-1983?
   These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
   defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
   details.
*/
4070
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advancing DST, and record the new designation in
   CODING.  When CODING has CODING_ISO_FLAG_REVISION set and CHARSET
   has a non-negative revision number, the sequence is preceded by
   ESC & <'@' + revision>.  For a CHARSET of dimension other than 1
   that is a 94-character set designated to register 0 and whose
   <final-char> is '@', 'A', or 'B', the intermediate character is
   omitted (short form) unless CODING has CODING_ISO_FLAG_LONG_FORM
   set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    /* Intermediate characters, indexed by graphic register.  */        \
    const char *intermediates                                           \
      = CHARSET_ISO_CHARS_96 (charset) ? ",-./" : "()*+";               \
    int revision                                                        \
      = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)         \
         ? CHARSET_ISO_REVISION (charset) : -1);                        \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      EMIT_ONE_ASCII_BYTE ('$');                                        \
    /* Only the short form leaves out the intermediate character.  */   \
    if (CHARSET_DIMENSION (charset) == 1                                \
        || CHARSET_ISO_CHARS_96 (charset)                               \
        || (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM)      \
        || (reg) != 0                                                   \
        || final_char < '@' || final_char > 'B')                        \
      EMIT_ONE_ASCII_BYTE (intermediates[reg]);                         \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4118
4119
/* The following two macros produce codes (control character or escape
   sequence) for the ISO2022 single-shift functions (single-shift-2
   and single-shift-3).  When CODING has CODING_ISO_FLAG_SEVEN_BITS
   set they emit ESC N / ESC O; otherwise they emit ISO_CODE_SS2 /
   ISO_CODE_SS3.  Both then set CODING_ISO_SINGLE_SHIFTING (coding).  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4142
4143
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one emits
   its code at DST and then records in CODING that graphic register
   0, 1, 2, or 3 respectively is now invoked to graphic plane 0.

   The emitted codes are ISO_CODE_SI, ISO_CODE_SO, ESC n, and ESC o.
   They must stay in sync with the ISO2022 decoder in this file, which
   reads ESC n as locking-shift-2 and ESC o as locking-shift-3.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3 is ESC o; ESC n is locking-shift-2.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
4174
4175
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has CODING_ISO_FLAG_USE_ROMAN
   set and CHARSET is ASCII, it is replaced by the charset whose id is
   charset_jisx0201_roman.  The macro also uses the variables `coding',
   `dst' and `produced_chars' of the expansion site.

   C1 is emitted with the eighth bit cleared when CHARSET is invoked
   to graphic plane 0 and with it set when invoked to graphic plane 1.
   After a single-shift, the eighth bit is cleared only if CODING has
   CODING_ISO_FLAG_SEVEN_BITS set, and the single-shifting state of
   CODING is reset.  C1 is parenthesized wherever it is used, so any
   expression may be passed for it.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4218
4219
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS set and CHARSET has the id
   charset_jisx0208, it is replaced by the charset whose id is
   charset_jisx0208_1978.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (! CODING_ISO_SINGLE_SHIFTING (coding)                           \
        && id != CODING_ISO_INVOKED_CHARSET (coding, 0)                 \
        && id != CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                 \
        /* CHARSET is not invoked to either graphic plane yet.          \
           Designate it to a graphic register and/or invoke it, then    \
           go around the loop again to produce the character.  */       \
        dst = encode_invocation_designation (charset, coding, dst,      \
                                             &produced_chars);          \
        continue;                                                       \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* The character right after a single-shift code.  */           \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        else                                                            \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                  \
    else                                                                \
      /* Here CHARSET is the one invoked to graphic plane 1.  */        \
      EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                        \
    break;                                                              \
  } while (1)
4262
4263
/* Encode character C of CHARSET.  The code of C is obtained with
   CODING_ENCODE_CHAR.  If CHARSET is of dimension 1, the code is
   handed over as is to ENCODE_ISO_CHARACTER_DIMENSION1; otherwise the
   bits above the low byte and the low byte are handed over as C1 and
   C2 to ENCODE_ISO_CHARACTER_DIMENSION2.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    unsigned code;                                                         \
                                                                           \
    CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code);       \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
4274
4275
4276 /* Produce designation and invocation codes at a place pointed by DST
4277    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4278    Return new DST.  */
4279
4280 static unsigned char *
4281 encode_invocation_designation (struct charset *charset,
4282                                struct coding_system *coding,
4283                                unsigned char *dst, ptrdiff_t *p_nchars)
4284 {
4285   int multibytep = coding->dst_multibyte;
4286   ptrdiff_t produced_chars = *p_nchars;
4287   int reg;                      /* graphic register number */
4288   int id = CHARSET_ID (charset);
4289
4290   /* At first, check designations.  */
4291   for (reg = 0; reg < 4; reg++)
4292     if (id == CODING_ISO_DESIGNATION (coding, reg))
4293       break;
4294
4295   if (reg >= 4)
4296     {
4297       /* CHARSET is not yet designated to any graphic registers.  */
4298       /* At first check the requested designation.  */
4299       reg = CODING_ISO_REQUEST (coding, id);
4300       if (reg < 0)
4301         /* Since CHARSET requests no special designation, designate it
4302            to graphic register 0.  */
4303         reg = 0;
4304
4305       ENCODE_DESIGNATION (charset, reg, coding);
4306     }
4307
4308   if (CODING_ISO_INVOCATION (coding, 0) != reg
4309       && CODING_ISO_INVOCATION (coding, 1) != reg)
4310     {
4311       /* Since the graphic register REG is not invoked to any graphic
4312          planes, invoke it to graphic plane 0.  */
4313       switch (reg)
4314         {
4315         case 0:                 /* graphic register 0 */
4316           ENCODE_SHIFT_IN;
4317           break;
4318
4319         case 1:                 /* graphic register 1 */
4320           ENCODE_SHIFT_OUT;
4321           break;
4322
4323         case 2:                 /* graphic register 2 */
4324           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4325             ENCODE_SINGLE_SHIFT_2;
4326           else
4327             ENCODE_LOCKING_SHIFT_2;
4328           break;
4329
4330         case 3:                 /* graphic register 3 */
4331           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4332             ENCODE_SINGLE_SHIFT_3;
4333           else
4334             ENCODE_LOCKING_SHIFT_3;
4335           break;
4336         }
4337     }
4338
4339   *p_nchars = produced_chars;
4340   return dst;
4341 }
4342
4343
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state: emit shift-in unless graphic
   register 0 is the one invoked to graphic plane 0, then re-designate
   every graphic register that has an initial charset (a non-negative
   CODING_ISO_INITIAL) different from its current designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        /* Nothing to do for a register that has no initial charset     \
           or that still holds it.  */                                  \
        if (CODING_ISO_INITIAL (coding, reg) < 0                        \
            || (CODING_ISO_DESIGNATION (coding, reg)                    \
                == CODING_ISO_INITIAL (coding, reg)))                   \
          continue;                                                     \
        charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));   \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
4362
4363
4364 /* Produce designation sequences of charsets in the line started from
4365    CHARBUF to a place pointed by DST, and return the number of
4366    produced bytes.  DST should not directly point a buffer text area
4367    which may be relocated by char_charset call.
4368
4369    If the current block ends before any end-of-line, we may fail to
4370    find all the necessary designations.  */
4371
4372 static ptrdiff_t
4373 encode_designation_at_bol (struct coding_system *coding,
4374                            int *charbuf, int *charbuf_end,
4375                            unsigned char *dst)
4376 {
4377   unsigned char *orig = dst;
4378   struct charset *charset;
4379   /* Table of charsets to be designated to each graphic register.  */
4380   int r[4];
4381   int c, found = 0, reg;
4382   ptrdiff_t produced_chars = 0;
4383   int multibytep = coding->dst_multibyte;
4384   Lisp_Object attrs;
4385   Lisp_Object charset_list;
4386
4387   attrs = CODING_ID_ATTRS (coding->id);
4388   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4389   if (EQ (charset_list, Qiso_2022))
4390     charset_list = Viso_2022_charset_list;
4391
4392   for (reg = 0; reg < 4; reg++)
4393     r[reg] = -1;
4394
4395   while (charbuf < charbuf_end && found < 4)
4396     {
4397       int id;
4398
4399       c = *charbuf++;
4400       if (c == '\n')
4401         break;
4402       charset = char_charset (c, charset_list, NULL);
4403       id = CHARSET_ID (charset);
4404       reg = CODING_ISO_REQUEST (coding, id);
4405       if (reg >= 0 && r[reg] < 0)
4406         {
4407           found++;
4408           r[reg] = id;
4409         }
4410     }
4411
4412   if (found)
4413     {
4414       for (reg = 0; reg < 4; reg++)
4415         if (r[reg] >= 0
4416             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4417           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4418     }
4419
4420   return dst - orig;
4421 }
4422
4423 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4424
4425 static int
4426 encode_coding_iso_2022 (struct coding_system *coding)
4427 {
4428   int multibytep = coding->dst_multibyte;
4429   int *charbuf = coding->charbuf;
4430   int *charbuf_end = charbuf + coding->charbuf_used;
4431   unsigned char *dst = coding->destination + coding->produced;
4432   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4433   int safe_room = 16;
4434   int bol_designation
4435     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4436        && CODING_ISO_BOL (coding));
4437   ptrdiff_t produced_chars = 0;
4438   Lisp_Object attrs, eol_type, charset_list;
4439   int ascii_compatible;
4440   int c;
4441   int preferred_charset_id = -1;
4442
4443   CODING_GET_INFO (coding, attrs, charset_list);
4444   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4445   if (VECTORP (eol_type))
4446     eol_type = Qunix;
4447
4448   setup_iso_safe_charsets (attrs);
4449   /* Charset list may have been changed.  */
4450   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4451   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4452
4453   ascii_compatible
4454     = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
4455        && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
4456                                           | CODING_ISO_FLAG_LOCKING_SHIFT)));
4457
4458   while (charbuf < charbuf_end)
4459     {
4460       ASSURE_DESTINATION (safe_room);
4461
4462       if (bol_designation)
4463         {
4464           /* We have to produce designation sequences if any now.  */
4465           unsigned char desig_buf[16];
4466           int nbytes;
4467           ptrdiff_t offset;
4468
4469           charset_map_loaded = 0;
4470           nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end,
4471                                               desig_buf);
4472           if (charset_map_loaded
4473               && (offset = coding_change_destination (coding)))
4474             {
4475               dst += offset;
4476               dst_end += offset;
4477             }
4478           memcpy (dst, desig_buf, nbytes);
4479           dst += nbytes;
4480           /* We are sure that designation sequences are all ASCII bytes.  */
4481           produced_chars += nbytes;
4482           bol_designation = 0;
4483           ASSURE_DESTINATION (safe_room);
4484         }
4485
4486       c = *charbuf++;
4487
4488       if (c < 0)
4489         {
4490           /* Handle an annotation.  */
4491           switch (*charbuf)
4492             {
4493             case CODING_ANNOTATE_COMPOSITION_MASK:
4494               /* Not yet implemented.  */
4495               break;
4496             case CODING_ANNOTATE_CHARSET_MASK:
4497               preferred_charset_id = charbuf[2];
4498               if (preferred_charset_id >= 0
4499                   && NILP (Fmemq (make_number (preferred_charset_id),
4500                                   charset_list)))
4501                 preferred_charset_id = -1;
4502               break;
4503             default:
4504               abort ();
4505             }
4506           charbuf += -c - 1;
4507           continue;
4508         }
4509
4510       /* Now encode the character C.  */
4511       if (c < 0x20 || c == 0x7F)
4512         {
4513           if (c == '\n'
4514               || (c == '\r' && EQ (eol_type, Qmac)))
4515             {
4516               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4517                 ENCODE_RESET_PLANE_AND_REGISTER ();
4518               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4519                 {
4520                   int i;
4521
4522                   for (i = 0; i < 4; i++)
4523                     CODING_ISO_DESIGNATION (coding, i)
4524                       = CODING_ISO_INITIAL (coding, i);
4525                 }
4526               bol_designation
4527                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4528             }
4529           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4530             ENCODE_RESET_PLANE_AND_REGISTER ();
4531           EMIT_ONE_ASCII_BYTE (c);
4532         }
4533       else if (ASCII_CHAR_P (c))
4534         {
4535           if (ascii_compatible)
4536             EMIT_ONE_ASCII_BYTE (c);
4537           else
4538             {
4539               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4540               ENCODE_ISO_CHARACTER (charset, c);
4541             }
4542         }
4543       else if (CHAR_BYTE8_P (c))
4544         {
4545           c = CHAR_TO_BYTE8 (c);
4546           EMIT_ONE_BYTE (c);
4547         }
4548       else
4549         {
4550           struct charset *charset;
4551
4552           if (preferred_charset_id >= 0)
4553             {
4554               int result;
4555
4556               charset = CHARSET_FROM_ID (preferred_charset_id);
4557               CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
4558               if (! result)
4559                 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4560                                      NULL, charset);
4561             }
4562           else
4563             CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4564                                  NULL, charset);
4565           if (!charset)
4566             {
4567               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4568                 {
4569                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4570                   charset = CHARSET_FROM_ID (charset_ascii);
4571                 }
4572               else
4573                 {
4574                   c = coding->default_char;
4575                   CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4576                                        charset_list, NULL, charset);
4577                 }
4578             }
4579           ENCODE_ISO_CHARACTER (charset, c);
4580         }
4581     }
4582
4583   if (coding->mode & CODING_MODE_LAST_BLOCK
4584       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4585     {
4586       ASSURE_DESTINATION (safe_room);
4587       ENCODE_RESET_PLANE_AND_REGISTER ();
4588     }
4589   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4590   CODING_ISO_BOL (coding) = bol_designation;
4591   coding->produced_char += produced_chars;
4592   coding->produced = dst - coding->destination;
4593   return 0;
4594 }
4595
4596 \f
4597 /*** 8. SJIS and BIG5 handlers ***/
4598
4599 /* Although SJIS and BIG5 are not ISO coding systems, they are used
4600    quite widely.  So, for the moment, Emacs supports them directly in
4601    C code.  But, in the future, they may be supported only by CCL.  */
4602
4603 /* SJIS is a coding system encoding three character sets: ASCII, the
4604    right half of JISX0201-Kana, and JISX0208.  An ASCII character is
4605    encoded as is.  A character of charset katakana-jisx0201 is encoded
4606    as "position-code + 0x80".  A character of charset japanese-jisx0208
4607    is encoded in two bytes, but the two position-codes are divided and
4608    shifted so that they fit in the ranges below.
4609
4610    --- CODE RANGE of SJIS ---
4611    (character set)      (range)
4612    ASCII                0x00 .. 0x7F
4613    KATAKANA-JISX0201    0xA0 .. 0xDF
4614    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4615             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4616    -------------------------------
4617
4618 */
4619
4620 /* BIG5 is a coding system encoding two character sets: ASCII and
4621    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4622    character set and is encoded in two bytes.
4623
4624    --- CODE RANGE of BIG5 ---
4625    (character set)      (range)
4626    ASCII                0x00 .. 0x7F
4627    Big5 (1st byte)      0xA1 .. 0xFE
4628         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4629    --------------------------
4630
4631   */
4632
4633 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4634    Check if a text is encoded in SJIS.  If it is, return
4635    CATEGORY_MASK_SJIS, else return 0.  */
4636
4637 static int
4638 detect_coding_sjis (struct coding_system *coding,
4639                     struct coding_detection_info *detect_info)
4640 {
4641   const unsigned char *src = coding->source, *src_base;
4642   const unsigned char *src_end = coding->source + coding->src_bytes;
4643   int multibytep = coding->src_multibyte;
4644   ptrdiff_t consumed_chars = 0;
4645   int found = 0;
4646   int c;
4647   Lisp_Object attrs, charset_list;
4648   int max_first_byte_of_2_byte_code;
4649
4650   CODING_GET_INFO (coding, attrs, charset_list);
4651   max_first_byte_of_2_byte_code
4652     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4653
4654   detect_info->checked |= CATEGORY_MASK_SJIS;
4655   /* A coding system of this category is always ASCII compatible.  */
4656   src += coding->head_ascii;
4657
4658   while (1)
4659     {
4660       src_base = src;
4661       ONE_MORE_BYTE (c);
4662       if (c < 0x80)
4663         continue;
4664       if ((c >= 0x81 && c <= 0x9F)
4665           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4666         {
4667           ONE_MORE_BYTE (c);
4668           if (c < 0x40 || c == 0x7F || c > 0xFC)
4669             break;
4670           found = CATEGORY_MASK_SJIS;
4671         }
4672       else if (c >= 0xA0 && c < 0xE0)
4673         found = CATEGORY_MASK_SJIS;
4674       else
4675         break;
4676     }
4677   detect_info->rejected |= CATEGORY_MASK_SJIS;
4678   return 0;
4679
4680  no_more_source:
4681   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4682     {
4683       detect_info->rejected |= CATEGORY_MASK_SJIS;
4684       return 0;
4685     }
4686   detect_info->found |= found;
4687   return 1;
4688 }
4689
4690 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4691    Check if a text is encoded in BIG5.  If it is, return
4692    CATEGORY_MASK_BIG5, else return 0.  */
4693
4694 static int
4695 detect_coding_big5 (struct coding_system *coding,
4696                     struct coding_detection_info *detect_info)
4697 {
4698   const unsigned char *src = coding->source, *src_base;
4699   const unsigned char *src_end = coding->source + coding->src_bytes;
4700   int multibytep = coding->src_multibyte;
4701   ptrdiff_t consumed_chars = 0;
4702   int found = 0;
4703   int c;
4704
4705   detect_info->checked |= CATEGORY_MASK_BIG5;
4706   /* A coding system of this category is always ASCII compatible.  */
4707   src += coding->head_ascii;
4708
4709   while (1)
4710     {
4711       src_base = src;
4712       ONE_MORE_BYTE (c);
4713       if (c < 0x80)
4714         continue;
4715       if (c >= 0xA1)
4716         {
4717           ONE_MORE_BYTE (c);
4718           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4719             return 0;
4720           found = CATEGORY_MASK_BIG5;
4721         }
4722       else
4723         break;
4724     }
4725   detect_info->rejected |= CATEGORY_MASK_BIG5;
4726   return 0;
4727
4728  no_more_source:
4729   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4730     {
4731       detect_info->rejected |= CATEGORY_MASK_BIG5;
4732       return 0;
4733     }
4734   detect_info->found |= found;
4735   return 1;
4736 }
4737
4738 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4739    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4740
4741 static void
4742 decode_coding_sjis (struct coding_system *coding)
4743 {
4744   const unsigned char *src = coding->source + coding->consumed;
4745   const unsigned char *src_end = coding->source + coding->src_bytes;
4746   const unsigned char *src_base;
4747   int *charbuf = coding->charbuf + coding->charbuf_used;
4748   /* We may produce one charset annotation in one loop and one more at
4749      the end.  */
4750   int *charbuf_end
4751     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4752   ptrdiff_t consumed_chars = 0, consumed_chars_base;
4753   int multibytep = coding->src_multibyte;
4754   struct charset *charset_roman, *charset_kanji, *charset_kana;
4755   struct charset *charset_kanji2;
4756   Lisp_Object attrs, charset_list, val;
4757   ptrdiff_t char_offset = coding->produced_char;
4758   ptrdiff_t last_offset = char_offset;
4759   int last_id = charset_ascii;
4760   int eol_dos =
4761     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4762   int byte_after_cr = -1;
4763
4764   CODING_GET_INFO (coding, attrs, charset_list);
4765
4766   val = charset_list;
4767   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4768   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4769   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4770   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4771
4772   while (1)
4773     {
4774       int c, c1;
4775       struct charset *charset;
4776
4777       src_base = src;
4778       consumed_chars_base = consumed_chars;
4779
4780       if (charbuf >= charbuf_end)
4781         {
4782           if (byte_after_cr >= 0)
4783             src_base--;
4784           break;
4785         }
4786
4787       if (byte_after_cr >= 0)
4788         c = byte_after_cr, byte_after_cr = -1;
4789       else
4790         ONE_MORE_BYTE (c);
4791       if (c < 0)
4792         goto invalid_code;
4793       if (c < 0x80)
4794         {
4795           if (eol_dos && c == '\r')
4796             ONE_MORE_BYTE (byte_after_cr);
4797           charset = charset_roman;
4798         }
4799       else if (c == 0x80 || c == 0xA0)
4800         goto invalid_code;
4801       else if (c >= 0xA1 && c <= 0xDF)
4802         {
4803           /* SJIS -> JISX0201-Kana */
4804           c &= 0x7F;
4805           charset = charset_kana;
4806         }
4807       else if (c <= 0xEF)
4808         {
4809           /* SJIS -> JISX0208 */
4810           ONE_MORE_BYTE (c1);
4811           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4812             goto invalid_code;
4813           c = (c << 8) | c1;
4814           SJIS_TO_JIS (c);
4815           charset = charset_kanji;
4816         }
4817       else if (c <= 0xFC && charset_kanji2)
4818         {
4819           /* SJIS -> JISX0213-2 */
4820           ONE_MORE_BYTE (c1);
4821           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4822             goto invalid_code;
4823           c = (c << 8) | c1;
4824           SJIS_TO_JIS2 (c);
4825           charset = charset_kanji2;
4826         }
4827       else
4828         goto invalid_code;
4829       if (charset->id != charset_ascii
4830           && last_id != charset->id)
4831         {
4832           if (last_id != charset_ascii)
4833             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4834           last_id = charset->id;
4835           last_offset = char_offset;
4836         }
4837       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4838       *charbuf++ = c;
4839       char_offset++;
4840       continue;
4841
4842     invalid_code:
4843       src = src_base;
4844       consumed_chars = consumed_chars_base;
4845       ONE_MORE_BYTE (c);
4846       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4847       char_offset++;
4848       coding->errors++;
4849     }
4850
4851  no_more_source:
4852   if (last_id != charset_ascii)
4853     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4854   coding->consumed_char += consumed_chars_base;
4855   coding->consumed = src_base - coding->source;
4856   coding->charbuf_used = charbuf - coding->charbuf;
4857 }
4858
4859 static void
4860 decode_coding_big5 (struct coding_system *coding)
4861 {
4862   const unsigned char *src = coding->source + coding->consumed;
4863   const unsigned char *src_end = coding->source + coding->src_bytes;
4864   const unsigned char *src_base;
4865   int *charbuf = coding->charbuf + coding->charbuf_used;
4866   /* We may produce one charset annotation in one loop and one more at
4867      the end.  */
4868   int *charbuf_end
4869     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4870   ptrdiff_t consumed_chars = 0, consumed_chars_base;
4871   int multibytep = coding->src_multibyte;
4872   struct charset *charset_roman, *charset_big5;
4873   Lisp_Object attrs, charset_list, val;
4874   ptrdiff_t char_offset = coding->produced_char;
4875   ptrdiff_t last_offset = char_offset;
4876   int last_id = charset_ascii;
4877   int eol_dos =
4878     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4879   int byte_after_cr = -1;
4880
4881   CODING_GET_INFO (coding, attrs, charset_list);
4882   val = charset_list;
4883   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4884   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4885
4886   while (1)
4887     {
4888       int c, c1;
4889       struct charset *charset;
4890
4891       src_base = src;
4892       consumed_chars_base = consumed_chars;
4893
4894       if (charbuf >= charbuf_end)
4895         {
4896           if (byte_after_cr >= 0)
4897             src_base--;
4898           break;
4899         }
4900
4901       if (byte_after_cr >= 0)
4902         c = byte_after_cr, byte_after_cr = -1;
4903       else
4904         ONE_MORE_BYTE (c);
4905
4906       if (c < 0)
4907         goto invalid_code;
4908       if (c < 0x80)
4909         {
4910           if (eol_dos && c == '\r')
4911             ONE_MORE_BYTE (byte_after_cr);
4912           charset = charset_roman;
4913         }
4914       else
4915         {
4916           /* BIG5 -> Big5 */
4917           if (c < 0xA1 || c > 0xFE)
4918             goto invalid_code;
4919           ONE_MORE_BYTE (c1);
4920           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4921             goto invalid_code;
4922           c = c << 8 | c1;
4923           charset = charset_big5;
4924         }
4925       if (charset->id != charset_ascii
4926           && last_id != charset->id)
4927         {
4928           if (last_id != charset_ascii)
4929             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4930           last_id = charset->id;
4931           last_offset = char_offset;
4932         }
4933       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4934       *charbuf++ = c;
4935       char_offset++;
4936       continue;
4937
4938     invalid_code:
4939       src = src_base;
4940       consumed_chars = consumed_chars_base;
4941       ONE_MORE_BYTE (c);
4942       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4943       char_offset++;
4944       coding->errors++;
4945     }
4946
4947  no_more_source:
4948   if (last_id != charset_ascii)
4949     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4950   coding->consumed_char += consumed_chars_base;
4951   coding->consumed = src_base - coding->source;
4952   coding->charbuf_used = charbuf - coding->charbuf;
4953 }
4954
4955 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4956    This function can encode charsets `ascii', `katakana-jisx0201',
4957    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4958    are sure that all these charsets are registered as official charset
4959    (i.e. do not have extended leading-codes).  Characters of other
4960    charsets are produced without any encoding.  If SJIS_P is 1, encode
4961    SJIS text, else encode BIG5 text.  */
4962
4963 static int
4964 encode_coding_sjis (struct coding_system *coding)
4965 {
4966   int multibytep = coding->dst_multibyte;
4967   int *charbuf = coding->charbuf;
4968   int *charbuf_end = charbuf + coding->charbuf_used;
4969   unsigned char *dst = coding->destination + coding->produced;
4970   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4971   int safe_room = 4;
4972   ptrdiff_t produced_chars = 0;
4973   Lisp_Object attrs, charset_list, val;
4974   int ascii_compatible;
4975   struct charset *charset_kanji, *charset_kana;
4976   struct charset *charset_kanji2;
4977   int c;
4978
4979   CODING_GET_INFO (coding, attrs, charset_list);
4980   val = XCDR (charset_list);
4981   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4982   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4983   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4984
4985   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4986
4987   while (charbuf < charbuf_end)
4988     {
4989       ASSURE_DESTINATION (safe_room);
4990       c = *charbuf++;
4991       /* Now encode the character C.  */
4992       if (ASCII_CHAR_P (c) && ascii_compatible)
4993         EMIT_ONE_ASCII_BYTE (c);
4994       else if (CHAR_BYTE8_P (c))
4995         {
4996           c = CHAR_TO_BYTE8 (c);
4997           EMIT_ONE_BYTE (c);
4998         }
4999       else
5000         {
5001           unsigned code;
5002           struct charset *charset;
5003           CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5004                                &code, charset);
5005
5006           if (!charset)
5007             {
5008               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5009                 {
5010                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5011                   charset = CHARSET_FROM_ID (charset_ascii);
5012                 }
5013               else
5014                 {
5015                   c = coding->default_char;
5016                   CODING_CHAR_CHARSET (coding, dst, dst_end, c,
5017                                        charset_list, &code, charset);
5018                 }
5019             }
5020           if (code == CHARSET_INVALID_CODE (charset))
5021             abort ();
5022           if (charset == charset_kanji)
5023             {
5024               int c1, c2;
5025               JIS_TO_SJIS (code);
5026               c1 = code >> 8, c2 = code & 0xFF;
5027               EMIT_TWO_BYTES (c1, c2);
5028             }
5029           else if (charset == charset_kana)
5030             EMIT_ONE_BYTE (code | 0x80);
5031           else if (charset_kanji2 && charset == charset_kanji2)
5032             {
5033               int c1, c2;
5034
5035               c1 = code >> 8;
5036               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5037                   || c1 == 0x28
5038                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5039                 {
5040                   JIS_TO_SJIS2 (code);
5041                   c1 = code >> 8, c2 = code & 0xFF;
5042                   EMIT_TWO_BYTES (c1, c2);
5043                 }
5044               else
5045                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5046             }
5047           else
5048             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5049         }
5050     }
5051   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5052   coding->produced_char += produced_chars;
5053   coding->produced = dst - coding->destination;
5054   return 0;
5055 }
5056
5057 static int
5058 encode_coding_big5 (struct coding_system *coding)
5059 {
5060   int multibytep = coding->dst_multibyte;
5061   int *charbuf = coding->charbuf;
5062   int *charbuf_end = charbuf + coding->charbuf_used;
5063   unsigned char *dst = coding->destination + coding->produced;
5064   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5065   int safe_room = 4;
5066   ptrdiff_t produced_chars = 0;
5067   Lisp_Object attrs, charset_list, val;
5068   int ascii_compatible;
5069   struct charset *charset_big5;
5070   int c;
5071
5072   CODING_GET_INFO (coding, attrs, charset_list);
5073   val = XCDR (charset_list);
5074   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5075   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5076
5077   while (charbuf < charbuf_end)
5078     {
5079       ASSURE_DESTINATION (safe_room);
5080       c = *charbuf++;
5081       /* Now encode the character C.  */
5082       if (ASCII_CHAR_P (c) && ascii_compatible)
5083         EMIT_ONE_ASCII_BYTE (c);
5084       else if (CHAR_BYTE8_P (c))
5085         {
5086           c = CHAR_TO_BYTE8 (c);
5087           EMIT_ONE_BYTE (c);
5088         }
5089       else
5090         {
5091           unsigned code;
5092           struct charset *charset;
5093           CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5094                                &code, charset);
5095
5096           if (! charset)
5097             {
5098               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5099                 {
5100                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5101                   charset = CHARSET_FROM_ID (charset_ascii);
5102                 }
5103               else
5104                 {
5105                   c = coding->default_char;
5106                   CODING_CHAR_CHARSET (coding, dst, dst_end, c,
5107                                        charset_list, &code, charset);
5108                 }
5109             }
5110           if (code == CHARSET_INVALID_CODE (charset))
5111             abort ();
5112           if (charset == charset_big5)
5113             {
5114               int c1, c2;
5115
5116               c1 = code >> 8, c2 = code & 0xFF;
5117               EMIT_TWO_BYTES (c1, c2);
5118             }
5119           else
5120             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5121         }
5122     }
5123   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5124   coding->produced_char += produced_chars;
5125   coding->produced = dst - coding->destination;
5126   return 0;
5127 }
5128
5129 \f
5130 /*** 9. CCL handlers ***/
5131
5132 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5133    Check if a text is encoded in a coding system of which
5134    encoder/decoder are written in CCL program.  If it is, return
5135    CATEGORY_MASK_CCL, else return 0.  */
5136
5137 static int
5138 detect_coding_ccl (struct coding_system *coding,
5139                    struct coding_detection_info *detect_info)
5140 {
5141   const unsigned char *src = coding->source, *src_base;
5142   const unsigned char *src_end = coding->source + coding->src_bytes;
5143   int multibytep = coding->src_multibyte;
5144   ptrdiff_t consumed_chars = 0;
5145   int found = 0;
5146   unsigned char *valids;
5147   ptrdiff_t head_ascii = coding->head_ascii;
5148   Lisp_Object attrs;
5149
5150   detect_info->checked |= CATEGORY_MASK_CCL;
5151
5152   coding = &coding_categories[coding_category_ccl];
5153   valids = CODING_CCL_VALIDS (coding);
5154   attrs = CODING_ID_ATTRS (coding->id);
5155   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5156     src += head_ascii;
5157
5158   while (1)
5159     {
5160       int c;
5161
5162       src_base = src;
5163       ONE_MORE_BYTE (c);
5164       if (c < 0 || ! valids[c])
5165         break;
5166       if ((valids[c] > 1))
5167         found = CATEGORY_MASK_CCL;
5168     }
5169   detect_info->rejected |= CATEGORY_MASK_CCL;
5170   return 0;
5171
5172  no_more_source:
5173   detect_info->found |= found;
5174   return 1;
5175 }
5176
5177 static void
5178 decode_coding_ccl (struct coding_system *coding)
5179 {
5180   const unsigned char *src = coding->source + coding->consumed;
5181   const unsigned char *src_end = coding->source + coding->src_bytes;
5182   int *charbuf = coding->charbuf + coding->charbuf_used;
5183   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5184   ptrdiff_t consumed_chars = 0;
5185   int multibytep = coding->src_multibyte;
5186   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5187   int source_charbuf[1024];
5188   int source_byteidx[1025];
5189   Lisp_Object attrs, charset_list;
5190
5191   CODING_GET_INFO (coding, attrs, charset_list);
5192
5193   while (1)
5194     {
5195       const unsigned char *p = src;
5196       int i = 0;
5197
5198       if (multibytep)
5199         {
5200           while (i < 1024 && p < src_end)
5201             {
5202               source_byteidx[i] = p - src;
5203               source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5204             }
5205           source_byteidx[i] = p - src;
5206         }
5207       else
5208         while (i < 1024 && p < src_end)
5209           source_charbuf[i++] = *p++;
5210
5211       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5212         ccl->last_block = 1;
5213       ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5214                   charset_list);
5215       charbuf += ccl->produced;
5216       if (multibytep)
5217         src += source_byteidx[ccl->consumed];
5218       else
5219         src += ccl->consumed;
5220       consumed_chars += ccl->consumed;
5221       if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5222         break;
5223     }
5224
5225   switch (ccl->status)
5226     {
5227     case CCL_STAT_SUSPEND_BY_SRC:
5228       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5229       break;
5230     case CCL_STAT_SUSPEND_BY_DST:
5231       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5232       break;
5233     case CCL_STAT_QUIT:
5234     case CCL_STAT_INVALID_CMD:
5235       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5236       break;
5237     default:
5238       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5239       break;
5240     }
5241   coding->consumed_char += consumed_chars;
5242   coding->consumed = src - coding->source;
5243   coding->charbuf_used = charbuf - coding->charbuf;
5244 }
5245
5246 static int
5247 encode_coding_ccl (struct coding_system *coding)
5248 {
5249   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5250   int multibytep = coding->dst_multibyte;
5251   int *charbuf = coding->charbuf;
5252   int *charbuf_end = charbuf + coding->charbuf_used;
5253   unsigned char *dst = coding->destination + coding->produced;
5254   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5255   int destination_charbuf[1024];
5256   ptrdiff_t produced_chars = 0;
5257   int i;
5258   Lisp_Object attrs, charset_list;
5259
5260   CODING_GET_INFO (coding, attrs, charset_list);
5261   if (coding->consumed_char == coding->src_chars
5262       && coding->mode & CODING_MODE_LAST_BLOCK)
5263     ccl->last_block = 1;
5264
5265   do
5266     {
5267       ccl_driver (ccl, charbuf, destination_charbuf,
5268                   charbuf_end - charbuf, 1024, charset_list);
5269       if (multibytep)
5270         {
5271           ASSURE_DESTINATION (ccl->produced * 2);
5272           for (i = 0; i < ccl->produced; i++)
5273             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5274         }
5275       else
5276         {
5277           ASSURE_DESTINATION (ccl->produced);
5278           for (i = 0; i < ccl->produced; i++)
5279             *dst++ = destination_charbuf[i] & 0xFF;
5280           produced_chars += ccl->produced;
5281         }
5282       charbuf += ccl->consumed;
5283       if (ccl->status == CCL_STAT_QUIT
5284           || ccl->status == CCL_STAT_INVALID_CMD)
5285         break;
5286     }
5287   while (charbuf < charbuf_end);
5288
5289   switch (ccl->status)
5290     {
5291     case CCL_STAT_SUSPEND_BY_SRC:
5292       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5293       break;
5294     case CCL_STAT_SUSPEND_BY_DST:
5295       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5296       break;
5297     case CCL_STAT_QUIT:
5298     case CCL_STAT_INVALID_CMD:
5299       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5300       break;
5301     default:
5302       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5303       break;
5304     }
5305
5306   coding->produced_char += produced_chars;
5307   coding->produced = dst - coding->destination;
5308   return 0;
5309 }
5310
5311
5312 \f
5313 /*** 10, 11. no-conversion handlers ***/
5314
5315 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
5316
5317 static void
5318 decode_coding_raw_text (struct coding_system *coding)
5319 {
5320   int eol_dos =
5321     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5322
5323   coding->chars_at_source = 1;
5324   coding->consumed_char = coding->src_chars;
5325   coding->consumed = coding->src_bytes;
5326   if (eol_dos && coding->source[coding->src_bytes - 1] == '\r')
5327     {
5328       coding->consumed_char--;
5329       coding->consumed--;
5330       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5331     }
5332   else
5333     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5334 }
5335
5336 static int
5337 encode_coding_raw_text (struct coding_system *coding)
5338 {
5339   int multibytep = coding->dst_multibyte;
5340   int *charbuf = coding->charbuf;
5341   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5342   unsigned char *dst = coding->destination + coding->produced;
5343   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5344   ptrdiff_t produced_chars = 0;
5345   int c;
5346
5347   if (multibytep)
5348     {
5349       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5350
5351       if (coding->src_multibyte)
5352         while (charbuf < charbuf_end)
5353           {
5354             ASSURE_DESTINATION (safe_room);
5355             c = *charbuf++;
5356             if (ASCII_CHAR_P (c))
5357               EMIT_ONE_ASCII_BYTE (c);
5358             else if (CHAR_BYTE8_P (c))
5359               {
5360                 c = CHAR_TO_BYTE8 (c);
5361                 EMIT_ONE_BYTE (c);
5362               }
5363             else
5364               {
5365                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5366
5367                 CHAR_STRING_ADVANCE (c, p1);
5368                 do
5369                   {
5370                     EMIT_ONE_BYTE (*p0);
5371                     p0++;
5372                   }
5373                 while (p0 < p1);
5374               }
5375           }
5376       else
5377         while (charbuf < charbuf_end)
5378           {
5379             ASSURE_DESTINATION (safe_room);
5380             c = *charbuf++;
5381             EMIT_ONE_BYTE (c);
5382           }
5383     }
5384   else
5385     {
5386       if (coding->src_multibyte)
5387         {
5388           int safe_room = MAX_MULTIBYTE_LENGTH;
5389
5390           while (charbuf < charbuf_end)
5391             {
5392               ASSURE_DESTINATION (safe_room);
5393               c = *charbuf++;
5394               if (ASCII_CHAR_P (c))
5395                 *dst++ = c;
5396               else if (CHAR_BYTE8_P (c))
5397                 *dst++ = CHAR_TO_BYTE8 (c);
5398               else
5399                 CHAR_STRING_ADVANCE (c, dst);
5400             }
5401         }
5402       else
5403         {
5404           ASSURE_DESTINATION (charbuf_end - charbuf);
5405           while (charbuf < charbuf_end && dst < dst_end)
5406             *dst++ = *charbuf++;
5407         }
5408       produced_chars = dst - (coding->destination + coding->produced);
5409     }
5410   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5411   coding->produced_char += produced_chars;
5412   coding->produced = dst - coding->destination;
5413   return 0;
5414 }
5415
5416 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5417    Check if a text is encoded in a charset-based coding system.  If it
5418    is, return 1, else return 0.

        CATEGORY_MASK_CHARSET is always added to DETECT_INFO->checked.
        On rejection it is also added to DETECT_INFO->rejected.  On
        acceptance DETECT_INFO->found gets CATEGORY_MASK_CHARSET only if
        at least one byte >= 0x80 passed the checks below; otherwise
        nothing is added to DETECT_INFO->found.  */
5419
5420 static int
5421 detect_coding_charset (struct coding_system *coding,
5422                        struct coding_detection_info *detect_info)
5423 {
       /* NOTE(review): src_base, src_end, multibytep and consumed_chars
          are hardly read explicitly below; presumably ONE_MORE_BYTE
          (defined elsewhere) refers to them.  Confirm before removing
          or renaming any of them.  */
5424   const unsigned char *src = coding->source, *src_base;
5425   const unsigned char *src_end = coding->source + coding->src_bytes;
5426   int multibytep = coding->src_multibyte;
5427   ptrdiff_t consumed_chars = 0;
5428   Lisp_Object attrs, valids, name;
       /* Bits to add to DETECT_INFO->found if the text is accepted.  */
5429   int found = 0;
5430   ptrdiff_t head_ascii = coding->head_ascii;
5431   int check_latin_extra = 0;
5432
5433   detect_info->checked |= CATEGORY_MASK_CHARSET;
5434
       /* From here on CODING is the coding_categories entry for
          coding_category_charset, not the caller's argument.  What is
          needed from the argument (source pointers, src_multibyte,
          head_ascii) was copied into locals above.  */
5435   coding = &coding_categories[coding_category_charset];
5436   attrs = CODING_ID_ATTRS (coding->id);
5437   valids = AREF (attrs, coding_attr_charset_valids);
5438   name = CODING_ID_NAME (coding->id);
       /* Coding systems whose name starts with "iso-8859-" or
          "iso-latin-" get the extra check on bytes 0x80..0x9F below.  */
5439   if (strncmp (SSDATA (SYMBOL_NAME (name)),
5440                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5441       || strncmp (SSDATA (SYMBOL_NAME (name)),
5442                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5443     check_latin_extra = 1;
5444
       /* For an ASCII-compatible coding system skip HEAD_ASCII bytes.
          NOTE(review): presumably head_ascii is the length of a leading
          all-ASCII run computed by the caller; it is not validated
          here -- confirm it can never be negative at this point.  */
5445   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5446     src += head_ascii;
5447
       /* Each iteration checks one character: a leading byte looked up
          in VALIDS plus, for multi-dimensional charsets, its trailing
          bytes.  */
5448   while (1)
5449     {
5450       int c;
5451       Lisp_Object val;
5452       struct charset *charset;
5453       int dim, idx;
5454
5455       src_base = src;
           /* No statement in this function jumps to `no_more_source';
              presumably ONE_MORE_BYTE does so when the source is
              exhausted, which is then the only way to the success
              exit.  */
5456       ONE_MORE_BYTE (c);
           /* NOTE(review): a negative C is skipped without validation;
              presumably it stands for a multibyte character met in a
              multibyte source -- confirm against ONE_MORE_BYTE.  */
5457       if (c < 0)
5458         continue;
           /* VALIDS maps a leading byte to nil (not valid), a charset
              ID, or a list of charset IDs.  */
5459       val = AREF (valids, c);
5460       if (NILP (val))
5461         break;
5462       if (c >= 0x80)
5463         {
               /* For the Latin coding systems, reject 0x80..0x9F unless
                  Vlatin_extra_code_table is a vector with a non-nil
                  entry for this byte.  */
5464           if (c < 0xA0
5465               && check_latin_extra
5466               && (!VECTORP (Vlatin_extra_code_table)
5467                   || NILP (AREF (Vlatin_extra_code_table, c))))
5468             break;
5469           found = CATEGORY_MASK_CHARSET;
5470         }
5471       if (INTEGERP (val))
5472         {
               /* A single charset: check each of the remaining DIM - 1
                  bytes against the code_space bounds for its
                  position.  */
5473           charset = CHARSET_FROM_ID (XFASTINT (val));
5474           dim = CHARSET_DIMENSION (charset);
5475           for (idx = 1; idx < dim; idx++)
5476             {
                   /* The source ends in the middle of a character.  */
5477               if (src == src_end)
5478                 goto too_short;
5479               ONE_MORE_BYTE (c);
5480               if (c < charset->code_space[(dim - 1 - idx) * 4]
5481                   || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5482                 break;
5483             }
               /* The inner loop stopped early: a byte was out of
                  range.  */
5484           if (idx < dim)
5485             break;
5486         }
5487       else
5488         {
               /* A list of candidate charsets.  IDX is not reset and
                  bytes already read are not read again when moving on
                  to the next candidate.  */
5489           idx = 1;
5490           for (; CONSP (val); val = XCDR (val))
5491             {
5492               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5493               dim = CHARSET_DIMENSION (charset);
5494               while (idx < dim)
5495                 {
5496                   if (src == src_end)
5497                     goto too_short;
5498                   ONE_MORE_BYTE (c);
5499                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5500                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5501                     break;
5502                   idx++;
5503                 }
                   /* All bytes of this candidate were in range.  */
5504               if (idx == dim)
5505                 {
5506                   val = Qnil;
5507                   break;
5508                 }
5509             }
               /* NOTE(review): VAL is Qnil after a match and is a
                  non-cons tail once the list is exhausted, so this
                  condition looks unsatisfiable; a character matching
                  no candidate is then not rejected here.  Confirm
                  whether that is intended.  */
5510           if (CONSP (val))
5511             break;
5512         }
5513     }
       /* Reached by `goto too_short' and by every `break' out of the
          loop above.  */
5514  too_short:
5515   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5516   return 0;
5517
5518  no_more_source:
5519   detect_info->found |= found;
5520   return 1;
5521 }
5522
/* Decode the bytes of CODING->source, starting at offset
   CODING->consumed, into character codes appended to CODING->charbuf.
   The charset (or list of candidate charsets) for each character is
   looked up by its first byte in the coding_attr_charset_valids entry
   of the coding system's attributes.  A sequence that cannot be
   decoded is passed through one byte at a time and counted in
   CODING->errors.  On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.  */
5523 static void
5524 decode_coding_charset (struct coding_system *coding)
5525 {
5526   const unsigned char *src = coding->source + coding->consumed;
5527   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* Start of the character currently being decoded; used to
          rewind on failure and to report how much was consumed.  */
5528   const unsigned char *src_base;
5529   int *charbuf = coding->charbuf + coding->charbuf_used;
5530   /* We may produce one charset annotation in one loop and one more at
5531      the end.  */
5532   int *charbuf_end
5533     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5534   ptrdiff_t consumed_chars = 0, consumed_chars_base;
5535   int multibytep = coding->src_multibyte;
5536   Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
5537   Lisp_Object valids;
       /* CHAR_OFFSET counts the characters produced so far.
          LAST_OFFSET and LAST_ID describe the current run of
          characters attributed to one non-ASCII charset, which is what
          ADD_CHARSET_DATA records.  */
5538   ptrdiff_t char_offset = coding->produced_char;
5539   ptrdiff_t last_offset = char_offset;
5540   int last_id = charset_ascii;
5541   int eol_dos =
5542     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The byte read ahead after a CR when EOL_DOS, or -1 if none.  */
5543   int byte_after_cr = -1;
5544
5545   valids = AREF (attrs, coding_attr_charset_valids);
5546
5547   while (1)
5548     {
5549       int c;
5550       Lisp_Object val;
5551       struct charset *charset;
5552       int dim;
           /* Number of bytes accumulated in CODE so far.  */
5553       int len = 1;
5554       unsigned code;
5555
           /* Remember where this character starts.  */
5556       src_base = src;
5557       consumed_chars_base = consumed_chars;
5558
5559       if (charbuf >= charbuf_end)
5560         {
               /* Out of room.  Step back over the byte that was read
                  ahead after a CR so that it is not reported as
                  consumed.  NOTE(review): consumed_chars_base is not
                  adjusted to match; confirm whether ONE_MORE_BYTE
                  counted that byte.  */
5561           if (byte_after_cr >= 0)
5562             src_base--;
5563           break;
5564         }
5565
5566       if (byte_after_cr >= 0)
5567         {
               /* Use the byte fetched ahead by the previous
                  iteration.  */
5568           c = byte_after_cr;
5569           byte_after_cr = -1;
5570         }
5571       else
5572         {
5573           ONE_MORE_BYTE (c);
               /* With DOS end-of-line, fetch the byte following a CR
                  right away; it is decoded by the next iteration.
                  Presumably this keeps a CR at the very end of the
                  source from being consumed alone -- confirm against
                  ONE_MORE_BYTE.  */
5574           if (eol_dos && c == '\r')
5575             ONE_MORE_BYTE (byte_after_cr);
5576         }
5577       if (c < 0)
5578         goto invalid_code;
5579       code = c;
5580
           /* VALIDS maps the first byte to a charset ID or to a list of
              charset IDs; anything else marks the byte as invalid.  */
5581       val = AREF (valids, c);
5582       if (! INTEGERP (val) && ! CONSP (val))
5583         goto invalid_code;
5584       if (INTEGERP (val))
5585         {
5586           charset = CHARSET_FROM_ID (XFASTINT (val));
5587           dim = CHARSET_DIMENSION (charset);
               /* Append the remaining DIM - 1 bytes to CODE, most
                  significant byte first.  */
5588           while (len < dim)
5589             {
5590               ONE_MORE_BYTE (c);
5591               code = (code << 8) | c;
5592               len++;
5593             }
               /* The result is left in C; a negative C is treated as a
                  decoding failure below.  */
5594           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5595                               charset, code, c);
5596         }
5597       else
5598         {
5599           /* VAL is a list of charset IDs.  It is assured that the
5600              list is sorted by charset dimensions (smaller one
5601              comes first).  */
               /* LEN and CODE carry over from one candidate to the
                  next, so a longer charset extends the code already
                  read for a shorter one.  The first candidate that
                  decodes to a non-negative C wins.  */
5602           while (CONSP (val))
5603             {
5604               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5605               dim = CHARSET_DIMENSION (charset);
5606               while (len < dim)
5607                 {
5608                   ONE_MORE_BYTE (c);
5609                   code = (code << 8) | c;
5610                   len++;
5611                 }
5612               CODING_DECODE_CHAR (coding, src, src_base,
5613                                   src_end, charset, code, c);
5614               if (c >= 0)
5615                 break;
5616               val = XCDR (val);
5617             }
5618         }
5619       if (c < 0)
5620         goto invalid_code;
           /* A change to a different non-ASCII charset closes the
              previous run (recording it if it was non-ASCII) and
              starts a new one.  ASCII characters do not interrupt a
              run.  */
5621       if (charset->id != charset_ascii
5622           && last_id != charset->id)
5623         {
5624           if (last_id != charset_ascii)
5625             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5626           last_id = charset->id;
5627           last_offset = char_offset;
5628         }
5629
5630       *charbuf++ = c;
5631       char_offset++;
5632       continue;
5633
5634     invalid_code:
           /* Rewind to the start of this character and pass a single
              byte through: a negative value is negated, an ASCII byte
              is kept, and any other byte is stored as
              BYTE8_TO_CHAR (c).  */
5635       src = src_base;
5636       consumed_chars = consumed_chars_base;
5637       ONE_MORE_BYTE (c);
5638       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5639       char_offset++;
5640       coding->errors++;
5641     }
5642
       /* Reached by the `break' above (charbuf full) and, presumably,
          from inside ONE_MORE_BYTE when the source runs out.  Only
          what lies before SRC_BASE is reported as consumed.  */
5643  no_more_source:
5644   if (last_id != charset_ascii)
5645     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5646   coding->consumed_char += consumed_chars_base;
5647   coding->consumed = src_base - coding->source;
5648   coding->charbuf_used = charbuf - coding->charbuf;
5649 }
5650
5651 static int
5652 encode_coding_charset (struct coding_system *coding)
5653 {
5654   int multibytep = coding->dst_multibyte;
5655   int *charbuf = coding->charbuf;
5656   int *charbuf_end = charbuf + coding->charbuf_used;
5657   unsigned char *dst = coding->destination + coding->produced;
5658   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5659   int safe_room = MAX_MULTIBYTE_LENGTH;
5660   ptrdiff_t produced_chars = 0;
5661   Lisp_Object attrs, charset_list;
5662   int ascii_compatible;
5663   int c;
5664
5665   CODING_GET_INFO (coding, attrs, charset_list);
5666   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5667
5668   while (charbuf < charbuf_end)
5669     {
5670       struct charset *charset;
5671       unsigned code;
5672
5673       ASSURE_DESTINATION (safe_room);
5674       c = *charbuf++;
5675       if (ascii_compatible && ASCII_CHAR_P (c))
5676         EMIT_ONE_ASCII_BYTE (c);
5677       else if (CHAR_BYTE8_P (c))
5678         {
5679           c = CHAR_TO_BYTE8 (c);
5680           EMIT_ONE_BYTE (c);
5681         }
5682       else
5683         {
5684           CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5685                                &code, charset);
5686
5687           if (charset)
5688             {
5689               if (CHARSET_DIMENSION (charset) == 1)
5690                 EMIT_ONE_BYTE (code);
5691               else if (CHARSET_DIMENSION (charset) == 2)
5692                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5693               else if (CHARSET_DIMENSION (charset) == 3)
5694                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5695               else
5696                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5697                                  (code >> 8) & 0xFF, code & 0xFF);
5698             }
5699           else
5700             {
5701               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5702                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5703               else
5704                 c = coding->default_char;
5705               EMIT_ONE_BYTE (c);
5706             }
5707         }
5708     }
5709
5710   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5711   coding->produced_char += produced_chars;
5712   coding->produced = dst - coding->destination;
5713   return 0;
5714 }
5715
5716 \f
5717 /*** 10. C library functions ***/
5718
5719 /* Setup coding context CODING from information about CODING_SYSTEM.
5720    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5721    CODING_SYSTEM is invalid, signal an error.  */
5722
5723 void
5724 setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5725 {
5726   Lisp_Object attrs;
5727   Lisp_Object eol_type;
5728   Lisp_Object coding_type;
5729   Lisp_Object val;
5730
5731   if (NILP (coding_system))
5732     coding_system = Qundecided;
5733
5734   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5735
5736   attrs = CODING_ID_ATTRS (coding->id);
5737   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5738
5739   coding->mode = 0;
5740   coding->head_ascii = -1;
5741   if (VECTORP (eol_type))
5742     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5743                             | CODING_REQUIRE_DETECTION_MASK);
5744   else if (! EQ (eol_type, Qunix))
5745     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5746                             | CODING_REQUIRE_ENCODING_MASK);
5747   else
5748     coding->common_flags = 0;
5749   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5750     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5751   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5752     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5753   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5754     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5755
5756   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5757   coding->max_charset_id = SCHARS (val) - 1;
5758   coding->safe_charsets = SDATA (val);
5759   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5760   coding->carryover_bytes = 0;
5761
5762   coding_type = CODING_ATTR_TYPE (attrs);
5763   if (EQ (coding_type, Qundecided))
5764     {
5765       coding->detector = NULL;
5766       coding->decoder = decode_coding_raw_text;
5767       coding->encoder = encode_coding_raw_text;
5768       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5769     }
5770   else if (EQ (coding_type, Qiso_2022))
5771     {
5772       int i;
5773       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5774
5775       /* Invoke graphic register 0 to plane 0.  */
5776       CODING_ISO_INVOCATION (coding, 0) = 0;
5777       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5778       CODING_ISO_INVOCATION (coding, 1)
5779         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5780       /* Setup the initial status of designation.  */
5781       for (i = 0; i < 4; i++)
5782         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5783       /* Not single shifting initially.  */
5784       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5785       /* Beginning of buffer should also be regarded as bol. */
5786       CODING_ISO_BOL (coding) = 1;
5787       coding->detector = detect_coding_iso_2022;
5788       coding->decoder = decode_coding_iso_2022;
5789       coding->encoder = encode_coding_iso_2022;
5790       if (flags & CODING_ISO_FLAG_SAFE)
5791         coding->mode |= CODING_MODE_SAFE_ENCODING;
5792       coding->common_flags
5793         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5794             | CODING_REQUIRE_FLUSHING_MASK);
5795       if (flags & CODING_ISO_FLAG_COMPOSITION)
5796         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5797       if (flags & CODING_ISO_FLAG_DESIGNATION)
5798         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5799       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5800         {
5801           setup_iso_safe_charsets (attrs);
5802           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5803           coding->max_charset_id = SCHARS (val) - 1;
5804           coding->safe_charsets = SDATA (val);
5805         }
5806       CODING_ISO_FLAGS (coding) = flags;
5807       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5808       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5809       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5810       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5811     }
5812   else if (EQ (coding_type, Qcharset))
5813     {
5814       coding->detector = detect_coding_charset;
5815       coding->decoder = decode_coding_charset;
5816       coding->encoder = encode_coding_charset;
5817       coding->common_flags
5818         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5819     }
5820   else if (EQ (coding_type, Qutf_8))
5821     {
5822       val = AREF (attrs, coding_attr_utf_bom);
5823       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5824                                    : EQ (val, Qt) ? utf_with_bom
5825                                    : utf_without_bom);
5826       coding->detector = detect_coding_utf_8;
5827       coding->decoder = decode_coding_utf_8;
5828       coding->encoder = encode_coding_utf_8;
5829       coding->common_flags
5830         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5831       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5832         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5833     }
5834   else if (EQ (coding_type, Qutf_16))
5835     {
5836       val = AREF (attrs, coding_attr_utf_bom);
5837       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5838                                     : EQ (val, Qt) ? utf_with_bom
5839                                     : utf_without_bom);
5840       val = AREF (attrs, coding_attr_utf_16_endian);
5841       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5842                                        : utf_16_little_endian);
5843       CODING_UTF_16_SURROGATE (coding) = 0;
5844       coding->detector = detect_coding_utf_16;
5845       coding->decoder = decode_coding_utf_16;
5846       coding->encoder = encode_coding_utf_16;
5847       coding->common_flags
5848         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5849       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5850         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5851     }
5852   else if (EQ (coding_type, Qccl))
5853     {
5854       coding->detector = detect_coding_ccl;
5855       coding->decoder = decode_coding_ccl;
5856       coding->encoder = encode_coding_ccl;
5857       coding->common_flags
5858         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5859             | CODING_REQUIRE_FLUSHING_MASK);
5860     }
5861   else if (EQ (coding_type, Qemacs_mule))
5862     {
5863       coding->detector = detect_coding_emacs_mule;
5864       coding->decoder = decode_coding_emacs_mule;
5865       coding->encoder = encode_coding_emacs_mule;
5866       coding->common_flags
5867         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5868       coding->spec.emacs_mule.full_support = 1;
5869       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5870           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5871         {
5872           Lisp_Object tail, safe_charsets;
5873           int max_charset_id = 0;
5874
5875           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5876                tail = XCDR (tail))
5877             if (max_charset_id < XFASTINT (XCAR (tail)))
5878               max_charset_id = XFASTINT (XCAR (tail));
5879           safe_charsets = make_uninit_string (max_charset_id + 1);
5880           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5881           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5882                tail = XCDR (tail))
5883             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5884           coding->max_charset_id = max_charset_id;
5885           coding->safe_charsets = SDATA (safe_charsets);
5886           coding->spec.emacs_mule.full_support = 1;
5887         }
5888       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5889       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5890     }
5891   else if (EQ (coding_type, Qshift_jis))
5892     {
5893       coding->detector = detect_coding_sjis;
5894       coding->decoder = decode_coding_sjis;
5895       coding->encoder = encode_coding_sjis;
5896       coding->common_flags
5897         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5898     }
5899   else if (EQ (coding_type, Qbig5))
5900     {
5901       coding->detector = detect_coding_big5;
5902       coding->decoder = decode_coding_big5;
5903       coding->encoder = encode_coding_big5;
5904       coding->common_flags
5905         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5906     }
5907   else                          /* EQ (coding_type, Qraw_text) */
5908     {
5909       coding->detector = NULL;
5910       coding->decoder = decode_coding_raw_text;
5911       coding->encoder = encode_coding_raw_text;
5912       if (! EQ (eol_type, Qunix))
5913         {
5914           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5915           if (! VECTORP (eol_type))
5916             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5917         }
5918
5919     }
5920
5921   return;
5922 }
5923
5924 /* Return a list of charsets supported by CODING.  */
5925
5926 Lisp_Object
5927 coding_charset_list (struct coding_system *coding)
5928 {
5929   Lisp_Object attrs, charset_list;
5930
5931   CODING_GET_INFO (coding, attrs, charset_list);
5932   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5933     {
5934       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5935
5936       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5937         charset_list = Viso_2022_charset_list;
5938     }
5939   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5940     {
5941       charset_list = Vemacs_mule_charset_list;
5942     }
5943   return charset_list;
5944 }
5945
5946
5947 /* Return a list of charsets supported by CODING-SYSTEM.  */
5948
5949 Lisp_Object
5950 coding_system_charset_list (Lisp_Object coding_system)
5951 {
5952   ptrdiff_t id;
5953   Lisp_Object attrs, charset_list;
5954
5955   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5956   attrs = CODING_ID_ATTRS (id);
5957
5958   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5959     {
5960       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5961
5962       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5963         charset_list = Viso_2022_charset_list;
5964       else
5965         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5966     }
5967   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5968     {
5969       charset_list = Vemacs_mule_charset_list;
5970     }
5971   else
5972     {
5973       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5974     }
5975   return charset_list;
5976 }
5977
5978
5979 /* Return raw-text or one of its subsidiaries that has the same
5980    eol_type as CODING-SYSTEM.  */
5981
5982 Lisp_Object
5983 raw_text_coding_system (Lisp_Object coding_system)
5984 {
5985   Lisp_Object spec, attrs;
5986   Lisp_Object eol_type, raw_text_eol_type;
5987
5988   if (NILP (coding_system))
5989     return Qraw_text;
5990   spec = CODING_SYSTEM_SPEC (coding_system);
5991   attrs = AREF (spec, 0);
5992
5993   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5994     return coding_system;
5995
5996   eol_type = AREF (spec, 2);
5997   if (VECTORP (eol_type))
5998     return Qraw_text;
5999   spec = CODING_SYSTEM_SPEC (Qraw_text);
6000   raw_text_eol_type = AREF (spec, 2);
6001   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6002           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6003           : AREF (raw_text_eol_type, 2));
6004 }
6005
6006
6007 /* If CODING_SYSTEM doesn't specify end-of-line format, return one of
6008    the subsidiary that has the same eol-spec as PARENT (if it is not
6009    nil and specifies end-of-line format) or the system's setting
6010    (system_eol_type).  */
6011
6012 Lisp_Object
6013 coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
6014 {
6015   Lisp_Object spec, eol_type;
6016
6017   if (NILP (coding_system))
6018     coding_system = Qraw_text;
6019   spec = CODING_SYSTEM_SPEC (coding_system);
6020   eol_type = AREF (spec, 2);
6021   if (VECTORP (eol_type))
6022     {
6023       Lisp_Object parent_eol_type;
6024
6025       if (! NILP (parent))
6026         {
6027           Lisp_Object parent_spec;
6028
6029           parent_spec = CODING_SYSTEM_SPEC (parent);
6030           parent_eol_type = AREF (parent_spec, 2);
6031           if (VECTORP (parent_eol_type))
6032             parent_eol_type = system_eol_type;
6033         }
6034       else
6035         parent_eol_type = system_eol_type;
6036       if (EQ (parent_eol_type, Qunix))
6037         coding_system = AREF (eol_type, 0);
6038       else if (EQ (parent_eol_type, Qdos))
6039         coding_system = AREF (eol_type, 1);
6040       else if (EQ (parent_eol_type, Qmac))
6041         coding_system = AREF (eol_type, 2);
6042     }
6043   return coding_system;
6044 }
6045
6046
6047 /* Check if text-conversion and eol-conversion of CODING_SYSTEM are
6048    decided for writing to a process.  If not, complement them, and
6049    return a new coding system.  */
6050
6051 Lisp_Object
6052 complement_process_encoding_system (Lisp_Object coding_system)
6053 {
6054   Lisp_Object coding_base = Qnil, eol_base = Qnil;
6055   Lisp_Object spec, attrs;
6056   int i;
6057
6058   for (i = 0; i < 3; i++)
6059     {
6060       if (i == 1)
6061         coding_system = CDR_SAFE (Vdefault_process_coding_system);
6062       else if (i == 2)
6063         coding_system = preferred_coding_system ();
6064       spec = CODING_SYSTEM_SPEC (coding_system);
6065       if (NILP (spec))
6066         continue;
6067       attrs = AREF (spec, 0);
6068       if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6069         coding_base = CODING_ATTR_BASE_NAME (attrs);
6070       if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
6071         eol_base = coding_system;
6072       if (! NILP (coding_base) && ! NILP (eol_base))
6073         break;
6074     }
6075
6076   if (i > 0)
6077     /* The original CODING_SYSTEM didn't specify text-conversion or
6078        eol-conversion.  Be sure that we return a fully complemented
6079        coding system.  */
6080     coding_system = coding_inherit_eol_type (coding_base, eol_base);
6081   return coding_system;
6082 }
6083
6084
6085 /* Emacs has a mechanism to automatically detect a coding system if it
6086    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  However,
6087    it is impossible to distinguish some coding systems accurately
6088    because they use the same range of codes.  So, as a first step,
6089    coding systems are classified into the following categories:
6090
6091    o coding-category-emacs-mule
6092
6093         The category for a coding system which has the same code range
6094         as Emacs' internal format.  Assigned the coding-system (Lisp
6095         symbol) `emacs-mule' by default.
6096
6097    o coding-category-sjis
6098
6099         The category for a coding system which has the same code range
6100         as SJIS.  Assigned the coding-system (Lisp
6101         symbol) `japanese-shift-jis' by default.
6102
6103    o coding-category-iso-7
6104
6105         The category for a coding system which has the same code range
6106         as ISO2022 of 7-bit environment.  This doesn't use any locking
6107         shift and single shift functions.  This can encode/decode all
6108         charsets.  Assigned the coding-system (Lisp symbol)
6109         `iso-2022-7bit' by default.
6110
6111    o coding-category-iso-7-tight
6112
6113         Same as coding-category-iso-7 except that this can
6114         encode/decode only the specified charsets.
6115
6116    o coding-category-iso-8-1
6117
6118         The category for a coding system which has the same code range
6119         as ISO2022 of 8-bit environment and graphic plane 1 used only
6120         for DIMENSION1 charset.  This doesn't use any locking shift
6121         and single shift functions.  Assigned the coding-system (Lisp
6122         symbol) `iso-latin-1' by default.
6123
6124    o coding-category-iso-8-2
6125
6126         The category for a coding system which has the same code range
6127         as ISO2022 of 8-bit environment and graphic plane 1 used only
6128         for DIMENSION2 charset.  This doesn't use any locking shift
6129         and single shift functions.  Assigned the coding-system (Lisp
6130         symbol) `japanese-iso-8bit' by default.
6131
6132    o coding-category-iso-7-else
6133
6134         The category for a coding system which has the same code range
6135         as ISO2022 of 7-bit environment but uses locking shift or
6136         single shift functions.  Assigned the coding-system (Lisp
6137         symbol) `iso-2022-7bit-lock' by default.
6138
6139    o coding-category-iso-8-else
6140
6141         The category for a coding system which has the same code range
6142         as ISO2022 of 8-bit environment but uses locking shift or
6143         single shift functions.  Assigned the coding-system (Lisp
6144         symbol) `iso-2022-8bit-ss2' by default.
6145
6146    o coding-category-big5
6147
6148         The category for a coding system which has the same code range
6149         as BIG5.  Assigned the coding-system (Lisp symbol)
6150         `cn-big5' by default.
6151
6152    o coding-category-utf-8
6153
6154         The category for a coding system which has the same code range
6155         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6156         symbol) `utf-8' by default.
6157
6158    o coding-category-utf-16-be
6159
6160         The category for a coding system in which a text has a
6161         Unicode signature (cf. Unicode Standard) in BIG endian
6162         order at the head.  Assigned the coding-system (Lisp symbol)
6163         `utf-16-be' by default.
6164
6165    o coding-category-utf-16-le
6166
6167         The category for a coding system in which a text has a
6168         Unicode signature (cf. Unicode Standard) in LITTLE endian
6169         order at the head.  Assigned the coding-system (Lisp
6170         symbol) `utf-16-le' by default.
6171
6172    o coding-category-ccl
6173
6174         The category for a coding system of which encoder/decoder is
6175         written in CCL programs.  The default value is nil, i.e., no
6176         coding system is assigned.
6177
6178    o coding-category-binary
6179
6180         The category for a coding system not categorized in any of the
6181         above.  Assigned the coding-system (Lisp symbol)
6182         `no-conversion' by default.
6183
6184    Each of them is a Lisp symbol whose value is the actual
6185    `coding-system' (also a Lisp symbol) assigned to it by a user.
6186    What Emacs actually detects is the category of a coding system;
6187    it then uses the `coding-system' assigned to that category.  If
6188    Emacs cannot narrow the text down to a single category, it selects
6189    the category of the highest priority.  The priorities are also
6190    specified by a user in the Lisp variable `coding-category-list'.
6191
6192 */
6193
/* End-of-line kinds reported by detect_eol: none seen, LF, CR, or
   CRLF.  The non-zero values are distinct single bits.  */
6194 #define EOL_SEEN_NONE   0
6195 #define EOL_SEEN_LF     1
6196 #define EOL_SEEN_CR     2
6197 #define EOL_SEEN_CRLF   4
6198
6199 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6200    SOURCE is encoded.  If CATEGORY is one of
6201    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6202    two-byte, else they are encoded by one-byte.
6203
6204    Return one of EOL_SEEN_XXX.  */
6205
/* detect_eol stops scanning once it has examined this many
   end-of-line sequences.  */
6206 #define MAX_EOL_CHECK_COUNT 3
6207
6208 static int
6209 detect_eol (const unsigned char *source, ptrdiff_t src_bytes,
6210             enum coding_category category)
6211 {
6212   const unsigned char *src = source, *src_end = src + src_bytes;
6213   unsigned char c;
6214   int total  = 0;
6215   int eol_seen = EOL_SEEN_NONE;
6216
6217   if ((1 << category) & CATEGORY_MASK_UTF_16)
6218     {
6219       int msb, lsb;
6220
6221       msb = category == (coding_category_utf_16_le
6222                          | coding_category_utf_16_le_nosig);
6223       lsb = 1 - msb;
6224
6225       while (src + 1 < src_end)
6226         {
6227           c = src[lsb];
6228           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6229             {
6230               int this_eol;
6231
6232               if (c == '\n')
6233                 this_eol = EOL_SEEN_LF;
6234               else if (src + 3 >= src_end
6235                        || src[msb + 2] != 0
6236                        || src[lsb + 2] != '\n')
6237                 this_eol = EOL_SEEN_CR;
6238               else
6239                 {
6240                   this_eol = EOL_SEEN_CRLF;
6241                   src += 2;
6242                 }
6243
6244               if (eol_seen == EOL_SEEN_NONE)
6245                 /* This is the first end-of-line.  */
6246                 eol_seen = this_eol;
6247               else if (eol_seen != this_eol)
6248                 {
6249                   /* The found type is different from what found before.
6250                      Allow for stray ^M characters in DOS EOL files.  */
6251                   if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6252                       || (eol_seen == EOL_SEEN_CRLF
6253                           && this_eol == EOL_SEEN_CR))
6254                     eol_seen = EOL_SEEN_CRLF;
6255                   else
6256                     {
6257                       eol_seen = EOL_SEEN_LF;
6258                       break;
6259                     }
6260                 }
6261               if (++total == MAX_EOL_CHECK_COUNT)
6262                 break;
6263             }
6264           src += 2;
6265         }
6266     }
6267   else
6268     while (src < src_end)
6269       {
6270         c = *src++;
6271         if (c == '\n' || c == '\r')
6272           {
6273             int this_eol;
6274
6275             if (c == '\n')
6276               this_eol = EOL_SEEN_LF;
6277             else if (src >= src_end || *src != '\n')
6278               this_eol = EOL_SEEN_CR;
6279             else
6280               this_eol = EOL_SEEN_CRLF, src++;
6281
6282             if (eol_seen == EOL_SEEN_NONE)
6283               /* This is the first end-of-line.  */
6284               eol_seen = this_eol;
6285             else if (eol_seen != this_eol)
6286               {
6287                 /* The found type is different from what found before.
6288                    Allow for stray ^M characters in DOS EOL files.  */
6289                 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6290                     || (eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR))
6291                   eol_seen = EOL_SEEN_CRLF;
6292                 else
6293                   {
6294                     eol_seen = EOL_SEEN_LF;
6295                     break;
6296                   }
6297               }
6298             if (++total == MAX_EOL_CHECK_COUNT)
6299               break;
6300           }
6301       }
6302   return eol_seen;
6303 }
6304
6305
6306 static Lisp_Object
6307 adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6308 {
6309   Lisp_Object eol_type;
6310
6311   eol_type = CODING_ID_EOL_TYPE (coding->id);
6312   if (eol_seen & EOL_SEEN_LF)
6313     {
6314       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6315       eol_type = Qunix;
6316     }
6317   else if (eol_seen & EOL_SEEN_CRLF)
6318     {
6319       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6320       eol_type = Qdos;
6321     }
6322   else if (eol_seen & EOL_SEEN_CR)
6323     {
6324       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6325       eol_type = Qmac;
6326     }
6327   return eol_type;
6328 }
6329
6330 /* Detect how a text specified in CODING is encoded.  If a coding
6331    system is detected, update fields of CODING by the detected coding
6332    system.  */
6333
6334 static void
6335 detect_coding (struct coding_system *coding)
6336 {
6337   const unsigned char *src, *src_end;
6338   int saved_mode = coding->mode;
6339
6340   coding->consumed = coding->consumed_char = 0;
6341   coding->produced = coding->produced_char = 0;
6342   coding_set_source (coding);
6343
6344   src_end = coding->source + coding->src_bytes;
6345   coding->head_ascii = 0;
6346
6347   /* If we have not yet decided the text encoding type, detect it
6348      now.  */
6349   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6350     {
6351       int c, i;
6352       struct coding_detection_info detect_info;
6353       int null_byte_found = 0, eight_bit_found = 0;
6354
6355       detect_info.checked = detect_info.found = detect_info.rejected = 0;
6356       for (src = coding->source; src < src_end; src++)
6357         {
6358           c = *src;
6359           if (c & 0x80)
6360             {
6361               eight_bit_found = 1;
6362               if (null_byte_found)
6363                 break;
6364             }
6365           else if (c < 0x20)
6366             {
6367               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6368                   && ! inhibit_iso_escape_detection
6369                   && ! detect_info.checked)
6370                 {
6371                   if (detect_coding_iso_2022 (coding, &detect_info))
6372                     {
6373                       /* We have scanned the whole data.  */
6374                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6375                         {
6376                           /* We didn't find an 8-bit code.  We may
6377                              have found a null-byte, but it's very
6378                              rare that a binary file conforms to
6379                              ISO-2022.  */
6380                           src = src_end;
6381                           coding->head_ascii = src - coding->source;
6382                         }
6383                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6384                       break;
6385                     }
6386                 }
6387               else if (! c && !inhibit_null_byte_detection)
6388                 {
6389                   null_byte_found = 1;
6390                   if (eight_bit_found)
6391                     break;
6392                 }
6393               if (! eight_bit_found)
6394                 coding->head_ascii++;
6395             }
6396           else if (! eight_bit_found)
6397             coding->head_ascii++;
6398         }
6399
6400       if (null_byte_found || eight_bit_found
6401           || coding->head_ascii < coding->src_bytes
6402           || detect_info.found)
6403         {
6404           enum coding_category category;
6405           struct coding_system *this;
6406
6407           if (coding->head_ascii == coding->src_bytes)
6408             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6409             for (i = 0; i < coding_category_raw_text; i++)
6410               {
6411                 category = coding_priorities[i];
6412                 this = coding_categories + category;
6413                 if (detect_info.found & (1 << category))
6414                   break;
6415               }
6416           else
6417             {
6418               if (null_byte_found)
6419                 {
6420                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6421                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6422                 }
6423               for (i = 0; i < coding_category_raw_text; i++)
6424                 {
6425                   category = coding_priorities[i];
6426                   this = coding_categories + category;
6427                   if (this->id < 0)
6428                     {
6429                       /* No coding system of this category is defined.  */
6430                       detect_info.rejected |= (1 << category);
6431                     }
6432                   else if (category >= coding_category_raw_text)
6433                     continue;
6434                   else if (detect_info.checked & (1 << category))
6435                     {
6436                       if (detect_info.found & (1 << category))
6437                         break;
6438                     }
6439                   else if ((*(this->detector)) (coding, &detect_info)
6440                            && detect_info.found & (1 << category))
6441                     {
6442                       if (category == coding_category_utf_16_auto)
6443                         {
6444                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6445                             category = coding_category_utf_16_le;
6446                           else
6447                             category = coding_category_utf_16_be;
6448                         }
6449                       break;
6450                     }
6451                 }
6452             }
6453
6454           if (i < coding_category_raw_text)
6455             setup_coding_system (CODING_ID_NAME (this->id), coding);
6456           else if (null_byte_found)
6457             setup_coding_system (Qno_conversion, coding);
6458           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6459                    == CATEGORY_MASK_ANY)
6460             setup_coding_system (Qraw_text, coding);
6461           else if (detect_info.rejected)
6462             for (i = 0; i < coding_category_raw_text; i++)
6463               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6464                 {
6465                   this = coding_categories + coding_priorities[i];
6466                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6467                   break;
6468                 }
6469         }
6470     }
6471   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6472            == coding_category_utf_8_auto)
6473     {
6474       Lisp_Object coding_systems;
6475       struct coding_detection_info detect_info;
6476
6477       coding_systems
6478         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6479       detect_info.found = detect_info.rejected = 0;
6480       coding->head_ascii = 0;
6481       if (CONSP (coding_systems)
6482           && detect_coding_utf_8 (coding, &detect_info))
6483         {
6484           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6485             setup_coding_system (XCAR (coding_systems), coding);
6486           else
6487             setup_coding_system (XCDR (coding_systems), coding);
6488         }
6489     }
6490   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6491            == coding_category_utf_16_auto)
6492     {
6493       Lisp_Object coding_systems;
6494       struct coding_detection_info detect_info;
6495
6496       coding_systems
6497         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6498       detect_info.found = detect_info.rejected = 0;
6499       coding->head_ascii = 0;
6500       if (CONSP (coding_systems)
6501           && detect_coding_utf_16 (coding, &detect_info))
6502         {
6503           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6504             setup_coding_system (XCAR (coding_systems), coding);
6505           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6506             setup_coding_system (XCDR (coding_systems), coding);
6507         }
6508     }
6509   coding->mode = saved_mode;
6510 }
6511
6512
6513 static void
6514 decode_eol (struct coding_system *coding)
6515 {
6516   Lisp_Object eol_type;
6517   unsigned char *p, *pbeg, *pend;
6518
6519   eol_type = CODING_ID_EOL_TYPE (coding->id);
6520   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6521     return;
6522
6523   if (NILP (coding->dst_object))
6524     pbeg = coding->destination;
6525   else
6526     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6527   pend = pbeg + coding->produced;
6528
6529   if (VECTORP (eol_type))
6530     {
6531       int eol_seen = EOL_SEEN_NONE;
6532
6533       for (p = pbeg; p < pend; p++)
6534         {
6535           if (*p == '\n')
6536             eol_seen |= EOL_SEEN_LF;
6537           else if (*p == '\r')
6538             {
6539               if (p + 1 < pend && *(p + 1) == '\n')
6540                 {
6541                   eol_seen |= EOL_SEEN_CRLF;
6542                   p++;
6543                 }
6544               else
6545                 eol_seen |= EOL_SEEN_CR;
6546             }
6547         }
6548       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6549       if ((eol_seen & EOL_SEEN_CRLF) != 0
6550           && (eol_seen & EOL_SEEN_CR) != 0
6551           && (eol_seen & EOL_SEEN_LF) == 0)
6552         eol_seen = EOL_SEEN_CRLF;
6553       else if (eol_seen != EOL_SEEN_NONE
6554           && eol_seen != EOL_SEEN_LF
6555           && eol_seen != EOL_SEEN_CRLF
6556           && eol_seen != EOL_SEEN_CR)
6557         eol_seen = EOL_SEEN_LF;
6558       if (eol_seen != EOL_SEEN_NONE)
6559         eol_type = adjust_coding_eol_type (coding, eol_seen);
6560     }
6561
6562   if (EQ (eol_type, Qmac))
6563     {
6564       for (p = pbeg; p < pend; p++)
6565         if (*p == '\r')
6566           *p = '\n';
6567     }
6568   else if (EQ (eol_type, Qdos))
6569     {
6570       ptrdiff_t n = 0;
6571
6572       if (NILP (coding->dst_object))
6573         {
6574           /* Start deleting '\r' from the tail to minimize the memory
6575              movement.  */
6576           for (p = pend - 2; p >= pbeg; p--)
6577             if (*p == '\r')
6578               {
6579                 memmove (p, p + 1, pend-- - p - 1);
6580                 n++;
6581               }
6582         }
6583       else
6584         {
6585           ptrdiff_t pos_byte = coding->dst_pos_byte;
6586           ptrdiff_t pos = coding->dst_pos;
6587           ptrdiff_t pos_end = pos + coding->produced_char - 1;
6588
6589           while (pos < pos_end)
6590             {
6591               p = BYTE_POS_ADDR (pos_byte);
6592               if (*p == '\r' && p[1] == '\n')
6593                 {
6594                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6595                   n++;
6596                   pos_end--;
6597                 }
6598               pos++;
6599               if (coding->dst_multibyte)
6600                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6601               else
6602                 pos_byte++;
6603             }
6604         }
6605       coding->produced -= n;
6606       coding->produced_char -= n;
6607     }
6608 }
6609
6610
6611 /* Return a translation table (or list of them) from coding system
6612    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6613    decoding (ENCODEP is zero). */
6614
6615 static Lisp_Object
6616 get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6617 {
6618   Lisp_Object standard, translation_table;
6619   Lisp_Object val;
6620
6621   if (NILP (Venable_character_translation))
6622     {
6623       if (max_lookup)
6624         *max_lookup = 0;
6625       return Qnil;
6626     }
6627   if (encodep)
6628     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6629       standard = Vstandard_translation_table_for_encode;
6630   else
6631     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6632       standard = Vstandard_translation_table_for_decode;
6633   if (NILP (translation_table))
6634     translation_table = standard;
6635   else
6636     {
6637       if (SYMBOLP (translation_table))
6638         translation_table = Fget (translation_table, Qtranslation_table);
6639       else if (CONSP (translation_table))
6640         {
6641           translation_table = Fcopy_sequence (translation_table);
6642           for (val = translation_table; CONSP (val); val = XCDR (val))
6643             if (SYMBOLP (XCAR (val)))
6644               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6645         }
6646       if (CHAR_TABLE_P (standard))
6647         {
6648           if (CONSP (translation_table))
6649             translation_table = nconc2 (translation_table,
6650                                         Fcons (standard, Qnil));
6651           else
6652             translation_table = Fcons (translation_table,
6653                                        Fcons (standard, Qnil));
6654         }
6655     }
6656
6657   if (max_lookup)
6658     {
6659       *max_lookup = 1;
6660       if (CHAR_TABLE_P (translation_table)
6661           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6662         {
6663           val = XCHAR_TABLE (translation_table)->extras[1];
6664           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6665             *max_lookup = XFASTINT (val);
6666         }
6667       else if (CONSP (translation_table))
6668         {
6669           Lisp_Object tail;
6670
6671           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6672             if (CHAR_TABLE_P (XCAR (tail))
6673                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6674               {
6675                 Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1];
6676                 if (NATNUMP (tailval) && *max_lookup < XFASTINT (tailval))
6677                   *max_lookup = XFASTINT (tailval);
6678               }
6679         }
6680     }
6681   return translation_table;
6682 }
6683
/* Look up character C in TABLE, which is a char-table or a list whose
   char-table elements are tried in order (other elements are
   skipped).

   When a lookup yields a character, C is replaced by it and TRANS is
   reset to Qnil; with a list, the next table is then consulted with
   the new C.  When a lookup yields any other non-nil value, TRANS is
   left holding that value; with a list, the search stops there.
   TRANS is Qnil if TABLE is neither a char-table nor a list.

   C and TRANS are assigned to, so both must be lvalues.  TABLE and C
   are evaluated more than once.  */
6684 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
6685   do {                                                          \
6686     trans = Qnil;                                               \
6687     if (CHAR_TABLE_P (table))                                   \
6688       {                                                         \
6689         trans = CHAR_TABLE_REF (table, c);                      \
6690         if (CHARACTERP (trans))                                 \
6691           c = XFASTINT (trans), trans = Qnil;                   \
6692       }                                                         \
6693     else if (CONSP (table))                                     \
6694       {                                                         \
6695         Lisp_Object tail;                                       \
6696                                                                 \
6697         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
6698           if (CHAR_TABLE_P (XCAR (tail)))                       \
6699             {                                                   \
6700               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
6701               if (CHARACTERP (trans))                           \
6702                 c = XFASTINT (trans), trans = Qnil;             \
6703               else if (! NILP (trans))                          \
6704                 break;                                          \
6705             }                                                   \
6706       }                                                         \
6707   } while (0)
6708
6709
6710 /* Return a translation of character(s) at BUF according to TRANS.
6711    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6712    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6713    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6714    translation is found, and Qnil if not found..
6715    If BUF is too short to lookup characters in FROM, return Qt.  */
6716
6717 static Lisp_Object
6718 get_translation (Lisp_Object trans, int *buf, int *buf_end)
6719 {
6720
6721   if (INTEGERP (trans))
6722     return trans;
6723   for (; CONSP (trans); trans = XCDR (trans))
6724     {
6725       Lisp_Object val = XCAR (trans);
6726       Lisp_Object from = XCAR (val);
6727       ptrdiff_t len = ASIZE (from);
6728       ptrdiff_t i;
6729
6730       for (i = 0; i < len; i++)
6731         {
6732           if (buf + i == buf_end)
6733             return Qt;
6734           if (XINT (AREF (from, i)) != buf[i])
6735             break;
6736         }
6737       if (i == len)
6738         return val;
6739     }
6740   return Qnil;
6741 }
6742
6743
/* Write the characters obtained so far to the destination of CODING,
   starting at CODING->destination + CODING->produced.

   If CODING->chars_at_source is zero, the characters are taken from
   CODING->charbuf.  A non-negative element is a character; it is
   looked up in TRANSLATION_TABLE and may be replaced by one or more
   characters.  A negative element -N starts an annotation, and those
   N elements are skipped here.

   Otherwise the CODING->consumed bytes at CODING->source are copied:
   byte by byte through ONE_MORE_BYTE or EMIT_ONE_BYTE when the
   multibyteness of source and destination differs, verbatim when it
   is the same.

   LAST_BLOCK nonzero means that no more source follows, so a
   translation that would need more characters than remain in the
   buffer is not waited for.

   The destination is enlarged with alloc_destination as needed.
   CODING->produced and CODING->produced_char are updated, and
   insert_from_gap is called if the destination is a buffer and at
   least one character was produced.

   Return the number of elements left unprocessed at the end of
   CODING->charbuf; this is always 0 when CODING->chars_at_source is
   nonzero.  */
6744 static int
6745 produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6746                int last_block)
6747 {
6748   unsigned char *dst = coding->destination + coding->produced;
6749   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6750   ptrdiff_t produced;
6751   ptrdiff_t produced_chars = 0;
6752   int carryover = 0;
6753
6754   if (! coding->chars_at_source)
6755     {
6756       /* Source characters are in coding->charbuf.  */
6757       int *buf = coding->charbuf;
6758       int *buf_end = buf + coding->charbuf_used;
6759
6760       if (EQ (coding->src_object, coding->dst_object))
6761         {
                 /* Source and destination are the same object: do not
                    write beyond the part of the source that has been
                    consumed.  */
6762           coding_set_source (coding);
6763           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6764         }
6765
6766       while (buf < buf_end)
6767         {
6768           int c = *buf;
6769           ptrdiff_t i;
6770
6771           if (c >= 0)
6772             {
6773               ptrdiff_t from_nchars = 1, to_nchars = 1;
6774               Lisp_Object trans = Qnil;
6775
6776               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6777               if (! NILP (trans))
6778                 {
6779                   trans = get_translation (trans, buf, buf_end);
6780                   if (INTEGERP (trans))
6781                     c = XINT (trans);
6782                   else if (CONSP (trans))
6783                     {
                             /* TRANS is ([FROM-CHAR ...] . TO).  */
6784                       from_nchars = ASIZE (XCAR (trans));
6785                       trans = XCDR (trans);
6786                       if (INTEGERP (trans))
6787                         c = XINT (trans);
6788                       else
6789                         {
6790                           to_nchars = ASIZE (trans);
6791                           c = XINT (AREF (trans, 0));
6792                         }
6793                     }
6794                   else if (EQ (trans, Qt) && ! last_block)
                           /* More characters are needed to decide; leave
                              the rest of the buffer as carryover.  */
6795                     break;
6796                 }
6797
6798               if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars)
6799                 {
                         /* Check that the size requested below cannot
                            overflow.  */
6800                   if (((min (PTRDIFF_MAX, SIZE_MAX) - (buf_end - buf))
6801                        / MAX_MULTIBYTE_LENGTH)
6802                       < to_nchars)
6803                     memory_full (SIZE_MAX);
6804                   dst = alloc_destination (coding,
6805                                            buf_end - buf
6806                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6807                                            dst);
6808                   if (EQ (coding->src_object, coding->dst_object))
6809                     {
6810                       coding_set_source (coding);
6811                       dst_end = (((unsigned char *) coding->source)
6812                                  + coding->consumed);
6813                     }
6814                   else
6815                     dst_end = coding->destination + coding->dst_bytes;
6816                 }
6817
6818               for (i = 0; i < to_nchars; i++)
6819                 {
                         /* The first character is already in C; the
                            others come from the vector TRANS.  */
6820                   if (i > 0)
6821                     c = XINT (AREF (trans, i));
6822                   if (coding->dst_multibyte
6823                       || ! CHAR_BYTE8_P (c))
6824                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6825                   else
6826                     *dst++ = CHAR_TO_BYTE8 (c);
6827                 }
6828               produced_chars += to_nchars;
6829               buf += from_nchars;
6830             }
6831           else
6832             /* This is an annotation datum.  (-C) is the length.  */
6833             buf += -c;
6834         }
6835       carryover = buf_end - buf;
6836     }
6837   else
6838     {
6839       /* Source characters are at coding->source.  */
6840       const unsigned char *src = coding->source;
6841       const unsigned char *src_end = src + coding->consumed;
6842
6843       if (EQ (coding->dst_object, coding->src_object))
6844         dst_end = (unsigned char *) src;
6845       if (coding->src_multibyte != coding->dst_multibyte)
6846         {
6847           if (coding->src_multibyte)
6848             {
                     /* NOTE(review): MULTIBYTEP and CONSUMED_CHARS are not
                        referenced by name below; presumably ONE_MORE_BYTE
                        uses them -- confirm against its definition.  */
6849               int multibytep = 1;
6850               ptrdiff_t consumed_chars = 0;
6851
6852               while (1)
6853                 {
6854                   const unsigned char *src_base = src;
6855                   int c;
6856
6857                   ONE_MORE_BYTE (c);
6858                   if (dst == dst_end)
6859                     {
6860                       if (EQ (coding->src_object, coding->dst_object))
6861                         dst_end = (unsigned char *) src;
6862                       if (dst == dst_end)
6863                         {
6864                           ptrdiff_t offset = src - coding->source;
6865
6866                           dst = alloc_destination (coding, src_end - src + 1,
6867                                                    dst);
6868                           dst_end = coding->destination + coding->dst_bytes;
                                 /* Recompute the source pointers from
                                    CODING->source after the allocation.  */
6869                           coding_set_source (coding);
6870                           src = coding->source + offset;
6871                           src_end = coding->source + coding->consumed;
6872                           if (EQ (coding->src_object, coding->dst_object))
6873                             dst_end = (unsigned char *) src;
6874                         }
6875                     }
6876                   *dst++ = c;
6877                   produced_chars++;
6878                 }
                     /* NOTE(review): no `goto' is visible in this function;
                        presumably ONE_MORE_BYTE jumps here when the source is
                        exhausted -- confirm.  */
6879             no_more_source:
6880               ;
6881             }
6882           else
6883             while (src < src_end)
6884               {
6885                 int multibytep = 1;
6886                 int c = *src++;
6887
                       /* Make sure at least two bytes are free in the
                          destination.  */
6888                 if (dst >= dst_end - 1)
6889                   {
6890                     if (EQ (coding->src_object, coding->dst_object))
6891                       dst_end = (unsigned char *) src;
6892                     if (dst >= dst_end - 1)
6893                       {
6894                         ptrdiff_t offset = src - coding->source;
6895                         ptrdiff_t more_bytes;
6896
6897                         if (EQ (coding->src_object, coding->dst_object))
6898                           more_bytes = ((src_end - src) / 2) + 2;
6899                         else
6900                           more_bytes = src_end - src + 2;
6901                         dst = alloc_destination (coding, more_bytes, dst);
6902                         dst_end = coding->destination + coding->dst_bytes;
6903                         coding_set_source (coding);
6904                         src = coding->source + offset;
6905                         src_end = coding->source + coding->consumed;
6906                         if (EQ (coding->src_object, coding->dst_object))
6907                           dst_end = (unsigned char *) src;
6908                       }
6909                   }
6910                 EMIT_ONE_BYTE (c);
6911               }
6912         }
6913       else
6914         {
                 /* Same multibyteness on both sides: the bytes are
                    copied as they are.  */
6915           if (!EQ (coding->src_object, coding->dst_object))
6916             {
6917               ptrdiff_t require = coding->src_bytes - coding->dst_bytes;
6918
6919               if (require > 0)
6920                 {
6921                   ptrdiff_t offset = src - coding->source;
6922
6923                   dst = alloc_destination (coding, require, dst);
6924                   coding_set_source (coding);
6925                   src = coding->source + offset;
6926                   src_end = coding->source + coding->consumed;
6927                 }
6928             }
6929           produced_chars = coding->consumed_char;
6930           while (src < src_end)
6931             *dst++ = *src++;
6932         }
6933     }
6934
         /* Number of bytes written by this call.  */
6935   produced = dst - (coding->destination + coding->produced);
6936   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6937     insert_from_gap (produced_chars, produced);
6938   coding->produced += produced;
6939   coding->produced_char += produced_chars;
6940   return carryover;
6941 }
6942
6943 /* Compose text in CODING->object according to the annotation data at
6944    CHARBUF.  CHARBUF is an array:
6945      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6946  */
6947
6948 static inline void
6949 produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
6950 {
6951   int len;
6952   ptrdiff_t to;
6953   enum composition_method method;
6954   Lisp_Object components;
6955
6956   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6957   to = pos + charbuf[2];
6958   method = (enum composition_method) (charbuf[4]);
6959
6960   if (method == COMPOSITION_RELATIVE)
6961     components = Qnil;
6962   else
6963     {
6964       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6965       int i, j;
6966
6967       if (method == COMPOSITION_WITH_RULE)
6968         len = charbuf[2] * 3 - 2;
6969       charbuf += MAX_ANNOTATION_LENGTH;
6970       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6971       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6972         {
6973           if (charbuf[i] >= 0)
6974             args[j] = make_number (charbuf[i]);
6975           else
6976             {
6977               i++;
6978               args[j] = make_number (charbuf[i] % 0x100);
6979             }
6980         }
6981       components = (i == j ? Fstring (j, args) : Fvector (j, args));
6982     }
6983   compose_text (pos, to, components, Qnil, coding->dst_object);
6984 }
6985
6986
6987 /* Put `charset' property on text in CODING->object according to
6988    the annotation data at CHARBUF.  CHARBUF is an array:
6989      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6990  */
6991
6992 static inline void
6993 produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
6994 {
6995   ptrdiff_t from = pos - charbuf[2];
6996   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6997
6998   Fput_text_property (make_number (from), make_number (pos),
6999                       Qcharset, CHARSET_NAME (charset),
7000                       coding->dst_object);
7001 }
7002
7003
/* Number of `int' elements requested first for the conversion work
   area.  */
7004 #define CHARBUF_SIZE 0x4000
7005
/* Allocate CODING->charbuf with alloca and store its size, counted in
   `int' elements, in CODING->charbuf_size.  The first request is for
   CHARBUF_SIZE elements; while alloca yields a null pointer and the
   size is still above 1024, the size is halved and the request is
   repeated.  (NOTE(review): alloca usually does not report failure by
   returning null -- confirm whether the retry can ever trigger.)

   If no area was obtained, record CODING_RESULT_INSUFFICIENT_MEM and
   execute `return coding->result' in the function that uses this
   macro; it can therefore be used only in a function that returns
   such a result.

   Because alloca is used, the area lives only until the calling
   function returns.  CODING is evaluated several times and is used
   unparenthesized, so it should be a plain pointer variable.  */
7006 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
7007   do {                                                                  \
7008     int size = CHARBUF_SIZE;                                            \
7009                                                                         \
7010     coding->charbuf = NULL;                                             \
7011     while (size > 1024)                                                 \
7012       {                                                                 \
7013         coding->charbuf = alloca (sizeof (int) * size);                 \
7014         if (coding->charbuf)                                            \
7015           break;                                                        \
7016         size >>= 1;                                                     \
7017       }                                                                 \
7018     if (! coding->charbuf)                                              \
7019       {                                                                 \
7020         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
7021         return coding->result;                                          \
7022       }                                                                 \
7023     coding->charbuf_size = size;                                        \
7024   } while (0)
7025
7026
7027 static void
7028 produce_annotation (struct coding_system *coding, ptrdiff_t pos)
7029 {
7030   int *charbuf = coding->charbuf;
7031   int *charbuf_end = charbuf + coding->charbuf_used;
7032
7033   if (NILP (coding->dst_object))
7034     return;
7035
7036   while (charbuf < charbuf_end)
7037     {
7038       if (*charbuf >= 0)
7039         pos++, charbuf++;
7040       else
7041         {
7042           int len = -*charbuf;
7043
7044           if (len > 2)
7045             switch (charbuf[1])
7046               {
7047               case CODING_ANNOTATE_COMPOSITION_MASK:
7048                 produce_composition (coding, charbuf, pos);
7049                 break;
7050               case CODING_ANNOTATE_CHARSET_MASK:
7051                 produce_charset (coding, charbuf, pos);
7052                 break;
7053               }
7054           charbuf += len;
7055         }
7056     }
7057 }
7058
7059 /* Decode the data at CODING->src_object into CODING->dst_object.
7060    CODING->src_object is a buffer, a string, or nil.
7061    CODING->dst_object is a buffer.
7062
7063    If CODING->src_object is a buffer, it must be the current buffer.
7064    In this case, if CODING->src_pos is positive, it is a position of
7065    the source text in the buffer, otherwise, the source text is in the
7066    gap area of the buffer, and CODING->src_pos specifies the offset of
7067    the text from GPT (which must be the same as PT).  If this is the
7068    same buffer as CODING->dst_object, CODING->src_pos must be
7069    negative.
7070
7071    If CODING->src_object is a string, CODING->src_pos is an index to
7072    that string.
7073
7074    If CODING->src_object is nil, CODING->source must already point to
7075    the non-relocatable memory area.  In this case, CODING->src_pos is
7076    an offset from CODING->source.
7077
7078    The decoded data is inserted at the current point of the buffer
7079    CODING->dst_object.
7080 */
7081
7082 static int
7083 decode_coding (struct coding_system *coding)
7084 {
7085   Lisp_Object attrs;
7086   Lisp_Object undo_list;
7087   Lisp_Object translation_table;
7088   struct ccl_spec cclspec;
7089   int carryover;
7090   int i;
7091
7092   if (BUFFERP (coding->src_object)
7093       && coding->src_pos > 0
7094       && coding->src_pos < GPT
7095       && coding->src_pos + coding->src_chars > GPT)
7096     move_gap_both (coding->src_pos, coding->src_pos_byte);
7097
7098   undo_list = Qt;
7099   if (BUFFERP (coding->dst_object))
7100     {
7101       if (current_buffer != XBUFFER (coding->dst_object))
7102         set_buffer_internal (XBUFFER (coding->dst_object));
7103       if (GPT != PT)
7104         move_gap_both (PT, PT_BYTE);
7105       undo_list = BVAR (current_buffer, undo_list);
7106       BVAR (current_buffer, undo_list) = Qt;
7107     }
7108
7109   coding->consumed = coding->consumed_char = 0;
7110   coding->produced = coding->produced_char = 0;
7111   coding->chars_at_source = 0;
7112   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7113   coding->errors = 0;
7114
7115   ALLOC_CONVERSION_WORK_AREA (coding);
7116
7117   attrs = CODING_ID_ATTRS (coding->id);
7118   translation_table = get_translation_table (attrs, 0, NULL);
7119
7120   carryover = 0;
7121   if (coding->decoder == decode_coding_ccl)
7122     {
7123       coding->spec.ccl = &cclspec;
7124       setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7125     }
7126   do
7127     {
7128       ptrdiff_t pos = coding->dst_pos + coding->produced_char;
7129
7130       coding_set_source (coding);
7131       coding->annotated = 0;
7132       coding->charbuf_used = carryover;
7133       (*(coding->decoder)) (coding);
7134       coding_set_destination (coding);
7135       carryover = produce_chars (coding, translation_table, 0);
7136       if (coding->annotated)
7137         produce_annotation (coding, pos);
7138       for (i = 0; i < carryover; i++)
7139         coding->charbuf[i]
7140           = coding->charbuf[coding->charbuf_used - carryover + i];
7141     }
7142   while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7143          || (coding->consumed < coding->src_bytes
7144              && (coding->result == CODING_RESULT_SUCCESS
7145                  || coding->result == CODING_RESULT_INVALID_SRC)));
7146
7147   if (carryover > 0)
7148     {
7149       coding_set_destination (coding);
7150       coding->charbuf_used = carryover;
7151       produce_chars (coding, translation_table, 1);
7152     }
7153
7154   coding->carryover_bytes = 0;
7155   if (coding->consumed < coding->src_bytes)
7156     {
7157       int nbytes = coding->src_bytes - coding->consumed;
7158       const unsigned char *src;
7159
7160       coding_set_source (coding);
7161       coding_set_destination (coding);
7162       src = coding->source + coding->consumed;
7163
7164       if (coding->mode & CODING_MODE_LAST_BLOCK)
7165         {
7166           /* Flush out unprocessed data as binary chars.  We are sure
7167              that the number of data is less than the size of
7168              coding->charbuf.  */
7169           coding->charbuf_used = 0;
7170           coding->chars_at_source = 0;
7171
7172           while (nbytes-- > 0)
7173             {
7174               int c = *src++;
7175
7176               if (c & 0x80)
7177                 c = BYTE8_TO_CHAR (c);
7178               coding->charbuf[coding->charbuf_used++] = c;
7179             }
7180           produce_chars (coding, Qnil, 1);
7181         }
7182       else
7183         {
7184           /* Record unprocessed bytes in coding->carryover.  We are
7185              sure that the number of data is less than the size of
7186              coding->carryover.  */
7187           unsigned char *p = coding->carryover;
7188
7189           if (nbytes > sizeof coding->carryover)
7190             nbytes = sizeof coding->carryover;
7191           coding->carryover_bytes = nbytes;
7192           while (nbytes-- > 0)
7193             *p++ = *src++;
7194         }
7195       coding->consumed = coding->src_bytes;
7196     }
7197
7198   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7199       && !inhibit_eol_conversion)
7200     decode_eol (coding);
7201   if (BUFFERP (coding->dst_object))
7202     {
7203       BVAR (current_buffer, undo_list) = undo_list;
7204       record_insert (coding->dst_pos, coding->produced_char);
7205     }
7206   return coding->result;
7207 }
7208
7209
7210 /* Extract an annotation datum from a composition starting at POS and
7211    ending before LIMIT of CODING->src_object (buffer or string), store
7212    the data in BUF, set *STOP to a starting position of the next
7213    composition (if any) or to LIMIT, and return the address of the
7214    next element of BUF.
7215
7216    If such an annotation is not found, set *STOP to a starting
7217    position of a composition after POS (if any) or to LIMIT, and
7218    return BUF.  */
7219
7220 static inline int *
7221 handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
7222                                struct coding_system *coding, int *buf,
7223                                ptrdiff_t *stop)
7224 {
7225   ptrdiff_t start, end;
7226   Lisp_Object prop;
7227
7228   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7229       || end > limit)
7230     *stop = limit;
7231   else if (start > pos)
7232     *stop = start;
7233   else
7234     {
7235       if (start == pos)
7236         {
7237           /* We found a composition.  Store the corresponding
7238              annotation data in BUF.  */
7239           int *head = buf;
7240           enum composition_method method = COMPOSITION_METHOD (prop);
7241           int nchars = COMPOSITION_LENGTH (prop);
7242
7243           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7244           if (method != COMPOSITION_RELATIVE)
7245             {
7246               Lisp_Object components;
7247               ptrdiff_t i, len, i_byte;
7248
7249               components = COMPOSITION_COMPONENTS (prop);
7250               if (VECTORP (components))
7251                 {
7252                   len = ASIZE (components);
7253                   for (i = 0; i < len; i++)
7254                     *buf++ = XINT (AREF (components, i));
7255                 }
7256               else if (STRINGP (components))
7257                 {
7258                   len = SCHARS (components);
7259                   i = i_byte = 0;
7260                   while (i < len)
7261                     {
7262                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7263                       buf++;
7264                     }
7265                 }
7266               else if (INTEGERP (components))
7267                 {
7268                   len = 1;
7269                   *buf++ = XINT (components);
7270                 }
7271               else if (CONSP (components))
7272                 {
7273                   for (len = 0; CONSP (components);
7274                        len++, components = XCDR (components))
7275                     *buf++ = XINT (XCAR (components));
7276                 }
7277               else
7278                 abort ();
7279               *head -= len;
7280             }
7281         }
7282
7283       if (find_composition (end, limit, &start, &end, &prop,
7284                             coding->src_object)
7285           && end <= limit)
7286         *stop = start;
7287       else
7288         *stop = limit;
7289     }
7290   return buf;
7291 }
7292
7293
7294 /* Extract an annotation datum from a text property `charset' at POS of
7295    CODING->src_object (buffer of string), store the data in BUF, set
7296    *STOP to the position where the value of `charset' property changes
7297    (limiting by LIMIT), and return the address of the next element of
7298    BUF.
7299
7300    If the property value is nil, set *STOP to the position where the
7301    property value is non-nil (limiting by LIMIT), and return BUF.  */
7302
7303 static inline int *
7304 handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit,
7305                            struct coding_system *coding, int *buf,
7306                            ptrdiff_t *stop)
7307 {
7308   Lisp_Object val, next;
7309   int id;
7310
7311   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7312   if (! NILP (val) && CHARSETP (val))
7313     id = XINT (CHARSET_SYMBOL_ID (val));
7314   else
7315     id = -1;
7316   ADD_CHARSET_DATA (buf, 0, id);
7317   next = Fnext_single_property_change (make_number (pos), Qcharset,
7318                                        coding->src_object,
7319                                        make_number (limit));
7320   *stop = XINT (next);
7321   return buf;
7322 }
7323
7324
7325 static void
7326 consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7327                int max_lookup)
7328 {
7329   int *buf = coding->charbuf;
7330   int *buf_end = coding->charbuf + coding->charbuf_size;
7331   const unsigned char *src = coding->source + coding->consumed;
7332   const unsigned char *src_end = coding->source + coding->src_bytes;
7333   ptrdiff_t pos = coding->src_pos + coding->consumed_char;
7334   ptrdiff_t end_pos = coding->src_pos + coding->src_chars;
7335   int multibytep = coding->src_multibyte;
7336   Lisp_Object eol_type;
7337   int c;
7338   ptrdiff_t stop, stop_composition, stop_charset;
7339   int *lookup_buf = NULL;
7340
7341   if (! NILP (translation_table))
7342     lookup_buf = alloca (sizeof (int) * max_lookup);
7343
7344   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7345   if (VECTORP (eol_type))
7346     eol_type = Qunix;
7347
7348   /* Note: composition handling is not yet implemented.  */
7349   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7350
7351   if (NILP (coding->src_object))
7352     stop = stop_composition = stop_charset = end_pos;
7353   else
7354     {
7355       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7356         stop = stop_composition = pos;
7357       else
7358         stop = stop_composition = end_pos;
7359       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7360         stop = stop_charset = pos;
7361       else
7362         stop_charset = end_pos;
7363     }
7364
7365   /* Compensate for CRLF and conversion.  */
7366   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7367   while (buf < buf_end)
7368     {
7369       Lisp_Object trans;
7370
7371       if (pos == stop)
7372         {
7373           if (pos == end_pos)
7374             break;
7375           if (pos == stop_composition)
7376             buf = handle_composition_annotation (pos, end_pos, coding,
7377                                                  buf, &stop_composition);
7378           if (pos == stop_charset)
7379             buf = handle_charset_annotation (pos, end_pos, coding,
7380                                              buf, &stop_charset);
7381           stop = (stop_composition < stop_charset
7382                   ? stop_composition : stop_charset);
7383         }
7384
7385       if (! multibytep)
7386         {
7387           int bytes;
7388
7389           if (coding->encoder == encode_coding_raw_text
7390               || coding->encoder == encode_coding_ccl)
7391             c = *src++, pos++;
7392           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7393             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7394           else
7395             c = BYTE8_TO_CHAR (*src), src++, pos++;
7396         }
7397       else
7398         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7399       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7400         c = '\n';
7401       if (! EQ (eol_type, Qunix))
7402         {
7403           if (c == '\n')
7404             {
7405               if (EQ (eol_type, Qdos))
7406                 *buf++ = '\r';
7407               else
7408                 c = '\r';
7409             }
7410         }
7411
7412       trans = Qnil;
7413       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7414       if (NILP (trans))
7415         *buf++ = c;
7416       else
7417         {
7418           ptrdiff_t from_nchars = 1, to_nchars = 1;
7419           int *lookup_buf_end;
7420           const unsigned char *p = src;
7421           int i;
7422
7423           lookup_buf[0] = c;
7424           for (i = 1; i < max_lookup && p < src_end; i++)
7425             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7426           lookup_buf_end = lookup_buf + i;
7427           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7428           if (INTEGERP (trans))
7429             c = XINT (trans);
7430           else if (CONSP (trans))
7431             {
7432               from_nchars = ASIZE (XCAR (trans));
7433               trans = XCDR (trans);
7434               if (INTEGERP (trans))
7435                 c = XINT (trans);
7436               else
7437                 {
7438                   to_nchars = ASIZE (trans);
7439                   if (buf_end - buf < to_nchars)
7440                     break;
7441                   c = XINT (AREF (trans, 0));
7442                 }
7443             }
7444           else
7445             break;
7446           *buf++ = c;
7447           for (i = 1; i < to_nchars; i++)
7448             *buf++ = XINT (AREF (trans, i));
7449           for (i = 1; i < from_nchars; i++, pos++)
7450             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7451         }
7452     }
7453
7454   coding->consumed = src - coding->source;
7455   coding->consumed_char = pos - coding->src_pos;
7456   coding->charbuf_used = buf - coding->charbuf;
7457   coding->chars_at_source = 0;
7458 }
7459
7460
7461 /* Encode the text at CODING->src_object into CODING->dst_object.
7462    CODING->src_object is a buffer or a string.
7463    CODING->dst_object is a buffer or nil.
7464
7465    If CODING->src_object is a buffer, it must be the current buffer.
7466    In this case, if CODING->src_pos is positive, it is a position of
7467    the source text in the buffer, otherwise. the source text is in the
7468    gap area of the buffer, and coding->src_pos specifies the offset of
7469    the text from GPT (which must be the same as PT).  If this is the
7470    same buffer as CODING->dst_object, CODING->src_pos must be
7471    negative and CODING should not have `pre-write-conversion'.
7472
7473    If CODING->src_object is a string, CODING should not have
7474    `pre-write-conversion'.
7475
7476    If CODING->dst_object is a buffer, the encoded data is inserted at
7477    the current point of that buffer.
7478
7479    If CODING->dst_object is nil, the encoded data is placed at the
7480    memory area specified by CODING->destination.  */
7481
7482 static int
7483 encode_coding (struct coding_system *coding)
7484 {
7485   Lisp_Object attrs;
7486   Lisp_Object translation_table;
7487   int max_lookup;
7488   struct ccl_spec cclspec;
7489
7490   attrs = CODING_ID_ATTRS (coding->id);
7491   if (coding->encoder == encode_coding_raw_text)
7492     translation_table = Qnil, max_lookup = 0;
7493   else
7494     translation_table = get_translation_table (attrs, 1, &max_lookup);
7495
7496   if (BUFFERP (coding->dst_object))
7497     {
7498       set_buffer_internal (XBUFFER (coding->dst_object));
7499       coding->dst_multibyte
7500         = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7501     }
7502
7503   coding->consumed = coding->consumed_char = 0;
7504   coding->produced = coding->produced_char = 0;
7505   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7506   coding->errors = 0;
7507
7508   ALLOC_CONVERSION_WORK_AREA (coding);
7509
7510   if (coding->encoder == encode_coding_ccl)
7511     {
7512       coding->spec.ccl = &cclspec;
7513       setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7514     }
7515   do {
7516     coding_set_source (coding);
7517     consume_chars (coding, translation_table, max_lookup);
7518     coding_set_destination (coding);
7519     (*(coding->encoder)) (coding);
7520   } while (coding->consumed_char < coding->src_chars);
7521
7522   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7523     insert_from_gap (coding->produced_char, coding->produced);
7524
7525   return (coding->result);
7526 }
7527
7528
/* Name (or base name) of work buffer for code conversion.  */
static Lisp_Object Vcode_conversion_workbuf_name;

/* A working buffer used by the top level conversion.  Once it is
   created, it is never destroyed.  It has the name
   Vcode_conversion_workbuf_name.  The other working buffers are
   destroyed after the use is finished, and their names are modified
   versions of Vcode_conversion_workbuf_name.  */
static Lisp_Object Vcode_conversion_reused_workbuf;

/* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
   make_conversion_work_buffer increments this on every call, so the
   value can exceed 1; code_conversion_restore resets it to 0 when
   the reused buffer is released.  */
static int reused_workbuf_in_use;
7541
7542
7543 /* Return a working buffer of code conversion.  MULTIBYTE specifies the
7544    multibyteness of returning buffer.  */
7545
7546 static Lisp_Object
7547 make_conversion_work_buffer (int multibyte)
7548 {
7549   Lisp_Object name, workbuf;
7550   struct buffer *current;
7551
7552   if (reused_workbuf_in_use++)
7553     {
7554       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7555       workbuf = Fget_buffer_create (name);
7556     }
7557   else
7558     {
7559       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7560         Vcode_conversion_reused_workbuf
7561           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7562       workbuf = Vcode_conversion_reused_workbuf;
7563     }
7564   current = current_buffer;
7565   set_buffer_internal (XBUFFER (workbuf));
7566   /* We can't allow modification hooks to run in the work buffer.  For
7567      instance, directory_files_internal assumes that file decoding
7568      doesn't compile new regexps.  */
7569   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7570   Ferase_buffer ();
7571   BVAR (current_buffer, undo_list) = Qt;
7572   BVAR (current_buffer, enable_multibyte_characters) = multibyte ? Qt : Qnil;
7573   set_buffer_internal (current);
7574   return workbuf;
7575 }
7576
7577
7578 static Lisp_Object
7579 code_conversion_restore (Lisp_Object arg)
7580 {
7581   Lisp_Object current, workbuf;
7582   struct gcpro gcpro1;
7583
7584   GCPRO1 (arg);
7585   current = XCAR (arg);
7586   workbuf = XCDR (arg);
7587   if (! NILP (workbuf))
7588     {
7589       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7590         reused_workbuf_in_use = 0;
7591       else
7592         Fkill_buffer (workbuf);
7593     }
7594   set_buffer_internal (XBUFFER (current));
7595   UNGCPRO;
7596   return Qnil;
7597 }
7598
7599 Lisp_Object
7600 code_conversion_save (int with_work_buf, int multibyte)
7601 {
7602   Lisp_Object workbuf = Qnil;
7603
7604   if (with_work_buf)
7605     workbuf = make_conversion_work_buffer (multibyte);
7606   record_unwind_protect (code_conversion_restore,
7607                          Fcons (Fcurrent_buffer (), workbuf));
7608   return workbuf;
7609 }
7610
7611 int
7612 decode_coding_gap (struct coding_system *coding,
7613                    ptrdiff_t chars, ptrdiff_t bytes)
7614 {
7615   ptrdiff_t count = SPECPDL_INDEX ();
7616   Lisp_Object attrs;
7617
7618   code_conversion_save (0, 0);
7619
7620   coding->src_object = Fcurrent_buffer ();
7621   coding->src_chars = chars;
7622   coding->src_bytes = bytes;
7623   coding->src_pos = -chars;
7624   coding->src_pos_byte = -bytes;
7625   coding->src_multibyte = chars < bytes;
7626   coding->dst_object = coding->src_object;
7627   coding->dst_pos = PT;
7628   coding->dst_pos_byte = PT_BYTE;
7629   coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7630
7631   if (CODING_REQUIRE_DETECTION (coding))
7632     detect_coding (coding);
7633
7634   coding->mode |= CODING_MODE_LAST_BLOCK;
7635   current_buffer->text->inhibit_shrinking = 1;
7636   decode_coding (coding);
7637   current_buffer->text->inhibit_shrinking = 0;
7638
7639   attrs = CODING_ID_ATTRS (coding->id);
7640   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7641     {
7642       ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7643       Lisp_Object val;
7644
7645       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7646       val = call1 (CODING_ATTR_POST_READ (attrs),
7647                    make_number (coding->produced_char));
7648       CHECK_NATNUM (val);
7649       coding->produced_char += Z - prev_Z;
7650       coding->produced += Z_BYTE - prev_Z_BYTE;
7651     }
7652
7653   unbind_to (count, Qnil);
7654   return coding->result;
7655 }
7656
7657
7658 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7659    SRC_OBJECT into DST_OBJECT by coding context CODING.
7660
7661    SRC_OBJECT is a buffer, a string, or Qnil.
7662
7663    If it is a buffer, the text is at point of the buffer.  FROM and TO
7664    are positions in the buffer.
7665
7666    If it is a string, the text is at the beginning of the string.
7667    FROM and TO are indices to the string.
7668
7669    If it is nil, the text is at coding->source.  FROM and TO are
7670    indices to coding->source.
7671
7672    DST_OBJECT is a buffer, Qt, or Qnil.
7673
7674    If it is a buffer, the decoded text is inserted at point of the
7675    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7676    is deleted.
7677
7678    If it is Qt, a string is made from the decoded text, and
7679    set in CODING->dst_object.
7680
7681    If it is Qnil, the decoded text is stored at CODING->destination.
7682    The caller must allocate CODING->dst_bytes bytes at
7683    CODING->destination by xmalloc.  If the decoded text is longer than
7684    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7685  */
7686
7687 void
7688 decode_coding_object (struct coding_system *coding,
7689                       Lisp_Object src_object,
7690                       ptrdiff_t from, ptrdiff_t from_byte,
7691                       ptrdiff_t to, ptrdiff_t to_byte,
7692                       Lisp_Object dst_object)
7693 {
7694   ptrdiff_t count = SPECPDL_INDEX ();
7695   unsigned char *destination IF_LINT (= NULL);
7696   ptrdiff_t dst_bytes IF_LINT (= 0);
7697   ptrdiff_t chars = to - from;
7698   ptrdiff_t bytes = to_byte - from_byte;
7699   Lisp_Object attrs;
7700   int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7701   int need_marker_adjustment = 0;
7702   Lisp_Object old_deactivate_mark;
7703
7704   old_deactivate_mark = Vdeactivate_mark;
7705
7706   if (NILP (dst_object))
7707     {
7708       destination = coding->destination;
7709       dst_bytes = coding->dst_bytes;
7710     }
7711
7712   coding->src_object = src_object;
7713   coding->src_chars = chars;
7714   coding->src_bytes = bytes;
7715   coding->src_multibyte = chars < bytes;
7716
7717   if (STRINGP (src_object))
7718     {
7719       coding->src_pos = from;
7720       coding->src_pos_byte = from_byte;
7721     }
7722   else if (BUFFERP (src_object))
7723     {
7724       set_buffer_internal (XBUFFER (src_object));
7725       if (from != GPT)
7726         move_gap_both (from, from_byte);
7727       if (EQ (src_object, dst_object))
7728         {
7729           struct Lisp_Marker *tail;
7730
7731           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7732             {
7733               tail->need_adjustment
7734                 = tail->charpos == (tail->insertion_type ? from : to);
7735               need_marker_adjustment |= tail->need_adjustment;
7736             }
7737           saved_pt = PT, saved_pt_byte = PT_BYTE;
7738           TEMP_SET_PT_BOTH (from, from_byte);
7739           current_buffer->text->inhibit_shrinking = 1;
7740           del_range_both (from, from_byte, to, to_byte, 1);
7741           coding->src_pos = -chars;
7742           coding->src_pos_byte = -bytes;
7743         }
7744       else
7745         {
7746           coding->src_pos = from;
7747           coding->src_pos_byte = from_byte;
7748         }
7749     }
7750
7751   if (CODING_REQUIRE_DETECTION (coding))
7752     detect_coding (coding);
7753   attrs = CODING_ID_ATTRS (coding->id);
7754
7755   if (EQ (dst_object, Qt)
7756       || (! NILP (CODING_ATTR_POST_READ (attrs))
7757           && NILP (dst_object)))
7758     {
7759       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7760       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7761       coding->dst_pos = BEG;
7762       coding->dst_pos_byte = BEG_BYTE;
7763     }
7764   else if (BUFFERP (dst_object))
7765     {
7766       code_conversion_save (0, 0);
7767       coding->dst_object = dst_object;
7768       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7769       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7770       coding->dst_multibyte
7771         = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
7772     }
7773   else
7774     {
7775       code_conversion_save (0, 0);
7776       coding->dst_object = Qnil;
7777       /* Most callers presume this will return a multibyte result, and they
7778          won't use `binary' or `raw-text' anyway, so let's not worry about
7779          CODING_FOR_UNIBYTE.  */
7780       coding->dst_multibyte = 1;
7781     }
7782
7783   decode_coding (coding);
7784
7785   if (BUFFERP (coding->dst_object))
7786     set_buffer_internal (XBUFFER (coding->dst_object));
7787
7788   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7789     {
7790       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7791       ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7792       Lisp_Object val;
7793
7794       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7795       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7796               old_deactivate_mark);
7797       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7798                         make_number (coding->produced_char));
7799       UNGCPRO;
7800       CHECK_NATNUM (val);
7801       coding->produced_char += Z - prev_Z;
7802       coding->produced += Z_BYTE - prev_Z_BYTE;
7803     }
7804
7805   if (EQ (dst_object, Qt))
7806     {
7807       coding->dst_object = Fbuffer_string ();
7808     }
7809   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7810     {
7811       set_buffer_internal (XBUFFER (coding->dst_object));
7812       if (dst_bytes < coding->produced)
7813         {
7814           destination = xrealloc (destination, coding->produced);
7815           if (! destination)
7816             {
7817               record_conversion_result (coding,
7818                                         CODING_RESULT_INSUFFICIENT_MEM);
7819               unbind_to (count, Qnil);
7820               return;
7821             }
7822           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7823             move_gap_both (BEGV, BEGV_BYTE);
7824           memcpy (destination, BEGV_ADDR, coding->produced);
7825           coding->destination = destination;
7826         }
7827     }
7828
7829   if (saved_pt >= 0)
7830     {
7831       /* This is the case of:
7832          (BUFFERP (src_object) && EQ (src_object, dst_object))
7833          As we have moved PT while replacing the original buffer
7834          contents, we must recover it now.  */
7835       set_buffer_internal (XBUFFER (src_object));
7836       current_buffer->text->inhibit_shrinking = 0;
7837       if (saved_pt < from)
7838         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7839       else if (saved_pt < from + chars)
7840         TEMP_SET_PT_BOTH (from, from_byte);
7841       else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
7842         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7843                           saved_pt_byte + (coding->produced - bytes));
7844       else
7845         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7846                           saved_pt_byte + (coding->produced - bytes));
7847
7848       if (need_marker_adjustment)
7849         {
7850           struct Lisp_Marker *tail;
7851
7852           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7853             if (tail->need_adjustment)
7854               {
7855                 tail->need_adjustment = 0;
7856                 if (tail->insertion_type)
7857                   {
7858                     tail->bytepos = from_byte;
7859                     tail->charpos = from;
7860                   }
7861                 else
7862                   {
7863                     tail->bytepos = from_byte + coding->produced;
7864                     tail->charpos
7865                       = (NILP (BVAR (current_buffer, enable_multibyte_characters))
7866                          ? tail->bytepos : from + coding->produced_char);
7867                   }
7868               }
7869         }
7870     }
7871
7872   Vdeactivate_mark = old_deactivate_mark;
7873   unbind_to (count, coding->dst_object);
7874 }
7875
7876
7877 void
7878 encode_coding_object (struct coding_system *coding,
7879                       Lisp_Object src_object,
7880                       ptrdiff_t from, ptrdiff_t from_byte,
7881                       ptrdiff_t to, ptrdiff_t to_byte,
7882                       Lisp_Object dst_object)
7883 {
7884   ptrdiff_t count = SPECPDL_INDEX ();
7885   ptrdiff_t chars = to - from;
7886   ptrdiff_t bytes = to_byte - from_byte;
7887   Lisp_Object attrs;
7888   int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7889   int need_marker_adjustment = 0;
7890   int kill_src_buffer = 0;
7891   Lisp_Object old_deactivate_mark;
7892
7893   old_deactivate_mark = Vdeactivate_mark;
7894
7895   coding->src_object = src_object;
7896   coding->src_chars = chars;
7897   coding->src_bytes = bytes;
7898   coding->src_multibyte = chars < bytes;
7899
7900   attrs = CODING_ID_ATTRS (coding->id);
7901
7902   if (EQ (src_object, dst_object))
7903     {
7904       struct Lisp_Marker *tail;
7905
7906       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7907         {
7908           tail->need_adjustment
7909             = tail->charpos == (tail->insertion_type ? from : to);
7910           need_marker_adjustment |= tail->need_adjustment;
7911         }
7912     }
7913
7914   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7915     {
7916       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7917       set_buffer_internal (XBUFFER (coding->src_object));
7918       if (STRINGP (src_object))
7919         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7920       else if (BUFFERP (src_object))
7921         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7922       else
7923         insert_1_both ((char *) coding->source + from, chars, bytes, 0, 0, 0);
7924
7925       if (EQ (src_object, dst_object))
7926         {
7927           set_buffer_internal (XBUFFER (src_object));
7928           saved_pt = PT, saved_pt_byte = PT_BYTE;
7929           del_range_both (from, from_byte, to, to_byte, 1);
7930           set_buffer_internal (XBUFFER (coding->src_object));
7931         }
7932
7933       {
7934         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7935
7936         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7937                 old_deactivate_mark);
7938         safe_call2 (CODING_ATTR_PRE_WRITE (attrs),
7939                     make_number (BEG), make_number (Z));
7940         UNGCPRO;
7941       }
7942       if (XBUFFER (coding->src_object) != current_buffer)
7943         kill_src_buffer = 1;
7944       coding->src_object = Fcurrent_buffer ();
7945       if (BEG != GPT)
7946         move_gap_both (BEG, BEG_BYTE);
7947       coding->src_chars = Z - BEG;
7948       coding->src_bytes = Z_BYTE - BEG_BYTE;
7949       coding->src_pos = BEG;
7950       coding->src_pos_byte = BEG_BYTE;
7951       coding->src_multibyte = Z < Z_BYTE;
7952     }
7953   else if (STRINGP (src_object))
7954     {
7955       code_conversion_save (0, 0);
7956       coding->src_pos = from;
7957       coding->src_pos_byte = from_byte;
7958     }
7959   else if (BUFFERP (src_object))
7960     {
7961       code_conversion_save (0, 0);
7962       set_buffer_internal (XBUFFER (src_object));
7963       if (EQ (src_object, dst_object))
7964         {
7965           saved_pt = PT, saved_pt_byte = PT_BYTE;
7966           coding->src_object = del_range_1 (from, to, 1, 1);
7967           coding->src_pos = 0;
7968           coding->src_pos_byte = 0;
7969         }
7970       else
7971         {
7972           if (from < GPT && to >= GPT)
7973             move_gap_both (from, from_byte);
7974           coding->src_pos = from;
7975           coding->src_pos_byte = from_byte;
7976         }
7977     }
7978   else
7979     code_conversion_save (0, 0);
7980
7981   if (BUFFERP (dst_object))
7982     {
7983       coding->dst_object = dst_object;
7984       if (EQ (src_object, dst_object))
7985         {
7986           coding->dst_pos = from;
7987           coding->dst_pos_byte = from_byte;
7988         }
7989       else
7990         {
7991           struct buffer *current = current_buffer;
7992
7993           set_buffer_temp (XBUFFER (dst_object));
7994           coding->dst_pos = PT;
7995           coding->dst_pos_byte = PT_BYTE;
7996           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7997           set_buffer_temp (current);
7998         }
7999       coding->dst_multibyte
8000         = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
8001     }
8002   else if (EQ (dst_object, Qt))
8003     {
8004       ptrdiff_t dst_bytes = max (1, coding->src_chars);
8005       coding->dst_object = Qnil;
8006       coding->destination = xmalloc (dst_bytes);
8007       coding->dst_bytes = dst_bytes;
8008       coding->dst_multibyte = 0;
8009     }
8010   else
8011     {
8012       coding->dst_object = Qnil;
8013       coding->dst_multibyte = 0;
8014     }
8015
8016   encode_coding (coding);
8017
8018   if (EQ (dst_object, Qt))
8019     {
8020       if (BUFFERP (coding->dst_object))
8021         coding->dst_object = Fbuffer_string ();
8022       else
8023         {
8024           coding->dst_object
8025             = make_unibyte_string ((char *) coding->destination,
8026                                    coding->produced);
8027           xfree (coding->destination);
8028         }
8029     }
8030
8031   if (saved_pt >= 0)
8032     {
8033       /* This is the case of:
8034          (BUFFERP (src_object) && EQ (src_object, dst_object))
8035          As we have moved PT while replacing the original buffer
8036          contents, we must recover it now.  */
8037       set_buffer_internal (XBUFFER (src_object));
8038       if (saved_pt < from)
8039         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8040       else if (saved_pt < from + chars)
8041         TEMP_SET_PT_BOTH (from, from_byte);
8042       else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
8043         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8044                           saved_pt_byte + (coding->produced - bytes));
8045       else
8046         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8047                           saved_pt_byte + (coding->produced - bytes));
8048
8049       if (need_marker_adjustment)
8050         {
8051           struct Lisp_Marker *tail;
8052
8053           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8054             if (tail->need_adjustment)
8055               {
8056                 tail->need_adjustment = 0;
8057                 if (tail->insertion_type)
8058                   {
8059                     tail->bytepos = from_byte;
8060                     tail->charpos = from;
8061                   }
8062                 else
8063                   {
8064                     tail->bytepos = from_byte + coding->produced;
8065                     tail->charpos
8066                       = (NILP (BVAR (current_buffer, enable_multibyte_characters))
8067                          ? tail->bytepos : from + coding->produced_char);
8068                   }
8069               }
8070         }
8071     }
8072
8073   if (kill_src_buffer)
8074     Fkill_buffer (coding->src_object);
8075
8076   Vdeactivate_mark = old_deactivate_mark;
8077   unbind_to (count, Qnil);
8078 }
8079
8080
8081 Lisp_Object
8082 preferred_coding_system (void)
8083 {
8084   int id = coding_categories[coding_priorities[0]].id;
8085
8086   return CODING_ID_NAME (id);
8087 }
8088
8089 \f
8090 #ifdef emacs
8091 /*** 11. Emacs Lisp library functions ***/
8092
8093 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8094        doc: /* Return t if OBJECT is nil or a coding-system.
8095 See the documentation of `define-coding-system' for information
8096 about coding-system objects.  */)
8097   (Lisp_Object object)
8098 {
8099   if (NILP (object)
8100       || CODING_SYSTEM_ID (object) >= 0)
8101     return Qt;
8102   if (! SYMBOLP (object)
8103       || NILP (Fget (object, Qcoding_system_define_form)))
8104     return Qnil;
8105   return Qt;
8106 }
8107
8108 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8109        Sread_non_nil_coding_system, 1, 1, 0,
8110        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8111   (Lisp_Object prompt)
8112 {
8113   Lisp_Object val;
8114   do
8115     {
8116       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8117                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8118     }
8119   while (SCHARS (val) == 0);
8120   return (Fintern (val, Qnil));
8121 }
8122
8123 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8124        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8125 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8126 Ignores case when completing coding systems (all Emacs coding systems
8127 are lower-case).  */)
8128   (Lisp_Object prompt, Lisp_Object default_coding_system)
8129 {
8130   Lisp_Object val;
8131   ptrdiff_t count = SPECPDL_INDEX ();
8132
8133   if (SYMBOLP (default_coding_system))
8134     default_coding_system = SYMBOL_NAME (default_coding_system);
8135   specbind (Qcompletion_ignore_case, Qt);
8136   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8137                           Qt, Qnil, Qcoding_system_history,
8138                           default_coding_system, Qnil);
8139   unbind_to (count, Qnil);
8140   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8141 }
8142
8143 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8144        1, 1, 0,
8145        doc: /* Check validity of CODING-SYSTEM.
8146 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8147 It is valid if it is nil or a symbol defined as a coding system by the
8148 function `define-coding-system'.  */)
8149   (Lisp_Object coding_system)
8150 {
8151   Lisp_Object define_form;
8152
8153   define_form = Fget (coding_system, Qcoding_system_define_form);
8154   if (! NILP (define_form))
8155     {
8156       Fput (coding_system, Qcoding_system_define_form, Qnil);
8157       safe_eval (define_form);
8158     }
8159   if (!NILP (Fcoding_system_p (coding_system)))
8160     return coding_system;
8161   xsignal1 (Qcoding_system_error, coding_system);
8162 }
8163
8164 \f
8165 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8166    HIGHEST is nonzero, return the coding system of the highest
8167    priority among the detected coding systems.  Otherwise return a
8168    list of detected coding systems sorted by their priorities.  If
8169    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8170    multibyte form but contains only ASCII and eight-bit chars.
8171    Otherwise, the bytes are raw bytes.
8172
8173    CODING-SYSTEM controls the detection as below:
8174
8175    If it is nil, detect both text-format and eol-format.  If the
8176    text-format part of CODING-SYSTEM is already specified
8177    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8178    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8179    detect only text-format.
     
        SRC_CHARS is recorded as the character count of the source in
        the working coding structure.  When HIGHEST is nonzero and
        nothing was detected, the value is nil.  */
8180
8181 Lisp_Object
8182 detect_coding_system (const unsigned char *src,
8183                       ptrdiff_t src_chars, ptrdiff_t src_bytes,
8184                       int highest, int multibytep,
8185                       Lisp_Object coding_system)
8186 {
8187   const unsigned char *src_end = src + src_bytes;
8188   Lisp_Object attrs, eol_type;
8189   Lisp_Object val = Qnil;
8190   struct coding_system coding;
8191   ptrdiff_t id;
8192   struct coding_detection_info detect_info;
8193   enum coding_category base_category;
8194   int null_byte_found = 0, eight_bit_found = 0;
8195
       /* A nil CODING-SYSTEM is handled as `undecided'.  */
8196   if (NILP (coding_system))
8197     coding_system = Qundecided;
8198   setup_coding_system (coding_system, &coding);
8199   attrs = CODING_ID_ATTRS (coding.id);
8200   eol_type = CODING_ID_EOL_TYPE (coding.id);
8201   coding_system = CODING_ATTR_BASE_NAME (attrs);
8202
       /* Describe the whole of SRC as a single, final block of input.  */
8203   coding.source = src;
8204   coding.src_chars = src_chars;
8205   coding.src_bytes = src_bytes;
8206   coding.src_multibyte = multibytep;
8207   coding.consumed = 0;
8208   coding.mode |= CODING_MODE_LAST_BLOCK;
8209   coding.head_ascii = 0;
8210
8211   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8212
8213   /* At first, detect text-format if necessary.  */
8214   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8215   if (base_category == coding_category_undecided)
8216     {
8217       enum coding_category category IF_LINT (= 0);
8218       struct coding_system *this IF_LINT (= NULL);
8219       int c, i;
8220
8221       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
           /* coding.head_ascii counts the bytes passed before the first
              eight-bit byte.  The scan stops early once both a null
              byte and an eight-bit byte have been seen, or when the
              ISO-2022 detector reports a result.  */
8222       for (; src < src_end; src++)
8223         {
8224           c = *src;
8225           if (c & 0x80)
8226             {
8227               eight_bit_found = 1;
8228               if (null_byte_found)
8229                 break;
8230             }
8231           else if (c < 0x20)
8232             {
8233               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8234                   && ! inhibit_iso_escape_detection
8235                   && ! detect_info.checked)
8236                 {
8237                   if (detect_coding_iso_2022 (&coding, &detect_info))
8238                     {
8239                       /* We have scanned the whole data.  */
8240                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8241                         {
8242                           /* We didn't find an 8-bit code.  We may
8243                              have found a null-byte, but it's very
8244                              rare that a binary file conforms to
8245                              ISO-2022.  */
8246                           src = src_end;
8247                           coding.head_ascii = src - coding.source;
8248                         }
8249                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8250                       break;
8251                     }
8252                 }
8253               else if (! c && !inhibit_null_byte_detection)
8254                 {
8255                   null_byte_found = 1;
8256                   if (eight_bit_found)
8257                     break;
8258                 }
8259               if (! eight_bit_found)
8260                 coding.head_ascii++;
8261             }
8262           else if (! eight_bit_found)
8263             coding.head_ascii++;
8264         }
8265
8266       if (null_byte_found || eight_bit_found
8267           || coding.head_ascii < coding.src_bytes
8268           || detect_info.found)
8269         {
8270           if (coding.head_ascii == coding.src_bytes)
8271             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
8272             for (i = 0; i < coding_category_raw_text; i++)
8273               {
8274                 category = coding_priorities[i];
8275                 this = coding_categories + category;
8276                 if (detect_info.found & (1 << category))
8277                   break;
8278               }
8279           else
8280             {
8281               if (null_byte_found)
8282                 {
                       /* With a null byte present, mark every category
                          outside CATEGORY_MASK_UTF_16 as already checked
                          and rejected.  */
8283                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8284                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8285                 }
                   /* Try the categories in priority order, running the
                      detector of each one not checked yet.  With HIGHEST,
                      stop at the first category that is found.  */
8286               for (i = 0; i < coding_category_raw_text; i++)
8287                 {
8288                   category = coding_priorities[i];
8289                   this = coding_categories + category;
8290
8291                   if (this->id < 0)
8292                     {
8293                       /* No coding system of this category is defined.  */
8294                       detect_info.rejected |= (1 << category);
8295                     }
8296                   else if (category >= coding_category_raw_text)
8297                     continue;
8298                   else if (detect_info.checked & (1 << category))
8299                     {
8300                       if (highest
8301                           && (detect_info.found & (1 << category)))
8302                         break;
8303                     }
8304                   else if ((*(this->detector)) (&coding, &detect_info)
8305                            && highest
8306                            && (detect_info.found & (1 << category)))
8307                     {
8308                       if (category == coding_category_utf_16_auto)
8309                         {
8310                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8311                             category = coding_category_utf_16_le;
8312                           else
8313                             category = coding_category_utf_16_be;
8314                         }
8315                       break;
8316                     }
8317                 }
8318             }
8319         }
8320
           /* Every category was rejected, or a null byte was seen:
              report `no-conversion'.  */
8321       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8322           || null_byte_found)
8323         {
8324           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8325           id = CODING_SYSTEM_ID (Qno_conversion);
8326           val = Fcons (make_number (id), Qnil);
8327         }
           /* Nothing was rejected and nothing was found: report the
              coding system of the `undecided' category.  */
8328       else if (! detect_info.rejected && ! detect_info.found)
8329         {
8330           detect_info.found = CATEGORY_MASK_ANY;
8331           id = coding_categories[coding_category_undecided].id;
8332           val = Fcons (make_number (id), Qnil);
8333         }
8334       else if (highest)
8335         {
8336           if (detect_info.found)
8337             {
                   /* CATEGORY and THIS are where the loops above stopped.  */
8338               detect_info.found = 1 << category;
8339               val = Fcons (make_number (this->id), Qnil);
8340             }
8341           else
                 /* Nothing was positively found: take the first category
                    in priority order that was not rejected.  */
8342             for (i = 0; i < coding_category_raw_text; i++)
8343               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8344                 {
8345                   detect_info.found = 1 << coding_priorities[i];
8346                   id = coding_categories[coding_priorities[i]].id;
8347                   val = Fcons (make_number (id), Qnil);
8348                   break;
8349                 }
8350         }
8351       else
8352         {
8353           int mask = detect_info.rejected | detect_info.found;
8354           int found = 0;
8355
               /* VAL is built back to front.  First push the categories
                  that were neither rejected nor found, then push the
                  found ones, which therefore end up at the head of the
                  list.  Both loops walk the priorities from last to
                  first, so each group stays in priority order.  */
8356           for (i = coding_category_raw_text - 1; i >= 0; i--)
8357             {
8358               category = coding_priorities[i];
8359               if (! (mask & (1 << category)))
8360                 {
8361                   found |= 1 << category;
8362                   id = coding_categories[category].id;
8363                   if (id >= 0)
8364                     val = Fcons (make_number (id), val);
8365                 }
8366             }
8367           for (i = coding_category_raw_text - 1; i >= 0; i--)
8368             {
8369               category = coding_priorities[i];
8370               if (detect_info.found & (1 << category))
8371                 {
8372                   id = coding_categories[category].id;
8373                   val = Fcons (make_number (id), val);
8374                 }
8375             }
8376           detect_info.found |= found;
8377         }
8378     }
8379   else if (base_category == coding_category_utf_8_auto)
8380     {
           /* Only decide between the with-signature and without-signature
              UTF-8 categories.  */
8381       if (detect_coding_utf_8 (&coding, &detect_info))
8382         {
8383           struct coding_system *this;
8384
8385           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8386             this = coding_categories + coding_category_utf_8_sig;
8387           else
8388             this = coding_categories + coding_category_utf_8_nosig;
8389           val = Fcons (make_number (this->id), Qnil);
8390         }
8391     }
8392   else if (base_category == coding_category_utf_16_auto)
8393     {
           /* Only decide among the UTF-16 categories.  */
8394       if (detect_coding_utf_16 (&coding, &detect_info))
8395         {
8396           struct coding_system *this;
8397
8398           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8399             this = coding_categories + coding_category_utf_16_le;
8400           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8401             this = coding_categories + coding_category_utf_16_be;
8402           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8403             this = coding_categories + coding_category_utf_16_be_nosig;
8404           else
8405             this = coding_categories + coding_category_utf_16_le_nosig;
8406           val = Fcons (make_number (this->id), Qnil);
8407         }
8408     }
8409   else
8410     {
           /* The text-format is already specified: use it as is.  */
8411       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8412       val = Fcons (make_number (coding.id), Qnil);
8413     }
8414
8415   /* Then, detect eol-format if necessary.  */
8416   {
         /* -1 means that no eol format was looked for.  */
8417     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8418     Lisp_Object tail;
8419
8420     if (VECTORP (eol_type))
8421       {
             /* The eol-format is not fixed by CODING-SYSTEM: examine the
                data once for each family of found categories.  */
8422         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8423           {
8424             if (null_byte_found)
8425               normal_eol = EOL_SEEN_LF;
8426             else
8427               normal_eol = detect_eol (coding.source, src_bytes,
8428                                        coding_category_raw_text);
8429           }
8430         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8431                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8432           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8433                                       coding_category_utf_16_be);
8434         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8435                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8436           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8437                                       coding_category_utf_16_le);
8438       }
8439     else
8440       {
             /* The eol-format is given by CODING-SYSTEM.  */
8441         if (EQ (eol_type, Qunix))
8442           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8443         else if (EQ (eol_type, Qdos))
8444           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8445         else
8446           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8447       }
8448
         /* Replace each coding system id in VAL by a coding system name:
            element 0, 1 or 2 of that system's eol-type vector for LF,
            CRLF or CR respectively, or else the plain name.  */
8449     for (tail = val; CONSP (tail); tail = XCDR (tail))
8450       {
8451         enum coding_category category;
8452         int this_eol;
8453
8454         id = XINT (XCAR (tail));
8455         attrs = CODING_ID_ATTRS (id);
8456         category = XINT (CODING_ATTR_CATEGORY (attrs));
8457         eol_type = CODING_ID_EOL_TYPE (id);
8458         if (VECTORP (eol_type))
8459           {
8460             if (category == coding_category_utf_16_be
8461                 || category == coding_category_utf_16_be_nosig)
8462               this_eol = utf_16_be_eol;
8463             else if (category == coding_category_utf_16_le
8464                      || category == coding_category_utf_16_le_nosig)
8465               this_eol = utf_16_le_eol;
8466             else
8467               this_eol = normal_eol;
8468
8469             if (this_eol == EOL_SEEN_LF)
8470               XSETCAR (tail, AREF (eol_type, 0));
8471             else if (this_eol == EOL_SEEN_CRLF)
8472               XSETCAR (tail, AREF (eol_type, 1));
8473             else if (this_eol == EOL_SEEN_CR)
8474               XSETCAR (tail, AREF (eol_type, 2));
8475             else
8476               XSETCAR (tail, CODING_ID_NAME (id));
8477           }
8478         else
8479           XSETCAR (tail, CODING_ID_NAME (id));
8480       }
8481   }
8482
       /* With HIGHEST, return only the first element (or nil if VAL is
          empty).  */
8483   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8484 }
8485
8486
8487 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8488        2, 3, 0,
8489        doc: /* Detect coding system of the text in the region between START and END.
8490 Return a list of possible coding systems ordered by priority.
8491 The coding systems to try and their priorities follows what
8492 the function `coding-system-priority-list' (which see) returns.
8493
8494 If only ASCII characters are found (except for such ISO-2022 control
8495 characters as ESC), it returns a list of single element `undecided'
8496 or its subsidiary coding system according to a detected end-of-line
8497 format.
8498
8499 If optional argument HIGHEST is non-nil, return the coding system of
8500 highest priority.  */)
8501   (Lisp_Object start, Lisp_Object end, Lisp_Object highest)
8502 {
8503   ptrdiff_t from, to;
8504   ptrdiff_t from_byte, to_byte;
8505
8506   CHECK_NUMBER_COERCE_MARKER (start);
8507   CHECK_NUMBER_COERCE_MARKER (end);
8508
8509   validate_region (&start, &end);
8510   from = XINT (start), to = XINT (end);
8511   from_byte = CHAR_TO_BYTE (from);
8512   to_byte = CHAR_TO_BYTE (to);
8513
8514   if (from < GPT && to >= GPT)
8515     move_gap_both (to, to_byte);
8516
8517   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8518                                to - from, to_byte - from_byte,
8519                                !NILP (highest),
8520                                !NILP (BVAR (current_buffer
8521                                       , enable_multibyte_characters)),
8522                                Qnil);
8523 }
8524
8525 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8526        1, 2, 0,
8527        doc: /* Detect coding system of the text in STRING.
8528 Return a list of possible coding systems ordered by priority.
8529 The coding systems to try and their priorities follows what
8530 the function `coding-system-priority-list' (which see) returns.
8531
8532 If only ASCII characters are found (except for such ISO-2022 control
8533 characters as ESC), it returns a list of single element `undecided'
8534 or its subsidiary coding system according to a detected end-of-line
8535 format.
8536
8537 If optional argument HIGHEST is non-nil, return the coding system of
8538 highest priority.  */)
8539   (Lisp_Object string, Lisp_Object highest)
8540 {
8541   CHECK_STRING (string);
8542
8543   return detect_coding_system (SDATA (string),
8544                                SCHARS (string), SBYTES (string),
8545                                !NILP (highest), STRING_MULTIBYTE (string),
8546                                Qnil);
8547 }
8548
8549
8550 static inline int
8551 char_encodable_p (int c, Lisp_Object attrs)
8552 {
8553   Lisp_Object tail;
8554   struct charset *charset;
8555   Lisp_Object translation_table;
8556
8557   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8558   if (! NILP (translation_table))
8559     c = translate_char (translation_table, c);
8560   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8561        CONSP (tail); tail = XCDR (tail))
8562     {
8563       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8564       if (CHAR_CHARSET_P (c, charset))
8565         break;
8566     }
8567   return (! NILP (tail));
8568 }
8569
8570
8571 /* Return a list of coding systems that safely encode the text between
8572    START and END.  If EXCLUDE is non-nil, it is a list of coding
8573    systems not to check.  The returned list doesn't contain any such
8574    coding systems.  In any case, if the text contains only ASCII or is
8575    unibyte, return t.
     
        START may also be a string, in which case the whole string is
        examined and END is not used.  A returned list always ends with
        `raw-text' and `no-conversion'.  As a side effect, the
        translation-table slot of each candidate's attribute vector is
        overwritten with the value of get_translation_table.  */
8576
8577 DEFUN ("find-coding-systems-region-internal",
8578        Ffind_coding_systems_region_internal,
8579        Sfind_coding_systems_region_internal, 2, 3, 0,
8580        doc: /* Internal use only.  */)
8581   (Lisp_Object start, Lisp_Object end, Lisp_Object exclude)
8582 {
8583   Lisp_Object coding_attrs_list, safe_codings;
8584   ptrdiff_t start_byte, end_byte;
8585   const unsigned char *p, *pbeg, *pend;
8586   int c;
8587   Lisp_Object tail, elt, work_table;
8588
8589   if (STRINGP (start))
8590     {
           /* A unibyte string, or one with as many bytes as characters,
              needs no checking.  */
8591       if (!STRING_MULTIBYTE (start)
8592           || SCHARS (start) == SBYTES (start))
8593         return Qt;
8594       start_byte = 0;
8595       end_byte = SBYTES (start);
8596     }
8597   else
8598     {
8599       CHECK_NUMBER_COERCE_MARKER (start);
8600       CHECK_NUMBER_COERCE_MARKER (end);
8601       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8602         args_out_of_range (start, end);
8603       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
8604         return Qt;
8605       start_byte = CHAR_TO_BYTE (XINT (start));
8606       end_byte = CHAR_TO_BYTE (XINT (end));
           /* As many bytes as characters: nothing to check.  */
8607       if (XINT (end) - XINT (start) == end_byte - start_byte)
8608         return Qt;
8609
           /* The gap is inside the region: move it to whichever end of
              the region is nearer.  */
8610       if (XINT (start) < GPT && XINT (end) > GPT)
8611         {
8612           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8613             move_gap_both (XINT (start), start_byte);
8614           else
8615             move_gap_both (XINT (end), end_byte);
8616         }
8617     }
8618
       /* Collect the attribute vectors of the candidates: every entry of
          Vcoding_system_list that is not in EXCLUDE, is its own base
          name, and is not of type `undecided'.  */
8619   coding_attrs_list = Qnil;
8620   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8621     if (NILP (exclude)
8622         || NILP (Fmemq (XCAR (tail), exclude)))
8623       {
8624         Lisp_Object attrs;
8625
8626         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8627         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8628             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8629           {
8630             ASET (attrs, coding_attr_trans_tbl,
8631                   get_translation_table (attrs, 1, NULL));
8632             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8633           }
8634       }
8635
8636   if (STRINGP (start))
8637     p = pbeg = SDATA (start);
8638   else
8639     p = pbeg = BYTE_POS_ADDR (start_byte);
8640   pend = p + (end_byte - start_byte);
8641
       /* Trim ASCII bytes from both ends of the text.  */
8642   while (p < pend && ASCII_BYTE_P (*p)) p++;
8643   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8644
       /* WORK_TABLE records the characters that were already checked.  */
8645   work_table = Fmake_char_table (Qnil, Qnil);
8646   while (p < pend)
8647     {
8648       if (ASCII_BYTE_P (*p))
8649         p++;
8650       else
8651         {
8652           c = STRING_CHAR_ADVANCE (p);
8653           if (!NILP (char_table_ref (work_table, c)))
8654             /* This character was already checked.  Ignore it.  */
8655             continue;
8656
8657           charset_map_loaded = 0;
               /* Drop from CODING_ATTRS_LIST, in place, every candidate
                  that cannot encode C.  TAIL is advanced only when the
                  current cell is kept.  */
8658           for (tail = coding_attrs_list; CONSP (tail);)
8659             {
8660               elt = XCAR (tail);
8661               if (NILP (elt))
8662                 tail = XCDR (tail);
8663               else if (char_encodable_p (c, elt))
8664                 tail = XCDR (tail);
8665               else if (CONSP (XCDR (tail)))
8666                 {
                       /* Remove this cell's element by copying the next
                          cell into it; the same cell is then examined
                          again.  */
8667                   XSETCAR (tail, XCAR (XCDR (tail)));
8668                   XSETCDR (tail, XCDR (XCDR (tail)));
8669                 }
8670               else
8671                 {
                       /* Last cell: it cannot be spliced out, so mark it
                          with nil instead.  */
8672                   XSETCAR (tail, Qnil);
8673                   tail = XCDR (tail);
8674                 }
8675             }
8676           if (charset_map_loaded)
8677             {
                   /* Recompute the text pointers from their offsets --
                      presumably because loading a charset map may have
                      relocated the text; TODO confirm.  */
8678               ptrdiff_t p_offset = p - pbeg, pend_offset = pend - pbeg;
8679
8680               if (STRINGP (start))
8681                 pbeg = SDATA (start);
8682               else
8683                 pbeg = BYTE_POS_ADDR (start_byte);
8684               p = pbeg + p_offset;
8685               pend = pbeg + pend_offset;
8686             }
8687           char_table_set (work_table, c, Qt);
8688         }
8689     }
8690
       /* The survivors' base names are pushed in front of `raw-text' and
          `no-conversion'.  */
8691   safe_codings = list2 (Qraw_text, Qno_conversion);
8692   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8693     if (! NILP (XCAR (tail)))
8694       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8695
8696   return safe_codings;
8697 }
8698
8699
8700 DEFUN ("unencodable-char-position", Funencodable_char_position,
8701        Sunencodable_char_position, 3, 5, 0,
8702        doc: /*
8703 Return position of first un-encodable character in a region.
8704 START and END specify the region and CODING-SYSTEM specifies the
8705 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8706
8707 If optional 4th argument COUNT is non-nil, it specifies at most how
8708 many un-encodable characters to search.  In this case, the value is a
8709 list of positions.
8710
8711 If optional 5th argument STRING is non-nil, it is a string to search
8712 for un-encodable characters.  In that case, START and END are indexes
8713 to the string.  */)
8714   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
8715 {
8716   EMACS_INT n;
8717   struct coding_system coding;
8718   Lisp_Object attrs, charset_list, translation_table;
8719   Lisp_Object positions;
8720   ptrdiff_t from, to;
8721   const unsigned char *p, *stop, *pend;
8722   int ascii_compatible;
8723
8724   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8725   attrs = CODING_ID_ATTRS (coding.id);
8726   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8727     return Qnil;
8728   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8729   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8730   translation_table = get_translation_table (attrs, 1, NULL);
8731
8732   if (NILP (string))
8733     {
8734       validate_region (&start, &end);
8735       from = XINT (start);
8736       to = XINT (end);
8737       if (NILP (BVAR (current_buffer, enable_multibyte_characters))
8738           || (ascii_compatible
8739               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8740         return Qnil;
8741       p = CHAR_POS_ADDR (from);
8742       pend = CHAR_POS_ADDR (to);
8743       if (from < GPT && to >= GPT)
8744         stop = GPT_ADDR;
8745       else
8746         stop = pend;
8747     }
8748   else
8749     {
8750       CHECK_STRING (string);
8751       CHECK_NATNUM (start);
8752       CHECK_NATNUM (end);
8753       if (! (XINT (start) <= XINT (end) && XINT (end) <= SCHARS (string)))
8754         args_out_of_range_3 (string, start, end);
8755       from = XINT (start);
8756       to = XINT (end);
8757       if (! STRING_MULTIBYTE (string))
8758         return Qnil;
8759       p = SDATA (string) + string_char_to_byte (string, from);
8760       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8761       if (ascii_compatible && (to - from) == (pend - p))
8762         return Qnil;
8763     }
8764
8765   if (NILP (count))
8766     n = 1;
8767   else
8768     {
8769       CHECK_NATNUM (count);
8770       n = XINT (count);
8771     }
8772
8773   positions = Qnil;
8774   charset_map_loaded = 0;
8775   while (1)
8776     {
8777       int c;
8778
8779       if (ascii_compatible)
8780         while (p < stop && ASCII_BYTE_P (*p))
8781           p++, from++;
8782       if (p >= stop)
8783         {
8784           if (p >= pend)
8785             break;
8786           stop = pend;
8787           p = GAP_END_ADDR;
8788         }
8789
8790       c = STRING_CHAR_ADVANCE (p);
8791       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8792           && ! char_charset (translate_char (translation_table, c),
8793                              charset_list, NULL))
8794         {
8795           positions = Fcons (make_number (from), positions);
8796           n--;
8797           if (n == 0)
8798             break;
8799         }
8800
8801       from++;
8802       if (charset_map_loaded && NILP (string))
8803         {
8804           p = CHAR_POS_ADDR (from);
8805           pend = CHAR_POS_ADDR (to);
8806           if (from < GPT && to >= GPT)
8807             stop = GPT_ADDR;
8808           else
8809             stop = pend;
8810           charset_map_loaded = 0;
8811         }
8812     }
8813
8814   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8815 }
8816
8817
8818 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8819        Scheck_coding_systems_region, 3, 3, 0,
8820        doc: /* Check if the region is encodable by coding systems.
8821
8822 START and END are buffer positions specifying the region.
8823 CODING-SYSTEM-LIST is a list of coding systems to check.
8824
8825 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8826 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8827 whole region, POS0, POS1, ... are buffer positions where non-encodable
8828 characters are found.
8829
8830 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8831 value is nil.
8832
8833 START may be a string.  In that case, check if the string is
8834 encodable, and the value contains indices to the string instead of
8835 buffer positions.  END is ignored.
8836
8837 If the current buffer (or START if it is a string) is unibyte, the value
8838 is nil.  */)
8839   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system_list)
8840 {
8841   Lisp_Object list;
8842   ptrdiff_t start_byte, end_byte;
8843   ptrdiff_t pos;
8844   const unsigned char *p, *pbeg, *pend;
8845   int c;
8846   Lisp_Object tail, elt, attrs;
8847
8848   if (STRINGP (start))
8849     {
8850       if (!STRING_MULTIBYTE (start)
8851           || SCHARS (start) == SBYTES (start))
8852         return Qnil;
8853       start_byte = 0;
8854       end_byte = SBYTES (start);
8855       pos = 0;
8856     }
8857   else
8858     {
8859       CHECK_NUMBER_COERCE_MARKER (start);
8860       CHECK_NUMBER_COERCE_MARKER (end);
8861       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8862         args_out_of_range (start, end);
8863       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
8864         return Qnil;
8865       start_byte = CHAR_TO_BYTE (XINT (start));
8866       end_byte = CHAR_TO_BYTE (XINT (end));
8867       if (XINT (end) - XINT (start) == end_byte - start_byte)
8868         return Qnil;
8869
8870       if (XINT (start) < GPT && XINT (end) > GPT)
8871         {
8872           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8873             move_gap_both (XINT (start), start_byte);
8874           else
8875             move_gap_both (XINT (end), end_byte);
8876         }
8877       pos = XINT (start);
8878     }
8879
8880   list = Qnil;
8881   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8882     {
8883       elt = XCAR (tail);
8884       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8885       ASET (attrs, coding_attr_trans_tbl,
8886             get_translation_table (attrs, 1, NULL));
8887       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8888     }
8889
8890   if (STRINGP (start))
8891     p = pbeg = SDATA (start);
8892   else
8893     p = pbeg = BYTE_POS_ADDR (start_byte);
8894   pend = p + (end_byte - start_byte);
8895
8896   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8897   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8898
8899   while (p < pend)
8900     {
8901       if (ASCII_BYTE_P (*p))
8902         p++;
8903       else
8904         {
8905           c = STRING_CHAR_ADVANCE (p);
8906
8907           charset_map_loaded = 0;
8908           for (tail = list; CONSP (tail); tail = XCDR (tail))
8909             {
8910               elt = XCDR (XCAR (tail));
8911               if (! char_encodable_p (c, XCAR (elt)))
8912                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8913             }
8914           if (charset_map_loaded)
8915             {
8916               ptrdiff_t p_offset = p - pbeg, pend_offset = pend - pbeg;
8917
8918               if (STRINGP (start))
8919                 pbeg = SDATA (start);
8920               else
8921                 pbeg = BYTE_POS_ADDR (start_byte);
8922               p = pbeg + p_offset;
8923               pend = pbeg + pend_offset;
8924             }
8925         }
8926       pos++;
8927     }
8928
8929   tail = list;
8930   list = Qnil;
8931   for (; CONSP (tail); tail = XCDR (tail))
8932     {
8933       elt = XCAR (tail);
8934       if (CONSP (XCDR (XCDR (elt))))
8935         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8936                       list);
8937     }
8938
8939   return list;
8940 }
8941
8942
8943 static Lisp_Object
8944 code_convert_region (Lisp_Object start, Lisp_Object end,
8945                      Lisp_Object coding_system, Lisp_Object dst_object,
8946                      int encodep, int norecord)
8947 {
8948   struct coding_system coding;
8949   ptrdiff_t from, from_byte, to, to_byte;
8950   Lisp_Object src_object;
8951
8952   CHECK_NUMBER_COERCE_MARKER (start);
8953   CHECK_NUMBER_COERCE_MARKER (end);
8954   if (NILP (coding_system))
8955     coding_system = Qno_conversion;
8956   else
8957     CHECK_CODING_SYSTEM (coding_system);
8958   src_object = Fcurrent_buffer ();
8959   if (NILP (dst_object))
8960     dst_object = src_object;
8961   else if (! EQ (dst_object, Qt))
8962     CHECK_BUFFER (dst_object);
8963
8964   validate_region (&start, &end);
8965   from = XFASTINT (start);
8966   from_byte = CHAR_TO_BYTE (from);
8967   to = XFASTINT (end);
8968   to_byte = CHAR_TO_BYTE (to);
8969
8970   setup_coding_system (coding_system, &coding);
8971   coding.mode |= CODING_MODE_LAST_BLOCK;
8972
8973   if (encodep)
8974     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8975                           dst_object);
8976   else
8977     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8978                           dst_object);
8979   if (! norecord)
8980     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8981
8982   return (BUFFERP (dst_object)
8983           ? make_number (coding.produced_char)
8984           : coding.dst_object);
8985 }
8986
8987
8988 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8989        3, 4, "r\nzCoding system: ",
8990        doc: /* Decode the current region from the specified coding system.
8991 When called from a program, takes four arguments:
8992         START, END, CODING-SYSTEM, and DESTINATION.
8993 START and END are buffer positions.
8994
8995 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8996 If nil, the region between START and END is replaced by the decoded text.
8997 If buffer, the decoded text is inserted in that buffer after point (point
8998 does not move).
8999 In those cases, the length of the decoded text is returned.
9000 If DESTINATION is t, the decoded text is returned.
9001
9002 This function sets `last-coding-system-used' to the precise coding system
9003 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9004 not fully specified.)  */)
9005   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
9006 {
9007   return code_convert_region (start, end, coding_system, destination, 0, 0);
9008 }
9009
9010 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
9011        3, 4, "r\nzCoding system: ",
9012        doc: /* Encode the current region by specified coding system.
9013 When called from a program, takes four arguments:
9014         START, END, CODING-SYSTEM and DESTINATION.
9015 START and END are buffer positions.
9016
9017 Optional 4th arguments DESTINATION specifies where the encoded text goes.
9018 If nil, the region between START and END is replace by the encoded text.
9019 If buffer, the encoded text is inserted in that buffer after point (point
9020 does not move).
9021 In those cases, the length of the encoded text is returned.
9022 If DESTINATION is t, the encoded text is returned.
9023
9024 This function sets `last-coding-system-used' to the precise coding system
9025 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9026 not fully specified.)  */)
9027   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
9028 {
9029   return code_convert_region (start, end, coding_system, destination, 1, 0);
9030 }
9031
9032 Lisp_Object
9033 code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9034                      Lisp_Object dst_object, int encodep, int nocopy, int norecord)
9035 {
9036   struct coding_system coding;
9037   ptrdiff_t chars, bytes;
9038
9039   CHECK_STRING (string);
9040   if (NILP (coding_system))
9041     {
9042       if (! norecord)
9043         Vlast_coding_system_used = Qno_conversion;
9044       if (NILP (dst_object))
9045         return (nocopy ? Fcopy_sequence (string) : string);
9046     }
9047
9048   if (NILP (coding_system))
9049     coding_system = Qno_conversion;
9050   else
9051     CHECK_CODING_SYSTEM (coding_system);
9052   if (NILP (dst_object))
9053     dst_object = Qt;
9054   else if (! EQ (dst_object, Qt))
9055     CHECK_BUFFER (dst_object);
9056
9057   setup_coding_system (coding_system, &coding);
9058   coding.mode |= CODING_MODE_LAST_BLOCK;
9059   chars = SCHARS (string);
9060   bytes = SBYTES (string);
9061   if (encodep)
9062     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9063   else
9064     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9065   if (! norecord)
9066     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9067
9068   return (BUFFERP (dst_object)
9069           ? make_number (coding.produced_char)
9070           : coding.dst_object);
9071 }
9072
9073
9074 /* Encode or decode STRING according to CODING_SYSTEM.
9075    Do not set Vlast_coding_system_used.
9076
9077    This function is called only from macros DECODE_FILE and
9078    ENCODE_FILE, thus we ignore character composition.  */
9079
9080 Lisp_Object
9081 code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9082                               int encodep)
9083 {
9084   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9085 }
9086
9087
9088 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9089        2, 4, 0,
9090        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9091
9092 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9093 if the decoding operation is trivial.
9094
9095 Optional fourth arg BUFFER non-nil means that the decoded text is
9096 inserted in that buffer after point (point does not move).  In this
9097 case, the return value is the length of the decoded text.
9098
9099 This function sets `last-coding-system-used' to the precise coding system
9100 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9101 not fully specified.)  */)
9102   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9103 {
9104   return code_convert_string (string, coding_system, buffer,
9105                               0, ! NILP (nocopy), 0);
9106 }
9107
9108 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9109        2, 4, 0,
9110        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9111
9112 Optional third arg NOCOPY non-nil means it is OK to return STRING
9113 itself if the encoding operation is trivial.
9114
9115 Optional fourth arg BUFFER non-nil means that the encoded text is
9116 inserted in that buffer after point (point does not move).  In this
9117 case, the return value is the length of the encoded text.
9118
9119 This function sets `last-coding-system-used' to the precise coding system
9120 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9121 not fully specified.)  */)
9122   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9123 {
9124   return code_convert_string (string, coding_system, buffer,
9125                               1, ! NILP (nocopy), 0);
9126 }
9127
9128 \f
9129 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9130        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9131 Return the corresponding character.  */)
9132   (Lisp_Object code)
9133 {
9134   Lisp_Object spec, attrs, val;
9135   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9136   EMACS_INT ch;
9137   int c;
9138
9139   CHECK_NATNUM (code);
9140   ch = XFASTINT (code);
9141   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9142   attrs = AREF (spec, 0);
9143
9144   if (ASCII_BYTE_P (ch)
9145       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9146     return code;
9147
9148   val = CODING_ATTR_CHARSET_LIST (attrs);
9149   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9150   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9151   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9152
9153   if (ch <= 0x7F)
9154     {
9155       c = ch;
9156       charset = charset_roman;
9157     }
9158   else if (ch >= 0xA0 && ch < 0xDF)
9159     {
9160       c = ch - 0x80;
9161       charset = charset_kana;
9162     }
9163   else
9164     {
9165       EMACS_INT c1 = ch >> 8;
9166       int c2 = ch & 0xFF;
9167
9168       if (c1 < 0x81 || (c1 > 0x9F && c1 < 0xE0) || c1 > 0xEF
9169           || c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
9170         error ("Invalid code: %"pI"d", ch);
9171       c = ch;
9172       SJIS_TO_JIS (c);
9173       charset = charset_kanji;
9174     }
9175   c = DECODE_CHAR (charset, c);
9176   if (c < 0)
9177     error ("Invalid code: %"pI"d", ch);
9178   return make_number (c);
9179 }
9180
9181
9182 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9183        doc: /* Encode a Japanese character CH to shift_jis encoding.
9184 Return the corresponding code in SJIS.  */)
9185   (Lisp_Object ch)
9186 {
9187   Lisp_Object spec, attrs, charset_list;
9188   int c;
9189   struct charset *charset;
9190   unsigned code;
9191
9192   CHECK_CHARACTER (ch);
9193   c = XFASTINT (ch);
9194   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9195   attrs = AREF (spec, 0);
9196
9197   if (ASCII_CHAR_P (c)
9198       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9199     return ch;
9200
9201   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9202   charset = char_charset (c, charset_list, &code);
9203   if (code == CHARSET_INVALID_CODE (charset))
9204     error ("Can't encode by shift_jis encoding: %c", c);
9205   JIS_TO_SJIS (code);
9206
9207   return make_number (code);
9208 }
9209
9210 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9211        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9212 Return the corresponding character.  */)
9213   (Lisp_Object code)
9214 {
9215   Lisp_Object spec, attrs, val;
9216   struct charset *charset_roman, *charset_big5, *charset;
9217   EMACS_INT ch;
9218   int c;
9219
9220   CHECK_NATNUM (code);
9221   ch = XFASTINT (code);
9222   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9223   attrs = AREF (spec, 0);
9224
9225   if (ASCII_BYTE_P (ch)
9226       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9227     return code;
9228
9229   val = CODING_ATTR_CHARSET_LIST (attrs);
9230   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9231   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9232
9233   if (ch <= 0x7F)
9234     {
9235       c = ch;
9236       charset = charset_roman;
9237     }
9238   else
9239     {
9240       EMACS_INT b1 = ch >> 8;
9241       int b2 = ch & 0x7F;
9242       if (b1 < 0xA1 || b1 > 0xFE
9243           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9244         error ("Invalid code: %"pI"d", ch);
9245       c = ch;
9246       charset = charset_big5;
9247     }
9248   c = DECODE_CHAR (charset, c);
9249   if (c < 0)
9250     error ("Invalid code: %"pI"d", ch);
9251   return make_number (c);
9252 }
9253
9254 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9255        doc: /* Encode the Big5 character CH to BIG5 coding system.
9256 Return the corresponding character code in Big5.  */)
9257   (Lisp_Object ch)
9258 {
9259   Lisp_Object spec, attrs, charset_list;
9260   struct charset *charset;
9261   int c;
9262   unsigned code;
9263
9264   CHECK_CHARACTER (ch);
9265   c = XFASTINT (ch);
9266   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9267   attrs = AREF (spec, 0);
9268   if (ASCII_CHAR_P (c)
9269       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9270     return ch;
9271
9272   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9273   charset = char_charset (c, charset_list, &code);
9274   if (code == CHARSET_INVALID_CODE (charset))
9275     error ("Can't encode by Big5 encoding: %c", c);
9276
9277   return make_number (code);
9278 }
9279
9280 \f
9281 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9282        Sset_terminal_coding_system_internal, 1, 2, 0,
9283        doc: /* Internal use only.  */)
9284   (Lisp_Object coding_system, Lisp_Object terminal)
9285 {
9286   struct terminal *term = get_terminal (terminal, 1);
9287   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (term);
9288   CHECK_SYMBOL (coding_system);
9289   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9290   /* We had better not send unsafe characters to terminal.  */
9291   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9292   /* Character composition should be disabled.  */
9293   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9294   terminal_coding->src_multibyte = 1;
9295   terminal_coding->dst_multibyte = 0;
9296   if (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK)
9297     term->charset_list = coding_charset_list (terminal_coding);
9298   else
9299     term->charset_list = Fcons (make_number (charset_ascii), Qnil);
9300   return Qnil;
9301 }
9302
9303 DEFUN ("set-safe-terminal-coding-system-internal",
9304        Fset_safe_terminal_coding_system_internal,
9305        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9306        doc: /* Internal use only.  */)
9307   (Lisp_Object coding_system)
9308 {
9309   CHECK_SYMBOL (coding_system);
9310   setup_coding_system (Fcheck_coding_system (coding_system),
9311                        &safe_terminal_coding);
9312   /* Character composition should be disabled.  */
9313   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9314   safe_terminal_coding.src_multibyte = 1;
9315   safe_terminal_coding.dst_multibyte = 0;
9316   return Qnil;
9317 }
9318
9319 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9320        Sterminal_coding_system, 0, 1, 0,
9321        doc: /* Return coding system specified for terminal output on the given terminal.
9322 TERMINAL may be a terminal object, a frame, or nil for the selected
9323 frame's terminal device.  */)
9324   (Lisp_Object terminal)
9325 {
9326   struct coding_system *terminal_coding
9327     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9328   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9329
9330   /* For backward compatibility, return nil if it is `undecided'.  */
9331   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9332 }
9333
9334 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9335        Sset_keyboard_coding_system_internal, 1, 2, 0,
9336        doc: /* Internal use only.  */)
9337   (Lisp_Object coding_system, Lisp_Object terminal)
9338 {
9339   struct terminal *t = get_terminal (terminal, 1);
9340   CHECK_SYMBOL (coding_system);
9341   if (NILP (coding_system))
9342     coding_system = Qno_conversion;
9343   else
9344     Fcheck_coding_system (coding_system);
9345   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9346   /* Character composition should be disabled.  */
9347   TERMINAL_KEYBOARD_CODING (t)->common_flags
9348     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9349   return Qnil;
9350 }
9351
9352 DEFUN ("keyboard-coding-system",
9353        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9354        doc: /* Return coding system specified for decoding keyboard input.  */)
9355   (Lisp_Object terminal)
9356 {
9357   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9358                          (get_terminal (terminal, 1))->id);
9359 }
9360
9361 \f
9362 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9363        Sfind_operation_coding_system,  1, MANY, 0,
9364        doc: /* Choose a coding system for an operation based on the target name.
9365 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9366 DECODING-SYSTEM is the coding system to use for decoding
9367 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9368 for encoding (in case OPERATION does encoding).
9369
9370 The first argument OPERATION specifies an I/O primitive:
9371   For file I/O, `insert-file-contents' or `write-region'.
9372   For process I/O, `call-process', `call-process-region', or `start-process'.
9373   For network I/O, `open-network-stream'.
9374
9375 The remaining arguments should be the same arguments that were passed
9376 to the primitive.  Depending on which primitive, one of those arguments
9377 is selected as the TARGET.  For example, if OPERATION does file I/O,
9378 whichever argument specifies the file name is TARGET.
9379
9380 TARGET has a meaning which depends on OPERATION:
9381   For file I/O, TARGET is a file name (except for the special case below).
9382   For process I/O, TARGET is a process name.
9383   For network I/O, TARGET is a service name or a port number.
9384
9385 This function looks up what is specified for TARGET in
9386 `file-coding-system-alist', `process-coding-system-alist',
9387 or `network-coding-system-alist' depending on OPERATION.
9388 They may specify a coding system, a cons of coding systems,
9389 or a function symbol to call.
9390 In the last case, we call the function with one argument,
9391 which is a list of all the arguments given to this function.
9392 If the function can't decide a coding system, it can return
9393 `undecided' so that the normal code-detection is performed.
9394
9395 If OPERATION is `insert-file-contents', the argument corresponding to
9396 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9397 file name to look up, and BUFFER is a buffer that contains the file's
9398 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9399 function to call for FILENAME, that function should examine the
9400 contents of BUFFER instead of reading the file.
9401
9402 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9403   (ptrdiff_t nargs, Lisp_Object *args)
9404 {
9405   Lisp_Object operation, target_idx, target, val;
9406   register Lisp_Object chain;
9407
9408   if (nargs < 2)
9409     error ("Too few arguments");
9410   operation = args[0];
9411   if (!SYMBOLP (operation)
9412       || (target_idx = Fget (operation, Qtarget_idx), !NATNUMP (target_idx)))
9413     error ("Invalid first argument");
9414   if (nargs <= 1 + XFASTINT (target_idx))
9415     error ("Too few arguments for operation `%s'",
9416            SDATA (SYMBOL_NAME (operation)));
9417   target = args[XFASTINT (target_idx) + 1];
9418   if (!(STRINGP (target)
9419         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9420             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9421         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9422     error ("Invalid argument %"pI"d of operation `%s'",
9423            XFASTINT (target_idx) + 1, SDATA (SYMBOL_NAME (operation)));
9424   if (CONSP (target))
9425     target = XCAR (target);
9426
9427   chain = ((EQ (operation, Qinsert_file_contents)
9428             || EQ (operation, Qwrite_region))
9429            ? Vfile_coding_system_alist
9430            : (EQ (operation, Qopen_network_stream)
9431               ? Vnetwork_coding_system_alist
9432               : Vprocess_coding_system_alist));
9433   if (NILP (chain))
9434     return Qnil;
9435
9436   for (; CONSP (chain); chain = XCDR (chain))
9437     {
9438       Lisp_Object elt;
9439
9440       elt = XCAR (chain);
9441       if (CONSP (elt)
9442           && ((STRINGP (target)
9443                && STRINGP (XCAR (elt))
9444                && fast_string_match (XCAR (elt), target) >= 0)
9445               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9446         {
9447           val = XCDR (elt);
9448           /* Here, if VAL is both a valid coding system and a valid
9449              function symbol, we return VAL as a coding system.  */
9450           if (CONSP (val))
9451             return val;
9452           if (! SYMBOLP (val))
9453             return Qnil;
9454           if (! NILP (Fcoding_system_p (val)))
9455             return Fcons (val, val);
9456           if (! NILP (Ffboundp (val)))
9457             {
9458               /* We use call1 rather than safe_call1
9459                  so as to get bug reports about functions called here
9460                  which don't handle the current interface.  */
9461               val = call1 (val, Flist (nargs, args));
9462               if (CONSP (val))
9463                 return val;
9464               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9465                 return Fcons (val, val);
9466             }
9467           return Qnil;
9468         }
9469     }
9470   return Qnil;
9471 }
9472
9473 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9474        Sset_coding_system_priority, 0, MANY, 0,
9475        doc: /* Assign higher priority to the coding systems given as arguments.
9476 If multiple coding systems belong to the same category,
9477 all but the first one are ignored.
9478
9479 usage: (set-coding-system-priority &rest coding-systems)  */)
9480   (ptrdiff_t nargs, Lisp_Object *args)
9481 {
9482   ptrdiff_t i, j;
9483   int changed[coding_category_max];
9484   enum coding_category priorities[coding_category_max];
9485
9486   memset (changed, 0, sizeof changed);
9487
9488   for (i = j = 0; i < nargs; i++)
9489     {
9490       enum coding_category category;
9491       Lisp_Object spec, attrs;
9492
9493       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9494       attrs = AREF (spec, 0);
9495       category = XINT (CODING_ATTR_CATEGORY (attrs));
9496       if (changed[category])
9497         /* Ignore this coding system because a coding system of the
9498            same category already had a higher priority.  */
9499         continue;
9500       changed[category] = 1;
9501       priorities[j++] = category;
9502       if (coding_categories[category].id >= 0
9503           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9504         setup_coding_system (args[i], &coding_categories[category]);
9505       Fset (AREF (Vcoding_category_table, category), args[i]);
9506     }
9507
9508   /* Now we have decided top J priorities.  Reflect the order of the
9509      original priorities to the remaining priorities.  */
9510
9511   for (i = j, j = 0; i < coding_category_max; i++, j++)
9512     {
9513       while (j < coding_category_max
9514              && changed[coding_priorities[j]])
9515         j++;
9516       if (j == coding_category_max)
9517         abort ();
9518       priorities[i] = coding_priorities[j];
9519     }
9520
9521   memcpy (coding_priorities, priorities, sizeof priorities);
9522
9523   /* Update `coding-category-list'.  */
9524   Vcoding_category_list = Qnil;
9525   for (i = coding_category_max; i-- > 0; )
9526     Vcoding_category_list
9527       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9528                Vcoding_category_list);
9529
9530   return Qnil;
9531 }
9532
9533 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9534        Scoding_system_priority_list, 0, 1, 0,
9535        doc: /* Return a list of coding systems ordered by their priorities.
9536 The list contains a subset of coding systems; i.e. coding systems
9537 assigned to each coding category (see `coding-category-list').
9538
9539 HIGHESTP non-nil means just return the highest priority one.  */)
9540   (Lisp_Object highestp)
9541 {
9542   int i;
9543   Lisp_Object val;
9544
9545   for (i = 0, val = Qnil; i < coding_category_max; i++)
9546     {
9547       enum coding_category category = coding_priorities[i];
9548       int id = coding_categories[category].id;
9549       Lisp_Object attrs;
9550
9551       if (id < 0)
9552         continue;
9553       attrs = CODING_ID_ATTRS (id);
9554       if (! NILP (highestp))
9555         return CODING_ATTR_BASE_NAME (attrs);
9556       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9557     }
9558   return Fnreverse (val);
9559 }
9560
9561 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9562
9563 static Lisp_Object
9564 make_subsidiaries (Lisp_Object base)
9565 {
9566   Lisp_Object subsidiaries;
9567   ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base));
9568   char *buf = alloca (base_name_len + 6);
9569   int i;
9570
9571   memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
9572   subsidiaries = Fmake_vector (make_number (3), Qnil);
9573   for (i = 0; i < 3; i++)
9574     {
9575       strcpy (buf + base_name_len, suffixes[i]);
9576       ASET (subsidiaries, i, intern (buf));
9577     }
9578   return subsidiaries;
9579 }
9580
9581
9582 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9583        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9584        doc: /* For internal use only.
9585 usage: (define-coding-system-internal ...)  */)
9586   (ptrdiff_t nargs, Lisp_Object *args)
9587 {
9588   Lisp_Object name;
9589   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9590   Lisp_Object attrs;            /* Vector of attributes.  */
9591   Lisp_Object eol_type;
9592   Lisp_Object aliases;
9593   Lisp_Object coding_type, charset_list, safe_charsets;
9594   enum coding_category category;
9595   Lisp_Object tail, val;
9596   int max_charset_id = 0;
9597   int i;
9598
9599   if (nargs < coding_arg_max)
9600     goto short_args;
9601
9602   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9603
9604   name = args[coding_arg_name];
9605   CHECK_SYMBOL (name);
9606   CODING_ATTR_BASE_NAME (attrs) = name;
9607
9608   val = args[coding_arg_mnemonic];
9609   if (! STRINGP (val))
9610     CHECK_CHARACTER (val);
9611   CODING_ATTR_MNEMONIC (attrs) = val;
9612
9613   coding_type = args[coding_arg_coding_type];
9614   CHECK_SYMBOL (coding_type);
9615   CODING_ATTR_TYPE (attrs) = coding_type;
9616
9617   charset_list = args[coding_arg_charset_list];
9618   if (SYMBOLP (charset_list))
9619     {
9620       if (EQ (charset_list, Qiso_2022))
9621         {
9622           if (! EQ (coding_type, Qiso_2022))
9623             error ("Invalid charset-list");
9624           charset_list = Viso_2022_charset_list;
9625         }
9626       else if (EQ (charset_list, Qemacs_mule))
9627         {
9628           if (! EQ (coding_type, Qemacs_mule))
9629             error ("Invalid charset-list");
9630           charset_list = Vemacs_mule_charset_list;
9631         }
9632       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9633         {
9634           if (! RANGED_INTEGERP (0, XCAR (tail), INT_MAX - 1))
9635             error ("Invalid charset-list");
9636           if (max_charset_id < XFASTINT (XCAR (tail)))
9637             max_charset_id = XFASTINT (XCAR (tail));
9638         }
9639     }
9640   else
9641     {
9642       charset_list = Fcopy_sequence (charset_list);
9643       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9644         {
9645           struct charset *charset;
9646
9647           val = XCAR (tail);
9648           CHECK_CHARSET_GET_CHARSET (val, charset);
9649           if (EQ (coding_type, Qiso_2022)
9650               ? CHARSET_ISO_FINAL (charset) < 0
9651               : EQ (coding_type, Qemacs_mule)
9652               ? CHARSET_EMACS_MULE_ID (charset) < 0
9653               : 0)
9654             error ("Can't handle charset `%s'",
9655                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9656
9657           XSETCAR (tail, make_number (charset->id));
9658           if (max_charset_id < charset->id)
9659             max_charset_id = charset->id;
9660         }
9661     }
9662   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9663
9664   safe_charsets = make_uninit_string (max_charset_id + 1);
9665   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9666   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9667     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9668   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9669
9670   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9671
9672   val = args[coding_arg_decode_translation_table];
9673   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9674     CHECK_SYMBOL (val);
9675   CODING_ATTR_DECODE_TBL (attrs) = val;
9676
9677   val = args[coding_arg_encode_translation_table];
9678   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9679     CHECK_SYMBOL (val);
9680   CODING_ATTR_ENCODE_TBL (attrs) = val;
9681
9682   val = args[coding_arg_post_read_conversion];
9683   CHECK_SYMBOL (val);
9684   CODING_ATTR_POST_READ (attrs) = val;
9685
9686   val = args[coding_arg_pre_write_conversion];
9687   CHECK_SYMBOL (val);
9688   CODING_ATTR_PRE_WRITE (attrs) = val;
9689
9690   val = args[coding_arg_default_char];
9691   if (NILP (val))
9692     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9693   else
9694     {
9695       CHECK_CHARACTER (val);
9696       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9697     }
9698
9699   val = args[coding_arg_for_unibyte];
9700   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9701
9702   val = args[coding_arg_plist];
9703   CHECK_LIST (val);
9704   CODING_ATTR_PLIST (attrs) = val;
9705
9706   if (EQ (coding_type, Qcharset))
9707     {
9708       /* Generate a lisp vector of 256 elements.  Each element is nil,
9709          integer, or a list of charset IDs.
9710
9711          If Nth element is nil, the byte code N is invalid in this
9712          coding system.
9713
9714          If Nth element is a number NUM, N is the first byte of a
9715          charset whose ID is NUM.
9716
9717          If Nth element is a list of charset IDs, N is the first byte
9718          of one of them.  The list is sorted by dimensions of the
9719          charsets.  A charset of smaller dimension comes first. */
9720       val = Fmake_vector (make_number (256), Qnil);
9721
9722       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9723         {
9724           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9725           int dim = CHARSET_DIMENSION (charset);
9726           int idx = (dim - 1) * 4;
9727
9728           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9729             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9730
9731           for (i = charset->code_space[idx];
9732                i <= charset->code_space[idx + 1]; i++)
9733             {
9734               Lisp_Object tmp, tmp2;
9735               int dim2;
9736
9737               tmp = AREF (val, i);
9738               if (NILP (tmp))
9739                 tmp = XCAR (tail);
9740               else if (NUMBERP (tmp))
9741                 {
9742                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9743                   if (dim < dim2)
9744                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9745                   else
9746                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9747                 }
9748               else
9749                 {
9750                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9751                     {
9752                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9753                       if (dim < dim2)
9754                         break;
9755                     }
9756                   if (NILP (tmp2))
9757                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9758                   else
9759                     {
9760                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9761                       XSETCAR (tmp2, XCAR (tail));
9762                     }
9763                 }
9764               ASET (val, i, tmp);
9765             }
9766         }
9767       ASET (attrs, coding_attr_charset_valids, val);
9768       category = coding_category_charset;
9769     }
9770   else if (EQ (coding_type, Qccl))
9771     {
9772       Lisp_Object valids;
9773
9774       if (nargs < coding_arg_ccl_max)
9775         goto short_args;
9776
9777       val = args[coding_arg_ccl_decoder];
9778       CHECK_CCL_PROGRAM (val);
9779       if (VECTORP (val))
9780         val = Fcopy_sequence (val);
9781       ASET (attrs, coding_attr_ccl_decoder, val);
9782
9783       val = args[coding_arg_ccl_encoder];
9784       CHECK_CCL_PROGRAM (val);
9785       if (VECTORP (val))
9786         val = Fcopy_sequence (val);
9787       ASET (attrs, coding_attr_ccl_encoder, val);
9788
9789       val = args[coding_arg_ccl_valids];
9790       valids = Fmake_string (make_number (256), make_number (0));
9791       for (tail = val; CONSP (tail); tail = XCDR (tail))
9792         {
9793           int from, to;
9794
9795           val = XCAR (tail);
9796           if (INTEGERP (val))
9797             {
9798               if (! (0 <= XINT (val) && XINT (val) <= 255))
9799                 args_out_of_range_3 (val, make_number (0), make_number (255));
9800               from = to = XINT (val);
9801             }
9802           else
9803             {
9804               CHECK_CONS (val);
9805               CHECK_NATNUM_CAR (val);
9806               CHECK_NUMBER_CDR (val);
9807               if (XINT (XCAR (val)) > 255)
9808                 args_out_of_range_3 (XCAR (val),
9809                                      make_number (0), make_number (255));
9810               from = XINT (XCAR (val));
9811               if (! (from <= XINT (XCDR (val)) && XINT (XCDR (val)) <= 255))
9812                 args_out_of_range_3 (XCDR (val),
9813                                      XCAR (val), make_number (255));
9814               to = XINT (XCDR (val));
9815             }
9816           for (i = from; i <= to; i++)
9817             SSET (valids, i, 1);
9818         }
9819       ASET (attrs, coding_attr_ccl_valids, valids);
9820
9821       category = coding_category_ccl;
9822     }
9823   else if (EQ (coding_type, Qutf_16))
9824     {
9825       Lisp_Object bom, endian;
9826
9827       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9828
9829       if (nargs < coding_arg_utf16_max)
9830         goto short_args;
9831
9832       bom = args[coding_arg_utf16_bom];
9833       if (! NILP (bom) && ! EQ (bom, Qt))
9834         {
9835           CHECK_CONS (bom);
9836           val = XCAR (bom);
9837           CHECK_CODING_SYSTEM (val);
9838           val = XCDR (bom);
9839           CHECK_CODING_SYSTEM (val);
9840         }
9841       ASET (attrs, coding_attr_utf_bom, bom);
9842
9843       endian = args[coding_arg_utf16_endian];
9844       CHECK_SYMBOL (endian);
9845       if (NILP (endian))
9846         endian = Qbig;
9847       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9848         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9849       ASET (attrs, coding_attr_utf_16_endian, endian);
9850
9851       category = (CONSP (bom)
9852                   ? coding_category_utf_16_auto
9853                   : NILP (bom)
9854                   ? (EQ (endian, Qbig)
9855                      ? coding_category_utf_16_be_nosig
9856                      : coding_category_utf_16_le_nosig)
9857                   : (EQ (endian, Qbig)
9858                      ? coding_category_utf_16_be
9859                      : coding_category_utf_16_le));
9860     }
9861   else if (EQ (coding_type, Qiso_2022))
9862     {
9863       Lisp_Object initial, reg_usage, request, flags;
9864
9865       if (nargs < coding_arg_iso2022_max)
9866         goto short_args;
9867
9868       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9869       CHECK_VECTOR (initial);
9870       for (i = 0; i < 4; i++)
9871         {
9872           val = Faref (initial, make_number (i));
9873           if (! NILP (val))
9874             {
9875               struct charset *charset;
9876
9877               CHECK_CHARSET_GET_CHARSET (val, charset);
9878               ASET (initial, i, make_number (CHARSET_ID (charset)));
9879               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9880                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9881             }
9882           else
9883             ASET (initial, i, make_number (-1));
9884         }
9885
9886       reg_usage = args[coding_arg_iso2022_reg_usage];
9887       CHECK_CONS (reg_usage);
9888       CHECK_NUMBER_CAR (reg_usage);
9889       CHECK_NUMBER_CDR (reg_usage);
9890
9891       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9892       for (tail = request; CONSP (tail); tail = XCDR (tail))
9893         {
9894           int id;
9895           Lisp_Object tmp1;
9896
9897           val = XCAR (tail);
9898           CHECK_CONS (val);
9899           tmp1 = XCAR (val);
9900           CHECK_CHARSET_GET_ID (tmp1, id);
9901           CHECK_NATNUM_CDR (val);
9902           if (XINT (XCDR (val)) >= 4)
9903             error ("Invalid graphic register number: %"pI"d", XINT (XCDR (val)));
9904           XSETCAR (val, make_number (id));
9905         }
9906
9907       flags = args[coding_arg_iso2022_flags];
9908       CHECK_NATNUM (flags);
9909       i = XINT (flags) & INT_MAX;
9910       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9911         i |= CODING_ISO_FLAG_FULL_SUPPORT;
9912       flags = make_number (i);
9913
9914       ASET (attrs, coding_attr_iso_initial, initial);
9915       ASET (attrs, coding_attr_iso_usage, reg_usage);
9916       ASET (attrs, coding_attr_iso_request, request);
9917       ASET (attrs, coding_attr_iso_flags, flags);
9918       setup_iso_safe_charsets (attrs);
9919
9920       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9921         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9922                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9923                     ? coding_category_iso_7_else
9924                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9925                     ? coding_category_iso_7
9926                     : coding_category_iso_7_tight);
9927       else
9928         {
9929           int id = XINT (AREF (initial, 1));
9930
9931           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9932                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9933                        || id < 0)
9934                       ? coding_category_iso_8_else
9935                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9936                       ? coding_category_iso_8_1
9937                       : coding_category_iso_8_2);
9938         }
9939       if (category != coding_category_iso_8_1
9940           && category != coding_category_iso_8_2)
9941         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9942     }
9943   else if (EQ (coding_type, Qemacs_mule))
9944     {
9945       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9946         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9947       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9948       category = coding_category_emacs_mule;
9949     }
9950   else if (EQ (coding_type, Qshift_jis))
9951     {
9952
9953       struct charset *charset;
9954
9955       if (XINT (Flength (charset_list)) != 3
9956           && XINT (Flength (charset_list)) != 4)
9957         error ("There should be three or four charsets");
9958
9959       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9960       if (CHARSET_DIMENSION (charset) != 1)
9961         error ("Dimension of charset %s is not one",
9962                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9963       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9964         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9965
9966       charset_list = XCDR (charset_list);
9967       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9968       if (CHARSET_DIMENSION (charset) != 1)
9969         error ("Dimension of charset %s is not one",
9970                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9971
9972       charset_list = XCDR (charset_list);
9973       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9974       if (CHARSET_DIMENSION (charset) != 2)
9975         error ("Dimension of charset %s is not two",
9976                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9977
9978       charset_list = XCDR (charset_list);
9979       if (! NILP (charset_list))
9980         {
9981           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9982           if (CHARSET_DIMENSION (charset) != 2)
9983             error ("Dimension of charset %s is not two",
9984                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9985         }
9986
9987       category = coding_category_sjis;
9988       Vsjis_coding_system = name;
9989     }
9990   else if (EQ (coding_type, Qbig5))
9991     {
9992       struct charset *charset;
9993
9994       if (XINT (Flength (charset_list)) != 2)
9995         error ("There should be just two charsets");
9996
9997       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9998       if (CHARSET_DIMENSION (charset) != 1)
9999         error ("Dimension of charset %s is not one",
10000                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10001       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10002         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10003
10004       charset_list = XCDR (charset_list);
10005       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10006       if (CHARSET_DIMENSION (charset) != 2)
10007         error ("Dimension of charset %s is not two",
10008                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10009
10010       category = coding_category_big5;
10011       Vbig5_coding_system = name;
10012     }
10013   else if (EQ (coding_type, Qraw_text))
10014     {
10015       category = coding_category_raw_text;
10016       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10017     }
10018   else if (EQ (coding_type, Qutf_8))
10019     {
10020       Lisp_Object bom;
10021
10022       if (nargs < coding_arg_utf8_max)
10023         goto short_args;
10024
10025       bom = args[coding_arg_utf8_bom];
10026       if (! NILP (bom) && ! EQ (bom, Qt))
10027         {
10028           CHECK_CONS (bom);
10029           val = XCAR (bom);
10030           CHECK_CODING_SYSTEM (val);
10031           val = XCDR (bom);
10032           CHECK_CODING_SYSTEM (val);
10033         }
10034       ASET (attrs, coding_attr_utf_bom, bom);
10035       if (NILP (bom))
10036         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10037
10038       category = (CONSP (bom) ? coding_category_utf_8_auto
10039                   : NILP (bom) ? coding_category_utf_8_nosig
10040                   : coding_category_utf_8_sig);
10041     }
10042   else if (EQ (coding_type, Qundecided))
10043     category = coding_category_undecided;
10044   else
10045     error ("Invalid coding system type: %s",
10046            SDATA (SYMBOL_NAME (coding_type)));
10047
10048   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10049   CODING_ATTR_PLIST (attrs)
10050     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10051                                 CODING_ATTR_PLIST (attrs)));
10052   CODING_ATTR_PLIST (attrs)
10053     = Fcons (QCascii_compatible_p,
10054              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10055                     CODING_ATTR_PLIST (attrs)));
10056
10057   eol_type = args[coding_arg_eol_type];
10058   if (! NILP (eol_type)
10059       && ! EQ (eol_type, Qunix)
10060       && ! EQ (eol_type, Qdos)
10061       && ! EQ (eol_type, Qmac))
10062     error ("Invalid eol-type");
10063
10064   aliases = Fcons (name, Qnil);
10065
10066   if (NILP (eol_type))
10067     {
10068       eol_type = make_subsidiaries (name);
10069       for (i = 0; i < 3; i++)
10070         {
10071           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10072
10073           this_name = AREF (eol_type, i);
10074           this_aliases = Fcons (this_name, Qnil);
10075           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10076           this_spec = Fmake_vector (make_number (3), attrs);
10077           ASET (this_spec, 1, this_aliases);
10078           ASET (this_spec, 2, this_eol_type);
10079           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10080           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10081           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10082           if (NILP (val))
10083             Vcoding_system_alist
10084               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10085                        Vcoding_system_alist);
10086         }
10087     }
10088
10089   spec_vec = Fmake_vector (make_number (3), attrs);
10090   ASET (spec_vec, 1, aliases);
10091   ASET (spec_vec, 2, eol_type);
10092
10093   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10094   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10095   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10096   if (NILP (val))
10097     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10098                                   Vcoding_system_alist);
10099
10100   {
10101     int id = coding_categories[category].id;
10102
10103     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10104       setup_coding_system (name, &coding_categories[category]);
10105   }
10106
10107   return Qnil;
10108
10109  short_args:
10110   return Fsignal (Qwrong_number_of_arguments,
10111                   Fcons (intern ("define-coding-system-internal"),
10112                          make_number (nargs)));
10113 }
10114
10115
10116 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10117        3, 3, 0,
10118        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10119   (Lisp_Object coding_system, Lisp_Object prop, Lisp_Object val)
10120 {
10121   Lisp_Object spec, attrs;
10122
10123   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10124   attrs = AREF (spec, 0);
10125   if (EQ (prop, QCmnemonic))
10126     {
10127       if (! STRINGP (val))
10128         CHECK_CHARACTER (val);
10129       CODING_ATTR_MNEMONIC (attrs) = val;
10130     }
10131   else if (EQ (prop, QCdefault_char))
10132     {
10133       if (NILP (val))
10134         val = make_number (' ');
10135       else
10136         CHECK_CHARACTER (val);
10137       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10138     }
10139   else if (EQ (prop, QCdecode_translation_table))
10140     {
10141       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10142         CHECK_SYMBOL (val);
10143       CODING_ATTR_DECODE_TBL (attrs) = val;
10144     }
10145   else if (EQ (prop, QCencode_translation_table))
10146     {
10147       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10148         CHECK_SYMBOL (val);
10149       CODING_ATTR_ENCODE_TBL (attrs) = val;
10150     }
10151   else if (EQ (prop, QCpost_read_conversion))
10152     {
10153       CHECK_SYMBOL (val);
10154       CODING_ATTR_POST_READ (attrs) = val;
10155     }
10156   else if (EQ (prop, QCpre_write_conversion))
10157     {
10158       CHECK_SYMBOL (val);
10159       CODING_ATTR_PRE_WRITE (attrs) = val;
10160     }
10161   else if (EQ (prop, QCascii_compatible_p))
10162     {
10163       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10164     }
10165
10166   CODING_ATTR_PLIST (attrs)
10167     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10168   return val;
10169 }
10170
10171
10172 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10173        Sdefine_coding_system_alias, 2, 2, 0,
10174        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10175   (Lisp_Object alias, Lisp_Object coding_system)
10176 {
10177   Lisp_Object spec, aliases, eol_type, val;
10178
10179   CHECK_SYMBOL (alias);
10180   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10181   aliases = AREF (spec, 1);
10182   /* ALIASES should be a list of length more than zero, and the first
10183      element is a base coding system.  Append ALIAS at the tail of the
10184      list.  */
10185   while (!NILP (XCDR (aliases)))
10186     aliases = XCDR (aliases);
10187   XSETCDR (aliases, Fcons (alias, Qnil));
10188
10189   eol_type = AREF (spec, 2);
10190   if (VECTORP (eol_type))
10191     {
10192       Lisp_Object subsidiaries;
10193       int i;
10194
10195       subsidiaries = make_subsidiaries (alias);
10196       for (i = 0; i < 3; i++)
10197         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10198                                      AREF (eol_type, i));
10199     }
10200
10201   Fputhash (alias, spec, Vcoding_system_hash_table);
10202   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10203   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10204   if (NILP (val))
10205     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10206                                   Vcoding_system_alist);
10207
10208   return Qnil;
10209 }
10210
10211 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10212        1, 1, 0,
10213        doc: /* Return the base of CODING-SYSTEM.
10214 Any alias or subsidiary coding system is not a base coding system.  */)
10215   (Lisp_Object coding_system)
10216 {
10217   Lisp_Object spec, attrs;
10218
10219   if (NILP (coding_system))
10220     return (Qno_conversion);
10221   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10222   attrs = AREF (spec, 0);
10223   return CODING_ATTR_BASE_NAME (attrs);
10224 }
10225
10226 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10227        1, 1, 0,
10228        doc: "Return the property list of CODING-SYSTEM.")
10229   (Lisp_Object coding_system)
10230 {
10231   Lisp_Object spec, attrs;
10232
10233   if (NILP (coding_system))
10234     coding_system = Qno_conversion;
10235   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10236   attrs = AREF (spec, 0);
10237   return CODING_ATTR_PLIST (attrs);
10238 }
10239
10240
10241 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10242        1, 1, 0,
10243        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10244   (Lisp_Object coding_system)
10245 {
10246   Lisp_Object spec;
10247
10248   if (NILP (coding_system))
10249     coding_system = Qno_conversion;
10250   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10251   return AREF (spec, 1);
10252 }
10253
10254 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10255        Scoding_system_eol_type, 1, 1, 0,
10256        doc: /* Return eol-type of CODING-SYSTEM.
10257 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10258
10259 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10260 and CR respectively.
10261
10262 A vector value indicates that a format of end-of-line should be
10263 detected automatically.  Nth element of the vector is the subsidiary
10264 coding system whose eol-type is N.  */)
10265   (Lisp_Object coding_system)
10266 {
10267   Lisp_Object spec, eol_type;
10268   int n;
10269
10270   if (NILP (coding_system))
10271     coding_system = Qno_conversion;
10272   if (! CODING_SYSTEM_P (coding_system))
10273     return Qnil;
10274   spec = CODING_SYSTEM_SPEC (coding_system);
10275   eol_type = AREF (spec, 2);
10276   if (VECTORP (eol_type))
10277     return Fcopy_sequence (eol_type);
10278   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10279   return make_number (n);
10280 }
10281
10282 #endif /* emacs */
10283
10284 \f
10285 /*** 12. Postamble ***/
10286
10287 void
10288 init_coding_once (void)
10289 {
10290   int i;
10291
10292   for (i = 0; i < coding_category_max; i++)
10293     {
10294       coding_categories[i].id = -1;
10295       coding_priorities[i] = i;
10296     }
10297
10298   /* ISO2022 specific initialize routine.  */
10299   for (i = 0; i < 0x20; i++)
10300     iso_code_class[i] = ISO_control_0;
10301   for (i = 0x21; i < 0x7F; i++)
10302     iso_code_class[i] = ISO_graphic_plane_0;
10303   for (i = 0x80; i < 0xA0; i++)
10304     iso_code_class[i] = ISO_control_1;
10305   for (i = 0xA1; i < 0xFF; i++)
10306     iso_code_class[i] = ISO_graphic_plane_1;
10307   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10308   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10309   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10310   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10311   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10312   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10313   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10314   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10315   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10316
10317   for (i = 0; i < 256; i++)
10318     {
10319       emacs_mule_bytes[i] = 1;
10320     }
10321   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10322   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10323   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10324   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10325 }
10326
10327 #ifdef emacs
10328
10329 void
10330 syms_of_coding (void)
10331 {
10332   staticpro (&Vcoding_system_hash_table);
10333   {
10334     Lisp_Object args[2];
10335     args[0] = QCtest;
10336     args[1] = Qeq;
10337     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10338   }
10339
10340   staticpro (&Vsjis_coding_system);
10341   Vsjis_coding_system = Qnil;
10342
10343   staticpro (&Vbig5_coding_system);
10344   Vbig5_coding_system = Qnil;
10345
10346   staticpro (&Vcode_conversion_reused_workbuf);
10347   Vcode_conversion_reused_workbuf = Qnil;
10348
10349   staticpro (&Vcode_conversion_workbuf_name);
10350   Vcode_conversion_workbuf_name = build_pure_c_string (" *code-conversion-work*");
10351
10352   reused_workbuf_in_use = 0;
10353
10354   DEFSYM (Qcharset, "charset");
10355   DEFSYM (Qtarget_idx, "target-idx");
10356   DEFSYM (Qcoding_system_history, "coding-system-history");
10357   Fset (Qcoding_system_history, Qnil);
10358
10359   /* Target FILENAME is the first argument.  */
10360   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10361   /* Target FILENAME is the third argument.  */
10362   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10363
10364   DEFSYM (Qcall_process, "call-process");
10365   /* Target PROGRAM is the first argument.  */
10366   Fput (Qcall_process, Qtarget_idx, make_number (0));
10367
10368   DEFSYM (Qcall_process_region, "call-process-region");
10369   /* Target PROGRAM is the third argument.  */
10370   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10371
10372   DEFSYM (Qstart_process, "start-process");
10373   /* Target PROGRAM is the third argument.  */
10374   Fput (Qstart_process, Qtarget_idx, make_number (2));
10375
10376   DEFSYM (Qopen_network_stream, "open-network-stream");
10377   /* Target SERVICE is the fourth argument.  */
10378   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10379
10380   DEFSYM (Qcoding_system, "coding-system");
10381   DEFSYM (Qcoding_aliases, "coding-aliases");
10382
10383   DEFSYM (Qeol_type, "eol-type");
10384   DEFSYM (Qunix, "unix");
10385   DEFSYM (Qdos, "dos");
10386
10387   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10388   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10389   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10390   DEFSYM (Qdefault_char, "default-char");
10391   DEFSYM (Qundecided, "undecided");
10392   DEFSYM (Qno_conversion, "no-conversion");
10393   DEFSYM (Qraw_text, "raw-text");
10394
10395   DEFSYM (Qiso_2022, "iso-2022");
10396
10397   DEFSYM (Qutf_8, "utf-8");
10398   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10399
10400   DEFSYM (Qutf_16, "utf-16");
10401   DEFSYM (Qbig, "big");
10402   DEFSYM (Qlittle, "little");
10403
10404   DEFSYM (Qshift_jis, "shift-jis");
10405   DEFSYM (Qbig5, "big5");
10406
10407   DEFSYM (Qcoding_system_p, "coding-system-p");
10408
10409   DEFSYM (Qcoding_system_error, "coding-system-error");
10410   Fput (Qcoding_system_error, Qerror_conditions,
10411         listn (CONSTYPE_PURE, 2, Qcoding_system_error, Qerror));
10412   Fput (Qcoding_system_error, Qerror_message,
10413         build_pure_c_string ("Invalid coding system"));
10414
10415   /* Intern this now in case it isn't already done.
10416      Setting this variable twice is harmless.
10417      But don't staticpro it here--that is done in alloc.c.  */
10418   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10419
10420   DEFSYM (Qtranslation_table, "translation-table");
10421   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10422   DEFSYM (Qtranslation_table_id, "translation-table-id");
10423   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10424   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10425
10426   DEFSYM (Qvalid_codes, "valid-codes");
10427
10428   DEFSYM (Qemacs_mule, "emacs-mule");
10429
10430   DEFSYM (QCcategory, ":category");
10431   DEFSYM (QCmnemonic, ":mnemonic");
10432   DEFSYM (QCdefault_char, ":default-char");
10433   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10434   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10435   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10436   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10437   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10438
10439   Vcoding_category_table
10440     = Fmake_vector (make_number (coding_category_max), Qnil);
10441   staticpro (&Vcoding_category_table);
10442   /* Followings are target of code detection.  */
10443   ASET (Vcoding_category_table, coding_category_iso_7,
10444         intern_c_string ("coding-category-iso-7"));
10445   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10446         intern_c_string ("coding-category-iso-7-tight"));
10447   ASET (Vcoding_category_table, coding_category_iso_8_1,
10448         intern_c_string ("coding-category-iso-8-1"));
10449   ASET (Vcoding_category_table, coding_category_iso_8_2,
10450         intern_c_string ("coding-category-iso-8-2"));
10451   ASET (Vcoding_category_table, coding_category_iso_7_else,
10452         intern_c_string ("coding-category-iso-7-else"));
10453   ASET (Vcoding_category_table, coding_category_iso_8_else,
10454         intern_c_string ("coding-category-iso-8-else"));
10455   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10456         intern_c_string ("coding-category-utf-8-auto"));
10457   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10458         intern_c_string ("coding-category-utf-8"));
10459   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10460         intern_c_string ("coding-category-utf-8-sig"));
10461   ASET (Vcoding_category_table, coding_category_utf_16_be,
10462         intern_c_string ("coding-category-utf-16-be"));
10463   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10464         intern_c_string ("coding-category-utf-16-auto"));
10465   ASET (Vcoding_category_table, coding_category_utf_16_le,
10466         intern_c_string ("coding-category-utf-16-le"));
10467   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10468         intern_c_string ("coding-category-utf-16-be-nosig"));
10469   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10470         intern_c_string ("coding-category-utf-16-le-nosig"));
10471   ASET (Vcoding_category_table, coding_category_charset,
10472         intern_c_string ("coding-category-charset"));
10473   ASET (Vcoding_category_table, coding_category_sjis,
10474         intern_c_string ("coding-category-sjis"));
10475   ASET (Vcoding_category_table, coding_category_big5,
10476         intern_c_string ("coding-category-big5"));
10477   ASET (Vcoding_category_table, coding_category_ccl,
10478         intern_c_string ("coding-category-ccl"));
10479   ASET (Vcoding_category_table, coding_category_emacs_mule,
10480         intern_c_string ("coding-category-emacs-mule"));
10481   /* Followings are NOT target of code detection.  */
10482   ASET (Vcoding_category_table, coding_category_raw_text,
10483         intern_c_string ("coding-category-raw-text"));
10484   ASET (Vcoding_category_table, coding_category_undecided,
10485         intern_c_string ("coding-category-undecided"));
10486
10487   DEFSYM (Qinsufficient_source, "insufficient-source");
10488   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10489   DEFSYM (Qinvalid_source, "invalid-source");
10490   DEFSYM (Qinterrupted, "interrupted");
10491   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10492   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10493
10494   defsubr (&Scoding_system_p);
10495   defsubr (&Sread_coding_system);
10496   defsubr (&Sread_non_nil_coding_system);
10497   defsubr (&Scheck_coding_system);
10498   defsubr (&Sdetect_coding_region);
10499   defsubr (&Sdetect_coding_string);
10500   defsubr (&Sfind_coding_systems_region_internal);
10501   defsubr (&Sunencodable_char_position);
10502   defsubr (&Scheck_coding_systems_region);
10503   defsubr (&Sdecode_coding_region);
10504   defsubr (&Sencode_coding_region);
10505   defsubr (&Sdecode_coding_string);
10506   defsubr (&Sencode_coding_string);
10507   defsubr (&Sdecode_sjis_char);
10508   defsubr (&Sencode_sjis_char);
10509   defsubr (&Sdecode_big5_char);
10510   defsubr (&Sencode_big5_char);
10511   defsubr (&Sset_terminal_coding_system_internal);
10512   defsubr (&Sset_safe_terminal_coding_system_internal);
10513   defsubr (&Sterminal_coding_system);
10514   defsubr (&Sset_keyboard_coding_system_internal);
10515   defsubr (&Skeyboard_coding_system);
10516   defsubr (&Sfind_operation_coding_system);
10517   defsubr (&Sset_coding_system_priority);
10518   defsubr (&Sdefine_coding_system_internal);
10519   defsubr (&Sdefine_coding_system_alias);
10520   defsubr (&Scoding_system_put);
10521   defsubr (&Scoding_system_base);
10522   defsubr (&Scoding_system_plist);
10523   defsubr (&Scoding_system_aliases);
10524   defsubr (&Scoding_system_eol_type);
10525   defsubr (&Scoding_system_priority_list);
10526
10527   DEFVAR_LISP ("coding-system-list", Vcoding_system_list,
10528                doc: /* List of coding systems.
10529
10530 Do not alter the value of this variable manually.  This variable should be
10531 updated by the functions `define-coding-system' and
10532 `define-coding-system-alias'.  */);
10533   Vcoding_system_list = Qnil;
10534
10535   DEFVAR_LISP ("coding-system-alist", Vcoding_system_alist,
10536                doc: /* Alist of coding system names.
10537 Each element is one element list of coding system name.
10538 This variable is given to `completing-read' as COLLECTION argument.
10539
10540 Do not alter the value of this variable manually.  This variable should be
10541 updated by the functions `make-coding-system' and
10542 `define-coding-system-alias'.  */);
10543   Vcoding_system_alist = Qnil;
10544
10545   DEFVAR_LISP ("coding-category-list", Vcoding_category_list,
10546                doc: /* List of coding-categories (symbols) ordered by priority.
10547
10548 On detecting a coding system, Emacs tries code detection algorithms
10549 associated with each coding-category one by one in this order.  When
10550 one algorithm agrees with a byte sequence of source text, the coding
10551 system bound to the corresponding coding-category is selected.
10552
10553 Don't modify this variable directly, but use `set-coding-system-priority'.  */);
10554   {
10555     int i;
10556
10557     Vcoding_category_list = Qnil;
10558     for (i = coding_category_max - 1; i >= 0; i--)
10559       Vcoding_category_list
10560         = Fcons (AREF (Vcoding_category_table, i),
10561                  Vcoding_category_list);
10562   }
10563
10564   DEFVAR_LISP ("coding-system-for-read", Vcoding_system_for_read,
10565                doc: /* Specify the coding system for read operations.
10566 It is useful to bind this variable with `let', but do not set it globally.
10567 If the value is a coding system, it is used for decoding on read operation.
10568 If not, an appropriate element is used from one of the coding system alists.
10569 There are three such tables: `file-coding-system-alist',
10570 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10571   Vcoding_system_for_read = Qnil;
10572
10573   DEFVAR_LISP ("coding-system-for-write", Vcoding_system_for_write,
10574                doc: /* Specify the coding system for write operations.
10575 Programs bind this variable with `let', but you should not set it globally.
10576 If the value is a coding system, it is used for encoding of output,
10577 when writing it to a file and when sending it to a file or subprocess.
10578
10579 If this does not specify a coding system, an appropriate element
10580 is used from one of the coding system alists.
10581 There are three such tables: `file-coding-system-alist',
10582 `process-coding-system-alist', and `network-coding-system-alist'.
10583 For output to files, if the above procedure does not specify a coding system,
10584 the value of `buffer-file-coding-system' is used.  */);
10585   Vcoding_system_for_write = Qnil;
10586
10587   DEFVAR_LISP ("last-coding-system-used", Vlast_coding_system_used,
10588                doc: /*
10589 Coding system used in the latest file or process I/O.  */);
10590   Vlast_coding_system_used = Qnil;
10591
10592   DEFVAR_LISP ("last-code-conversion-error", Vlast_code_conversion_error,
10593                doc: /*
10594 Error status of the last code conversion.
10595
10596 When an error was detected in the last code conversion, this variable
10597 is set to one of the following symbols.
10598   `insufficient-source'
10599   `inconsistent-eol'
10600   `invalid-source'
10601   `interrupted'
10602   `insufficient-memory'
10603 When no error was detected, the value doesn't change.  So, to check
10604 the error status of a code conversion by this variable, you must
10605 explicitly set this variable to nil before performing code
10606 conversion.  */);
10607   Vlast_code_conversion_error = Qnil;
10608
10609   DEFVAR_BOOL ("inhibit-eol-conversion", inhibit_eol_conversion,
10610                doc: /*
10611 *Non-nil means always inhibit code conversion of end-of-line format.
10612 See info node `Coding Systems' and info node `Text and Binary' concerning
10613 such conversion.  */);
10614   inhibit_eol_conversion = 0;
10615
10616   DEFVAR_BOOL ("inherit-process-coding-system", inherit_process_coding_system,
10617                doc: /*
10618 Non-nil means process buffer inherits coding system of process output.
10619 Bind it to t if the process output is to be treated as if it were a file
10620 read from some filesystem.  */);
10621   inherit_process_coding_system = 0;
10622
10623   DEFVAR_LISP ("file-coding-system-alist", Vfile_coding_system_alist,
10624                doc: /*
10625 Alist to decide a coding system to use for a file I/O operation.
10626 The format is ((PATTERN . VAL) ...),
10627 where PATTERN is a regular expression matching a file name,
10628 VAL is a coding system, a cons of coding systems, or a function symbol.
10629 If VAL is a coding system, it is used for both decoding and encoding
10630 the file contents.
10631 If VAL is a cons of coding systems, the car part is used for decoding,
10632 and the cdr part is used for encoding.
10633 If VAL is a function symbol, the function must return a coding system
10634 or a cons of coding systems which are used as above.  The function is
10635 called with an argument that is a list of the arguments with which
10636 `find-operation-coding-system' was called.  If the function can't decide
10637 a coding system, it can return `undecided' so that the normal
10638 code-detection is performed.
10639
10640 See also the function `find-operation-coding-system'
10641 and the variable `auto-coding-alist'.  */);
10642   Vfile_coding_system_alist = Qnil;
10643
10644   DEFVAR_LISP ("process-coding-system-alist", Vprocess_coding_system_alist,
10645                doc: /*
10646 Alist to decide a coding system to use for a process I/O operation.
10647 The format is ((PATTERN . VAL) ...),
10648 where PATTERN is a regular expression matching a program name,
10649 VAL is a coding system, a cons of coding systems, or a function symbol.
10650 If VAL is a coding system, it is used for both decoding what received
10651 from the program and encoding what sent to the program.
10652 If VAL is a cons of coding systems, the car part is used for decoding,
10653 and the cdr part is used for encoding.
10654 If VAL is a function symbol, the function must return a coding system
10655 or a cons of coding systems which are used as above.
10656
10657 See also the function `find-operation-coding-system'.  */);
10658   Vprocess_coding_system_alist = Qnil;
10659
10660   DEFVAR_LISP ("network-coding-system-alist", Vnetwork_coding_system_alist,
10661                doc: /*
10662 Alist to decide a coding system to use for a network I/O operation.
10663 The format is ((PATTERN . VAL) ...),
10664 where PATTERN is a regular expression matching a network service name
10665 or is a port number to connect to,
10666 VAL is a coding system, a cons of coding systems, or a function symbol.
10667 If VAL is a coding system, it is used for both decoding what received
10668 from the network stream and encoding what sent to the network stream.
10669 If VAL is a cons of coding systems, the car part is used for decoding,
10670 and the cdr part is used for encoding.
10671 If VAL is a function symbol, the function must return a coding system
10672 or a cons of coding systems which are used as above.
10673
10674 See also the function `find-operation-coding-system'.  */);
10675   Vnetwork_coding_system_alist = Qnil;
10676
10677   DEFVAR_LISP ("locale-coding-system", Vlocale_coding_system,
10678                doc: /* Coding system to use with system messages.
10679 Also used for decoding keyboard input on X Window system.  */);
10680   Vlocale_coding_system = Qnil;
10681
10682   /* The eol mnemonics are reset in startup.el system-dependently.  */
10683   DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix,
10684                doc: /*
10685 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10686   eol_mnemonic_unix = build_pure_c_string (":");
10687
10688   DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos,
10689                doc: /*
10690 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10691   eol_mnemonic_dos = build_pure_c_string ("\\");
10692
10693   DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac,
10694                doc: /*
10695 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10696   eol_mnemonic_mac = build_pure_c_string ("/");
10697
10698   DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided,
10699                doc: /*
10700 *String displayed in mode line when end-of-line format is not yet determined.  */);
10701   eol_mnemonic_undecided = build_pure_c_string (":");
10702
10703   DEFVAR_LISP ("enable-character-translation", Venable_character_translation,
10704                doc: /*
10705 *Non-nil enables character translation while encoding and decoding.  */);
10706   Venable_character_translation = Qt;
10707
10708   DEFVAR_LISP ("standard-translation-table-for-decode",
10709                Vstandard_translation_table_for_decode,
10710                doc: /* Table for translating characters while decoding.  */);
10711   Vstandard_translation_table_for_decode = Qnil;
10712
10713   DEFVAR_LISP ("standard-translation-table-for-encode",
10714                Vstandard_translation_table_for_encode,
10715                doc: /* Table for translating characters while encoding.  */);
10716   Vstandard_translation_table_for_encode = Qnil;
10717
10718   DEFVAR_LISP ("charset-revision-table", Vcharset_revision_table,
10719                doc: /* Alist of charsets vs revision numbers.
10720 While encoding, if a charset (car part of an element) is found,
10721 designate it with the escape sequence identifying revision (cdr part
10722 of the element).  */);
10723   Vcharset_revision_table = Qnil;
10724
10725   DEFVAR_LISP ("default-process-coding-system",
10726                Vdefault_process_coding_system,
10727                doc: /* Cons of coding systems used for process I/O by default.
10728 The car part is used for decoding a process output,
10729 the cdr part is used for encoding a text to be sent to a process.  */);
10730   Vdefault_process_coding_system = Qnil;
10731
10732   DEFVAR_LISP ("latin-extra-code-table", Vlatin_extra_code_table,
10733                doc: /*
10734 Table of extra Latin codes in the range 128..159 (inclusive).
10735 This is a vector of length 256.
10736 If Nth element is non-nil, the existence of code N in a file
10737 \(or output of subprocess) doesn't prevent it to be detected as
10738 a coding system of ISO 2022 variant which has a flag
10739 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10740 or reading output of a subprocess.
10741 Only 128th through 159th elements have a meaning.  */);
10742   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10743
10744   DEFVAR_LISP ("select-safe-coding-system-function",
10745                Vselect_safe_coding_system_function,
10746                doc: /*
10747 Function to call to select safe coding system for encoding a text.
10748
10749 If set, this function is called to force a user to select a proper
10750 coding system which can encode the text in the case that a default
10751 coding system used in each operation can't encode the text.  The
10752 function should take care that the buffer is not modified while
10753 the coding system is being selected.
10754
10755 The default value is `select-safe-coding-system' (which see).  */);
10756   Vselect_safe_coding_system_function = Qnil;
10757
10758   DEFVAR_BOOL ("coding-system-require-warning",
10759                coding_system_require_warning,
10760                doc: /* Internal use only.
10761 If non-nil, on writing a file, `select-safe-coding-system-function' is
10762 called even if `coding-system-for-write' is non-nil.  The command
10763 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10764   coding_system_require_warning = 0;
10765
10766
10767   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10768                inhibit_iso_escape_detection,
10769                doc: /*
10770 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10771
10772 When Emacs reads text, it tries to detect how the text is encoded.
10773 This code detection is sensitive to escape sequences.  If Emacs sees
10774 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10775 of the ISO2022 encodings, and decodes text by the corresponding coding
10776 system (e.g. `iso-2022-7bit').
10777
10778 However, there may be a case that you want to read escape sequences in
10779 a file as is.  In such a case, you can set this variable to non-nil.
10780 Then the code detection will ignore any escape sequences, and no text is
10781 detected as encoded in some ISO-2022 encoding.  The result is that all
10782 escape sequences become visible in a buffer.
10783
10784 The default value is nil, and it is strongly recommended not to change
10785 it.  That is because many Emacs Lisp source files that contain
10786 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10787 in Emacs's distribution, and they won't be decoded correctly on
10788 reading if you suppress escape sequence detection.
10789
10790 The other way to read escape sequences in a file without decoding is
10791 to explicitly specify some coding system that doesn't use ISO-2022
10792 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10793   inhibit_iso_escape_detection = 0;
10794
10795   DEFVAR_BOOL ("inhibit-null-byte-detection",
10796                inhibit_null_byte_detection,
10797                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10798 By default, Emacs treats it as binary data, and does not attempt to
10799 decode it.  The effect is as if you specified `no-conversion' for
10800 reading that text.
10801
10802 Set this to non-nil when a regular text happens to include null bytes.
10803 Examples are Index nodes of Info files and null-byte delimited output
10804 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10805 decode text as usual.  */);
10806   inhibit_null_byte_detection = 0;
10807
10808   DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
10809                doc: /* Char table for translating self-inserting characters.
10810 This is applied to the result of input methods, not their input.
10811 See also `keyboard-translate-table'.
10812
10813 Use of this variable for character code unification was rendered
10814 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10815 internal character representation.  */);
10816     Vtranslation_table_for_input = Qnil;
10817
10818   {
10819     Lisp_Object args[coding_arg_max];
10820     Lisp_Object plist[16];
10821     int i;
10822
10823     for (i = 0; i < coding_arg_max; i++)
10824       args[i] = Qnil;
10825
10826     plist[0] = intern_c_string (":name");
10827     plist[1] = args[coding_arg_name] = Qno_conversion;
10828     plist[2] = intern_c_string (":mnemonic");
10829     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10830     plist[4] = intern_c_string (":coding-type");
10831     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10832     plist[6] = intern_c_string (":ascii-compatible-p");
10833     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10834     plist[8] = intern_c_string (":default-char");
10835     plist[9] = args[coding_arg_default_char] = make_number (0);
10836     plist[10] = intern_c_string (":for-unibyte");
10837     plist[11] = args[coding_arg_for_unibyte] = Qt;
10838     plist[12] = intern_c_string (":docstring");
10839     plist[13] = build_pure_c_string ("Do no conversion.\n\
10840 \n\
10841 When you visit a file with this coding, the file is read into a\n\
10842 unibyte buffer as is, thus each byte of a file is treated as a\n\
10843 character.");
10844     plist[14] = intern_c_string (":eol-type");
10845     plist[15] = args[coding_arg_eol_type] = Qunix;
10846     args[coding_arg_plist] = Flist (16, plist);
10847     Fdefine_coding_system_internal (coding_arg_max, args);
10848
10849     plist[1] = args[coding_arg_name] = Qundecided;
10850     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10851     plist[5] = args[coding_arg_coding_type] = Qundecided;
10852     /* This is already set.
10853        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10854     plist[8] = intern_c_string (":charset-list");
10855     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10856     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10857     plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10858     plist[15] = args[coding_arg_eol_type] = Qnil;
10859     args[coding_arg_plist] = Flist (16, plist);
10860     Fdefine_coding_system_internal (coding_arg_max, args);
10861   }
10862
10863   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10864
10865   {
10866     int i;
10867
10868     for (i = 0; i < coding_category_max; i++)
10869       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10870   }
10871 #if defined (DOS_NT)
10872   system_eol_type = Qdos;
10873 #else
10874   system_eol_type = Qunix;
10875 #endif
10876   staticpro (&system_eol_type);
10877 }
10878
10879 char *
10880 emacs_strerror (int error_number)
10881 {
10882   char *str;
10883
10884   synchronize_system_messages_locale ();
10885   str = strerror (error_number);
10886
10887   if (! NILP (Vlocale_coding_system))
10888     {
10889       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10890                                                       Vlocale_coding_system,
10891                                                       0);
10892       str = SSDATA (dec);
10893     }
10894
10895   return str;
10896 }
10897
10898 #endif /* emacs */