src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001-2011 Free Software Foundation, Inc.
   3    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4      2005, 2006, 2007, 2008, 2009, 2010, 2011
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H14PRO021
   7    Copyright (C) 2003
   8      National Institute of Advanced Industrial Science and Technology (AIST)
   9      Registration Number H13PRO009
  10
  11 This file is part of GNU Emacs.
  12
  13 GNU Emacs is free software: you can redistribute it and/or modify
  14 it under the terms of the GNU General Public License as published by
  15 the Free Software Foundation, either version 3 of the License, or
  16 (at your option) any later version.
  17
  18 GNU Emacs is distributed in the hope that it will be useful,
  19 but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 GNU General Public License for more details.
  22
  23 You should have received a copy of the GNU General Public License
  24 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcoding_system_hash_table.  The conversion
  61   from coding system symbol to attributes vector is done by looking up
  62   Vcoding_system_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (struct coding_system *coding,
 157                    struct coding_detection_info *detect_info)
 158 {
 159   const unsigned char *src = coding->source;
 160   const unsigned char *src_end = coding->source + coding->src_bytes;
 161   int multibytep = coding->src_multibyte;
 162   int consumed_chars = 0;
 163   int found = 0;
 164   ...;                  /* E.g. `c' and `src_base', which ONE_MORE_BYTE uses.  */
 165
 166   while (1)
 167     {
 168       /* Get one byte from the source.  If the source is exhausted, jump
 169          to no_more_source:.  */
 170       ONE_MORE_BYTE (c);
 171       /* A nonconforming byte rejects XXX; a strongly suggestive one marks it found.  */
 172       if (! __C_conforms_to_XXX___ (c))
 173         break;
 174       if (__C_strongly_suggests_XXX__ (c))
 175         found = CATEGORY_MASK_XXX;
 176     }
 177   /* The byte sequence is invalid for XXX.  */
 178   detect_info->rejected |= CATEGORY_MASK_XXX;
 179   return 0;
 180
 181  no_more_source:
 182   /* The source was exhausted without meeting a nonconforming byte.  */
 183   detect_info->found |= found;
 184   return 1;
 185 }
 186 #endif
 187
 188 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 189
 190   These functions decode a byte sequence specified as a source by
 191   CODING.  The resulting multibyte text goes to a place pointed to by
 192   CODING->charbuf, the length of which should not exceed
 193   CODING->charbuf_size.
 194
 195   These functions set the information of original and decoded texts in
 196   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 197   They also set CODING->result to one of CODING_RESULT_XXX indicating
 198   how the decoding is finished.
 199
 200   Below is the template of these functions.  */
 201
 202 #if 0
 203 static void
 204 decode_coding_XXXX (struct coding_system *coding)
 205 {
 206   const unsigned char *src = coding->source + coding->consumed;
 207   const unsigned char *src_end = coding->source + coding->src_bytes;
 208   /* SRC_BASE remembers the start position in source in each loop.
 209      The loop will be exited when there's not enough source code, or
 210      when there's no room in CHARBUF for a decoded character.  */
 211   const unsigned char *src_base;
 212   /* A buffer to produce decoded characters.  */
 213   int *charbuf = coding->charbuf + coding->charbuf_used;
 214   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 215   int multibytep = coding->src_multibyte;
 216
 217   while (1)
 218     {
 219       src_base = src;
 220       if (charbuf >= charbuf_end)
 221         /* No more room to produce a decoded character.  */
 222         break;
 223       ONE_MORE_BYTE (c);
 224       /* Decode it. */
 225     }
 226
 227  no_more_source:
 228   if (src_base < src_end
 229       && coding->mode & CODING_MODE_LAST_BLOCK)
 230     /* If the source ends by partial bytes to construct a character,
 231        treat them as eight-bit raw data.  */
 232     while (src_base < src_end && charbuf < charbuf_end)
 233       *charbuf++ = *src_base++;
 234   /* Remember how many bytes and characters we consumed.  If the
 235      source is multibyte, the bytes and chars are not identical.  */
 236   coding->consumed = coding->consumed_char = src_base - coding->source;
 237   /* Remember how many characters we produced.  */
 238   coding->charbuf_used = charbuf - coding->charbuf;
 239 }
 240 #endif
 241
 242 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 243
 244   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 245   internal multibyte format by CODING.  The resulting byte sequence
 246   goes to a place pointed to by DESTINATION, the length of which
 247   should not exceed DST_BYTES.
 248
 249   These functions set the information of original and encoded texts in
 250   the members produced, produced_char, consumed, and consumed_char of
 251   the structure *CODING.  They also set the member result to one of
 252   CODING_RESULT_XXX indicating how the encoding finished.
 253
 254   DST_BYTES zero means that source area and destination area are
 255   overlapped, which means that we can produce an encoded text until it
 256   reaches the head of the not-yet-encoded source text.
 257
 258   Below is a template of these functions.  */
 259 #if 0
 260 static void
 261 encode_coding_XXX (struct coding_system *coding)
 262 {
 263   int multibytep = coding->dst_multibyte;
 264   int *charbuf = coding->charbuf;
 265   int *charbuf_end = coding->charbuf + coding->charbuf_used;
 266   unsigned char *dst = coding->destination + coding->produced;
 267   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 268   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 269   int produced_chars = 0;
 270
 271   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 272     {
 273       int c = *charbuf;
 274       /* Encode C into DST, and increment DST.  */
 275     }
 276  label_no_more_destination:
 277   /* How many chars and bytes we produced.  */
 278   coding->produced_char += produced_chars;
 279   coding->produced = dst - coding->destination;
 280 }
 281 #endif
 282
 283 \f
 284 /*** 1. Preamble ***/
 285
 286 #include <config.h>
 287 #include <stdio.h>
 288 #include <setjmp.h>
 289
 290 #include "lisp.h"
 291 #include "buffer.h"
 292 #include "character.h"
 293 #include "charset.h"
 294 #include "ccl.h"
 295 #include "composite.h"
 296 #include "coding.h"
 297 #include "window.h"
 298 #include "frame.h"
 299 #include "termhooks.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 Lisp_Object Qbuffer_file_coding_system;
 306 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 307 Lisp_Object Qdefault_char;
 308 Lisp_Object Qno_conversion, Qundecided;
 309 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 310 Lisp_Object Qbig, Qlittle;
 311 Lisp_Object Qcoding_system_history;
 312 Lisp_Object Qvalid_codes;
 313 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 314 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 315 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 316 Lisp_Object QCascii_compatible_p;
 317
 318 Lisp_Object Qcall_process, Qcall_process_region;
 319 Lisp_Object Qstart_process, Qopen_network_stream;
 320 Lisp_Object Qtarget_idx;
 321
 322 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 323 Lisp_Object Qinterrupted, Qinsufficient_memory;
 324
 325 /* If a symbol has this property, evaluate the value to define the
 326    symbol as a coding system.  */
 327 static Lisp_Object Qcoding_system_define_form;
 328
 329 /* Format of end-of-line decided by system.  This is Qunix on
 330    Unix and Mac, Qdos on DOS/Windows.
 331    This has an effect only for external encoding (i.e. for output to
 332    file and process), not for in-buffer or Lisp string encoding.  */
 333 static Lisp_Object system_eol_type;
 334
 335 #ifdef emacs
 336
 337 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 338
 339 /* Coding system emacs-mule and raw-text are for converting only
 340    end-of-line format.  */
 341 Lisp_Object Qemacs_mule, Qraw_text;
 342 Lisp_Object Qutf_8_emacs;
 343
 344 /* Coding-systems are handed between Emacs Lisp programs and C internal
 345    routines by the following three variables.  */
 346 /* Coding system to be used to encode text for terminal display when
 347    terminal coding system is nil.  */
 348 struct coding_system safe_terminal_coding;
 349
 350 #endif /* emacs */
 351
 352 Lisp_Object Qtranslation_table;
 353 Lisp_Object Qtranslation_table_id;
 354 Lisp_Object Qtranslation_table_for_decode;
 355 Lisp_Object Qtranslation_table_for_encode;
 356
 357 /* Two special coding systems.  */
 358 Lisp_Object Vsjis_coding_system;
 359 Lisp_Object Vbig5_coding_system;
 360
 361 /* ISO2022 section */
 362 /* Integer stored at index REG of CODING's coding_attr_iso_initial attribute.  */
 363 #define CODING_ISO_INITIAL(coding, reg)                 \
 364   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 365                      coding_attr_iso_initial),          \
 366                reg)))
 367
 368 /* CODING->safe_charsets[CHARSET_ID], or -1 if CHARSET_ID exceeds CODING->max_charset_id or that entry is 255.  */
 369 #define CODING_ISO_REQUEST(coding, charset_id)          \
 370   (((charset_id) <= (coding)->max_charset_id            \
 371     ? ((coding)->safe_charsets[charset_id] != 255       \
 372        ? (coding)->safe_charsets[charset_id]            \
 373        : -1)                                            \
 374     : -1))
 375
 376 /* Accessors for the members of CODING->spec.iso_2022.  */
 377 #define CODING_ISO_FLAGS(coding)        \
 378   ((coding)->spec.iso_2022.flags)
 379 #define CODING_ISO_DESIGNATION(coding, reg)     \
 380   ((coding)->spec.iso_2022.current_designation[reg])
 381 #define CODING_ISO_INVOCATION(coding, plane)    \
 382   ((coding)->spec.iso_2022.current_invocation[plane])
 383 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 384   ((coding)->spec.iso_2022.single_shifting)
 385 #define CODING_ISO_BOL(coding)  \
 386   ((coding)->spec.iso_2022.bol)
 387 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 388   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 389 #define CODING_ISO_CMP_STATUS(coding)   \
 390   (&(coding)->spec.iso_2022.cmp_status)
 391 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 392   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 393 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 394   ((coding)->spec.iso_2022.embedded_utf_8)
 395
 396 /* Control characters of ISO2022.  */
 397                         /* code */      /* function */
 398 #define ISO_CODE_SO     0x0E            /* shift-out */
 399 #define ISO_CODE_SI     0x0F            /* shift-in */
 400 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 401 #define ISO_CODE_ESC    0x1B            /* escape */
 402 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 403 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 404 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 405
 406 /* Every 1-byte code of ISO2022 is classified into one of the
 407    following classes.  */
 408 enum iso_code_class_type
 409   {
 410     ISO_control_0,              /* Control codes in the range
 411                                    0x00..0x1F and 0x7F, except for the
 412                                    following 4 codes.  */
 413     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 414     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 415     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 416     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 417     ISO_control_1,              /* Control codes in the range
 418                                    0x80..0x9F, except for the
 419                                    following 3 codes.  */
 420     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 421     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 422     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 423     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 424     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 425     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 426     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 427   };
 428
 429 /** The macros CODING_ISO_FLAG_XXX define the flag bits of the
 430     `iso-flags' attribute of an iso2022 coding system.  */
 431
 432 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 433    instead of the correct short-form sequence (e.g. ESC $ A).  */
 434 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 435
 436 /* If set, reset graphic planes and registers at end-of-line to the
 437    initial state.  */
 438 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 439
 440 /* If set, reset graphic planes and registers before any control
 441    characters to the initial state.  */
 442 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 443
 444 /* If set, encode by 7-bit environment.  */
 445 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 446
 447 /* If set, use locking-shift function.  */
 448 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 449
 450 /* If set, use single-shift function.  Overwrite
 451    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 452 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 453
 454 /* If set, use designation escape sequence.  */
 455 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 456
 457 /* If set, produce revision number sequence.  */
 458 #define CODING_ISO_FLAG_REVISION        0x0080
 459
 460 /* If set, produce ISO6429's direction specifying sequence.  */
 461 #define CODING_ISO_FLAG_DIRECTION       0x0100
 462
 463 /* If set, assume designation states are reset at beginning of line on
 464    output.  */
 465 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 466
 467 /* If set, designation sequence should be placed at beginning of line
 468    on output.  */
 469 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 470
 471 /* If set, do not encode unsafe characters on output.  */
 472 #define CODING_ISO_FLAG_SAFE            0x0800
 473
 474 /* If set, extra latin codes (128..159) are accepted as a valid code
 475    on input.  */
 476 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 477
 478 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 479
 480 /* #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 */
 481
 482 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 483
 484 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 485
 486 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 487
 488 /* A character to be produced on output if encoding of the original
 489    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 490 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 491
 492 /* UTF-8 section */
 493 #define CODING_UTF_8_BOM(coding)        \
 494   ((coding)->spec.utf_8_bom)
 495
 496 /* UTF-16 section */
 497 #define CODING_UTF_16_BOM(coding)       \
 498   ((coding)->spec.utf_16.bom)
 499
 500 #define CODING_UTF_16_ENDIAN(coding)    \
 501   ((coding)->spec.utf_16.endian)
 502
 503 #define CODING_UTF_16_SURROGATE(coding) \
 504   ((coding)->spec.utf_16.surrogate)
 505
 506
 507 /* CCL section */
 508 #define CODING_CCL_DECODER(coding)      \
 509   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 510 #define CODING_CCL_ENCODER(coding)      \
 511   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 512 #define CODING_CCL_VALIDS(coding)                                          \
 513   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 514
 515 /* Index for each coding category in `coding_categories'.  Each value is also
 516    the shift count used by the matching CATEGORY_MASK_XXX macro, if any.  */
 517 enum coding_category
 518   {
 519     coding_category_iso_7,
 520     coding_category_iso_7_tight,
 521     coding_category_iso_8_1,
 522     coding_category_iso_8_2,
 523     coding_category_iso_7_else,
 524     coding_category_iso_8_else,
 525     coding_category_utf_8_auto,
 526     coding_category_utf_8_nosig,
 527     coding_category_utf_8_sig,
 528     coding_category_utf_16_auto,
 529     coding_category_utf_16_be,
 530     coding_category_utf_16_le,
 531     coding_category_utf_16_be_nosig,
 532     coding_category_utf_16_le_nosig,
 533     coding_category_charset,
 534     coding_category_sjis,
 535     coding_category_big5,
 536     coding_category_ccl,
 537     coding_category_emacs_mule,
 538     /* All above are targets of code detection.  */
 539     coding_category_raw_text,
 540     coding_category_undecided,
 541     coding_category_max         /* Number of categories; sizes the per-category tables.  */
 542   };
 543
 544 /* Definitions of flag bits used in detect_coding_XXXX.  */
 545 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 546 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 547 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 548 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 549 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 550 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 551 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 552 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 553 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 554 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 555 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 556 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 557 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 558 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 559 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 560 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 561 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 562 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 563 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 564 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 565
 566 /* This value is returned if detect_coding_mask () finds nothing other
 567    than ASCII characters.  */
 568 #define CATEGORY_MASK_ANY               \
 569   (CATEGORY_MASK_ISO_7                  \
 570    | CATEGORY_MASK_ISO_7_TIGHT          \
 571    | CATEGORY_MASK_ISO_8_1              \
 572    | CATEGORY_MASK_ISO_8_2              \
 573    | CATEGORY_MASK_ISO_7_ELSE           \
 574    | CATEGORY_MASK_ISO_8_ELSE           \
 575    | CATEGORY_MASK_UTF_8_AUTO           \
 576    | CATEGORY_MASK_UTF_8_NOSIG          \
 577    | CATEGORY_MASK_UTF_8_SIG            \
 578    | CATEGORY_MASK_UTF_16_AUTO          \
 579    | CATEGORY_MASK_UTF_16_BE            \
 580    | CATEGORY_MASK_UTF_16_LE            \
 581    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 582    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 583    | CATEGORY_MASK_CHARSET              \
 584    | CATEGORY_MASK_SJIS                 \
 585    | CATEGORY_MASK_BIG5                 \
 586    | CATEGORY_MASK_CCL                  \
 587    | CATEGORY_MASK_EMACS_MULE)
 588
 589
 590 #define CATEGORY_MASK_ISO_7BIT \
 591   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 592
 593 #define CATEGORY_MASK_ISO_8BIT \
 594   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 595
 596 #define CATEGORY_MASK_ISO_ELSE \
 597   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 598
 599 #define CATEGORY_MASK_ISO_ESCAPE        \
 600   (CATEGORY_MASK_ISO_7                  \
 601    | CATEGORY_MASK_ISO_7_TIGHT          \
 602    | CATEGORY_MASK_ISO_7_ELSE           \
 603    | CATEGORY_MASK_ISO_8_ELSE)
 604
 605 #define CATEGORY_MASK_ISO       \
 606   (  CATEGORY_MASK_ISO_7BIT     \
 607      | CATEGORY_MASK_ISO_8BIT   \
 608      | CATEGORY_MASK_ISO_ELSE)
 609
 610 #define CATEGORY_MASK_UTF_16            \
 611   (CATEGORY_MASK_UTF_16_AUTO            \
 612    | CATEGORY_MASK_UTF_16_BE            \
 613    | CATEGORY_MASK_UTF_16_LE            \
 614    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 615    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 616
 617 #define CATEGORY_MASK_UTF_8     \
 618   (CATEGORY_MASK_UTF_8_AUTO     \
 619    | CATEGORY_MASK_UTF_8_NOSIG  \
 620    | CATEGORY_MASK_UTF_8_SIG)
 621
 622 /* Table of coding categories (Lisp symbols).  This variable is for
 623    internal use only.  */
 624 static Lisp_Object Vcoding_category_table;
 625
 626 /* Table of coding-categories ordered by priority.  */
 627 static enum coding_category coding_priorities[coding_category_max];
 628
 629 /* Nth element is a coding context for the coding system bound to the
 630    Nth coding category.  */
 631 static struct coding_system coding_categories[coding_category_max];
 632
 633 /*** Commonly used macros and functions ***/
 634 /* min and max mention each argument twice, so A and B must not have side effects.  */
 635 #ifndef min
 636 #define min(a, b) ((a) < (b) ? (a) : (b))
 637 #endif
 638 #ifndef max
 639 #define max(a, b) ((a) > (b) ? (a) : (b))
 640 #endif
 641 /* Set ATTRS to CODING_ID_ATTRS of CODING's id, and CHARSET_LIST to CODING_ATTR_CHARSET_LIST of ATTRS.  */
 642 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 643   do {                                                  \
 644     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 645     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 646   } while (0)
 647
 648
 649 /* Safely get one byte from the source text pointed by SRC which ends
 650    at SRC_END, set C to that byte, and increment CONSUMED_CHARS.  If the
 651    source is exhausted, jump to `no_more_source', first recording
 652    CODING_RESULT_INSUFFICIENT_SRC if SRC_BASE < SRC.  If multibytep is
 653    nonzero and the byte has its high bit set: a lead byte 0xC0 or 0xC1 is
 654    combined with the next byte (read without an end check) into C; otherwise
 655    C is set to the negative value of the character code at SRC and
 656    CODING_RESULT_INVALID_SRC is recorded.  The caller should declare and set: coding, src, src_base, src_end, multibytep, consumed_chars.  */
 657 #define ONE_MORE_BYTE(c)                                \
 658   do {                                                  \
 659     if (src == src_end)                                 \
 660       {                                                 \
 661         if (src_base < src)                             \
 662           record_conversion_result                      \
 663             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 664         goto no_more_source;                            \
 665       }                                                 \
 666     c = *src++;                                         \
 667     if (multibytep && (c & 0x80))                       \
 668       {                                                 \
 669         if ((c & 0xFE) == 0xC0)                         \
 670           c = ((c & 1) << 6) | *src++;                  \
 671         else                                            \
 672           {                                             \
 673             src--;                                      \
 674             c = - string_char (src, &src, NULL);        \
 675             record_conversion_result                    \
 676               (coding, CODING_RESULT_INVALID_SRC);      \
 677           }                                             \
 678       }                                                 \
 679     consumed_chars++;                                   \
 680   } while (0)
 681
 682 /* Safely get two bytes from the source text pointed by SRC which ends
 683    at SRC_END, and set C1 and C2 to those bytes while skipping the
 684    heading multibyte characters.  If there are not enough bytes in the
 685    source, it jumps to `no_more_source'.  If multibytep is nonzero and
 686    a multibyte character (other than a 0xC0/0xC1 two-byte form) is found
 687    for C2, set C2 to -1.  The caller should declare and set these
 688    variables appropriately in advance:
 689         src, src_end, multibytep
 690    It is intended that this macro is used in detect_coding_utf_16.  */
 691 /* Unlike ONE_MORE_BYTE, this neither counts consumed characters nor records a conversion result.  */
 692 #define TWO_MORE_BYTES(c1, c2)                          \
 693   do {                                                  \
 694     do {                                                \
 695       if (src == src_end)                               \
 696         goto no_more_source;                            \
 697       c1 = *src++;                                      \
 698       if (multibytep && (c1 & 0x80))                    \
 699         {                                               \
 700           if ((c1 & 0xFE) == 0xC0)                      \
 701             c1 = ((c1 & 1) << 6) | *src++;              \
 702           else                                          \
 703             {                                           \
 704               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 705               c1 = -1;                                  \
 706             }                                           \
 707         }                                               \
 708     } while (c1 < 0);                                   \
 709     if (src == src_end)                                 \
 710       goto no_more_source;                              \
 711     c2 = *src++;                                        \
 712     if (multibytep && (c2 & 0x80))                      \
 713       {                                                 \
 714         if ((c2 & 0xFE) == 0xC0)                        \
 715           c2 = ((c2 & 1) << 6) | *src++;                \
 716         else                                            \
 717           c2 = -1;                                      \
 718       }                                                 \
 719   } while (0)
 720
 721
 722 /* Store a byte C in the place pointed by DST and increment DST to the
 723    next free point, and increment PRODUCED_CHARS.  The caller should
 724    assure that C is 0..127, and declare and set the variables `dst'
 725    and `produced_chars' appropriately in advance.
 726 */
 727
 728
 729 #define EMIT_ONE_ASCII_BYTE(c)  \
 730   do {                          \
 731     produced_chars++;           \
 732     *dst++ = (c);               \
 733   } while (0)
 734
 735
 736 /* Like EMIT_ONE_ASCII_BYTE but store two bytes, C1 then C2, and add 2 to PRODUCED_CHARS.  */
 737
 738 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 739   do {                                  \
 740     produced_chars += 2;                \
 741     *dst++ = (c1), *dst++ = (c2);       \
 742   } while (0)
 743
 744
 745 /* Store a byte C in the place pointed by DST and increment DST to the
 746    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 747    nonzero, store in an appropriate multibyte form (a byte >= 0x80 is
 748    first converted by BYTE8_TO_CHAR).  The caller should declare and set
 749    the variables `dst', `multibytep' and `produced_chars' in advance.  */
 750
 751 #define EMIT_ONE_BYTE(c)                \
 752   do {                                  \
 753     produced_chars++;                   \
 754     if (multibytep)                     \
 755       {                                 \
 756         int ch = (c);                   \
 757         if (ch >= 0x80)                 \
 758           ch = BYTE8_TO_CHAR (ch);      \
 759         CHAR_STRING_ADVANCE (ch, dst);  \
 760       }                                 \
 761     else                                \
 762       *dst++ = (c);                     \
 763   } while (0)
 764
 765
 766 /* Like EMIT_ONE_BYTE, but emit two bytes, C1 then C2, and add 2 to PRODUCED_CHARS.  */
 767
 768 #define EMIT_TWO_BYTES(c1, c2)          \
 769   do {                                  \
 770     produced_chars += 2;                \
 771     if (multibytep)                     \
 772       {                                 \
 773         int ch;                         \
 774                                         \
 775         ch = (c1);                      \
 776         if (ch >= 0x80)                 \
 777           ch = BYTE8_TO_CHAR (ch);      \
 778         CHAR_STRING_ADVANCE (ch, dst);  \
 779         ch = (c2);                      \
 780         if (ch >= 0x80)                 \
 781           ch = BYTE8_TO_CHAR (ch);      \
 782         CHAR_STRING_ADVANCE (ch, dst);  \
 783       }                                 \
 784     else                                \
 785       {                                 \
 786         *dst++ = (c1);                  \
 787         *dst++ = (c2);                  \
 788       }                                 \
 789   } while (0)
 790
 791 /* Emit three bytes: C1 by EMIT_ONE_BYTE, then C2 and C3 by EMIT_TWO_BYTES.  */
 792 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 793   do {                                  \
 794     EMIT_ONE_BYTE (c1);                 \
 795     EMIT_TWO_BYTES (c2, c3);            \
 796   } while (0)
 797
 798 /* Emit four bytes as two EMIT_TWO_BYTES pairs: C1, C2 and then C3, C4.  */
 799 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 800   do {                                          \
 801     EMIT_TWO_BYTES (c1, c2);                    \
 802     EMIT_TWO_BYTES (c3, c4);                    \
 803   } while (0)
 804
 805
/* Prototypes for static functions.  */

/* Bookkeeping of the result of the last conversion.  */
static void record_conversion_result (struct coding_system *coding,
                                      enum coding_result_code result);

/* Per-coding-system detectors, decoders and encoders.  */
static int detect_coding_utf_8 (struct coding_system *,
                                struct coding_detection_info *info);
static void decode_coding_utf_8 (struct coding_system *);
static int encode_coding_utf_8 (struct coding_system *);

static int detect_coding_utf_16 (struct coding_system *,
                                 struct coding_detection_info *info);
static void decode_coding_utf_16 (struct coding_system *);
static int encode_coding_utf_16 (struct coding_system *);

static int detect_coding_iso_2022 (struct coding_system *,
                                   struct coding_detection_info *info);
static void decode_coding_iso_2022 (struct coding_system *);
static int encode_coding_iso_2022 (struct coding_system *);

static int detect_coding_emacs_mule (struct coding_system *,
                                     struct coding_detection_info *info);
static void decode_coding_emacs_mule (struct coding_system *);
static int encode_coding_emacs_mule (struct coding_system *);

static int detect_coding_sjis (struct coding_system *,
                               struct coding_detection_info *info);
static void decode_coding_sjis (struct coding_system *);
static int encode_coding_sjis (struct coding_system *);

static int detect_coding_big5 (struct coding_system *,
                               struct coding_detection_info *info);
static void decode_coding_big5 (struct coding_system *);
static int encode_coding_big5 (struct coding_system *);

static int detect_coding_ccl (struct coding_system *,
                              struct coding_detection_info *info);
static void decode_coding_ccl (struct coding_system *);
static int encode_coding_ccl (struct coding_system *);

static void decode_coding_raw_text (struct coding_system *);
static int encode_coding_raw_text (struct coding_system *);

/* Source and destination pointer maintenance and (re)allocation.  */
static void coding_set_source (struct coding_system *);
static void coding_set_destination (struct coding_system *);
static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
static void coding_alloc_by_making_gap (struct coding_system *,
                                        EMACS_INT, EMACS_INT);
static unsigned char *alloc_destination (struct coding_system *,
                                         EMACS_INT, unsigned char *);

/* Helpers shared by the conversion drivers.  */
static void setup_iso_safe_charsets (Lisp_Object);
static unsigned char *encode_designation_at_bol (struct coding_system *,
                                                 int *, unsigned char *);
static int detect_eol (const unsigned char *,
                       EMACS_INT, enum coding_category);
static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
static void decode_eol (struct coding_system *);
static Lisp_Object get_translation_table (Lisp_Object, int, int *);
static Lisp_Object get_translation (Lisp_Object, int *, int *);
static int produce_chars (struct coding_system *, Lisp_Object, int);
static INLINE void produce_charset (struct coding_system *, int *,
                                    EMACS_INT);
static void produce_annotation (struct coding_system *, EMACS_INT);
static int decode_coding (struct coding_system *);
static INLINE int *handle_composition_annotation (EMACS_INT, EMACS_INT,
                                                  struct coding_system *,
                                                  int *, EMACS_INT *);
static INLINE int *handle_charset_annotation (EMACS_INT, EMACS_INT,
                                              struct coding_system *,
                                              int *, EMACS_INT *);
static void consume_chars (struct coding_system *, Lisp_Object, int);
static int encode_coding (struct coding_system *);
static Lisp_Object make_conversion_work_buffer (int);
static Lisp_Object code_conversion_restore (Lisp_Object);
static INLINE int char_encodable_p (int, Lisp_Object);
static Lisp_Object make_subsidiaries (Lisp_Object);
 880
 881 static void
 882 record_conversion_result (struct coding_system *coding,
 883                           enum coding_result_code result)
 884 {
 885   coding->result = result;
 886   switch (result)
 887     {
 888     case CODING_RESULT_INSUFFICIENT_SRC:
 889       Vlast_code_conversion_error = Qinsufficient_source;
 890       break;
 891     case CODING_RESULT_INCONSISTENT_EOL:
 892       Vlast_code_conversion_error = Qinconsistent_eol;
 893       break;
 894     case CODING_RESULT_INVALID_SRC:
 895       Vlast_code_conversion_error = Qinvalid_source;
 896       break;
 897     case CODING_RESULT_INTERRUPT:
 898       Vlast_code_conversion_error = Qinterrupted;
 899       break;
 900     case CODING_RESULT_INSUFFICIENT_MEM:
 901       Vlast_code_conversion_error = Qinsufficient_memory;
 902       break;
 903     case CODING_RESULT_INSUFFICIENT_DST:
 904       /* Don't record this error in Vlast_code_conversion_error
 905          because it happens just temporarily and is resolved when the
 906          whole conversion is finished.  */
 907       break;
 908     case CODING_RESULT_SUCCESS:
 909       break;
 910     default:
 911       Vlast_code_conversion_error = intern ("Unknown error");
 912     }
 913 }
 914
 915 /* This wrapper macro is used to preserve validity of pointers into
 916    buffer text across calls to decode_char, which could cause
 917    relocation of buffers if it loads a charset map, because loading a
 918    charset map allocates large structures.  */
 919 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 920   do {                                                                       \
 921     charset_map_loaded = 0;                                                  \
 922     c = DECODE_CHAR (charset, code);                                         \
 923     if (charset_map_loaded)                                                  \
 924       {                                                                      \
 925         const unsigned char *orig = coding->source;                          \
 926         EMACS_INT offset;                                                    \
 927                                                                              \
 928         coding_set_source (coding);                                          \
 929         offset = coding->source - orig;                                      \
 930         src += offset;                                                       \
 931         src_base += offset;                                                  \
 932         src_end += offset;                                                   \
 933       }                                                                      \
 934   } while (0)
 935
 936
 937 /* If there are at least BYTES length of room at dst, allocate memory
 938    for coding->destination and update dst and dst_end.  We don't have
 939    to take care of coding->source which will be relocated.  It is
 940    handled by calling coding_set_source in encode_coding.  */
 941
 942 #define ASSURE_DESTINATION(bytes)                               \
 943   do {                                                          \
 944     if (dst + (bytes) >= dst_end)                               \
 945       {                                                         \
 946         int more_bytes = charbuf_end - charbuf + (bytes);       \
 947                                                                 \
 948         dst = alloc_destination (coding, more_bytes, dst);      \
 949         dst_end = coding->destination + coding->dst_bytes;      \
 950       }                                                         \
 951   } while (0)
 952
 953
 954 /* Store multibyte form of the character C in P, and advance P to the
 955    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
 956    never calls MAYBE_UNIFY_CHAR.  */
 957
 958 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
 959   do {                                          \
 960     if ((c) <= MAX_1_BYTE_CHAR)                 \
 961       *(p)++ = (c);                             \
 962     else if ((c) <= MAX_2_BYTE_CHAR)            \
 963       *(p)++ = (0xC0 | ((c) >> 6)),             \
 964         *(p)++ = (0x80 | ((c) & 0x3F));         \
 965     else if ((c) <= MAX_3_BYTE_CHAR)            \
 966       *(p)++ = (0xE0 | ((c) >> 12)),            \
 967         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
 968         *(p)++ = (0x80 | ((c) & 0x3F));         \
 969     else if ((c) <= MAX_4_BYTE_CHAR)            \
 970       *(p)++ = (0xF0 | (c >> 18)),              \
 971         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
 972         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
 973         *(p)++ = (0x80 | (c & 0x3F));           \
 974     else if ((c) <= MAX_5_BYTE_CHAR)            \
 975       *(p)++ = 0xF8,                            \
 976         *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
 977         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
 978         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
 979         *(p)++ = (0x80 | (c & 0x3F));           \
 980     else                                        \
 981       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
 982   } while (0)
 983
 984
/* Return the character code of character whose multibyte form is at
   P, and advance P to the end of the multibyte form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length of the form is taken from the leading byte alone: the
   bits 0x80, 0x20, 0x10 and 0x08 are tested in turn to select a
   1-, 2-, 3-, 4- or 5-byte form.  The trailing bytes are not
   validated.  In the 2-byte case a leading byte below 0xC2 has
   0x3FFF80 or'ed into the result.  In the 5-byte case only the four
   trailing bytes contribute to the value.

   This is a single expression; P is evaluated several times, so it
   must be an lvalue without side effects.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  (!((p)[0] & 0x80)                                             \
   ? *(p)++                                                     \
   : ! ((p)[0] & 0x20)                                          \
   ? ((p) += 2,                                                 \
      ((((p)[-2] & 0x1F) << 6)                                  \
       | ((p)[-1] & 0x3F)                                       \
       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
   : ! ((p)[0] & 0x10)                                          \
   ? ((p) += 3,                                                 \
      ((((p)[-3] & 0x0F) << 12)                                 \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ! ((p)[0] & 0x08)                                          \
   ? ((p) += 4,                                                 \
      ((((p)[-4] & 0xF) << 18)                                  \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p) += 5,                                                 \
      ((((p)[-4] & 0x3F) << 18)                                 \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F))))
1013
1014
1015 static void
1016 coding_set_source (struct coding_system *coding)
1017 {
1018   if (BUFFERP (coding->src_object))
1019     {
1020       struct buffer *buf = XBUFFER (coding->src_object);
1021
1022       if (coding->src_pos < 0)
1023         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1024       else
1025         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1026     }
1027   else if (STRINGP (coding->src_object))
1028     {
1029       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1030     }
1031   else
1032     {
1033       /* Otherwise, the source is C string and is never relocated
1034          automatically.  Thus we don't have to update anything.  */
1035     }
1036 }
1037
1038 static void
1039 coding_set_destination (struct coding_system *coding)
1040 {
1041   if (BUFFERP (coding->dst_object))
1042     {
1043       if (coding->src_pos < 0)
1044         {
1045           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1046           coding->dst_bytes = (GAP_END_ADDR
1047                                - (coding->src_bytes - coding->consumed)
1048                                - coding->destination);
1049         }
1050       else
1051         {
1052           /* We are sure that coding->dst_pos_byte is before the gap
1053              of the buffer. */
1054           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1055                                  + coding->dst_pos_byte - BEG_BYTE);
1056           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1057                                - coding->destination);
1058         }
1059     }
1060   else
1061     {
1062       /* Otherwise, the destination is C string and is never relocated
1063          automatically.  Thus we don't have to update anything.  */
1064     }
1065 }
1066
1067
1068 static void
1069 coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
1070 {
1071   coding->destination = (unsigned char *) xrealloc (coding->destination,
1072                                                     coding->dst_bytes + bytes);
1073   coding->dst_bytes += bytes;
1074 }
1075
1076 static void
1077 coding_alloc_by_making_gap (struct coding_system *coding,
1078                             EMACS_INT gap_head_used, EMACS_INT bytes)
1079 {
1080   if (EQ (coding->src_object, coding->dst_object))
1081     {
1082       /* The gap may contain the produced data at the head and not-yet
1083          consumed data at the tail.  To preserve those data, we at
1084          first make the gap size to zero, then increase the gap
1085          size.  */
1086       EMACS_INT add = GAP_SIZE;
1087
1088       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1089       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1090       make_gap (bytes);
1091       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1092       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1093     }
1094   else
1095     {
1096       Lisp_Object this_buffer;
1097
1098       this_buffer = Fcurrent_buffer ();
1099       set_buffer_internal (XBUFFER (coding->dst_object));
1100       make_gap (bytes);
1101       set_buffer_internal (XBUFFER (this_buffer));
1102     }
1103 }
1104
1105
1106 static unsigned char *
1107 alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1108                    unsigned char *dst)
1109 {
1110   EMACS_INT offset = dst - coding->destination;
1111
1112   if (BUFFERP (coding->dst_object))
1113     {
1114       struct buffer *buf = XBUFFER (coding->dst_object);
1115
1116       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1117     }
1118   else
1119     coding_alloc_by_realloc (coding, nbytes);
1120   coding_set_destination (coding);
1121   dst = coding->destination + offset;
1122   return dst;
1123 }
1124
1125 /** Macros for annotations.  */
1126
1127 /* An annotation data is stored in the array coding->charbuf in this
1128    format:
1129      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1130    LENGTH is the number of elements in the annotation.
1131    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1132    NCHARS is the number of characters in the text annotated.
1133
   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
1138      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1139
1140    NBYTES is the number of bytes specified in the header part of
1141    old-style emacs-mule encoding, or 0 for the other kind of
1142    composition.
1143
1144    METHOD is one of enum composition_method.
1145
1146    Optional COMPOSITION-COMPONENTS are characters and composition
1147    rules.
1148
1149    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1150    follows.
1151
1152    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1153    recover from an invalid annotation, and should be skipped by
1154    produce_annotation.  */
1155
/* Maximum length of the header of annotation data.  */
#define MAX_ANNOTATION_LENGTH 5

/* Store the three common header elements of an annotation at BUF,
   namely -LEN, MASK and NCHARS, advancing BUF past them, and mark
   `coding' as annotated.  BUF must be an lvalue, and a variable
   `coding' must be in scope.

   The expansion deliberately does not end in a semicolon: the caller
   supplies it, so the macro can be used as the body of an `if' that
   has an `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1166
/* Store a composition annotation at BUF and advance BUF past it: the
   common header written by ADD_ANNOTATION_DATA with length 5 and
   CODING_ANNOTATE_COMPOSITION_MASK, followed by NBYTES and METHOD.
   BUF must be an lvalue; it is parenthesized at every use so that an
   expression such as `*pp' advances the intended pointer.  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes);                                                    \
    *(buf)++ = (method);                                                    \
  } while (0)
1173
1174
/* Store a charset annotation at BUF and advance BUF past it: the
   common header written by ADD_ANNOTATION_DATA with length 4 and
   CODING_ANNOTATE_CHARSET_MASK, followed by the charset ID.  BUF must
   be an lvalue; it is parenthesized at every use so that an
   expression such as `*pp' advances the intended pointer.  */
#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1180
1181 \f
1182 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1183
1184
1185
1186 \f
1187 /*** 3. UTF-8 ***/
1188
1189 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1190    Check if a text is encoded in UTF-8.  If it is, return 1, else
1191    return 0.  */
1192
/* Classify the byte C by its high bits.  UTF_8_1_OCTET_P only checks
   that C is below 0x80, so it is also true for a negative C.
   UTF_8_EXTRA_OCTET_P matches the pattern 10xxxxxx.  The
   UTF_8_n_OCTET_LEADING_P predicates match 110xxxxx, 1110xxxx,
   11110xxx and 111110xx respectively.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)

/* The three bytes that the UTF-8 detector, decoder and encoder treat
   as the byte order mark (signature).  */
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1203
/* Scan the source of CODING and record in DETECT_INFO whether it can
   be UTF-8.  Return 0, with CATEGORY_MASK_UTF_8 rejected, as soon as
   a byte sequence is not well-formed by the UTF_8_*_P predicates.
   Return 1 when the end of the source is reached without such a
   sequence.

   NOTE(review): `multibytep' and `consumed_chars' are not referenced
   directly here; presumably ONE_MORE_BYTE uses them and jumps to
   `no_more_source' at the end of the source -- confirm against the
   macro definition.  */
static int
detect_coding_utf_8 (struct coding_system *coding,
                     struct coding_detection_info *detect_info)
{
  const unsigned char *src = coding->source, *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  /* Nonzero if the source starts with the three BOM bytes.  */
  int bom_found = 0;
  /* Nonzero once at least one multi-octet sequence has been seen.  */
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_UTF_8;
  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  while (1)
    {
      int c, c1, c2, c3, c4;

      /* Remember where this sequence starts.  */
      src_base = src;
      ONE_MORE_BYTE (c);
      if (c < 0 || UTF_8_1_OCTET_P (c))
        continue;
      /* C leads a multi-octet sequence.  Each further octet must be a
         continuation octet; the sequence is accepted as soon as its
         length matches the kind of leading octet.  */
      ONE_MORE_BYTE (c1);
      if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
        break;
      if (UTF_8_2_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c2);
      if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
        break;
      if (UTF_8_3_OCTET_LEADING_P (c))
        {
          found = 1;
          /* Only a sequence at the very start of the source counts as
             a BOM.  */
          if (src_base == coding->source
              && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
            bom_found = 1;
          continue;
        }
      ONE_MORE_BYTE (c3);
      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
        break;
      if (UTF_8_4_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c4);
      if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
        break;
      if (UTF_8_5_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      break;
    }
  /* The loop was left with `break': an invalid sequence was seen.  */
  detect_info->rejected |= CATEGORY_MASK_UTF_8;
  return 0;

 no_more_source:
  /* A sequence cut short at the end of the last block is invalid.  */
  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8;
      return 0;
    }
  if (bom_found)
    {
      /* The leading bytes EF BB BF (the character U+FEFF) don't
         necessarily mean a BOM, so both the variant with signature
         and the one without are reported as found.  */
      detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
    }
  else
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
      if (found)
        detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
    }
  return 1;
}
1286
1287
/* Decode the UTF-8 bytes of CODING's source, starting at
   CODING->consumed, into character codes stored in CODING->charbuf
   from index CODING->charbuf_used on.  Decoding stops when charbuf
   is full or the source runs out.  On exit CODING->consumed,
   CODING->consumed_char and CODING->charbuf_used are updated; only
   sequences that were decoded completely are counted as consumed.

   A byte that does not start a valid sequence is stored as a single
   character (itself if ASCII, else BYTE8_TO_CHAR of it) and counted
   in CODING->errors.

   NOTE(review): ONE_MORE_BYTE is defined outside this function;
   presumably it maintains `consumed_chars', honors `multibytep' and
   jumps to `no_more_source' at the end of the source -- confirm.  */
static void
decode_coding_utf_8 (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  /* Start of the sequence currently being decoded.  */
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  int consumed_chars = 0, consumed_chars_base = 0;
  int multibytep = coding->src_multibyte;
  enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
  Lisp_Object attr, charset_list;
  int eol_dos =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR when eol_dos is set, or -1 if
     there is none pending.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attr, charset_list);

  if (bom != utf_without_bom)
    {
      int c1, c2, c3;

      /* Look at the first three bytes.  They stay skipped only if
         they are exactly the BOM; in every other case SRC is put back
         to where it was.  */
      src_base = src;
      ONE_MORE_BYTE (c1);
      if (! UTF_8_3_OCTET_LEADING_P (c1))
        src = src_base;
      else
        {
          ONE_MORE_BYTE (c2);
          if (! UTF_8_EXTRA_OCTET_P (c2))
            src = src_base;
          else
            {
              ONE_MORE_BYTE (c3);
              if (! UTF_8_EXTRA_OCTET_P (c3))
                src = src_base;
              else
                {
                  if ((c1 != UTF_8_BOM_1)
                      || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
                    src = src_base;
                  else
                    CODING_UTF_8_BOM (coding) = utf_without_bom;
                }
            }
        }
    }
  /* Whatever was found, the BOM state is reset here, so the check
     above is not repeated the next time this coding is used.  */
  CODING_UTF_8_BOM (coding) = utf_without_bom;

  while (1)
    {
      int c, c1, c2, c3, c4, c5;

      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* No room left.  Step back over a byte that was read ahead
             after a CR so that it is not counted as consumed.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        c1 = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c1);
      if (c1 < 0)
        {
          /* A negative value is stored with its sign flipped.  */
          c = - c1;
        }
      else if (UTF_8_1_OCTET_P (c1))
        {
          /* With DOS end-of-line, read the byte that follows a CR
             right away; it is kept in byte_after_cr and handled by
             the next iteration.  */
          if (eol_dos && c1 == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          c = c1;
        }
      else
        {
          ONE_MORE_BYTE (c2);
          if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
            goto invalid_code;
          if (UTF_8_2_OCTET_LEADING_P (c1))
            {
              c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
              /* Reject overlong sequences here and below.  Encoders
                 producing them are incorrect, they can be misleading,
                 and they mess up read/write invariance.  */
              if (c < 128)
                goto invalid_code;
            }
          else
            {
              ONE_MORE_BYTE (c3);
              if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
                goto invalid_code;
              if (UTF_8_3_OCTET_LEADING_P (c1))
                {
                  c = (((c1 & 0xF) << 12)
                       | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
                  if (c < 0x800
                      || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
                    goto invalid_code;
                }
              else
                {
                  ONE_MORE_BYTE (c4);
                  if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
                    goto invalid_code;
                  if (UTF_8_4_OCTET_LEADING_P (c1))
                    {
                    c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
                         | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
                    if (c < 0x10000)
                      goto invalid_code;
                    }
                  else
                    {
                      ONE_MORE_BYTE (c5);
                      if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
                        goto invalid_code;
                      if (UTF_8_5_OCTET_LEADING_P (c1))
                        {
                          c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
                               | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
                               | (c5 & 0x3F));
                          /* Too large, or overlong for five octets.  */
                          if ((c > MAX_CHAR) || (c < 0x200000))
                            goto invalid_code;
                        }
                      else
                        goto invalid_code;
                    }
                }
            }
        }

      *charbuf++ = c;
      continue;

    invalid_code:
      /* Go back to the start of the bad sequence and emit just its
         first byte; decoding resumes at the byte after it.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
      coding->errors++;
    }

 no_more_source:
  /* Use the *_base values so that a sequence that was only partly
     read is left unconsumed.  */
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
1440
1441
1442 static int
1443 encode_coding_utf_8 (struct coding_system *coding)
1444 {
1445   int multibytep = coding->dst_multibyte;
1446   int *charbuf = coding->charbuf;
1447   int *charbuf_end = charbuf + coding->charbuf_used;
1448   unsigned char *dst = coding->destination + coding->produced;
1449   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1450   int produced_chars = 0;
1451   int c;
1452
1453   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1454     {
1455       ASSURE_DESTINATION (3);
1456       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1457       CODING_UTF_8_BOM (coding) = utf_without_bom;
1458     }
1459
1460   if (multibytep)
1461     {
1462       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1463
1464       while (charbuf < charbuf_end)
1465         {
1466           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1467
1468           ASSURE_DESTINATION (safe_room);
1469           c = *charbuf++;
1470           if (CHAR_BYTE8_P (c))
1471             {
1472               c = CHAR_TO_BYTE8 (c);
1473               EMIT_ONE_BYTE (c);
1474             }
1475           else
1476             {
1477               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1478               for (p = str; p < pend; p++)
1479                 EMIT_ONE_BYTE (*p);
1480             }
1481         }
1482     }
1483   else
1484     {
1485       int safe_room = MAX_MULTIBYTE_LENGTH;
1486
1487       while (charbuf < charbuf_end)
1488         {
1489           ASSURE_DESTINATION (safe_room);
1490           c = *charbuf++;
1491           if (CHAR_BYTE8_P (c))
1492             *dst++ = CHAR_TO_BYTE8 (c);
1493           else
1494             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1495           produced_chars++;
1496         }
1497     }
1498   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1499   coding->produced_char += produced_chars;
1500   coding->produced = dst - coding->destination;
1501   return 0;
1502 }
1503
1504
1505 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1506    Check if a text is encoded in one of UTF-16 based coding systems.
1507    If it is, return 1, else return 0.  */
1508
/* Nonzero if the bits of VAL selected by the mask 0xFC00 equal
   0xD800, i.e. the low 16 bits of VAL are in the range
   0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the bits of VAL selected by the mask 0xFC00 equal
   0xDC00, i.e. the low 16 bits of VAL are in the range
   0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
1514
1515
/* Examine the source of CODING and record in DETECT_INFO which of the
   UTF-16 categories it may belong to.  Return 1 when the first two
   bytes are a byte order mark, or when the end of the source is
   reached while reading; return 0 in all other cases.

   NOTE(review): TWO_MORE_BYTES is only partly visible from here;
   presumably it honors `multibytep' and jumps to `no_more_source' at
   the end of the source -- confirm.  C1 is used as an array index
   below without a check for a negative value; verify that the macro
   cannot leave C1 negative when C2 is non-negative.  */
static int
detect_coding_utf_16 (struct coding_system *coding,
                      struct coding_detection_info *detect_info)
{
  const unsigned char *src = coding->source;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int c1, c2;

  detect_info->checked |= CATEGORY_MASK_UTF_16;
  /* An odd count in the last block rules out every UTF-16 variant.  */
  if (coding->mode & CODING_MODE_LAST_BLOCK
      && (coding->src_chars & 1))
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_16;
      return 0;
    }

  TWO_MORE_BYTES (c1, c2);
  if ((c1 == 0xFF) && (c2 == 0xFE))
    {
      /* The bytes FF FE: little endian with signature.  */
      detect_info->found |= (CATEGORY_MASK_UTF_16_LE
                             | CATEGORY_MASK_UTF_16_AUTO);
      detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
                                | CATEGORY_MASK_UTF_16_BE_NOSIG
                                | CATEGORY_MASK_UTF_16_LE_NOSIG);
    }
  else if ((c1 == 0xFE) && (c2 == 0xFF))
    {
      /* The bytes FE FF: big endian with signature.  */
      detect_info->found |= (CATEGORY_MASK_UTF_16_BE
                             | CATEGORY_MASK_UTF_16_AUTO);
      detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
                                | CATEGORY_MASK_UTF_16_BE_NOSIG
                                | CATEGORY_MASK_UTF_16_LE_NOSIG);
    }
  else if (c2 < 0)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_16;
      return 0;
    }
  else
    {
      /* We check the dispersion of Eth and Oth bytes where E is even and
         O is odd.  If both are high, we assume binary data.*/
      /* E and O flag the byte values already seen as the first and
         as the second byte of a pair; E_NUM and O_NUM count them.  */
      unsigned char e[256], o[256];
      unsigned e_num = 1, o_num = 1;

      memset (e, 0, 256);
      memset (o, 0, 256);
      e[c1] = 1;
      o[c2] = 1;

      /* There is no signature, so the variants with one are out.  */
      detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
                                |CATEGORY_MASK_UTF_16_BE
                                | CATEGORY_MASK_UTF_16_LE);

      /* Keep reading until every UTF-16 category has been rejected
         or the source is exhausted.  */
      while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
             != CATEGORY_MASK_UTF_16)
        {
          TWO_MORE_BYTES (c1, c2);
          if (c2 < 0)
            break;
          if (! e[c1])
            {
              e[c1] = 1;
              e_num++;
              /* 128 or more distinct first bytes: not big endian.  */
              if (e_num >= 128)
                detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
            }
          if (! o[c2])
            {
              o[c2] = 1;
              o_num++;
              /* 128 or more distinct second bytes: not little endian.  */
              if (o_num >= 128)
                detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
            }
        }
      return 0;
    }

 no_more_source:
  return 1;
}
1598
1599 static void
1600 decode_coding_utf_16 (struct coding_system *coding)
1601 {
1602   const unsigned char *src = coding->source + coding->consumed;
1603   const unsigned char *src_end = coding->source + coding->src_bytes;
1604   const unsigned char *src_base;
1605   int *charbuf = coding->charbuf + coding->charbuf_used;
1606   /* We may produces at most 3 chars in one loop.  */
1607   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1608   int consumed_chars = 0, consumed_chars_base = 0;
1609   int multibytep = coding->src_multibyte;
1610   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1611   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1612   int surrogate = CODING_UTF_16_SURROGATE (coding);
1613   Lisp_Object attr, charset_list;
1614   int eol_dos =
1615     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1616   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1617
1618   CODING_GET_INFO (coding, attr, charset_list);
1619
1620   if (bom == utf_with_bom)
1621     {
1622       int c, c1, c2;
1623
1624       src_base = src;
1625       ONE_MORE_BYTE (c1);
1626       ONE_MORE_BYTE (c2);
1627       c = (c1 << 8) | c2;
1628
1629       if (endian == utf_16_big_endian
1630           ? c != 0xFEFF : c != 0xFFFE)
1631         {
1632           /* The first two bytes are not BOM.  Treat them as bytes
1633              for a normal character.  */
1634           src = src_base;
1635           coding->errors++;
1636         }
1637       CODING_UTF_16_BOM (coding) = utf_without_bom;
1638     }
1639   else if (bom == utf_detect_bom)
1640     {
1641       /* We have already tried to detect BOM and failed in
1642          detect_coding.  */
1643       CODING_UTF_16_BOM (coding) = utf_without_bom;
1644     }
1645
1646   while (1)
1647     {
1648       int c, c1, c2;
1649
1650       src_base = src;
1651       consumed_chars_base = consumed_chars;
1652
1653       if (charbuf >= charbuf_end)
1654         {
1655           if (byte_after_cr1 >= 0)
1656             src_base -= 2;
1657           break;
1658         }
1659
1660       if (byte_after_cr1 >= 0)
1661         c1 = byte_after_cr1, byte_after_cr1 = -1;
1662       else
1663         ONE_MORE_BYTE (c1);
1664       if (c1 < 0)
1665         {
1666           *charbuf++ = -c1;
1667           continue;
1668         }
1669       if (byte_after_cr2 >= 0)
1670         c2 = byte_after_cr2, byte_after_cr2 = -1;
1671       else
1672         ONE_MORE_BYTE (c2);
1673       if (c2 < 0)
1674         {
1675           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1676           *charbuf++ = -c2;
1677           continue;
1678         }
1679       c = (endian == utf_16_big_endian
1680            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1681
1682       if (surrogate)
1683         {
1684           if (! UTF_16_LOW_SURROGATE_P (c))
1685             {
1686               if (endian == utf_16_big_endian)
1687                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1688               else
1689                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1690               *charbuf++ = c1;
1691               *charbuf++ = c2;
1692               coding->errors++;
1693               if (UTF_16_HIGH_SURROGATE_P (c))
1694                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1695               else
1696                 *charbuf++ = c;
1697             }
1698           else
1699             {
1700               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1701               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1702               *charbuf++ = 0x10000 + c;
1703             }
1704         }
1705       else
1706         {
1707           if (UTF_16_HIGH_SURROGATE_P (c))
1708             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1709           else
1710             {
1711               if (eol_dos && c == '\r')
1712                 {
1713                   ONE_MORE_BYTE (byte_after_cr1);
1714                   ONE_MORE_BYTE (byte_after_cr2);
1715                 }
1716               *charbuf++ = c;
1717             }
1718         }
1719     }
1720
1721  no_more_source:
1722   coding->consumed_char += consumed_chars_base;
1723   coding->consumed = src_base - coding->source;
1724   coding->charbuf_used = charbuf - coding->charbuf;
1725 }
1726
1727 static int
1728 encode_coding_utf_16 (struct coding_system *coding)
1729 {
1730   int multibytep = coding->dst_multibyte;
1731   int *charbuf = coding->charbuf;
1732   int *charbuf_end = charbuf + coding->charbuf_used;
1733   unsigned char *dst = coding->destination + coding->produced;
1734   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1735   int safe_room = 8;
1736   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1737   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1738   int produced_chars = 0;
1739   Lisp_Object attrs, charset_list;
1740   int c;
1741
1742   CODING_GET_INFO (coding, attrs, charset_list);
1743
1744   if (bom != utf_without_bom)
1745     {
1746       ASSURE_DESTINATION (safe_room);
1747       if (big_endian)
1748         EMIT_TWO_BYTES (0xFE, 0xFF);
1749       else
1750         EMIT_TWO_BYTES (0xFF, 0xFE);
1751       CODING_UTF_16_BOM (coding) = utf_without_bom;
1752     }
1753
1754   while (charbuf < charbuf_end)
1755     {
1756       ASSURE_DESTINATION (safe_room);
1757       c = *charbuf++;
1758       if (c > MAX_UNICODE_CHAR)
1759         c = coding->default_char;
1760
1761       if (c < 0x10000)
1762         {
1763           if (big_endian)
1764             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1765           else
1766             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1767         }
1768       else
1769         {
1770           int c1, c2;
1771
1772           c -= 0x10000;
1773           c1 = (c >> 10) + 0xD800;
1774           c2 = (c & 0x3FF) + 0xDC00;
1775           if (big_endian)
1776             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1777           else
1778             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1779         }
1780     }
1781   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1782   coding->produced = dst - coding->destination;
1783   coding->produced_char += produced_chars;
1784   return 0;
1785 }
1786
1787 \f
1788 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1789
1790 /* Emacs' internal format for representation of multiple character
1791    sets is a kind of multi-byte encoding, i.e. characters are
1792    represented by variable-length sequences of one-byte codes.
1793
1794    ASCII characters and control characters (e.g. `tab', `newline') are
1795    represented by one-byte sequences which are their ASCII codes, in
1796    the range 0x00 through 0x7F.
1797
1798    8-bit characters of the range 0x80..0x9F are represented by
1799    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1800    code + 0x20).
1801
1802    8-bit characters of the range 0xA0..0xFF are represented by
1803    one-byte sequences which are their 8-bit code.
1804
1805    The other characters are represented by a sequence of `base
1806    leading-code', optional `extended leading-code', and one or two
1807    `position-code's.  The length of the sequence is determined by the
1808    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1809    whereas extended leading-code and position-code take the range 0xA0
1810    through 0xFF.  See `charset.h' for more details about leading-code
1811    and position-code.
1812
1813    --- CODE RANGE of Emacs' internal format ---
1814    character set        range
1815    -------------        -----
1816    ascii                0x00..0x7F
1817    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1818    eight-bit-graphic    0xA0..0xBF
1819    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1820    ---------------------------------------------
1821
1822    As this is the internal character representation, the format is
1823    usually not used externally (i.e. in a file or in a data sent to a
1824    process).  But, it is possible to have a text externally in this
1825    format (i.e. by encoding by the coding system `emacs-mule').
1826
1827    In that case, a sequence of one-byte codes has a slightly different
1828    form.
1829
1830    At first, all characters in eight-bit-control are represented by
1831    one-byte sequences which are their 8-bit code.
1832
1833    Next, character composition data are represented by the byte
1834    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1835    where,
1836         METHOD is 0xF2 plus one of composition method (enum
1837         composition_method),
1838
1839         BYTES is 0xA0 plus a byte length of this composition data,
1840
1841         CHARS is 0xA0 plus a number of characters composed by this
1842         data,
1843
1844         COMPONENTs are characters of multibyte form or composition
1845         rules encoded by two-byte of ASCII codes.
1846
1847    In addition, for backward compatibility, the following formats are
1848    also recognized as composition data on decoding.
1849
1850    0x80 MSEQ ...
1851    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1852
1853    Here,
1854         MSEQ is a multibyte form, but in one of these special formats:
1855           ASCII: 0xA0 ASCII_CODE+0x80,
1856           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1857         RULE is a one byte code of the range 0xA0..0xF0 that
1858         represents a composition rule.
1859   */
1860
/* Table indexed by a byte value.  detect_coding_emacs_mule and
   emacs_mule_char read emacs_mule_bytes[B] as the total number of
   bytes in the emacs-mule sequence introduced by byte B.
   NOTE(review): the table is filled in outside this part of the
   file; confirm the individual entries there.  */
1861 char emacs_mule_bytes[256];
1862
1863
1864 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1865    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1866    else return 0.  */
1867
1868 static int
1869 detect_coding_emacs_mule (struct coding_system *coding,
1870                           struct coding_detection_info *detect_info)
1871 {
1872   const unsigned char *src = coding->source, *src_base;
1873   const unsigned char *src_end = coding->source + coding->src_bytes;
1874   int multibytep = coding->src_multibyte;
1875   int consumed_chars = 0;
1876   int c;
1877   int found = 0;
1878
1879   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1880   /* A coding system of this category is always ASCII compatible.  */
1881   src += coding->head_ascii;
1882
1883   while (1)
1884     {
1885       src_base = src;
1886       ONE_MORE_BYTE (c);
1887       if (c < 0)
1888         continue;
1889       if (c == 0x80)
1890         {
1891           /* Perhaps the start of composite character.  We simply skip
1892              it because analyzing it is too heavy for detecting.  But,
1893              at least, we check that the composite character
1894              constitutes of more than 4 bytes.  */
1895           const unsigned char *src_start;
1896
1897         repeat:
1898           src_start = src;
1899           do
1900             {
1901               ONE_MORE_BYTE (c);
1902             }
1903           while (c >= 0xA0);
1904
1905           if (src - src_start <= 4)
1906             break;
1907           found = CATEGORY_MASK_EMACS_MULE;
1908           if (c == 0x80)
1909             goto repeat;
1910         }
1911
1912       if (c < 0x80)
1913         {
1914           if (c < 0x20
1915               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1916             break;
1917         }
1918       else
1919         {
1920           int more_bytes = emacs_mule_bytes[c] - 1;
1921
1922           while (more_bytes > 0)
1923             {
1924               ONE_MORE_BYTE (c);
1925               if (c < 0xA0)
1926                 {
1927                   src--;        /* Unread the last byte.  */
1928                   break;
1929                 }
1930               more_bytes--;
1931             }
1932           if (more_bytes != 0)
1933             break;
1934           found = CATEGORY_MASK_EMACS_MULE;
1935         }
1936     }
1937   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1938   return 0;
1939
1940  no_more_source:
1941   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1942     {
1943       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1944       return 0;
1945     }
1946   detect_info->found |= found;
1947   return 1;
1948 }
1949
1950
1951 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
1952    character.  If CMP_STATUS indicates that we must expect MSEQ or
1953    RULE described above, decode it and return the negative value of
1954    the decoded character or rule.  If an invalid byte is found, return
1955    -1.  If SRC is too short, return -2.  */
1956
1957 static int
1958 emacs_mule_char (struct coding_system *coding, const unsigned char *src,
1959                  int *nbytes, int *nchars, int *id,
1960                  struct composition_status *cmp_status)
1961 {
1962   const unsigned char *src_end = coding->source + coding->src_bytes;
1963   const unsigned char *src_base = src;
1964   int multibytep = coding->src_multibyte;
1965   int charset_ID;
1966   unsigned code;
1967   int c;
1968   int consumed_chars = 0;
1969   int mseq_found = 0;
1970
1971   ONE_MORE_BYTE (c);
1972   if (c < 0)
1973     {
1974       c = -c;
1975       charset_ID = emacs_mule_charset[0];
1976     }
1977   else
1978     {
1979       if (c >= 0xA0)
1980         {
1981           if (cmp_status->state != COMPOSING_NO
1982               && cmp_status->old_form)
1983             {
1984               if (cmp_status->state == COMPOSING_CHAR)
1985                 {
1986                   if (c == 0xA0)
1987                     {
1988                       ONE_MORE_BYTE (c);
1989                       c -= 0x80;
1990                       if (c < 0)
1991                         goto invalid_code;
1992                     }
1993                   else
1994                     c -= 0x20;
1995                   mseq_found = 1;
1996                 }
1997               else
1998                 {
1999                   *nbytes = src - src_base;
2000                   *nchars = consumed_chars;
2001                   return -c;
2002                 }
2003             }
2004           else
2005             goto invalid_code;
2006         }
2007
2008       switch (emacs_mule_bytes[c])
2009         {
2010         case 2:
2011           if ((charset_ID = emacs_mule_charset[c]) < 0)
2012             goto invalid_code;
2013           ONE_MORE_BYTE (c);
2014           if (c < 0xA0)
2015             goto invalid_code;
2016           code = c & 0x7F;
2017           break;
2018
2019         case 3:
2020           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2021               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2022             {
2023               ONE_MORE_BYTE (c);
2024               if (c < 0xA0 || (charset_ID = emacs_mule_charset[c]) < 0)
2025                 goto invalid_code;
2026               ONE_MORE_BYTE (c);
2027               if (c < 0xA0)
2028                 goto invalid_code;
2029               code = c & 0x7F;
2030             }
2031           else
2032             {
2033               if ((charset_ID = emacs_mule_charset[c]) < 0)
2034                 goto invalid_code;
2035               ONE_MORE_BYTE (c);
2036               if (c < 0xA0)
2037                 goto invalid_code;
2038               code = (c & 0x7F) << 8;
2039               ONE_MORE_BYTE (c);
2040               if (c < 0xA0)
2041                 goto invalid_code;
2042               code |= c & 0x7F;
2043             }
2044           break;
2045
2046         case 4:
2047           ONE_MORE_BYTE (c);
2048           if (c < 0 || (charset_ID = emacs_mule_charset[c]) < 0)
2049             goto invalid_code;
2050           ONE_MORE_BYTE (c);
2051           if (c < 0xA0)
2052             goto invalid_code;
2053           code = (c & 0x7F) << 8;
2054           ONE_MORE_BYTE (c);
2055           if (c < 0xA0)
2056             goto invalid_code;
2057           code |= c & 0x7F;
2058           break;
2059
2060         case 1:
2061           code = c;
2062           charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
2063           break;
2064
2065         default:
2066           abort ();
2067         }
2068       CODING_DECODE_CHAR (coding, src, src_base, src_end,
2069                           CHARSET_FROM_ID (charset_ID), code, c);
2070       if (c < 0)
2071         goto invalid_code;
2072     }
2073   *nbytes = src - src_base;
2074   *nchars = consumed_chars;
2075   if (id)
2076     *id = charset_ID;
2077   return (mseq_found ? -c : c);
2078
2079  no_more_source:
2080   return -2;
2081
2082  invalid_code:
2083   return -1;
2084 }
2085
2086
2087 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2088
2089 /* Handle these composition sequences ('|': the end of header elements,
2090    BYTES and CHARS >= 0xA0):
2091
2092    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2093    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2094    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2095
2096    and these old forms:
2097
2098    (4) relative composition: 0x80 | MSEQ ... MSEQ
2099    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2100
2101    When the starter 0x80 and the following header elements are found,
2102    this annotation header is produced.
2103
2104         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2105
2106    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2107    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2108
2109    Then, upon reading the following elements, these codes are produced
2110    until the composition end is found:
2111
2112    (1) CHAR ... CHAR
2113    (2) ALT ... ALT CHAR ... CHAR
2114    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2115    (4) CHAR ... CHAR
2116    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2117
2118    When the composition end is found, LENGTH and NCHARS in the
2119    annotation header are updated as below:
2120
2121    (1) LENGTH: unchanged, NCHARS: unchanged
2122    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2123    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2124    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2125    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2126
2127    If an error is found while composing, the annotation header is
2128    changed to the original composition header (plus filler -1s) as
2129    below:
2130
2131    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2132    (5)          [ 0x80 0xFF -1 -1 -1 ]
2133
2134    and the sequence [ -2 DECODED-RULE ] is changed to the original
2135    byte sequence as below:
2136         o the original byte sequence is B: [ B -1 ]
2137         o the original byte sequence is B1 B2: [ B1 B2 ]
2138
2139    Most of the routines are implemented by macros because many
2140    variables and labels in the caller decode_coding_emacs_mule must be
2141    accessible, and they are usually called just once (thus doesn't
2142    increase the size of compiled object).  */
2143
/* Decode C, one byte of an Emacs 20 style composition sequence, as a
   composition rule and store the result in RULE.  C is reduced by
   0xA0 in place and must then lie in 0..80, otherwise control jumps
   to `invalid_code'.  The reduced value is split into quotient and
   remainder by 9; either part equal to 4 is replaced by 10 before
   both are passed to COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    c -= 0xA0;                                          \
    if (! (0 <= c && c < 81))                           \
      goto invalid_code;                                \
    gref = c / 9;                                       \
    nref = c % 9;                                       \
    gref = (gref == 4 ? 10 : gref);                     \
    nref = (nref == 4 ? 10 : nref);                     \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2160
2161
/* Decode a two-byte composition rule of an Emacs 21 style composition
   sequence and store the result in RULE.  C holds the first byte; the
   second is fetched into C with ONE_MORE_BYTE.  Each byte, reduced by
   0x20, must lie in 0..80, otherwise control jumps to `invalid_code'.
   The two reduced values are passed to COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
  do {                                                  \
    int gref = c - 0x20;                                \
    int nref;                                           \
                                                        \
    if (! (0 <= gref && gref < 81))                     \
      goto invalid_code;                                \
    ONE_MORE_BYTE (c);                                  \
    nref = c - 0x20;                                    \
    if (! (0 <= nref && nref < 81))                     \
      goto invalid_code;                                \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2179
2180
/* Start of Emacs 21 style format.  On entry C holds the method byte
   (0xF2 + METHOD).  Two more bytes are read: 0xA0 + BYTES, the byte
   length of this composition information, and 0xA0 + CHARS, the
   number of characters composed by it.  BYTES must be at least 3
   (exactly 4 for a relative composition) and CHARS must be positive
   and below MAX_COMPOSITION_COMPONENTS, otherwise control jumps to
   `invalid_code'.  On success the composition status is set up and
   the annotation header is added to CHARBUF.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    if (method == COMPOSITION_RELATIVE && nbytes != 4)                  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (! (nchars > 0 && nchars < MAX_COMPOSITION_COMPONENTS))          \
      goto invalid_code;                                                \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    cmp_status->state = (method == COMPOSITION_RELATIVE                 \
                         ? COMPOSING_CHAR                               \
                         : COMPOSING_COMPONENT_CHAR);                   \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
2212
2213
/* Start of Emacs 20 style format for relative composition.  Marks the
   composition status as old-form, relative, and expecting a
   character, with no characters or components counted yet, and adds
   an annotation header with zero NCHARS and NBYTES to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  do {                                                          \
    cmp_status->method = COMPOSITION_RELATIVE;                  \
    cmp_status->old_form = 1;                                   \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2225
2226
/* Start of Emacs 20 style format for rule-base composition.  Marks
   the composition status as old-form, with-rule, and expecting a
   character, with no characters or components counted yet, and adds
   an annotation header with zero NCHARS and NBYTES to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  do {                                                          \
    cmp_status->method = COMPOSITION_WITH_RULE;                 \
    cmp_status->old_form = 1;                                   \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2238
2239
/* Read the byte that follows a composition starter into C and
   dispatch on it.  A byte of 0xF2 plus a composition method starts
   the Emacs 21 style format, 0xFF starts the Emacs 20 rule-base
   format, and a byte in 0xA0..0xBF starts the Emacs 20 relative
   format (that byte is then unread so that it is decoded as the first
   component).  Anything else jumps to `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_START()           \
  do {                                                  \
    const unsigned char *current_src = src;             \
                                                        \
    ONE_MORE_BYTE (c);                                  \
    if (c < 0)                                          \
      goto invalid_code;                                \
    if (COMPOSITION_RELATIVE <= c - 0xF2                \
        && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
      DECODE_EMACS_MULE_21_COMPOSITION ();              \
    else if (c == 0xFF)                                 \
      DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();     \
    else if (0xA0 <= c && c < 0xC0)                     \
      {                                                 \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
        /* Re-read C as a composition component.  */    \
        src = current_src;                              \
      }                                                 \
    else                                                \
      goto invalid_code;                                \
  } while (0)
2263
/* Finish the composition being built.  Its annotation header starts
   cmp_status->length elements before CHARBUF.  For an old-form
   composition, slot 2 of the header receives cmp_status->nchars;
   otherwise, for a method beyond COMPOSITION_RELATIVE, slot 0
   receives slot 2 minus cmp_status->length.  The composing state is
   then reset to COMPOSING_NO.  */

#define EMACS_MULE_COMPOSITION_END()                            \
  do {                                                          \
    int *header = charbuf - cmp_status->length;                 \
                                                                \
    if (cmp_status->old_form)                                   \
      header[2] = cmp_status->nchars;                           \
    else if (cmp_status->method > COMPOSITION_RELATIVE)         \
      header[0] = header[2] - cmp_status->length;               \
    cmp_status->state = COMPOSING_NO;                           \
  } while (0)
2274
2275
2276 static int
2277 emacs_mule_finish_composition (int *charbuf,
2278                                struct composition_status *cmp_status)
2279 {
2280   int idx = - cmp_status->length;
2281   int new_chars;
2282
2283   if (cmp_status->old_form && cmp_status->nchars > 0)
2284     {
2285       charbuf[idx + 2] = cmp_status->nchars;
2286       new_chars = 0;
2287       if (cmp_status->method == COMPOSITION_WITH_RULE
2288           && cmp_status->state == COMPOSING_CHAR)
2289         {
2290           /* The last rule was invalid.  */
2291           int rule = charbuf[-1] + 0xA0;
2292
2293           charbuf[-2] = BYTE8_TO_CHAR (rule);
2294           charbuf[-1] = -1;
2295           new_chars = 1;
2296         }
2297     }
2298   else
2299     {
2300       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2301
2302       if (cmp_status->method == COMPOSITION_WITH_RULE)
2303         {
2304           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2305           charbuf[idx++] = -3;
2306           charbuf[idx++] = 0;
2307           new_chars = 1;
2308         }
2309       else
2310         {
2311           int nchars = charbuf[idx + 1] + 0xA0;
2312           int nbytes = charbuf[idx + 2] + 0xA0;
2313
2314           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2315           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2316           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2317           charbuf[idx++] = -1;
2318           new_chars = 4;
2319         }
2320     }
2321   cmp_status->state = COMPOSING_NO;
2322   return new_chars;
2323 }
2324
/* If a composition is in progress, close it with
   emacs_mule_finish_composition and add the value it returns to
   CHAR_OFFSET; otherwise do nothing.  */

#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
  do {                                                                    \
    char_offset += (cmp_status->state != COMPOSING_NO                     \
                    ? emacs_mule_finish_composition (charbuf, cmp_status) \
                    : 0);                                                 \
  } while (0)
2330
2331
/* Decode the emacs-mule byte sequence of CODING, starting at
   CODING->source + CODING->consumed, into character codes and
   annotations appended to CODING->charbuf at CODING->charbuf_used.

   A composition that is still open when decoding stops is either
   finished (when CODING_MODE_LAST_BLOCK is set in CODING->mode) or
   saved in the carryover array of the composition status, from
   which the next call restores it.  On return CODING->consumed,
   CODING->consumed_char and CODING->charbuf_used are updated.  Each
   invalid byte is passed through as a single character and counted
   in CODING->errors.

   NOTE(review): the label `no_more_source' has no explicit `goto' in
   this function; presumably ONE_MORE_BYTE jumps to it when the
   source is exhausted -- confirm against the macro.  Likewise
   src_end, multibytep and consumed_chars look like they are used
   implicitly by the macros invoked below.  */
2332 static void
2333 decode_coding_emacs_mule (struct coding_system *coding)
2334 {
2335   const unsigned char *src = coding->source + coding->consumed;
2336   const unsigned char *src_end = coding->source + coding->src_bytes;
2337   const unsigned char *src_base;
2338   int *charbuf = coding->charbuf + coding->charbuf_used;
2339   /* We may produce two annotations (charset and composition) in one
2340      loop and one more charset annotation at the end.  */
2341   int *charbuf_end
2342     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2343   int consumed_chars = 0, consumed_chars_base;
2344   int multibytep = coding->src_multibyte;
2345   Lisp_Object attrs, charset_list;
2346   int char_offset = coding->produced_char;
       /* LAST_ID is the charset of the run of characters being
          produced, and LAST_OFFSET the character offset at which that
          run started.  */
2347   int last_offset = char_offset;
2348   int last_id = charset_ascii;
2349   int eol_dos =
2350     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* When non-negative, a byte that was already read from the
          source right after a CR and has not been processed yet.  */
2351   int byte_after_cr = -1;
2352   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2353
2354   CODING_GET_INFO (coding, attrs, charset_list);
2355
       /* A composition was left open by the previous call: put the
          elements saved in the carryover array back at the head of the
          output.  */
2356   if (cmp_status->state != COMPOSING_NO)
2357     {
2358       int i;
2359
2360       for (i = 0; i < cmp_status->length; i++)
2361         *charbuf++ = cmp_status->carryover[i];
2362       coding->annotated = 1;
2363     }
2364
2365   while (1)
2366     {
2367       int c, id IF_LINT (= 0);
2368
           /* Remember where this iteration starts.  The position is
              restored from these on invalid input, and the final
              values are what gets reported as consumed.  */
2369       src_base = src;
2370       consumed_chars_base = consumed_chars;
2371
           /* The output buffer (minus the reserved room) is full.  A
              byte fetched ahead after a CR has already been taken from
              the source, so step back to have it read again by the
              next call.  */
2372       if (charbuf >= charbuf_end)
2373         {
2374           if (byte_after_cr >= 0)
2375             src_base--;
2376           break;
2377         }
2378
2379       if (byte_after_cr >= 0)
2380         c = byte_after_cr, byte_after_cr = -1;
2381       else
2382         ONE_MORE_BYTE (c);
2383
           /* A negative C is stored negated as one character, and 0x80
              introduces a composition; either way a composition in
              progress is finished first.  NOTE(review): presumably
              ONE_MORE_BYTE yields a negative value for a raw byte
              found in multibyte source -- confirm.  */
2384       if (c < 0 || c == 0x80)
2385         {
2386           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2387           if (c < 0)
2388             {
2389               *charbuf++ = -c;
2390               char_offset++;
2391             }
2392           else
2393             DECODE_EMACS_MULE_COMPOSITION_START ();
2394           continue;
2395         }
2396
2397       if (c < 0x80)
2398         {
               /* With DOS end-of-line conversion, fetch the byte
                  following a CR right away; it is handed to the next
                  iteration through BYTE_AFTER_CR.  */
2399           if (eol_dos && c == '\r')
2400             ONE_MORE_BYTE (byte_after_cr);
2401           id = charset_ascii;
2402           if (cmp_status->state != COMPOSING_NO)
2403             {
2404               if (cmp_status->old_form)
2405                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2406               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2407                 cmp_status->ncomps--;
2408             }
2409         }
2410       else
2411         {
2412           int nchars IF_LINT (= 0), nbytes IF_LINT (= 0);
2413           /* emacs_mule_char can load a charset map from a file, which
2414              allocates a large structure and might cause buffer text
2415              to be relocated as result.  Thus, we need to remember the
2416              original pointer to buffer text, and fix up all related
2417              pointers after the call.  */
2418           const unsigned char *orig = coding->source;
2419           EMACS_INT offset;
2420
2421           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2422                                cmp_status);
2423           offset = coding->source - orig;
2424           if (offset)
2425             {
2426               src += offset;
2427               src_base += offset;
2428               src_end += offset;
2429             }
               /* -1 reports an invalid sequence.  -2 stops decoding at
                  this character (NOTE(review): presumably because the
                  source ends in the middle of a sequence -- confirm in
                  emacs_mule_char).  Other negative values are handled
                  by the composition code below.  */
2430           if (c < 0)
2431             {
2432               if (c == -1)
2433                 goto invalid_code;
2434               if (c == -2)
2435                 break;
2436             }
2437           src = src_base + nbytes;
2438           consumed_chars = consumed_chars_base + nchars;
2439           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2440             cmp_status->ncomps -= nchars;
2441         }
2442
2443       /* Now if C >= 0, we found a normally encoded character, if C <
2444          0, we found an old-style composition component character or
2445          rule.  */
2446
2447       if (cmp_status->state == COMPOSING_NO)
2448         {
               /* A plain character.  When its charset differs from
                  that of the current run, close the run with a charset
                  annotation (a run of ASCII gets none) and start a new
                  one.  */
2449           if (last_id != id)
2450             {
2451               if (last_id != charset_ascii)
2452                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2453                                   last_id);
2454               last_id = id;
2455               last_offset = char_offset;
2456             }
2457           *charbuf++ = c;
2458           char_offset++;
2459         }
2460       else if (cmp_status->state == COMPOSING_CHAR)
2461         {
2462           if (cmp_status->old_form)
2463             {
                   /* In the old form a normally encoded character ends
                      the composition; a component arrives as a
                      negative value and is stored negated.  */
2464               if (c >= 0)
2465                 {
2466                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2467                   *charbuf++ = c;
2468                   char_offset++;
2469                 }
2470               else
2471                 {
2472                   *charbuf++ = -c;
2473                   cmp_status->nchars++;
2474                   cmp_status->length++;
2475                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2476                     EMACS_MULE_COMPOSITION_END ();
2477                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2478                     cmp_status->state = COMPOSING_RULE;
2479                 }
2480             }
2481           else
2482             {
                   /* Here NCHARS counts down the characters still
                      expected; the composition ends when it reaches
                      zero.  */
2483               *charbuf++ = c;
2484               cmp_status->length++;
2485               cmp_status->nchars--;
2486               if (cmp_status->nchars == 0)
2487                 EMACS_MULE_COMPOSITION_END ();
2488             }
2489         }
2490       else if (cmp_status->state == COMPOSING_RULE)
2491         {
2492           int rule;
2493
2494           if (c >= 0)
2495             {
2496               EMACS_MULE_COMPOSITION_END ();
2497               *charbuf++ = c;
2498               char_offset++;
2499             }
2500           else
2501             {
                   /* A rule is recorded as the marker -2 followed by
                      the decoded rule.  */
2502               c = -c;
2503               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2504               if (rule < 0)
2505                 goto invalid_code;
2506               *charbuf++ = -2;
2507               *charbuf++ = rule;
2508               cmp_status->length += 2;
2509               cmp_status->state = COMPOSING_CHAR;
2510             }
2511         }
2512       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2513         {
2514           *charbuf++ = c;
2515           cmp_status->length++;
2516           if (cmp_status->ncomps == 0)
2517             cmp_status->state = COMPOSING_CHAR;
2518           else if (cmp_status->ncomps > 0)
2519             {
2520               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2521                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2522             }
2523           else
2524             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2525         }
2526       else                      /* COMPOSING_COMPONENT_RULE */
2527         {
2528           int rule;
2529
2530           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2531           if (rule < 0)
2532             goto invalid_code;
2533           *charbuf++ = -2;
2534           *charbuf++ = rule;
2535           cmp_status->length += 2;
2536           cmp_status->ncomps--;
2537           if (cmp_status->ncomps > 0)
2538             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2539           else
2540             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2541         }
2542       continue;
2543
           /* Invalid input: finish any open composition, go back to
              the start of the offending sequence and pass its first
              byte through as one character (as is if ASCII, otherwise
              converted with BYTE8_TO_CHAR).  */
2544     invalid_code:
2545       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2546       src = src_base;
2547       consumed_chars = consumed_chars_base;
2548       ONE_MORE_BYTE (c);
2549       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2550       char_offset++;
2551       coding->errors++;
2552     }
2553
2554  no_more_source:
       /* A composition is still open.  On the last block finish it
          with what has been decoded; otherwise take its elements back
          out of the output and keep them in the carryover array for
          the next call.  */
2555   if (cmp_status->state != COMPOSING_NO)
2556     {
2557       if (coding->mode & CODING_MODE_LAST_BLOCK)
2558         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2559       else
2560         {
2561           int i;
2562
2563           charbuf -= cmp_status->length;
2564           for (i = 0; i < cmp_status->length; i++)
2565             cmp_status->carryover[i] = charbuf[i];
2566         }
2567     }
       /* Annotate the trailing run if it is not ASCII.  */
2568   if (last_id != charset_ascii)
2569     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
       /* Report progress up to the start of the last iteration.  */
2570   coding->consumed_char += consumed_chars_base;
2571   coding->consumed = src_base - coding->source;
2572   coding->charbuf_used = charbuf - coding->charbuf;
2573 }
2574
2575
/* Store in CODES (an array of at least two bytes) the leading code(s)
   for the emacs-mule charset ID.

   An ID below 0xA0 is its own single leading code; CODES[1] is then
   set to 0, meaning that no second code follows.  Any other ID is
   stored in CODES[1], preceded in CODES[0] by a prefix code chosen
   by the range ID falls in: 0x9A (ID below 0xE0), 0x9B (below 0xF0),
   0x9C (below 0xF5), or 0x9D (0xF5 and above).

   ID and CODES are evaluated more than once, so they must not have
   side effects.  The expansion has no trailing semicolon: the macro
   followed by `;' is exactly one statement, so it can be the body of
   an `if' that is followed by `else'.  */
#define EMACS_MULE_LEADING_CODES(id, codes)     \
  do {                                          \
    if ((id) < 0xA0)                            \
      (codes)[0] = (id), (codes)[1] = 0;        \
    else if ((id) < 0xE0)                       \
      (codes)[0] = 0x9A, (codes)[1] = (id);     \
    else if ((id) < 0xF0)                       \
      (codes)[0] = 0x9B, (codes)[1] = (id);     \
    else if ((id) < 0xF5)                       \
      (codes)[0] = 0x9C, (codes)[1] = (id);     \
    else                                        \
      (codes)[0] = 0x9D, (codes)[1] = (id);     \
  } while (0)
2589
2590
2591 static int
2592 encode_coding_emacs_mule (struct coding_system *coding)
2593 {
2594   int multibytep = coding->dst_multibyte;
2595   int *charbuf = coding->charbuf;
2596   int *charbuf_end = charbuf + coding->charbuf_used;
2597   unsigned char *dst = coding->destination + coding->produced;
2598   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2599   int safe_room = 8;
2600   int produced_chars = 0;
2601   Lisp_Object attrs, charset_list;
2602   int c;
2603   int preferred_charset_id = -1;
2604
2605   CODING_GET_INFO (coding, attrs, charset_list);
2606   if (! EQ (charset_list, Vemacs_mule_charset_list))
2607     {
2608       CODING_ATTR_CHARSET_LIST (attrs)
2609         = charset_list = Vemacs_mule_charset_list;
2610     }
2611
2612   while (charbuf < charbuf_end)
2613     {
2614       ASSURE_DESTINATION (safe_room);
2615       c = *charbuf++;
2616
2617       if (c < 0)
2618         {
2619           /* Handle an annotation.  */
2620           switch (*charbuf)
2621             {
2622             case CODING_ANNOTATE_COMPOSITION_MASK:
2623               /* Not yet implemented.  */
2624               break;
2625             case CODING_ANNOTATE_CHARSET_MASK:
2626               preferred_charset_id = charbuf[3];
2627               if (preferred_charset_id >= 0
2628                   && NILP (Fmemq (make_number (preferred_charset_id),
2629                                   charset_list)))
2630                 preferred_charset_id = -1;
2631               break;
2632             default:
2633               abort ();
2634             }
2635           charbuf += -c - 1;
2636           continue;
2637         }
2638
2639       if (ASCII_CHAR_P (c))
2640         EMIT_ONE_ASCII_BYTE (c);
2641       else if (CHAR_BYTE8_P (c))
2642         {
2643           c = CHAR_TO_BYTE8 (c);
2644           EMIT_ONE_BYTE (c);
2645         }
2646       else
2647         {
2648           struct charset *charset;
2649           unsigned code;
2650           int dimension;
2651           int emacs_mule_id;
2652           unsigned char leading_codes[2];
2653
2654           if (preferred_charset_id >= 0)
2655             {
2656               charset = CHARSET_FROM_ID (preferred_charset_id);
2657               if (CHAR_CHARSET_P (c, charset))
2658                 code = ENCODE_CHAR (charset, c);
2659               else
2660                 charset = char_charset (c, charset_list, &code);
2661             }
2662           else
2663             charset = char_charset (c, charset_list, &code);
2664           if (! charset)
2665             {
2666               c = coding->default_char;
2667               if (ASCII_CHAR_P (c))
2668                 {
2669                   EMIT_ONE_ASCII_BYTE (c);
2670                   continue;
2671                 }
2672               charset = char_charset (c, charset_list, &code);
2673             }
2674           dimension = CHARSET_DIMENSION (charset);
2675           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2676           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2677           EMIT_ONE_BYTE (leading_codes[0]);
2678           if (leading_codes[1])
2679             EMIT_ONE_BYTE (leading_codes[1]);
2680           if (dimension == 1)
2681             EMIT_ONE_BYTE (code | 0x80);
2682           else
2683             {
2684               code |= 0x8080;
2685               EMIT_ONE_BYTE (code >> 8);
2686               EMIT_ONE_BYTE (code & 0xFF);
2687             }
2688         }
2689     }
2690   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2691   coding->produced_char += produced_chars;
2692   coding->produced = dst - coding->destination;
2693   return 0;
2694 }
2695
2696 \f
2697 /*** 7. ISO2022 handlers ***/
2698
2699 /* The following note describes the coding system ISO2022 briefly.
2700    Since the intention of this note is to help understand the
2701    functions in this file, some parts are NOT ACCURATE or are OVERLY
2702    SIMPLIFIED.  For thorough understanding, please refer to the
2703    original document of ISO2022.  This is equivalent to the standard
2704    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2705
2706    ISO2022 provides many mechanisms to encode several character sets
2707    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2708    is encoded using bytes less than 128.  This may make the encoded
2709    text a little bit longer, but the text passes more easily through
2710    several types of gateway, some of which strip off the MSB (Most
2711    Significant Bit).
2712
2713    There are two kinds of character sets: control character sets and
2714    graphic character sets.  The former contain control characters such
2715    as `newline' and `escape' to provide control functions (control
2716    functions are also provided by escape sequences).  The latter
2717    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2718    two control character sets and many graphic character sets.
2719
2720    Graphic character sets are classified into one of the following
2721    four classes, according to the number of bytes (DIMENSION) and
2722    number of characters in one dimension (CHARS) of the set:
2723    - DIMENSION1_CHARS94
2724    - DIMENSION1_CHARS96
2725    - DIMENSION2_CHARS94
2726    - DIMENSION2_CHARS96
2727
2728    In addition, each character set is assigned an identification tag,
2729    unique for each set, called the "final character" (denoted as <F>
2730    hereafter).  The <F> of each character set is decided by ECMA(*)
2731    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2732    (0x30..0x3F are for private use only).
2733
2734    Note (*): ECMA = European Computer Manufacturers Association
2735
2736    Here are examples of graphic character sets [NAME(<F>)]:
2737         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2738         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2739         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2740         o DIMENSION2_CHARS96 -- none for the moment
2741
2742    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2743         C0 [0x00..0x1F] -- control character plane 0
2744         GL [0x20..0x7F] -- graphic character plane 0
2745         C1 [0x80..0x9F] -- control character plane 1
2746         GR [0xA0..0xFF] -- graphic character plane 1
2747
2748    A control character set is directly designated and invoked to C0 or
2749    C1 by an escape sequence.  The most common case is that:
2750    - ISO646's  control character set is designated/invoked to C0, and
2751    - ISO6429's control character set is designated/invoked to C1,
2752    and usually these designations/invocations are omitted in encoded
2753    text.  In a 7-bit environment, only C0 can be used, and a control
2754    character for C1 is encoded by an appropriate escape sequence to
2755    fit into the environment.  All control characters for C1 are
2756    defined to have corresponding escape sequences.
2757
2758    A graphic character set is at first designated to one of four
2759    graphic registers (G0 through G3), then these graphic registers are
2760    invoked to GL or GR.  These designations and invocations can be
2761    done independently.  The most common case is that G0 is invoked to
2762    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2763    these invocations and designations are omitted in encoded text.
2764    In a 7-bit environment, only GL can be used.
2765
2766    When a graphic character set of CHARS94 is invoked to GL, codes
2767    0x20 and 0x7F of the GL area work as control characters SPACE and
2768    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2769    be used.
2770
2771    There are two ways of invocation: locking-shift and single-shift.
2772    With locking-shift, the invocation lasts until the next different
2773    invocation, whereas with single-shift, the invocation affects the
2774    following character only and doesn't affect the locking-shift
2775    state.  Invocations are done by the following control characters or
2776    escape sequences:
2777
2778    ----------------------------------------------------------------------
2779    abbrev  function                  cntrl escape seq   description
2780    ----------------------------------------------------------------------
2781    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2782    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2783    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2784    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2785    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2786    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2787    LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
2788    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2789    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2790    ----------------------------------------------------------------------
2791    (*) These are not used by any known coding system.
2792
2793    Control characters for these functions are defined by macros
2794    ISO_CODE_XXX in `coding.h'.
2795
2796    Designations are done by the following escape sequences:
2797    ----------------------------------------------------------------------
2798    escape sequence      description
2799    ----------------------------------------------------------------------
2800    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2801    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2802    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2803    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2804    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2805    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2806    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2807    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2808    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2809    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2810    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2811    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2812    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2813    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2814    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2815    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2816    ----------------------------------------------------------------------
2817
2818    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2819    of dimension 1, chars 94, and final character <F>, etc...
2820
2821    Note (*): Although these designations are not allowed in ISO2022,
2822    Emacs accepts them on decoding, and produces them on encoding
2823    CHARS96 character sets in a coding system which is characterized as
2824    7-bit environment, non-locking-shift, and non-single-shift.
2825
2826    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2827    '(' must be omitted.  We refer to this as "short-form" hereafter.
2828
2829    Now you may notice that there are a lot of ways of encoding the
2830    same multilingual text in ISO2022.  Actually, there exist many
2831    coding systems such as Compound Text (used in X11's inter-client
2832    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2833    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2834    localized platforms), and all of these are variants of ISO2022.
2835
2836    In addition to the above, Emacs handles two more kinds of escape
2837    sequences: ISO6429's direction specification and Emacs' private
2838    sequence for specifying character composition.
2839
2840    ISO6429's direction specification takes the following form:
2841         o CSI ']'      -- end of the current direction
2842         o CSI '0' ']'  -- end of the current direction
2843         o CSI '1' ']'  -- start of left-to-right text
2844         o CSI '2' ']'  -- start of right-to-left text
2845    The control character CSI (0x9B: control sequence introducer) is
2846    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2847
2848    Character composition specification takes the following form:
2849         o ESC '0' -- start relative composition
2850         o ESC '1' -- end composition
2851         o ESC '2' -- start rule-base composition (*)
2852         o ESC '3' -- start relative composition with alternate chars  (**)
2853         o ESC '4' -- start rule-base composition with alternate chars  (**)
2854   Since these are not standard escape sequences of any ISO standard,
2855   the use of them with these meanings is restricted to Emacs only.
2856
2857   (*) This form is used only in Emacs 20.7 and older versions,
2858   but newer versions can safely decode it.
2859   (**) This form is used only in Emacs 21.1 and newer versions,
2860   and older versions can't decode it.
2861
2862   Here's a list of example usages of these composition escape
2863   sequences (categorized by `enum composition_method').
2864
2865   COMPOSITION_RELATIVE:
2866         ESC 0 CHAR [ CHAR ] ESC 1
2867   COMPOSITION_WITH_RULE:
2868         ESC 2 CHAR [ RULE CHAR ] ESC 1
2869   COMPOSITION_WITH_ALTCHARS:
2870         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2871   COMPOSITION_WITH_RULE_ALTCHARS:
2872         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2873
/* A table of 256 entries of type `enum iso_code_class_type'.
   NOTE(review): presumably indexed by byte value and giving the ISO
   2022 class of each byte; nothing in this part of the file reads or
   writes it, so confirm against the code that fills it in.  */
2874 enum iso_code_class_type iso_code_class[256];
2875
/* Return nonzero if the charset whose ID is ID is usable with CODING:
   ID must lie within 0..CODING->max_charset_id and its entry in
   CODING->safe_charsets must not be 255, the value that marks a
   charset as unusable.

   A negative ID (e.g. the result of a failed charset lookup) yields
   zero instead of indexing in front of the table.  ID is evaluated
   more than once.  */
#define SAFE_CHARSET_P(coding, id)      \
  ((id) >= 0                            \
   && (id) <= (coding)->max_charset_id  \
   && (coding)->safe_charsets[id] != 255)
2879
2880 static void
2881 setup_iso_safe_charsets (Lisp_Object attrs)
2882 {
2883   Lisp_Object charset_list, safe_charsets;
2884   Lisp_Object request;
2885   Lisp_Object reg_usage;
2886   Lisp_Object tail;
2887   int reg94, reg96;
2888   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2889   int max_charset_id;
2890
2891   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2892   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2893       && ! EQ (charset_list, Viso_2022_charset_list))
2894     {
2895       CODING_ATTR_CHARSET_LIST (attrs)
2896         = charset_list = Viso_2022_charset_list;
2897       ASET (attrs, coding_attr_safe_charsets, Qnil);
2898     }
2899
2900   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2901     return;
2902
2903   max_charset_id = 0;
2904   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2905     {
2906       int id = XINT (XCAR (tail));
2907       if (max_charset_id < id)
2908         max_charset_id = id;
2909     }
2910
2911   safe_charsets = make_uninit_string (max_charset_id + 1);
2912   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
2913   request = AREF (attrs, coding_attr_iso_request);
2914   reg_usage = AREF (attrs, coding_attr_iso_usage);
2915   reg94 = XINT (XCAR (reg_usage));
2916   reg96 = XINT (XCDR (reg_usage));
2917
2918   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2919     {
2920       Lisp_Object id;
2921       Lisp_Object reg;
2922       struct charset *charset;
2923
2924       id = XCAR (tail);
2925       charset = CHARSET_FROM_ID (XINT (id));
2926       reg = Fcdr (Fassq (id, request));
2927       if (! NILP (reg))
2928         SSET (safe_charsets, XINT (id), XINT (reg));
2929       else if (charset->iso_chars_96)
2930         {
2931           if (reg96 < 4)
2932             SSET (safe_charsets, XINT (id), reg96);
2933         }
2934       else
2935         {
2936           if (reg94 < 4)
2937             SSET (safe_charsets, XINT (id), reg94);
2938         }
2939     }
2940   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2941 }
2942
2943
2944 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2945    Check if a text is encoded in one of ISO-2022 based coding systems.
2946    If it is, return 1, else return 0.

        The source of CODING is scanned from after its leading ASCII
        part (CODING->head_ascii).  While scanning, the ISO categories
        that the bytes seen so far rule out are collected in `rejected'
        and those they give evidence for in `found'.

        If every ISO category gets rejected, all of them are added to
        DETECT_INFO->rejected and 0 is returned.  Otherwise scanning
        ends at the label `no_more_source', where `rejected' and the
        not-rejected part of `found' are merged into DETECT_INFO and 1
        is returned.

        NOTE(review): the label `no_more_source' has no explicit `goto'
        in this function; presumably ONE_MORE_BYTE jumps to it when the
        source is exhausted -- confirm against the macro.  Likewise
        multibytep and consumed_chars look like they are used only by
        that macro.  */
2947
2948 static int
2949 detect_coding_iso_2022 (struct coding_system *coding,
2950                         struct coding_detection_info *detect_info)
2951 {
2952   const unsigned char *src = coding->source, *src_base = src;
2953   const unsigned char *src_end = coding->source + coding->src_bytes;
2954   int multibytep = coding->src_multibyte;
       /* Nonzero right after a single-shift code was seen.  */
2955   int single_shifting = 0;
2956   int id;
2957   int c, c1;
2958   int consumed_chars = 0;
2959   int i;
       /* Masks of the ISO categories ruled out / supported so far.  */
2960   int rejected = 0;
2961   int found = 0;
       /* Negative while outside a composition sequence; otherwise the
          number of characters counted since the composition started.  */
2962   int composition_count = -1;
2963
2964   detect_info->checked |= CATEGORY_MASK_ISO;
2965
       /* For each ISO category that has a coding system, make the
          category's safe-charsets table and its size refer to the
          safe-charsets string in that coding system's attributes.  The
          string is set up first when full support is requested and the
          charset list is not yet Viso_2022_charset_list.  */
2966   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2967     {
2968       struct coding_system *this = &(coding_categories[i]);
2969       Lisp_Object attrs, val;
2970
2971       if (this->id < 0)
2972         continue;
2973       attrs = CODING_ID_ATTRS (this->id);
2974       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2975           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
2976         setup_iso_safe_charsets (attrs);
2977       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2978       this->max_charset_id = SCHARS (val) - 1;
2979       this->safe_charsets = SDATA (val);
2980     }
2981
2982   /* A coding system of this category is always ASCII compatible.  */
2983   src += coding->head_ascii;
2984
       /* Scan until every ISO category has been rejected.  Note that a
          `break' directly inside the switch below only leaves the
          switch, i.e. it proceeds to the next byte.  */
2985   while (rejected != CATEGORY_MASK_ISO)
2986     {
2987       src_base = src;
2988       ONE_MORE_BYTE (c);
2989       switch (c)
2990         {
2991         case ISO_CODE_ESC:
2992           if (inhibit_iso_escape_detection)
2993             break;
2994           single_shifting = 0;
2995           ONE_MORE_BYTE (c);
2996           if (c == 'N' || c == 'O')
2997             {
2998               /* ESC <Fe> for SS2 or SS3.  */
2999               single_shifting = 1;
3000               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3001             }
3002           else if (c == '1')
3003             {
3004               /* End of composition.  */
3005               if (composition_count < 0
3006                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3007                 /* Invalid */
3008                 break;
3009               composition_count = -1;
3010               found |= CATEGORY_MASK_ISO;
3011             }
3012           else if (c >= '0' && c <= '4')
3013             {
3014               /* ESC <Fp> for start/end composition.  */
                   /* ('1' was handled above, so this starts a
                      composition; begin counting its characters.)  */
3015               composition_count = 0;
3016             }
3017           else
3018             {
                   /* Anything else must be a designation sequence.
                      The branches below either set ID to the charset
                      being designated or leave the switch.  */
3019               if (c >= '(' && c <= '/')
3020                 {
3021                   /* Designation sequence for a charset of dimension 1.  */
3022                   ONE_MORE_BYTE (c1);
3023                   if (c1 < ' ' || c1 >= 0x80
3024                       || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3025                     /* Invalid designation sequence.  Just ignore.  */
3026                     break;
3027                 }
3028               else if (c == '$')
3029                 {
3030                   /* Designation sequence for a charset of dimension 2.  */
3031                   ONE_MORE_BYTE (c);
3032                   if (c >= '@' && c <= 'B')
3033                     /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
                         /* NOTE(review): unlike the other two lookups,
                            this result is not tested for being
                            negative before it is passed to
                            SAFE_CHARSET_P below.  */
3034                     id = iso_charset_table[1][0][c];
3035                   else if (c >= '(' && c <= '/')
3036                     {
3037                       ONE_MORE_BYTE (c1);
3038                       if (c1 < ' ' || c1 >= 0x80
3039                           || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3040                         /* Invalid designation sequence.  Just ignore.  */
3041                         break;
3042                     }
3043                   else
3044                     /* Invalid designation sequence.  Just ignore it.  */
3045                     break;
3046                 }
3047               else
3048                 {
3049                   /* Invalid escape sequence.  Just ignore it.  */
3050                   break;
3051                 }
3052
3053               /* We found a valid designation sequence for CHARSET.  */
                   /* It rules out the 8-bit categories.  Each of the
                      four categories tested below is found if the
                      charset is safe for it and rejected otherwise.  */
3054               rejected |= CATEGORY_MASK_ISO_8BIT;
3055               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3056                                   id))
3057                 found |= CATEGORY_MASK_ISO_7;
3058               else
3059                 rejected |= CATEGORY_MASK_ISO_7;
3060               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3061                                   id))
3062                 found |= CATEGORY_MASK_ISO_7_TIGHT;
3063               else
3064                 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3065               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3066                                   id))
3067                 found |= CATEGORY_MASK_ISO_7_ELSE;
3068               else
3069                 rejected |= CATEGORY_MASK_ISO_7_ELSE;
3070               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3071                                   id))
3072                 found |= CATEGORY_MASK_ISO_8_ELSE;
3073               else
3074                 rejected |= CATEGORY_MASK_ISO_8_ELSE;
3075             }
3076           break;
3077
3078         case ISO_CODE_SO:
3079         case ISO_CODE_SI:
3080           /* Locking shift out/in.  */
3081           if (inhibit_iso_escape_detection)
3082             break;
3083           single_shifting = 0;
3084           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3085           break;
3086
3087         case ISO_CODE_CSI:
3088           /* Control sequence introducer.  */
3089           single_shifting = 0;
3090           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3091           found |= CATEGORY_MASK_ISO_8_ELSE;
3092           goto check_extra_latin;
3093
3094         case ISO_CODE_SS2:
3095         case ISO_CODE_SS3:
3096           /* Single shift.   */
3097           if (inhibit_iso_escape_detection)
3098             break;
3099           single_shifting = 0;
3100           rejected |= CATEGORY_MASK_ISO_7BIT;
               /* The byte counts as a single shift only for the 8-bit
                  categories whose coding system enables single
                  shift.  */
3101           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3102               & CODING_ISO_FLAG_SINGLE_SHIFT)
3103             {
3104               found |= CATEGORY_MASK_ISO_8_1;
3105               single_shifting = 1;
3106             }
3107           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3108               & CODING_ISO_FLAG_SINGLE_SHIFT)
3109             {
3110               found |= CATEGORY_MASK_ISO_8_2;
3111               single_shifting = 1;
3112             }
3113           if (single_shifting)
3114             break;
               /* Reached by falling through from the single-shift
                  case and by the jump from the CSI case.  The byte is
                  acceptable only if Vlatin_extra_code_table is a
                  vector with a non-nil entry for it; otherwise every
                  ISO category is rejected.  */
3115         check_extra_latin:
3116           if (! VECTORP (Vlatin_extra_code_table)
3117               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3118             {
3119               rejected = CATEGORY_MASK_ISO;
3120               break;
3121             }
3122           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3123               & CODING_ISO_FLAG_LATIN_EXTRA)
3124             found |= CATEGORY_MASK_ISO_8_1;
3125           else
3126             rejected |= CATEGORY_MASK_ISO_8_1;
3127           rejected |= CATEGORY_MASK_ISO_8_2;
3128           break;
3129
3130         default:
               /* A negative value gives no information here; go on
                  with the next byte.  */
3131           if (c < 0)
3132             continue;
3133           if (c < 0x80)
3134             {
3135               if (composition_count >= 0)
3136                 composition_count++;
3137               single_shifting = 0;
3138               break;
3139             }
3140           if (c >= 0xA0)
3141             {
3142               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3143               found |= CATEGORY_MASK_ISO_8_1;
3144               /* Check the length of succeeding codes of the range
3145                  0xA0..0xFF.  If the byte length is even, we include
3146                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3147                  only when we are not single shifting.  */
3148               if (! single_shifting
3149                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3150                 {
3151                   int len = 1;
                       /* Count the bytes of the run.  The first byte
                          below 0xA0 ends the run and is put back so
                          that the main loop handles it.  */
3152                   while (src < src_end)
3153                     {
3154                       src_base = src;
3155                       ONE_MORE_BYTE (c);
3156                       if (c < 0xA0)
3157                         {
3158                           src = src_base;
3159                           break;
3160                         }
3161                       len++;
3162                     }
3163
                       /* An odd length counts against ISO_8_2 only
                          when the run ended before the end of the
                          source.  */
3164                   if (len & 1 && src < src_end)
3165                     {
3166                       rejected |= CATEGORY_MASK_ISO_8_2;
3167                       if (composition_count >= 0)
3168                         composition_count += len;
3169                     }
3170                   else
3171                     {
3172                       found |= CATEGORY_MASK_ISO_8_2;
3173                       if (composition_count >= 0)
3174                         composition_count += len / 2;
3175                     }
3176                 }
3177               break;
3178             }
3179         }
3180     }
       /* The loop ended because every ISO category was rejected.  */
3181   detect_info->rejected |= CATEGORY_MASK_ISO;
3182   return 0;
3183
3184  no_more_source:
       /* Report what was ruled out, and of the categories found only
          those that were not ruled out as well.  */
3185   detect_info->rejected |= rejected;
3186   detect_info->found |= (found & ~rejected);
3187   return 1;
3188 }
3189
3190
3191 /* Set designation state into CODING (the `coding' variable of the expansion
3192    site) for register REG.  Set CHARS_96 to -1 if the escape sequence should be kept.  */
3193 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3194   do {                                                                  \
3195     int id, prev;                                                       \
3196                                                                         \
3197     if (final < '0' || final >= 128                                     \
3198         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
3199         || !SAFE_CHARSET_P (coding, id))                                \
3200       { /* FINAL out of range, negative table lookup, or SAFE_CHARSET_P failed.  */ \
3201         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
3202         chars_96 = -1;                                                  \
3203         break;  /* leaves only this macro's do-while */                 \
3204       }                                                                 \
3205     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
3206     if (id == charset_jisx0201_roman)                                   \
3207       { /* Replaced by charset_ascii when the USE_ROMAN flag is set.  */ \
3208         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
3209           id = charset_ascii;                                           \
3210       }                                                                 \
3211     else if (id == charset_jisx0208_1978)                               \
3212       { /* Replaced by charset_jisx0208 when the USE_OLDJIS flag is set.  */ \
3213         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3214           id = charset_jisx0208;                                        \
3215       }                                                                 \
3216     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3217     /* If there was an invalid designation to REG previously, and this  \
3218        designation is ASCII to REG, we should keep this designation     \
3219        sequence.  */                                                    \
3220     if (prev == -2 && id == charset_ascii)                              \
3221       chars_96 = -1;                                                    \
3222   } while (0)
3223
3224
3225 /* Handle these composition sequences (ALT: alternate char):
3226
3227    (1) relative composition: ESC 0 CHAR ... ESC 1
3228    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3229    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3230    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3231
3232    When the start sequence (ESC 0/2/3/4) is found, this annotation
3233    header is produced.
3234
3235         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3236
3237    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3238    produced until the end sequence (ESC 1) is found:
3239
3240    (1) CHAR ... CHAR
3241    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3242    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3243    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3244
3245    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3246    annotation header are updated as below:
3247
3248    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3249    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3250    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3251    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3252
3253    If an error is found while composing, the annotation header is
3254    changed to:
3255
3256         [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3257
3258    and the sequence [ -2 DECODED-RULE ] is changed to the original
3259    byte sequence as below:
3260         o the original byte sequence is B: [ B -1 ]
3261         o the original byte sequence is B1 B2: [ B1 B2 ]
3262    and the sequence [ -1 -1 ] is changed to the original byte
3263    sequence:
3264         [ ESC '0' ]
3265 */
3266
3267 /* Decode a composition rule from C1 (a variable of the expansion site, not
3268    a macro argument) and maybe one more byte from the source, and set RULE
3269    to the encoded composition rule, NBYTES to the length of the composition
3270    rule.  If the rule is invalid, set RULE to some negative value.  */
3271
3272 #define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
3273   do {                                                                  \
3274     rule = c1 - 32;                                                     \
3275     if (rule < 0)                                                       \
3276       break;  /* invalid: NBYTES is left unset */                       \
3277     if (rule < 81)              /* old format (before ver.21) */        \
3278       {                                                                 \
3279         int gref = (rule) / 9;                                          \
3280         int nref = (rule) % 9;                                          \
3281         if (gref == 4) gref = 10;                                       \
3282         if (nref == 4) nref = 10;                                       \
3283         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
3284         nbytes = 1;                                                     \
3285       }                                                                 \
3286     else                        /* new format (after ver.21) */         \
3287       {                                                                 \
3288         int b;                                                          \
3289                                                                         \
3290         ONE_MORE_BYTE (b);  /* defined outside this section; NOTE(review): may jump away at end of source -- confirm */ \
3291         rule = COMPOSITION_ENCODE_RULE (rule - 81, b - 32);             \
3292         if (rule >= 0)                                                  \
3293           rule += 0x100;   /* to distinguish it from the old format */  \
3294         nbytes = 2;                                                     \
3295       }                                                                 \
3296   } while (0)
3297 /* Write RULE back as its source byte(s) into charbuf[idx] and charbuf[idx + 1], using charbuf, idx and new_chars of the expansion site.  */
3298 #define ENCODE_COMPOSITION_RULE(rule)                           \
3299   do {                                                          \
3300     int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; /* RULE is expanded several times, unparenthesized */ \
3301                                                                 \
3302     if (rule < 0x100)           /* old format */                \
3303       {                                                         \
3304         if (gref == 10) gref = 4;                               \
3305         if (nref == 10) nref = 4;                               \
3306         charbuf[idx] = 32 + gref * 9 + nref;                    \
3307         charbuf[idx + 1] = -1;  /* single byte: the second slot becomes -1 */ \
3308         new_chars++;                                            \
3309       }                                                         \
3310     else                                /* new format */        \
3311       {                                                         \
3312         charbuf[idx] = 32 + 81 + gref;                          \
3313         charbuf[idx + 1] = 32 + nref;                           \
3314         new_chars += 2;                                         \
3315       }                                                         \
3316   } while (0)
3317
3318 /* Finish the current composition as invalid: rewrite the CMP_STATUS->length
3319    entries that end just before CHARBUF, in place, and return a char count.  */
3320 static int finish_composition (int *, struct composition_status *);
3321
3322 static int
3323 finish_composition (int *charbuf, struct composition_status *cmp_status)
3324 {
3325   int idx = - cmp_status->length;       /* negative offset of the first entry */
3326   int new_chars;                        /* return value: nchars plus what the loop below adds */
3327
3328   /* Recover the original ESC sequence */
3329   charbuf[idx++] = ISO_CODE_ESC;
3330   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3331                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3332                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3333                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3334                     : '4');
3335   charbuf[idx++] = -2;                  /* the remaining three header slots */
3336   charbuf[idx++] = 0;
3337   charbuf[idx++] = -1;
3338   new_chars = cmp_status->nchars;
3339   if (cmp_status->method >= COMPOSITION_WITH_RULE)      /* only these methods scan for -2 / -1 markers */
3340     for (; idx < 0; idx++)
3341       {
3342         int elt = charbuf[idx];
3343
3344         if (elt == -2)
3345           {
3346             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);   /* overwrites charbuf[idx] and charbuf[idx + 1] */
3347             idx++;                                        /* skip the second rewritten slot */
3348           }
3349         else if (elt == -1)
3350           {
3351             charbuf[idx++] = ISO_CODE_ESC;                /* the pair at idx, idx + 1 becomes ESC '0' */
3352             charbuf[idx] = '0';
3353             new_chars += 2;
3354           }
3355       }
3356   cmp_status->state = COMPOSING_NO;
3357   return new_chars;
3358 }
3359 /* If characters are under composition, finish the composition as invalid
3360    (see finish_composition) and add the returned count to char_offset.  */
3361 #define MAYBE_FINISH_COMPOSITION()                              \
3362   do {                                                          \
3363     if (cmp_status->state != COMPOSING_NO)                      \
3364       char_offset += finish_composition (charbuf, cmp_status);  \
3365   } while (0)
3366
3367 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3368
3369    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3370    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3371    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3372    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3373
3374    Produce this annotation sequence now:
3375
3376    [ -LENGTH(==-4) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) METHOD ]
3377    NOTE(review): the composition-sequence comment earlier in this file shows a five-element header with LENGTH 5 -- confirm which is current.  */
3378
3379 #define DECODE_COMPOSITION_START(c1)                                       \
3380   do {                                                                     \
3381     if (c1 == '0'                                                          \
3382         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
3383              && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
3384             || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
3385                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3386       { /* ESC 0 inside an ESC 3 / ESC 4 component list: store the [ -1 -1 ] separator.  */ \
3387         *charbuf++ = -1;                                                   \
3388         *charbuf++= -1;                                                    \
3389         cmp_status->state = COMPOSING_CHAR;                                \
3390         cmp_status->length += 2;                                           \
3391       }                                                                    \
3392     else                                                                   \
3393       { /* A new composition; one still in progress is first finished as invalid.  */ \
3394         MAYBE_FINISH_COMPOSITION ();                                       \
3395         cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE             \
3396                               : c1 == '2' ? COMPOSITION_WITH_RULE          \
3397                               : c1 == '3' ? COMPOSITION_WITH_ALTCHARS      \
3398                               : COMPOSITION_WITH_RULE_ALTCHARS);           \
3399         cmp_status->state  /* '0' and '2' start with CHARs, '3' and '4' with component chars */ \
3400           = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);       \
3401         ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
3402         cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
3403         cmp_status->nchars = cmp_status->ncomps = 0;                       \
3404         coding->annotated = 1;                                             \
3405       }                                                                    \
3406   } while (0)
3407
3408
3409 /* Handle composition end sequence ESC 1.  Uses cmp_status, charbuf and
3410    char_offset of the expansion site, and jumps to its `invalid_code' label.  */
3411 #define DECODE_COMPOSITION_END()                                        \
3412   do {                                                                  \
3413     if (cmp_status->nchars == 0                                         \
3414         || ((cmp_status->state == COMPOSING_CHAR)                       \
3415             == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
3416       { /* No CHAR stored, or COMPOSING_CHAR state under WITH_RULE, or any other state under another method.  */ \
3417         MAYBE_FINISH_COMPOSITION ();                                    \
3418         goto invalid_code;                                              \
3419       }                                                                 \
3420     if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
3421       charbuf[- cmp_status->length] -= cmp_status->ncomps + 2;  /* header slot; presumably holds -LENGTH, so this grows LENGTH -- confirm */ \
3422     else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
3423       charbuf[- cmp_status->length] -= cmp_status->ncomps * 3;          \
3424     charbuf[- cmp_status->length + 2] = cmp_status->nchars;  /* third header slot receives NCHARS */ \
3425     char_offset += cmp_status->nchars;                                  \
3426     cmp_status->state = COMPOSING_NO;                                   \
3427   } while (0)
3428
3429 /* Store a composition rule RULE in charbuf, and update cmp_status.  */
3430
3431 #define STORE_COMPOSITION_RULE(rule)    \
3432   do {                                  \
3433     *charbuf++ = -2;  /* marker stored in front of a decoded rule */ \
3434     *charbuf++ = rule;                  \
3435     cmp_status->length += 2;            \
3436     cmp_status->state--;  /* presumably a *_RULE state back to its *_CHAR state; relies on enum order -- confirm */ \
3437   } while (0)
3438
3439 /* Store a composed char or a component char C in charbuf, and update
3440    cmp_status (length, nchars or ncomps, and possibly state).  */
3441
3442 #define STORE_COMPOSITION_CHAR(c)                                       \
3443   do {                                                                  \
3444     *charbuf++ = (c);                                                   \
3445     cmp_status->length++;                                               \
3446     if (cmp_status->state == COMPOSING_CHAR)                            \
3447       cmp_status->nchars++;  /* counted as a composed char */           \
3448     else                                                                \
3449       cmp_status->ncomps++;  /* counted as a component char */          \
3450     if (cmp_status->method == COMPOSITION_WITH_RULE                     \
3451         || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS        \
3452             && cmp_status->state == COMPOSING_COMPONENT_CHAR))          \
3453       cmp_status->state++;  /* presumably moves to the matching *_RULE state; relies on enum order -- confirm */ \
3454   } while (0)
3455
3456
3457 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3458
3459 static void
3460 decode_coding_iso_2022 (struct coding_system *coding)
3461 {
3462   const unsigned char *src = coding->source + coding->consumed;
3463   const unsigned char *src_end = coding->source + coding->src_bytes;
3464   const unsigned char *src_base;
3465   int *charbuf = coding->charbuf + coding->charbuf_used;
3466   /* We may produce two annotations (charset and composition) in one
3467      loop and one more charset annotation at the end.  */
3468   int *charbuf_end
3469     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3470   int consumed_chars = 0, consumed_chars_base;
3471   int multibytep = coding->src_multibyte;
3472   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3473   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3474   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3475   int charset_id_2, charset_id_3;
3476   struct charset *charset;
3477   int c;
3478   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3479   Lisp_Object attrs, charset_list;
3480   int char_offset = coding->produced_char;
3481   int last_offset = char_offset;
3482   int last_id = charset_ascii;
3483   int eol_dos =
3484     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3485   int byte_after_cr = -1;
3486   int i;
3487
3488   CODING_GET_INFO (coding, attrs, charset_list);
3489   setup_iso_safe_charsets (attrs);
3490   /* Charset list may have been changed.  */
3491   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3492   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3493
3494   if (cmp_status->state != COMPOSING_NO)
3495     {
3496       for (i = 0; i < cmp_status->length; i++)
3497         *charbuf++ = cmp_status->carryover[i];
3498       coding->annotated = 1;
3499     }
3500
3501   while (1)
3502     {
3503       int c1, c2, c3;
3504
3505       src_base = src;
3506       consumed_chars_base = consumed_chars;
3507
3508       if (charbuf >= charbuf_end)
3509         {
3510           if (byte_after_cr >= 0)
3511             src_base--;
3512           break;
3513         }
3514
3515       if (byte_after_cr >= 0)
3516         c1 = byte_after_cr, byte_after_cr = -1;
3517       else
3518         ONE_MORE_BYTE (c1);
3519       if (c1 < 0)
3520         goto invalid_code;
3521
3522       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3523         {
3524           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3525           char_offset++;
3526           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3527           continue;
3528         }
3529
3530       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3531         {
3532           if (c1 == ISO_CODE_ESC)
3533             {
3534               if (src + 1 >= src_end)
3535                 goto no_more_source;
3536               *charbuf++ = ISO_CODE_ESC;
3537               char_offset++;
3538               if (src[0] == '%' && src[1] == '@')
3539                 {
3540                   src += 2;
3541                   consumed_chars += 2;
3542                   char_offset += 2;
3543                   /* We are sure charbuf can contain two more chars. */
3544                   *charbuf++ = '%';
3545                   *charbuf++ = '@';
3546                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3547                 }
3548             }
3549           else
3550             {
3551               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3552               char_offset++;
3553             }
3554           continue;
3555         }
3556
3557       if ((cmp_status->state == COMPOSING_RULE
3558            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3559           && c1 != ISO_CODE_ESC)
3560         {
3561           int rule, nbytes;
3562
3563           DECODE_COMPOSITION_RULE (rule, nbytes);
3564           if (rule < 0)
3565             goto invalid_code;
3566           STORE_COMPOSITION_RULE (rule);
3567           continue;
3568         }
3569
3570       /* We produce at most one character.  */
3571       switch (iso_code_class [c1])
3572         {
3573         case ISO_0x20_or_0x7F:
3574           if (charset_id_0 < 0
3575               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3576             /* This is SPACE or DEL.  */
3577             charset = CHARSET_FROM_ID (charset_ascii);
3578           else
3579             charset = CHARSET_FROM_ID (charset_id_0);
3580           break;
3581
3582         case ISO_graphic_plane_0:
3583           if (charset_id_0 < 0)
3584             charset = CHARSET_FROM_ID (charset_ascii);
3585           else
3586             charset = CHARSET_FROM_ID (charset_id_0);
3587           break;
3588
3589         case ISO_0xA0_or_0xFF:
3590           if (charset_id_1 < 0
3591               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3592               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3593             goto invalid_code;
3594           /* This is a graphic character, we fall down ... */
3595
3596         case ISO_graphic_plane_1:
3597           if (charset_id_1 < 0)
3598             goto invalid_code;
3599           charset = CHARSET_FROM_ID (charset_id_1);
3600           break;
3601
3602         case ISO_control_0:
3603           if (eol_dos && c1 == '\r')
3604             ONE_MORE_BYTE (byte_after_cr);
3605           MAYBE_FINISH_COMPOSITION ();
3606           charset = CHARSET_FROM_ID (charset_ascii);
3607           break;
3608
3609         case ISO_control_1:
3610           goto invalid_code;
3611
3612         case ISO_shift_out:
3613           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3614               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3615             goto invalid_code;
3616           CODING_ISO_INVOCATION (coding, 0) = 1;
3617           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3618           continue;
3619
3620         case ISO_shift_in:
3621           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3622             goto invalid_code;
3623           CODING_ISO_INVOCATION (coding, 0) = 0;
3624           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3625           continue;
3626
3627         case ISO_single_shift_2_7:
3628           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3629             goto invalid_code;
3630         case ISO_single_shift_2:
3631           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3632             goto invalid_code;
3633           /* SS2 is handled as an escape sequence of ESC 'N' */
3634           c1 = 'N';
3635           goto label_escape_sequence;
3636
3637         case ISO_single_shift_3:
3638           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3639             goto invalid_code;
3640           /* SS2 is handled as an escape sequence of ESC 'O' */
3641           c1 = 'O';
3642           goto label_escape_sequence;
3643
3644         case ISO_control_sequence_introducer:
3645           /* CSI is handled as an escape sequence of ESC '[' ...  */
3646           c1 = '[';
3647           goto label_escape_sequence;
3648
3649         case ISO_escape:
3650           ONE_MORE_BYTE (c1);
3651         label_escape_sequence:
3652           /* Escape sequences handled here are invocation,
3653              designation, direction specification, and character
3654              composition specification.  */
3655           switch (c1)
3656             {
3657             case '&':           /* revision of following character set */
3658               ONE_MORE_BYTE (c1);
3659               if (!(c1 >= '@' && c1 <= '~'))
3660                 goto invalid_code;
3661               ONE_MORE_BYTE (c1);
3662               if (c1 != ISO_CODE_ESC)
3663                 goto invalid_code;
3664               ONE_MORE_BYTE (c1);
3665               goto label_escape_sequence;
3666
3667             case '$':           /* designation of 2-byte character set */
3668               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3669                 goto invalid_code;
3670               {
3671                 int reg, chars96;
3672
3673                 ONE_MORE_BYTE (c1);
3674                 if (c1 >= '@' && c1 <= 'B')
3675                   {     /* designation of JISX0208.1978, GB2312.1980,
3676                            or JISX0208.1980 */
3677                     reg = 0, chars96 = 0;
3678                   }
3679                 else if (c1 >= 0x28 && c1 <= 0x2B)
3680                   { /* designation of DIMENSION2_CHARS94 character set */
3681                     reg = c1 - 0x28, chars96 = 0;
3682                     ONE_MORE_BYTE (c1);
3683                   }
3684                 else if (c1 >= 0x2C && c1 <= 0x2F)
3685                   { /* designation of DIMENSION2_CHARS96 character set */
3686                     reg = c1 - 0x2C, chars96 = 1;
3687                     ONE_MORE_BYTE (c1);
3688                   }
3689                 else
3690                   goto invalid_code;
3691                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3692                 /* We must update these variables now.  */
3693                 if (reg == 0)
3694                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3695                 else if (reg == 1)
3696                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3697                 if (chars96 < 0)
3698                   goto invalid_code;
3699               }
3700               continue;
3701
3702             case 'n':           /* invocation of locking-shift-2 */
3703               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3704                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3705                 goto invalid_code;
3706               CODING_ISO_INVOCATION (coding, 0) = 2;
3707               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3708               continue;
3709
3710             case 'o':           /* invocation of locking-shift-3 */
3711               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3712                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3713                 goto invalid_code;
3714               CODING_ISO_INVOCATION (coding, 0) = 3;
3715               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3716               continue;
3717
3718             case 'N':           /* invocation of single-shift-2 */
3719               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3720                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3721                 goto invalid_code;
3722               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3723               if (charset_id_2 < 0)
3724                 charset = CHARSET_FROM_ID (charset_ascii);
3725               else
3726                 charset = CHARSET_FROM_ID (charset_id_2);
3727               ONE_MORE_BYTE (c1);
3728               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3729                 goto invalid_code;
3730               break;
3731
3732             case 'O':           /* invocation of single-shift-3 */
3733               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3734                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3735                 goto invalid_code;
3736               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3737               if (charset_id_3 < 0)
3738                 charset = CHARSET_FROM_ID (charset_ascii);
3739               else
3740                 charset = CHARSET_FROM_ID (charset_id_3);
3741               ONE_MORE_BYTE (c1);
3742               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3743                 goto invalid_code;
3744               break;
3745
3746             case '0': case '2': case '3': case '4': /* start composition */
3747               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3748                 goto invalid_code;
3749               if (last_id != charset_ascii)
3750                 {
3751                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3752                   last_id = charset_ascii;
3753                   last_offset = char_offset;
3754                 }
3755               DECODE_COMPOSITION_START (c1);
3756               continue;
3757
3758             case '1':           /* end composition */
3759               if (cmp_status->state == COMPOSING_NO)
3760                 goto invalid_code;
3761               DECODE_COMPOSITION_END ();
3762               continue;
3763
3764             case '[':           /* specification of direction */
3765               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3766                 goto invalid_code;
3767               /* For the moment, nested direction is not supported.
3768                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3769                  left-to-right, and nonzero means right-to-left.  */
3770               ONE_MORE_BYTE (c1);
3771               switch (c1)
3772                 {
3773                 case ']':       /* end of the current direction */
3774                   coding->mode &= ~CODING_MODE_DIRECTION;
3775
3776                 case '0':       /* end of the current direction */
3777                 case '1':       /* start of left-to-right direction */
3778                   ONE_MORE_BYTE (c1);
3779                   if (c1 == ']')
3780                     coding->mode &= ~CODING_MODE_DIRECTION;
3781                   else
3782                     goto invalid_code;
3783                   break;
3784
3785                 case '2':       /* start of right-to-left direction */
3786                   ONE_MORE_BYTE (c1);
3787                   if (c1 == ']')
3788                     coding->mode |= CODING_MODE_DIRECTION;
3789                   else
3790                     goto invalid_code;
3791                   break;
3792
3793                 default:
3794                   goto invalid_code;
3795                 }
3796               continue;
3797
3798             case '%':
3799               ONE_MORE_BYTE (c1);
3800               if (c1 == '/')
3801                 {
3802                   /* CTEXT extended segment:
3803                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3804                      We keep these bytes as is for the moment.
3805                      They may be decoded by post-read-conversion.  */
3806                   int dim, M, L;
3807                   int size;
3808
3809                   ONE_MORE_BYTE (dim);
3810                   if (dim < '0' || dim > '4')
3811                     goto invalid_code;
3812                   ONE_MORE_BYTE (M);
3813                   if (M < 128)
3814                     goto invalid_code;
3815                   ONE_MORE_BYTE (L);
3816                   if (L < 128)
3817                     goto invalid_code;
3818                   size = ((M - 128) * 128) + (L - 128);
3819                   if (charbuf + 6 > charbuf_end)
3820                     goto break_loop;
3821                   *charbuf++ = ISO_CODE_ESC;
3822                   *charbuf++ = '%';
3823                   *charbuf++ = '/';
3824                   *charbuf++ = dim;
3825                   *charbuf++ = BYTE8_TO_CHAR (M);
3826                   *charbuf++ = BYTE8_TO_CHAR (L);
3827                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3828                 }
3829               else if (c1 == 'G')
3830                 {
3831                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3832                      ESC % G --UTF-8-BYTES-- ESC % @
3833                      We keep these bytes as is for the moment.
3834                      They may be decoded by post-read-conversion.  */
3835                   if (charbuf + 3 > charbuf_end)
3836                     goto break_loop;
3837                   *charbuf++ = ISO_CODE_ESC;
3838                   *charbuf++ = '%';
3839                   *charbuf++ = 'G';
3840                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3841                 }
3842               else
3843                 goto invalid_code;
3844               continue;
3845               break;
3846
3847             default:
3848               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3849                 goto invalid_code;
3850               {
3851                 int reg, chars96;
3852
3853                 if (c1 >= 0x28 && c1 <= 0x2B)
3854                   { /* designation of DIMENSION1_CHARS94 character set */
3855                     reg = c1 - 0x28, chars96 = 0;
3856                     ONE_MORE_BYTE (c1);
3857                   }
3858                 else if (c1 >= 0x2C && c1 <= 0x2F)
3859                   { /* designation of DIMENSION1_CHARS96 character set */
3860                     reg = c1 - 0x2C, chars96 = 1;
3861                     ONE_MORE_BYTE (c1);
3862                   }
3863                 else
3864                   goto invalid_code;
3865                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3866                 /* We must update these variables now.  */
3867                 if (reg == 0)
3868                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3869                 else if (reg == 1)
3870                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3871                 if (chars96 < 0)
3872                   goto invalid_code;
3873               }
3874               continue;
3875             }
3876           break;
3877
3878         default:
3879           abort ();
3880         }
3881
3882       if (cmp_status->state == COMPOSING_NO
3883           && charset->id != charset_ascii
3884           && last_id != charset->id)
3885         {
3886           if (last_id != charset_ascii)
3887             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3888           last_id = charset->id;
3889           last_offset = char_offset;
3890         }
3891
3892       /* Now we know CHARSET and 1st position code C1 of a character.
3893          Produce a decoded character while getting 2nd and 3rd
3894          position codes C2, C3 if necessary.  */
3895       if (CHARSET_DIMENSION (charset) > 1)
3896         {
3897           ONE_MORE_BYTE (c2);
3898           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3899               || ((c1 & 0x80) != (c2 & 0x80)))
3900             /* C2 is not in a valid range.  */
3901             goto invalid_code;
3902           if (CHARSET_DIMENSION (charset) == 2)
3903             c1 = (c1 << 8) | c2;
3904           else
3905             {
3906               ONE_MORE_BYTE (c3);
3907               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3908                   || ((c1 & 0x80) != (c3 & 0x80)))
3909                 /* C3 is not in a valid range.  */
3910                 goto invalid_code;
3911               c1 = (c1 << 16) | (c2 << 8) | c2;
3912             }
3913         }
3914       c1 &= 0x7F7F7F;
3915       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3916       if (c < 0)
3917         {
3918           MAYBE_FINISH_COMPOSITION ();
3919           for (; src_base < src; src_base++, char_offset++)
3920             {
3921               if (ASCII_BYTE_P (*src_base))
3922                 *charbuf++ = *src_base;
3923               else
3924                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3925             }
3926         }
3927       else if (cmp_status->state == COMPOSING_NO)
3928         {
3929           *charbuf++ = c;
3930           char_offset++;
3931         }
3932       else if ((cmp_status->state == COMPOSING_CHAR
3933                 ? cmp_status->nchars
3934                 : cmp_status->ncomps)
3935                >= MAX_COMPOSITION_COMPONENTS)
3936         {
3937           /* Too long composition.  */
3938           MAYBE_FINISH_COMPOSITION ();
3939           *charbuf++ = c;
3940           char_offset++;
3941         }
3942       else
3943         STORE_COMPOSITION_CHAR (c);
3944       continue;
3945
3946     invalid_code:
3947       MAYBE_FINISH_COMPOSITION ();
3948       src = src_base;
3949       consumed_chars = consumed_chars_base;
3950       ONE_MORE_BYTE (c);
3951       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3952       char_offset++;
3953       coding->errors++;
3954       continue;
3955
3956     break_loop:
3957       break;
3958     }
3959
3960  no_more_source:
3961   if (cmp_status->state != COMPOSING_NO)
3962     {
3963       if (coding->mode & CODING_MODE_LAST_BLOCK)
3964         MAYBE_FINISH_COMPOSITION ();
3965       else
3966         {
3967           charbuf -= cmp_status->length;
3968           for (i = 0; i < cmp_status->length; i++)
3969             cmp_status->carryover[i] = charbuf[i];
3970         }
3971     }
3972   else if (last_id != charset_ascii)
3973     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3974   coding->consumed_char += consumed_chars_base;
3975   coding->consumed = src_base - coding->source;
3976   coding->charbuf_used = charbuf - coding->charbuf;
3977 }
3978
3979
3980 /* ISO2022 encoding stuff.  */
3981
/*
   Saying just "ISO2022" is not enough when encoding; more details
   have to be specified.  In Emacs, each coding system of the ISO2022
   variant has the following specifications:
        1. Initial designation to G0 through G3.
        2. Allows short-form designation?
        3. ASCII should be designated to G0 before control characters?
        4. ASCII should be designated to G0 at end of line?
        5. 7-bit environment or 8-bit environment?
        6. Use locking-shift?
        7. Use single-shift?
   And the following two are only for Japanese:
        8. Use ASCII in place of JISX0201-1976-Roman?
        9. Use JISX0208-1983 in place of JISX0208-1978?
   These specifications are encoded in CODING_ISO_FLAGS (coding) as flag
   bits defined by the macros CODING_ISO_FLAG_XXX.  See `coding.h' for
   more details.
*/
4000
/* Produce at DST the escape sequence that designates CHARSET to
   graphic register REG, advancing DST, and record the designation in
   CODING.

   The bytes produced are, in order:
     - ESC '&' <'@' + revision>, only when CODING has
       CODING_ISO_FLAG_REVISION set and the revision of CHARSET is
       not negative;
     - ESC;
     - '$', when the dimension of CHARSET is not 1;
     - an intermediate byte selected by REG from "()*+" (94-character
       sets) or ",-./" (96-character sets);
     - the final byte of CHARSET.
   The intermediate byte is left out (short form) only for a
   94-character set of dimension other than 1 that goes to register 0
   and whose final byte is '@', 'A', or 'B', and only when
   CODING_ISO_FLAG_LONG_FORM is not set.

   The EMIT_* macros used here are expanded in the caller's scope, so
   whatever variables they refer to must be visible there.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const char *const designator_94 = "()*+";                           \
    const char *const designator_96 = ",-./";                           \
    const char *designators                                             \
      = CHARSET_ISO_CHARS_96 (charset) ? designator_96 : designator_94; \
    unsigned char iso_final = CHARSET_ISO_FINAL (charset);              \
    int iso_revision                                                    \
      = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)         \
         ? CHARSET_ISO_REVISION (charset) : -1);                        \
    int omit_intermediate = 0;                                          \
                                                                        \
    if (iso_revision >= 0)                                              \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + iso_revision);                             \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        /* Short form: the intermediate byte may be dropped.  */        \
        omit_intermediate                                               \
          = (! CHARSET_ISO_CHARS_96 (charset)                           \
             && ! (CODING_ISO_FLAGS (coding)                            \
                   & CODING_ISO_FLAG_LONG_FORM)                         \
             && reg == 0                                                \
             && iso_final >= '@' && iso_final <= 'B');                  \
      }                                                                 \
    if (! omit_intermediate)                                            \
      EMIT_ONE_ASCII_BYTE (designators[reg]);                           \
    EMIT_ONE_ASCII_BYTE (iso_final);                                    \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4048
4049
/* The following two macros produce codes (a control character or an
   escape sequence) for the ISO2022 single-shift functions
   single-shift-2 and single-shift-3.

   When CODING_ISO_FLAG_SEVEN_BITS is set in the flags of `coding',
   the two-byte sequence ESC 'N' (resp. ESC 'O') is emitted; otherwise
   the single byte ISO_CODE_SS2 (resp. ISO_CODE_SS3) is emitted.  In
   both cases `coding' is then marked as single-shifting.  Both macros
   refer to a variable named `coding' that must be in scope at the
   point of expansion.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4072
4073
/* The following four macros produce codes (a control character or an
   escape sequence) for the ISO2022 locking-shift functions shift-in,
   shift-out, locking-shift-2, and locking-shift-3.  Each one emits the
   code and then records in `coding' which graphic register (0, 1, 2,
   or 3 respectively) is now invoked to graphic plane 0.  All of them
   refer to a variable named `coding' that must be in scope at the
   point of expansion.  */

/* Shift-in: emit ISO_CODE_SI; register 0 becomes invoked.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


/* Shift-out: emit ISO_CODE_SO; register 1 becomes invoked.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


/* Locking-shift-2: emit ESC 'n'; register 2 becomes invoked.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3: emit ESC 'o'; register 3 becomes invoked.  The
   final byte must be 'o': ESC 'n' is locking-shift-2, and emitting it
   here would leave the byte stream invoking register 2 while `coding'
   records register 3.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
4104
4105
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING_ISO_FLAG_USE_ROMAN is set
   and CHARSET is ASCII, it is replaced by the JISX0201-Roman charset.
   C1 may be any expression; it is parenthesized wherever it is used
   so that a compound argument is masked as a whole.

   The byte is emitted with the high bit cleared when CHARSET is
   invoked to graphic plane 0 (or during a 7-bit single shift), and
   with the high bit set when it is invoked to graphic plane 1 (or
   during an 8-bit single shift).  The macro refers to `coding', `dst',
   and `produced_chars' from the expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4148
4149
/* Emit the two bytes of a DIMENSION2 character that belongs to
   CHARSET and has position-codes C1 and C2, first producing whatever
   designation and invocation codes are needed.

   CHARSET must be an lvalue: when CODING_ISO_FLAG_USE_OLDJIS is set
   and CHARSET is JISX0208, it is replaced by the JISX0208-1978
   charset.  Both bytes have the high bit cleared when CHARSET is
   invoked to graphic plane 0 (or during a 7-bit single shift), and
   set when it is invoked to graphic plane 1 (or during an 8-bit
   single shift).  The macro refers to `coding', `dst', and
   `produced_chars' from the expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int iso_set_id = CHARSET_ID (charset);                              \
                                                                        \
    if (iso_set_id == charset_jisx0208                                  \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        iso_set_id = charset_jisx0208_1978;                             \
        charset = CHARSET_FROM_ID (iso_set_id);                         \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        else                                                            \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (iso_set_id == CODING_ISO_INVOKED_CHARSET (coding, 0))           \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    if (iso_set_id == CODING_ISO_INVOKED_CHARSET (coding, 1))           \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to either graphic plane yet.  Invoke     \
       it (designating it to a graphic register first if needed),      \
       then go around the loop again to produce the character.  */     \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
4192
4193
/* Encode character C as a member of CHARSET.  The code point returned
   by ENCODE_CHAR is handed to ENCODE_ISO_CHARACTER_DIMENSION1 when the
   dimension of CHARSET is 1; otherwise it is split into its high part
   (code >> 8) and low byte (code & 0xFF) and handed to
   ENCODE_ISO_CHARACTER_DIMENSION2.  CHARSET must be an lvalue because
   those macros may assign to it.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int iso_code = ENCODE_CHAR ((charset), (c));                           \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), iso_code >> 8,           \
                                       iso_code & 0xFF);                   \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), iso_code);               \
  } while (0)
4203
4204
4205 /* Produce designation and invocation codes at a place pointed by DST
4206    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4207    Return new DST.  */
4208
4209 static unsigned char *
4210 encode_invocation_designation (struct charset *charset,
4211                                struct coding_system *coding,
4212                                unsigned char *dst, int *p_nchars)
4213 {
4214   int multibytep = coding->dst_multibyte;
4215   int produced_chars = *p_nchars;
4216   int reg;                      /* graphic register number */
4217   int id = CHARSET_ID (charset);
4218
4219   /* At first, check designations.  */
4220   for (reg = 0; reg < 4; reg++)
4221     if (id == CODING_ISO_DESIGNATION (coding, reg))
4222       break;
4223
4224   if (reg >= 4)
4225     {
4226       /* CHARSET is not yet designated to any graphic registers.  */
4227       /* At first check the requested designation.  */
4228       reg = CODING_ISO_REQUEST (coding, id);
4229       if (reg < 0)
4230         /* Since CHARSET requests no special designation, designate it
4231            to graphic register 0.  */
4232         reg = 0;
4233
4234       ENCODE_DESIGNATION (charset, reg, coding);
4235     }
4236
4237   if (CODING_ISO_INVOCATION (coding, 0) != reg
4238       && CODING_ISO_INVOCATION (coding, 1) != reg)
4239     {
4240       /* Since the graphic register REG is not invoked to any graphic
4241          planes, invoke it to graphic plane 0.  */
4242       switch (reg)
4243         {
4244         case 0:                 /* graphic register 0 */
4245           ENCODE_SHIFT_IN;
4246           break;
4247
4248         case 1:                 /* graphic register 1 */
4249           ENCODE_SHIFT_OUT;
4250           break;
4251
4252         case 2:                 /* graphic register 2 */
4253           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4254             ENCODE_SINGLE_SHIFT_2;
4255           else
4256             ENCODE_LOCKING_SHIFT_2;
4257           break;
4258
4259         case 3:                 /* graphic register 3 */
4260           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4261             ENCODE_SINGLE_SHIFT_3;
4262           else
4263             ENCODE_LOCKING_SHIFT_3;
4264           break;
4265         }
4266     }
4267
4268   *p_nchars = produced_chars;
4269   return dst;
4270 }
4271
4272
/* Produce the codes that bring the graphic planes and registers back
   to their initial state: a shift-in when graphic plane 0 does not
   currently invoke register 0, followed by a designation for every
   register that has an initial charset (initial id >= 0) different
   from the charset currently designated to it.  Refers to a variable
   named `coding' that must be in scope at the point of expansion.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    struct charset *initial_charset;                                    \
    int g;                                                              \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (g = 0; g < 4; g++)                                             \
      {                                                                 \
        /* Skip registers that have no initial charset or that         \
           already hold it.  */                                        \
        if (CODING_ISO_INITIAL (coding, g) < 0                          \
            || (CODING_ISO_DESIGNATION (coding, g)                      \
                == CODING_ISO_INITIAL (coding, g)))                     \
          continue;                                                     \
        initial_charset                                                 \
          = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, g));           \
        ENCODE_DESIGNATION (initial_charset, g, coding);                \
      }                                                                 \
  } while (0)
4291
4292
4293 /* Produce designation sequences of charsets in the line started from
4294    SRC to a place pointed by DST, and return updated DST.
4295
4296    If the current block ends before any end-of-line, we may fail to
4297    find all the necessary designations.  */
4298
4299 static unsigned char *
4300 encode_designation_at_bol (struct coding_system *coding, int *charbuf,
4301                            unsigned char *dst)
4302 {
4303   struct charset *charset;
4304   /* Table of charsets to be designated to each graphic register.  */
4305   int r[4];
4306   int c, found = 0, reg;
4307   int produced_chars = 0;
4308   int multibytep = coding->dst_multibyte;
4309   Lisp_Object attrs;
4310   Lisp_Object charset_list;
4311
4312   attrs = CODING_ID_ATTRS (coding->id);
4313   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4314   if (EQ (charset_list, Qiso_2022))
4315     charset_list = Viso_2022_charset_list;
4316
4317   for (reg = 0; reg < 4; reg++)
4318     r[reg] = -1;
4319
4320   while (found < 4)
4321     {
4322       int id;
4323
4324       c = *charbuf++;
4325       if (c == '\n')
4326         break;
4327       charset = char_charset (c, charset_list, NULL);
4328       id = CHARSET_ID (charset);
4329       reg = CODING_ISO_REQUEST (coding, id);
4330       if (reg >= 0 && r[reg] < 0)
4331         {
4332           found++;
4333           r[reg] = id;
4334         }
4335     }
4336
4337   if (found)
4338     {
4339       for (reg = 0; reg < 4; reg++)
4340         if (r[reg] >= 0
4341             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4342           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4343     }
4344
4345   return dst;
4346 }
4347
4348 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4349
4350 static int
4351 encode_coding_iso_2022 (struct coding_system *coding)
4352 {
4353   int multibytep = coding->dst_multibyte;
4354   int *charbuf = coding->charbuf;
4355   int *charbuf_end = charbuf + coding->charbuf_used;
4356   unsigned char *dst = coding->destination + coding->produced;
4357   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4358   int safe_room = 16;
4359   int bol_designation
4360     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4361        && CODING_ISO_BOL (coding));
4362   int produced_chars = 0;
4363   Lisp_Object attrs, eol_type, charset_list;
4364   int ascii_compatible;
4365   int c;
4366   int preferred_charset_id = -1;
4367
4368   CODING_GET_INFO (coding, attrs, charset_list);
4369   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4370   if (VECTORP (eol_type))
4371     eol_type = Qunix;
4372
4373   setup_iso_safe_charsets (attrs);
4374   /* Charset list may have been changed.  */
4375   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4376   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4377
4378   ascii_compatible
4379     = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
4380        && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
4381                                           | CODING_ISO_FLAG_LOCKING_SHIFT)));
4382
4383   while (charbuf < charbuf_end)
4384     {
4385       ASSURE_DESTINATION (safe_room);
4386
4387       if (bol_designation)
4388         {
4389           unsigned char *dst_prev = dst;
4390
4391           /* We have to produce designation sequences if any now.  */
4392           dst = encode_designation_at_bol (coding, charbuf, dst);
4393           bol_designation = 0;
4394           /* We are sure that designation sequences are all ASCII bytes.  */
4395           produced_chars += dst - dst_prev;
4396         }
4397
4398       c = *charbuf++;
4399
4400       if (c < 0)
4401         {
4402           /* Handle an annotation.  */
4403           switch (*charbuf)
4404             {
4405             case CODING_ANNOTATE_COMPOSITION_MASK:
4406               /* Not yet implemented.  */
4407               break;
4408             case CODING_ANNOTATE_CHARSET_MASK:
4409               preferred_charset_id = charbuf[2];
4410               if (preferred_charset_id >= 0
4411                   && NILP (Fmemq (make_number (preferred_charset_id),
4412                                   charset_list)))
4413                 preferred_charset_id = -1;
4414               break;
4415             default:
4416               abort ();
4417             }
4418           charbuf += -c - 1;
4419           continue;
4420         }
4421
4422       /* Now encode the character C.  */
4423       if (c < 0x20 || c == 0x7F)
4424         {
4425           if (c == '\n'
4426               || (c == '\r' && EQ (eol_type, Qmac)))
4427             {
4428               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4429                 ENCODE_RESET_PLANE_AND_REGISTER ();
4430               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4431                 {
4432                   int i;
4433
4434                   for (i = 0; i < 4; i++)
4435                     CODING_ISO_DESIGNATION (coding, i)
4436                       = CODING_ISO_INITIAL (coding, i);
4437                 }
4438               bol_designation
4439                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4440             }
4441           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4442             ENCODE_RESET_PLANE_AND_REGISTER ();
4443           EMIT_ONE_ASCII_BYTE (c);
4444         }
4445       else if (ASCII_CHAR_P (c))
4446         {
4447           if (ascii_compatible)
4448             EMIT_ONE_ASCII_BYTE (c);
4449           else
4450             {
4451               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4452               ENCODE_ISO_CHARACTER (charset, c);
4453             }
4454         }
4455       else if (CHAR_BYTE8_P (c))
4456         {
4457           c = CHAR_TO_BYTE8 (c);
4458           EMIT_ONE_BYTE (c);
4459         }
4460       else
4461         {
4462           struct charset *charset;
4463
4464           if (preferred_charset_id >= 0)
4465             {
4466               charset = CHARSET_FROM_ID (preferred_charset_id);
4467               if (! CHAR_CHARSET_P (c, charset))
4468                 charset = char_charset (c, charset_list, NULL);
4469             }
4470           else
4471             charset = char_charset (c, charset_list, NULL);
4472           if (!charset)
4473             {
4474               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4475                 {
4476                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4477                   charset = CHARSET_FROM_ID (charset_ascii);
4478                 }
4479               else
4480                 {
4481                   c = coding->default_char;
4482                   charset = char_charset (c, charset_list, NULL);
4483                 }
4484             }
4485           ENCODE_ISO_CHARACTER (charset, c);
4486         }
4487     }
4488
4489   if (coding->mode & CODING_MODE_LAST_BLOCK
4490       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4491     {
4492       ASSURE_DESTINATION (safe_room);
4493       ENCODE_RESET_PLANE_AND_REGISTER ();
4494     }
4495   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4496   CODING_ISO_BOL (coding) = bol_designation;
4497   coding->produced_char += produced_chars;
4498   coding->produced = dst - coding->destination;
4499   return 0;
4500 }
4501
4502 \f
/*** 8. Shift-JIS and BIG5 handlers ***/
4504
/* Although SJIS and BIG5 are not ISO coding systems, they are used
   quite widely.  So, for the moment, Emacs supports them in the bare
   C code.  But, in the future, they may be supported only by CCL.  */
4508
/* SJIS is a coding system encoding three character sets: ASCII, the
   right half of JISX0201-Kana, and JISX0208.  An ASCII character is
   encoded as is.  A character of charset katakana-jisx0201 is encoded
   as "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but its two position-codes are divided and
   shifted so that they fit in the ranges below.
4515
4516    --- CODE RANGE of SJIS ---
4517    (character set)      (range)
4518    ASCII                0x00 .. 0x7F
4519    KATAKANA-JISX0201    0xA0 .. 0xDF
4520    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4521             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4522    -------------------------------
4523
4524 */
4525
4526 /* BIG5 is a coding system encoding two character sets: ASCII and
4527    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4528    character set; each of its characters is encoded in two bytes.
4529
4530    --- CODE RANGE of BIG5 ---
4531    (character set)      (range)
4532    ASCII                0x00 .. 0x7F
4533    Big5 (1st byte)      0xA1 .. 0xFE
4534         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4535    --------------------------
4536
4537   */
4538
4539 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4540    Check if a text is encoded in SJIS.  If it is, return
4541    CATEGORY_MASK_SJIS, else return 0.  */
4542
4543 static int
4544 detect_coding_sjis (struct coding_system *coding,
4545                     struct coding_detection_info *detect_info)
4546 {
4547   const unsigned char *src = coding->source, *src_base;
4548   const unsigned char *src_end = coding->source + coding->src_bytes;
4549   int multibytep = coding->src_multibyte;
4550   int consumed_chars = 0;
4551   int found = 0;
4552   int c;
4553   Lisp_Object attrs, charset_list;
4554   int max_first_byte_of_2_byte_code;
4555
4556   CODING_GET_INFO (coding, attrs, charset_list);
4557   max_first_byte_of_2_byte_code
4558     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4559
4560   detect_info->checked |= CATEGORY_MASK_SJIS;
4561   /* A coding system of this category is always ASCII compatible.  */
4562   src += coding->head_ascii;
4563
4564   while (1)
4565     {
4566       src_base = src;
4567       ONE_MORE_BYTE (c);
4568       if (c < 0x80)
4569         continue;
4570       if ((c >= 0x81 && c <= 0x9F)
4571           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4572         {
4573           ONE_MORE_BYTE (c);
4574           if (c < 0x40 || c == 0x7F || c > 0xFC)
4575             break;
4576           found = CATEGORY_MASK_SJIS;
4577         }
4578       else if (c >= 0xA0 && c < 0xE0)
4579         found = CATEGORY_MASK_SJIS;
4580       else
4581         break;
4582     }
4583   detect_info->rejected |= CATEGORY_MASK_SJIS;
4584   return 0;
4585
4586  no_more_source:
4587   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4588     {
4589       detect_info->rejected |= CATEGORY_MASK_SJIS;
4590       return 0;
4591     }
4592   detect_info->found |= found;
4593   return 1;
4594 }
4595
4596 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4597    Check if a text is encoded in BIG5.  If it is, return
4598    CATEGORY_MASK_BIG5, else return 0.  */
4599
4600 static int
4601 detect_coding_big5 (struct coding_system *coding,
4602                     struct coding_detection_info *detect_info)
4603 {
4604   const unsigned char *src = coding->source, *src_base;
4605   const unsigned char *src_end = coding->source + coding->src_bytes;
4606   int multibytep = coding->src_multibyte;
4607   int consumed_chars = 0;
4608   int found = 0;
4609   int c;
4610
4611   detect_info->checked |= CATEGORY_MASK_BIG5;
4612   /* A coding system of this category is always ASCII compatible.  */
4613   src += coding->head_ascii;
4614
4615   while (1)
4616     {
4617       src_base = src;
4618       ONE_MORE_BYTE (c);
4619       if (c < 0x80)
4620         continue;
4621       if (c >= 0xA1)
4622         {
4623           ONE_MORE_BYTE (c);
4624           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4625             return 0;
4626           found = CATEGORY_MASK_BIG5;
4627         }
4628       else
4629         break;
4630     }
4631   detect_info->rejected |= CATEGORY_MASK_BIG5;
4632   return 0;
4633
4634  no_more_source:
4635   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4636     {
4637       detect_info->rejected |= CATEGORY_MASK_BIG5;
4638       return 0;
4639     }
4640   detect_info->found |= found;
4641   return 1;
4642 }
4643
4644 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4645    Decode SJIS text from CODING->source, starting at offset
        CODING->consumed, and append the resulting characters to
        CODING->charbuf.  The charset list of the coding system supplies,
        in order, the charsets used for bytes below 0x80, for the single
        bytes 0xA1..0xDF (JISX0201-Kana), for two-byte codes whose first
        byte is at most 0xEF (JISX0208), and optionally for two-byte
        codes whose first byte is in 0xF0..0xFC (JISX0213-2).  A byte
        that cannot be decoded is stored as a raw 8-bit character (via
        BYTE8_TO_CHAR) and counted in CODING->errors.  On return,
        CODING->consumed, CODING->consumed_char and CODING->charbuf_used
        are updated.  */
4646
4647 static void
4648 decode_coding_sjis (struct coding_system *coding)
4649 {
4650   const unsigned char *src = coding->source + coding->consumed;
4651   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* Start of the byte sequence handled by the current iteration.  */
4652   const unsigned char *src_base;
4653   int *charbuf = coding->charbuf + coding->charbuf_used;
4654   /* We may produce one charset annotation in one loop and one more at
4655      the end.  */
4656   int *charbuf_end
4657     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4658   int consumed_chars = 0, consumed_chars_base;
4659   int multibytep = coding->src_multibyte;
4660   struct charset *charset_roman, *charset_kanji, *charset_kana;
       /* NULL when the charset list has only three elements.  */
4661   struct charset *charset_kanji2;
4662   Lisp_Object attrs, charset_list, val;
4663   int char_offset = coding->produced_char;
       /* LAST_ID is the charset of the current run of non-ASCII
          characters (charset_ascii if there is none), and LAST_OFFSET is
          the character offset at which that run started.  */
4664   int last_offset = char_offset;
4665   int last_id = charset_ascii;
4666   int eol_dos =
4667     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The byte read ahead after a CR, or -1 if there is none.  */
4668   int byte_after_cr = -1;
4669
4670   CODING_GET_INFO (coding, attrs, charset_list);
4671
4672   val = charset_list;
4673   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4674   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4675   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4676   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4677
4678   while (1)
4679     {
4680       int c, c1;
4681       struct charset *charset;
4682
4683       src_base = src;
4684       consumed_chars_base = consumed_chars;
4685
           /* Stop when the output buffer is full.  A byte already read
              ahead after a CR is given back by moving SRC_BASE one byte
              backward.  */
4686       if (charbuf >= charbuf_end)
4687         {
4688           if (byte_after_cr >= 0)
4689             src_base--;
4690           break;
4691         }
4692
           /* Use the byte read ahead after a CR, if any, before reading
              a new one.  */
4693       if (byte_after_cr >= 0)
4694         c = byte_after_cr, byte_after_cr = -1;
4695       else
4696         ONE_MORE_BYTE (c);
           /* NOTE(review): a negative C presumably denotes a source
              character that is not a raw byte -- confirm against the
              definition of ONE_MORE_BYTE.  */
4697       if (c < 0)
4698         goto invalid_code;
4699       if (c < 0x80)
4700         {
               /* With DOS-style EOL, read the byte following a CR ahead
                  of time.  */
4701           if (eol_dos && c == '\r')
4702             ONE_MORE_BYTE (byte_after_cr);
4703           charset = charset_roman;
4704         }
4705       else if (c == 0x80 || c == 0xA0)
4706         goto invalid_code;
4707       else if (c >= 0xA1 && c <= 0xDF)
4708         {
4709           /* SJIS -> JISX0201-Kana */
4710           c &= 0x7F;
4711           charset = charset_kana;
4712         }
4713       else if (c <= 0xEF)
4714         {
4715           /* SJIS -> JISX0208 */
4716           ONE_MORE_BYTE (c1);
4717           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4718             goto invalid_code;
4719           c = (c << 8) | c1;
4720           SJIS_TO_JIS (c);
4721           charset = charset_kanji;
4722         }
4723       else if (c <= 0xFC && charset_kanji2)
4724         {
4725           /* SJIS -> JISX0213-2 */
4726           ONE_MORE_BYTE (c1);
4727           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4728             goto invalid_code;
4729           c = (c << 8) | c1;
4730           SJIS_TO_JIS2 (c);
4731           charset = charset_kanji2;
4732         }
4733       else
4734         goto invalid_code;
           /* When a run of a different non-ASCII charset starts, record
              an annotation for the run that just ended (if any) and
              begin a new run here.  */
4735       if (charset->id != charset_ascii
4736           && last_id != charset->id)
4737         {
4738           if (last_id != charset_ascii)
4739             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4740           last_id = charset->id;
4741           last_offset = char_offset;
4742         }
4743       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4744       *charbuf++ = c;
4745       char_offset++;
4746       continue;
4747
           /* Go back to the start of the offending sequence and produce
              only its first byte, as a raw 8-bit character.  */
4748     invalid_code:
4749       src = src_base;
4750       consumed_chars = consumed_chars_base;
4751       ONE_MORE_BYTE (c);
4752       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4753       char_offset++;
4754       coding->errors++;
4755     }
4756
       /* Reached when the loop is left by `break'; ONE_MORE_BYTE is also
          expected to jump here when the source is exhausted.  */
4757  no_more_source:
4758   if (last_id != charset_ascii)
4759     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4760   coding->consumed_char += consumed_chars_base;
4761   coding->consumed = src_base - coding->source;
4762   coding->charbuf_used = charbuf - coding->charbuf;
4763 }
4764
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   Decode BIG5 text from CODING->source, starting at offset
   CODING->consumed, and append the resulting characters to
   CODING->charbuf.  The charset list of the coding system supplies,
   in order, the charset used for bytes below 0x80 and the charset
   used for two-byte codes (first byte 0xA1..0xFE, second byte
   0x40..0x7E or 0xA1..0xFE).  A byte that cannot be decoded is stored
   as a raw 8-bit character (via BYTE8_TO_CHAR) and counted in
   CODING->errors.  On return, CODING->consumed, CODING->consumed_char
   and CODING->charbuf_used are updated.  */
4765 static void
4766 decode_coding_big5 (struct coding_system *coding)
4767 {
4768   const unsigned char *src = coding->source + coding->consumed;
4769   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* Start of the byte sequence handled by the current iteration.  */
4770   const unsigned char *src_base;
4771   int *charbuf = coding->charbuf + coding->charbuf_used;
4772   /* We may produce one charset annotation in one loop and one more at
4773      the end.  */
4774   int *charbuf_end
4775     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4776   int consumed_chars = 0, consumed_chars_base;
4777   int multibytep = coding->src_multibyte;
4778   struct charset *charset_roman, *charset_big5;
4779   Lisp_Object attrs, charset_list, val;
4780   int char_offset = coding->produced_char;
       /* LAST_ID is the charset of the current run of non-ASCII
          characters (charset_ascii if there is none), and LAST_OFFSET is
          the character offset at which that run started.  */
4781   int last_offset = char_offset;
4782   int last_id = charset_ascii;
4783   int eol_dos =
4784     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The byte read ahead after a CR, or -1 if there is none.  */
4785   int byte_after_cr = -1;
4786
4787   CODING_GET_INFO (coding, attrs, charset_list);
4788   val = charset_list;
4789   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4790   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4791
4792   while (1)
4793     {
4794       int c, c1;
4795       struct charset *charset;
4796
4797       src_base = src;
4798       consumed_chars_base = consumed_chars;
4799
           /* Stop when the output buffer is full.  A byte already read
              ahead after a CR is given back by moving SRC_BASE one byte
              backward.  */
4800       if (charbuf >= charbuf_end)
4801         {
4802           if (byte_after_cr >= 0)
4803             src_base--;
4804           break;
4805         }
4806
           /* Use the byte read ahead after a CR, if any, before reading
              a new one.  */
4807       if (byte_after_cr >= 0)
4808         c = byte_after_cr, byte_after_cr = -1;
4809       else
4810         ONE_MORE_BYTE (c);
4811
           /* NOTE(review): a negative C presumably denotes a source
              character that is not a raw byte -- confirm against the
              definition of ONE_MORE_BYTE.  */
4812       if (c < 0)
4813         goto invalid_code;
4814       if (c < 0x80)
4815         {
               /* With DOS-style EOL, read the byte following a CR ahead
                  of time.  */
4816           if (eol_dos && c == '\r')
4817             ONE_MORE_BYTE (byte_after_cr);
4818           charset = charset_roman;
4819         }
4820       else
4821         {
4822           /* BIG5 -> Big5 */
4823           if (c < 0xA1 || c > 0xFE)
4824             goto invalid_code;
4825           ONE_MORE_BYTE (c1);
4826           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4827             goto invalid_code;
4828           c = c << 8 | c1;
4829           charset = charset_big5;
4830         }
           /* When a run of a different non-ASCII charset starts, record
              an annotation for the run that just ended (if any) and
              begin a new run here.  */
4831       if (charset->id != charset_ascii
4832           && last_id != charset->id)
4833         {
4834           if (last_id != charset_ascii)
4835             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4836           last_id = charset->id;
4837           last_offset = char_offset;
4838         }
4839       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4840       *charbuf++ = c;
4841       char_offset++;
4842       continue;
4843
           /* Go back to the start of the offending sequence and produce
              only its first byte, as a raw 8-bit character.  */
4844     invalid_code:
4845       src = src_base;
4846       consumed_chars = consumed_chars_base;
4847       ONE_MORE_BYTE (c);
4848       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4849       char_offset++;
4850       coding->errors++;
4851     }
4852
       /* Reached when the loop is left by `break'; ONE_MORE_BYTE is also
          expected to jump here when the source is exhausted.  */
4853  no_more_source:
4854   if (last_id != charset_ascii)
4855     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4856   coding->consumed_char += consumed_chars_base;
4857   coding->consumed = src_base - coding->source;
4858   coding->charbuf_used = charbuf - coding->charbuf;
4859 }
4860
4861 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4862    This function can encode charsets `ascii', `katakana-jisx0201',
4863    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4864    are sure that all these charsets are registered as official charset
4865    (i.e. do not have extended leading-codes).  Characters of other
4866    charsets are produced without any encoding.  If SJIS_P is 1, encode
4867    SJIS text, else encode BIG5 text.  */
4868
4869 static int
4870 encode_coding_sjis (struct coding_system *coding)
4871 {
4872   int multibytep = coding->dst_multibyte;
4873   int *charbuf = coding->charbuf;
4874   int *charbuf_end = charbuf + coding->charbuf_used;
4875   unsigned char *dst = coding->destination + coding->produced;
4876   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4877   int safe_room = 4;
4878   int produced_chars = 0;
4879   Lisp_Object attrs, charset_list, val;
4880   int ascii_compatible;
4881   struct charset *charset_roman, *charset_kanji, *charset_kana;
4882   struct charset *charset_kanji2;
4883   int c;
4884
4885   CODING_GET_INFO (coding, attrs, charset_list);
4886   val = charset_list;
4887   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4888   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4889   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4890   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4891
4892   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4893
4894   while (charbuf < charbuf_end)
4895     {
4896       ASSURE_DESTINATION (safe_room);
4897       c = *charbuf++;
4898       /* Now encode the character C.  */
4899       if (ASCII_CHAR_P (c) && ascii_compatible)
4900         EMIT_ONE_ASCII_BYTE (c);
4901       else if (CHAR_BYTE8_P (c))
4902         {
4903           c = CHAR_TO_BYTE8 (c);
4904           EMIT_ONE_BYTE (c);
4905         }
4906       else
4907         {
4908           unsigned code;
4909           struct charset *charset = char_charset (c, charset_list, &code);
4910
4911           if (!charset)
4912             {
4913               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4914                 {
4915                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4916                   charset = CHARSET_FROM_ID (charset_ascii);
4917                 }
4918               else
4919                 {
4920                   c = coding->default_char;
4921                   charset = char_charset (c, charset_list, &code);
4922                 }
4923             }
4924           if (code == CHARSET_INVALID_CODE (charset))
4925             abort ();
4926           if (charset == charset_kanji)
4927             {
4928               int c1, c2;
4929               JIS_TO_SJIS (code);
4930               c1 = code >> 8, c2 = code & 0xFF;
4931               EMIT_TWO_BYTES (c1, c2);
4932             }
4933           else if (charset == charset_kana)
4934             EMIT_ONE_BYTE (code | 0x80);
4935           else if (charset_kanji2 && charset == charset_kanji2)
4936             {
4937               int c1, c2;
4938
4939               c1 = code >> 8;
4940               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
4941                   || c1 == 0x28
4942                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4943                 {
4944                   JIS_TO_SJIS2 (code);
4945                   c1 = code >> 8, c2 = code & 0xFF;
4946                   EMIT_TWO_BYTES (c1, c2);
4947                 }
4948               else
4949                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4950             }
4951           else
4952             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4953         }
4954     }
4955   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4956   coding->produced_char += produced_chars;
4957   coding->produced = dst - coding->destination;
4958   return 0;
4959 }
4960
4961 static int
4962 encode_coding_big5 (struct coding_system *coding)
4963 {
4964   int multibytep = coding->dst_multibyte;
4965   int *charbuf = coding->charbuf;
4966   int *charbuf_end = charbuf + coding->charbuf_used;
4967   unsigned char *dst = coding->destination + coding->produced;
4968   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4969   int safe_room = 4;
4970   int produced_chars = 0;
4971   Lisp_Object attrs, charset_list, val;
4972   int ascii_compatible;
4973   struct charset *charset_roman, *charset_big5;
4974   int c;
4975
4976   CODING_GET_INFO (coding, attrs, charset_list);
4977   val = charset_list;
4978   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4979   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4980   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4981
4982   while (charbuf < charbuf_end)
4983     {
4984       ASSURE_DESTINATION (safe_room);
4985       c = *charbuf++;
4986       /* Now encode the character C.  */
4987       if (ASCII_CHAR_P (c) && ascii_compatible)
4988         EMIT_ONE_ASCII_BYTE (c);
4989       else if (CHAR_BYTE8_P (c))
4990         {
4991           c = CHAR_TO_BYTE8 (c);
4992           EMIT_ONE_BYTE (c);
4993         }
4994       else
4995         {
4996           unsigned code;
4997           struct charset *charset = char_charset (c, charset_list, &code);
4998
4999           if (! charset)
5000             {
5001               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5002                 {
5003                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5004                   charset = CHARSET_FROM_ID (charset_ascii);
5005                 }
5006               else
5007                 {
5008                   c = coding->default_char;
5009                   charset = char_charset (c, charset_list, &code);
5010                 }
5011             }
5012           if (code == CHARSET_INVALID_CODE (charset))
5013             abort ();
5014           if (charset == charset_big5)
5015             {
5016               int c1, c2;
5017
5018               c1 = code >> 8, c2 = code & 0xFF;
5019               EMIT_TWO_BYTES (c1, c2);
5020             }
5021           else
5022             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5023         }
5024     }
5025   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5026   coding->produced_char += produced_chars;
5027   coding->produced = dst - coding->destination;
5028   return 0;
5029 }
5030
5031 \f
5032 /*** 10. CCL handlers ***/
5033
5034 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5035    Check if a text is encoded in a coding system of which
5036    encoder/decoder are written in CCL program.  If it is, return
5037    CATEGORY_MASK_CCL, else return 0.  */
5038
5039 static int
5040 detect_coding_ccl (struct coding_system *coding,
5041                    struct coding_detection_info *detect_info)
5042 {
5043   const unsigned char *src = coding->source, *src_base;
5044   const unsigned char *src_end = coding->source + coding->src_bytes;
5045   int multibytep = coding->src_multibyte;
5046   int consumed_chars = 0;
5047   int found = 0;
5048   unsigned char *valids;
5049   int head_ascii = coding->head_ascii;
5050   Lisp_Object attrs;
5051
5052   detect_info->checked |= CATEGORY_MASK_CCL;
5053
5054   coding = &coding_categories[coding_category_ccl];
5055   valids = CODING_CCL_VALIDS (coding);
5056   attrs = CODING_ID_ATTRS (coding->id);
5057   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5058     src += head_ascii;
5059
5060   while (1)
5061     {
5062       int c;
5063
5064       src_base = src;
5065       ONE_MORE_BYTE (c);
5066       if (c < 0 || ! valids[c])
5067         break;
5068       if ((valids[c] > 1))
5069         found = CATEGORY_MASK_CCL;
5070     }
5071   detect_info->rejected |= CATEGORY_MASK_CCL;
5072   return 0;
5073
5074  no_more_source:
5075   detect_info->found |= found;
5076   return 1;
5077 }
5078
5079 static void
5080 decode_coding_ccl (struct coding_system *coding)
5081 {
5082   const unsigned char *src = coding->source + coding->consumed;
5083   const unsigned char *src_end = coding->source + coding->src_bytes;
5084   int *charbuf = coding->charbuf + coding->charbuf_used;
5085   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5086   int consumed_chars = 0;
5087   int multibytep = coding->src_multibyte;
5088   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5089   int source_charbuf[1024];
5090   int source_byteidx[1025];
5091   Lisp_Object attrs, charset_list;
5092
5093   CODING_GET_INFO (coding, attrs, charset_list);
5094
5095   while (1)
5096     {
5097       const unsigned char *p = src;
5098       int i = 0;
5099
5100       if (multibytep)
5101         {
5102           while (i < 1024 && p < src_end)
5103             {
5104               source_byteidx[i] = p - src;
5105               source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5106             }
5107           source_byteidx[i] = p - src;
5108         }
5109       else
5110         while (i < 1024 && p < src_end)
5111           source_charbuf[i++] = *p++;
5112
5113       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5114         ccl->last_block = 1;
5115       ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5116                   charset_list);
5117       charbuf += ccl->produced;
5118       if (multibytep)
5119         src += source_byteidx[ccl->consumed];
5120       else
5121         src += ccl->consumed;
5122       consumed_chars += ccl->consumed;
5123       if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5124         break;
5125     }
5126
5127   switch (ccl->status)
5128     {
5129     case CCL_STAT_SUSPEND_BY_SRC:
5130       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5131       break;
5132     case CCL_STAT_SUSPEND_BY_DST:
5133       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5134       break;
5135     case CCL_STAT_QUIT:
5136     case CCL_STAT_INVALID_CMD:
5137       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5138       break;
5139     default:
5140       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5141       break;
5142     }
5143   coding->consumed_char += consumed_chars;
5144   coding->consumed = src - coding->source;
5145   coding->charbuf_used = charbuf - coding->charbuf;
5146 }
5147
5148 static int
5149 encode_coding_ccl (struct coding_system *coding)
5150 {
5151   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5152   int multibytep = coding->dst_multibyte;
5153   int *charbuf = coding->charbuf;
5154   int *charbuf_end = charbuf + coding->charbuf_used;
5155   unsigned char *dst = coding->destination + coding->produced;
5156   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5157   int destination_charbuf[1024];
5158   int i, produced_chars = 0;
5159   Lisp_Object attrs, charset_list;
5160
5161   CODING_GET_INFO (coding, attrs, charset_list);
5162   if (coding->consumed_char == coding->src_chars
5163       && coding->mode & CODING_MODE_LAST_BLOCK)
5164     ccl->last_block = 1;
5165
5166   while (charbuf < charbuf_end)
5167     {
5168       ccl_driver (ccl, charbuf, destination_charbuf,
5169                   charbuf_end - charbuf, 1024, charset_list);
5170       if (multibytep)
5171         {
5172           ASSURE_DESTINATION (ccl->produced * 2);
5173           for (i = 0; i < ccl->produced; i++)
5174             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5175         }
5176       else
5177         {
5178           ASSURE_DESTINATION (ccl->produced);
5179           for (i = 0; i < ccl->produced; i++)
5180             *dst++ = destination_charbuf[i] & 0xFF;
5181           produced_chars += ccl->produced;
5182         }
5183       charbuf += ccl->consumed;
5184       if (ccl->status == CCL_STAT_QUIT
5185           || ccl->status == CCL_STAT_INVALID_CMD)
5186         break;
5187     }
5188
5189   switch (ccl->status)
5190     {
5191     case CCL_STAT_SUSPEND_BY_SRC:
5192       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5193       break;
5194     case CCL_STAT_SUSPEND_BY_DST:
5195       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5196       break;
5197     case CCL_STAT_QUIT:
5198     case CCL_STAT_INVALID_CMD:
5199       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5200       break;
5201     default:
5202       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5203       break;
5204     }
5205
5206   coding->produced_char += produced_chars;
5207   coding->produced = dst - coding->destination;
5208   return 0;
5209 }
5210
5211
5212 \f
5213 /*** 10, 11. no-conversion handlers ***/
5214
5215 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
5216
5217 static void
5218 decode_coding_raw_text (struct coding_system *coding)
5219 {
5220   int eol_dos =
5221     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5222
5223   coding->chars_at_source = 1;
5224   coding->consumed_char = coding->src_chars;
5225   coding->consumed = coding->src_bytes;
5226   if (eol_dos && coding->source[coding->src_bytes - 1] == '\r')
5227     {
5228       coding->consumed_char--;
5229       coding->consumed--;
5230       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5231     }
5232   else
5233     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5234 }
5235
5236 static int
5237 encode_coding_raw_text (struct coding_system *coding)
5238 {
5239   int multibytep = coding->dst_multibyte;
5240   int *charbuf = coding->charbuf;
5241   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5242   unsigned char *dst = coding->destination + coding->produced;
5243   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5244   int produced_chars = 0;
5245   int c;
5246
5247   if (multibytep)
5248     {
5249       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5250
5251       if (coding->src_multibyte)
5252         while (charbuf < charbuf_end)
5253           {
5254             ASSURE_DESTINATION (safe_room);
5255             c = *charbuf++;
5256             if (ASCII_CHAR_P (c))
5257               EMIT_ONE_ASCII_BYTE (c);
5258             else if (CHAR_BYTE8_P (c))
5259               {
5260                 c = CHAR_TO_BYTE8 (c);
5261                 EMIT_ONE_BYTE (c);
5262               }
5263             else
5264               {
5265                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5266
5267                 CHAR_STRING_ADVANCE (c, p1);
5268                 do
5269                   {
5270                     EMIT_ONE_BYTE (*p0);
5271                     p0++;
5272                   }
5273                 while (p0 < p1);
5274               }
5275           }
5276       else
5277         while (charbuf < charbuf_end)
5278           {
5279             ASSURE_DESTINATION (safe_room);
5280             c = *charbuf++;
5281             EMIT_ONE_BYTE (c);
5282           }
5283     }
5284   else
5285     {
5286       if (coding->src_multibyte)
5287         {
5288           int safe_room = MAX_MULTIBYTE_LENGTH;
5289
5290           while (charbuf < charbuf_end)
5291             {
5292               ASSURE_DESTINATION (safe_room);
5293               c = *charbuf++;
5294               if (ASCII_CHAR_P (c))
5295                 *dst++ = c;
5296               else if (CHAR_BYTE8_P (c))
5297                 *dst++ = CHAR_TO_BYTE8 (c);
5298               else
5299                 CHAR_STRING_ADVANCE (c, dst);
5300             }
5301         }
5302       else
5303         {
5304           ASSURE_DESTINATION (charbuf_end - charbuf);
5305           while (charbuf < charbuf_end && dst < dst_end)
5306             *dst++ = *charbuf++;
5307         }
5308       produced_chars = dst - (coding->destination + coding->produced);
5309     }
5310   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5311   coding->produced_char += produced_chars;
5312   coding->produced = dst - coding->destination;
5313   return 0;
5314 }
5315
5316 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5317    Check if a text is encoded in a charset-based coding system.  If it
5318    is, return 1, else return 0.  */
5319
5320 static int
5321 detect_coding_charset (struct coding_system *coding,
5322                        struct coding_detection_info *detect_info)
5323 {
5324   const unsigned char *src = coding->source, *src_base;
5325   const unsigned char *src_end = coding->source + coding->src_bytes;
5326   int multibytep = coding->src_multibyte;
5327   int consumed_chars = 0;
5328   Lisp_Object attrs, valids, name;
5329   int found = 0;
5330   int head_ascii = coding->head_ascii;
5331   int check_latin_extra = 0;
5332
5333   detect_info->checked |= CATEGORY_MASK_CHARSET;
5334
5335   coding = &coding_categories[coding_category_charset];
5336   attrs = CODING_ID_ATTRS (coding->id);
5337   valids = AREF (attrs, coding_attr_charset_valids);
5338   name = CODING_ID_NAME (coding->id);
5339   if (strncmp (SSDATA (SYMBOL_NAME (name)),
5340                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5341       || strncmp (SSDATA (SYMBOL_NAME (name)),
5342                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5343     check_latin_extra = 1;
5344
5345   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5346     src += head_ascii;
5347
5348   while (1)
5349     {
5350       int c;
5351       Lisp_Object val;
5352       struct charset *charset;
5353       int dim, idx;
5354
5355       src_base = src;
5356       ONE_MORE_BYTE (c);
5357       if (c < 0)
5358         continue;
5359       val = AREF (valids, c);
5360       if (NILP (val))
5361         break;
5362       if (c >= 0x80)
5363         {
5364           if (c < 0xA0
5365               && check_latin_extra
5366               && (!VECTORP (Vlatin_extra_code_table)
5367                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5368             break;
5369           found = CATEGORY_MASK_CHARSET;
5370         }
5371       if (INTEGERP (val))
5372         {
5373           charset = CHARSET_FROM_ID (XFASTINT (val));
5374           dim = CHARSET_DIMENSION (charset);
5375           for (idx = 1; idx < dim; idx++)
5376             {
5377               if (src == src_end)
5378                 goto too_short;
5379               ONE_MORE_BYTE (c);
5380               if (c < charset->code_space[(dim - 1 - idx) * 2]
5381                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5382                 break;
5383             }
5384           if (idx < dim)
5385             break;
5386         }
5387       else
5388         {
5389           idx = 1;
5390           for (; CONSP (val); val = XCDR (val))
5391             {
5392               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5393               dim = CHARSET_DIMENSION (charset);
5394               while (idx < dim)
5395                 {
5396                   if (src == src_end)
5397                     goto too_short;
5398                   ONE_MORE_BYTE (c);
5399                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5400                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5401                     break;
5402                   idx++;
5403                 }
5404               if (idx == dim)
5405                 {
5406                   val = Qnil;
5407                   break;
5408                 }
5409             }
5410           if (CONSP (val))
5411             break;
5412         }
5413     }
5414  too_short:
5415   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5416   return 0;
5417
5418  no_more_source:
5419   detect_info->found |= found;
5420   return 1;
5421 }
5422
5423 static void
5424 decode_coding_charset (struct coding_system *coding)
5425 {
5426   const unsigned char *src = coding->source + coding->consumed;
5427   const unsigned char *src_end = coding->source + coding->src_bytes;
5428   const unsigned char *src_base;
5429   int *charbuf = coding->charbuf + coding->charbuf_used;
5430   /* We may produce one charset annotation in one loop and one more at
5431      the end.  */
5432   int *charbuf_end
5433     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5434   int consumed_chars = 0, consumed_chars_base;
5435   int multibytep = coding->src_multibyte;
5436   Lisp_Object attrs, charset_list, valids;
5437   int char_offset = coding->produced_char;
5438   int last_offset = char_offset;
5439   int last_id = charset_ascii;
5440   int eol_dos =
5441     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5442   int byte_after_cr = -1;
5443
5444   CODING_GET_INFO (coding, attrs, charset_list);
5445   valids = AREF (attrs, coding_attr_charset_valids);
5446
5447   while (1)
5448     {
5449       int c;
5450       Lisp_Object val;
5451       struct charset *charset;
5452       int dim;
5453       int len = 1;
5454       unsigned code;
5455
5456       src_base = src;
5457       consumed_chars_base = consumed_chars;
5458
5459       if (charbuf >= charbuf_end)
5460         {
5461           if (byte_after_cr >= 0)
5462             src_base--;
5463           break;
5464         }
5465
5466       if (byte_after_cr >= 0)
5467         {
5468           c = byte_after_cr;
5469           byte_after_cr = -1;
5470         }
5471       else
5472         {
5473           ONE_MORE_BYTE (c);
5474           if (eol_dos && c == '\r')
5475             ONE_MORE_BYTE (byte_after_cr);
5476         }
5477       if (c < 0)
5478         goto invalid_code;
5479       code = c;
5480
5481       val = AREF (valids, c);
5482       if (! INTEGERP (val) && ! CONSP (val))
5483         goto invalid_code;
5484       if (INTEGERP (val))
5485         {
5486           charset = CHARSET_FROM_ID (XFASTINT (val));
5487           dim = CHARSET_DIMENSION (charset);
5488           while (len < dim)
5489             {
5490               ONE_MORE_BYTE (c);
5491               code = (code << 8) | c;
5492               len++;
5493             }
5494           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5495                               charset, code, c);
5496         }
5497       else
5498         {
5499           /* VAL is a list of charset IDs.  It is assured that the
5500              list is sorted by charset dimensions (smaller one
5501              comes first).  */
5502           while (CONSP (val))
5503             {
5504               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5505               dim = CHARSET_DIMENSION (charset);
5506               while (len < dim)
5507                 {
5508                   ONE_MORE_BYTE (c);
5509                   code = (code << 8) | c;
5510                   len++;
5511                 }
5512               CODING_DECODE_CHAR (coding, src, src_base,
5513                                   src_end, charset, code, c);
5514               if (c >= 0)
5515                 break;
5516               val = XCDR (val);
5517             }
5518         }
5519       if (c < 0)
5520         goto invalid_code;
5521       if (charset->id != charset_ascii
5522           && last_id != charset->id)
5523         {
5524           if (last_id != charset_ascii)
5525             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5526           last_id = charset->id;
5527           last_offset = char_offset;
5528         }
5529
5530       *charbuf++ = c;
5531       char_offset++;
5532       continue;
5533
5534     invalid_code:
5535       src = src_base;
5536       consumed_chars = consumed_chars_base;
5537       ONE_MORE_BYTE (c);
5538       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5539       char_offset++;
5540       coding->errors++;
5541     }
5542
5543  no_more_source:
5544   if (last_id != charset_ascii)
5545     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5546   coding->consumed_char += consumed_chars_base;
5547   coding->consumed = src_base - coding->source;
5548   coding->charbuf_used = charbuf - coding->charbuf;
5549 }
5550
5551 static int
5552 encode_coding_charset (struct coding_system *coding)
5553 {
5554   int multibytep = coding->dst_multibyte;
5555   int *charbuf = coding->charbuf;
5556   int *charbuf_end = charbuf + coding->charbuf_used;
5557   unsigned char *dst = coding->destination + coding->produced;
5558   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5559   int safe_room = MAX_MULTIBYTE_LENGTH;
5560   int produced_chars = 0;
5561   Lisp_Object attrs, charset_list;
5562   int ascii_compatible;
5563   int c;
5564
5565   CODING_GET_INFO (coding, attrs, charset_list);
5566   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5567
5568   while (charbuf < charbuf_end)
5569     {
5570       struct charset *charset;
5571       unsigned code;
5572
5573       ASSURE_DESTINATION (safe_room);
5574       c = *charbuf++;
5575       if (ascii_compatible && ASCII_CHAR_P (c))
5576         EMIT_ONE_ASCII_BYTE (c);
5577       else if (CHAR_BYTE8_P (c))
5578         {
5579           c = CHAR_TO_BYTE8 (c);
5580           EMIT_ONE_BYTE (c);
5581         }
5582       else
5583         {
5584           charset = char_charset (c, charset_list, &code);
5585           if (charset)
5586             {
5587               if (CHARSET_DIMENSION (charset) == 1)
5588                 EMIT_ONE_BYTE (code);
5589               else if (CHARSET_DIMENSION (charset) == 2)
5590                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5591               else if (CHARSET_DIMENSION (charset) == 3)
5592                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5593               else
5594                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5595                                  (code >> 8) & 0xFF, code & 0xFF);
5596             }
5597           else
5598             {
5599               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5600                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5601               else
5602                 c = coding->default_char;
5603               EMIT_ONE_BYTE (c);
5604             }
5605         }
5606     }
5607
5608   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5609   coding->produced_char += produced_chars;
5610   coding->produced = dst - coding->destination;
5611   return 0;
5612 }
5613
5614 \f
5615 /*** 10. C library functions ***/
5616
5617 /* Setup coding context CODING from information about CODING_SYSTEM.
5618    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5619    CODING_SYSTEM is invalid, signal an error.  */
5620
5621 void
5622 setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5623 {
5624   Lisp_Object attrs;
5625   Lisp_Object eol_type;
5626   Lisp_Object coding_type;
5627   Lisp_Object val;
5628
5629   if (NILP (coding_system))
5630     coding_system = Qundecided;
5631
5632   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5633
5634   attrs = CODING_ID_ATTRS (coding->id);
5635   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5636
5637   coding->mode = 0;
5638   coding->head_ascii = -1;
5639   if (VECTORP (eol_type))
5640     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5641                             | CODING_REQUIRE_DETECTION_MASK);
5642   else if (! EQ (eol_type, Qunix))
5643     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5644                             | CODING_REQUIRE_ENCODING_MASK);
5645   else
5646     coding->common_flags = 0;
5647   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5648     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5649   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5650     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5651   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5652     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5653
5654   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5655   coding->max_charset_id = SCHARS (val) - 1;
5656   coding->safe_charsets = SDATA (val);
5657   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5658   coding->carryover_bytes = 0;
5659
5660   coding_type = CODING_ATTR_TYPE (attrs);
5661   if (EQ (coding_type, Qundecided))
5662     {
5663       coding->detector = NULL;
5664       coding->decoder = decode_coding_raw_text;
5665       coding->encoder = encode_coding_raw_text;
5666       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5667     }
5668   else if (EQ (coding_type, Qiso_2022))
5669     {
5670       int i;
5671       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5672
5673       /* Invoke graphic register 0 to plane 0.  */
5674       CODING_ISO_INVOCATION (coding, 0) = 0;
5675       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5676       CODING_ISO_INVOCATION (coding, 1)
5677         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5678       /* Setup the initial status of designation.  */
5679       for (i = 0; i < 4; i++)
5680         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5681       /* Not single shifting initially.  */
5682       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5683       /* Beginning of buffer should also be regarded as bol. */
5684       CODING_ISO_BOL (coding) = 1;
5685       coding->detector = detect_coding_iso_2022;
5686       coding->decoder = decode_coding_iso_2022;
5687       coding->encoder = encode_coding_iso_2022;
5688       if (flags & CODING_ISO_FLAG_SAFE)
5689         coding->mode |= CODING_MODE_SAFE_ENCODING;
5690       coding->common_flags
5691         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5692             | CODING_REQUIRE_FLUSHING_MASK);
5693       if (flags & CODING_ISO_FLAG_COMPOSITION)
5694         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5695       if (flags & CODING_ISO_FLAG_DESIGNATION)
5696         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5697       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5698         {
5699           setup_iso_safe_charsets (attrs);
5700           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5701           coding->max_charset_id = SCHARS (val) - 1;
5702           coding->safe_charsets = SDATA (val);
5703         }
5704       CODING_ISO_FLAGS (coding) = flags;
5705       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5706       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5707       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5708       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5709     }
5710   else if (EQ (coding_type, Qcharset))
5711     {
5712       coding->detector = detect_coding_charset;
5713       coding->decoder = decode_coding_charset;
5714       coding->encoder = encode_coding_charset;
5715       coding->common_flags
5716         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5717     }
5718   else if (EQ (coding_type, Qutf_8))
5719     {
5720       val = AREF (attrs, coding_attr_utf_bom);
5721       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5722                                    : EQ (val, Qt) ? utf_with_bom
5723                                    : utf_without_bom);
5724       coding->detector = detect_coding_utf_8;
5725       coding->decoder = decode_coding_utf_8;
5726       coding->encoder = encode_coding_utf_8;
5727       coding->common_flags
5728         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5729       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5730         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5731     }
5732   else if (EQ (coding_type, Qutf_16))
5733     {
5734       val = AREF (attrs, coding_attr_utf_bom);
5735       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5736                                     : EQ (val, Qt) ? utf_with_bom
5737                                     : utf_without_bom);
5738       val = AREF (attrs, coding_attr_utf_16_endian);
5739       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5740                                        : utf_16_little_endian);
5741       CODING_UTF_16_SURROGATE (coding) = 0;
5742       coding->detector = detect_coding_utf_16;
5743       coding->decoder = decode_coding_utf_16;
5744       coding->encoder = encode_coding_utf_16;
5745       coding->common_flags
5746         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5747       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5748         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5749     }
5750   else if (EQ (coding_type, Qccl))
5751     {
5752       coding->detector = detect_coding_ccl;
5753       coding->decoder = decode_coding_ccl;
5754       coding->encoder = encode_coding_ccl;
5755       coding->common_flags
5756         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5757             | CODING_REQUIRE_FLUSHING_MASK);
5758     }
5759   else if (EQ (coding_type, Qemacs_mule))
5760     {
5761       coding->detector = detect_coding_emacs_mule;
5762       coding->decoder = decode_coding_emacs_mule;
5763       coding->encoder = encode_coding_emacs_mule;
5764       coding->common_flags
5765         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5766       coding->spec.emacs_mule.full_support = 1;
5767       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5768           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5769         {
5770           Lisp_Object tail, safe_charsets;
5771           int max_charset_id = 0;
5772
5773           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5774                tail = XCDR (tail))
5775             if (max_charset_id < XFASTINT (XCAR (tail)))
5776               max_charset_id = XFASTINT (XCAR (tail));
5777           safe_charsets = make_uninit_string (max_charset_id + 1);
5778           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5779           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5780                tail = XCDR (tail))
5781             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5782           coding->max_charset_id = max_charset_id;
5783           coding->safe_charsets = SDATA (safe_charsets);
5784           coding->spec.emacs_mule.full_support = 1;
5785         }
5786       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5787       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5788     }
5789   else if (EQ (coding_type, Qshift_jis))
5790     {
5791       coding->detector = detect_coding_sjis;
5792       coding->decoder = decode_coding_sjis;
5793       coding->encoder = encode_coding_sjis;
5794       coding->common_flags
5795         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5796     }
5797   else if (EQ (coding_type, Qbig5))
5798     {
5799       coding->detector = detect_coding_big5;
5800       coding->decoder = decode_coding_big5;
5801       coding->encoder = encode_coding_big5;
5802       coding->common_flags
5803         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5804     }
5805   else                          /* EQ (coding_type, Qraw_text) */
5806     {
5807       coding->detector = NULL;
5808       coding->decoder = decode_coding_raw_text;
5809       coding->encoder = encode_coding_raw_text;
5810       if (! EQ (eol_type, Qunix))
5811         {
5812           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5813           if (! VECTORP (eol_type))
5814             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5815         }
5816
5817     }
5818
5819   return;
5820 }
5821
5822 /* Return a list of charsets supported by CODING.  */
5823
5824 Lisp_Object
5825 coding_charset_list (struct coding_system *coding)
5826 {
5827   Lisp_Object attrs, charset_list;
5828
5829   CODING_GET_INFO (coding, attrs, charset_list);
5830   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5831     {
5832       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5833
5834       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5835         charset_list = Viso_2022_charset_list;
5836     }
5837   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5838     {
5839       charset_list = Vemacs_mule_charset_list;
5840     }
5841   return charset_list;
5842 }
5843
5844
5845 /* Return a list of charsets supported by CODING-SYSTEM.  */
5846
5847 Lisp_Object
5848 coding_system_charset_list (Lisp_Object coding_system)
5849 {
5850   int id;
5851   Lisp_Object attrs, charset_list;
5852
5853   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5854   attrs = CODING_ID_ATTRS (id);
5855
5856   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5857     {
5858       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5859
5860       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5861         charset_list = Viso_2022_charset_list;
5862       else
5863         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5864     }
5865   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5866     {
5867       charset_list = Vemacs_mule_charset_list;
5868     }
5869   else
5870     {
5871       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5872     }
5873   return charset_list;
5874 }
5875
5876
5877 /* Return raw-text or one of its subsidiaries that has the same
5878    eol_type as CODING-SYSTEM.  */
5879
5880 Lisp_Object
5881 raw_text_coding_system (Lisp_Object coding_system)
5882 {
5883   Lisp_Object spec, attrs;
5884   Lisp_Object eol_type, raw_text_eol_type;
5885
5886   if (NILP (coding_system))
5887     return Qraw_text;
5888   spec = CODING_SYSTEM_SPEC (coding_system);
5889   attrs = AREF (spec, 0);
5890
5891   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5892     return coding_system;
5893
5894   eol_type = AREF (spec, 2);
5895   if (VECTORP (eol_type))
5896     return Qraw_text;
5897   spec = CODING_SYSTEM_SPEC (Qraw_text);
5898   raw_text_eol_type = AREF (spec, 2);
5899   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5900           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5901           : AREF (raw_text_eol_type, 2));
5902 }
5903
5904
5905 /* If CODING_SYSTEM doesn't specify end-of-line format, return one of
5906    the subsidiary that has the same eol-spec as PARENT (if it is not
5907    nil and specifies end-of-line format) or the system's setting
5908    (system_eol_type).  */
5909
5910 Lisp_Object
5911 coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
5912 {
5913   Lisp_Object spec, eol_type;
5914
5915   if (NILP (coding_system))
5916     coding_system = Qraw_text;
5917   spec = CODING_SYSTEM_SPEC (coding_system);
5918   eol_type = AREF (spec, 2);
5919   if (VECTORP (eol_type))
5920     {
5921       Lisp_Object parent_eol_type;
5922
5923       if (! NILP (parent))
5924         {
5925           Lisp_Object parent_spec;
5926
5927           parent_spec = CODING_SYSTEM_SPEC (parent);
5928           parent_eol_type = AREF (parent_spec, 2);
5929           if (VECTORP (parent_eol_type))
5930             parent_eol_type = system_eol_type;
5931         }
5932       else
5933         parent_eol_type = system_eol_type;
5934       if (EQ (parent_eol_type, Qunix))
5935         coding_system = AREF (eol_type, 0);
5936       else if (EQ (parent_eol_type, Qdos))
5937         coding_system = AREF (eol_type, 1);
5938       else if (EQ (parent_eol_type, Qmac))
5939         coding_system = AREF (eol_type, 2);
5940     }
5941   return coding_system;
5942 }
5943
5944
5945 /* Check if text-conversion and eol-conversion of CODING_SYSTEM are
5946    decided for writing to a process.  If not, complement them, and
5947    return a new coding system.  */
5948
5949 Lisp_Object
5950 complement_process_encoding_system (Lisp_Object coding_system)
5951 {
5952   Lisp_Object coding_base = Qnil, eol_base = Qnil;
5953   Lisp_Object spec, attrs;
5954   int i;
5955
5956   for (i = 0; i < 3; i++)
5957     {
5958       if (i == 1)
5959         coding_system = CDR_SAFE (Vdefault_process_coding_system);
5960       else if (i == 2)
5961         coding_system = preferred_coding_system ();
5962       spec = CODING_SYSTEM_SPEC (coding_system);
5963       if (NILP (spec))
5964         continue;
5965       attrs = AREF (spec, 0);
5966       if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
5967         coding_base = CODING_ATTR_BASE_NAME (attrs);
5968       if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
5969         eol_base = coding_system;
5970       if (! NILP (coding_base) && ! NILP (eol_base))
5971         break;
5972     }
5973
5974   if (i > 0)
5975     /* The original CODING_SYSTEM didn't specify text-conversion or
5976        eol-conversion.  Be sure that we return a fully complemented
5977        coding system.  */
5978     coding_system = coding_inherit_eol_type (coding_base, eol_base);
5979   return coding_system;
5980 }
5981
5982
5983 /* Emacs has a mechanism to automatically detect a coding system if it
5984    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5985    it's impossible to distinguish some coding systems accurately
5986    because they use the same range of codes.  So, at first, coding
5987    systems are grouped into the following categories:
5988
5989    o coding-category-emacs-mule
5990
5991         The category for a coding system which has the same code range
5992         as Emacs' internal format.  Assigned the coding-system (Lisp
5993         symbol) `emacs-mule' by default.
5994
5995    o coding-category-sjis
5996
5997         The category for a coding system which has the same code range
5998         as SJIS.  Assigned the coding-system (Lisp
5999         symbol) `japanese-shift-jis' by default.
6000
6001    o coding-category-iso-7
6002
6003         The category for a coding system which has the same code range
6004         as ISO2022 of 7-bit environment.  This doesn't use any locking
6005         shift and single shift functions.  This can encode/decode all
6006         charsets.  Assigned the coding-system (Lisp symbol)
6007         `iso-2022-7bit' by default.
6008
6009    o coding-category-iso-7-tight
6010
6011         Same as coding-category-iso-7 except that this can
6012         encode/decode only the specified charsets.
6013
6014    o coding-category-iso-8-1
6015
6016         The category for a coding system which has the same code range
6017         as ISO2022 of 8-bit environment and graphic plane 1 used only
6018         for DIMENSION1 charset.  This doesn't use any locking shift
6019         and single shift functions.  Assigned the coding-system (Lisp
6020         symbol) `iso-latin-1' by default.
6021
6022    o coding-category-iso-8-2
6023
6024         The category for a coding system which has the same code range
6025         as ISO2022 of 8-bit environment and graphic plane 1 used only
6026         for DIMENSION2 charset.  This doesn't use any locking shift
6027         and single shift functions.  Assigned the coding-system (Lisp
6028         symbol) `japanese-iso-8bit' by default.
6029
6030    o coding-category-iso-7-else
6031
6032         The category for a coding system which has the same code range
6033         as ISO2022 of 7-bit environment but uses locking shift or
6034         single shift functions.  Assigned the coding-system (Lisp
6035         symbol) `iso-2022-7bit-lock' by default.
6036
6037    o coding-category-iso-8-else
6038
6039         The category for a coding system which has the same code range
6040         as ISO2022 of 8-bit environment but uses locking shift or
6041         single shift functions.  Assigned the coding-system (Lisp
6042         symbol) `iso-2022-8bit-ss2' by default.
6043
6044    o coding-category-big5
6045
6046         The category for a coding system which has the same code range
6047         as BIG5.  Assigned the coding-system (Lisp symbol)
6048         `cn-big5' by default.
6049
6050    o coding-category-utf-8
6051
6052         The category for a coding system which has the same code range
6053         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6054         symbol) `utf-8' by default.
6055
6056    o coding-category-utf-16-be
6057
6058         The category for a coding system in which a text has a
6059         Unicode signature (cf. Unicode Standard) in the order of BIG
6060         endian at the head.  Assigned the coding-system (Lisp symbol)
6061         `utf-16-be' by default.
6062
6063    o coding-category-utf-16-le
6064
6065         The category for a coding system in which a text has a
6066         Unicode signature (cf. Unicode Standard) in the order of
6067         LITTLE endian at the head.  Assigned the coding-system (Lisp
6068         symbol) `utf-16-le' by default.
6069
6070    o coding-category-ccl
6071
6072         The category for a coding system of which encoder/decoder is
6073         written in CCL programs.  The default value is nil, i.e., no
6074         coding system is assigned.
6075
6076    o coding-category-binary
6077
6078         The category for a coding system not categorized in any of the
6079         above.  Assigned the coding-system (Lisp symbol)
6080         `no-conversion' by default.
6081
6082    Each of them is a Lisp symbol and the value is an actual
6083    `coding-system's (this is also a Lisp symbol) assigned by a user.
6084    What Emacs does actually is to detect a category of coding system.
6085    Then, it uses a `coding-system' assigned to it.  If Emacs can't
6086    decide only one possible category, it selects a category of the
6087    highest priority.  Priorities of categories are also specified by a
6088    user in a Lisp variable `coding-category-list'.
6089
6090 */
6091
6092 #define EOL_SEEN_NONE   0
6093 #define EOL_SEEN_LF     1
6094 #define EOL_SEEN_CR     2
6095 #define EOL_SEEN_CRLF   4
6096
6097 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6098    SOURCE is encoded.  If CATEGORY is one of
6099    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6100    two-byte, else they are encoded by one-byte.
6101
6102    Return one of EOL_SEEN_XXX.  */
6103
6104 #define MAX_EOL_CHECK_COUNT 3
6105
6106 static int
6107 detect_eol (const unsigned char *source, EMACS_INT src_bytes,
6108             enum coding_category category)
6109 {
6110   const unsigned char *src = source, *src_end = src + src_bytes;
6111   unsigned char c;
6112   int total  = 0;
6113   int eol_seen = EOL_SEEN_NONE;
6114
6115   if ((1 << category) & CATEGORY_MASK_UTF_16)
6116     {
6117       int msb, lsb;
6118
6119       msb = category == (coding_category_utf_16_le
6120                          | coding_category_utf_16_le_nosig);
6121       lsb = 1 - msb;
6122
6123       while (src + 1 < src_end)
6124         {
6125           c = src[lsb];
6126           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6127             {
6128               int this_eol;
6129
6130               if (c == '\n')
6131                 this_eol = EOL_SEEN_LF;
6132               else if (src + 3 >= src_end
6133                        || src[msb + 2] != 0
6134                        || src[lsb + 2] != '\n')
6135                 this_eol = EOL_SEEN_CR;
6136               else
6137                 {
6138                   this_eol = EOL_SEEN_CRLF;
6139                   src += 2;
6140                 }
6141
6142               if (eol_seen == EOL_SEEN_NONE)
6143                 /* This is the first end-of-line.  */
6144                 eol_seen = this_eol;
6145               else if (eol_seen != this_eol)
6146                 {
6147                   /* The found type is different from what found before.
6148                      Allow for stray ^M characters in DOS EOL files.  */
6149                   if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6150                       || (eol_seen == EOL_SEEN_CRLF
6151                           && this_eol == EOL_SEEN_CR))
6152                     eol_seen = EOL_SEEN_CRLF;
6153                   else
6154                     {
6155                       eol_seen = EOL_SEEN_LF;
6156                       break;
6157                     }
6158                 }
6159               if (++total == MAX_EOL_CHECK_COUNT)
6160                 break;
6161             }
6162           src += 2;
6163         }
6164     }
6165   else
6166     while (src < src_end)
6167       {
6168         c = *src++;
6169         if (c == '\n' || c == '\r')
6170           {
6171             int this_eol;
6172
6173             if (c == '\n')
6174               this_eol = EOL_SEEN_LF;
6175             else if (src >= src_end || *src != '\n')
6176               this_eol = EOL_SEEN_CR;
6177             else
6178               this_eol = EOL_SEEN_CRLF, src++;
6179
6180             if (eol_seen == EOL_SEEN_NONE)
6181               /* This is the first end-of-line.  */
6182               eol_seen = this_eol;
6183             else if (eol_seen != this_eol)
6184               {
6185                 /* The found type is different from what found before.
6186                    Allow for stray ^M characters in DOS EOL files.  */
6187                 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6188                     || (eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR))
6189                   eol_seen = EOL_SEEN_CRLF;
6190                 else
6191                   {
6192                     eol_seen = EOL_SEEN_LF;
6193                     break;
6194                   }
6195               }
6196             if (++total == MAX_EOL_CHECK_COUNT)
6197               break;
6198           }
6199       }
6200   return eol_seen;
6201 }
6202
6203
6204 static Lisp_Object
6205 adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6206 {
6207   Lisp_Object eol_type;
6208
6209   eol_type = CODING_ID_EOL_TYPE (coding->id);
6210   if (eol_seen & EOL_SEEN_LF)
6211     {
6212       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6213       eol_type = Qunix;
6214     }
6215   else if (eol_seen & EOL_SEEN_CRLF)
6216     {
6217       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6218       eol_type = Qdos;
6219     }
6220   else if (eol_seen & EOL_SEEN_CR)
6221     {
6222       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6223       eol_type = Qmac;
6224     }
6225   return eol_type;
6226 }
6227
6228 /* Detect how a text specified in CODING is encoded.  If a coding
6229    system is detected, update fields of CODING by the detected coding
6230    system.  */
6231
6232 void
6233 detect_coding (struct coding_system *coding)
6234 {
6235   const unsigned char *src, *src_end;
6236   int saved_mode = coding->mode;
6237
6238   coding->consumed = coding->consumed_char = 0;
6239   coding->produced = coding->produced_char = 0;
6240   coding_set_source (coding);
6241
6242   src_end = coding->source + coding->src_bytes;
6243   coding->head_ascii = 0;
6244
6245   /* If we have not yet decided the text encoding type, detect it
6246      now.  */
6247   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6248     {
6249       int c, i;
6250       struct coding_detection_info detect_info;
6251       int null_byte_found = 0, eight_bit_found = 0;
6252
6253       detect_info.checked = detect_info.found = detect_info.rejected = 0;
6254       for (src = coding->source; src < src_end; src++)
6255         {
6256           c = *src;
6257           if (c & 0x80)
6258             {
6259               eight_bit_found = 1;
6260               if (null_byte_found)
6261                 break;
6262             }
6263           else if (c < 0x20)
6264             {
6265               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6266                   && ! inhibit_iso_escape_detection
6267                   && ! detect_info.checked)
6268                 {
6269                   if (detect_coding_iso_2022 (coding, &detect_info))
6270                     {
6271                       /* We have scanned the whole data.  */
6272                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6273                         {
6274                           /* We didn't find an 8-bit code.  We may
6275                              have found a null-byte, but it's very
6276                              rare that a binary file conforms to
6277                              ISO-2022.  */
6278                           src = src_end;
6279                           coding->head_ascii = src - coding->source;
6280                         }
6281                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6282                       break;
6283                     }
6284                 }
6285               else if (! c && !inhibit_null_byte_detection)
6286                 {
6287                   null_byte_found = 1;
6288                   if (eight_bit_found)
6289                     break;
6290                 }
6291               if (! eight_bit_found)
6292                 coding->head_ascii++;
6293             }
6294           else if (! eight_bit_found)
6295             coding->head_ascii++;
6296         }
6297
6298       if (null_byte_found || eight_bit_found
6299           || coding->head_ascii < coding->src_bytes
6300           || detect_info.found)
6301         {
6302           enum coding_category category;
6303           struct coding_system *this;
6304
6305           if (coding->head_ascii == coding->src_bytes)
6306             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6307             for (i = 0; i < coding_category_raw_text; i++)
6308               {
6309                 category = coding_priorities[i];
6310                 this = coding_categories + category;
6311                 if (detect_info.found & (1 << category))
6312                   break;
6313               }
6314           else
6315             {
6316               if (null_byte_found)
6317                 {
6318                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6319                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6320                 }
6321               for (i = 0; i < coding_category_raw_text; i++)
6322                 {
6323                   category = coding_priorities[i];
6324                   this = coding_categories + category;
6325                   if (this->id < 0)
6326                     {
6327                       /* No coding system of this category is defined.  */
6328                       detect_info.rejected |= (1 << category);
6329                     }
6330                   else if (category >= coding_category_raw_text)
6331                     continue;
6332                   else if (detect_info.checked & (1 << category))
6333                     {
6334                       if (detect_info.found & (1 << category))
6335                         break;
6336                     }
6337                   else if ((*(this->detector)) (coding, &detect_info)
6338                            && detect_info.found & (1 << category))
6339                     {
6340                       if (category == coding_category_utf_16_auto)
6341                         {
6342                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6343                             category = coding_category_utf_16_le;
6344                           else
6345                             category = coding_category_utf_16_be;
6346                         }
6347                       break;
6348                     }
6349                 }
6350             }
6351
6352           if (i < coding_category_raw_text)
6353             setup_coding_system (CODING_ID_NAME (this->id), coding);
6354           else if (null_byte_found)
6355             setup_coding_system (Qno_conversion, coding);
6356           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6357                    == CATEGORY_MASK_ANY)
6358             setup_coding_system (Qraw_text, coding);
6359           else if (detect_info.rejected)
6360             for (i = 0; i < coding_category_raw_text; i++)
6361               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6362                 {
6363                   this = coding_categories + coding_priorities[i];
6364                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6365                   break;
6366                 }
6367         }
6368     }
6369   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6370            == coding_category_utf_8_auto)
6371     {
6372       Lisp_Object coding_systems;
6373       struct coding_detection_info detect_info;
6374
6375       coding_systems
6376         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6377       detect_info.found = detect_info.rejected = 0;
6378       coding->head_ascii = 0;
6379       if (CONSP (coding_systems)
6380           && detect_coding_utf_8 (coding, &detect_info))
6381         {
6382           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6383             setup_coding_system (XCAR (coding_systems), coding);
6384           else
6385             setup_coding_system (XCDR (coding_systems), coding);
6386         }
6387     }
6388   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6389            == coding_category_utf_16_auto)
6390     {
6391       Lisp_Object coding_systems;
6392       struct coding_detection_info detect_info;
6393
6394       coding_systems
6395         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6396       detect_info.found = detect_info.rejected = 0;
6397       coding->head_ascii = 0;
6398       if (CONSP (coding_systems)
6399           && detect_coding_utf_16 (coding, &detect_info))
6400         {
6401           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6402             setup_coding_system (XCAR (coding_systems), coding);
6403           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6404             setup_coding_system (XCDR (coding_systems), coding);
6405         }
6406     }
6407   coding->mode = saved_mode;
6408 }
6409
6410
6411 static void
6412 decode_eol (struct coding_system *coding)
6413 {
6414   Lisp_Object eol_type;
6415   unsigned char *p, *pbeg, *pend;
6416
6417   eol_type = CODING_ID_EOL_TYPE (coding->id);
6418   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6419     return;
6420
6421   if (NILP (coding->dst_object))
6422     pbeg = coding->destination;
6423   else
6424     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6425   pend = pbeg + coding->produced;
6426
6427   if (VECTORP (eol_type))
6428     {
6429       int eol_seen = EOL_SEEN_NONE;
6430
6431       for (p = pbeg; p < pend; p++)
6432         {
6433           if (*p == '\n')
6434             eol_seen |= EOL_SEEN_LF;
6435           else if (*p == '\r')
6436             {
6437               if (p + 1 < pend && *(p + 1) == '\n')
6438                 {
6439                   eol_seen |= EOL_SEEN_CRLF;
6440                   p++;
6441                 }
6442               else
6443                 eol_seen |= EOL_SEEN_CR;
6444             }
6445         }
6446       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6447       if ((eol_seen & EOL_SEEN_CRLF) != 0
6448           && (eol_seen & EOL_SEEN_CR) != 0
6449           && (eol_seen & EOL_SEEN_LF) == 0)
6450         eol_seen = EOL_SEEN_CRLF;
6451       else if (eol_seen != EOL_SEEN_NONE
6452           && eol_seen != EOL_SEEN_LF
6453           && eol_seen != EOL_SEEN_CRLF
6454           && eol_seen != EOL_SEEN_CR)
6455         eol_seen = EOL_SEEN_LF;
6456       if (eol_seen != EOL_SEEN_NONE)
6457         eol_type = adjust_coding_eol_type (coding, eol_seen);
6458     }
6459
6460   if (EQ (eol_type, Qmac))
6461     {
6462       for (p = pbeg; p < pend; p++)
6463         if (*p == '\r')
6464           *p = '\n';
6465     }
6466   else if (EQ (eol_type, Qdos))
6467     {
6468       int n = 0;
6469
6470       if (NILP (coding->dst_object))
6471         {
6472           /* Start deleting '\r' from the tail to minimize the memory
6473              movement.  */
6474           for (p = pend - 2; p >= pbeg; p--)
6475             if (*p == '\r')
6476               {
6477                 memmove (p, p + 1, pend-- - p - 1);
6478                 n++;
6479               }
6480         }
6481       else
6482         {
6483           int pos_byte = coding->dst_pos_byte;
6484           int pos = coding->dst_pos;
6485           int pos_end = pos + coding->produced_char - 1;
6486
6487           while (pos < pos_end)
6488             {
6489               p = BYTE_POS_ADDR (pos_byte);
6490               if (*p == '\r' && p[1] == '\n')
6491                 {
6492                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6493                   n++;
6494                   pos_end--;
6495                 }
6496               pos++;
6497               if (coding->dst_multibyte)
6498                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6499               else
6500                 pos_byte++;
6501             }
6502         }
6503       coding->produced -= n;
6504       coding->produced_char -= n;
6505     }
6506 }
6507
6508
6509 /* Return a translation table (or list of them) from coding system
6510    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6511    decoding (ENCODEP is zero). */
6512
6513 static Lisp_Object
6514 get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6515 {
6516   Lisp_Object standard, translation_table;
6517   Lisp_Object val;
6518
6519   if (NILP (Venable_character_translation))
6520     {
6521       if (max_lookup)
6522         *max_lookup = 0;
6523       return Qnil;
6524     }
6525   if (encodep)
6526     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6527       standard = Vstandard_translation_table_for_encode;
6528   else
6529     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6530       standard = Vstandard_translation_table_for_decode;
6531   if (NILP (translation_table))
6532     translation_table = standard;
6533   else
6534     {
6535       if (SYMBOLP (translation_table))
6536         translation_table = Fget (translation_table, Qtranslation_table);
6537       else if (CONSP (translation_table))
6538         {
6539           translation_table = Fcopy_sequence (translation_table);
6540           for (val = translation_table; CONSP (val); val = XCDR (val))
6541             if (SYMBOLP (XCAR (val)))
6542               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6543         }
6544       if (CHAR_TABLE_P (standard))
6545         {
6546           if (CONSP (translation_table))
6547             translation_table = nconc2 (translation_table,
6548                                         Fcons (standard, Qnil));
6549           else
6550             translation_table = Fcons (translation_table,
6551                                        Fcons (standard, Qnil));
6552         }
6553     }
6554
6555   if (max_lookup)
6556     {
6557       *max_lookup = 1;
6558       if (CHAR_TABLE_P (translation_table)
6559           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6560         {
6561           val = XCHAR_TABLE (translation_table)->extras[1];
6562           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6563             *max_lookup = XFASTINT (val);
6564         }
6565       else if (CONSP (translation_table))
6566         {
6567           Lisp_Object tail;
6568
6569           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6570             if (CHAR_TABLE_P (XCAR (tail))
6571                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6572               {
6573                 Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1];
6574                 if (NATNUMP (tailval) && *max_lookup < XFASTINT (tailval))
6575                   *max_lookup = XFASTINT (tailval);
6576               }
6577         }
6578     }
6579   return translation_table;
6580 }
6581
/* Look up character C in TABLE, a char-table or a list of
   char-tables.

   When the entry found is itself a character, C is replaced by it
   and, for a list, the lookup goes on with the next table using the
   new C.  TRANS is set to the first entry that is neither nil nor a
   character (the lookup stops there), or to Qnil if there is no such
   entry.  If TABLE is of any other type, C is unchanged and TRANS is
   Qnil.

   C and TRANS must be lvalues; TABLE and C are evaluated more than
   once.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    trans = Qnil;                                                       \
    if (CHAR_TABLE_P (table))                                           \
      {                                                                 \
        trans = CHAR_TABLE_REF (table, c);                              \
        if (CHARACTERP (trans))                                         \
          {                                                             \
            c = XFASTINT (trans);                                       \
            trans = Qnil;                                               \
          }                                                             \
      }                                                                 \
    else if (CONSP (table))                                             \
      {                                                                 \
        Lisp_Object rest_;                                              \
                                                                        \
        for (rest_ = table; CONSP (rest_); rest_ = XCDR (rest_))        \
          {                                                             \
            if (! CHAR_TABLE_P (XCAR (rest_)))                          \
              continue;                                                 \
            trans = CHAR_TABLE_REF (XCAR (rest_), c);                   \
            if (CHARACTERP (trans))                                     \
              {                                                         \
                c = XFASTINT (trans);                                   \
                trans = Qnil;                                           \
              }                                                         \
            else if (! NILP (trans))                                    \
              break;                                                    \
          }                                                             \
      }                                                                 \
  } while (0)
6606
6607
6608 /* Return a translation of character(s) at BUF according to TRANS.
6609    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6610    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6611    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6612    translation is found, and Qnil if not found..
6613    If BUF is too short to lookup characters in FROM, return Qt.  */
6614
6615 static Lisp_Object
6616 get_translation (Lisp_Object trans, int *buf, int *buf_end)
6617 {
6618
6619   if (INTEGERP (trans))
6620     return trans;
6621   for (; CONSP (trans); trans = XCDR (trans))
6622     {
6623       Lisp_Object val = XCAR (trans);
6624       Lisp_Object from = XCAR (val);
6625       int len = ASIZE (from);
6626       int i;
6627
6628       for (i = 0; i < len; i++)
6629         {
6630           if (buf + i == buf_end)
6631             return Qt;
6632           if (XINT (AREF (from, i)) != buf[i])
6633             break;
6634         }
6635       if (i == len)
6636         return val;
6637     }
6638   return Qnil;
6639 }
6640
6641
6642 static int
6643 produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6644                int last_block)
6645 {
6646   unsigned char *dst = coding->destination + coding->produced;
6647   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6648   EMACS_INT produced;
6649   EMACS_INT produced_chars = 0;
6650   int carryover = 0;
6651
6652   if (! coding->chars_at_source)
6653     {
6654       /* Source characters are in coding->charbuf.  */
6655       int *buf = coding->charbuf;
6656       int *buf_end = buf + coding->charbuf_used;
6657
6658       if (EQ (coding->src_object, coding->dst_object))
6659         {
6660           coding_set_source (coding);
6661           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6662         }
6663
6664       while (buf < buf_end)
6665         {
6666           int c = *buf, i;
6667
6668           if (c >= 0)
6669             {
6670               int from_nchars = 1, to_nchars = 1;
6671               Lisp_Object trans = Qnil;
6672
6673               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6674               if (! NILP (trans))
6675                 {
6676                   trans = get_translation (trans, buf, buf_end);
6677                   if (INTEGERP (trans))
6678                     c = XINT (trans);
6679                   else if (CONSP (trans))
6680                     {
6681                       from_nchars = ASIZE (XCAR (trans));
6682                       trans = XCDR (trans);
6683                       if (INTEGERP (trans))
6684                         c = XINT (trans);
6685                       else
6686                         {
6687                           to_nchars = ASIZE (trans);
6688                           c = XINT (AREF (trans, 0));
6689                         }
6690                     }
6691                   else if (EQ (trans, Qt) && ! last_block)
6692                     break;
6693                 }
6694
6695               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6696                 {
6697                   dst = alloc_destination (coding,
6698                                            buf_end - buf
6699                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6700                                            dst);
6701                   if (EQ (coding->src_object, coding->dst_object))
6702                     {
6703                       coding_set_source (coding);
6704                       dst_end = (((unsigned char *) coding->source)
6705                                  + coding->consumed);
6706                     }
6707                   else
6708                     dst_end = coding->destination + coding->dst_bytes;
6709                 }
6710
6711               for (i = 0; i < to_nchars; i++)
6712                 {
6713                   if (i > 0)
6714                     c = XINT (AREF (trans, i));
6715                   if (coding->dst_multibyte
6716                       || ! CHAR_BYTE8_P (c))
6717                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6718                   else
6719                     *dst++ = CHAR_TO_BYTE8 (c);
6720                 }
6721               produced_chars += to_nchars;
6722               buf += from_nchars;
6723             }
6724           else
6725             /* This is an annotation datum.  (-C) is the length.  */
6726             buf += -c;
6727         }
6728       carryover = buf_end - buf;
6729     }
6730   else
6731     {
6732       /* Source characters are at coding->source.  */
6733       const unsigned char *src = coding->source;
6734       const unsigned char *src_end = src + coding->consumed;
6735
6736       if (EQ (coding->dst_object, coding->src_object))
6737         dst_end = (unsigned char *) src;
6738       if (coding->src_multibyte != coding->dst_multibyte)
6739         {
6740           if (coding->src_multibyte)
6741             {
6742               int multibytep = 1;
6743               EMACS_INT consumed_chars = 0;
6744
6745               while (1)
6746                 {
6747                   const unsigned char *src_base = src;
6748                   int c;
6749
6750                   ONE_MORE_BYTE (c);
6751                   if (dst == dst_end)
6752                     {
6753                       if (EQ (coding->src_object, coding->dst_object))
6754                         dst_end = (unsigned char *) src;
6755                       if (dst == dst_end)
6756                         {
6757                           EMACS_INT offset = src - coding->source;
6758
6759                           dst = alloc_destination (coding, src_end - src + 1,
6760                                                    dst);
6761                           dst_end = coding->destination + coding->dst_bytes;
6762                           coding_set_source (coding);
6763                           src = coding->source + offset;
6764                           src_end = coding->source + coding->src_bytes;
6765                           if (EQ (coding->src_object, coding->dst_object))
6766                             dst_end = (unsigned char *) src;
6767                         }
6768                     }
6769                   *dst++ = c;
6770                   produced_chars++;
6771                 }
6772             no_more_source:
6773               ;
6774             }
6775           else
6776             while (src < src_end)
6777               {
6778                 int multibytep = 1;
6779                 int c = *src++;
6780
6781                 if (dst >= dst_end - 1)
6782                   {
6783                     if (EQ (coding->src_object, coding->dst_object))
6784                       dst_end = (unsigned char *) src;
6785                     if (dst >= dst_end - 1)
6786                       {
6787                         EMACS_INT offset = src - coding->source;
6788                         EMACS_INT more_bytes;
6789
6790                         if (EQ (coding->src_object, coding->dst_object))
6791                           more_bytes = ((src_end - src) / 2) + 2;
6792                         else
6793                           more_bytes = src_end - src + 2;
6794                         dst = alloc_destination (coding, more_bytes, dst);
6795                         dst_end = coding->destination + coding->dst_bytes;
6796                         coding_set_source (coding);
6797                         src = coding->source + offset;
6798                         src_end = coding->source + coding->src_bytes;
6799                         if (EQ (coding->src_object, coding->dst_object))
6800                           dst_end = (unsigned char *) src;
6801                       }
6802                   }
6803                 EMIT_ONE_BYTE (c);
6804               }
6805         }
6806       else
6807         {
6808           if (!EQ (coding->src_object, coding->dst_object))
6809             {
6810               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6811
6812               if (require > 0)
6813                 {
6814                   EMACS_INT offset = src - coding->source;
6815
6816                   dst = alloc_destination (coding, require, dst);
6817                   coding_set_source (coding);
6818                   src = coding->source + offset;
6819                   src_end = coding->source + coding->src_bytes;
6820                 }
6821             }
6822           produced_chars = coding->consumed_char;
6823           while (src < src_end)
6824             *dst++ = *src++;
6825         }
6826     }
6827
6828   produced = dst - (coding->destination + coding->produced);
6829   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6830     insert_from_gap (produced_chars, produced);
6831   coding->produced += produced;
6832   coding->produced_char += produced_chars;
6833   return carryover;
6834 }
6835
6836 /* Compose text in CODING->object according to the annotation data at
6837    CHARBUF.  CHARBUF is an array:
6838      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6839  */
6840
6841 static INLINE void
6842 produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6843 {
6844   int len;
6845   EMACS_INT to;
6846   enum composition_method method;
6847   Lisp_Object components;
6848
6849   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6850   to = pos + charbuf[2];
6851   method = (enum composition_method) (charbuf[4]);
6852
6853   if (method == COMPOSITION_RELATIVE)
6854     components = Qnil;
6855   else
6856     {
6857       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6858       int i, j;
6859
6860       if (method == COMPOSITION_WITH_RULE)
6861         len = charbuf[2] * 3 - 2;
6862       charbuf += MAX_ANNOTATION_LENGTH;
6863       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6864       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6865         {
6866           if (charbuf[i] >= 0)
6867             args[j] = make_number (charbuf[i]);
6868           else
6869             {
6870               i++;
6871               args[j] = make_number (charbuf[i] % 0x100);
6872             }
6873         }
6874       components = (i == j ? Fstring (j, args) : Fvector (j, args));
6875     }
6876   compose_text (pos, to, components, Qnil, coding->dst_object);
6877 }
6878
6879
6880 /* Put `charset' property on text in CODING->object according to
6881    the annotation data at CHARBUF.  CHARBUF is an array:
6882      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6883  */
6884
6885 static INLINE void
6886 produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6887 {
6888   EMACS_INT from = pos - charbuf[2];
6889   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6890
6891   Fput_text_property (make_number (from), make_number (pos),
6892                       Qcharset, CHARSET_NAME (charset),
6893                       coding->dst_object);
6894 }
6895
6896
/* Number of elements first tried for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca and record its element count
   in CODING->charbuf_size.  CHARBUF_SIZE elements are tried first;
   while alloca returns NULL the count is halved, as long as it stays
   above 1024.  If no allocation succeeds, record
   CODING_RESULT_INSUFFICIENT_MEM and return CODING->result from the
   function that uses this macro.

   As the memory comes from alloca, this must be used directly in the
   function that uses CODING->charbuf.

   NOTE(review): alloca commonly gives no failure indication, in which
   case the retry and the early return never happen -- confirm for the
   supported platforms.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int nelts_;                                                         \
                                                                        \
    coding->charbuf = NULL;                                             \
    for (nelts_ = CHARBUF_SIZE; nelts_ > 1024; nelts_ >>= 1)            \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * nelts_);       \
        if (coding->charbuf)                                            \
          break;                                                        \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = nelts_;                                      \
  } while (0)
6918
6919
6920 static void
6921 produce_annotation (struct coding_system *coding, EMACS_INT pos)
6922 {
6923   int *charbuf = coding->charbuf;
6924   int *charbuf_end = charbuf + coding->charbuf_used;
6925
6926   if (NILP (coding->dst_object))
6927     return;
6928
6929   while (charbuf < charbuf_end)
6930     {
6931       if (*charbuf >= 0)
6932         pos++, charbuf++;
6933       else
6934         {
6935           int len = -*charbuf;
6936
6937           if (len > 2)
6938             switch (charbuf[1])
6939               {
6940               case CODING_ANNOTATE_COMPOSITION_MASK:
6941                 produce_composition (coding, charbuf, pos);
6942                 break;
6943               case CODING_ANNOTATE_CHARSET_MASK:
6944                 produce_charset (coding, charbuf, pos);
6945                 break;
6946               }
6947           charbuf += len;
6948         }
6949     }
6950 }
6951
6952 /* Decode the data at CODING->src_object into CODING->dst_object.
6953    CODING->src_object is a buffer, a string, or nil.
6954    CODING->dst_object is a buffer.
6955
6956    If CODING->src_object is a buffer, it must be the current buffer.
6957    In this case, if CODING->src_pos is positive, it is a position of
6958    the source text in the buffer, otherwise, the source text is in the
6959    gap area of the buffer, and CODING->src_pos specifies the offset of
6960    the text from GPT (which must be the same as PT).  If this is the
6961    same buffer as CODING->dst_object, CODING->src_pos must be
6962    negative.
6963
6964    If CODING->src_object is a string, CODING->src_pos is an index to
6965    that string.
6966
6967    If CODING->src_object is nil, CODING->source must already point to
6968    the non-relocatable memory area.  In this case, CODING->src_pos is
6969    an offset from CODING->source.
6970
6971    The decoded data is inserted at the current point of the buffer
6972    CODING->dst_object.
6973 */
6974
6975 static int
6976 decode_coding (struct coding_system *coding)
6977 {
6978   Lisp_Object attrs;
6979   Lisp_Object undo_list;
6980   Lisp_Object translation_table;
6981   struct ccl_spec cclspec;
6982   int carryover;
6983   int i;
6984
6985   if (BUFFERP (coding->src_object)
6986       && coding->src_pos > 0
6987       && coding->src_pos < GPT
6988       && coding->src_pos + coding->src_chars > GPT)
6989     move_gap_both (coding->src_pos, coding->src_pos_byte);
6990
6991   undo_list = Qt;
6992   if (BUFFERP (coding->dst_object))
6993     {
6994       if (current_buffer != XBUFFER (coding->dst_object))
6995         set_buffer_internal (XBUFFER (coding->dst_object));
6996       if (GPT != PT)
6997         move_gap_both (PT, PT_BYTE);
6998       undo_list = BVAR (current_buffer, undo_list);
6999       BVAR (current_buffer, undo_list) = Qt;
7000     }
7001
7002   coding->consumed = coding->consumed_char = 0;
7003   coding->produced = coding->produced_char = 0;
7004   coding->chars_at_source = 0;
7005   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7006   coding->errors = 0;
7007
7008   ALLOC_CONVERSION_WORK_AREA (coding);
7009
7010   attrs = CODING_ID_ATTRS (coding->id);
7011   translation_table = get_translation_table (attrs, 0, NULL);
7012
7013   carryover = 0;
7014   if (coding->decoder == decode_coding_ccl)
7015     {
7016       coding->spec.ccl = &cclspec;
7017       setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7018     }
7019   do
7020     {
7021       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7022
7023       coding_set_source (coding);
7024       coding->annotated = 0;
7025       coding->charbuf_used = carryover;
7026       (*(coding->decoder)) (coding);
7027       coding_set_destination (coding);
7028       carryover = produce_chars (coding, translation_table, 0);
7029       if (coding->annotated)
7030         produce_annotation (coding, pos);
7031       for (i = 0; i < carryover; i++)
7032         coding->charbuf[i]
7033           = coding->charbuf[coding->charbuf_used - carryover + i];
7034     }
7035   while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7036          || (coding->consumed < coding->src_bytes
7037              && (coding->result == CODING_RESULT_SUCCESS
7038                  || coding->result == CODING_RESULT_INVALID_SRC)));
7039
7040   if (carryover > 0)
7041     {
7042       coding_set_destination (coding);
7043       coding->charbuf_used = carryover;
7044       produce_chars (coding, translation_table, 1);
7045     }
7046
7047   coding->carryover_bytes = 0;
7048   if (coding->consumed < coding->src_bytes)
7049     {
7050       int nbytes = coding->src_bytes - coding->consumed;
7051       const unsigned char *src;
7052
7053       coding_set_source (coding);
7054       coding_set_destination (coding);
7055       src = coding->source + coding->consumed;
7056
7057       if (coding->mode & CODING_MODE_LAST_BLOCK)
7058         {
7059           /* Flush out unprocessed data as binary chars.  We are sure
7060              that the number of data is less than the size of
7061              coding->charbuf.  */
7062           coding->charbuf_used = 0;
7063           coding->chars_at_source = 0;
7064
7065           while (nbytes-- > 0)
7066             {
7067               int c = *src++;
7068
7069               if (c & 0x80)
7070                 c = BYTE8_TO_CHAR (c);
7071               coding->charbuf[coding->charbuf_used++] = c;
7072             }
7073           produce_chars (coding, Qnil, 1);
7074         }
7075       else
7076         {
7077           /* Record unprocessed bytes in coding->carryover.  We are
7078              sure that the number of data is less than the size of
7079              coding->carryover.  */
7080           unsigned char *p = coding->carryover;
7081
7082           if (nbytes > sizeof coding->carryover)
7083             nbytes = sizeof coding->carryover;
7084           coding->carryover_bytes = nbytes;
7085           while (nbytes-- > 0)
7086             *p++ = *src++;
7087         }
7088       coding->consumed = coding->src_bytes;
7089     }
7090
7091   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7092       && !inhibit_eol_conversion)
7093     decode_eol (coding);
7094   if (BUFFERP (coding->dst_object))
7095     {
7096       BVAR (current_buffer, undo_list) = undo_list;
7097       record_insert (coding->dst_pos, coding->produced_char);
7098     }
7099   return coding->result;
7100 }
7101
7102
7103 /* Extract an annotation datum from a composition starting at POS and
7104    ending before LIMIT of CODING->src_object (buffer or string), store
7105    the data in BUF, set *STOP to a starting position of the next
7106    composition (if any) or to LIMIT, and return the address of the
7107    next element of BUF.
7108
7109    If such an annotation is not found, set *STOP to a starting
7110    position of a composition after POS (if any) or to LIMIT, and
7111    return BUF.  */
7112
7113 static INLINE int *
7114 handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
7115                                struct coding_system *coding, int *buf,
7116                                EMACS_INT *stop)
7117 {
7118   EMACS_INT start, end;
7119   Lisp_Object prop;
7120
7121   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7122       || end > limit)
7123     *stop = limit;
7124   else if (start > pos)
7125     *stop = start;
7126   else
7127     {
7128       if (start == pos)
7129         {
7130           /* We found a composition.  Store the corresponding
7131              annotation data in BUF.  */
7132           int *head = buf;
7133           enum composition_method method = COMPOSITION_METHOD (prop);
7134           int nchars = COMPOSITION_LENGTH (prop);
7135
7136           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7137           if (method != COMPOSITION_RELATIVE)
7138             {
7139               Lisp_Object components;
7140               int len, i, i_byte;
7141
7142               components = COMPOSITION_COMPONENTS (prop);
7143               if (VECTORP (components))
7144                 {
7145                   len = XVECTOR (components)->size;
7146                   for (i = 0; i < len; i++)
7147                     *buf++ = XINT (AREF (components, i));
7148                 }
7149               else if (STRINGP (components))
7150                 {
7151                   len = SCHARS (components);
7152                   i = i_byte = 0;
7153                   while (i < len)
7154                     {
7155                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7156                       buf++;
7157                     }
7158                 }
7159               else if (INTEGERP (components))
7160                 {
7161                   len = 1;
7162                   *buf++ = XINT (components);
7163                 }
7164               else if (CONSP (components))
7165                 {
7166                   for (len = 0; CONSP (components);
7167                        len++, components = XCDR (components))
7168                     *buf++ = XINT (XCAR (components));
7169                 }
7170               else
7171                 abort ();
7172               *head -= len;
7173             }
7174         }
7175
7176       if (find_composition (end, limit, &start, &end, &prop,
7177                             coding->src_object)
7178           && end <= limit)
7179         *stop = start;
7180       else
7181         *stop = limit;
7182     }
7183   return buf;
7184 }
7185
7186
7187 /* Extract an annotation datum from a text property `charset' at POS of
7188    CODING->src_object (buffer of string), store the data in BUF, set
7189    *STOP to the position where the value of `charset' property changes
7190    (limiting by LIMIT), and return the address of the next element of
7191    BUF.
7192
7193    If the property value is nil, set *STOP to the position where the
7194    property value is non-nil (limiting by LIMIT), and return BUF.  */
7195
7196 static INLINE int *
7197 handle_charset_annotation (EMACS_INT pos, EMACS_INT limit,
7198                            struct coding_system *coding, int *buf,
7199                            EMACS_INT *stop)
7200 {
7201   Lisp_Object val, next;
7202   int id;
7203
7204   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7205   if (! NILP (val) && CHARSETP (val))
7206     id = XINT (CHARSET_SYMBOL_ID (val));
7207   else
7208     id = -1;
7209   ADD_CHARSET_DATA (buf, 0, id);
7210   next = Fnext_single_property_change (make_number (pos), Qcharset,
7211                                        coding->src_object,
7212                                        make_number (limit));
7213   *stop = XINT (next);
7214   return buf;
7215 }
7216
7217
7218 static void
7219 consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7220                int max_lookup)
7221 {
7222   int *buf = coding->charbuf;
7223   int *buf_end = coding->charbuf + coding->charbuf_size;
7224   const unsigned char *src = coding->source + coding->consumed;
7225   const unsigned char *src_end = coding->source + coding->src_bytes;
7226   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7227   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7228   int multibytep = coding->src_multibyte;
7229   Lisp_Object eol_type;
7230   int c;
7231   EMACS_INT stop, stop_composition, stop_charset;
7232   int *lookup_buf = NULL;
7233
7234   if (! NILP (translation_table))
7235     lookup_buf = alloca (sizeof (int) * max_lookup);
7236
7237   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7238   if (VECTORP (eol_type))
7239     eol_type = Qunix;
7240
7241   /* Note: composition handling is not yet implemented.  */
7242   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7243
7244   if (NILP (coding->src_object))
7245     stop = stop_composition = stop_charset = end_pos;
7246   else
7247     {
7248       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7249         stop = stop_composition = pos;
7250       else
7251         stop = stop_composition = end_pos;
7252       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7253         stop = stop_charset = pos;
7254       else
7255         stop_charset = end_pos;
7256     }
7257
7258   /* Compensate for CRLF and conversion.  */
7259   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7260   while (buf < buf_end)
7261     {
7262       Lisp_Object trans;
7263
7264       if (pos == stop)
7265         {
7266           if (pos == end_pos)
7267             break;
7268           if (pos == stop_composition)
7269             buf = handle_composition_annotation (pos, end_pos, coding,
7270                                                  buf, &stop_composition);
7271           if (pos == stop_charset)
7272             buf = handle_charset_annotation (pos, end_pos, coding,
7273                                              buf, &stop_charset);
7274           stop = (stop_composition < stop_charset
7275                   ? stop_composition : stop_charset);
7276         }
7277
7278       if (! multibytep)
7279         {
7280           EMACS_INT bytes;
7281
7282           if (coding->encoder == encode_coding_raw_text
7283               || coding->encoder == encode_coding_ccl)
7284             c = *src++, pos++;
7285           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7286             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7287           else
7288             c = BYTE8_TO_CHAR (*src), src++, pos++;
7289         }
7290       else
7291         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7292       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7293         c = '\n';
7294       if (! EQ (eol_type, Qunix))
7295         {
7296           if (c == '\n')
7297             {
7298               if (EQ (eol_type, Qdos))
7299                 *buf++ = '\r';
7300               else
7301                 c = '\r';
7302             }
7303         }
7304
7305       trans = Qnil;
7306       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7307       if (NILP (trans))
7308         *buf++ = c;
7309       else
7310         {
7311           int from_nchars = 1, to_nchars = 1;
7312           int *lookup_buf_end;
7313           const unsigned char *p = src;
7314           int i;
7315
7316           lookup_buf[0] = c;
7317           for (i = 1; i < max_lookup && p < src_end; i++)
7318             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7319           lookup_buf_end = lookup_buf + i;
7320           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7321           if (INTEGERP (trans))
7322             c = XINT (trans);
7323           else if (CONSP (trans))
7324             {
7325               from_nchars = ASIZE (XCAR (trans));
7326               trans = XCDR (trans);
7327               if (INTEGERP (trans))
7328                 c = XINT (trans);
7329               else
7330                 {
7331                   to_nchars = ASIZE (trans);
7332                   if (buf + to_nchars > buf_end)
7333                     break;
7334                   c = XINT (AREF (trans, 0));
7335                 }
7336             }
7337           else
7338             break;
7339           *buf++ = c;
7340           for (i = 1; i < to_nchars; i++)
7341             *buf++ = XINT (AREF (trans, i));
7342           for (i = 1; i < from_nchars; i++, pos++)
7343             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7344         }
7345     }
7346
7347   coding->consumed = src - coding->source;
7348   coding->consumed_char = pos - coding->src_pos;
7349   coding->charbuf_used = buf - coding->charbuf;
7350   coding->chars_at_source = 0;
7351 }
7352
7353
7354 /* Encode the text at CODING->src_object into CODING->dst_object.
7355    CODING->src_object is a buffer or a string.
7356    CODING->dst_object is a buffer or nil.
7357
7358    If CODING->src_object is a buffer, it must be the current buffer.
7359    In this case, if CODING->src_pos is positive, it is a position of
7360    the source text in the buffer, otherwise. the source text is in the
7361    gap area of the buffer, and coding->src_pos specifies the offset of
7362    the text from GPT (which must be the same as PT).  If this is the
7363    same buffer as CODING->dst_object, CODING->src_pos must be
7364    negative and CODING should not have `pre-write-conversion'.
7365
7366    If CODING->src_object is a string, CODING should not have
7367    `pre-write-conversion'.
7368
7369    If CODING->dst_object is a buffer, the encoded data is inserted at
7370    the current point of that buffer.
7371
7372    If CODING->dst_object is nil, the encoded data is placed at the
7373    memory area specified by CODING->destination.  */
7374
7375 static int
7376 encode_coding (struct coding_system *coding)
7377 {
7378   Lisp_Object attrs;
7379   Lisp_Object translation_table;
7380   int max_lookup;
7381   struct ccl_spec cclspec;
7382
7383   attrs = CODING_ID_ATTRS (coding->id);
7384   if (coding->encoder == encode_coding_raw_text)
7385     translation_table = Qnil, max_lookup = 0;
7386   else
7387     translation_table = get_translation_table (attrs, 1, &max_lookup);
7388
7389   if (BUFFERP (coding->dst_object))
7390     {
7391       set_buffer_internal (XBUFFER (coding->dst_object));
7392       coding->dst_multibyte
7393         = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7394     }
7395
7396   coding->consumed = coding->consumed_char = 0;
7397   coding->produced = coding->produced_char = 0;
7398   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7399   coding->errors = 0;
7400
7401   ALLOC_CONVERSION_WORK_AREA (coding);
7402
7403   if (coding->encoder == encode_coding_ccl)
7404     {
7405       coding->spec.ccl = &cclspec;
7406       setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7407     }
7408   do {
7409     coding_set_source (coding);
7410     consume_chars (coding, translation_table, max_lookup);
7411     coding_set_destination (coding);
7412     (*(coding->encoder)) (coding);
7413   } while (coding->consumed_char < coding->src_chars);
7414
7415   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7416     insert_from_gap (coding->produced_char, coding->produced);
7417
7418   return (coding->result);
7419 }
7420
7421
7422 /* Name (or base name) of work buffer for code conversion.  */
7423 static Lisp_Object Vcode_conversion_workbuf_name;
7424
7425 /* A working buffer used by the top level conversion.  Once it is
7426    created, it is never destroyed.  It has the name
7427    Vcode_conversion_workbuf_name.  The other working buffers are
7428    destroyed after the use is finished, and their names are modified
7429    versions of Vcode_conversion_workbuf_name.  */
7430 static Lisp_Object Vcode_conversion_reused_workbuf;
7431
7432 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
7433 static int reused_workbuf_in_use;
7434
7435
7436 /* Return a working buffer of code conversion.  MULTIBYTE specifies the
7437    multibyteness of returning buffer.  */
7438
7439 static Lisp_Object
7440 make_conversion_work_buffer (int multibyte)
7441 {
7442   Lisp_Object name, workbuf;
7443   struct buffer *current;
7444
7445   if (reused_workbuf_in_use++)
7446     {
7447       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7448       workbuf = Fget_buffer_create (name);
7449     }
7450   else
7451     {
7452       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7453         Vcode_conversion_reused_workbuf
7454           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7455       workbuf = Vcode_conversion_reused_workbuf;
7456     }
7457   current = current_buffer;
7458   set_buffer_internal (XBUFFER (workbuf));
7459   /* We can't allow modification hooks to run in the work buffer.  For
7460      instance, directory_files_internal assumes that file decoding
7461      doesn't compile new regexps.  */
7462   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7463   Ferase_buffer ();
7464   BVAR (current_buffer, undo_list) = Qt;
7465   BVAR (current_buffer, enable_multibyte_characters) = multibyte ? Qt : Qnil;
7466   set_buffer_internal (current);
7467   return workbuf;
7468 }
7469
7470
7471 static Lisp_Object
7472 code_conversion_restore (Lisp_Object arg)
7473 {
7474   Lisp_Object current, workbuf;
7475   struct gcpro gcpro1;
7476
7477   GCPRO1 (arg);
7478   current = XCAR (arg);
7479   workbuf = XCDR (arg);
7480   if (! NILP (workbuf))
7481     {
7482       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7483         reused_workbuf_in_use = 0;
7484       else if (! NILP (Fbuffer_live_p (workbuf)))
7485         Fkill_buffer (workbuf);
7486     }
7487   set_buffer_internal (XBUFFER (current));
7488   UNGCPRO;
7489   return Qnil;
7490 }
7491
7492 Lisp_Object
7493 code_conversion_save (int with_work_buf, int multibyte)
7494 {
7495   Lisp_Object workbuf = Qnil;
7496
7497   if (with_work_buf)
7498     workbuf = make_conversion_work_buffer (multibyte);
7499   record_unwind_protect (code_conversion_restore,
7500                          Fcons (Fcurrent_buffer (), workbuf));
7501   return workbuf;
7502 }
7503
7504 int
7505 decode_coding_gap (struct coding_system *coding,
7506                    EMACS_INT chars, EMACS_INT bytes)
7507 {
7508   int count = SPECPDL_INDEX ();
7509   Lisp_Object attrs;
7510
7511   code_conversion_save (0, 0);
7512
7513   coding->src_object = Fcurrent_buffer ();
7514   coding->src_chars = chars;
7515   coding->src_bytes = bytes;
7516   coding->src_pos = -chars;
7517   coding->src_pos_byte = -bytes;
7518   coding->src_multibyte = chars < bytes;
7519   coding->dst_object = coding->src_object;
7520   coding->dst_pos = PT;
7521   coding->dst_pos_byte = PT_BYTE;
7522   coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7523
7524   if (CODING_REQUIRE_DETECTION (coding))
7525     detect_coding (coding);
7526
7527   coding->mode |= CODING_MODE_LAST_BLOCK;
7528   current_buffer->text->inhibit_shrinking = 1;
7529   decode_coding (coding);
7530   current_buffer->text->inhibit_shrinking = 0;
7531
7532   attrs = CODING_ID_ATTRS (coding->id);
7533   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7534     {
7535       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7536       Lisp_Object val;
7537
7538       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7539       val = call1 (CODING_ATTR_POST_READ (attrs),
7540                    make_number (coding->produced_char));
7541       CHECK_NATNUM (val);
7542       coding->produced_char += Z - prev_Z;
7543       coding->produced += Z_BYTE - prev_Z_BYTE;
7544     }
7545
7546   unbind_to (count, Qnil);
7547   return coding->result;
7548 }
7549
7550 int
7551 encode_coding_gap (struct coding_system *coding,
7552                    EMACS_INT chars, EMACS_INT bytes)
7553 {
7554   int count = SPECPDL_INDEX ();
7555
7556   code_conversion_save (0, 0);
7557
7558   coding->src_object = Fcurrent_buffer ();
7559   coding->src_chars = chars;
7560   coding->src_bytes = bytes;
7561   coding->src_pos = -chars;
7562   coding->src_pos_byte = -bytes;
7563   coding->src_multibyte = chars < bytes;
7564   coding->dst_object = coding->src_object;
7565   coding->dst_pos = PT;
7566   coding->dst_pos_byte = PT_BYTE;
7567
7568   encode_coding (coding);
7569
7570   unbind_to (count, Qnil);
7571   return coding->result;
7572 }
7573
7574
7575 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7576    SRC_OBJECT into DST_OBJECT by coding context CODING.
7577
7578    SRC_OBJECT is a buffer, a string, or Qnil.
7579
7580    If it is a buffer, the text is at point of the buffer.  FROM and TO
7581    are positions in the buffer.
7582
7583    If it is a string, the text is at the beginning of the string.
7584    FROM and TO are indices to the string.
7585
7586    If it is nil, the text is at coding->source.  FROM and TO are
7587    indices to coding->source.
7588
7589    DST_OBJECT is a buffer, Qt, or Qnil.
7590
7591    If it is a buffer, the decoded text is inserted at point of the
7592    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7593    is deleted.
7594
7595    If it is Qt, a string is made from the decoded text, and
7596    set in CODING->dst_object.
7597
7598    If it is Qnil, the decoded text is stored at CODING->destination.
7599    The caller must allocate CODING->dst_bytes bytes at
7600    CODING->destination by xmalloc.  If the decoded text is longer than
7601    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7602  */
7603
7604 void
7605 decode_coding_object (struct coding_system *coding,
7606                       Lisp_Object src_object,
7607                       EMACS_INT from, EMACS_INT from_byte,
7608                       EMACS_INT to, EMACS_INT to_byte,
7609                       Lisp_Object dst_object)
7610 {
7611   int count = SPECPDL_INDEX ();
7612   unsigned char *destination IF_LINT (= NULL);
7613   EMACS_INT dst_bytes IF_LINT (= 0);
7614   EMACS_INT chars = to - from;
7615   EMACS_INT bytes = to_byte - from_byte;
7616   Lisp_Object attrs;
7617   int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7618   int need_marker_adjustment = 0;
7619   Lisp_Object old_deactivate_mark;
7620
7621   old_deactivate_mark = Vdeactivate_mark;
7622
7623   if (NILP (dst_object))
7624     {
7625       destination = coding->destination;
7626       dst_bytes = coding->dst_bytes;
7627     }
7628
7629   coding->src_object = src_object;
7630   coding->src_chars = chars;
7631   coding->src_bytes = bytes;
7632   coding->src_multibyte = chars < bytes;
7633
7634   if (STRINGP (src_object))
7635     {
7636       coding->src_pos = from;
7637       coding->src_pos_byte = from_byte;
7638     }
7639   else if (BUFFERP (src_object))
7640     {
7641       set_buffer_internal (XBUFFER (src_object));
7642       if (from != GPT)
7643         move_gap_both (from, from_byte);
7644       if (EQ (src_object, dst_object))
7645         {
7646           struct Lisp_Marker *tail;
7647
7648           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7649             {
7650               tail->need_adjustment
7651                 = tail->charpos == (tail->insertion_type ? from : to);
7652               need_marker_adjustment |= tail->need_adjustment;
7653             }
7654           saved_pt = PT, saved_pt_byte = PT_BYTE;
7655           TEMP_SET_PT_BOTH (from, from_byte);
7656           current_buffer->text->inhibit_shrinking = 1;
7657           del_range_both (from, from_byte, to, to_byte, 1);
7658           coding->src_pos = -chars;
7659           coding->src_pos_byte = -bytes;
7660         }
7661       else
7662         {
7663           coding->src_pos = from;
7664           coding->src_pos_byte = from_byte;
7665         }
7666     }
7667
7668   if (CODING_REQUIRE_DETECTION (coding))
7669     detect_coding (coding);
7670   attrs = CODING_ID_ATTRS (coding->id);
7671
7672   if (EQ (dst_object, Qt)
7673       || (! NILP (CODING_ATTR_POST_READ (attrs))
7674           && NILP (dst_object)))
7675     {
7676       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7677       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7678       coding->dst_pos = BEG;
7679       coding->dst_pos_byte = BEG_BYTE;
7680     }
7681   else if (BUFFERP (dst_object))
7682     {
7683       code_conversion_save (0, 0);
7684       coding->dst_object = dst_object;
7685       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7686       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7687       coding->dst_multibyte
7688         = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
7689     }
7690   else
7691     {
7692       code_conversion_save (0, 0);
7693       coding->dst_object = Qnil;
7694       /* Most callers presume this will return a multibyte result, and they
7695          won't use `binary' or `raw-text' anyway, so let's not worry about
7696          CODING_FOR_UNIBYTE.  */
7697       coding->dst_multibyte = 1;
7698     }
7699
7700   decode_coding (coding);
7701
7702   if (BUFFERP (coding->dst_object))
7703     set_buffer_internal (XBUFFER (coding->dst_object));
7704
7705   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7706     {
7707       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7708       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7709       Lisp_Object val;
7710
7711       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7712       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7713               old_deactivate_mark);
7714       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7715                         make_number (coding->produced_char));
7716       UNGCPRO;
7717       CHECK_NATNUM (val);
7718       coding->produced_char += Z - prev_Z;
7719       coding->produced += Z_BYTE - prev_Z_BYTE;
7720     }
7721
7722   if (EQ (dst_object, Qt))
7723     {
7724       coding->dst_object = Fbuffer_string ();
7725     }
7726   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7727     {
7728       set_buffer_internal (XBUFFER (coding->dst_object));
7729       if (dst_bytes < coding->produced)
7730         {
7731           destination = xrealloc (destination, coding->produced);
7732           if (! destination)
7733             {
7734               record_conversion_result (coding,
7735                                         CODING_RESULT_INSUFFICIENT_MEM);
7736               unbind_to (count, Qnil);
7737               return;
7738             }
7739           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7740             move_gap_both (BEGV, BEGV_BYTE);
7741           memcpy (destination, BEGV_ADDR, coding->produced);
7742           coding->destination = destination;
7743         }
7744     }
7745
7746   if (saved_pt >= 0)
7747     {
7748       /* This is the case of:
7749          (BUFFERP (src_object) && EQ (src_object, dst_object))
7750          As we have moved PT while replacing the original buffer
7751          contents, we must recover it now.  */
7752       set_buffer_internal (XBUFFER (src_object));
7753       current_buffer->text->inhibit_shrinking = 0;
7754       if (saved_pt < from)
7755         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7756       else if (saved_pt < from + chars)
7757         TEMP_SET_PT_BOTH (from, from_byte);
7758       else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
7759         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7760                           saved_pt_byte + (coding->produced - bytes));
7761       else
7762         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7763                           saved_pt_byte + (coding->produced - bytes));
7764
7765       if (need_marker_adjustment)
7766         {
7767           struct Lisp_Marker *tail;
7768
7769           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7770             if (tail->need_adjustment)
7771               {
7772                 tail->need_adjustment = 0;
7773                 if (tail->insertion_type)
7774                   {
7775                     tail->bytepos = from_byte;
7776                     tail->charpos = from;
7777                   }
7778                 else
7779                   {
7780                     tail->bytepos = from_byte + coding->produced;
7781                     tail->charpos
7782                       = (NILP (BVAR (current_buffer, enable_multibyte_characters))
7783                          ? tail->bytepos : from + coding->produced_char);
7784                   }
7785               }
7786         }
7787     }
7788
7789   Vdeactivate_mark = old_deactivate_mark;
7790   unbind_to (count, coding->dst_object);
7791 }
7792
7793
7794 void
7795 encode_coding_object (struct coding_system *coding,
7796                       Lisp_Object src_object,
7797                       EMACS_INT from, EMACS_INT from_byte,
7798                       EMACS_INT to, EMACS_INT to_byte,
7799                       Lisp_Object dst_object)
7800 {
7801   int count = SPECPDL_INDEX ();
7802   EMACS_INT chars = to - from;
7803   EMACS_INT bytes = to_byte - from_byte;
7804   Lisp_Object attrs;
7805   int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7806   int need_marker_adjustment = 0;
7807   int kill_src_buffer = 0;
7808   Lisp_Object old_deactivate_mark;
7809
7810   old_deactivate_mark = Vdeactivate_mark;
7811
7812   coding->src_object = src_object;
7813   coding->src_chars = chars;
7814   coding->src_bytes = bytes;
7815   coding->src_multibyte = chars < bytes;
7816
7817   attrs = CODING_ID_ATTRS (coding->id);
7818
7819   if (EQ (src_object, dst_object))
7820     {
7821       struct Lisp_Marker *tail;
7822
7823       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7824         {
7825           tail->need_adjustment
7826             = tail->charpos == (tail->insertion_type ? from : to);
7827           need_marker_adjustment |= tail->need_adjustment;
7828         }
7829     }
7830
7831   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7832     {
7833       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7834       set_buffer_internal (XBUFFER (coding->src_object));
7835       if (STRINGP (src_object))
7836         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7837       else if (BUFFERP (src_object))
7838         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7839       else
7840         insert_1_both ((char *) coding->source + from, chars, bytes, 0, 0, 0);
7841
7842       if (EQ (src_object, dst_object))
7843         {
7844           set_buffer_internal (XBUFFER (src_object));
7845           saved_pt = PT, saved_pt_byte = PT_BYTE;
7846           del_range_both (from, from_byte, to, to_byte, 1);
7847           set_buffer_internal (XBUFFER (coding->src_object));
7848         }
7849
7850       {
7851         Lisp_Object args[3];
7852         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7853
7854         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7855                 old_deactivate_mark);
7856         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7857         args[1] = make_number (BEG);
7858         args[2] = make_number (Z);
7859         safe_call (3, args);
7860         UNGCPRO;
7861       }
7862       if (XBUFFER (coding->src_object) != current_buffer)
7863         kill_src_buffer = 1;
7864       coding->src_object = Fcurrent_buffer ();
7865       if (BEG != GPT)
7866         move_gap_both (BEG, BEG_BYTE);
7867       coding->src_chars = Z - BEG;
7868       coding->src_bytes = Z_BYTE - BEG_BYTE;
7869       coding->src_pos = BEG;
7870       coding->src_pos_byte = BEG_BYTE;
7871       coding->src_multibyte = Z < Z_BYTE;
7872     }
7873   else if (STRINGP (src_object))
7874     {
7875       code_conversion_save (0, 0);
7876       coding->src_pos = from;
7877       coding->src_pos_byte = from_byte;
7878     }
7879   else if (BUFFERP (src_object))
7880     {
7881       code_conversion_save (0, 0);
7882       set_buffer_internal (XBUFFER (src_object));
7883       if (EQ (src_object, dst_object))
7884         {
7885           saved_pt = PT, saved_pt_byte = PT_BYTE;
7886           coding->src_object = del_range_1 (from, to, 1, 1);
7887           coding->src_pos = 0;
7888           coding->src_pos_byte = 0;
7889         }
7890       else
7891         {
7892           if (from < GPT && to >= GPT)
7893             move_gap_both (from, from_byte);
7894           coding->src_pos = from;
7895           coding->src_pos_byte = from_byte;
7896         }
7897     }
7898   else
7899     code_conversion_save (0, 0);
7900
7901   if (BUFFERP (dst_object))
7902     {
7903       coding->dst_object = dst_object;
7904       if (EQ (src_object, dst_object))
7905         {
7906           coding->dst_pos = from;
7907           coding->dst_pos_byte = from_byte;
7908         }
7909       else
7910         {
7911           struct buffer *current = current_buffer;
7912
7913           set_buffer_temp (XBUFFER (dst_object));
7914           coding->dst_pos = PT;
7915           coding->dst_pos_byte = PT_BYTE;
7916           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7917           set_buffer_temp (current);
7918         }
7919       coding->dst_multibyte
7920         = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
7921     }
7922   else if (EQ (dst_object, Qt))
7923     {
7924       coding->dst_object = Qnil;
7925       coding->dst_bytes = coding->src_chars;
7926       if (coding->dst_bytes == 0)
7927         coding->dst_bytes = 1;
7928       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7929       coding->dst_multibyte = 0;
7930     }
7931   else
7932     {
7933       coding->dst_object = Qnil;
7934       coding->dst_multibyte = 0;
7935     }
7936
7937   encode_coding (coding);
7938
7939   if (EQ (dst_object, Qt))
7940     {
7941       if (BUFFERP (coding->dst_object))
7942         coding->dst_object = Fbuffer_string ();
7943       else
7944         {
7945           coding->dst_object
7946             = make_unibyte_string ((char *) coding->destination,
7947                                    coding->produced);
7948           xfree (coding->destination);
7949         }
7950     }
7951
7952   if (saved_pt >= 0)
7953     {
7954       /* This is the case of:
7955          (BUFFERP (src_object) && EQ (src_object, dst_object))
7956          As we have moved PT while replacing the original buffer
7957          contents, we must recover it now.  */
7958       set_buffer_internal (XBUFFER (src_object));
7959       if (saved_pt < from)
7960         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7961       else if (saved_pt < from + chars)
7962         TEMP_SET_PT_BOTH (from, from_byte);
7963       else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
7964         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7965                           saved_pt_byte + (coding->produced - bytes));
7966       else
7967         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7968                           saved_pt_byte + (coding->produced - bytes));
7969
7970       if (need_marker_adjustment)
7971         {
7972           struct Lisp_Marker *tail;
7973
7974           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7975             if (tail->need_adjustment)
7976               {
7977                 tail->need_adjustment = 0;
7978                 if (tail->insertion_type)
7979                   {
7980                     tail->bytepos = from_byte;
7981                     tail->charpos = from;
7982                   }
7983                 else
7984                   {
7985                     tail->bytepos = from_byte + coding->produced;
7986                     tail->charpos
7987                       = (NILP (BVAR (current_buffer, enable_multibyte_characters))
7988                          ? tail->bytepos : from + coding->produced_char);
7989                   }
7990               }
7991         }
7992     }
7993
7994   if (kill_src_buffer)
7995     Fkill_buffer (coding->src_object);
7996
7997   Vdeactivate_mark = old_deactivate_mark;
7998   unbind_to (count, Qnil);
7999 }
8000
8001
8002 Lisp_Object
8003 preferred_coding_system (void)
8004 {
       /* coding_priorities[0] selects the first category in the priority
          order; report the name of the coding system whose id is
          recorded for that category.  */
8005   const struct coding_system *top
         = coding_categories + coding_priorities[0];
8006
8007   return CODING_ID_NAME (top->id);
8008 }
8009
8010 \f
8011 #ifdef emacs
8012 /*** 11. Emacs Lisp library functions ***/
8013
8014 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8015        doc: /* Return t if OBJECT is nil or a coding-system.
8016 See the documentation of `define-coding-system' for information
8017 about coding-system objects.  */)
8018   (Lisp_Object object)
8019 {
       /* nil always qualifies.  */
8020   if (NILP (object))
8021     return Qt;

       /* So does anything for which a coding-system id is known.  */
8022   if (CODING_SYSTEM_ID (object) >= 0)
8023     return Qt;

       /* Finally, accept a symbol that carries a non-nil
          Qcoding_system_define_form property even though no id is
          known for it yet.  */
8024   if (SYMBOLP (object)
8025       && ! NILP (Fget (object, Qcoding_system_define_form)))
8026     return Qt;

       return Qnil;
8027 }
8028
8029 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8030        Sread_non_nil_coding_system, 1, 1, 0,
8031        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8032   (Lisp_Object prompt)
8033 {
8034   Lisp_Object input;

       /* Ask again for as long as the answer is the empty string.  */
8035   for (;;)
8036     {
8037       input = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8038                                 Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
           if (SCHARS (input) != 0)
             break;
8039     }

       /* The non-empty answer is returned as an interned symbol.  */
8041   return Fintern (input, Qnil);
8042 }
8043
8044 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8045        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8046 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8047 Ignores case when completing coding systems (all Emacs coding systems
8048 are lower-case).  */)
8049   (Lisp_Object prompt, Lisp_Object default_coding_system)
8050 {
8051   int specpdl_mark = SPECPDL_INDEX ();
8052   Lisp_Object answer;
8053
       /* The default is handed to Fcompleting_read as a string, so a
          symbol is replaced by its name first.  */
8054   if (SYMBOLP (default_coding_system))
8055     default_coding_system = SYMBOL_NAME (default_coding_system);

       /* Qcompletion_ignore_case is bound to t only for the duration of
          the read; unbind_to restores the previous binding.  */
8056   specbind (Qcompletion_ignore_case, Qt);
8057   answer = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8058                              Qt, Qnil, Qcoding_system_history,
8059                              default_coding_system, Qnil);
8060   unbind_to (specpdl_mark, Qnil);

       /* An empty answer yields nil; anything else is interned.  */
8061   if (SCHARS (answer) == 0)
         return Qnil;
       return Fintern (answer, Qnil);
8062 }
8063
8064 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8065        1, 1, 0,
8066        doc: /* Check validity of CODING-SYSTEM.
8067 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8068 It is valid if it is nil or a symbol defined as a coding system by the
8069 function `define-coding-system'.  */)
8070   (Lisp_Object coding_system)
8071 {
8072   Lisp_Object pending = Fget (coding_system, Qcoding_system_define_form);
8073
8075   if (! NILP (pending))
8076     {
           /* A definition form is attached to the symbol.  The property
              is cleared before the form is evaluated (presumably so the
              form runs at most once), then the form is run through
              safe_eval.  */
8077       Fput (coding_system, Qcoding_system_define_form, Qnil);
8078       safe_eval (pending);
8079     }

       /* Anything Fcoding_system_p does not accept is reported with a
          Qcoding_system_error signal; xsignal1 does not return.  */
8080   if (NILP (Fcoding_system_p (coding_system)))
8081     xsignal1 (Qcoding_system_error, coding_system);
8082   return coding_system;
8083 }
8084
8085 \f
8086 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8087    HIGHEST is nonzero, return the coding system of the highest
8088    priority among the detected coding systems.  Otherwise return a
8089    list of detected coding systems sorted by their priorities.  If
8090    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8091    multibyte form but contain only ASCII and eight-bit chars.
8092    Otherwise, the bytes are raw bytes.
8093
8094    CODING-SYSTEM controls the detection as below:
8095
8096    If it is nil, detect both text-format and eol-format.  If the
8097    text-format part of CODING-SYSTEM is already specified
8098    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8099    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8100    detect only text-format.

        SRC_CHARS is stored in the temporary coding object as the
        character count of the source.  When HIGHEST is nonzero and no
        coding system was collected, the value is nil.  */
8101
8102 Lisp_Object
8103 detect_coding_system (const unsigned char *src,
8104                       EMACS_INT src_chars, EMACS_INT src_bytes,
8105                       int highest, int multibytep,
8106                       Lisp_Object coding_system)
8107 {
8108   const unsigned char *src_end = src + src_bytes;
8109   Lisp_Object attrs, eol_type;
       /* VAL first collects coding-system ids; the eol pass at the end
          replaces each id by a coding-system name.  */
8110   Lisp_Object val = Qnil;
8111   struct coding_system coding;
8112   int id;
8113   struct coding_detection_info detect_info;
8114   enum coding_category base_category;
8115   int null_byte_found = 0, eight_bit_found = 0;
8116
       /* A nil CODING_SYSTEM is treated as `undecided'.  */
8117   if (NILP (coding_system))
8118     coding_system = Qundecided;
8119   setup_coding_system (coding_system, &coding);
8120   attrs = CODING_ID_ATTRS (coding.id);
8121   eol_type = CODING_ID_EOL_TYPE (coding.id);
8122   coding_system = CODING_ATTR_BASE_NAME (attrs);
8123
       /* Describe the input to the detectors through CODING.  */
8124   coding.source = src;
8125   coding.src_chars = src_chars;
8126   coding.src_bytes = src_bytes;
8127   coding.src_multibyte = multibytep;
8128   coding.consumed = 0;
8129   coding.mode |= CODING_MODE_LAST_BLOCK;
8130   coding.head_ascii = 0;
8131
8132   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8133
8134   /* At first, detect text-format if necessary.  */
8135   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8136   if (base_category == coding_category_undecided)
8137     {
8138       enum coding_category category IF_LINT (= 0);
8139       struct coding_system *this IF_LINT (= NULL);
8140       int c, i;
8141
8142       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
           /* While scanning, coding.head_ascii counts the bytes seen
              before the first eight-bit byte.  The loop stops early once
              both a null byte and an eight-bit byte have been seen, or
              when the ISO-2022 detector reports success.  */
8143       for (; src < src_end; src++)
8144         {
8145           c = *src;
8146           if (c & 0x80)
8147             {
8148               eight_bit_found = 1;
8149               if (null_byte_found)
8150                 break;
8151             }
8152           else if (c < 0x20)
8153             {
                   /* The ISO-2022 detector is tried on ESC, SI or SO,
                      unless inhibited, and only while
                      detect_info.checked is still zero.  */
8154               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8155                   && ! inhibit_iso_escape_detection
8156                   && ! detect_info.checked)
8157                 {
8158                   if (detect_coding_iso_2022 (&coding, &detect_info))
8159                     {
8160                       /* We have scanned the whole data.  */
8161                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8162                         {
8163                           /* We didn't find an 8-bit code.  We may
8164                              have found a null-byte, but it's very
8165                              rare that a binary file conforms to
8166                              ISO-2022.  */
8167                           src = src_end;
8168                           coding.head_ascii = src - coding.source;
8169                         }
                           /* Every category outside
                              CATEGORY_MASK_ISO_ESCAPE is now rejected.  */
8170                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8171                       break;
8172                     }
8173                 }
8174               else if (! c && !inhibit_null_byte_detection)
8175                 {
8176                   null_byte_found = 1;
8177                   if (eight_bit_found)
8178                     break;
8179                 }
8180               if (! eight_bit_found)
8181                 coding.head_ascii++;
8182             }
8183           else if (! eight_bit_found)
8184             coding.head_ascii++;
8185         }
8186
           /* Consult the per-category results only if the scan saw a
              null byte or an eight-bit byte, stopped before the end of
              the data, or the ISO-2022 detector already found
              something.  */
8187       if (null_byte_found || eight_bit_found
8188           || coding.head_ascii < coding.src_bytes
8189           || detect_info.found)
8190         {
8191           if (coding.head_ascii == coding.src_bytes)
8192             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
                 /* CATEGORY and THIS are left at the first category, in
                    priority order, whose bit is set in
                    detect_info.found; they are used below when HIGHEST
                    is nonzero.  */
8193             for (i = 0; i < coding_category_raw_text; i++)
8194               {
8195                 category = coding_priorities[i];
8196                 this = coding_categories + category;
8197                 if (detect_info.found & (1 << category))
8198                   break;
8199               }
8200           else
8201             {
8202               if (null_byte_found)
8203                 {
                       /* With a null byte present, every category except
                          the UTF-16 ones is marked as already checked
                          and rejected.  */
8204                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8205                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8206                 }
                   /* Walk the categories in priority order, running each
                      detector that has not been run yet.  When HIGHEST
                      is nonzero the walk stops at the first category
                      that is found.  */
8207               for (i = 0; i < coding_category_raw_text; i++)
8208                 {
8209                   category = coding_priorities[i];
8210                   this = coding_categories + category;
8211
8212                   if (this->id < 0)
8213                     {
8214                       /* No coding system of this category is defined.  */
8215                       detect_info.rejected |= (1 << category);
8216                     }
8217                   else if (category >= coding_category_raw_text)
8218                     continue;
8219                   else if (detect_info.checked & (1 << category))
8220                     {
8221                       if (highest
8222                           && (detect_info.found & (1 << category)))
8223                         break;
8224                     }
8225                   else if ((*(this->detector)) (&coding, &detect_info)
8226                            && highest
8227                            && (detect_info.found & (1 << category)))
8228                     {
                           /* For the utf-16-auto category, narrow
                              CATEGORY to the little- or big-endian
                              category according to what was found.  */
8229                       if (category == coding_category_utf_16_auto)
8230                         {
8231                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8232                             category = coding_category_utf_16_le;
8233                           else
8234                             category = coding_category_utf_16_be;
8235                         }
8236                       break;
8237                     }
8238                 }
8239             }
8240         }
8241
           /* Turn the detection masks into VAL, a list of coding-system
              ids.  */
8242       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8243           || null_byte_found)
8244         {
               /* Everything was rejected, or a null byte was seen: the
                  answer is `no-conversion'.  */
8245           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8246           id = CODING_SYSTEM_ID (Qno_conversion);
8247           val = Fcons (make_number (id), Qnil);
8248         }
8249       else if (! detect_info.rejected && ! detect_info.found)
8250         {
               /* Nothing was rejected and nothing was found: answer with
                  the coding system of the undecided category.  */
8251           detect_info.found = CATEGORY_MASK_ANY;
8252           id = coding_categories[coding_category_undecided].id;
8253           val = Fcons (make_number (id), Qnil);
8254         }
8255       else if (highest)
8256         {
8257           if (detect_info.found)
8258             {
                   /* CATEGORY and THIS were set by the loops above.  */
8259               detect_info.found = 1 << category;
8260               val = Fcons (make_number (this->id), Qnil);
8261             }
8262           else
                 /* Nothing was positively found: take the first category,
                    in priority order, that was not rejected.  */
8263             for (i = 0; i < coding_category_raw_text; i++)
8264               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8265                 {
8266                   detect_info.found = 1 << coding_priorities[i];
8267                   id = coding_categories[coding_priorities[i]].id;
8268                   val = Fcons (make_number (id), Qnil);
8269                   break;
8270                 }
8271         }
8272       else
8273         {
8274           int mask = detect_info.rejected | detect_info.found;
8275           int found = 0;
8276
               /* Both loops run from lowest to highest priority and push
                  onto the front of VAL, so each group ends up in priority
                  order.  The categories that were neither rejected nor
                  found are pushed first and therefore come last.  */
8277           for (i = coding_category_raw_text - 1; i >= 0; i--)
8278             {
8279               category = coding_priorities[i];
8280               if (! (mask & (1 << category)))
8281                 {
8282                   found |= 1 << category;
8283                   id = coding_categories[category].id;
8284                   if (id >= 0)
8285                     val = Fcons (make_number (id), val);
8286                 }
8287             }
               /* The positively found categories go in front.  */
8288           for (i = coding_category_raw_text - 1; i >= 0; i--)
8289             {
8290               category = coding_priorities[i];
8291               if (detect_info.found & (1 << category))
8292                 {
8293                   id = coding_categories[category].id;
8294                   val = Fcons (make_number (id), val);
8295                 }
8296             }
8297           detect_info.found |= found;
8298         }
8299     }
8300   else if (base_category == coding_category_utf_8_auto)
8301     {
           /* Only the signature question is open: choose between the
              with-signature and without-signature UTF-8 categories.  VAL
              stays nil if the detector returns zero.  */
8302       if (detect_coding_utf_8 (&coding, &detect_info))
8303         {
8304           struct coding_system *this;
8305
8306           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8307             this = coding_categories + coding_category_utf_8_sig;
8308           else
8309             this = coding_categories + coding_category_utf_8_nosig;
8310           val = Fcons (make_number (this->id), Qnil);
8311         }
8312     }
8313   else if (base_category == coding_category_utf_16_auto)
8314     {
           /* Choose among the four UTF-16 categories.  VAL stays nil if
              the detector returns zero.  */
8315       if (detect_coding_utf_16 (&coding, &detect_info))
8316         {
8317           struct coding_system *this;
8318
8319           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8320             this = coding_categories + coding_category_utf_16_le;
8321           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8322             this = coding_categories + coding_category_utf_16_be;
8323           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8324             this = coding_categories + coding_category_utf_16_be_nosig;
8325           else
8326             this = coding_categories + coding_category_utf_16_le_nosig;
8327           val = Fcons (make_number (this->id), Qnil);
8328         }
8329     }
8330   else
8331     {
           /* The text format is already fixed by CODING_SYSTEM; only the
              eol format remains to be settled below.  */
8332       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8333       val = Fcons (make_number (coding.id), Qnil);
8334     }
8335
8336   /* Then, detect eol-format if necessary.  */
8337   {
         /* One eol result per byte layout; -1 means "not determined".  */
8338     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8339     Lisp_Object tail;
8340
         /* A vector EOL_TYPE means the eol format is still open and must
            be detected from the data.  */
8341     if (VECTORP (eol_type))
8342       {
8343         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8344           {
                 /* With a null byte present, LF is assumed without
                    scanning.  */
8345             if (null_byte_found)
8346               normal_eol = EOL_SEEN_LF;
8347             else
8348               normal_eol = detect_eol (coding.source, src_bytes,
8349                                        coding_category_raw_text);
8350           }
8351         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8352                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8353           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8354                                       coding_category_utf_16_be);
8355         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8356                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8357           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8358                                       coding_category_utf_16_le);
8359       }
8360     else
8361       {
             /* The eol format was given by the caller: `unix' maps to LF,
                `dos' to CRLF, anything else to CR.  */
8362         if (EQ (eol_type, Qunix))
8363           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8364         else if (EQ (eol_type, Qdos))
8365           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8366         else
8367           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8368       }
8369
         /* Replace each id in VAL by a coding-system name.  When the
            coding system's own eol type is a vector, elements 0, 1 and 2
            of it are used for LF, CRLF and CR respectively; if no eol
            format was determined, or the eol type is not a vector, the
            name from CODING_ID_NAME is used.  */
8370     for (tail = val; CONSP (tail); tail = XCDR (tail))
8371       {
8372         enum coding_category category;
8373         int this_eol;
8374
8375         id = XINT (XCAR (tail));
8376         attrs = CODING_ID_ATTRS (id);
8377         category = XINT (CODING_ATTR_CATEGORY (attrs));
8378         eol_type = CODING_ID_EOL_TYPE (id);
8379         if (VECTORP (eol_type))
8380           {
8381             if (category == coding_category_utf_16_be
8382                 || category == coding_category_utf_16_be_nosig)
8383               this_eol = utf_16_be_eol;
8384             else if (category == coding_category_utf_16_le
8385                      || category == coding_category_utf_16_le_nosig)
8386               this_eol = utf_16_le_eol;
8387             else
8388               this_eol = normal_eol;
8389
8390             if (this_eol == EOL_SEEN_LF)
8391               XSETCAR (tail, AREF (eol_type, 0));
8392             else if (this_eol == EOL_SEEN_CRLF)
8393               XSETCAR (tail, AREF (eol_type, 1));
8394             else if (this_eol == EOL_SEEN_CR)
8395               XSETCAR (tail, AREF (eol_type, 2));
8396             else
8397               XSETCAR (tail, CODING_ID_NAME (id));
8398           }
8399         else
8400           XSETCAR (tail, CODING_ID_NAME (id));
8401       }
8402   }
8403
       /* With HIGHEST, return just the first element (or nil if VAL is
          empty); otherwise return the whole list.  */
8404   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8405 }
8406
8407
8408 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8409        2, 3, 0,
8410        doc: /* Detect coding system of the text in the region between START and END.
8411 Return a list of possible coding systems ordered by priority.
8412 The coding systems to try and their priorities follows what
8413 the function `coding-system-priority-list' (which see) returns.
8414
8415 If only ASCII characters are found (except for such ISO-2022 control
8416 characters as ESC), it returns a list of single element `undecided'
8417 or its subsidiary coding system according to a detected end-of-line
8418 format.
8419
8420 If optional argument HIGHEST is non-nil, return the coding system of
8421 highest priority.  */)
8422   (Lisp_Object start, Lisp_Object end, Lisp_Object highest)
8423 {
       /* Character and byte positions are kept in EMACS_INT, the type
          detect_coding_system takes for its counts, so that positions
          wider than `int' are not truncated.  */
8424   EMACS_INT from, to;
8425   EMACS_INT from_byte, to_byte;
8426
8427   CHECK_NUMBER_COERCE_MARKER (start);
8428   CHECK_NUMBER_COERCE_MARKER (end);
8429
8430   validate_region (&start, &end);
8431   from = XINT (start), to = XINT (end);
8432   from_byte = CHAR_TO_BYTE (from);
8433   to_byte = CHAR_TO_BYTE (to);
8434
       /* detect_coding_system reads one contiguous run of bytes.  If the
          region starts before the gap and reaches it, move the gap to
          the end of the region first.  */
8435   if (from < GPT && to >= GPT)
8436     move_gap_both (to, to_byte);
8437
       /* Qnil as the last argument requests detection of both the text
          format and the eol format.  */
8438   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8439                                to - from, to_byte - from_byte,
8440                                !NILP (highest),
8441                                !NILP (BVAR (current_buffer,
8442                                             enable_multibyte_characters)),
8443                                Qnil);
8444 }
8445
8446 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8447        1, 2, 0,
8448        doc: /* Detect coding system of the text in STRING.
8449 Return a list of possible coding systems ordered by priority.
8450 The coding systems to try and their priorities follows what
8451 the function `coding-system-priority-list' (which see) returns.
8452
8453 If only ASCII characters are found (except for such ISO-2022 control
8454 characters as ESC), it returns a list of single element `undecided'
8455 or its subsidiary coding system according to a detected end-of-line
8456 format.
8457
8458 If optional argument HIGHEST is non-nil, return the coding system of
8459 highest priority.  */)
8460   (Lisp_Object string, Lisp_Object highest)
8461 {
       int highest_only, multibyte;

8462   CHECK_STRING (string);
       highest_only = ! NILP (highest);
       multibyte = STRING_MULTIBYTE (string);
8463
       /* Hand the string's bytes to the detector.  Qnil as the last
          argument requests detection of both the text format and the
          eol format.  */
8464   return detect_coding_system (SDATA (string),
8465                                SCHARS (string), SBYTES (string),
8466                                highest_only, multibyte,
8467                                Qnil);
8468 }
8469
8470
8471 static INLINE int
8472 char_encodable_p (int c, Lisp_Object attrs)
8473 {
       /* Return nonzero if C, after being passed through the translation
          table of ATTRS when there is one, belongs to one of the
          charsets in the charset list of ATTRS.  */
8474   Lisp_Object table = CODING_ATTR_TRANS_TBL (attrs);
8475   Lisp_Object rest;
8477
8479   if (! NILP (table))
8480     c = translate_char (table, c);

8481   rest = CODING_ATTR_CHARSET_LIST (attrs);
8482   while (CONSP (rest))
8483     {
8484       struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (rest)));

8485       if (CHAR_CHARSET_P (c, cs))
8486         return 1;
           rest = XCDR (rest);
8487     }

       /* No charset matched.  The result is zero when the walk ended on
          nil; a list ending in some other non-cons object still yields
          nonzero.  */
8488   return ! NILP (rest);
8489 }
8490
8491
8492 /* Return a list of coding systems that safely encode the text between
8493    START and END.  If EXCLUDE is non-nil, it is a list of coding
8494    systems not to check.  The returned list doesn't contain any such
8495    coding systems.  In any case, if the text contains only ASCII or is
8496    unibyte, return t.

        START may also be a string, in which case the whole string is
        checked and END is not used.  Otherwise the returned list always
        ends with `raw-text' and `no-conversion'.  */
8497
8498 DEFUN ("find-coding-systems-region-internal",
8499        Ffind_coding_systems_region_internal,
8500        Sfind_coding_systems_region_internal, 2, 3, 0,
8501        doc: /* Internal use only.  */)
8502   (Lisp_Object start, Lisp_Object end, Lisp_Object exclude)
8503 {
8504   Lisp_Object coding_attrs_list, safe_codings;
8505   EMACS_INT start_byte, end_byte;
8506   const unsigned char *p, *pbeg, *pend;
8507   int c;
8508   Lisp_Object tail, elt, work_table;
8509
8510   if (STRINGP (start))
8511     {
           /* A unibyte string, or a multibyte string with as many bytes
              as characters, needs no check.  */
8512       if (!STRING_MULTIBYTE (start)
8513           || SCHARS (start) == SBYTES (start))
8514         return Qt;
8515       start_byte = 0;
8516       end_byte = SBYTES (start);
8517     }
8518   else
8519     {
8520       CHECK_NUMBER_COERCE_MARKER (start);
8521       CHECK_NUMBER_COERCE_MARKER (end);
8522       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8523         args_out_of_range (start, end);
8524       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
8525         return Qt;
8526       start_byte = CHAR_TO_BYTE (XINT (start));
8527       end_byte = CHAR_TO_BYTE (XINT (end));
           /* As many bytes as characters: nothing to check.  */
8528       if (XINT (end) - XINT (start) == end_byte - start_byte)
8529         return Qt;
8530
           /* The scan below walks one contiguous run of bytes, so the gap
              is moved out of the region, to whichever end is nearer.  */
8531       if (XINT (start) < GPT && XINT (end) > GPT)
8532         {
8533           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8534             move_gap_both (XINT (start), start_byte);
8535           else
8536             move_gap_both (XINT (end), end_byte);
8537         }
8538     }
8539
       /* Collect the attribute vectors of the candidates: every entry of
          Vcoding_system_list that is not in EXCLUDE, is its own base
          name, and is not of type `undecided'.  */
8540   coding_attrs_list = Qnil;
8541   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8542     if (NILP (exclude)
8543         || NILP (Fmemq (XCAR (tail), exclude)))
8544       {
8545         Lisp_Object attrs;
8546
8547         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8548         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8549             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8550           {
                 /* Store the result of get_translation_table in the
                    coding_attr_trans_tbl slot of the attribute vector
                    (presumably the slot char_encodable_p reads through
                    CODING_ATTR_TRANS_TBL -- TODO confirm).  */
8551             ASET (attrs, coding_attr_trans_tbl,
8552                   get_translation_table (attrs, 1, NULL));
8553             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8554           }
8555       }
8556
8557   if (STRINGP (start))
8558     p = pbeg = SDATA (start);
8559   else
8560     p = pbeg = BYTE_POS_ADDR (start_byte);
8561   pend = p + (end_byte - start_byte);
8562
       /* Trim leading and trailing ASCII bytes from the range to scan.  */
8563   while (p < pend && ASCII_BYTE_P (*p)) p++;
8564   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8565
       /* WORK_TABLE records the characters that were already tested.  */
8566   work_table = Fmake_char_table (Qnil, Qnil);
8567   while (p < pend)
8568     {
8569       if (ASCII_BYTE_P (*p))
8570         p++;
8571       else
8572         {
8573           c = STRING_CHAR_ADVANCE (p);
8574           if (!NILP (char_table_ref (work_table, c)))
8575             /* This character was already checked.  Ignore it.  */
8576             continue;
8577
8578           charset_map_loaded = 0;
               /* Drop from coding_attrs_list every candidate that cannot
                  encode C.  TAIL is advanced only when the current cell
                  is kept or is the last cell.  */
8579           for (tail = coding_attrs_list; CONSP (tail);)
8580             {
8581               elt = XCAR (tail);
8582               if (NILP (elt))
8583                 tail = XCDR (tail);
8584               else if (char_encodable_p (c, elt))
8585                 tail = XCDR (tail);
8586               else if (CONSP (XCDR (tail)))
8587                 {
                       /* Delete this element in place by copying the
                          next cell over it; the copied element is then
                          examined on the next iteration.  */
8588                   XSETCAR (tail, XCAR (XCDR (tail)));
8589                   XSETCDR (tail, XCDR (XCDR (tail)));
8590                 }
8591               else
8592                 {
                       /* Last cell: there is nothing to copy, so the
                          element is replaced by nil.  nil entries are
                          skipped above and when the result is built.  */
8593                   XSETCAR (tail, Qnil);
8594                   tail = XCDR (tail);
8595                 }
8596             }
8597           if (charset_map_loaded)
8598             {
                   /* charset_map_loaded became nonzero during the
                      checks.  P and PEND are rebuilt from a freshly
                      fetched base address, presumably because the text
                      may have been relocated -- TODO confirm.  */
8599               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8600
8601               if (STRINGP (start))
8602                 pbeg = SDATA (start);
8603               else
8604                 pbeg = BYTE_POS_ADDR (start_byte);
8605               p = pbeg + p_offset;
8606               pend = pbeg + pend_offset;
8607             }
8608           char_table_set (work_table, c, Qt);
8609         }
8610     }
8611
       /* `raw-text' and `no-conversion' are always part of the answer;
          the base names of the surviving candidates are pushed in front
          of them.  */
8612   safe_codings = list2 (Qraw_text, Qno_conversion);
8613   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8614     if (! NILP (XCAR (tail)))
8615       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8616
8617   return safe_codings;
8618 }
8619
8620
8621 DEFUN ("unencodable-char-position", Funencodable_char_position,
8622        Sunencodable_char_position, 3, 5, 0,
8623        doc: /*
8624 Return position of first un-encodable character in a region.
8625 START and END specify the region and CODING-SYSTEM specifies the
8626 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8627
8628 If optional 4th argument COUNT is non-nil, it specifies at most how
8629 many un-encodable characters to search.  In this case, the value is a
8630 list of positions.
8631
8632 If optional 5th argument STRING is non-nil, it is a string to search
8633 for un-encodable characters.  In that case, START and END are indexes
8634 to the string.  */)
8635   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
8636 {
       /* N, FROM and TO hold values obtained with XINT (a count and
          buffer or string positions), so they are EMACS_INT rather than
          `int' to avoid truncation.  */
8637   EMACS_INT n;
8638   struct coding_system coding;
8639   Lisp_Object attrs, charset_list, translation_table;
8640   Lisp_Object positions;
8641   EMACS_INT from, to;
       /* P is the scan pointer and PEND the end of the text.  STOP is
          where the current contiguous run ends: the gap start when the
          region straddles the gap, PEND otherwise.  */
8642   const unsigned char *p, *stop, *pend;
8643   int ascii_compatible;
8644
8645   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8646   attrs = CODING_ID_ATTRS (coding.id);
       /* For a coding system of type `raw-text' nothing is reported.  */
8647   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8648     return Qnil;
8649   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8650   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8651   translation_table = get_translation_table (attrs, 1, NULL);
8652
8653   if (NILP (string))
8654     {
8655       validate_region (&start, &end);
8656       from = XINT (start);
8657       to = XINT (end);
           /* A unibyte buffer, or an ASCII-compatible coding system and
              a region with as many bytes as characters, needs no scan.  */
8658       if (NILP (BVAR (current_buffer, enable_multibyte_characters))
8659           || (ascii_compatible
8660               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8661         return Qnil;
8662       p = CHAR_POS_ADDR (from);
8663       pend = CHAR_POS_ADDR (to);
8664       if (from < GPT && to >= GPT)
8665         stop = GPT_ADDR;
8666       else
8667         stop = pend;
8668     }
8669   else
8670     {
8671       CHECK_STRING (string);
8672       CHECK_NATNUM (start);
8673       CHECK_NATNUM (end);
8674       from = XINT (start);
8675       to = XINT (end);
8676       if (from > to
8677           || to > SCHARS (string))
8678         args_out_of_range_3 (string, start, end);
8679       if (! STRING_MULTIBYTE (string))
8680         return Qnil;
8681       p = SDATA (string) + string_char_to_byte (string, from);
8682       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8683       if (ascii_compatible && (to - from) == (pend - p))
8684         return Qnil;
8685     }
8686
8687   if (NILP (count))
8688     n = 1;
8689   else
8690     {
8691       CHECK_NATNUM (count);
8692       n = XINT (count);
8693     }
8694
       /* FROM is advanced in step with P, so it is always the position
          of the character P points at.  */
8695   positions = Qnil;
8696   while (1)
8697     {
8698       int c;
8699
8700       if (ascii_compatible)
8701         while (p < stop && ASCII_BYTE_P (*p))
8702           p++, from++;
8703       if (p >= stop)
8704         {
8705           if (p >= pend)
8706             break;
               /* The first run ended at the gap: continue after it.  */
8707           stop = pend;
8708           p = GAP_END_ADDR;
               /* Start the iteration over so that P is tested against
                  PEND again.  When the region ends exactly at the gap,
                  GAP_END_ADDR is already PEND, and decoding a character
                  here would read the character at END, which is outside
                  the region.  */
               continue;
8709         }
8710
8711       c = STRING_CHAR_ADVANCE (p);
8712       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8713           && ! char_charset (translate_char (translation_table, c),
8714                              charset_list, NULL))
8715         {
8716           positions = Fcons (make_number (from), positions);
8717           n--;
8718           if (n == 0)
8719             break;
8720         }
8721
8722       from++;
8723     }
8724
       /* Without COUNT the value is a single position or nil; with COUNT
          it is the list of positions in increasing order.  */
8725   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8726 }
8727
8728
8729 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8730        Scheck_coding_systems_region, 3, 3, 0,
8731        doc: /* Check if the region is encodable by coding systems.
8732
8733 START and END are buffer positions specifying the region.
8734 CODING-SYSTEM-LIST is a list of coding systems to check.
8735
8736 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8737 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8738 whole region, POS0, POS1, ... are buffer positions where non-encodable
8739 characters are found.
8740
8741 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8742 value is nil.
8743
8744 START may be a string.  In that case, check if the string is
8745 encodable, and the value contains indices to the string instead of
8746 buffer positions.  END is ignored.
8747
8748 If the current buffer (or START if it is a string) is unibyte, the value
8749 is nil.  */)
8750   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system_list)
8751 {
8752   Lisp_Object list;
8753   EMACS_INT start_byte, end_byte;
       /* POS is the buffer position or string index of the character
          being examined; it is EMACS_INT so that positions wider than
          `int' are not truncated.  */
8754   EMACS_INT pos;
8755   const unsigned char *p, *pbeg, *pend;
8756   int c;
8757   Lisp_Object tail, elt, attrs, spec;
8758
8759   if (STRINGP (start))
8760     {
           /* A unibyte string, or a multibyte string with as many bytes
              as characters, needs no check.  */
8761       if (!STRING_MULTIBYTE (start)
8762           || SCHARS (start) == SBYTES (start))
8763         return Qnil;
8764       start_byte = 0;
8765       end_byte = SBYTES (start);
8766       pos = 0;
8767     }
8768   else
8769     {
8770       CHECK_NUMBER_COERCE_MARKER (start);
8771       CHECK_NUMBER_COERCE_MARKER (end);
8772       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8773         args_out_of_range (start, end);
8774       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
8775         return Qnil;
8776       start_byte = CHAR_TO_BYTE (XINT (start));
8777       end_byte = CHAR_TO_BYTE (XINT (end));
           /* As many bytes as characters: nothing to check.  */
8778       if (XINT (end) - XINT (start) == end_byte - start_byte)
8779         return Qnil;
8780
           /* The scan below walks one contiguous run of bytes, so the gap
              is moved out of the region, to whichever end is nearer.  */
8781       if (XINT (start) < GPT && XINT (end) > GPT)
8782         {
8783           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8784             move_gap_both (XINT (start), start_byte);
8785           else
8786             move_gap_both (XINT (end), end_byte);
8787         }
8788       pos = XINT (start);
8789     }
8790
       /* Build LIST with one entry (CODING-SYSTEM ATTRS) per requested
          coding system; the positions of unencodable characters are
          pushed after ATTRS during the scan.  */
8791   list = Qnil;
8792   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8793     {
8794       elt = XCAR (tail);
           /* An element that is not a defined coding system has no spec
              vector; signal an error instead of indexing a non-vector.  */
           spec = CODING_SYSTEM_SPEC (elt);
           if (! VECTORP (spec))
             xsignal1 (Qcoding_system_error, elt);
8795       attrs = AREF (spec, 0);
8796       ASET (attrs, coding_attr_trans_tbl,
8797             get_translation_table (attrs, 1, NULL));
8798       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8799     }
8800
8801   if (STRINGP (start))
8802     p = pbeg = SDATA (start);
8803   else
8804     p = pbeg = BYTE_POS_ADDR (start_byte);
8805   pend = p + (end_byte - start_byte);
8806
       /* Trim leading and trailing ASCII bytes from the range to scan,
          keeping POS in step with P.  */
8807   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8808   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8809
8810   while (p < pend)
8811     {
8812       if (ASCII_BYTE_P (*p))
8813         p++;
8814       else
8815         {
8816           c = STRING_CHAR_ADVANCE (p);
8817
8818           charset_map_loaded = 0;
8819           for (tail = list; CONSP (tail); tail = XCDR (tail))
8820             {
                   /* ELT is the (ATTRS POS...) part of the entry.  */
8821               elt = XCDR (XCAR (tail));
8822               if (! char_encodable_p (c, XCAR (elt)))
8823                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8824             }
8825           if (charset_map_loaded)
8826             {
                   /* charset_map_loaded became nonzero during the
                      checks.  P and PEND are rebuilt from a freshly
                      fetched base address, presumably because the text
                      may have been relocated -- TODO confirm.  */
8827               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8828
8829               if (STRINGP (start))
8830                 pbeg = SDATA (start);
8831               else
8832                 pbeg = BYTE_POS_ADDR (start_byte);
8833               p = pbeg + p_offset;
8834               pend = pbeg + pend_offset;
8835             }
8836         }
8837       pos++;
8838     }
8839
       /* Keep only the entries that collected at least one position,
          dropping ATTRS and putting the positions in increasing
          order.  */
8840   tail = list;
8841   list = Qnil;
8842   for (; CONSP (tail); tail = XCDR (tail))
8843     {
8844       elt = XCAR (tail);
8845       if (CONSP (XCDR (XCDR (elt))))
8846         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8847                       list);
8848     }
8849
8850   return list;
8851 }
8852
8853
8854 Lisp_Object
8855 code_convert_region (Lisp_Object start, Lisp_Object end,
8856                      Lisp_Object coding_system, Lisp_Object dst_object,
8857                      int encodep, int norecord)
8858 {
8859   struct coding_system coding;
8860   EMACS_INT from, from_byte, to, to_byte;
8861   Lisp_Object src_object;
8862
8863   CHECK_NUMBER_COERCE_MARKER (start);
8864   CHECK_NUMBER_COERCE_MARKER (end);
8865   if (NILP (coding_system))
8866     coding_system = Qno_conversion;
8867   else
8868     CHECK_CODING_SYSTEM (coding_system);
8869   src_object = Fcurrent_buffer ();
8870   if (NILP (dst_object))
8871     dst_object = src_object;
8872   else if (! EQ (dst_object, Qt))
8873     CHECK_BUFFER (dst_object);
8874
8875   validate_region (&start, &end);
8876   from = XFASTINT (start);
8877   from_byte = CHAR_TO_BYTE (from);
8878   to = XFASTINT (end);
8879   to_byte = CHAR_TO_BYTE (to);
8880
8881   setup_coding_system (coding_system, &coding);
8882   coding.mode |= CODING_MODE_LAST_BLOCK;
8883
8884   if (encodep)
8885     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8886                           dst_object);
8887   else
8888     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8889                           dst_object);
8890   if (! norecord)
8891     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8892
8893   return (BUFFERP (dst_object)
8894           ? make_number (coding.produced_char)
8895           : coding.dst_object);
8896 }
8897
8898
DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
       3, 4, "r\nzCoding system: ",
       doc: /* Decode the current region from the specified coding system.
When called from a program, takes four arguments:
        START, END, CODING-SYSTEM, and DESTINATION.
START and END are buffer positions.

Optional 4th argument DESTINATION specifies where the decoded text goes.
If nil, the region between START and END is replaced by the decoded text.
If buffer, the decoded text is inserted in that buffer after point (point
does not move).
In those cases, the length of the decoded text is returned.
If DESTINATION is t, the decoded text is returned.

This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)  */)
  (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
{
  /* The trailing arguments are ENCODEP = 0 (decode) and NORECORD = 0
     (do record the coding system in `last-coding-system-used').  */
  return code_convert_region (start, end, coding_system, destination, 0, 0);
}
8920
DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
       3, 4, "r\nzCoding system: ",
       doc: /* Encode the current region by specified coding system.
When called from a program, takes four arguments:
        START, END, CODING-SYSTEM and DESTINATION.
START and END are buffer positions.

Optional 4th argument DESTINATION specifies where the encoded text goes.
If nil, the region between START and END is replaced by the encoded text.
If buffer, the encoded text is inserted in that buffer after point (point
does not move).
In those cases, the length of the encoded text is returned.
If DESTINATION is t, the encoded text is returned.

This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)  */)
  (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
{
  /* The trailing arguments are ENCODEP = 1 (encode) and NORECORD = 0
     (do record the coding system in `last-coding-system-used').  */
  return code_convert_region (start, end, coding_system, destination, 1, 0);
}
8942
8943 Lisp_Object
8944 code_convert_string (Lisp_Object string, Lisp_Object coding_system,
8945                      Lisp_Object dst_object, int encodep, int nocopy, int norecord)
8946 {
8947   struct coding_system coding;
8948   EMACS_INT chars, bytes;
8949
8950   CHECK_STRING (string);
8951   if (NILP (coding_system))
8952     {
8953       if (! norecord)
8954         Vlast_coding_system_used = Qno_conversion;
8955       if (NILP (dst_object))
8956         return (nocopy ? Fcopy_sequence (string) : string);
8957     }
8958
8959   if (NILP (coding_system))
8960     coding_system = Qno_conversion;
8961   else
8962     CHECK_CODING_SYSTEM (coding_system);
8963   if (NILP (dst_object))
8964     dst_object = Qt;
8965   else if (! EQ (dst_object, Qt))
8966     CHECK_BUFFER (dst_object);
8967
8968   setup_coding_system (coding_system, &coding);
8969   coding.mode |= CODING_MODE_LAST_BLOCK;
8970   chars = SCHARS (string);
8971   bytes = SBYTES (string);
8972   if (encodep)
8973     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8974   else
8975     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8976   if (! norecord)
8977     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8978
8979   return (BUFFERP (dst_object)
8980           ? make_number (coding.produced_char)
8981           : coding.dst_object);
8982 }
8983
8984
/* Encode (if ENCODEP is nonzero) or decode STRING according to
   CODING_SYSTEM and return the result.
   Do not set Vlast_coding_system_used.

   This function is called only from macros DECODE_FILE and
   ENCODE_FILE, thus we ignore character composition.  */

Lisp_Object
code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
                              int encodep)
{
  /* Destination is t; the trailing arguments are NOCOPY = 0 and
     NORECORD = 1.  */
  return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
}
8997
8998
DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
       2, 4, 0,
       doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.

Optional third arg NOCOPY non-nil means it is OK to return STRING itself
if the decoding operation is trivial.

Optional fourth arg BUFFER non-nil means that the decoded text is
inserted in that buffer after point (point does not move).  In this
case, the return value is the length of the decoded text.

This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)  */)
  (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
{
  /* Arguments after BUFFER: ENCODEP = 0 (decode), NOCOPY converted
     from a Lisp boolean to a C one, NORECORD = 0 (do record the coding
     system in `last-coding-system-used').  */
  return code_convert_string (string, coding_system, buffer,
                              0, ! NILP (nocopy), 0);
}
9018
9019 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9020        2, 4, 0,
9021        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9022
9023 Optional third arg NOCOPY non-nil means it is OK to return STRING
9024 itself if the encoding operation is trivial.
9025
9026 Optional fourth arg BUFFER non-nil means that the encoded text is
9027 inserted in that buffer after point (point does not move).  In this
9028 case, the return value is the length of the encoded text.
9029
9030 This function sets `last-coding-system-used' to the precise coding system
9031 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9032 not fully specified.)  */)
9033   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9034 {
9035   return code_convert_string (string, coding_system, buffer,
9036                               1, ! NILP (nocopy), 1);
9037 }
9038
9039 \f
9040 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9041        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9042 Return the corresponding character.  */)
9043   (Lisp_Object code)
9044 {
9045   Lisp_Object spec, attrs, val;
9046   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9047   int c;
9048
9049   CHECK_NATNUM (code);
9050   c = XFASTINT (code);
9051   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9052   attrs = AREF (spec, 0);
9053
9054   if (ASCII_BYTE_P (c)
9055       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9056     return code;
9057
9058   val = CODING_ATTR_CHARSET_LIST (attrs);
9059   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9060   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9061   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9062
9063   if (c <= 0x7F)
9064     charset = charset_roman;
9065   else if (c >= 0xA0 && c < 0xDF)
9066     {
9067       charset = charset_kana;
9068       c -= 0x80;
9069     }
9070   else
9071     {
9072       int c1 = c >> 8, c2 = c & 0xFF;
9073
9074       if (c1 < 0x81 || (c1 > 0x9F && c1 < 0xE0) || c1 > 0xEF
9075           || c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
9076         error ("Invalid code: %d", code);
9077       SJIS_TO_JIS (c);
9078       charset = charset_kanji;
9079     }
9080   c = DECODE_CHAR (charset, c);
9081   if (c < 0)
9082     error ("Invalid code: %d", code);
9083   return make_number (c);
9084 }
9085
9086
9087 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9088        doc: /* Encode a Japanese character CH to shift_jis encoding.
9089 Return the corresponding code in SJIS.  */)
9090   (Lisp_Object ch)
9091 {
9092   Lisp_Object spec, attrs, charset_list;
9093   int c;
9094   struct charset *charset;
9095   unsigned code;
9096
9097   CHECK_CHARACTER (ch);
9098   c = XFASTINT (ch);
9099   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9100   attrs = AREF (spec, 0);
9101
9102   if (ASCII_CHAR_P (c)
9103       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9104     return ch;
9105
9106   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9107   charset = char_charset (c, charset_list, &code);
9108   if (code == CHARSET_INVALID_CODE (charset))
9109     error ("Can't encode by shift_jis encoding: %d", c);
9110   JIS_TO_SJIS (code);
9111
9112   return make_number (code);
9113 }
9114
9115 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9116        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9117 Return the corresponding character.  */)
9118   (Lisp_Object code)
9119 {
9120   Lisp_Object spec, attrs, val;
9121   struct charset *charset_roman, *charset_big5, *charset;
9122   int c;
9123
9124   CHECK_NATNUM (code);
9125   c = XFASTINT (code);
9126   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9127   attrs = AREF (spec, 0);
9128
9129   if (ASCII_BYTE_P (c)
9130       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9131     return code;
9132
9133   val = CODING_ATTR_CHARSET_LIST (attrs);
9134   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9135   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9136
9137   if (c <= 0x7F)
9138     charset = charset_roman;
9139   else
9140     {
9141       int b1 = c >> 8, b2 = c & 0x7F;
9142       if (b1 < 0xA1 || b1 > 0xFE
9143           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9144         error ("Invalid code: %d", code);
9145       charset = charset_big5;
9146     }
9147   c = DECODE_CHAR (charset, (unsigned )c);
9148   if (c < 0)
9149     error ("Invalid code: %d", code);
9150   return make_number (c);
9151 }
9152
9153 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9154        doc: /* Encode the Big5 character CH to BIG5 coding system.
9155 Return the corresponding character code in Big5.  */)
9156   (Lisp_Object ch)
9157 {
9158   Lisp_Object spec, attrs, charset_list;
9159   struct charset *charset;
9160   int c;
9161   unsigned code;
9162
9163   CHECK_CHARACTER (ch);
9164   c = XFASTINT (ch);
9165   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9166   attrs = AREF (spec, 0);
9167   if (ASCII_CHAR_P (c)
9168       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9169     return ch;
9170
9171   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9172   charset = char_charset (c, charset_list, &code);
9173   if (code == CHARSET_INVALID_CODE (charset))
9174     error ("Can't encode by Big5 encoding: %d", c);
9175
9176   return make_number (code);
9177 }
9178
9179 \f
9180 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9181        Sset_terminal_coding_system_internal, 1, 2, 0,
9182        doc: /* Internal use only.  */)
9183   (Lisp_Object coding_system, Lisp_Object terminal)
9184 {
9185   struct terminal *term = get_terminal (terminal, 1);
9186   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (term);
9187   CHECK_SYMBOL (coding_system);
9188   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9189   /* We had better not send unsafe characters to terminal.  */
9190   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9191   /* Character composition should be disabled.  */
9192   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9193   terminal_coding->src_multibyte = 1;
9194   terminal_coding->dst_multibyte = 0;
9195   if (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK)
9196     term->charset_list = coding_charset_list (terminal_coding);
9197   else
9198     term->charset_list = Fcons (make_number (charset_ascii), Qnil);
9199   return Qnil;
9200 }
9201
9202 DEFUN ("set-safe-terminal-coding-system-internal",
9203        Fset_safe_terminal_coding_system_internal,
9204        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9205        doc: /* Internal use only.  */)
9206   (Lisp_Object coding_system)
9207 {
9208   CHECK_SYMBOL (coding_system);
9209   setup_coding_system (Fcheck_coding_system (coding_system),
9210                        &safe_terminal_coding);
9211   /* Character composition should be disabled.  */
9212   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9213   safe_terminal_coding.src_multibyte = 1;
9214   safe_terminal_coding.dst_multibyte = 0;
9215   return Qnil;
9216 }
9217
9218 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9219        Sterminal_coding_system, 0, 1, 0,
9220        doc: /* Return coding system specified for terminal output on the given terminal.
9221 TERMINAL may be a terminal object, a frame, or nil for the selected
9222 frame's terminal device.  */)
9223   (Lisp_Object terminal)
9224 {
9225   struct coding_system *terminal_coding
9226     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9227   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9228
9229   /* For backward compatibility, return nil if it is `undecided'. */
9230   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9231 }
9232
9233 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9234        Sset_keyboard_coding_system_internal, 1, 2, 0,
9235        doc: /* Internal use only.  */)
9236   (Lisp_Object coding_system, Lisp_Object terminal)
9237 {
9238   struct terminal *t = get_terminal (terminal, 1);
9239   CHECK_SYMBOL (coding_system);
9240   if (NILP (coding_system))
9241     coding_system = Qno_conversion;
9242   else
9243     Fcheck_coding_system (coding_system);
9244   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9245   /* Character composition should be disabled.  */
9246   TERMINAL_KEYBOARD_CODING (t)->common_flags
9247     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9248   return Qnil;
9249 }
9250
9251 DEFUN ("keyboard-coding-system",
9252        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9253        doc: /* Return coding system specified for decoding keyboard input.  */)
9254   (Lisp_Object terminal)
9255 {
9256   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9257                          (get_terminal (terminal, 1))->id);
9258 }
9259
9260 \f
9261 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9262        Sfind_operation_coding_system,  1, MANY, 0,
9263        doc: /* Choose a coding system for an operation based on the target name.
9264 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9265 DECODING-SYSTEM is the coding system to use for decoding
9266 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9267 for encoding (in case OPERATION does encoding).
9268
9269 The first argument OPERATION specifies an I/O primitive:
9270   For file I/O, `insert-file-contents' or `write-region'.
9271   For process I/O, `call-process', `call-process-region', or `start-process'.
9272   For network I/O, `open-network-stream'.
9273
9274 The remaining arguments should be the same arguments that were passed
9275 to the primitive.  Depending on which primitive, one of those arguments
9276 is selected as the TARGET.  For example, if OPERATION does file I/O,
9277 whichever argument specifies the file name is TARGET.
9278
9279 TARGET has a meaning which depends on OPERATION:
9280   For file I/O, TARGET is a file name (except for the special case below).
9281   For process I/O, TARGET is a process name.
9282   For network I/O, TARGET is a service name or a port number.
9283
9284 This function looks up what is specified for TARGET in
9285 `file-coding-system-alist', `process-coding-system-alist',
9286 or `network-coding-system-alist' depending on OPERATION.
9287 They may specify a coding system, a cons of coding systems,
9288 or a function symbol to call.
9289 In the last case, we call the function with one argument,
9290 which is a list of all the arguments given to this function.
9291 If the function can't decide a coding system, it can return
9292 `undecided' so that the normal code-detection is performed.
9293
9294 If OPERATION is `insert-file-contents', the argument corresponding to
9295 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9296 file name to look up, and BUFFER is a buffer that contains the file's
9297 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9298 function to call for FILENAME, that function should examine the
9299 contents of BUFFER instead of reading the file.
9300
9301 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9302   (int nargs, Lisp_Object *args)
9303 {
9304   Lisp_Object operation, target_idx, target, val;
9305   register Lisp_Object chain;
9306
9307   if (nargs < 2)
9308     error ("Too few arguments");
9309   operation = args[0];
9310   if (!SYMBOLP (operation)
9311       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9312     error ("Invalid first argument");
9313   if (nargs < 1 + XINT (target_idx))
9314     error ("Too few arguments for operation: %s",
9315            SDATA (SYMBOL_NAME (operation)));
9316   target = args[XINT (target_idx) + 1];
9317   if (!(STRINGP (target)
9318         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9319             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9320         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9321     error ("Invalid %dth argument", XINT (target_idx) + 1);
9322   if (CONSP (target))
9323     target = XCAR (target);
9324
9325   chain = ((EQ (operation, Qinsert_file_contents)
9326             || EQ (operation, Qwrite_region))
9327            ? Vfile_coding_system_alist
9328            : (EQ (operation, Qopen_network_stream)
9329               ? Vnetwork_coding_system_alist
9330               : Vprocess_coding_system_alist));
9331   if (NILP (chain))
9332     return Qnil;
9333
9334   for (; CONSP (chain); chain = XCDR (chain))
9335     {
9336       Lisp_Object elt;
9337
9338       elt = XCAR (chain);
9339       if (CONSP (elt)
9340           && ((STRINGP (target)
9341                && STRINGP (XCAR (elt))
9342                && fast_string_match (XCAR (elt), target) >= 0)
9343               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9344         {
9345           val = XCDR (elt);
9346           /* Here, if VAL is both a valid coding system and a valid
9347              function symbol, we return VAL as a coding system.  */
9348           if (CONSP (val))
9349             return val;
9350           if (! SYMBOLP (val))
9351             return Qnil;
9352           if (! NILP (Fcoding_system_p (val)))
9353             return Fcons (val, val);
9354           if (! NILP (Ffboundp (val)))
9355             {
9356               /* We use call1 rather than safe_call1
9357                  so as to get bug reports about functions called here
9358                  which don't handle the current interface.  */
9359               val = call1 (val, Flist (nargs, args));
9360               if (CONSP (val))
9361                 return val;
9362               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9363                 return Fcons (val, val);
9364             }
9365           return Qnil;
9366         }
9367     }
9368   return Qnil;
9369 }
9370
DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
       Sset_coding_system_priority, 0, MANY, 0,
       doc: /* Assign higher priority to the coding systems given as arguments.
If multiple coding systems belong to the same category,
all but the first one are ignored.

usage: (set-coding-system-priority &rest coding-systems)  */)
  (int nargs, Lisp_Object *args)
{
  int i, j;
  /* changed[CAT] is nonzero once category CAT has been given a new
     priority by one of the arguments.  */
  int changed[coding_category_max];
  /* The new priority order, built here and copied to
     coding_priorities at the end.  */
  enum coding_category priorities[coding_category_max];

  memset (changed, 0, sizeof changed);

  /* First pass: the categories of the arguments, in argument order,
     occupy the first J slots of PRIORITIES.  */
  for (i = j = 0; i < nargs; i++)
    {
      enum coding_category category;
      Lisp_Object spec, attrs;

      CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
      attrs = AREF (spec, 0);
      category = XINT (CODING_ATTR_CATEGORY (attrs));
      if (changed[category])
        /* Ignore this coding system because a coding system of the
           same category already had a higher priority.  */
        continue;
      changed[category] = 1;
      priorities[j++] = category;
      /* Re-setup the category's coding structure only when it is in
         use (id >= 0) and currently names a different coding system.  */
      if (coding_categories[category].id >= 0
          && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
        setup_coding_system (args[i], &coding_categories[category]);
      /* Bind the category symbol to the coding system.  */
      Fset (AREF (Vcoding_category_table, category), args[i]);
    }

  /* Now we have decided top J priorities.  Reflect the order of the
     original priorities to the remaining priorities.  */

  /* I walks the free slots of PRIORITIES; J walks the old
     coding_priorities, skipping categories already placed above.  */
  for (i = j, j = 0; i < coding_category_max; i++, j++)
    {
      while (j < coding_category_max
             && changed[coding_priorities[j]])
        j++;
      /* Running out of old entries while slots remain means the old
         table was inconsistent.  */
      if (j == coding_category_max)
        abort ();
      priorities[i] = coding_priorities[j];
    }

  memcpy (coding_priorities, priorities, sizeof priorities);

  /* Update `coding-category-list'.  */
  /* Consing from the lowest priority upward leaves the highest
     priority category at the head of the list.  */
  Vcoding_category_list = Qnil;
  for (i = coding_category_max - 1; i >= 0; i--)
    Vcoding_category_list
      = Fcons (AREF (Vcoding_category_table, priorities[i]),
               Vcoding_category_list);

  return Qnil;
}
9430
9431 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9432        Scoding_system_priority_list, 0, 1, 0,
9433        doc: /* Return a list of coding systems ordered by their priorities.
9434 The list contains a subset of coding systems; i.e. coding systems
9435 assigned to each coding category (see `coding-category-list').
9436
9437 HIGHESTP non-nil means just return the highest priority one.  */)
9438   (Lisp_Object highestp)
9439 {
9440   int i;
9441   Lisp_Object val;
9442
9443   for (i = 0, val = Qnil; i < coding_category_max; i++)
9444     {
9445       enum coding_category category = coding_priorities[i];
9446       int id = coding_categories[category].id;
9447       Lisp_Object attrs;
9448
9449       if (id < 0)
9450         continue;
9451       attrs = CODING_ID_ATTRS (id);
9452       if (! NILP (highestp))
9453         return CODING_ATTR_BASE_NAME (attrs);
9454       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9455     }
9456   return Fnreverse (val);
9457 }
9458
9459 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9460
9461 static Lisp_Object
9462 make_subsidiaries (Lisp_Object base)
9463 {
9464   Lisp_Object subsidiaries;
9465   int base_name_len = SBYTES (SYMBOL_NAME (base));
9466   char *buf = (char *) alloca (base_name_len + 6);
9467   int i;
9468
9469   memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
9470   subsidiaries = Fmake_vector (make_number (3), Qnil);
9471   for (i = 0; i < 3; i++)
9472     {
9473       memcpy (buf + base_name_len, suffixes[i], strlen (suffixes[i]) + 1);
9474       ASET (subsidiaries, i, intern (buf));
9475     }
9476   return subsidiaries;
9477 }
9478
9479
9480 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9481        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9482        doc: /* For internal use only.
9483 usage: (define-coding-system-internal ...)  */)
9484   (int nargs, Lisp_Object *args)
9485 {
9486   Lisp_Object name;
9487   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9488   Lisp_Object attrs;            /* Vector of attributes.  */
9489   Lisp_Object eol_type;
9490   Lisp_Object aliases;
9491   Lisp_Object coding_type, charset_list, safe_charsets;
9492   enum coding_category category;
9493   Lisp_Object tail, val;
9494   int max_charset_id = 0;
9495   int i;
9496
9497   if (nargs < coding_arg_max)
9498     goto short_args;
9499
9500   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9501
9502   name = args[coding_arg_name];
9503   CHECK_SYMBOL (name);
9504   CODING_ATTR_BASE_NAME (attrs) = name;
9505
9506   val = args[coding_arg_mnemonic];
9507   if (! STRINGP (val))
9508     CHECK_CHARACTER (val);
9509   CODING_ATTR_MNEMONIC (attrs) = val;
9510
9511   coding_type = args[coding_arg_coding_type];
9512   CHECK_SYMBOL (coding_type);
9513   CODING_ATTR_TYPE (attrs) = coding_type;
9514
9515   charset_list = args[coding_arg_charset_list];
9516   if (SYMBOLP (charset_list))
9517     {
9518       if (EQ (charset_list, Qiso_2022))
9519         {
9520           if (! EQ (coding_type, Qiso_2022))
9521             error ("Invalid charset-list");
9522           charset_list = Viso_2022_charset_list;
9523         }
9524       else if (EQ (charset_list, Qemacs_mule))
9525         {
9526           if (! EQ (coding_type, Qemacs_mule))
9527             error ("Invalid charset-list");
9528           charset_list = Vemacs_mule_charset_list;
9529         }
9530       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9531         if (max_charset_id < XFASTINT (XCAR (tail)))
9532           max_charset_id = XFASTINT (XCAR (tail));
9533     }
9534   else
9535     {
9536       charset_list = Fcopy_sequence (charset_list);
9537       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9538         {
9539           struct charset *charset;
9540
9541           val = XCAR (tail);
9542           CHECK_CHARSET_GET_CHARSET (val, charset);
9543           if (EQ (coding_type, Qiso_2022)
9544               ? CHARSET_ISO_FINAL (charset) < 0
9545               : EQ (coding_type, Qemacs_mule)
9546               ? CHARSET_EMACS_MULE_ID (charset) < 0
9547               : 0)
9548             error ("Can't handle charset `%s'",
9549                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9550
9551           XSETCAR (tail, make_number (charset->id));
9552           if (max_charset_id < charset->id)
9553             max_charset_id = charset->id;
9554         }
9555     }
9556   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9557
9558   safe_charsets = make_uninit_string (max_charset_id + 1);
9559   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9560   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9561     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9562   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9563
9564   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9565
9566   val = args[coding_arg_decode_translation_table];
9567   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9568     CHECK_SYMBOL (val);
9569   CODING_ATTR_DECODE_TBL (attrs) = val;
9570
9571   val = args[coding_arg_encode_translation_table];
9572   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9573     CHECK_SYMBOL (val);
9574   CODING_ATTR_ENCODE_TBL (attrs) = val;
9575
9576   val = args[coding_arg_post_read_conversion];
9577   CHECK_SYMBOL (val);
9578   CODING_ATTR_POST_READ (attrs) = val;
9579
9580   val = args[coding_arg_pre_write_conversion];
9581   CHECK_SYMBOL (val);
9582   CODING_ATTR_PRE_WRITE (attrs) = val;
9583
9584   val = args[coding_arg_default_char];
9585   if (NILP (val))
9586     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9587   else
9588     {
9589       CHECK_CHARACTER (val);
9590       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9591     }
9592
9593   val = args[coding_arg_for_unibyte];
9594   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9595
9596   val = args[coding_arg_plist];
9597   CHECK_LIST (val);
9598   CODING_ATTR_PLIST (attrs) = val;
9599
9600   if (EQ (coding_type, Qcharset))
9601     {
9602       /* Generate a lisp vector of 256 elements.  Each element is nil,
9603          integer, or a list of charset IDs.
9604
9605          If Nth element is nil, the byte code N is invalid in this
9606          coding system.
9607
9608          If Nth element is a number NUM, N is the first byte of a
9609          charset whose ID is NUM.
9610
9611          If Nth element is a list of charset IDs, N is the first byte
9612          of one of them.  The list is sorted by dimensions of the
9613          charsets.  A charset of smaller dimension comes first. */
9614       val = Fmake_vector (make_number (256), Qnil);
9615
9616       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9617         {
9618           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9619           int dim = CHARSET_DIMENSION (charset);
9620           int idx = (dim - 1) * 4;
9621
9622           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9623             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9624
9625           for (i = charset->code_space[idx];
9626                i <= charset->code_space[idx + 1]; i++)
9627             {
9628               Lisp_Object tmp, tmp2;
9629               int dim2;
9630
9631               tmp = AREF (val, i);
9632               if (NILP (tmp))
9633                 tmp = XCAR (tail);
9634               else if (NUMBERP (tmp))
9635                 {
9636                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9637                   if (dim < dim2)
9638                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9639                   else
9640                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9641                 }
9642               else
9643                 {
9644                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9645                     {
9646                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9647                       if (dim < dim2)
9648                         break;
9649                     }
9650                   if (NILP (tmp2))
9651                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9652                   else
9653                     {
9654                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9655                       XSETCAR (tmp2, XCAR (tail));
9656                     }
9657                 }
9658               ASET (val, i, tmp);
9659             }
9660         }
9661       ASET (attrs, coding_attr_charset_valids, val);
9662       category = coding_category_charset;
9663     }
9664   else if (EQ (coding_type, Qccl))
9665     {
9666       Lisp_Object valids;
9667
9668       if (nargs < coding_arg_ccl_max)
9669         goto short_args;
9670
9671       val = args[coding_arg_ccl_decoder];
9672       CHECK_CCL_PROGRAM (val);
9673       if (VECTORP (val))
9674         val = Fcopy_sequence (val);
9675       ASET (attrs, coding_attr_ccl_decoder, val);
9676
9677       val = args[coding_arg_ccl_encoder];
9678       CHECK_CCL_PROGRAM (val);
9679       if (VECTORP (val))
9680         val = Fcopy_sequence (val);
9681       ASET (attrs, coding_attr_ccl_encoder, val);
9682
9683       val = args[coding_arg_ccl_valids];
9684       valids = Fmake_string (make_number (256), make_number (0));
9685       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9686         {
9687           int from, to;
9688
9689           val = Fcar (tail);
9690           if (INTEGERP (val))
9691             {
9692               from = to = XINT (val);
9693               if (from < 0 || from > 255)
9694                 args_out_of_range_3 (val, make_number (0), make_number (255));
9695             }
9696           else
9697             {
9698               CHECK_CONS (val);
9699               CHECK_NATNUM_CAR (val);
9700               CHECK_NATNUM_CDR (val);
9701               from = XINT (XCAR (val));
9702               if (from > 255)
9703                 args_out_of_range_3 (XCAR (val),
9704                                      make_number (0), make_number (255));
9705               to = XINT (XCDR (val));
9706               if (to < from || to > 255)
9707                 args_out_of_range_3 (XCDR (val),
9708                                      XCAR (val), make_number (255));
9709             }
9710           for (i = from; i <= to; i++)
9711             SSET (valids, i, 1);
9712         }
9713       ASET (attrs, coding_attr_ccl_valids, valids);
9714
9715       category = coding_category_ccl;
9716     }
9717   else if (EQ (coding_type, Qutf_16))
9718     {
9719       Lisp_Object bom, endian;
9720
9721       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9722
9723       if (nargs < coding_arg_utf16_max)
9724         goto short_args;
9725
9726       bom = args[coding_arg_utf16_bom];
9727       if (! NILP (bom) && ! EQ (bom, Qt))
9728         {
9729           CHECK_CONS (bom);
9730           val = XCAR (bom);
9731           CHECK_CODING_SYSTEM (val);
9732           val = XCDR (bom);
9733           CHECK_CODING_SYSTEM (val);
9734         }
9735       ASET (attrs, coding_attr_utf_bom, bom);
9736
9737       endian = args[coding_arg_utf16_endian];
9738       CHECK_SYMBOL (endian);
9739       if (NILP (endian))
9740         endian = Qbig;
9741       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9742         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9743       ASET (attrs, coding_attr_utf_16_endian, endian);
9744
9745       category = (CONSP (bom)
9746                   ? coding_category_utf_16_auto
9747                   : NILP (bom)
9748                   ? (EQ (endian, Qbig)
9749                      ? coding_category_utf_16_be_nosig
9750                      : coding_category_utf_16_le_nosig)
9751                   : (EQ (endian, Qbig)
9752                      ? coding_category_utf_16_be
9753                      : coding_category_utf_16_le));
9754     }
9755   else if (EQ (coding_type, Qiso_2022))
9756     {
9757       Lisp_Object initial, reg_usage, request, flags;
9758
9759       if (nargs < coding_arg_iso2022_max)
9760         goto short_args;
9761
9762       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9763       CHECK_VECTOR (initial);
9764       for (i = 0; i < 4; i++)
9765         {
9766           val = Faref (initial, make_number (i));
9767           if (! NILP (val))
9768             {
9769               struct charset *charset;
9770
9771               CHECK_CHARSET_GET_CHARSET (val, charset);
9772               ASET (initial, i, make_number (CHARSET_ID (charset)));
9773               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9774                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9775             }
9776           else
9777             ASET (initial, i, make_number (-1));
9778         }
9779
9780       reg_usage = args[coding_arg_iso2022_reg_usage];
9781       CHECK_CONS (reg_usage);
9782       CHECK_NUMBER_CAR (reg_usage);
9783       CHECK_NUMBER_CDR (reg_usage);
9784
9785       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9786       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9787         {
9788           int id;
9789           Lisp_Object tmp1;
9790
9791           val = Fcar (tail);
9792           CHECK_CONS (val);
9793           tmp1 = XCAR (val);
9794           CHECK_CHARSET_GET_ID (tmp1, id);
9795           CHECK_NATNUM_CDR (val);
9796           if (XINT (XCDR (val)) >= 4)
9797             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9798           XSETCAR (val, make_number (id));
9799         }
9800
9801       flags = args[coding_arg_iso2022_flags];
9802       CHECK_NATNUM (flags);
9803       i = XINT (flags);
9804       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9805         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9806
9807       ASET (attrs, coding_attr_iso_initial, initial);
9808       ASET (attrs, coding_attr_iso_usage, reg_usage);
9809       ASET (attrs, coding_attr_iso_request, request);
9810       ASET (attrs, coding_attr_iso_flags, flags);
9811       setup_iso_safe_charsets (attrs);
9812
9813       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9814         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9815                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9816                     ? coding_category_iso_7_else
9817                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9818                     ? coding_category_iso_7
9819                     : coding_category_iso_7_tight);
9820       else
9821         {
9822           int id = XINT (AREF (initial, 1));
9823
9824           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9825                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9826                        || id < 0)
9827                       ? coding_category_iso_8_else
9828                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9829                       ? coding_category_iso_8_1
9830                       : coding_category_iso_8_2);
9831         }
9832       if (category != coding_category_iso_8_1
9833           && category != coding_category_iso_8_2)
9834         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9835     }
9836   else if (EQ (coding_type, Qemacs_mule))
9837     {
9838       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9839         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9840       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9841       category = coding_category_emacs_mule;
9842     }
9843   else if (EQ (coding_type, Qshift_jis))
9844     {
9845
9846       struct charset *charset;
9847
9848       if (XINT (Flength (charset_list)) != 3
9849           && XINT (Flength (charset_list)) != 4)
9850         error ("There should be three or four charsets");
9851
9852       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9853       if (CHARSET_DIMENSION (charset) != 1)
9854         error ("Dimension of charset %s is not one",
9855                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9856       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9857         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9858
9859       charset_list = XCDR (charset_list);
9860       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9861       if (CHARSET_DIMENSION (charset) != 1)
9862         error ("Dimension of charset %s is not one",
9863                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9864
9865       charset_list = XCDR (charset_list);
9866       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9867       if (CHARSET_DIMENSION (charset) != 2)
9868         error ("Dimension of charset %s is not two",
9869                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9870
9871       charset_list = XCDR (charset_list);
9872       if (! NILP (charset_list))
9873         {
9874           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9875           if (CHARSET_DIMENSION (charset) != 2)
9876             error ("Dimension of charset %s is not two",
9877                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9878         }
9879
9880       category = coding_category_sjis;
9881       Vsjis_coding_system = name;
9882     }
9883   else if (EQ (coding_type, Qbig5))
9884     {
9885       struct charset *charset;
9886
9887       if (XINT (Flength (charset_list)) != 2)
9888         error ("There should be just two charsets");
9889
9890       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9891       if (CHARSET_DIMENSION (charset) != 1)
9892         error ("Dimension of charset %s is not one",
9893                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9894       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9895         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9896
9897       charset_list = XCDR (charset_list);
9898       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9899       if (CHARSET_DIMENSION (charset) != 2)
9900         error ("Dimension of charset %s is not two",
9901                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9902
9903       category = coding_category_big5;
9904       Vbig5_coding_system = name;
9905     }
9906   else if (EQ (coding_type, Qraw_text))
9907     {
9908       category = coding_category_raw_text;
9909       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9910     }
9911   else if (EQ (coding_type, Qutf_8))
9912     {
9913       Lisp_Object bom;
9914
9915       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9916
9917       if (nargs < coding_arg_utf8_max)
9918         goto short_args;
9919
9920       bom = args[coding_arg_utf8_bom];
9921       if (! NILP (bom) && ! EQ (bom, Qt))
9922         {
9923           CHECK_CONS (bom);
9924           val = XCAR (bom);
9925           CHECK_CODING_SYSTEM (val);
9926           val = XCDR (bom);
9927           CHECK_CODING_SYSTEM (val);
9928         }
9929       ASET (attrs, coding_attr_utf_bom, bom);
9930
9931       category = (CONSP (bom) ? coding_category_utf_8_auto
9932                   : NILP (bom) ? coding_category_utf_8_nosig
9933                   : coding_category_utf_8_sig);
9934     }
9935   else if (EQ (coding_type, Qundecided))
9936     category = coding_category_undecided;
9937   else
9938     error ("Invalid coding system type: %s",
9939            SDATA (SYMBOL_NAME (coding_type)));
9940
9941   CODING_ATTR_CATEGORY (attrs) = make_number (category);
9942   CODING_ATTR_PLIST (attrs)
9943     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9944                                 CODING_ATTR_PLIST (attrs)));
9945   CODING_ATTR_PLIST (attrs)
9946     = Fcons (QCascii_compatible_p,
9947              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9948                     CODING_ATTR_PLIST (attrs)));
9949
9950   eol_type = args[coding_arg_eol_type];
9951   if (! NILP (eol_type)
9952       && ! EQ (eol_type, Qunix)
9953       && ! EQ (eol_type, Qdos)
9954       && ! EQ (eol_type, Qmac))
9955     error ("Invalid eol-type");
9956
9957   aliases = Fcons (name, Qnil);
9958
9959   if (NILP (eol_type))
9960     {
9961       eol_type = make_subsidiaries (name);
9962       for (i = 0; i < 3; i++)
9963         {
9964           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9965
9966           this_name = AREF (eol_type, i);
9967           this_aliases = Fcons (this_name, Qnil);
9968           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9969           this_spec = Fmake_vector (make_number (3), attrs);
9970           ASET (this_spec, 1, this_aliases);
9971           ASET (this_spec, 2, this_eol_type);
9972           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9973           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
9974           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9975           if (NILP (val))
9976             Vcoding_system_alist
9977               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9978                        Vcoding_system_alist);
9979         }
9980     }
9981
9982   spec_vec = Fmake_vector (make_number (3), attrs);
9983   ASET (spec_vec, 1, aliases);
9984   ASET (spec_vec, 2, eol_type);
9985
9986   Fputhash (name, spec_vec, Vcoding_system_hash_table);
9987   Vcoding_system_list = Fcons (name, Vcoding_system_list);
9988   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9989   if (NILP (val))
9990     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9991                                   Vcoding_system_alist);
9992
9993   {
9994     int id = coding_categories[category].id;
9995
9996     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9997       setup_coding_system (name, &coding_categories[category]);
9998   }
9999
10000   return Qnil;
10001
10002  short_args:
10003   return Fsignal (Qwrong_number_of_arguments,
10004                   Fcons (intern ("define-coding-system-internal"),
10005                          make_number (nargs)));
10006 }
10007
10008
10009 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10010        3, 3, 0,
10011        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10012   (Lisp_Object coding_system, Lisp_Object prop, Lisp_Object val)
10013 {
10014   Lisp_Object spec, attrs;
10015
10016   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10017   attrs = AREF (spec, 0);
10018   if (EQ (prop, QCmnemonic))
10019     {
10020       if (! STRINGP (val))
10021         CHECK_CHARACTER (val);
10022       CODING_ATTR_MNEMONIC (attrs) = val;
10023     }
10024   else if (EQ (prop, QCdefault_char))
10025     {
10026       if (NILP (val))
10027         val = make_number (' ');
10028       else
10029         CHECK_CHARACTER (val);
10030       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10031     }
10032   else if (EQ (prop, QCdecode_translation_table))
10033     {
10034       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10035         CHECK_SYMBOL (val);
10036       CODING_ATTR_DECODE_TBL (attrs) = val;
10037     }
10038   else if (EQ (prop, QCencode_translation_table))
10039     {
10040       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10041         CHECK_SYMBOL (val);
10042       CODING_ATTR_ENCODE_TBL (attrs) = val;
10043     }
10044   else if (EQ (prop, QCpost_read_conversion))
10045     {
10046       CHECK_SYMBOL (val);
10047       CODING_ATTR_POST_READ (attrs) = val;
10048     }
10049   else if (EQ (prop, QCpre_write_conversion))
10050     {
10051       CHECK_SYMBOL (val);
10052       CODING_ATTR_PRE_WRITE (attrs) = val;
10053     }
10054   else if (EQ (prop, QCascii_compatible_p))
10055     {
10056       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10057     }
10058
10059   CODING_ATTR_PLIST (attrs)
10060     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10061   return val;
10062 }
10063
10064
10065 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10066        Sdefine_coding_system_alias, 2, 2, 0,
10067        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10068   (Lisp_Object alias, Lisp_Object coding_system)
10069 {
10070   Lisp_Object spec, aliases, eol_type, val;
10071
10072   CHECK_SYMBOL (alias);
10073   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10074   aliases = AREF (spec, 1);
10075   /* ALIASES should be a list of length more than zero, and the first
10076      element is a base coding system.  Append ALIAS at the tail of the
10077      list.  */
10078   while (!NILP (XCDR (aliases)))
10079     aliases = XCDR (aliases);
10080   XSETCDR (aliases, Fcons (alias, Qnil));
10081
10082   eol_type = AREF (spec, 2);
10083   if (VECTORP (eol_type))
10084     {
10085       Lisp_Object subsidiaries;
10086       int i;
10087
10088       subsidiaries = make_subsidiaries (alias);
10089       for (i = 0; i < 3; i++)
10090         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10091                                      AREF (eol_type, i));
10092     }
10093
10094   Fputhash (alias, spec, Vcoding_system_hash_table);
10095   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10096   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10097   if (NILP (val))
10098     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10099                                   Vcoding_system_alist);
10100
10101   return Qnil;
10102 }
10103
10104 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10105        1, 1, 0,
10106        doc: /* Return the base of CODING-SYSTEM.
10107 Any alias or subsidiary coding system is not a base coding system.  */)
10108   (Lisp_Object coding_system)
10109 {
10110   Lisp_Object spec, attrs;
10111
10112   if (NILP (coding_system))
10113     return (Qno_conversion);
10114   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10115   attrs = AREF (spec, 0);
10116   return CODING_ATTR_BASE_NAME (attrs);
10117 }
10118
10119 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10120        1, 1, 0,
10121        doc: "Return the property list of CODING-SYSTEM.")
10122   (Lisp_Object coding_system)
10123 {
10124   Lisp_Object spec, attrs;
10125
10126   if (NILP (coding_system))
10127     coding_system = Qno_conversion;
10128   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10129   attrs = AREF (spec, 0);
10130   return CODING_ATTR_PLIST (attrs);
10131 }
10132
10133
10134 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10135        1, 1, 0,
10136        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10137   (Lisp_Object coding_system)
10138 {
10139   Lisp_Object spec;
10140
10141   if (NILP (coding_system))
10142     coding_system = Qno_conversion;
10143   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10144   return AREF (spec, 1);
10145 }
10146
10147 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10148        Scoding_system_eol_type, 1, 1, 0,
10149        doc: /* Return eol-type of CODING-SYSTEM.
10150 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10151
10152 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10153 and CR respectively.
10154
10155 A vector value indicates that a format of end-of-line should be
10156 detected automatically.  Nth element of the vector is the subsidiary
10157 coding system whose eol-type is N.  */)
10158   (Lisp_Object coding_system)
10159 {
10160   Lisp_Object spec, eol_type;
10161   int n;
10162
10163   if (NILP (coding_system))
10164     coding_system = Qno_conversion;
10165   if (! CODING_SYSTEM_P (coding_system))
10166     return Qnil;
10167   spec = CODING_SYSTEM_SPEC (coding_system);
10168   eol_type = AREF (spec, 2);
10169   if (VECTORP (eol_type))
10170     return Fcopy_sequence (eol_type);
10171   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10172   return make_number (n);
10173 }
10174
10175 #endif /* emacs */
10176
10177 \f
/*** 12. Postamble ***/
10179
10180 void
10181 init_coding_once (void)
10182 {
10183   int i;
10184
10185   for (i = 0; i < coding_category_max; i++)
10186     {
10187       coding_categories[i].id = -1;
10188       coding_priorities[i] = i;
10189     }
10190
10191   /* ISO2022 specific initialize routine.  */
10192   for (i = 0; i < 0x20; i++)
10193     iso_code_class[i] = ISO_control_0;
10194   for (i = 0x21; i < 0x7F; i++)
10195     iso_code_class[i] = ISO_graphic_plane_0;
10196   for (i = 0x80; i < 0xA0; i++)
10197     iso_code_class[i] = ISO_control_1;
10198   for (i = 0xA1; i < 0xFF; i++)
10199     iso_code_class[i] = ISO_graphic_plane_1;
10200   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10201   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10202   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10203   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10204   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10205   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10206   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10207   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10208   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10209
10210   for (i = 0; i < 256; i++)
10211     {
10212       emacs_mule_bytes[i] = 1;
10213     }
10214   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10215   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10216   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10217   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10218 }
10219
10220 #ifdef emacs
10221
10222 void
10223 syms_of_coding (void)
10224 {
10225   staticpro (&Vcoding_system_hash_table);
10226   {
10227     Lisp_Object args[2];
10228     args[0] = QCtest;
10229     args[1] = Qeq;
10230     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10231   }
10232
10233   staticpro (&Vsjis_coding_system);
10234   Vsjis_coding_system = Qnil;
10235
10236   staticpro (&Vbig5_coding_system);
10237   Vbig5_coding_system = Qnil;
10238
10239   staticpro (&Vcode_conversion_reused_workbuf);
10240   Vcode_conversion_reused_workbuf = Qnil;
10241
10242   staticpro (&Vcode_conversion_workbuf_name);
10243   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10244
10245   reused_workbuf_in_use = 0;
10246
10247   DEFSYM (Qcharset, "charset");
10248   DEFSYM (Qtarget_idx, "target-idx");
10249   DEFSYM (Qcoding_system_history, "coding-system-history");
10250   Fset (Qcoding_system_history, Qnil);
10251
10252   /* Target FILENAME is the first argument.  */
10253   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10254   /* Target FILENAME is the third argument.  */
10255   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10256
10257   DEFSYM (Qcall_process, "call-process");
10258   /* Target PROGRAM is the first argument.  */
10259   Fput (Qcall_process, Qtarget_idx, make_number (0));
10260
10261   DEFSYM (Qcall_process_region, "call-process-region");
10262   /* Target PROGRAM is the third argument.  */
10263   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10264
10265   DEFSYM (Qstart_process, "start-process");
10266   /* Target PROGRAM is the third argument.  */
10267   Fput (Qstart_process, Qtarget_idx, make_number (2));
10268
10269   DEFSYM (Qopen_network_stream, "open-network-stream");
10270   /* Target SERVICE is the fourth argument.  */
10271   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10272
10273   DEFSYM (Qcoding_system, "coding-system");
10274   DEFSYM (Qcoding_aliases, "coding-aliases");
10275
10276   DEFSYM (Qeol_type, "eol-type");
10277   DEFSYM (Qunix, "unix");
10278   DEFSYM (Qdos, "dos");
10279
10280   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10281   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10282   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10283   DEFSYM (Qdefault_char, "default-char");
10284   DEFSYM (Qundecided, "undecided");
10285   DEFSYM (Qno_conversion, "no-conversion");
10286   DEFSYM (Qraw_text, "raw-text");
10287
10288   DEFSYM (Qiso_2022, "iso-2022");
10289
10290   DEFSYM (Qutf_8, "utf-8");
10291   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10292
10293   DEFSYM (Qutf_16, "utf-16");
10294   DEFSYM (Qbig, "big");
10295   DEFSYM (Qlittle, "little");
10296
10297   DEFSYM (Qshift_jis, "shift-jis");
10298   DEFSYM (Qbig5, "big5");
10299
10300   DEFSYM (Qcoding_system_p, "coding-system-p");
10301
10302   DEFSYM (Qcoding_system_error, "coding-system-error");
10303   Fput (Qcoding_system_error, Qerror_conditions,
10304         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10305   Fput (Qcoding_system_error, Qerror_message,
10306         make_pure_c_string ("Invalid coding system"));
10307
10308   /* Intern this now in case it isn't already done.
10309      Setting this variable twice is harmless.
10310      But don't staticpro it here--that is done in alloc.c.  */
10311   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10312
10313   DEFSYM (Qtranslation_table, "translation-table");
10314   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10315   DEFSYM (Qtranslation_table_id, "translation-table-id");
10316   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10317   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10318
10319   DEFSYM (Qvalid_codes, "valid-codes");
10320
10321   DEFSYM (Qemacs_mule, "emacs-mule");
10322
10323   DEFSYM (QCcategory, ":category");
10324   DEFSYM (QCmnemonic, ":mnemonic");
10325   DEFSYM (QCdefault_char, ":default-char");
10326   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10327   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10328   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10329   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10330   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10331
10332   Vcoding_category_table
10333     = Fmake_vector (make_number (coding_category_max), Qnil);
10334   staticpro (&Vcoding_category_table);
10335   /* Followings are target of code detection.  */
10336   ASET (Vcoding_category_table, coding_category_iso_7,
10337         intern_c_string ("coding-category-iso-7"));
10338   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10339         intern_c_string ("coding-category-iso-7-tight"));
10340   ASET (Vcoding_category_table, coding_category_iso_8_1,
10341         intern_c_string ("coding-category-iso-8-1"));
10342   ASET (Vcoding_category_table, coding_category_iso_8_2,
10343         intern_c_string ("coding-category-iso-8-2"));
10344   ASET (Vcoding_category_table, coding_category_iso_7_else,
10345         intern_c_string ("coding-category-iso-7-else"));
10346   ASET (Vcoding_category_table, coding_category_iso_8_else,
10347         intern_c_string ("coding-category-iso-8-else"));
10348   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10349         intern_c_string ("coding-category-utf-8-auto"));
10350   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10351         intern_c_string ("coding-category-utf-8"));
10352   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10353         intern_c_string ("coding-category-utf-8-sig"));
10354   ASET (Vcoding_category_table, coding_category_utf_16_be,
10355         intern_c_string ("coding-category-utf-16-be"));
10356   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10357         intern_c_string ("coding-category-utf-16-auto"));
10358   ASET (Vcoding_category_table, coding_category_utf_16_le,
10359         intern_c_string ("coding-category-utf-16-le"));
10360   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10361         intern_c_string ("coding-category-utf-16-be-nosig"));
10362   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10363         intern_c_string ("coding-category-utf-16-le-nosig"));
10364   ASET (Vcoding_category_table, coding_category_charset,
10365         intern_c_string ("coding-category-charset"));
10366   ASET (Vcoding_category_table, coding_category_sjis,
10367         intern_c_string ("coding-category-sjis"));
10368   ASET (Vcoding_category_table, coding_category_big5,
10369         intern_c_string ("coding-category-big5"));
10370   ASET (Vcoding_category_table, coding_category_ccl,
10371         intern_c_string ("coding-category-ccl"));
10372   ASET (Vcoding_category_table, coding_category_emacs_mule,
10373         intern_c_string ("coding-category-emacs-mule"));
10374   /* Followings are NOT target of code detection.  */
10375   ASET (Vcoding_category_table, coding_category_raw_text,
10376         intern_c_string ("coding-category-raw-text"));
10377   ASET (Vcoding_category_table, coding_category_undecided,
10378         intern_c_string ("coding-category-undecided"));
10379
10380   DEFSYM (Qinsufficient_source, "insufficient-source");
10381   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10382   DEFSYM (Qinvalid_source, "invalid-source");
10383   DEFSYM (Qinterrupted, "interrupted");
10384   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10385   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10386
10387   defsubr (&Scoding_system_p);
10388   defsubr (&Sread_coding_system);
10389   defsubr (&Sread_non_nil_coding_system);
10390   defsubr (&Scheck_coding_system);
10391   defsubr (&Sdetect_coding_region);
10392   defsubr (&Sdetect_coding_string);
10393   defsubr (&Sfind_coding_systems_region_internal);
10394   defsubr (&Sunencodable_char_position);
10395   defsubr (&Scheck_coding_systems_region);
10396   defsubr (&Sdecode_coding_region);
10397   defsubr (&Sencode_coding_region);
10398   defsubr (&Sdecode_coding_string);
10399   defsubr (&Sencode_coding_string);
10400   defsubr (&Sdecode_sjis_char);
10401   defsubr (&Sencode_sjis_char);
10402   defsubr (&Sdecode_big5_char);
10403   defsubr (&Sencode_big5_char);
10404   defsubr (&Sset_terminal_coding_system_internal);
10405   defsubr (&Sset_safe_terminal_coding_system_internal);
10406   defsubr (&Sterminal_coding_system);
10407   defsubr (&Sset_keyboard_coding_system_internal);
10408   defsubr (&Skeyboard_coding_system);
10409   defsubr (&Sfind_operation_coding_system);
10410   defsubr (&Sset_coding_system_priority);
10411   defsubr (&Sdefine_coding_system_internal);
10412   defsubr (&Sdefine_coding_system_alias);
10413   defsubr (&Scoding_system_put);
10414   defsubr (&Scoding_system_base);
10415   defsubr (&Scoding_system_plist);
10416   defsubr (&Scoding_system_aliases);
10417   defsubr (&Scoding_system_eol_type);
10418   defsubr (&Scoding_system_priority_list);
10419
10420   DEFVAR_LISP ("coding-system-list", Vcoding_system_list,
10421                doc: /* List of coding systems.
10422
10423 Do not alter the value of this variable manually.  This variable should be
10424 updated by the functions `define-coding-system' and
10425 `define-coding-system-alias'.  */);
10426   Vcoding_system_list = Qnil;
10427
10428   DEFVAR_LISP ("coding-system-alist", Vcoding_system_alist,
10429                doc: /* Alist of coding system names.
10430 Each element is one element list of coding system name.
10431 This variable is given to `completing-read' as COLLECTION argument.
10432
10433 Do not alter the value of this variable manually.  This variable should be
10434 updated by the functions `define-coding-system' and
10435 `define-coding-system-alias'.  */);
10436   Vcoding_system_alist = Qnil;
10437
10438   DEFVAR_LISP ("coding-category-list", Vcoding_category_list,
10439                doc: /* List of coding-categories (symbols) ordered by priority.
10440
10441 On detecting a coding system, Emacs tries code detection algorithms
10442 associated with each coding-category one by one in this order.  When
10443 one algorithm agrees with a byte sequence of source text, the coding
10444 system bound to the corresponding coding-category is selected.
10445
10446 Don't modify this variable directly, but use `set-coding-system-priority'.  */);
10447   {
10448     int i;
10449
10450     Vcoding_category_list = Qnil;
10451     for (i = coding_category_max - 1; i >= 0; i--)
10452       Vcoding_category_list
10453         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10454                  Vcoding_category_list);
10455   }
10456
10457   DEFVAR_LISP ("coding-system-for-read", Vcoding_system_for_read,
10458                doc: /* Specify the coding system for read operations.
10459 It is useful to bind this variable with `let', but do not set it globally.
10460 If the value is a coding system, it is used for decoding on read operation.
10461 If not, an appropriate element is used from one of the coding system alists.
10462 There are three such tables: `file-coding-system-alist',
10463 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10464   Vcoding_system_for_read = Qnil;
10465
10466   DEFVAR_LISP ("coding-system-for-write", Vcoding_system_for_write,
10467                doc: /* Specify the coding system for write operations.
10468 Programs bind this variable with `let', but you should not set it globally.
10469 If the value is a coding system, it is used for encoding of output,
10470 when writing it to a file and when sending it to a file or subprocess.
10471
10472 If this does not specify a coding system, an appropriate element
10473 is used from one of the coding system alists.
10474 There are three such tables: `file-coding-system-alist',
10475 `process-coding-system-alist', and `network-coding-system-alist'.
10476 For output to files, if the above procedure does not specify a coding system,
10477 the value of `buffer-file-coding-system' is used.  */);
10478   Vcoding_system_for_write = Qnil;
10479
10480   DEFVAR_LISP ("last-coding-system-used", Vlast_coding_system_used,
10481                doc: /*
10482 Coding system used in the latest file or process I/O.  */);
10483   Vlast_coding_system_used = Qnil;
10484
10485   DEFVAR_LISP ("last-code-conversion-error", Vlast_code_conversion_error,
10486                doc: /*
10487 Error status of the last code conversion.
10488
10489 When an error was detected in the last code conversion, this variable
10490 is set to one of the following symbols.
10491   `insufficient-source'
10492   `inconsistent-eol'
10493   `invalid-source'
10494   `interrupted'
10495   `insufficient-memory'
10496 When no error was detected, the value doesn't change.  So, to check
10497 the error status of a code conversion by this variable, you must
10498 explicitly set this variable to nil before performing code
10499 conversion.  */);
10500   Vlast_code_conversion_error = Qnil;
10501
10502   DEFVAR_BOOL ("inhibit-eol-conversion", inhibit_eol_conversion,
10503                doc: /*
10504 *Non-nil means always inhibit code conversion of end-of-line format.
10505 See info node `Coding Systems' and info node `Text and Binary' concerning
10506 such conversion.  */);
10507   inhibit_eol_conversion = 0;
10508
10509   DEFVAR_BOOL ("inherit-process-coding-system", inherit_process_coding_system,
10510                doc: /*
10511 Non-nil means process buffer inherits coding system of process output.
10512 Bind it to t if the process output is to be treated as if it were a file
10513 read from some filesystem.  */);
10514   inherit_process_coding_system = 0;
10515
10516   DEFVAR_LISP ("file-coding-system-alist", Vfile_coding_system_alist,
10517                doc: /*
10518 Alist to decide a coding system to use for a file I/O operation.
10519 The format is ((PATTERN . VAL) ...),
10520 where PATTERN is a regular expression matching a file name,
10521 VAL is a coding system, a cons of coding systems, or a function symbol.
10522 If VAL is a coding system, it is used for both decoding and encoding
10523 the file contents.
10524 If VAL is a cons of coding systems, the car part is used for decoding,
10525 and the cdr part is used for encoding.
10526 If VAL is a function symbol, the function must return a coding system
10527 or a cons of coding systems which are used as above.  The function is
10528 called with an argument that is a list of the arguments with which
10529 `find-operation-coding-system' was called.  If the function can't decide
10530 a coding system, it can return `undecided' so that the normal
10531 code-detection is performed.
10532
10533 See also the function `find-operation-coding-system'
10534 and the variable `auto-coding-alist'.  */);
10535   Vfile_coding_system_alist = Qnil;
10536
10537   DEFVAR_LISP ("process-coding-system-alist", Vprocess_coding_system_alist,
10538                doc: /*
10539 Alist to decide a coding system to use for a process I/O operation.
10540 The format is ((PATTERN . VAL) ...),
10541 where PATTERN is a regular expression matching a program name,
10542 VAL is a coding system, a cons of coding systems, or a function symbol.
10543 If VAL is a coding system, it is used for both decoding what is received
10544 from the program and encoding what is sent to the program.
10545 If VAL is a cons of coding systems, the car part is used for decoding,
10546 and the cdr part is used for encoding.
10547 If VAL is a function symbol, the function must return a coding system
10548 or a cons of coding systems which are used as above.
10549
10550 See also the function `find-operation-coding-system'.  */);
10551   Vprocess_coding_system_alist = Qnil;
10552
10553   DEFVAR_LISP ("network-coding-system-alist", Vnetwork_coding_system_alist,
10554                doc: /*
10555 Alist to decide a coding system to use for a network I/O operation.
10556 The format is ((PATTERN . VAL) ...),
10557 where PATTERN is a regular expression matching a network service name
10558 or is a port number to connect to,
10559 VAL is a coding system, a cons of coding systems, or a function symbol.
10560 If VAL is a coding system, it is used for both decoding what is received
10561 from the network stream and encoding what is sent to the network stream.
10562 If VAL is a cons of coding systems, the car part is used for decoding,
10563 and the cdr part is used for encoding.
10564 If VAL is a function symbol, the function must return a coding system
10565 or a cons of coding systems which are used as above.
10566
10567 See also the function `find-operation-coding-system'.  */);
10568   Vnetwork_coding_system_alist = Qnil;
10569
10570   DEFVAR_LISP ("locale-coding-system", Vlocale_coding_system,
10571                doc: /* Coding system to use with system messages.
10572 Also used for decoding keyboard input on X Window system.  */);
10573   Vlocale_coding_system = Qnil;
10574
10575   /* The eol mnemonics are reset in startup.el system-dependently.  */
10576   DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix,
10577                doc: /*
10578 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10579   eol_mnemonic_unix = make_pure_c_string (":");
10580
10581   DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos,
10582                doc: /*
10583 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10584   eol_mnemonic_dos = make_pure_c_string ("\\");
10585
10586   DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac,
10587                doc: /*
10588 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10589   eol_mnemonic_mac = make_pure_c_string ("/");
10590
10591   DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided,
10592                doc: /*
10593 *String displayed in mode line when end-of-line format is not yet determined.  */);
10594   eol_mnemonic_undecided = make_pure_c_string (":");
10595
10596   DEFVAR_LISP ("enable-character-translation", Venable_character_translation,
10597                doc: /*
10598 *Non-nil enables character translation while encoding and decoding.  */);
10599   Venable_character_translation = Qt;
10600
10601   DEFVAR_LISP ("standard-translation-table-for-decode",
10602                Vstandard_translation_table_for_decode,
10603                doc: /* Table for translating characters while decoding.  */);
10604   Vstandard_translation_table_for_decode = Qnil;
10605
10606   DEFVAR_LISP ("standard-translation-table-for-encode",
10607                Vstandard_translation_table_for_encode,
10608                doc: /* Table for translating characters while encoding.  */);
10609   Vstandard_translation_table_for_encode = Qnil;
10610
10611   DEFVAR_LISP ("charset-revision-table", Vcharset_revision_table,
10612                doc: /* Alist of charsets vs revision numbers.
10613 While encoding, if a charset (car part of an element) is found,
10614 designate it with the escape sequence identifying revision (cdr part
10615 of the element).  */);
10616   Vcharset_revision_table = Qnil;
10617
10618   DEFVAR_LISP ("default-process-coding-system",
10619                Vdefault_process_coding_system,
10620                doc: /* Cons of coding systems used for process I/O by default.
10621 The car part is used for decoding a process output,
10622 the cdr part is used for encoding a text to be sent to a process.  */);
10623   Vdefault_process_coding_system = Qnil;
10624
10625   DEFVAR_LISP ("latin-extra-code-table", Vlatin_extra_code_table,
10626                doc: /*
10627 Table of extra Latin codes in the range 128..159 (inclusive).
10628 This is a vector of length 256.
10629 If Nth element is non-nil, the existence of code N in a file
10630 \(or output of subprocess) doesn't prevent it from being detected as
10631 a coding system of ISO 2022 variant which has a flag
10632 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10633 or reading output of a subprocess.
10634 Only 128th through 159th elements have a meaning.  */);
10635   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10636
10637   DEFVAR_LISP ("select-safe-coding-system-function",
10638                Vselect_safe_coding_system_function,
10639                doc: /*
10640 Function to call to select safe coding system for encoding a text.
10641
10642 If set, this function is called to force a user to select a proper
10643 coding system which can encode the text in the case that a default
10644 coding system used in each operation can't encode the text.  The
10645 function should take care that the buffer is not modified while
10646 the coding system is being selected.
10647
10648 The default value is `select-safe-coding-system' (which see).  */);
10649   Vselect_safe_coding_system_function = Qnil;
10650
10651   DEFVAR_BOOL ("coding-system-require-warning",
10652                coding_system_require_warning,
10653                doc: /* Internal use only.
10654 If non-nil, on writing a file, `select-safe-coding-system-function' is
10655 called even if `coding-system-for-write' is non-nil.  The command
10656 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10657   coding_system_require_warning = 0;
10658
10659
10660   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10661                inhibit_iso_escape_detection,
10662                doc: /*
10663 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10664
10665 When Emacs reads text, it tries to detect how the text is encoded.
10666 This code detection is sensitive to escape sequences.  If Emacs sees
10667 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10668 of the ISO2022 encodings, and decodes text by the corresponding coding
10669 system (e.g. `iso-2022-7bit').
10670
10671 However, there may be a case that you want to read escape sequences in
10672 a file as is.  In such a case, you can set this variable to non-nil.
10673 Then the code detection will ignore any escape sequences, and no text is
10674 detected as encoded in some ISO-2022 encoding.  The result is that all
10675 escape sequences become visible in a buffer.
10676
10677 The default value is nil, and it is strongly recommended not to change
10678 it.  That is because many Emacs Lisp source files that contain
10679 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10680 in Emacs's distribution, and they won't be decoded correctly on
10681 reading if you suppress escape sequence detection.
10682
10683 The other way to read escape sequences in a file without decoding is
10684 to explicitly specify some coding system that doesn't use ISO-2022
10685 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument].  */);
10686   inhibit_iso_escape_detection = 0;
10687
10688   DEFVAR_BOOL ("inhibit-null-byte-detection",
10689                inhibit_null_byte_detection,
10690                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10691 By default, Emacs treats it as binary data, and does not attempt to
10692 decode it.  The effect is as if you specified `no-conversion' for
10693 reading that text.
10694
10695 Set this to non-nil when a regular text happens to include null bytes.
10696 Examples are Index nodes of Info files and null-byte delimited output
10697 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10698 decode text as usual.  */);
10699   inhibit_null_byte_detection = 0;
10700
10701   DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
10702                doc: /* Char table for translating self-inserting characters.
10703 This is applied to the result of input methods, not their input.
10704 See also `keyboard-translate-table'.
10705
10706 Use of this variable for character code unification was rendered
10707 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10708 internal character representation.  */);
10709     Vtranslation_table_for_input = Qnil;
10710
10711   {
10712     Lisp_Object args[coding_arg_max];
10713     Lisp_Object plist[16];
10714     int i;
10715
10716     for (i = 0; i < coding_arg_max; i++)
10717       args[i] = Qnil;
10718
10719     plist[0] = intern_c_string (":name");
10720     plist[1] = args[coding_arg_name] = Qno_conversion;
10721     plist[2] = intern_c_string (":mnemonic");
10722     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10723     plist[4] = intern_c_string (":coding-type");
10724     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10725     plist[6] = intern_c_string (":ascii-compatible-p");
10726     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10727     plist[8] = intern_c_string (":default-char");
10728     plist[9] = args[coding_arg_default_char] = make_number (0);
10729     plist[10] = intern_c_string (":for-unibyte");
10730     plist[11] = args[coding_arg_for_unibyte] = Qt;
10731     plist[12] = intern_c_string (":docstring");
10732     plist[13] = make_pure_c_string ("Do no conversion.\n\
10733 \n\
10734 When you visit a file with this coding, the file is read into a\n\
10735 unibyte buffer as is, thus each byte of a file is treated as a\n\
10736 character.");
10737     plist[14] = intern_c_string (":eol-type");
10738     plist[15] = args[coding_arg_eol_type] = Qunix;
10739     args[coding_arg_plist] = Flist (16, plist);
10740     Fdefine_coding_system_internal (coding_arg_max, args);
10741
10742     plist[1] = args[coding_arg_name] = Qundecided;
10743     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10744     plist[5] = args[coding_arg_coding_type] = Qundecided;
10745     /* This is already set.
10746        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10747     plist[8] = intern_c_string (":charset-list");
10748     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10749     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10750     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10751     plist[15] = args[coding_arg_eol_type] = Qnil;
10752     args[coding_arg_plist] = Flist (16, plist);
10753     Fdefine_coding_system_internal (coding_arg_max, args);
10754   }
10755
10756   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10757
10758   {
10759     int i;
10760
10761     for (i = 0; i < coding_category_max; i++)
10762       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10763   }
10764 #if defined (DOS_NT)
10765   system_eol_type = Qdos;
10766 #else
10767   system_eol_type = Qunix;
10768 #endif
10769   staticpro (&system_eol_type);
10770 }
10771
10772 char *
10773 emacs_strerror (int error_number)
10774 {
10775   char *str;
10776
10777   synchronize_system_messages_locale ();
10778   str = strerror (error_number);
10779
10780   if (! NILP (Vlocale_coding_system))
10781     {
10782       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10783                                                       Vlocale_coding_system,
10784                                                       0);
10785       str = SSDATA (dec);
10786     }
10787
10788   return str;
10789 }
10790
10791 #endif /* emacs */