src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001-2011 Free Software Foundation, Inc.
   3    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4      2005, 2006, 2007, 2008, 2009, 2010, 2011
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H14PRO021
   7    Copyright (C) 2003
   8      National Institute of Advanced Industrial Science and Technology (AIST)
   9      Registration Number H13PRO009
  10
  11 This file is part of GNU Emacs.
  12
  13 GNU Emacs is free software: you can redistribute it and/or modify
  14 it under the terms of the GNU General Public License as published by
  15 the Free Software Foundation, either version 3 of the License, or
  16 (at your option) any later version.
  17
  18 GNU Emacs is distributed in the hope that it will be useful,
  19 but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 GNU General Public License for more details.
  22
  23 You should have received a copy of the GNU General Public License
  24 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
   58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  On
   59   the C level, a coding system is represented by a vector of attributes
   60   stored in the hash table Vcoding_system_hash_table.  The conversion
   61   from coding system symbol to attributes vector is done by looking up
   62   Vcoding_system_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
  134   Before using a coding system for code conversion (i.e. decoding and
  135   encoding), we set up a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (struct coding_system *coding,
 157                    struct coding_detection_info *detect_info)
 158 {
 159   const unsigned char *src = coding->source;
 160   const unsigned char *src_end = coding->source + coding->src_bytes;
 161   int multibytep = coding->src_multibyte;
 162   EMACS_INT consumed_chars = 0;
 163   int found = 0;
 164   ...;
 165
 166   while (1)
 167     {
 168       /* Get one byte from the source.  If the source is exhausted, jump
 169          to no_more_source:.  */
 170       ONE_MORE_BYTE (c);
 171
 172       if (! __C_conforms_to_XXX___ (c))
 173         break;
 174       if (! __C_strongly_suggests_XXX__ (c))
 175         found = CATEGORY_MASK_XXX;
 176     }
 177   /* The byte sequence is invalid for XXX.  */
 178   detect_info->rejected |= CATEGORY_MASK_XXX;
 179   return 0;
 180
 181  no_more_source:
 182   /* The source exhausted successfully.  */
 183   detect_info->found |= found;
 184   return 1;
 185 }
 186 #endif
 187
 188 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 189
 190   These functions decode a byte sequence specified as a source by
 191   CODING.  The resulting multibyte text goes to a place pointed to by
  192   CODING->charbuf, the length of which should not exceed
  193   CODING->charbuf_size.
 194
 195   These functions set the information of original and decoded texts in
 196   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 197   They also set CODING->result to one of CODING_RESULT_XXX indicating
 198   how the decoding is finished.
 199
 200   Below is the template of these functions.  */
 201
 202 #if 0
 203 static void
 204 decode_coding_XXXX (struct coding_system *coding)
 205 {
 206   const unsigned char *src = coding->source + coding->consumed;
 207   const unsigned char *src_end = coding->source + coding->src_bytes;
 208   /* SRC_BASE remembers the start position in source in each loop.
 209      The loop will be exited when there's not enough source code, or
 210      when there's no room in CHARBUF for a decoded character.  */
 211   const unsigned char *src_base;
 212   /* A buffer to produce decoded characters.  */
 213   int *charbuf = coding->charbuf + coding->charbuf_used;
 214   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 215   int multibytep = coding->src_multibyte;
 216
 217   while (1)
 218     {
 219       src_base = src;
 220       if (charbuf < charbuf_end)
 221         /* No more room to produce a decoded character.  */
 222         break;
 223       ONE_MORE_BYTE (c);
 224       /* Decode it. */
 225     }
 226
 227  no_more_source:
 228   if (src_base < src_end
 229       && coding->mode & CODING_MODE_LAST_BLOCK)
 230     /* If the source ends by partial bytes to construct a character,
 231        treat them as eight-bit raw data.  */
 232     while (src_base < src_end && charbuf < charbuf_end)
 233       *charbuf++ = *src_base++;
 234   /* Remember how many bytes and characters we consumed.  If the
 235      source is multibyte, the bytes and chars are not identical.  */
 236   coding->consumed = coding->consumed_char = src_base - coding->source;
 237   /* Remember how many characters we produced.  */
 238   coding->charbuf_used = charbuf - coding->charbuf;
 239 }
 240 #endif
 241
 242 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 243
 244   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 245   internal multibyte format by CODING.  The resulting byte sequence
 246   goes to a place pointed to by DESTINATION, the length of which
 247   should not exceed DST_BYTES.
 248
 249   These functions set the information of original and encoded texts in
 250   the members produced, produced_char, consumed, and consumed_char of
 251   the structure *CODING.  They also set the member result to one of
 252   CODING_RESULT_XXX indicating how the encoding finished.
 253
  254   DST_BYTES zero means that source area and destination area are
  255   overlapped, which means that we can produce an encoded text until it
  256   reaches the head of not-yet-encoded source text.
 257
 258   Below is a template of these functions.  */
 259 #if 0
 260 static void
 261 encode_coding_XXX (struct coding_system *coding)
 262 {
 263   int multibytep = coding->dst_multibyte;
 264   int *charbuf = coding->charbuf;
 265   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 266   unsigned char *dst = coding->destination + coding->produced;
 267   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 268   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 269   EMACS_INT produced_chars = 0;
 270
 271   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 272     {
 273       int c = *charbuf;
 274       /* Encode C into DST, and increment DST.  */
 275     }
 276  label_no_more_destination:
 277   /* How many chars and bytes we produced.  */
 278   coding->produced_char += produced_chars;
 279   coding->produced = dst - coding->destination;
 280 }
 281 #endif
 282
 283 \f
 284 /*** 1. Preamble ***/
 285
 286 #include <config.h>
 287 #include <stdio.h>
 288 #include <setjmp.h>
 289
 290 #include "lisp.h"
 291 #include "buffer.h"
 292 #include "character.h"
 293 #include "charset.h"
 294 #include "ccl.h"
 295 #include "composite.h"
 296 #include "coding.h"
 297 #include "window.h"
 298 #include "frame.h"
 299 #include "termhooks.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 static Lisp_Object Qcoding_system, Qeol_type;
 304 static Lisp_Object Qcoding_aliases;
 305 Lisp_Object Qunix, Qdos;
 306 Lisp_Object Qbuffer_file_coding_system;
 307 static Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 static Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qutf_8;
 311 static Lisp_Object Qiso_2022;
 312 static Lisp_Object Qutf_16, Qshift_jis, Qbig5;
 313 static Lisp_Object Qbig, Qlittle;
 314 static Lisp_Object Qcoding_system_history;
 315 static Lisp_Object Qvalid_codes;
 316 static Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 317 static Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 318 static Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 319 static Lisp_Object QCascii_compatible_p;
 320
 321 Lisp_Object Qcall_process, Qcall_process_region;
 322 Lisp_Object Qstart_process, Qopen_network_stream;
 323 static Lisp_Object Qtarget_idx;
 324
 325 static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 326 static Lisp_Object Qinterrupted, Qinsufficient_memory;
 327
 328 /* If a symbol has this property, evaluate the value to define the
 329    symbol as a coding system.  */
 330 static Lisp_Object Qcoding_system_define_form;
 331
 332 /* Format of end-of-line decided by system.  This is Qunix on
 333    Unix and Mac, Qdos on DOS/Windows.
 334    This has an effect only for external encoding (i.e. for output to
 335    file and process), not for in-buffer or Lisp string encoding.  */
 336 static Lisp_Object system_eol_type;
 337
 338 #ifdef emacs
 339
 340 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 341
 342 /* Coding system emacs-mule and raw-text are for converting only
 343    end-of-line format.  */
 344 Lisp_Object Qemacs_mule, Qraw_text;
 345 Lisp_Object Qutf_8_emacs;
 346
 347 /* Coding-systems are handed between Emacs Lisp programs and C internal
 348    routines by the following three variables.  */
 349 /* Coding system to be used to encode text for terminal display when
 350    terminal coding system is nil.  */
 351 struct coding_system safe_terminal_coding;
 352
 353 #endif /* emacs */
 354
 355 Lisp_Object Qtranslation_table;
 356 Lisp_Object Qtranslation_table_id;
 357 static Lisp_Object Qtranslation_table_for_decode;
 358 static Lisp_Object Qtranslation_table_for_encode;
 359
 360 /* Two special coding systems.  */
 361 static Lisp_Object Vsjis_coding_system;
 362 static Lisp_Object Vbig5_coding_system;
 363
 364 /* ISO2022 section */
  365 /* XINT of element REG of CODING's `coding_attr_iso_initial' attribute.  */
  366 #define CODING_ISO_INITIAL(coding, reg)                 \
  367   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
  368                      coding_attr_iso_initial),          \
  369                reg)))
  370 /* Value of CODING->safe_charsets[CHARSET_ID], or -1 if CHARSET_ID is
  371    greater than CODING->max_charset_id or that value is 255.  */
  372 #define CODING_ISO_REQUEST(coding, charset_id)          \
  373   (((charset_id) <= (coding)->max_charset_id            \
  374     ? ((coding)->safe_charsets[charset_id] != 255       \
  375        ? (coding)->safe_charsets[charset_id]            \
  376        : -1)                                            \
  377     : -1))
  378 /* Accessors for members of CODING->spec.iso_2022; INVOKED_CHARSET is
  379    the designation of the register that PLANE's invocation names.  */
  380 #define CODING_ISO_FLAGS(coding)        \
  381   ((coding)->spec.iso_2022.flags)
  382 #define CODING_ISO_DESIGNATION(coding, reg)     \
  383   ((coding)->spec.iso_2022.current_designation[reg])
  384 #define CODING_ISO_INVOCATION(coding, plane)    \
  385   ((coding)->spec.iso_2022.current_invocation[plane])
  386 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
  387   ((coding)->spec.iso_2022.single_shifting)
  388 #define CODING_ISO_BOL(coding)  \
  389   ((coding)->spec.iso_2022.bol)
  390 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
  391   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
  392 #define CODING_ISO_CMP_STATUS(coding)   \
  393   (&(coding)->spec.iso_2022.cmp_status)
  394 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
  395   ((coding)->spec.iso_2022.ctext_extended_segment_len)
  396 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
  397   ((coding)->spec.iso_2022.embedded_utf_8)
 398
 399 /* Control characters of ISO2022.  */
 400                         /* code */      /* function */
 401 #define ISO_CODE_SO     0x0E            /* shift-out */
 402 #define ISO_CODE_SI     0x0F            /* shift-in */
 403 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 404 #define ISO_CODE_ESC    0x1B            /* escape */
 405 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 406 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 407 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 408
  409 /* Every 1-byte code of ISO2022 is classified into one of the
  410    following classes.  */
  411 enum iso_code_class_type
  412   {
  413     ISO_control_0,              /* Control codes in the range
  414                                    0x00..0x1F and 0x7F, except for the
  415                                    following 4 codes.  */
  416     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
  417     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
  418     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
  419     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
  420     ISO_control_1,              /* Control codes in the range
  421                                    0x80..0x9F, except for the
  422                                    following 3 codes.  */
  423     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
  424     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
  425     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
  426     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
  427     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
  428     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
  429     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
  430   };
 431
  432 /** The macros CODING_ISO_FLAG_XXX define the flag bits of the
  433     `iso-flags' attribute of an iso2022 coding system.  */
 434
 435 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 436    instead of the correct short-form sequence (e.g. ESC $ A).  */
 437 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 438
 439 /* If set, reset graphic planes and registers at end-of-line to the
 440    initial state.  */
 441 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 442
 443 /* If set, reset graphic planes and registers before any control
 444    characters to the initial state.  */
 445 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 446
 447 /* If set, encode by 7-bit environment.  */
 448 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 449
 450 /* If set, use locking-shift function.  */
 451 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 452
 453 /* If set, use single-shift function.  Overwrite
 454    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 455 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 456
 457 /* If set, use designation escape sequence.  */
 458 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 459
 460 /* If set, produce revision number sequence.  */
 461 #define CODING_ISO_FLAG_REVISION        0x0080
 462
 463 /* If set, produce ISO6429's direction specifying sequence.  */
 464 #define CODING_ISO_FLAG_DIRECTION       0x0100
 465
 466 /* If set, assume designation states are reset at beginning of line on
 467    output.  */
 468 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 469
 470 /* If set, designation sequence should be placed at beginning of line
 471    on output.  */
 472 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 473
 474 /* If set, do not encode unsafe characters on output.  */
 475 #define CODING_ISO_FLAG_SAFE            0x0800
 476
 477 /* If set, extra latin codes (128..159) are accepted as a valid code
 478    on input.  */
 479 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 480
 481 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 482
 483 /* #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 */
 484
 485 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 486
 487 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 488
 489 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 490
 491 /* A character to be produced on output if encoding of the original
 492    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 493 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 494
 495 /* UTF-8 section */
 496 #define CODING_UTF_8_BOM(coding)        \
 497   ((coding)->spec.utf_8_bom)
 498
 499 /* UTF-16 section */
 500 #define CODING_UTF_16_BOM(coding)       \
 501   ((coding)->spec.utf_16.bom)
 502
 503 #define CODING_UTF_16_ENDIAN(coding)    \
 504   ((coding)->spec.utf_16.endian)
 505
 506 #define CODING_UTF_16_SURROGATE(coding) \
 507   ((coding)->spec.utf_16.surrogate)
 508
 509
 510 /* CCL section */
 511 #define CODING_CCL_DECODER(coding)      \
 512   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 513 #define CODING_CCL_ENCODER(coding)      \
 514   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 515 #define CODING_CCL_VALIDS(coding)                                          \
 516   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 517
  518 /* Index for each coding category in `coding_categories', and the bit
  519    position of the matching CATEGORY_MASK_XXX flag where one exists.  */
  520 enum coding_category
  521   {
  522     coding_category_iso_7,
  523     coding_category_iso_7_tight,
  524     coding_category_iso_8_1,
  525     coding_category_iso_8_2,
  526     coding_category_iso_7_else,
  527     coding_category_iso_8_else,
  528     coding_category_utf_8_auto,
  529     coding_category_utf_8_nosig,
  530     coding_category_utf_8_sig,
  531     coding_category_utf_16_auto,
  532     coding_category_utf_16_be,
  533     coding_category_utf_16_le,
  534     coding_category_utf_16_be_nosig,
  535     coding_category_utf_16_le_nosig,
  536     coding_category_charset,
  537     coding_category_sjis,
  538     coding_category_big5,
  539     coding_category_ccl,
  540     coding_category_emacs_mule,
  541     /* All above are targets of code detection.  */
  542     coding_category_raw_text,
  543     coding_category_undecided,
  544     coding_category_max         /* Number of categories listed above.  */
  545   };
 546
 547 /* Definitions of flag bits used in detect_coding_XXXX.  */
 548 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 549 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 550 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 551 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 552 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 553 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 554 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 555 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 556 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 557 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 558 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 559 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 560 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 561 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 562 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 563 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 564 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 565 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 566 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 567 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 568
  569 /* This value is returned if detect_coding_mask () finds nothing other
  570    than ASCII characters.  It ORs the masks of coding_category_iso_7
         through coding_category_emacs_mule; CATEGORY_MASK_RAW_TEXT is not
         included.  */
  571 #define CATEGORY_MASK_ANY               \
  572   (CATEGORY_MASK_ISO_7                  \
  573    | CATEGORY_MASK_ISO_7_TIGHT          \
  574    | CATEGORY_MASK_ISO_8_1              \
  575    | CATEGORY_MASK_ISO_8_2              \
  576    | CATEGORY_MASK_ISO_7_ELSE           \
  577    | CATEGORY_MASK_ISO_8_ELSE           \
  578    | CATEGORY_MASK_UTF_8_AUTO           \
  579    | CATEGORY_MASK_UTF_8_NOSIG          \
  580    | CATEGORY_MASK_UTF_8_SIG            \
  581    | CATEGORY_MASK_UTF_16_AUTO          \
  582    | CATEGORY_MASK_UTF_16_BE            \
  583    | CATEGORY_MASK_UTF_16_LE            \
  584    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
  585    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
  586    | CATEGORY_MASK_CHARSET              \
  587    | CATEGORY_MASK_SJIS                 \
  588    | CATEGORY_MASK_BIG5                 \
  589    | CATEGORY_MASK_CCL                  \
  590    | CATEGORY_MASK_EMACS_MULE)
 591
 592
 593 #define CATEGORY_MASK_ISO_7BIT \
 594   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 595
 596 #define CATEGORY_MASK_ISO_8BIT \
 597   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 598
 599 #define CATEGORY_MASK_ISO_ELSE \
 600   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 601
 602 #define CATEGORY_MASK_ISO_ESCAPE        \
 603   (CATEGORY_MASK_ISO_7                  \
 604    | CATEGORY_MASK_ISO_7_TIGHT          \
 605    | CATEGORY_MASK_ISO_7_ELSE           \
 606    | CATEGORY_MASK_ISO_8_ELSE)
 607
 608 #define CATEGORY_MASK_ISO       \
 609   (  CATEGORY_MASK_ISO_7BIT     \
 610      | CATEGORY_MASK_ISO_8BIT   \
 611      | CATEGORY_MASK_ISO_ELSE)
 612
 613 #define CATEGORY_MASK_UTF_16            \
 614   (CATEGORY_MASK_UTF_16_AUTO            \
 615    | CATEGORY_MASK_UTF_16_BE            \
 616    | CATEGORY_MASK_UTF_16_LE            \
 617    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 618    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 619
 620 #define CATEGORY_MASK_UTF_8     \
 621   (CATEGORY_MASK_UTF_8_AUTO     \
 622    | CATEGORY_MASK_UTF_8_NOSIG  \
 623    | CATEGORY_MASK_UTF_8_SIG)
 624
 625 /* Table of coding categories (Lisp symbols).  This variable is for
 626    internal use only.  */
 627 static Lisp_Object Vcoding_category_table;
 628
 629 /* Table of coding-categories ordered by priority.  */
 630 static enum coding_category coding_priorities[coding_category_max];
 631
 632 /* Nth element is a coding context for the coding system bound to the
 633    Nth coding category.  */
 634 static struct coding_system coding_categories[coding_category_max];
 635
 636 /*** Commonly used macros and functions ***/
  637 /* Fallback min and max; each argument may be evaluated twice.  */
  638 #ifndef min
  639 #define min(a, b) ((a) < (b) ? (a) : (b))
  640 #endif
  641 #ifndef max
  642 #define max(a, b) ((a) > (b) ? (a) : (b))
  643 #endif
  644 /* Set ATTRS from CODING's id and CHARSET_LIST from those ATTRS.  */
  645 #define CODING_GET_INFO(coding, attrs, charset_list)    \
  646   do {                                                  \
  647     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
  648     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
  649   } while (0)
 650
 651
  652 /* Safely get one byte from the source text pointed by SRC which ends
  653    at SRC_END, and set C to that byte.  If the source is exhausted,
  654    jump to `no_more_source', first recording
  655    CODING_RESULT_INSUFFICIENT_SRC if SRC_BASE < SRC.  If multibytep is
  656    nonzero and a multibyte character other than a two-byte 0xC0/0xC1
  657    sequence (which is combined into a single value) is found at SRC, set
  658    C to the negative of its code and record CODING_RESULT_INVALID_SRC.
  659    Needs: src, src_end, src_base, multibytep, coding, consumed_chars.  */
  660 #define ONE_MORE_BYTE(c)                                \
  661   do {                                                  \
  662     if (src == src_end)                                 \
  663       {                                                 \
  664         if (src_base < src)                             \
  665           record_conversion_result                      \
  666             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
  667         goto no_more_source;                            \
  668       }                                                 \
  669     c = *src++;                                         \
  670     if (multibytep && (c & 0x80))                       \
  671       {                                                 \
  672         if ((c & 0xFE) == 0xC0)                         \
  673           c = ((c & 1) << 6) | *src++;                  \
  674         else                                            \
  675           {                                             \
  676             src--;                                      \
  677             c = - string_char (src, &src, NULL);        \
  678             record_conversion_result                    \
  679               (coding, CODING_RESULT_INVALID_SRC);      \
  680           }                                             \
  681       }                                                 \
  682     consumed_chars++;                                   \
  683   } while (0)
 684
  685 /* Safely get two bytes from the source text pointed by SRC which ends
  686    at SRC_END, and set C1 and C2 to those bytes while skipping the
  687    heading multibyte characters.  If there are not enough bytes in the
  688    source, it jumps to `no_more_source'.  If multibytep is nonzero and
  689    a multibyte character is found for C2, set C2 to -1.  Unlike
  690    ONE_MORE_BYTE, this neither records a conversion result nor counts
  691    consumed characters.  The caller should declare and set these
  692    variables appropriately in advance: src, src_end, multibytep.
  693    It is intended that this macro is used in detect_coding_utf_16.  */
  694
  695 #define TWO_MORE_BYTES(c1, c2)                          \
  696   do {                                                  \
  697     do {                                                \
  698       if (src == src_end)                               \
  699         goto no_more_source;                            \
  700       c1 = *src++;                                      \
  701       if (multibytep && (c1 & 0x80))                    \
  702         {                                               \
  703           if ((c1 & 0xFE) == 0xC0)                      \
  704             c1 = ((c1 & 1) << 6) | *src++;              \
  705           else                                          \
  706             {                                           \
  707               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
  708               c1 = -1;                                  \
  709             }                                           \
  710         }                                               \
  711     } while (c1 < 0);                                   \
  712     if (src == src_end)                                 \
  713       goto no_more_source;                              \
  714     c2 = *src++;                                        \
  715     if (multibytep && (c2 & 0x80))                      \
  716       {                                                 \
  717         if ((c2 & 0xFE) == 0xC0)                        \
  718           c2 = ((c2 & 1) << 6) | *src++;                \
  719         else                                            \
  720           c2 = -1;                                      \
  721       }                                                 \
  722   } while (0)
 723
 724
  725 /* Store a byte C in the place pointed by DST and increment DST to the
  726    next free point, and increment PRODUCED_CHARS.  The caller should
  727    assure that C is 0..127, and declare and set the variables `dst'
  728    and `produced_chars' appropriately in advance.
  729 */
  730
  731
  732 #define EMIT_ONE_ASCII_BYTE(c)  \
  733   do {                          \
  734     produced_chars++;           \
  735     *dst++ = (c);               \
  736   } while (0)
 737
 738
  739 /* Like EMIT_ONE_ASCII_BYTE but store two bytes, C1 then C2, and add
         2 to PRODUCED_CHARS.  */
  740
  741 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
  742   do {                                  \
  743     produced_chars += 2;                \
  744     *dst++ = (c1), *dst++ = (c2);       \
  745   } while (0)
 746
 747
  748 /* Store a byte C in the place pointed by DST and increment DST to the
  749    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
  750    nonzero, store in an appropriate multibyte form (a byte >= 0x80 is
  751    first mapped by BYTE8_TO_CHAR).  The caller should declare and set
  752    the variables `dst', `multibytep' and `produced_chars' in advance.  */
  753
  754 #define EMIT_ONE_BYTE(c)                \
  755   do {                                  \
  756     produced_chars++;                   \
  757     if (multibytep)                     \
  758       {                                 \
  759         unsigned ch = (c);              \
  760         if (ch >= 0x80)                 \
  761           ch = BYTE8_TO_CHAR (ch);      \
  762         CHAR_STRING_ADVANCE (ch, dst);  \
  763       }                                 \
  764     else                                \
  765       *dst++ = (c);                     \
  766   } while (0)
 767
 768
 769 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 770
 771 #define EMIT_TWO_BYTES(c1, c2)          \
 772   do {                                  \
 773     produced_chars += 2;                \
 774     if (multibytep)                     \
 775       {                                 \
 776         unsigned ch;                    \
 777                                         \
 778         ch = (c1);                      \
 779         if (ch >= 0x80)                 \
 780           ch = BYTE8_TO_CHAR (ch);      \
 781         CHAR_STRING_ADVANCE (ch, dst);  \
 782         ch = (c2);                      \
 783         if (ch >= 0x80)                 \
 784           ch = BYTE8_TO_CHAR (ch);      \
 785         CHAR_STRING_ADVANCE (ch, dst);  \
 786       }                                 \
 787     else                                \
 788       {                                 \
 789         *dst++ = (c1);                  \
 790         *dst++ = (c2);                  \
 791       }                                 \
 792   } while (0)
 793
 794
 795 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 796   do {                                  \
 797     EMIT_ONE_BYTE (c1);                 \
 798     EMIT_TWO_BYTES (c2, c3);            \
 799   } while (0)
 800
 801
 802 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 803   do {                                          \
 804     EMIT_TWO_BYTES (c1, c2);                    \
 805     EMIT_TWO_BYTES (c3, c4);                    \
 806   } while (0)
 807
 808
 809 /* Prototypes for static functions.  */
 810 static void record_conversion_result (struct coding_system *coding,
 811                                       enum coding_result_code result);
 812 static int detect_coding_utf_8 (struct coding_system *,
 813                                 struct coding_detection_info *info);
 814 static void decode_coding_utf_8 (struct coding_system *);
 815 static int encode_coding_utf_8 (struct coding_system *);
 816
 817 static int detect_coding_utf_16 (struct coding_system *,
 818                                  struct coding_detection_info *info);
 819 static void decode_coding_utf_16 (struct coding_system *);
 820 static int encode_coding_utf_16 (struct coding_system *);
 821
 822 static int detect_coding_iso_2022 (struct coding_system *,
 823                                    struct coding_detection_info *info);
 824 static void decode_coding_iso_2022 (struct coding_system *);
 825 static int encode_coding_iso_2022 (struct coding_system *);
 826
 827 static int detect_coding_emacs_mule (struct coding_system *,
 828                                      struct coding_detection_info *info);
 829 static void decode_coding_emacs_mule (struct coding_system *);
 830 static int encode_coding_emacs_mule (struct coding_system *);
 831
 832 static int detect_coding_sjis (struct coding_system *,
 833                                struct coding_detection_info *info);
 834 static void decode_coding_sjis (struct coding_system *);
 835 static int encode_coding_sjis (struct coding_system *);
 836
 837 static int detect_coding_big5 (struct coding_system *,
 838                                struct coding_detection_info *info);
 839 static void decode_coding_big5 (struct coding_system *);
 840 static int encode_coding_big5 (struct coding_system *);
 841
 842 static int detect_coding_ccl (struct coding_system *,
 843                               struct coding_detection_info *info);
 844 static void decode_coding_ccl (struct coding_system *);
 845 static int encode_coding_ccl (struct coding_system *);
 846
 847 static void decode_coding_raw_text (struct coding_system *);
 848 static int encode_coding_raw_text (struct coding_system *);
 849
 850 static EMACS_INT coding_set_source (struct coding_system *);
 851 static EMACS_INT coding_set_destination (struct coding_system *);
 852 static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
 853 static void coding_alloc_by_making_gap (struct coding_system *,
 854                                         EMACS_INT, EMACS_INT);
 855 static unsigned char *alloc_destination (struct coding_system *,
 856                                          EMACS_INT, unsigned char *);
 857 static void setup_iso_safe_charsets (Lisp_Object);
 858 static int encode_designation_at_bol (struct coding_system *,
 859                                       int *, int *, unsigned char *);
 860 static int detect_eol (const unsigned char *,
 861                        EMACS_INT, enum coding_category);
 862 static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
 863 static void decode_eol (struct coding_system *);
 864 static Lisp_Object get_translation_table (Lisp_Object, int, int *);
 865 static Lisp_Object get_translation (Lisp_Object, int *, int *);
 866 static int produce_chars (struct coding_system *, Lisp_Object, int);
 867 static inline void produce_charset (struct coding_system *, int *,
 868                                     EMACS_INT);
 869 static void produce_annotation (struct coding_system *, EMACS_INT);
 870 static int decode_coding (struct coding_system *);
 871 static inline int *handle_composition_annotation (EMACS_INT, EMACS_INT,
 872                                                   struct coding_system *,
 873                                                   int *, EMACS_INT *);
 874 static inline int *handle_charset_annotation (EMACS_INT, EMACS_INT,
 875                                               struct coding_system *,
 876                                               int *, EMACS_INT *);
 877 static void consume_chars (struct coding_system *, Lisp_Object, int);
 878 static int encode_coding (struct coding_system *);
 879 static Lisp_Object make_conversion_work_buffer (int);
 880 static Lisp_Object code_conversion_restore (Lisp_Object);
 881 static inline int char_encodable_p (int, Lisp_Object);
 882 static Lisp_Object make_subsidiaries (Lisp_Object);
 883
 884 static void
 885 record_conversion_result (struct coding_system *coding,
 886                           enum coding_result_code result)
 887 {
 888   coding->result = result;
 889   switch (result)
 890     {
 891     case CODING_RESULT_INSUFFICIENT_SRC:
 892       Vlast_code_conversion_error = Qinsufficient_source;
 893       break;
 894     case CODING_RESULT_INCONSISTENT_EOL:
 895       Vlast_code_conversion_error = Qinconsistent_eol;
 896       break;
 897     case CODING_RESULT_INVALID_SRC:
 898       Vlast_code_conversion_error = Qinvalid_source;
 899       break;
 900     case CODING_RESULT_INTERRUPT:
 901       Vlast_code_conversion_error = Qinterrupted;
 902       break;
 903     case CODING_RESULT_INSUFFICIENT_MEM:
 904       Vlast_code_conversion_error = Qinsufficient_memory;
 905       break;
 906     case CODING_RESULT_INSUFFICIENT_DST:
 907       /* Don't record this error in Vlast_code_conversion_error
 908          because it happens just temporarily and is resolved when the
 909          whole conversion is finished.  */
 910       break;
 911     case CODING_RESULT_SUCCESS:
 912       break;
 913     default:
 914       Vlast_code_conversion_error = intern ("Unknown error");
 915     }
 916 }
 917
 918 /* These wrapper macros are used to preserve validity of pointers into
 919    buffer text across calls to decode_char, encode_char, etc, which
 920    could cause relocation of buffers if it loads a charset map,
 921    because loading a charset map allocates large structures.  */
 922
 923 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 924   do {                                                                       \
 925     EMACS_INT offset;                                                        \
 926                                                                              \
 927     charset_map_loaded = 0;                                                  \
 928     c = DECODE_CHAR (charset, code);                                         \
 929     if (charset_map_loaded                                                   \
 930         && (offset = coding_set_source (coding)))                            \
 931       {                                                                      \
 932         src += offset;                                                       \
 933         src_base += offset;                                                  \
 934         src_end += offset;                                                   \
 935       }                                                                      \
 936   } while (0)
 937
 938 #define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code)      \
 939   do {                                                                  \
 940     EMACS_INT offset;                                                   \
 941                                                                         \
 942     charset_map_loaded = 0;                                             \
 943     code = ENCODE_CHAR (charset, c);                                    \
 944     if (charset_map_loaded                                              \
 945         && (offset = coding_set_destination (coding)))                  \
 946       {                                                                 \
 947         dst += offset;                                                  \
 948         dst_end += offset;                                              \
 949       }                                                                 \
 950   } while (0)
 951
 952 #define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
 953   do {                                                                  \
 954     EMACS_INT offset;                                                   \
 955                                                                         \
 956     charset_map_loaded = 0;                                             \
 957     charset = char_charset (c, charset_list, code_return);              \
 958     if (charset_map_loaded                                              \
 959         && (offset = coding_set_destination (coding)))                  \
 960       {                                                                 \
 961         dst += offset;                                                  \
 962         dst_end += offset;                                              \
 963       }                                                                 \
 964   } while (0)
 965
 966 #define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \
 967   do {                                                                  \
 968     EMACS_INT offset;                                                   \
 969                                                                         \
 970     charset_map_loaded = 0;                                             \
 971     result = CHAR_CHARSET_P (c, charset);                               \
 972     if (charset_map_loaded                                              \
 973         && (offset = coding_set_destination (coding)))                  \
 974       {                                                                 \
 975         dst += offset;                                                  \
 976         dst_end += offset;                                              \
 977       }                                                                 \
 978   } while (0)
 979
 980
 981 /* If there are at least BYTES length of room at dst, allocate memory
 982    for coding->destination and update dst and dst_end.  We don't have
 983    to take care of coding->source which will be relocated.  It is
 984    handled by calling coding_set_source in encode_coding.  */
 985
 986 #define ASSURE_DESTINATION(bytes)                               \
 987   do {                                                          \
 988     if (dst + (bytes) >= dst_end)                               \
 989       {                                                         \
 990         EMACS_INT more_bytes = charbuf_end - charbuf + (bytes); \
 991                                                                 \
 992         dst = alloc_destination (coding, more_bytes, dst);      \
 993         dst_end = coding->destination + coding->dst_bytes;      \
 994       }                                                         \
 995   } while (0)
 996
 997
 998 /* Store multibyte form of the character C in P, and advance P to the
 999    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
1000    never calls MAYBE_UNIFY_CHAR.  */
1001
1002 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
1003   do {                                          \
1004     if ((c) <= MAX_1_BYTE_CHAR)                 \
1005       *(p)++ = (c);                             \
1006     else if ((c) <= MAX_2_BYTE_CHAR)            \
1007       *(p)++ = (0xC0 | ((c) >> 6)),             \
1008         *(p)++ = (0x80 | ((c) & 0x3F));         \
1009     else if ((c) <= MAX_3_BYTE_CHAR)            \
1010       *(p)++ = (0xE0 | ((c) >> 12)),            \
1011         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1012         *(p)++ = (0x80 | ((c) & 0x3F));         \
1013     else if ((c) <= MAX_4_BYTE_CHAR)            \
1014       *(p)++ = (0xF0 | (c >> 18)),              \
1015         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1016         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1017         *(p)++ = (0x80 | (c & 0x3F));           \
1018     else if ((c) <= MAX_5_BYTE_CHAR)            \
1019       *(p)++ = 0xF8,                            \
1020         *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
1021         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1022         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1023         *(p)++ = (0x80 | (c & 0x3F));           \
1024     else                                        \
1025       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1026   } while (0)
1027
1028
/* Yield the character code of the character whose multibyte form
   starts at P, and advance P past that form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length of the form (1 to 5 bytes) is read off the high bits of
   the first byte.  A 2-byte form whose first byte is below 0xC2 gets
   0x3FFF80 added to its value.  P is evaluated several times, so it
   must not have side effects.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  (((p)[0] & 0x80) == 0                                         \
   ? *(p)++                                                     \
   : ((p)[0] & 0x20) == 0                                       \
   ? ((p) += 2,                                                 \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)        \
       | (((p)[-2] & 0x1F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p)[0] & 0x10) == 0                                       \
   ? ((p) += 3,                                                 \
      ((((p)[-3] & 0x0F) << 12)                                 \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p)[0] & 0x08) == 0                                       \
   ? ((p) += 4,                                                 \
      ((((p)[-4] & 0xF) << 18)                                  \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p) += 5,                                                 \
      ((((p)[-4] & 0x3F) << 18)                                 \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F))))
1057
1058
1059 /* Update coding->source from coding->src_object, and return how many
1060    bytes coding->source was changed.  */
1061
1062 static EMACS_INT
1063 coding_set_source (struct coding_system *coding)
1064 {
1065   const unsigned char *orig = coding->source;
1066
1067   if (BUFFERP (coding->src_object))
1068     {
1069       struct buffer *buf = XBUFFER (coding->src_object);
1070
1071       if (coding->src_pos < 0)
1072         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1073       else
1074         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1075     }
1076   else if (STRINGP (coding->src_object))
1077     {
1078       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1079     }
1080   else
1081     {
1082       /* Otherwise, the source is C string and is never relocated
1083          automatically.  Thus we don't have to update anything.  */
1084     }
1085   return coding->source - orig;
1086 }
1087
1088
1089 /* Update coding->destination from coding->dst_object, and return how
1090    many bytes coding->destination was changed.  */
1091
1092 static EMACS_INT
1093 coding_set_destination (struct coding_system *coding)
1094 {
1095   const unsigned char *orig = coding->destination;
1096
1097   if (BUFFERP (coding->dst_object))
1098     {
1099       if (BUFFERP (coding->src_object) && coding->src_pos < 0)
1100         {
1101           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1102           coding->dst_bytes = (GAP_END_ADDR
1103                                - (coding->src_bytes - coding->consumed)
1104                                - coding->destination);
1105         }
1106       else
1107         {
1108           /* We are sure that coding->dst_pos_byte is before the gap
1109              of the buffer. */
1110           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1111                                  + coding->dst_pos_byte - BEG_BYTE);
1112           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1113                                - coding->destination);
1114         }
1115     }
1116   else
1117     {
1118       /* Otherwise, the destination is C string and is never relocated
1119          automatically.  Thus we don't have to update anything.  */
1120     }
1121   return coding->destination - orig;
1122 }
1123
1124
1125 static void
1126 coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
1127 {
1128   if (STRING_BYTES_BOUND - coding->dst_bytes < bytes)
1129     string_overflow ();
1130   coding->destination = (unsigned char *) xrealloc (coding->destination,
1131                                                     coding->dst_bytes + bytes);
1132   coding->dst_bytes += bytes;
1133 }
1134
1135 static void
1136 coding_alloc_by_making_gap (struct coding_system *coding,
1137                             EMACS_INT gap_head_used, EMACS_INT bytes)
1138 {
1139   if (EQ (coding->src_object, coding->dst_object))
1140     {
1141       /* The gap may contain the produced data at the head and not-yet
1142          consumed data at the tail.  To preserve those data, we at
1143          first make the gap size to zero, then increase the gap
1144          size.  */
1145       EMACS_INT add = GAP_SIZE;
1146
1147       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1148       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1149       make_gap (bytes);
1150       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1151       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1152     }
1153   else
1154     {
1155       Lisp_Object this_buffer;
1156
1157       this_buffer = Fcurrent_buffer ();
1158       set_buffer_internal (XBUFFER (coding->dst_object));
1159       make_gap (bytes);
1160       set_buffer_internal (XBUFFER (this_buffer));
1161     }
1162 }
1163
1164
1165 static unsigned char *
1166 alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1167                    unsigned char *dst)
1168 {
1169   EMACS_INT offset = dst - coding->destination;
1170
1171   if (BUFFERP (coding->dst_object))
1172     {
1173       struct buffer *buf = XBUFFER (coding->dst_object);
1174
1175       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1176     }
1177   else
1178     coding_alloc_by_realloc (coding, nbytes);
1179   coding_set_destination (coding);
1180   dst = coding->destination + offset;
1181   return dst;
1182 }
1183
1184 /** Macros for annotations.  */
1185
1186 /* An annotation data is stored in the array coding->charbuf in this
1187    format:
1188      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1189    LENGTH is the number of elements in the annotation.
1190    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1191    NCHARS is the number of characters in the text annotated.
1192
1193    The format of the following elements depend on ANNOTATION_MASK.
1194
   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
1197      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1198
1199    NBYTES is the number of bytes specified in the header part of
1200    old-style emacs-mule encoding, or 0 for the other kind of
1201    composition.
1202
1203    METHOD is one of enum composition_method.
1204
1205    Optional COMPOSITION-COMPONENTS are characters and composition
1206    rules.
1207
1208    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1209    follows.
1210
1211    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1212    recover from an invalid annotation, and should be skipped by
1213    produce_annotation.  */
1214
/* Maximum length of the header of annotation data.  */
#define MAX_ANNOTATION_LENGTH 5

/* Store the annotation header [ -LEN MASK NCHARS ] at BUF, advance
   BUF past those three elements, and set coding->annotated.  The
   caller must have a variable `coding' in scope.  The expansion has
   no trailing semicolon, so the macro can be used as the body of an
   unbraced `if' that has an `else'.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)

/* Store a composition annotation at BUF: a header of length 5 with
   CODING_ANNOTATE_COMPOSITION_MASK and NCHARS, followed by NBYTES
   and METHOD.  BUF ends up five elements further on.  */

#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes);                                                    \
    *(buf)++ = (method);                                                    \
  } while (0)


/* Store a charset annotation at BUF: a header of length 4 with
   CODING_ANNOTATE_CHARSET_MASK and NCHARS, followed by the charset
   ID.  BUF ends up four elements further on.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1239
1240 \f
1241 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1242
1243
1244
1245 \f
1246 /*** 3. UTF-8 ***/
1247
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   detect_coding_utf_8 checks if a text is encoded in UTF-8.  If it
   is, it returns 1, else it returns 0.  The macros below classify
   the individual octets of a UTF-8 sequence.  */
1251
/* Octet classification.  UTF_8_1_OCTET_P is true for values below
   0x80.  UTF_8_EXTRA_OCTET_P matches the bit pattern 10xxxxxx.  The
   N_OCTET_LEADING_P predicates match the leading-octet patterns
   110xxxxx (N = 2), 1110xxxx (N = 3), 11110xxx (N = 4) and
   111110xx (N = 5).  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)

/* The three octets of the UTF-8 signature (byte order mark).  */
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1262
1263 static int
1264 detect_coding_utf_8 (struct coding_system *coding,
1265                      struct coding_detection_info *detect_info)
1266 {
1267   const unsigned char *src = coding->source, *src_base;
1268   const unsigned char *src_end = coding->source + coding->src_bytes;
1269   int multibytep = coding->src_multibyte;
1270   EMACS_INT consumed_chars = 0;
1271   int bom_found = 0;
1272   int found = 0;
1273
1274   detect_info->checked |= CATEGORY_MASK_UTF_8;
1275   /* A coding system of this category is always ASCII compatible.  */
1276   src += coding->head_ascii;
1277
1278   while (1)
1279     {
1280       int c, c1, c2, c3, c4;
1281
1282       src_base = src;
1283       ONE_MORE_BYTE (c);
1284       if (c < 0 || UTF_8_1_OCTET_P (c))
1285         continue;
1286       ONE_MORE_BYTE (c1);
1287       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1288         break;
1289       if (UTF_8_2_OCTET_LEADING_P (c))
1290         {
1291           found = 1;
1292           continue;
1293         }
1294       ONE_MORE_BYTE (c2);
1295       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1296         break;
1297       if (UTF_8_3_OCTET_LEADING_P (c))
1298         {
1299           found = 1;
1300           if (src_base == coding->source
1301               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1302             bom_found = 1;
1303           continue;
1304         }
1305       ONE_MORE_BYTE (c3);
1306       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1307         break;
1308       if (UTF_8_4_OCTET_LEADING_P (c))
1309         {
1310           found = 1;
1311           continue;
1312         }
1313       ONE_MORE_BYTE (c4);
1314       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1315         break;
1316       if (UTF_8_5_OCTET_LEADING_P (c))
1317         {
1318           found = 1;
1319           continue;
1320         }
1321       break;
1322     }
1323   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1324   return 0;
1325
1326  no_more_source:
1327   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1328     {
1329       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1330       return 0;
1331     }
1332   if (bom_found)
1333     {
1334       /* The first character 0xFFFE doesn't necessarily mean a BOM.  */
1335       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1336     }
1337   else
1338     {
1339       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1340       if (found)
1341         detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1342     }
1343   return 1;
1344 }
1345
1346
1347 static void
1348 decode_coding_utf_8 (struct coding_system *coding)
1349 {
1350   const unsigned char *src = coding->source + coding->consumed;
1351   const unsigned char *src_end = coding->source + coding->src_bytes;
1352   const unsigned char *src_base;
1353   int *charbuf = coding->charbuf + coding->charbuf_used;
1354   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1355   EMACS_INT consumed_chars = 0, consumed_chars_base = 0;
1356   int multibytep = coding->src_multibyte;
1357   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1358   int eol_dos =
1359     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1360   int byte_after_cr = -1;
1361
1362   if (bom != utf_without_bom)
1363     {
1364       int c1, c2, c3;
1365
1366       src_base = src;
1367       ONE_MORE_BYTE (c1);
1368       if (! UTF_8_3_OCTET_LEADING_P (c1))
1369         src = src_base;
1370       else
1371         {
1372           ONE_MORE_BYTE (c2);
1373           if (! UTF_8_EXTRA_OCTET_P (c2))
1374             src = src_base;
1375           else
1376             {
1377               ONE_MORE_BYTE (c3);
1378               if (! UTF_8_EXTRA_OCTET_P (c3))
1379                 src = src_base;
1380               else
1381                 {
1382                   if ((c1 != UTF_8_BOM_1)
1383                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1384                     src = src_base;
1385                   else
1386                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1387                 }
1388             }
1389         }
1390     }
1391   CODING_UTF_8_BOM (coding) = utf_without_bom;
1392
1393   while (1)
1394     {
1395       int c, c1, c2, c3, c4, c5;
1396
1397       src_base = src;
1398       consumed_chars_base = consumed_chars;
1399
1400       if (charbuf >= charbuf_end)
1401         {
1402           if (byte_after_cr >= 0)
1403             src_base--;
1404           break;
1405         }
1406
1407       if (byte_after_cr >= 0)
1408         c1 = byte_after_cr, byte_after_cr = -1;
1409       else
1410         ONE_MORE_BYTE (c1);
1411       if (c1 < 0)
1412         {
1413           c = - c1;
1414         }
1415       else if (UTF_8_1_OCTET_P (c1))
1416         {
1417           if (eol_dos && c1 == '\r')
1418             ONE_MORE_BYTE (byte_after_cr);
1419           c = c1;
1420         }
1421       else
1422         {
1423           ONE_MORE_BYTE (c2);
1424           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1425             goto invalid_code;
1426           if (UTF_8_2_OCTET_LEADING_P (c1))
1427             {
1428               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1429               /* Reject overlong sequences here and below.  Encoders
1430                  producing them are incorrect, they can be misleading,
1431                  and they mess up read/write invariance.  */
1432               if (c < 128)
1433                 goto invalid_code;
1434             }
1435           else
1436             {
1437               ONE_MORE_BYTE (c3);
1438               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1439                 goto invalid_code;
1440               if (UTF_8_3_OCTET_LEADING_P (c1))
1441                 {
1442                   c = (((c1 & 0xF) << 12)
1443                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1444                   if (c < 0x800
1445                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1446                     goto invalid_code;
1447                 }
1448               else
1449                 {
1450                   ONE_MORE_BYTE (c4);
1451                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1452                     goto invalid_code;
1453                   if (UTF_8_4_OCTET_LEADING_P (c1))
1454                     {
1455                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1456                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1457                     if (c < 0x10000)
1458                       goto invalid_code;
1459                     }
1460                   else
1461                     {
1462                       ONE_MORE_BYTE (c5);
1463                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1464                         goto invalid_code;
1465                       if (UTF_8_5_OCTET_LEADING_P (c1))
1466                         {
1467                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1468                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1469                                | (c5 & 0x3F));
1470                           if ((c > MAX_CHAR) || (c < 0x200000))
1471                             goto invalid_code;
1472                         }
1473                       else
1474                         goto invalid_code;
1475                     }
1476                 }
1477             }
1478         }
1479
1480       *charbuf++ = c;
1481       continue;
1482
1483     invalid_code:
1484       src = src_base;
1485       consumed_chars = consumed_chars_base;
1486       ONE_MORE_BYTE (c);
1487       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1488       coding->errors++;
1489     }
1490
1491  no_more_source:
1492   coding->consumed_char += consumed_chars_base;
1493   coding->consumed = src_base - coding->source;
1494   coding->charbuf_used = charbuf - coding->charbuf;
1495 }
1496
1497
1498 static int
1499 encode_coding_utf_8 (struct coding_system *coding)
1500 {
1501   int multibytep = coding->dst_multibyte;
1502   int *charbuf = coding->charbuf;
1503   int *charbuf_end = charbuf + coding->charbuf_used;
1504   unsigned char *dst = coding->destination + coding->produced;
1505   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1506   EMACS_INT produced_chars = 0;
1507   int c;
1508
1509   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1510     {
1511       ASSURE_DESTINATION (3);
1512       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1513       CODING_UTF_8_BOM (coding) = utf_without_bom;
1514     }
1515
1516   if (multibytep)
1517     {
1518       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1519
1520       while (charbuf < charbuf_end)
1521         {
1522           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1523
1524           ASSURE_DESTINATION (safe_room);
1525           c = *charbuf++;
1526           if (CHAR_BYTE8_P (c))
1527             {
1528               c = CHAR_TO_BYTE8 (c);
1529               EMIT_ONE_BYTE (c);
1530             }
1531           else
1532             {
1533               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1534               for (p = str; p < pend; p++)
1535                 EMIT_ONE_BYTE (*p);
1536             }
1537         }
1538     }
1539   else
1540     {
1541       int safe_room = MAX_MULTIBYTE_LENGTH;
1542
1543       while (charbuf < charbuf_end)
1544         {
1545           ASSURE_DESTINATION (safe_room);
1546           c = *charbuf++;
1547           if (CHAR_BYTE8_P (c))
1548             *dst++ = CHAR_TO_BYTE8 (c);
1549           else
1550             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1551           produced_chars++;
1552         }
1553     }
1554   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1555   coding->produced_char += produced_chars;
1556   coding->produced = dst - coding->destination;
1557   return 0;
1558 }
1559
1560
1561 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1562    Check if a text is encoded in one of UTF-16 based coding systems.
1563    If it is, return 1, else return 0.  */
1564
/* Nonzero if the UTF-16 code unit VAL is a high (leading) surrogate,
   i.e. bits 10..15 of VAL equal those of 0xD800 (0xD800..0xDBFF).  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  ((((val) ^ 0xD800) & 0xFC00) == 0)

/* Nonzero if the UTF-16 code unit VAL is a low (trailing) surrogate,
   i.e. bits 10..15 of VAL equal those of 0xDC00 (0xDC00..0xDFFF).  */
#define UTF_16_LOW_SURROGATE_P(val) \
  ((((val) ^ 0xDC00) & 0xFC00) == 0)
1570
1571
1572 static int
1573 detect_coding_utf_16 (struct coding_system *coding,
1574                       struct coding_detection_info *detect_info)
1575 {
1576   const unsigned char *src = coding->source;
1577   const unsigned char *src_end = coding->source + coding->src_bytes;
1578   int multibytep = coding->src_multibyte;
1579   int c1, c2;
1580
1581   detect_info->checked |= CATEGORY_MASK_UTF_16;
1582   if (coding->mode & CODING_MODE_LAST_BLOCK
1583       && (coding->src_chars & 1))
1584     {
1585       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1586       return 0;
1587     }
1588
1589   TWO_MORE_BYTES (c1, c2);
1590   if ((c1 == 0xFF) && (c2 == 0xFE))
1591     {
1592       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1593                              | CATEGORY_MASK_UTF_16_AUTO);
1594       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1595                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1596                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1597     }
1598   else if ((c1 == 0xFE) && (c2 == 0xFF))
1599     {
1600       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1601                              | CATEGORY_MASK_UTF_16_AUTO);
1602       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1603                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1604                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1605     }
1606   else if (c2 < 0)
1607     {
1608       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1609       return 0;
1610     }
1611   else
1612     {
1613       /* We check the dispersion of Eth and Oth bytes where E is even and
1614          O is odd.  If both are high, we assume binary data.*/
1615       unsigned char e[256], o[256];
1616       unsigned e_num = 1, o_num = 1;
1617
1618       memset (e, 0, 256);
1619       memset (o, 0, 256);
1620       e[c1] = 1;
1621       o[c2] = 1;
1622
1623       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1624                                 |CATEGORY_MASK_UTF_16_BE
1625                                 | CATEGORY_MASK_UTF_16_LE);
1626
1627       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1628              != CATEGORY_MASK_UTF_16)
1629         {
1630           TWO_MORE_BYTES (c1, c2);
1631           if (c2 < 0)
1632             break;
1633           if (! e[c1])
1634             {
1635               e[c1] = 1;
1636               e_num++;
1637               if (e_num >= 128)
1638                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1639             }
1640           if (! o[c2])
1641             {
1642               o[c2] = 1;
1643               o_num++;
1644               if (o_num >= 128)
1645                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1646             }
1647         }
1648       return 0;
1649     }
1650
1651  no_more_source:
1652   return 1;
1653 }
1654
1655 static void
1656 decode_coding_utf_16 (struct coding_system *coding)
1657 {
1658   const unsigned char *src = coding->source + coding->consumed;
1659   const unsigned char *src_end = coding->source + coding->src_bytes;
1660   const unsigned char *src_base;
1661   int *charbuf = coding->charbuf + coding->charbuf_used;
1662   /* We may produces at most 3 chars in one loop.  */
1663   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1664   EMACS_INT consumed_chars = 0, consumed_chars_base = 0;
1665   int multibytep = coding->src_multibyte;
1666   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1667   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1668   int surrogate = CODING_UTF_16_SURROGATE (coding);
1669   int eol_dos =
1670     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1671   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1672
1673   if (bom == utf_with_bom)
1674     {
1675       int c, c1, c2;
1676
1677       src_base = src;
1678       ONE_MORE_BYTE (c1);
1679       ONE_MORE_BYTE (c2);
1680       c = (c1 << 8) | c2;
1681
1682       if (endian == utf_16_big_endian
1683           ? c != 0xFEFF : c != 0xFFFE)
1684         {
1685           /* The first two bytes are not BOM.  Treat them as bytes
1686              for a normal character.  */
1687           src = src_base;
1688           coding->errors++;
1689         }
1690       CODING_UTF_16_BOM (coding) = utf_without_bom;
1691     }
1692   else if (bom == utf_detect_bom)
1693     {
1694       /* We have already tried to detect BOM and failed in
1695          detect_coding.  */
1696       CODING_UTF_16_BOM (coding) = utf_without_bom;
1697     }
1698
1699   while (1)
1700     {
1701       int c, c1, c2;
1702
1703       src_base = src;
1704       consumed_chars_base = consumed_chars;
1705
1706       if (charbuf >= charbuf_end)
1707         {
1708           if (byte_after_cr1 >= 0)
1709             src_base -= 2;
1710           break;
1711         }
1712
1713       if (byte_after_cr1 >= 0)
1714         c1 = byte_after_cr1, byte_after_cr1 = -1;
1715       else
1716         ONE_MORE_BYTE (c1);
1717       if (c1 < 0)
1718         {
1719           *charbuf++ = -c1;
1720           continue;
1721         }
1722       if (byte_after_cr2 >= 0)
1723         c2 = byte_after_cr2, byte_after_cr2 = -1;
1724       else
1725         ONE_MORE_BYTE (c2);
1726       if (c2 < 0)
1727         {
1728           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1729           *charbuf++ = -c2;
1730           continue;
1731         }
1732       c = (endian == utf_16_big_endian
1733            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1734
1735       if (surrogate)
1736         {
1737           if (! UTF_16_LOW_SURROGATE_P (c))
1738             {
1739               if (endian == utf_16_big_endian)
1740                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1741               else
1742                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1743               *charbuf++ = c1;
1744               *charbuf++ = c2;
1745               coding->errors++;
1746               if (UTF_16_HIGH_SURROGATE_P (c))
1747                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1748               else
1749                 *charbuf++ = c;
1750             }
1751           else
1752             {
1753               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1754               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1755               *charbuf++ = 0x10000 + c;
1756             }
1757         }
1758       else
1759         {
1760           if (UTF_16_HIGH_SURROGATE_P (c))
1761             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1762           else
1763             {
1764               if (eol_dos && c == '\r')
1765                 {
1766                   ONE_MORE_BYTE (byte_after_cr1);
1767                   ONE_MORE_BYTE (byte_after_cr2);
1768                 }
1769               *charbuf++ = c;
1770             }
1771         }
1772     }
1773
1774  no_more_source:
1775   coding->consumed_char += consumed_chars_base;
1776   coding->consumed = src_base - coding->source;
1777   coding->charbuf_used = charbuf - coding->charbuf;
1778 }
1779
1780 static int
1781 encode_coding_utf_16 (struct coding_system *coding)
1782 {
1783   int multibytep = coding->dst_multibyte;
1784   int *charbuf = coding->charbuf;
1785   int *charbuf_end = charbuf + coding->charbuf_used;
1786   unsigned char *dst = coding->destination + coding->produced;
1787   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1788   int safe_room = 8;
1789   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1790   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1791   EMACS_INT produced_chars = 0;
1792   int c;
1793
1794   if (bom != utf_without_bom)
1795     {
1796       ASSURE_DESTINATION (safe_room);
1797       if (big_endian)
1798         EMIT_TWO_BYTES (0xFE, 0xFF);
1799       else
1800         EMIT_TWO_BYTES (0xFF, 0xFE);
1801       CODING_UTF_16_BOM (coding) = utf_without_bom;
1802     }
1803
1804   while (charbuf < charbuf_end)
1805     {
1806       ASSURE_DESTINATION (safe_room);
1807       c = *charbuf++;
1808       if (c > MAX_UNICODE_CHAR)
1809         c = coding->default_char;
1810
1811       if (c < 0x10000)
1812         {
1813           if (big_endian)
1814             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1815           else
1816             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1817         }
1818       else
1819         {
1820           int c1, c2;
1821
1822           c -= 0x10000;
1823           c1 = (c >> 10) + 0xD800;
1824           c2 = (c & 0x3FF) + 0xDC00;
1825           if (big_endian)
1826             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1827           else
1828             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1829         }
1830     }
1831   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1832   coding->produced = dst - coding->destination;
1833   coding->produced_char += produced_chars;
1834   return 0;
1835 }
1836
1837 \f
1838 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1839
1840 /* Emacs' internal format for representation of multiple character
1841    sets is a kind of multi-byte encoding, i.e. characters are
1842    represented by variable-length sequences of one-byte codes.
1843
1844    ASCII characters and control characters (e.g. `tab', `newline') are
1845    represented by one-byte sequences which are their ASCII codes, in
1846    the range 0x00 through 0x7F.
1847
1848    8-bit characters of the range 0x80..0x9F are represented by
1849    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1850    code + 0x20).
1851
1852    8-bit characters of the range 0xA0..0xFF are represented by
1853    one-byte sequences which are their 8-bit code.
1854
1855    The other characters are represented by a sequence of `base
1856    leading-code', optional `extended leading-code', and one or two
1857    `position-code's.  The length of the sequence is determined by the
1858    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1859    whereas extended leading-code and position-code take the range 0xA0
1860    through 0xFF.  See `charset.h' for more details about leading-code
1861    and position-code.
1862
1863    --- CODE RANGE of Emacs' internal format ---
1864    character set        range
1865    -------------        -----
1866    ascii                0x00..0x7F
1867    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1868    eight-bit-graphic    0xA0..0xBF
1869    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1870    ---------------------------------------------
1871
1872    As this is the internal character representation, the format is
1873    usually not used externally (i.e. in a file or in a data sent to a
1874    process).  But, it is possible to have a text externally in this
1875    format (i.e. by encoding by the coding system `emacs-mule').
1876
1877    In that case, a sequence of one-byte codes has a slightly different
1878    form.
1879
1880    At first, all characters in eight-bit-control are represented by
1881    one-byte sequences which are their 8-bit code.
1882
1883    Next, character composition data are represented by the byte
1884    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1885    where,
1886         METHOD is 0xF2 plus one of composition method (enum
1887         composition_method),
1888
1889         BYTES is 0xA0 plus a byte length of this composition data,
1890
1891         CHARS is 0xA0 plus a number of characters composed by this
1892         data,
1893
        COMPONENTs are characters of multibyte form or composition
        rules, each rule being encoded by two bytes of ASCII codes.
1896
1897    In addition, for backward compatibility, the following formats are
1898    also recognized as composition data on decoding.
1899
1900    0x80 MSEQ ...
1901    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1902
1903    Here,
        MSEQ is a multibyte form, but in one of these special formats:
1905           ASCII: 0xA0 ASCII_CODE+0x80,
1906           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1907         RULE is a one byte code of the range 0xA0..0xF0 that
1908         represents a composition rule.
1909   */
1910
/* Table indexed by a byte value.  For a byte that starts an
   emacs-mule sequence, the entry is the total number of bytes in
   that sequence, the starting byte included: detect_coding_emacs_mule
   reads `entry - 1' further bytes, and emacs_mule_char handles the
   values 1 through 4.  */
char emacs_mule_bytes[256];
1912
1913
1914 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1915    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1916    else return 0.  */
1917
1918 static int
1919 detect_coding_emacs_mule (struct coding_system *coding,
1920                           struct coding_detection_info *detect_info)
1921 {
1922   const unsigned char *src = coding->source, *src_base;
1923   const unsigned char *src_end = coding->source + coding->src_bytes;
1924   int multibytep = coding->src_multibyte;
1925   EMACS_INT consumed_chars = 0;
1926   int c;
1927   int found = 0;
1928
1929   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1930   /* A coding system of this category is always ASCII compatible.  */
1931   src += coding->head_ascii;
1932
1933   while (1)
1934     {
1935       src_base = src;
1936       ONE_MORE_BYTE (c);
1937       if (c < 0)
1938         continue;
1939       if (c == 0x80)
1940         {
1941           /* Perhaps the start of composite character.  We simply skip
1942              it because analyzing it is too heavy for detecting.  But,
1943              at least, we check that the composite character
1944              constitutes of more than 4 bytes.  */
1945           const unsigned char *src_start;
1946
1947         repeat:
1948           src_start = src;
1949           do
1950             {
1951               ONE_MORE_BYTE (c);
1952             }
1953           while (c >= 0xA0);
1954
1955           if (src - src_start <= 4)
1956             break;
1957           found = CATEGORY_MASK_EMACS_MULE;
1958           if (c == 0x80)
1959             goto repeat;
1960         }
1961
1962       if (c < 0x80)
1963         {
1964           if (c < 0x20
1965               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1966             break;
1967         }
1968       else
1969         {
1970           int more_bytes = emacs_mule_bytes[c] - 1;
1971
1972           while (more_bytes > 0)
1973             {
1974               ONE_MORE_BYTE (c);
1975               if (c < 0xA0)
1976                 {
1977                   src--;        /* Unread the last byte.  */
1978                   break;
1979                 }
1980               more_bytes--;
1981             }
1982           if (more_bytes != 0)
1983             break;
1984           found = CATEGORY_MASK_EMACS_MULE;
1985         }
1986     }
1987   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1988   return 0;
1989
1990  no_more_source:
1991   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1992     {
1993       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1994       return 0;
1995     }
1996   detect_info->found |= found;
1997   return 1;
1998 }
1999
2000
2001 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2002    character.  If CMP_STATUS indicates that we must expect MSEQ or
2003    RULE described above, decode it and return the negative value of
2004    the decoded character or rule.  If an invalid byte is found, return
2005    -1.  If SRC is too short, return -2.  */
2006
2007 static int
2008 emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2009                  int *nbytes, int *nchars, int *id,
2010                  struct composition_status *cmp_status)
2011 {
2012   const unsigned char *src_end = coding->source + coding->src_bytes;
2013   const unsigned char *src_base = src;
2014   int multibytep = coding->src_multibyte;
2015   int charset_ID;
2016   unsigned code;
2017   int c;
2018   int consumed_chars = 0;
2019   int mseq_found = 0;
2020
2021   ONE_MORE_BYTE (c);
2022   if (c < 0)
2023     {
2024       c = -c;
2025       charset_ID = emacs_mule_charset[0];
2026     }
2027   else
2028     {
2029       if (c >= 0xA0)
2030         {
2031           if (cmp_status->state != COMPOSING_NO
2032               && cmp_status->old_form)
2033             {
2034               if (cmp_status->state == COMPOSING_CHAR)
2035                 {
2036                   if (c == 0xA0)
2037                     {
2038                       ONE_MORE_BYTE (c);
2039                       c -= 0x80;
2040                       if (c < 0)
2041                         goto invalid_code;
2042                     }
2043                   else
2044                     c -= 0x20;
2045                   mseq_found = 1;
2046                 }
2047               else
2048                 {
2049                   *nbytes = src - src_base;
2050                   *nchars = consumed_chars;
2051                   return -c;
2052                 }
2053             }
2054           else
2055             goto invalid_code;
2056         }
2057
2058       switch (emacs_mule_bytes[c])
2059         {
2060         case 2:
2061           if ((charset_ID = emacs_mule_charset[c]) < 0)
2062             goto invalid_code;
2063           ONE_MORE_BYTE (c);
2064           if (c < 0xA0)
2065             goto invalid_code;
2066           code = c & 0x7F;
2067           break;
2068
2069         case 3:
2070           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2071               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2072             {
2073               ONE_MORE_BYTE (c);
2074               if (c < 0xA0 || (charset_ID = emacs_mule_charset[c]) < 0)
2075                 goto invalid_code;
2076               ONE_MORE_BYTE (c);
2077               if (c < 0xA0)
2078                 goto invalid_code;
2079               code = c & 0x7F;
2080             }
2081           else
2082             {
2083               if ((charset_ID = emacs_mule_charset[c]) < 0)
2084                 goto invalid_code;
2085               ONE_MORE_BYTE (c);
2086               if (c < 0xA0)
2087                 goto invalid_code;
2088               code = (c & 0x7F) << 8;
2089               ONE_MORE_BYTE (c);
2090               if (c < 0xA0)
2091                 goto invalid_code;
2092               code |= c & 0x7F;
2093             }
2094           break;
2095
2096         case 4:
2097           ONE_MORE_BYTE (c);
2098           if (c < 0 || (charset_ID = emacs_mule_charset[c]) < 0)
2099             goto invalid_code;
2100           ONE_MORE_BYTE (c);
2101           if (c < 0xA0)
2102             goto invalid_code;
2103           code = (c & 0x7F) << 8;
2104           ONE_MORE_BYTE (c);
2105           if (c < 0xA0)
2106             goto invalid_code;
2107           code |= c & 0x7F;
2108           break;
2109
2110         case 1:
2111           code = c;
2112           charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
2113           break;
2114
2115         default:
2116           abort ();
2117         }
2118       CODING_DECODE_CHAR (coding, src, src_base, src_end,
2119                           CHARSET_FROM_ID (charset_ID), code, c);
2120       if (c < 0)
2121         goto invalid_code;
2122     }
2123   *nbytes = src - src_base;
2124   *nchars = consumed_chars;
2125   if (id)
2126     *id = charset_ID;
2127   return (mseq_found ? -c : c);
2128
2129  no_more_source:
2130   return -2;
2131
2132  invalid_code:
2133   return -1;
2134 }
2135
2136
2137 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2138
/* Handle these composition sequences ('|': the end of header elements,
2140    BYTES and CHARS >= 0xA0):
2141
2142    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2143    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2144    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2145
   and these old forms:
2147
2148    (4) relative composition: 0x80 | MSEQ ... MSEQ
2149    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2150
2151    When the starter 0x80 and the following header elements are found,
2152    this annotation header is produced.
2153
2154         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2155
2156    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2157    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2158
2159    Then, upon reading the following elements, these codes are produced
2160    until the composition end is found:
2161
2162    (1) CHAR ... CHAR
2163    (2) ALT ... ALT CHAR ... CHAR
2164    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2165    (4) CHAR ... CHAR
2166    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2167
   When the composition end is found, LENGTH and NCHARS in the
   annotation header are updated as below:
2170
2171    (1) LENGTH: unchanged, NCHARS: unchanged
2172    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2173    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2174    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2175    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2176
2177    If an error is found while composing, the annotation header is
2178    changed to the original composition header (plus filler -1s) as
2179    below:
2180
2181    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
   (5)          [ 0x80 0xFF -1 -1 -1 ]
2183
2184    and the sequence [ -2 DECODED-RULE ] is changed to the original
2185    byte sequence as below:
2186         o the original byte sequence is B: [ B -1 ]
2187         o the original byte sequence is B1 B2: [ B1 B2 ]
2188
   Most of the routines are implemented as macros because they need
   access to many variables and labels of the caller,
   decode_coding_emacs_mule, and because each of them is usually
   expanded just once (so they do not increase the size of the
   compiled object).  */
2193
/* Decode the one-byte composition rule C of an Emacs 20 style
   composition sequence and set RULE to the decoded rule.

   C is modified: 0xA0 is subtracted from it.  The result must be in
   the range 0..80, otherwise control jumps to `invalid_code'.  It is
   split into the quotient and the remainder of a division by 9, a
   value of 4 in either is replaced by 10, and both are combined by
   COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)                  \
  do {                                                                  \
    int global_ref, new_ref;                                            \
                                                                        \
    c -= 0xA0;                                                          \
    if (! (c >= 0 && c < 81))                                           \
      goto invalid_code;                                                \
    global_ref = c / 9;                                                 \
    new_ref = c % 9;                                                    \
    if (global_ref == 4)                                                \
      global_ref = 10;                                                  \
    if (new_ref == 4)                                                   \
      new_ref = 10;                                                     \
    rule = COMPOSITION_ENCODE_RULE (global_ref, new_ref);               \
  } while (0)
2210
2211
/* Decode the two-byte composition rule of an Emacs 21 style
   composition sequence and set RULE to the decoded rule.  The first
   byte is C; the second one is read from SRC into C.  Each byte minus
   0x20 must be in the range 0..80, otherwise control jumps to
   `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)                  \
  do {                                                                  \
    int global_ref = c - 0x20;                                          \
    int new_ref;                                                        \
                                                                        \
    if (global_ref < 0 || global_ref > 80)                              \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    new_ref = c - 0x20;                                                 \
    if (new_ref < 0 || new_ref > 80)                                    \
      goto invalid_code;                                                \
    rule = COMPOSITION_ENCODE_RULE (global_ref, new_ref);               \
  } while (0)
2229
2230
/* Start of Emacs 21 style format.  On entry C holds the method byte,
   i.e. 0xF2 plus the composition method.  The next two bytes at SRC
   are 0xA0 plus the byte length of this composition information and
   0xA0 plus the number of characters composed by it.  Jump to
   `invalid_code' if a value is out of range; otherwise initialize
   CMP_STATUS and add the annotation header to CHARBUF.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    /* A relative composition has exactly 4 bytes here.  */             \
    if (method == COMPOSITION_RELATIVE && nbytes != 4)                  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (! (nchars > 0 && nchars < MAX_COMPOSITION_COMPONENTS))          \
      goto invalid_code;                                                \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    cmp_status->state = (method == COMPOSITION_RELATIVE                 \
                         ? COMPOSING_CHAR                               \
                         : COMPOSING_COMPONENT_CHAR);                   \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
2262
2263
/* Start of Emacs 20 style (old form) relative composition: reset
   CMP_STATUS for it and add an annotation header whose NCHARS and
   NBYTES are both 0.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  do {                                                          \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->method = COMPOSITION_RELATIVE;                  \
    cmp_status->old_form = 1;                                   \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2275
2276
/* Start of Emacs 20 style (old form) rule-base composition: reset
   CMP_STATUS for it and add an annotation header whose NCHARS and
   NBYTES are both 0.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  do {                                                          \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->method = COMPOSITION_WITH_RULE;                 \
    cmp_status->old_form = 1;                                   \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2288
2289
/* Read the byte following the composition starter and begin the
   composition it selects:

     0xF2 + a composition method   Emacs 21 style format
     0xFF                          Emacs 20 style rule-base composition
     0xA0..0xBF                    Emacs 20 style relative composition

   Any other byte makes control jump to `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_START()                   \
  do {                                                          \
    const unsigned char *current_src = src;                     \
                                                                \
    ONE_MORE_BYTE (c);                                          \
    if (c < 0)                                                  \
      goto invalid_code;                                        \
    if (c - 0xF2 >= COMPOSITION_RELATIVE                        \
        && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)          \
      DECODE_EMACS_MULE_21_COMPOSITION ();                      \
    else if (c == 0xFF)                                         \
      DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();             \
    else if (c >= 0xA0 && c < 0xC0)                             \
      {                                                         \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();           \
        /* C is already the first component of the composition; \
           go back so that it is read again as such.  */        \
        src = current_src;                                      \
      }                                                         \
    else                                                        \
      goto invalid_code;                                        \
  } while (0)
2313
/* Finish the current composition normally.  The annotation header
   starts CMP_STATUS->length elements before CHARBUF.  For an old-form
   composition, store the number of composed characters in its third
   element.  For a new-form composition whose method is beyond
   COMPOSITION_RELATIVE, set its first element to the third element
   minus CMP_STATUS->length.  */

#define EMACS_MULE_COMPOSITION_END()                            \
  do {                                                          \
    int *header = charbuf - cmp_status->length;                 \
                                                                \
    if (cmp_status->old_form)                                   \
      header[2] = cmp_status->nchars;                           \
    else if (cmp_status->method > COMPOSITION_RELATIVE)         \
      header[0] = header[2] - cmp_status->length;               \
    cmp_status->state = COMPOSING_NO;                           \
  } while (0)
2324
2325
2326 static int
2327 emacs_mule_finish_composition (int *charbuf,
2328                                struct composition_status *cmp_status)
2329 {
2330   int idx = - cmp_status->length;
2331   int new_chars;
2332
2333   if (cmp_status->old_form && cmp_status->nchars > 0)
2334     {
2335       charbuf[idx + 2] = cmp_status->nchars;
2336       new_chars = 0;
2337       if (cmp_status->method == COMPOSITION_WITH_RULE
2338           && cmp_status->state == COMPOSING_CHAR)
2339         {
2340           /* The last rule was invalid.  */
2341           int rule = charbuf[-1] + 0xA0;
2342
2343           charbuf[-2] = BYTE8_TO_CHAR (rule);
2344           charbuf[-1] = -1;
2345           new_chars = 1;
2346         }
2347     }
2348   else
2349     {
2350       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2351
2352       if (cmp_status->method == COMPOSITION_WITH_RULE)
2353         {
2354           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2355           charbuf[idx++] = -3;
2356           charbuf[idx++] = 0;
2357           new_chars = 1;
2358         }
2359       else
2360         {
2361           int nchars = charbuf[idx + 1] + 0xA0;
2362           int nbytes = charbuf[idx + 2] + 0xA0;
2363
2364           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2365           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2366           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2367           charbuf[idx++] = -1;
2368           new_chars = 4;
2369         }
2370     }
2371   cmp_status->state = COMPOSING_NO;
2372   return new_chars;
2373 }
2374
/* If a composition is in progress (state other than COMPOSING_NO),
   finish it with emacs_mule_finish_composition and add the value it
   returns to `char_offset'.  Does nothing otherwise.

   Expects `cmp_status', `charbuf' and `char_offset' to be in scope at
   the point of expansion.  The `break' only leaves the enclosing
   do-while of the macro itself.  */
#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
  do {                                                                    \
    if (cmp_status->state == COMPOSING_NO)                                \
      break;                                                              \
    char_offset += emacs_mule_finish_composition (charbuf, cmp_status);   \
  } while (0)
2380
2381
/* Decode emacs-mule encoded bytes for CODING.

   Bytes are read from coding->source, starting at offset
   coding->consumed and stopping at offset coding->src_bytes.  Decoded
   elements (characters and annotation data) are appended to
   coding->charbuf starting at index coding->charbuf_used.

   A composition that is still open when the input runs out is either
   finished (when CODING_MODE_LAST_BLOCK is set in coding->mode) or
   saved into cmp_status->carryover so that the next call can resume
   it.

   On return coding->consumed_char, coding->consumed and
   coding->charbuf_used are updated.  Invalid sequences are counted in
   coding->errors.  */
static void
decode_coding_emacs_mule (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  /* Start of the sequence handled by the current loop iteration; SRC
     is reset to it when the sequence turns out to be invalid.  */
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* We may produce two annotations (charset and composition) in one
     loop and one more charset annotation at the end.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3)
      /* We can produce up to 2 characters in a loop.  */
      - 1;
  EMACS_INT consumed_chars = 0, consumed_chars_base;
  /* NOTE(review): not referenced directly below; presumably used by
     ONE_MORE_BYTE, which is defined elsewhere -- confirm.  */
  int multibytep = coding->src_multibyte;
  EMACS_INT char_offset = coding->produced_char;
  /* Value of CHAR_OFFSET at which the current run of characters of
     charset LAST_ID started.  */
  EMACS_INT last_offset = char_offset;
  int last_id = charset_ascii;
  int eol_dos =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR when EOL_DOS is set, or -1 if
     there is none pending.  */
  int byte_after_cr = -1;
  struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;

  /* A composition was left open by a previous call: put the saved
     elements back in front of the data decoded now.  */
  if (cmp_status->state != COMPOSING_NO)
    {
      int i;

      if (charbuf_end - charbuf < cmp_status->length)
        abort ();
      for (i = 0; i < cmp_status->length; i++)
        *charbuf++ = cmp_status->carryover[i];
      coding->annotated = 1;
    }

  while (1)
    {
      int c, id IF_LINT (= 0);

      src_base = src;
      consumed_chars_base = consumed_chars;

      /* Stop when the output buffer has no more guaranteed room.  */
      if (charbuf >= charbuf_end)
        {
          /* Step back over the byte that was read ahead after a CR,
             presumably so that it is read again by the next call.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        c = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c);

      /* A negative C or the byte 0x80 ends any open composition.  A
         negative C is stored negated as one element; 0x80 starts the
         parsing of a new composition.
         NOTE(review): what a negative value from ONE_MORE_BYTE stands
         for is defined outside this function -- confirm.  */
      if (c < 0 || c == 0x80)
        {
          EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
          if (c < 0)
            {
              *charbuf++ = -c;
              char_offset++;
            }
          else
            DECODE_EMACS_MULE_COMPOSITION_START ();
          continue;
        }

      if (c < 0x80)
        {
          /* With DOS end-of-line handling, read ahead the byte that
             follows a CR; it is consumed by the next iteration.  */
          if (eol_dos && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          id = charset_ascii;
          if (cmp_status->state != COMPOSING_NO)
            {
              /* A byte below 0x80 finishes an old-form composition;
                 while reading components of a new-form one it counts
                 as one component.  */
              if (cmp_status->old_form)
                EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
              else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
                cmp_status->ncomps--;
            }
        }
      else
        {
          int nchars IF_LINT (= 0), nbytes IF_LINT (= 0);
          /* emacs_mule_char can load a charset map from a file, which
             allocates a large structure and might cause buffer text
             to be relocated as result.  Thus, we need to remember the
             original pointer to buffer text, and fix up all related
             pointers after the call.  */
          const unsigned char *orig = coding->source;
          EMACS_INT offset;

          c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
                               cmp_status);
          offset = coding->source - orig;
          if (offset)
            {
              src += offset;
              src_base += offset;
              src_end += offset;
            }
          /* -1 is handled as an invalid sequence and -2 stops
             decoding; other negative values fall through and are
             handled by the composition states below.  */
          if (c < 0)
            {
              if (c == -1)
                goto invalid_code;
              if (c == -2)
                break;
            }
          src = src_base + nbytes;
          consumed_chars = consumed_chars_base + nchars;
          if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
            cmp_status->ncomps -= nchars;
        }

      /* Now if C >= 0, we found a normally encoded character, if C <
         0, we found an old-style composition component character or
         rule.  */

      if (cmp_status->state == COMPOSING_NO)
        {
          /* When the charset changes, record charset data for the run
             that just ended (unless that run was ASCII) and start a
             new run at the current offset.  */
          if (last_id != id)
            {
              if (last_id != charset_ascii)
                ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
                                  last_id);
              last_id = id;
              last_offset = char_offset;
            }
          *charbuf++ = c;
          char_offset++;
        }
      else if (cmp_status->state == COMPOSING_CHAR)
        {
          if (cmp_status->old_form)
            {
              if (c >= 0)
                {
                  /* A normally encoded character ends an old-form
                     composition.  */
                  EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
                  *charbuf++ = c;
                  char_offset++;
                }
              else
                {
                  /* Store the component; NCHARS counts up in the old
                     form.  */
                  *charbuf++ = -c;
                  cmp_status->nchars++;
                  cmp_status->length++;
                  if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
                    EMACS_MULE_COMPOSITION_END ();
                  else if (cmp_status->method == COMPOSITION_WITH_RULE)
                    cmp_status->state = COMPOSING_RULE;
                }
            }
          else
            {
              /* In the new form NCHARS counts down; the composition
                 ends when it reaches zero.  */
              *charbuf++ = c;
              cmp_status->length++;
              cmp_status->nchars--;
              if (cmp_status->nchars == 0)
                EMACS_MULE_COMPOSITION_END ();
            }
        }
      else if (cmp_status->state == COMPOSING_RULE)
        {
          int rule;

          if (c >= 0)
            {
              /* A normal character where a rule was expected ends the
                 composition.  */
              EMACS_MULE_COMPOSITION_END ();
              *charbuf++ = c;
              char_offset++;
            }
          else
            {
              /* Store the decoded rule as the two elements -2 and
                 RULE, then expect a character again.  */
              c = -c;
              DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
              if (rule < 0)
                goto invalid_code;
              *charbuf++ = -2;
              *charbuf++ = rule;
              cmp_status->length += 2;
              cmp_status->state = COMPOSING_CHAR;
            }
        }
      else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
        {
          /* NCOMPS was already decremented for this component above.
             Zero means all components were read; a negative value
             finishes the composition.  */
          *charbuf++ = c;
          cmp_status->length++;
          if (cmp_status->ncomps == 0)
            cmp_status->state = COMPOSING_CHAR;
          else if (cmp_status->ncomps > 0)
            {
              if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
                cmp_status->state = COMPOSING_COMPONENT_RULE;
            }
          else
            EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
        }
      else                      /* COMPOSING_COMPONENT_RULE */
        {
          int rule;

          DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
          if (rule < 0)
            goto invalid_code;
          *charbuf++ = -2;
          *charbuf++ = rule;
          cmp_status->length += 2;
          cmp_status->ncomps--;
          if (cmp_status->ncomps > 0)
            cmp_status->state = COMPOSING_COMPONENT_CHAR;
          else
            EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
        }
      continue;

      /* Recovery: finish any open composition, go back to the start
         of the offending sequence, and emit just its first byte (as
         itself if ASCII, otherwise as a raw-byte character).  The
         remaining bytes are decoded again by the next iterations.  */
    invalid_code:
      EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
      char_offset++;
      coding->errors++;
    }

  /* NOTE(review): nothing in this function jumps to this label
     directly; presumably ONE_MORE_BYTE does so when the source is
     exhausted -- confirm.  */
 no_more_source:
  if (cmp_status->state != COMPOSING_NO)
    {
      if (coding->mode & CODING_MODE_LAST_BLOCK)
        EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
      else
        {
          int i;

          /* Take the elements of the unfinished composition out of
             the output and save them for the next call.  */
          charbuf -= cmp_status->length;
          for (i = 0; i < cmp_status->length; i++)
            cmp_status->carryover[i] = charbuf[i];
        }
    }
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  /* Only input up to SRC_BASE, the start of the last (possibly
     incomplete) sequence, is reported as consumed.  */
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
2625
2626
/* Store in CODES[0] and CODES[1] the leading code(s) for the
   emacs-mule charset ID.

   ID below 0xA0 is its own single leading code and CODES[1] is set to
   0 (callers test CODES[1] to decide whether a second byte exists).
   Otherwise CODES[1] is ID and CODES[0] is selected by range:

     0xA0..0xDF -> 0x9A
     0xE0..0xEF -> 0x9B
     0xF0..0xF4 -> 0x9C
     0xF5..     -> 0x9D

   The expansion carries no trailing semicolon, so the macro behaves
   like a single statement (e.g. as the body of an `if' that has an
   `else').  Arguments are parenthesized but evaluated several times;
   pass expressions without side effects.  */
#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
2640
2641
/* Encode the elements of coding->charbuf (coding->charbuf_used of
   them) into emacs-mule bytes, appending to coding->destination at
   offset coding->produced.

   The charset list in the attributes of CODING is forced to
   Vemacs_mule_charset_list.  On completion the conversion result is
   recorded as CODING_RESULT_SUCCESS and coding->produced_char and
   coding->produced are updated.  Always returns 0.  */
static int
encode_coding_emacs_mule (struct coding_system *coding)
{
  /* NOTE(review): `multibytep', `dst_end' and `produced_chars' are not
     referenced directly below; presumably they are used by the
     ASSURE_DESTINATION / EMIT_* macros defined elsewhere -- confirm.  */
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Room requested from ASSURE_DESTINATION before each element.  */
  int safe_room = 8;
  EMACS_INT produced_chars = 0;
  Lisp_Object attrs, charset_list;
  int c;
  /* Charset ID taken from the most recent charset annotation, or -1
     for no preference.  */
  int preferred_charset_id = -1;

  CODING_GET_INFO (coding, attrs, charset_list);
  if (! EQ (charset_list, Vemacs_mule_charset_list))
    {
      CODING_ATTR_CHARSET_LIST (attrs)
        = charset_list = Vemacs_mule_charset_list;
    }

  while (charbuf < charbuf_end)
    {
      ASSURE_DESTINATION (safe_room);
      c = *charbuf++;

      if (c < 0)
        {
          /* Handle an annotation.  */
          switch (*charbuf)
            {
            case CODING_ANNOTATE_COMPOSITION_MASK:
              /* Not yet implemented.  */
              break;
            case CODING_ANNOTATE_CHARSET_MASK:
              /* NOTE(review): CHARBUF was already advanced past the
                 length element, so index 3 here is the fifth element
                 of the annotation; verify this against the layout
                 written by ADD_CHARSET_DATA.  */
              preferred_charset_id = charbuf[3];
              /* Ignore a preferred charset that is not in the charset
                 list.  */
              if (preferred_charset_id >= 0
                  && NILP (Fmemq (make_number (preferred_charset_id),
                                  charset_list)))
                preferred_charset_id = -1;
              break;
            default:
              abort ();
            }
          /* -C is the annotation length; one element (the length
             itself) has already been consumed.  */
          charbuf += -c - 1;
          continue;
        }

      if (ASCII_CHAR_P (c))
        EMIT_ONE_ASCII_BYTE (c);
      else if (CHAR_BYTE8_P (c))
        {
          c = CHAR_TO_BYTE8 (c);
          EMIT_ONE_BYTE (c);
        }
      else
        {
          struct charset *charset;
          unsigned code;
          int dimension;
          int emacs_mule_id;
          unsigned char leading_codes[2];

          /* Try the preferred charset first; otherwise (or if it
             cannot encode C) pick a charset from the list.  */
          if (preferred_charset_id >= 0)
            {
              int result;

              charset = CHARSET_FROM_ID (preferred_charset_id);
              CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
              if (result)
                code = ENCODE_CHAR (charset, c);
              else
                CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
                                     &code, charset);
            }
          else
            CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
                                 &code, charset);
          if (! charset)
            {
              /* No charset can encode C: substitute the default
                 character of CODING.  */
              c = coding->default_char;
              if (ASCII_CHAR_P (c))
                {
                  EMIT_ONE_ASCII_BYTE (c);
                  continue;
                }
              /* NOTE(review): CHARSET is dereferenced below without
                 being re-checked after this lookup; presumably the
                 default character is always encodable -- confirm.  */
              CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
                                   &code, charset);
            }
          dimension = CHARSET_DIMENSION (charset);
          emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
          /* Emit one or two leading codes, then the code point with
             the high bit of each byte set.  */
          EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
          EMIT_ONE_BYTE (leading_codes[0]);
          if (leading_codes[1])
            EMIT_ONE_BYTE (leading_codes[1]);
          if (dimension == 1)
            EMIT_ONE_BYTE (code | 0x80);
          else
            {
              code |= 0x8080;
              EMIT_ONE_BYTE (code >> 8);
              EMIT_ONE_BYTE (code & 0xFF);
            }
        }
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
2752
2753 \f
2754 /*** 7. ISO2022 handlers ***/
2755
2756 /* The following note describes the coding system ISO2022 briefly.
2757    Since the intention of this note is to help understand the
2758    functions in this file, some parts are NOT ACCURATE or are OVERLY
2759    SIMPLIFIED.  For thorough understanding, please refer to the
2760    original document of ISO2022.  This is equivalent to the standard
2761    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2762
2763    ISO2022 provides many mechanisms to encode several character sets
2764    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2765    is encoded using bytes less than 128.  This may make the encoded
2766    text a little bit longer, but the text passes more easily through
2767    several types of gateway, some of which strip off the MSB (Most
2768    Significant Bit).
2769
2770    There are two kinds of character sets: control character sets and
2771    graphic character sets.  The former contain control characters such
2772    as `newline' and `escape' to provide control functions (control
2773    functions are also provided by escape sequences).  The latter
2774    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2775    two control character sets and many graphic character sets.
2776
2777    Graphic character sets are classified into one of the following
2778    four classes, according to the number of bytes (DIMENSION) and
2779    number of characters in one dimension (CHARS) of the set:
2780    - DIMENSION1_CHARS94
2781    - DIMENSION1_CHARS96
2782    - DIMENSION2_CHARS94
2783    - DIMENSION2_CHARS96
2784
2785    In addition, each character set is assigned an identification tag,
2786    unique for each set, called the "final character" (denoted as <F>
2787    hereafter).  The <F> of each character set is decided by ECMA(*)
2788    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2789    (0x30..0x3F are for private use only).
2790
2791    Note (*): ECMA = European Computer Manufacturers Association
2792
2793    Here are examples of graphic character sets [NAME(<F>)]:
2794         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2795         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2796         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2797         o DIMENSION2_CHARS96 -- none for the moment
2798
2799    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2800         C0 [0x00..0x1F] -- control character plane 0
2801         GL [0x20..0x7F] -- graphic character plane 0
2802         C1 [0x80..0x9F] -- control character plane 1
2803         GR [0xA0..0xFF] -- graphic character plane 1
2804
2805    A control character set is directly designated and invoked to C0 or
2806    C1 by an escape sequence.  The most common case is that:
2807    - ISO646's  control character set is designated/invoked to C0, and
2808    - ISO6429's control character set is designated/invoked to C1,
2809    and usually these designations/invocations are omitted in encoded
2810    text.  In a 7-bit environment, only C0 can be used, and a control
2811    character for C1 is encoded by an appropriate escape sequence to
2812    fit into the environment.  All control characters for C1 are
2813    defined to have corresponding escape sequences.
2814
2815    A graphic character set is at first designated to one of four
2816    graphic registers (G0 through G3), then these graphic registers are
2817    invoked to GL or GR.  These designations and invocations can be
2818    done independently.  The most common case is that G0 is invoked to
2819    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2820    these invocations and designations are omitted in encoded text.
2821    In a 7-bit environment, only GL can be used.
2822
2823    When a graphic character set of CHARS94 is invoked to GL, codes
2824    0x20 and 0x7F of the GL area work as control characters SPACE and
2825    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2826    be used.
2827
2828    There are two ways of invocation: locking-shift and single-shift.
2829    With locking-shift, the invocation lasts until the next different
2830    invocation, whereas with single-shift, the invocation affects the
2831    following character only and doesn't affect the locking-shift
2832    state.  Invocations are done by the following control characters or
2833    escape sequences:
2834
2835    ----------------------------------------------------------------------
2836    abbrev  function                  cntrl escape seq   description
2837    ----------------------------------------------------------------------
2838    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2839    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2840    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2841    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2842    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2843    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2844    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2845    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2846    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2847    ----------------------------------------------------------------------
2848    (*) These are not used by any known coding system.
2849
2850    Control characters for these functions are defined by macros
2851    ISO_CODE_XXX in `coding.h'.
2852
2853    Designations are done by the following escape sequences:
2854    ----------------------------------------------------------------------
2855    escape sequence      description
2856    ----------------------------------------------------------------------
2857    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2858    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2859    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2860    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2861    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2862    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2863    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2864    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2865    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2866    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2867    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2868    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2869    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2870    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2871    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2872    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2873    ----------------------------------------------------------------------
2874
2875    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2876    of dimension 1, chars 94, and final character <F>, etc...
2877
2878    Note (*): Although these designations are not allowed in ISO2022,
2879    Emacs accepts them on decoding, and produces them on encoding
2880    CHARS96 character sets in a coding system which is characterized as
2881    7-bit environment, non-locking-shift, and non-single-shift.
2882
2883    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2884    '(' must be omitted.  We refer to this as "short-form" hereafter.
2885
2886    Now you may notice that there are a lot of ways of encoding the
2887    same multilingual text in ISO2022.  Actually, there exist many
2888    coding systems such as Compound Text (used in X11's inter client
2889    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2890    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2891    localized platforms), and all of these are variants of ISO2022.
2892
2893    In addition to the above, Emacs handles two more kinds of escape
2894    sequences: ISO6429's direction specification and Emacs' private
2895    sequence for specifying character composition.
2896
2897    ISO6429's direction specification takes the following form:
2898         o CSI ']'      -- end of the current direction
2899         o CSI '0' ']'  -- end of the current direction
2900         o CSI '1' ']'  -- start of left-to-right text
2901         o CSI '2' ']'  -- start of right-to-left text
2902    The control character CSI (0x9B: control sequence introducer) is
2903    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2904
2905    Character composition specification takes the following form:
2906         o ESC '0' -- start relative composition
2907         o ESC '1' -- end composition
2908         o ESC '2' -- start rule-base composition (*)
2909         o ESC '3' -- start relative composition with alternate chars  (**)
2910         o ESC '4' -- start rule-base composition with alternate chars  (**)
2911   Since these are not standard escape sequences of any ISO standard,
2912   the use of them with these meanings is restricted to Emacs only.
2913
2914   (*) This form is used only in Emacs 20.7 and older versions,
2915   but newer versions can safely decode it.
2916   (**) This form is used only in Emacs 21.1 and newer versions,
2917   and older versions can't decode it.
2918
2919   Here's a list of example usages of these composition escape
2920   sequences (categorized by `enum composition_method').
2921
2922   COMPOSITION_RELATIVE:
2923         ESC 0 CHAR [ CHAR ] ESC 1
2924   COMPOSITION_WITH_RULE:
2925         ESC 2 CHAR [ RULE CHAR ] ESC 1
2926   COMPOSITION_WITH_ALTCHARS:
2927         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2928   COMPOSITION_WITH_RULE_ALTCHARS:
2929         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2930
/* Table of 256 ISO code classes (enum iso_code_class_type);
   presumably indexed by byte value -- TODO confirm against the code
   that fills it in.  */
static enum iso_code_class_type iso_code_class[256];
2932
/* Return nonzero if the charset with id ID has an entry other than
   255 in CODING's safe_charsets table.  255 is the value with which
   setup_iso_safe_charsets fills entries that get no graphic register
   assigned.

   An ID outside 0..CODING->max_charset_id yields zero.  The lower
   bound matters because charset ids taken from lookup tables can be
   negative (meaning "no such charset"), and such an id must not be
   used as an index.  ID is evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id)      \
  ((id) >= 0                            \
   && (id) <= (coding)->max_charset_id  \
   && (coding)->safe_charsets[id] != 255)
2936
2937 static void
2938 setup_iso_safe_charsets (Lisp_Object attrs)
2939 {
2940   Lisp_Object charset_list, safe_charsets;
2941   Lisp_Object request;
2942   Lisp_Object reg_usage;
2943   Lisp_Object tail;
2944   int reg94, reg96;
2945   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2946   int max_charset_id;
2947
2948   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2949   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2950       && ! EQ (charset_list, Viso_2022_charset_list))
2951     {
2952       CODING_ATTR_CHARSET_LIST (attrs)
2953         = charset_list = Viso_2022_charset_list;
2954       ASET (attrs, coding_attr_safe_charsets, Qnil);
2955     }
2956
2957   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2958     return;
2959
2960   max_charset_id = 0;
2961   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2962     {
2963       int id = XINT (XCAR (tail));
2964       if (max_charset_id < id)
2965         max_charset_id = id;
2966     }
2967
2968   safe_charsets = make_uninit_string (max_charset_id + 1);
2969   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
2970   request = AREF (attrs, coding_attr_iso_request);
2971   reg_usage = AREF (attrs, coding_attr_iso_usage);
2972   reg94 = XINT (XCAR (reg_usage));
2973   reg96 = XINT (XCDR (reg_usage));
2974
2975   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2976     {
2977       Lisp_Object id;
2978       Lisp_Object reg;
2979       struct charset *charset;
2980
2981       id = XCAR (tail);
2982       charset = CHARSET_FROM_ID (XINT (id));
2983       reg = Fcdr (Fassq (id, request));
2984       if (! NILP (reg))
2985         SSET (safe_charsets, XINT (id), XINT (reg));
2986       else if (charset->iso_chars_96)
2987         {
2988           if (reg96 < 4)
2989             SSET (safe_charsets, XINT (id), reg96);
2990         }
2991       else
2992         {
2993           if (reg94 < 4)
2994             SSET (safe_charsets, XINT (id), reg94);
2995         }
2996     }
2997   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2998 }
2999
3000
3001 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3002    Check if a text is encoded in one of ISO-2022 based coding systems.
3003    If it is, return 1, else return 0.  */
3004
3005 static int
3006 detect_coding_iso_2022 (struct coding_system *coding,
3007                         struct coding_detection_info *detect_info)
3008 {
3009   const unsigned char *src = coding->source, *src_base = src;
3010   const unsigned char *src_end = coding->source + coding->src_bytes;
3011   int multibytep = coding->src_multibyte;
3012   int single_shifting = 0;
3013   int id;
3014   int c, c1;
3015   EMACS_INT consumed_chars = 0;
3016   int i;
3017   int rejected = 0;
3018   int found = 0;
3019   int composition_count = -1;
3020
3021   detect_info->checked |= CATEGORY_MASK_ISO;
3022
3023   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3024     {
3025       struct coding_system *this = &(coding_categories[i]);
3026       Lisp_Object attrs, val;
3027
3028       if (this->id < 0)
3029         continue;
3030       attrs = CODING_ID_ATTRS (this->id);
3031       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3032           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3033         setup_iso_safe_charsets (attrs);
3034       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3035       this->max_charset_id = SCHARS (val) - 1;
3036       this->safe_charsets = SDATA (val);
3037     }
3038
3039   /* A coding system of this category is always ASCII compatible.  */
3040   src += coding->head_ascii;
3041
3042   while (rejected != CATEGORY_MASK_ISO)
3043     {
3044       src_base = src;
3045       ONE_MORE_BYTE (c);
3046       switch (c)
3047         {
3048         case ISO_CODE_ESC:
3049           if (inhibit_iso_escape_detection)
3050             break;
3051           single_shifting = 0;
3052           ONE_MORE_BYTE (c);
3053           if (c == 'N' || c == 'O')
3054             {
3055               /* ESC <Fe> for SS2 or SS3.  */
3056               single_shifting = 1;
3057               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3058             }
3059           else if (c == '1')
3060             {
3061               /* End of composition.  */
3062               if (composition_count < 0
3063                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3064                 /* Invalid */
3065                 break;
3066               composition_count = -1;
3067               found |= CATEGORY_MASK_ISO;
3068             }
3069           else if (c >= '0' && c <= '4')
3070             {
3071               /* ESC <Fp> for start/end composition.  */
3072               composition_count = 0;
3073             }
3074           else
3075             {
3076               if (c >= '(' && c <= '/')
3077                 {
3078                   /* Designation sequence for a charset of dimension 1.  */
3079                   ONE_MORE_BYTE (c1);
3080                   if (c1 < ' ' || c1 >= 0x80
3081                       || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3082                     /* Invalid designation sequence.  Just ignore.  */
3083                     break;
3084                 }
3085               else if (c == '$')
3086                 {
3087                   /* Designation sequence for a charset of dimension 2.  */
3088                   ONE_MORE_BYTE (c);
3089                   if (c >= '@' && c <= 'B')
3090                     /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3091                     id = iso_charset_table[1][0][c];
3092                   else if (c >= '(' && c <= '/')
3093                     {
3094                       ONE_MORE_BYTE (c1);
3095                       if (c1 < ' ' || c1 >= 0x80
3096                           || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3097                         /* Invalid designation sequence.  Just ignore.  */
3098                         break;
3099                     }
3100                   else
3101                     /* Invalid designation sequence.  Just ignore it.  */
3102                     break;
3103                 }
3104               else
3105                 {
3106                   /* Invalid escape sequence.  Just ignore it.  */
3107                   break;
3108                 }
3109
3110               /* We found a valid designation sequence for CHARSET.  */
3111               rejected |= CATEGORY_MASK_ISO_8BIT;
3112               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3113                                   id))
3114                 found |= CATEGORY_MASK_ISO_7;
3115               else
3116                 rejected |= CATEGORY_MASK_ISO_7;
3117               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3118                                   id))
3119                 found |= CATEGORY_MASK_ISO_7_TIGHT;
3120               else
3121                 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3122               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3123                                   id))
3124                 found |= CATEGORY_MASK_ISO_7_ELSE;
3125               else
3126                 rejected |= CATEGORY_MASK_ISO_7_ELSE;
3127               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3128                                   id))
3129                 found |= CATEGORY_MASK_ISO_8_ELSE;
3130               else
3131                 rejected |= CATEGORY_MASK_ISO_8_ELSE;
3132             }
3133           break;
3134
3135         case ISO_CODE_SO:
3136         case ISO_CODE_SI:
3137           /* Locking shift out/in.  */
3138           if (inhibit_iso_escape_detection)
3139             break;
3140           single_shifting = 0;
3141           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3142           break;
3143
3144         case ISO_CODE_CSI:
3145           /* Control sequence introducer.  */
3146           single_shifting = 0;
3147           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3148           found |= CATEGORY_MASK_ISO_8_ELSE;
3149           goto check_extra_latin;
3150
3151         case ISO_CODE_SS2:
3152         case ISO_CODE_SS3:
3153           /* Single shift.   */
3154           if (inhibit_iso_escape_detection)
3155             break;
3156           single_shifting = 0;
3157           rejected |= CATEGORY_MASK_ISO_7BIT;
3158           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3159               & CODING_ISO_FLAG_SINGLE_SHIFT)
3160             {
3161               found |= CATEGORY_MASK_ISO_8_1;
3162               single_shifting = 1;
3163             }
3164           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3165               & CODING_ISO_FLAG_SINGLE_SHIFT)
3166             {
3167               found |= CATEGORY_MASK_ISO_8_2;
3168               single_shifting = 1;
3169             }
3170           if (single_shifting)
3171             break;
3172         check_extra_latin:
3173           if (! VECTORP (Vlatin_extra_code_table)
3174               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3175             {
3176               rejected = CATEGORY_MASK_ISO;
3177               break;
3178             }
3179           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3180               & CODING_ISO_FLAG_LATIN_EXTRA)
3181             found |= CATEGORY_MASK_ISO_8_1;
3182           else
3183             rejected |= CATEGORY_MASK_ISO_8_1;
3184           rejected |= CATEGORY_MASK_ISO_8_2;
3185           break;
3186
3187         default:
3188           if (c < 0)
3189             continue;
3190           if (c < 0x80)
3191             {
3192               if (composition_count >= 0)
3193                 composition_count++;
3194               single_shifting = 0;
3195               break;
3196             }
3197           if (c >= 0xA0)
3198             {
3199               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3200               found |= CATEGORY_MASK_ISO_8_1;
3201               /* Check the length of succeeding codes of the range
3202                  0xA0..0xFF.  If the byte length is even, we include
3203                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3204                  only when we are not single shifting.  */
3205               if (! single_shifting
3206                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3207                 {
3208                   int len = 1;
3209                   while (src < src_end)
3210                     {
3211                       src_base = src;
3212                       ONE_MORE_BYTE (c);
3213                       if (c < 0xA0)
3214                         {
3215                           src = src_base;
3216                           break;
3217                         }
3218                       len++;
3219                     }
3220
3221                   if (len & 1 && src < src_end)
3222                     {
3223                       rejected |= CATEGORY_MASK_ISO_8_2;
3224                       if (composition_count >= 0)
3225                         composition_count += len;
3226                     }
3227                   else
3228                     {
3229                       found |= CATEGORY_MASK_ISO_8_2;
3230                       if (composition_count >= 0)
3231                         composition_count += len / 2;
3232                     }
3233                 }
3234               break;
3235             }
3236         }
3237     }
3238   detect_info->rejected |= CATEGORY_MASK_ISO;
3239   return 0;
3240
3241  no_more_source:
3242   detect_info->rejected |= rejected;
3243   detect_info->found |= (found & ~rejected);
3244   return 1;
3245 }
3246
3247
3248 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
3249    escape sequence should be kept.

        REG selects the designation slot of CODING to update; DIM,
        CHARS_96 and FINAL are handed to ISO_CHARSET_TABLE to look up
        the charset id.  The macro expects a variable `coding' in the
        expanding scope, and CHARS_96 must be an lvalue because it
        doubles as the "keep the sequence" output.  The arguments are
        expanded more than once.

        If FINAL is outside ['0', 128), or the table lookup yields a
        negative id, or the charset fails SAFE_CHARSET_P for CODING, the
        slot is marked with -2, CHARS_96 is set to -1, and the rest of
        the macro is skipped.

        Otherwise the id is stored as the designation of REG, after
        replacing charset_jisx0201_roman with charset_ascii when
        CODING_ISO_FLAG_USE_ROMAN is set, and charset_jisx0208_1978 with
        charset_jisx0208 when CODING_ISO_FLAG_USE_OLDJIS is set.  */
3250 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3251   do {                                                                  \
3252     int id, prev;                                                       \
3253                                                                         \
3254     if (final < '0' || final >= 128                                     \
3255         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
3256         || !SAFE_CHARSET_P (coding, id))                                \
3257       {                                                                 \
3258         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
3259         chars_96 = -1;                                                  \
3260         break;          /* Leaves the do ... while (0) only.  */        \
3261       }                                                                 \
3262     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
3263     if (id == charset_jisx0201_roman)                                   \
3264       {                                                                 \
3265         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
3266           id = charset_ascii;                                           \
3267       }                                                                 \
3268     else if (id == charset_jisx0208_1978)                               \
3269       {                                                                 \
3270         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3271           id = charset_jisx0208;                                        \
3272       }                                                                 \
3273     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3274     /* If there was an invalid designation to REG previously, and this  \
3275        designation is ASCII to REG, we should keep this designation     \
3276        sequence.  */                                                    \
3277     if (prev == -2 && id == charset_ascii)                              \
3278       chars_96 = -1;                                                    \
3279   } while (0)
3280
3281
3282 /* Handle these composition sequences (ALT: alternate char):
3283
3284    (1) relative composition: ESC 0 CHAR ... ESC 1
3285    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3286    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3287    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3288
3289    When the start sequence (ESC 0/2/3/4) is found, this annotation
3290    header is produced.
3291
3292         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3293
3294    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3295    produced until the end sequence (ESC 1) is found:
3296
3297    (1) CHAR ... CHAR
3298    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3299    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3300    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3301
3302    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3303    annotation header are updated as below:
3304
3305    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3306    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3307    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3308    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3309
3310    If an error is found while composing, the annotation header is
3311    changed to:
3312
3313         [ ESC '0'/'2'/'3'/'4' -2 0 ]
3314
3315    and the sequence [ -2 DECODED-RULE ] is changed to the original
3316    byte sequence as below:
3317         o the original byte sequence is B: [ B -1 ]
3318         o the original byte sequence is B1 B2: [ B1 B2 ]
3319    and the sequence [ -1 -1 ] is changed to the original byte
3320    sequence:
3321         [ ESC '0' ]
3322 */
3323
3324 /* Decode a composition rule C1 and maybe one more byte from the
3325    source, and set RULE to the encoded composition rule.  If the rule
3326    is invalid, goto invalid_code.

        C1 is taken from the expanding scope, which must also provide
        the label invalid_code; RULE must be an int lvalue.

        C1 - 32 below zero is invalid.  A value below 81 is an old-format
        rule held in that single byte: it is split into a quotient and a
        remainder by 9, the value 4 is remapped to 10 in either part,
        and the two parts are packed with COMPOSITION_ENCODE_RULE.  Any
        other value is a new-format rule: one more byte is fetched with
        ONE_MORE_BYTE, the pair is checked with
        COMPOSITION_ENCODE_RULE_VALID, packed, and 0x100 is added so the
        result can be told apart from an old-format rule.

        NOTE(review): ONE_MORE_BYTE is defined elsewhere; presumably it
        can itself leave the macro when the source runs out -- confirm
        at its definition.  */
3327
3328 #define DECODE_COMPOSITION_RULE(rule)                                   \
3329   do {                                                                  \
3330     rule = c1 - 32;                                                     \
3331     if (rule < 0)                                                       \
3332       goto invalid_code;                                                \
3333     if (rule < 81)              /* old format (before ver.21) */        \
3334       {                                                                 \
3335         int gref = (rule) / 9;                                          \
3336         int nref = (rule) % 9;                                          \
3337         if (gref == 4) gref = 10;                                       \
3338         if (nref == 4) nref = 10;                                       \
3339         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
3340       }                                                                 \
3341     else                        /* new format (after ver.21) */         \
3342       {                                                                 \
3343         int b;                                                          \
3344                                                                         \
3345         ONE_MORE_BYTE (b);                                              \
3346         if (! COMPOSITION_ENCODE_RULE_VALID (rule - 81, b - 32))        \
3347           goto invalid_code;                                            \
3348         rule = COMPOSITION_ENCODE_RULE (rule - 81, b - 32);             \
3349         rule += 0x100;   /* Distinguish it from the old format.  */     \
3350       }                                                                 \
3351   } while (0)
3352
/* Turn a decoded rule RULE back into the byte form it was read in, and
   store that form in charbuf[idx] and charbuf[idx + 1].

   RULE below 0x100 is an old-format rule that occupied one byte: the
   byte is rebuilt (with the value 10 mapped back to 4 in either part),
   the second slot is filled with -1, and `new_chars' grows by one.
   Otherwise RULE is a new-format rule: each slot receives one byte and
   `new_chars' grows by two.

   The macro uses `charbuf', `idx' and `new_chars' from the expanding
   scope.  RULE is expanded unparenthesized and several times, so pass a
   simple expression without side effects.  All uses of RULE come
   before either slot is written, so RULE may name charbuf[idx + 1]
   itself.  The divisor 12 presumably mirrors the packing done by
   COMPOSITION_ENCODE_RULE -- confirm against its definition.  */
3353 #define ENCODE_COMPOSITION_RULE(rule)                           \
3354   do {                                                          \
3355     int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3356                                                                 \
3357     if (rule < 0x100)           /* old format */                \
3358       {                                                         \
3359         if (gref == 10) gref = 4;                               \
3360         if (nref == 10) nref = 4;                               \
3361         charbuf[idx] = 32 + gref * 9 + nref;                    \
3362         charbuf[idx + 1] = -1;                                  \
3363         new_chars++;                                            \
3364       }                                                         \
3365     else                                /* new format */        \
3366       {                                                         \
3367         charbuf[idx] = 32 + 81 + gref;                          \
3368         charbuf[idx + 1] = 32 + nref;                           \
3369         new_chars += 2;                                         \
3370       }                                                         \
3371   } while (0)
3372
3373 /* Finish the current composition as invalid.

        CHARBUF points just past the data stored so far for the
        composition, and CMP_STATUS->length is the distance back to the
        first slot of its annotation header, so the function works on
        CHARBUF[-length] .. CHARBUF[-1] in place.

        The first five slots are overwritten with ESC, the character
        ('0', '2', '3' or '4') matching CMP_STATUS->method, and the
        values -2, 0, -1.  For every method except the relative one, the
        remaining slots are then scanned: a [ -2 RULE ] pair is replaced
        by the rule's original bytes via ENCODE_COMPOSITION_RULE, and a
        slot holding -1 together with its successor is replaced by
        ESC '0'.

        CMP_STATUS->state is reset to COMPOSING_NO.  The return value is
        CMP_STATUS->nchars plus the number of byte slots restored during
        the scan; MAYBE_FINISH_COMPOSITION adds it to char_offset.

        NOTE(review): the overview comment earlier in this file shows the
        rewritten header as four elements, while five slots are written
        here -- confirm which is intended.  */
3374
3375 static int finish_composition (int *, struct composition_status *);
3376
3377 static int
3378 finish_composition (int *charbuf, struct composition_status *cmp_status)
3379 {
3380   int idx = - cmp_status->length;   /* First slot of the header.  */
3381   int new_chars;
3382
3383   /* Recover the original ESC sequence */
3384   charbuf[idx++] = ISO_CODE_ESC;
3385   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3386                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3387                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3388                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3389                     : '4');
3390   charbuf[idx++] = -2;
3391   charbuf[idx++] = 0;
3392   charbuf[idx++] = -1;
3393   new_chars = cmp_status->nchars;
       /* Skip the scan for relative composition.  This assumes
          COMPOSITION_RELATIVE is the only method ordered below
          COMPOSITION_WITH_RULE -- confirm at the enum definition.
          `idx' and `new_chars' are used by name inside
          ENCODE_COMPOSITION_RULE, so they must keep these names.  */
3394   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3395     for (; idx < 0; idx++)
3396       {
3397         int elt = charbuf[idx];
3398
3399         if (elt == -2)
3400           {
3401             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3402             idx++;   /* Step over the second slot just rewritten.  */
3403           }
3404         else if (elt == -1)
3405           {
3406             charbuf[idx++] = ISO_CODE_ESC;
3407             charbuf[idx] = '0';
3408             new_chars += 2;
3409           }
3410       }
3411   cmp_status->state = COMPOSING_NO;
3412   return new_chars;
3413 }
3414
3415 /* If characters are under composition, finish the composition.

        That is, when cmp_status->state is not COMPOSING_NO, call
        finish_composition and add its return value to char_offset.
        Uses `cmp_status', `charbuf' and `char_offset' from the
        expanding scope.  */
3416 #define MAYBE_FINISH_COMPOSITION()                              \
3417   do {                                                          \
3418     if (cmp_status->state != COMPOSING_NO)                      \
3419       char_offset += finish_composition (charbuf, cmp_status);  \
3420   } while (0)
3421
3422 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3423
3424    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3425    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3426    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3427    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3428
3429    Produce this annotation sequence now:
3430
3431    [ -LENGTH(==-4) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) METHOD ]

        NOTE(review): the overview comment earlier in this file and
        finish_composition both treat the header as five elements
        (LENGTH == -5, with an extra 0 before METHOD); the line above
        looks stale -- confirm against ADD_COMPOSITION_DATA.

        C1 is the byte that followed ESC.  Two cases are handled:

        - C1 is '0' while the components of an ESC 3 or ESC 4
          composition are being read (state COMPOSING_COMPONENT_CHAR
          with method COMPOSITION_WITH_ALTCHARS, or state
          COMPOSING_COMPONENT_RULE with method
          COMPOSITION_WITH_RULE_ALTCHARS).  The ESC 0 then ends the
          component list: the marker [ -1 -1 ] is stored, the state
          becomes COMPOSING_CHAR, and cmp_status->length grows by 2.

        - Anything else starts a new composition.  A composition still
          in progress is first finished as invalid.  Then the method
          and the initial state are chosen from C1, the annotation
          header is emitted with ADD_COMPOSITION_DATA,
          cmp_status->length is set to MAX_ANNOTATION_LENGTH, the char
          and component counters are cleared, and coding->annotated is
          set.

        Uses `cmp_status', `charbuf', `char_offset' and `coding' from
        the expanding scope.
3432 */
3433
3434 #define DECODE_COMPOSITION_START(c1)                                       \
3435   do {                                                                     \
3436     if (c1 == '0'                                                          \
3437         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
3438              && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
3439             || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
3440                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3441       {                                                                    \
3442         *charbuf++ = -1;                                                   \
3443         *charbuf++= -1;                                                    \
3444         cmp_status->state = COMPOSING_CHAR;                                \
3445         cmp_status->length += 2;                                           \
3446       }                                                                    \
3447     else                                                                   \
3448       {                                                                    \
3449         MAYBE_FINISH_COMPOSITION ();                                       \
3450         cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE             \
3451                               : c1 == '2' ? COMPOSITION_WITH_RULE          \
3452                               : c1 == '3' ? COMPOSITION_WITH_ALTCHARS      \
3453                               : COMPOSITION_WITH_RULE_ALTCHARS);           \
3454         cmp_status->state                                                  \
3455           = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);       \
3456         ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
3457         cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
3458         cmp_status->nchars = cmp_status->ncomps = 0;                       \
3459         coding->annotated = 1;                                             \
3460       }                                                                    \
3461   } while (0)
3462
3463
3464 /* Handle composition end sequence ESC 1.

        The sequence is rejected -- the composition is finished as
        invalid and control jumps to invalid_code -- when no CHAR has
        been stored yet, or when the state does not fit the method.
        For COMPOSITION_WITH_RULE, STORE_COMPOSITION_CHAR moves the
        state off COMPOSING_CHAR after every CHAR, so still being in
        COMPOSING_CHAR means ESC 1 arrived where a CHAR was expected.
        For every other method the composed chars are read in state
        COMPOSING_CHAR, so any other state is an error.

        On success the annotation header at charbuf[-length] is patched:
        its first slot (the negated length) is lowered by ncomps + 2 for
        COMPOSITION_WITH_ALTCHARS or by ncomps * 3 for
        COMPOSITION_WITH_RULE_ALTCHARS, and its third slot receives
        nchars.  Then char_offset is advanced by nchars and the state is
        reset to COMPOSING_NO.

        Uses `cmp_status', `charbuf' and `char_offset' from the
        expanding scope, which must also provide the label
        invalid_code.  */
3465
3466 #define DECODE_COMPOSITION_END()                                        \
3467   do {                                                                  \
3468     if (cmp_status->nchars == 0                                         \
3469         || ((cmp_status->state == COMPOSING_CHAR)                       \
3470             == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
3471       {                                                                 \
3472         MAYBE_FINISH_COMPOSITION ();                                    \
3473         goto invalid_code;                                              \
3474       }                                                                 \
3475     if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
3476       charbuf[- cmp_status->length] -= cmp_status->ncomps + 2;          \
3477     else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
3478       charbuf[- cmp_status->length] -= cmp_status->ncomps * 3;          \
3479     charbuf[- cmp_status->length + 2] = cmp_status->nchars;             \
3480     char_offset += cmp_status->nchars;                                  \
3481     cmp_status->state = COMPOSING_NO;                                   \
3482   } while (0)
3483
3484 /* Store a composition rule RULE in charbuf, and update cmp_status.

        The rule is stored as the pair [ -2 RULE ] and cmp_status->length
        grows by 2.  The state is then decremented; presumably this
        steps from a rule-expecting state back to the matching
        char-expecting one, which relies on COMPOSING_RULE and
        COMPOSING_COMPONENT_RULE directly following COMPOSING_CHAR and
        COMPOSING_COMPONENT_CHAR in the enum -- confirm at its
        definition.  Uses `charbuf' and `cmp_status' from the expanding
        scope.  */
3485
3486 #define STORE_COMPOSITION_RULE(rule)    \
3487   do {                                  \
3488     *charbuf++ = -2;                    \
3489     *charbuf++ = rule;                  \
3490     cmp_status->length += 2;            \
3491     cmp_status->state--;                \
3492   } while (0)
3493
3494 /* Store a composed char or a component char C in charbuf, and update
3495    cmp_status.

        cmp_status->length grows by 1.  In state COMPOSING_CHAR the char
        counts as a composed char (nchars), in any other state as a
        component (ncomps).  When a rule must follow the char -- method
        COMPOSITION_WITH_RULE, or method COMPOSITION_WITH_RULE_ALTCHARS
        while in state COMPOSING_COMPONENT_CHAR -- the state is
        incremented; STORE_COMPOSITION_RULE decrements it again.  Uses
        `charbuf' and `cmp_status' from the expanding scope.  */
3496
3497 #define STORE_COMPOSITION_CHAR(c)                                       \
3498   do {                                                                  \
3499     *charbuf++ = (c);                                                   \
3500     cmp_status->length++;                                               \
3501     if (cmp_status->state == COMPOSING_CHAR)                            \
3502       cmp_status->nchars++;                                             \
3503     else                                                                \
3504       cmp_status->ncomps++;                                             \
3505     if (cmp_status->method == COMPOSITION_WITH_RULE                     \
3506         || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS        \
3507             && cmp_status->state == COMPOSING_COMPONENT_CHAR))          \
3508       cmp_status->state++;                                              \
3509   } while (0)
3510
3511
3512 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3513
3514 static void
3515 decode_coding_iso_2022 (struct coding_system *coding)
3516 {
3517   const unsigned char *src = coding->source + coding->consumed;
3518   const unsigned char *src_end = coding->source + coding->src_bytes;
3519   const unsigned char *src_base;
3520   int *charbuf = coding->charbuf + coding->charbuf_used;
3521   /* We may produce two annotations (charset and composition) in one
3522      loop and one more charset annotation at the end.  */
3523   int *charbuf_end
3524     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3525   EMACS_INT consumed_chars = 0, consumed_chars_base;
3526   int multibytep = coding->src_multibyte;
3527   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3528   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3529   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3530   int charset_id_2, charset_id_3;
3531   struct charset *charset;
3532   int c;
3533   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3534   Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
3535   EMACS_INT char_offset = coding->produced_char;
3536   EMACS_INT last_offset = char_offset;
3537   int last_id = charset_ascii;
3538   int eol_dos =
3539     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3540   int byte_after_cr = -1;
3541   int i;
3542
3543   setup_iso_safe_charsets (attrs);
3544   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3545
3546   if (cmp_status->state != COMPOSING_NO)
3547     {
3548       if (charbuf_end - charbuf < cmp_status->length)
3549         abort ();
3550       for (i = 0; i < cmp_status->length; i++)
3551         *charbuf++ = cmp_status->carryover[i];
3552       coding->annotated = 1;
3553     }
3554
3555   while (1)
3556     {
3557       int c1, c2, c3;
3558
3559       src_base = src;
3560       consumed_chars_base = consumed_chars;
3561
3562       if (charbuf >= charbuf_end)
3563         {
3564           if (byte_after_cr >= 0)
3565             src_base--;
3566           break;
3567         }
3568
3569       if (byte_after_cr >= 0)
3570         c1 = byte_after_cr, byte_after_cr = -1;
3571       else
3572         ONE_MORE_BYTE (c1);
3573       if (c1 < 0)
3574         goto invalid_code;
3575
3576       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3577         {
3578           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3579           char_offset++;
3580           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3581           continue;
3582         }
3583
3584       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3585         {
3586           if (c1 == ISO_CODE_ESC)
3587             {
3588               if (src + 1 >= src_end)
3589                 goto no_more_source;
3590               *charbuf++ = ISO_CODE_ESC;
3591               char_offset++;
3592               if (src[0] == '%' && src[1] == '@')
3593                 {
3594                   src += 2;
3595                   consumed_chars += 2;
3596                   char_offset += 2;
3597                   /* We are sure charbuf can contain two more chars. */
3598                   *charbuf++ = '%';
3599                   *charbuf++ = '@';
3600                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3601                 }
3602             }
3603           else
3604             {
3605               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3606               char_offset++;
3607             }
3608           continue;
3609         }
3610
3611       if ((cmp_status->state == COMPOSING_RULE
3612            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3613           && c1 != ISO_CODE_ESC)
3614         {
3615           int rule;
3616
3617           DECODE_COMPOSITION_RULE (rule);
3618           STORE_COMPOSITION_RULE (rule);
3619           continue;
3620         }
3621
3622       /* We produce at most one character.  */
3623       switch (iso_code_class [c1])
3624         {
3625         case ISO_0x20_or_0x7F:
3626           if (charset_id_0 < 0
3627               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3628             /* This is SPACE or DEL.  */
3629             charset = CHARSET_FROM_ID (charset_ascii);
3630           else
3631             charset = CHARSET_FROM_ID (charset_id_0);
3632           break;
3633
3634         case ISO_graphic_plane_0:
3635           if (charset_id_0 < 0)
3636             charset = CHARSET_FROM_ID (charset_ascii);
3637           else
3638             charset = CHARSET_FROM_ID (charset_id_0);
3639           break;
3640
3641         case ISO_0xA0_or_0xFF:
3642           if (charset_id_1 < 0
3643               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3644               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3645             goto invalid_code;
3646           /* This is a graphic character, we fall down ... */
3647
3648         case ISO_graphic_plane_1:
3649           if (charset_id_1 < 0)
3650             goto invalid_code;
3651           charset = CHARSET_FROM_ID (charset_id_1);
3652           break;
3653
3654         case ISO_control_0:
3655           if (eol_dos && c1 == '\r')
3656             ONE_MORE_BYTE (byte_after_cr);
3657           MAYBE_FINISH_COMPOSITION ();
3658           charset = CHARSET_FROM_ID (charset_ascii);
3659           break;
3660
3661         case ISO_control_1:
3662           goto invalid_code;
3663
3664         case ISO_shift_out:
3665           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3666               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3667             goto invalid_code;
3668           CODING_ISO_INVOCATION (coding, 0) = 1;
3669           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3670           continue;
3671
3672         case ISO_shift_in:
3673           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3674             goto invalid_code;
3675           CODING_ISO_INVOCATION (coding, 0) = 0;
3676           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3677           continue;
3678
3679         case ISO_single_shift_2_7:
3680           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3681             goto invalid_code;
3682         case ISO_single_shift_2:
3683           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3684             goto invalid_code;
3685           /* SS2 is handled as an escape sequence of ESC 'N' */
3686           c1 = 'N';
3687           goto label_escape_sequence;
3688
3689         case ISO_single_shift_3:
3690           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3691             goto invalid_code;
3692           /* SS3 is handled as an escape sequence of ESC 'O' */
3693           c1 = 'O';
3694           goto label_escape_sequence;
3695
3696         case ISO_control_sequence_introducer:
3697           /* CSI is handled as an escape sequence of ESC '[' ...  */
3698           c1 = '[';
3699           goto label_escape_sequence;
3700
3701         case ISO_escape:
3702           ONE_MORE_BYTE (c1);
3703         label_escape_sequence:
3704           /* Escape sequences handled here are invocation,
3705              designation, direction specification, and character
3706              composition specification.  */
3707           switch (c1)
3708             {
3709             case '&':           /* revision of following character set */
3710               ONE_MORE_BYTE (c1);
3711               if (!(c1 >= '@' && c1 <= '~'))
3712                 goto invalid_code;
3713               ONE_MORE_BYTE (c1);
3714               if (c1 != ISO_CODE_ESC)
3715                 goto invalid_code;
3716               ONE_MORE_BYTE (c1);
3717               goto label_escape_sequence;
3718
3719             case '$':           /* designation of 2-byte character set */
3720               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3721                 goto invalid_code;
3722               {
3723                 int reg, chars96;
3724
3725                 ONE_MORE_BYTE (c1);
3726                 if (c1 >= '@' && c1 <= 'B')
3727                   {     /* designation of JISX0208.1978, GB2312.1980,
3728                            or JISX0208.1980 */
3729                     reg = 0, chars96 = 0;
3730                   }
3731                 else if (c1 >= 0x28 && c1 <= 0x2B)
3732                   { /* designation of DIMENSION2_CHARS94 character set */
3733                     reg = c1 - 0x28, chars96 = 0;
3734                     ONE_MORE_BYTE (c1);
3735                   }
3736                 else if (c1 >= 0x2C && c1 <= 0x2F)
3737                   { /* designation of DIMENSION2_CHARS96 character set */
3738                     reg = c1 - 0x2C, chars96 = 1;
3739                     ONE_MORE_BYTE (c1);
3740                   }
3741                 else
3742                   goto invalid_code;
3743                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3744                 /* We must update these variables now.  */
3745                 if (reg == 0)
3746                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3747                 else if (reg == 1)
3748                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3749                 if (chars96 < 0)
3750                   goto invalid_code;
3751               }
3752               continue;
3753
3754             case 'n':           /* invocation of locking-shift-2 */
3755               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3756                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3757                 goto invalid_code;
3758               CODING_ISO_INVOCATION (coding, 0) = 2;
3759               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3760               continue;
3761
3762             case 'o':           /* invocation of locking-shift-3 */
3763               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3764                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3765                 goto invalid_code;
3766               CODING_ISO_INVOCATION (coding, 0) = 3;
3767               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3768               continue;
3769
3770             case 'N':           /* invocation of single-shift-2 */
3771               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3772                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3773                 goto invalid_code;
3774               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3775               if (charset_id_2 < 0)
3776                 charset = CHARSET_FROM_ID (charset_ascii);
3777               else
3778                 charset = CHARSET_FROM_ID (charset_id_2);
3779               ONE_MORE_BYTE (c1);
3780               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3781                 goto invalid_code;
3782               break;
3783
3784             case 'O':           /* invocation of single-shift-3 */
3785               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3786                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3787                 goto invalid_code;
3788               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3789               if (charset_id_3 < 0)
3790                 charset = CHARSET_FROM_ID (charset_ascii);
3791               else
3792                 charset = CHARSET_FROM_ID (charset_id_3);
3793               ONE_MORE_BYTE (c1);
3794               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3795                 goto invalid_code;
3796               break;
3797
3798             case '0': case '2': case '3': case '4': /* start composition */
3799               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3800                 goto invalid_code;
3801               if (last_id != charset_ascii)
3802                 {
3803                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3804                   last_id = charset_ascii;
3805                   last_offset = char_offset;
3806                 }
3807               DECODE_COMPOSITION_START (c1);
3808               continue;
3809
3810             case '1':           /* end composition */
3811               if (cmp_status->state == COMPOSING_NO)
3812                 goto invalid_code;
3813               DECODE_COMPOSITION_END ();
3814               continue;
3815
3816             case '[':           /* specification of direction */
3817               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3818                 goto invalid_code;
3819               /* For the moment, nested direction is not supported.
3820                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3821                  left-to-right, and nonzero means right-to-left.  */
3822               ONE_MORE_BYTE (c1);
3823               switch (c1)
3824                 {
3825                 case ']':       /* end of the current direction */
3826                   coding->mode &= ~CODING_MODE_DIRECTION;
3827
3828                 case '0':       /* end of the current direction */
3829                 case '1':       /* start of left-to-right direction */
3830                   ONE_MORE_BYTE (c1);
3831                   if (c1 == ']')
3832                     coding->mode &= ~CODING_MODE_DIRECTION;
3833                   else
3834                     goto invalid_code;
3835                   break;
3836
3837                 case '2':       /* start of right-to-left direction */
3838                   ONE_MORE_BYTE (c1);
3839                   if (c1 == ']')
3840                     coding->mode |= CODING_MODE_DIRECTION;
3841                   else
3842                     goto invalid_code;
3843                   break;
3844
3845                 default:
3846                   goto invalid_code;
3847                 }
3848               continue;
3849
3850             case '%':
3851               ONE_MORE_BYTE (c1);
3852               if (c1 == '/')
3853                 {
3854                   /* CTEXT extended segment:
3855                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3856                      We keep these bytes as is for the moment.
3857                      They may be decoded by post-read-conversion.  */
3858                   int dim, M, L;
3859                   int size;
3860
3861                   ONE_MORE_BYTE (dim);
3862                   if (dim < '0' || dim > '4')
3863                     goto invalid_code;
3864                   ONE_MORE_BYTE (M);
3865                   if (M < 128)
3866                     goto invalid_code;
3867                   ONE_MORE_BYTE (L);
3868                   if (L < 128)
3869                     goto invalid_code;
3870                   size = ((M - 128) * 128) + (L - 128);
3871                   if (charbuf + 6 > charbuf_end)
3872                     goto break_loop;
3873                   *charbuf++ = ISO_CODE_ESC;
3874                   *charbuf++ = '%';
3875                   *charbuf++ = '/';
3876                   *charbuf++ = dim;
3877                   *charbuf++ = BYTE8_TO_CHAR (M);
3878                   *charbuf++ = BYTE8_TO_CHAR (L);
3879                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3880                 }
3881               else if (c1 == 'G')
3882                 {
3883                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3884                      ESC % G --UTF-8-BYTES-- ESC % @
3885                      We keep these bytes as is for the moment.
3886                      They may be decoded by post-read-conversion.  */
3887                   if (charbuf + 3 > charbuf_end)
3888                     goto break_loop;
3889                   *charbuf++ = ISO_CODE_ESC;
3890                   *charbuf++ = '%';
3891                   *charbuf++ = 'G';
3892                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3893                 }
3894               else
3895                 goto invalid_code;
3896               continue;
3897               break;
3898
3899             default:
3900               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3901                 goto invalid_code;
3902               {
3903                 int reg, chars96;
3904
3905                 if (c1 >= 0x28 && c1 <= 0x2B)
3906                   { /* designation of DIMENSION1_CHARS94 character set */
3907                     reg = c1 - 0x28, chars96 = 0;
3908                     ONE_MORE_BYTE (c1);
3909                   }
3910                 else if (c1 >= 0x2C && c1 <= 0x2F)
3911                   { /* designation of DIMENSION1_CHARS96 character set */
3912                     reg = c1 - 0x2C, chars96 = 1;
3913                     ONE_MORE_BYTE (c1);
3914                   }
3915                 else
3916                   goto invalid_code;
3917                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3918                 /* We must update these variables now.  */
3919                 if (reg == 0)
3920                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3921                 else if (reg == 1)
3922                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3923                 if (chars96 < 0)
3924                   goto invalid_code;
3925               }
3926               continue;
3927             }
3928           break;
3929
3930         default:
3931           abort ();
3932         }
3933
3934       if (cmp_status->state == COMPOSING_NO
3935           && charset->id != charset_ascii
3936           && last_id != charset->id)
3937         {
3938           if (last_id != charset_ascii)
3939             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3940           last_id = charset->id;
3941           last_offset = char_offset;
3942         }
3943
3944       /* Now we know CHARSET and 1st position code C1 of a character.
3945          Produce a decoded character while getting 2nd and 3rd
3946          position codes C2, C3 if necessary.  */
3947       if (CHARSET_DIMENSION (charset) > 1)
3948         {
3949           ONE_MORE_BYTE (c2);
3950           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3951               || ((c1 & 0x80) != (c2 & 0x80)))
3952             /* C2 is not in a valid range.  */
3953             goto invalid_code;
3954           if (CHARSET_DIMENSION (charset) == 2)
3955             c1 = (c1 << 8) | c2;
3956           else
3957             {
3958               ONE_MORE_BYTE (c3);
3959               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3960                   || ((c1 & 0x80) != (c3 & 0x80)))
3961                 /* C3 is not in a valid range.  */
3962                 goto invalid_code;
3963               c1 = (c1 << 16) | (c2 << 8) | c2;
3964             }
3965         }
3966       c1 &= 0x7F7F7F;
3967       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3968       if (c < 0)
3969         {
3970           MAYBE_FINISH_COMPOSITION ();
3971           for (; src_base < src; src_base++, char_offset++)
3972             {
3973               if (ASCII_BYTE_P (*src_base))
3974                 *charbuf++ = *src_base;
3975               else
3976                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3977             }
3978         }
3979       else if (cmp_status->state == COMPOSING_NO)
3980         {
3981           *charbuf++ = c;
3982           char_offset++;
3983         }
3984       else if ((cmp_status->state == COMPOSING_CHAR
3985                 ? cmp_status->nchars
3986                 : cmp_status->ncomps)
3987                >= MAX_COMPOSITION_COMPONENTS)
3988         {
3989           /* Too long composition.  */
3990           MAYBE_FINISH_COMPOSITION ();
3991           *charbuf++ = c;
3992           char_offset++;
3993         }
3994       else
3995         STORE_COMPOSITION_CHAR (c);
3996       continue;
3997
3998     invalid_code:
3999       MAYBE_FINISH_COMPOSITION ();
4000       src = src_base;
4001       consumed_chars = consumed_chars_base;
4002       ONE_MORE_BYTE (c);
4003       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4004       char_offset++;
4005       coding->errors++;
4006       continue;
4007
4008     break_loop:
4009       break;
4010     }
4011
4012  no_more_source:
4013   if (cmp_status->state != COMPOSING_NO)
4014     {
4015       if (coding->mode & CODING_MODE_LAST_BLOCK)
4016         MAYBE_FINISH_COMPOSITION ();
4017       else
4018         {
4019           charbuf -= cmp_status->length;
4020           for (i = 0; i < cmp_status->length; i++)
4021             cmp_status->carryover[i] = charbuf[i];
4022         }
4023     }
4024   else if (last_id != charset_ascii)
4025     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4026   coding->consumed_char += consumed_chars_base;
4027   coding->consumed = src_base - coding->source;
4028   coding->charbuf_used = charbuf - coding->charbuf;
4029 }
4030
4031
4032 /* ISO2022 encoding stuff.  */
4033
4034 /*
4035    Saying just "ISO2022" is not enough when encoding; we have to
4036    specify more details.  In Emacs, each coding system of an ISO2022
4037    variant has the following specifications:
4038         1. Initial designation to G0 thru G3.
4039         2. Allows short-form designation?
4040         3. ASCII should be designated to G0 before control characters?
4041         4. ASCII should be designated to G0 at end of line?
4042         5. 7-bit environment or 8-bit environment?
4043         6. Use locking-shift?
4044         7. Use single-shift?
4045    And the following two are only for Japanese:
4046         8. Use ASCII in place of JIS0201-1976-Roman?
4047         9. Use JISX0208-1983 in place of JISX0208-1978?
4048    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4049    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4050    details.
4051 */
4052
4053 /* Produce codes (escape sequence) for designating CHARSET to graphic
4054    register REG at DST, and increment DST.  If <final-char> of CHARSET is
4055    '@', 'A', or 'B' and the coding system CODING allows, produce
4056    designation sequence of short-form.
        The output consists of, in order:
          - ESC '&' ('@' + revision), only when CODING_ISO_FLAG_REVISION
            is set in CODING and CHARSET_ISO_REVISION (CHARSET) is
            non-negative;
          - ESC;
          - for a one-dimensional CHARSET, the REG-th byte of "()*+"
            (94-char set) or of ",-./" (96-char set);
          - otherwise '$' followed by that same intermediate byte, which
            is left out (the short form) for a 94-char set when
            CODING_ISO_FLAG_LONG_FORM is clear, REG is 0, and the final
            byte lies within '@'..'B';
          - the final byte, CHARSET_ISO_FINAL (CHARSET).
        Afterwards the id of CHARSET is stored in
        CODING_ISO_DESIGNATION (CODING, REG).
        CHARSET and REG are expanded more than once, so pass expressions
        without side effects.  REG indexes four-entry tables and is
        therefore expected to be 0..3.  Bytes are written through the
        EMIT_* macros, which are defined outside this section.  */
4057
4058 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
4059   do {                                                                  \
4060     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
4061     const char *intermediate_char_94 = "()*+";                          \
4062     const char *intermediate_char_96 = ",-./";                          \
4063     int revision = -1;                                                  \
4064                                                                         \
4065     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
4066       revision = CHARSET_ISO_REVISION (charset);                        \
4067                                                                         \
4068     if (revision >= 0)                                                  \
4069       {                                                                 \
4070         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
4071         EMIT_ONE_BYTE ('@' + revision);                                 \
4072       }                                                                 \
4073     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
4074     if (CHARSET_DIMENSION (charset) == 1)                               \
4075       {                                                                 \
4076         int b;                                                          \
4077         if (! CHARSET_ISO_CHARS_96 (charset))                           \
4078           b = intermediate_char_94[reg];                                \
4079         else                                                            \
4080           b = intermediate_char_96[reg];                                \
4081         EMIT_ONE_ASCII_BYTE (b);                                        \
4082       }                                                                 \
4083     else                                                                \
4084       {                                                                 \
4085         EMIT_ONE_ASCII_BYTE ('$');                                      \
4086         if (! CHARSET_ISO_CHARS_96 (charset))                           \
4087           {                                                             \
4088             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
4089                 || reg != 0                                             \
4090                 || final_char < '@' || final_char > 'B')                \
4091               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
4092           }                                                             \
4093         else                                                            \
4094           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
4095       }                                                                 \
4096     EMIT_ONE_ASCII_BYTE (final_char);                                   \
4097                                                                         \
4098     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
4099   } while (0)
4100
4101
4102 /* The following two macros produce codes (control character or escape
4103    sequence) for ISO2022 single-shift functions (single-shift-2 and
4104    single-shift-3).  When CODING_ISO_FLAG_SEVEN_BITS is set, the
        two-byte escape form is emitted (ESC 'N' for single-shift-2,
        ESC 'O' for single-shift-3); otherwise the single byte
        ISO_CODE_SS2 or ISO_CODE_SS3 is emitted.  In both cases
        CODING_ISO_SINGLE_SHIFTING (coding) is then set to 1; the
        ENCODE_ISO_CHARACTER_DIMENSION1/2 macros test that flag and
        clear it when they emit the shifted character.  These macros
        take no arguments and use the variable `coding' found at the
        expansion site.  */
4105
4106 #define ENCODE_SINGLE_SHIFT_2                                           \
4107   do {                                                                  \
4108     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
4109       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
4110     else                                                                \
4111       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
4112     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
4113   } while (0)
4114
4115
4116 #define ENCODE_SINGLE_SHIFT_3                                           \
4117   do {                                                                  \
4118     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
4119       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
4120     else                                                                \
4121       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
4122     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
4123   } while (0)
4124
4125
4126 /* The following four macros produce codes (control character or
4127    escape sequence) for ISO2022 locking-shift functions (shift-in,
4128    shift-out, locking-shift-2, and locking-shift-3).  Each one also
        records the graphic register it has just invoked in
        CODING_ISO_INVOCATION (coding, 0): 0 for shift-in (the byte
        ISO_CODE_SI), 1 for shift-out (the byte ISO_CODE_SO), 2 for
        locking-shift-2 (ESC 'n'), and 3 for locking-shift-3.  They take
        no arguments and use the variable `coding' found at the
        expansion site.  */
4129
4130 #define ENCODE_SHIFT_IN                                 \
4131   do {                                                  \
4132     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
4133     CODING_ISO_INVOCATION (coding, 0) = 0;              \
4134   } while (0)
4135
4136
4137 #define ENCODE_SHIFT_OUT                                \
4138   do {                                                  \
4139     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
4140     CODING_ISO_INVOCATION (coding, 0) = 1;              \
4141   } while (0)
4142
4143
4144 #define ENCODE_LOCKING_SHIFT_2                          \
4145   do {                                                  \
4146     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
4147     CODING_ISO_INVOCATION (coding, 0) = 2;              \
4148   } while (0)
4149
4150
     /* Locking-shift-3: emit ESC 'o' and record in
        CODING_ISO_INVOCATION (coding, 0) that graphic register 3 is now
        the one invoked.  The final byte has to be 'o': ESC 'n' is
        locking-shift-2, and the ISO2022 decoder in this file accepts
        ESC 'o' as the invocation of locking-shift-3.  Uses the variable
        `coding' found at the expansion site.  */
4151 #define ENCODE_LOCKING_SHIFT_3                          \
4152   do {                                                  \
4153     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
4154     CODING_ISO_INVOCATION (coding, 0) = 3;              \
4155   } while (0)
4156
4157
4158 /* Produce codes for a DIMENSION1 character whose character set is
4159    CHARSET and whose position-code is C1.  Designation and invocation
4160    sequences are also produced in advance if necessary.
        The body is a `do ... while (1)' loop that is left by `break'
        once the byte has been emitted.  On each pass:
          - if CODING_ISO_FLAG_USE_ROMAN is set and CHARSET is
            charset_ascii, CHARSET is replaced by charset_jisx0201_roman
            (the CHARSET argument is assigned to, so it must be a
            modifiable lvalue);
          - if a single shift is pending, emit C1 & 0x7F under
            CODING_ISO_FLAG_SEVEN_BITS or C1 | 0x80 otherwise, and clear
            the pending flag;
          - else if CHARSET is the charset invoked for plane 0, emit
            C1 & 0x7F;
          - else if it is the charset invoked for plane 1, emit
            C1 | 0x80;
          - else call encode_invocation_designation and try again.
        C1 is expanded without surrounding parentheses.  The macro uses
        the variables `coding', `dst' and `produced_chars' found at the
        expansion site.  */
4161
4162 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
4163   do {                                                                  \
4164     int id = CHARSET_ID (charset);                                      \
4165                                                                         \
4166     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
4167         && id == charset_ascii)                                         \
4168       {                                                                 \
4169         id = charset_jisx0201_roman;                                    \
4170         charset = CHARSET_FROM_ID (id);                                 \
4171       }                                                                 \
4172                                                                         \
4173     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
4174       {                                                                 \
4175         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
4176           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
4177         else                                                            \
4178           EMIT_ONE_BYTE (c1 | 0x80);                                    \
4179         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
4180         break;                                                          \
4181       }                                                                 \
4182     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
4183       {                                                                 \
4184         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
4185         break;                                                          \
4186       }                                                                 \
4187     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
4188       {                                                                 \
4189         EMIT_ONE_BYTE (c1 | 0x80);                                      \
4190         break;                                                          \
4191       }                                                                 \
4192     else                                                                \
4193       /* Since CHARSET is not yet invoked to any graphic planes, we     \
4194          must invoke it, or, at first, designate it to some graphic     \
4195          register.  Then repeat the loop to actually produce the        \
4196          character.  */                                                 \
4197       dst = encode_invocation_designation (charset, coding, dst,        \
4198                                            &produced_chars);            \
4199   } while (1)
4200
4201
4202 /* Produce codes for a DIMENSION2 character whose character set is
4203    CHARSET and whose position-codes are C1 and C2.  Designation and
4204    invocation codes are also produced in advance if necessary.
        The body is a `do ... while (1)' loop that is left by `break'
        once the two bytes have been emitted.  On each pass:
          - if CODING_ISO_FLAG_USE_OLDJIS is set and CHARSET is
            charset_jisx0208, CHARSET is replaced by
            charset_jisx0208_1978 (the CHARSET argument is assigned to,
            so it must be a modifiable lvalue);
          - if a single shift is pending, emit both codes masked with
            0x7F under CODING_ISO_FLAG_SEVEN_BITS, or with 0x80 set
            otherwise, and clear the pending flag;
          - else if CHARSET is the charset invoked for plane 0, emit
            both codes masked with 0x7F;
          - else if it is the charset invoked for plane 1, emit both
            codes with 0x80 set;
          - else call encode_invocation_designation and try again.
        The macro uses the variables `coding', `dst' and
        `produced_chars' found at the expansion site.  */
4205
4206 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
4207   do {                                                                  \
4208     int id = CHARSET_ID (charset);                                      \
4209                                                                         \
4210     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
4211         && id == charset_jisx0208)                                      \
4212       {                                                                 \
4213         id = charset_jisx0208_1978;                                     \
4214         charset = CHARSET_FROM_ID (id);                                 \
4215       }                                                                 \
4216                                                                         \
4217     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
4218       {                                                                 \
4219         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
4220           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
4221         else                                                            \
4222           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
4223         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
4224         break;                                                          \
4225       }                                                                 \
4226     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
4227       {                                                                 \
4228         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
4229         break;                                                          \
4230       }                                                                 \
4231     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
4232       {                                                                 \
4233         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
4234         break;                                                          \
4235       }                                                                 \
4236     else                                                                \
4237       /* Since CHARSET is not yet invoked to any graphic planes, we     \
4238          must invoke it, or, at first, designate it to some graphic     \
4239          register.  Then repeat the loop to actually produce the        \
4240          character.  */                                                 \
4241       dst = encode_invocation_designation (charset, coding, dst,        \
4242                                            &produced_chars);            \
4243   } while (1)
4244
4245
     /* Produce codes for character C of character set CHARSET.
        CODING_ENCODE_CHAR (defined outside this section) is invoked
        with the local variable `code' as its last argument.  If CHARSET
        has dimension 1, `code' is emitted as a single position-code via
        ENCODE_ISO_CHARACTER_DIMENSION1; any other dimension is handled
        as two position-codes, `code >> 8' and `code & 0xFF', via
        ENCODE_ISO_CHARACTER_DIMENSION2.  The macro uses the variables
        `coding', `dst' and `dst_end' found at the expansion site, plus
        whatever the two DIMENSION macros use.  */
4246 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
4247   do {                                                                     \
4248     int code;                                                              \
4249     CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code);       \
4250                                                                            \
4251     if (CHARSET_DIMENSION (charset) == 1)                                  \
4252       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
4253     else                                                                   \
4254       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
4255   } while (0)
4256
4257
4258 /* Produce designation and invocation codes at a place pointed by DST
4259    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4260    Return new DST.
        *P_NCHARS is copied into the local `produced_chars' on entry and
        that local is stored back into *P_NCHARS before returning.
        Steps:
          1. Look for a graphic register (0..3) to which CHARSET is
             already designated.  If there is none, take the register
             given by CODING_ISO_REQUEST for CHARSET (register 0 when
             that value is negative) and emit a designation sequence
             with ENCODE_DESIGNATION.
          2. If that register is recorded neither in
             CODING_ISO_INVOCATION (coding, 0) nor in
             CODING_ISO_INVOCATION (coding, 1), emit an invocation:
             shift-in for register 0, shift-out for register 1, and for
             registers 2 and 3 a single shift when
             CODING_ISO_FLAG_SINGLE_SHIFT is set, else a locking shift.
             The single-shift macros set CODING_ISO_SINGLE_SHIFTING
             rather than changing CODING_ISO_INVOCATION.
        NOTE(review): `multibytep' and `produced_chars' are not
        referenced directly in this function; presumably the EMIT_*
        macros expanded below rely on them -- confirm before renaming
        or removing either.  */
4261
4262 static unsigned char *
4263 encode_invocation_designation (struct charset *charset,
4264                                struct coding_system *coding,
4265                                unsigned char *dst, EMACS_INT *p_nchars)
4266 {
4267   int multibytep = coding->dst_multibyte;
4268   EMACS_INT produced_chars = *p_nchars;
4269   int reg;                      /* graphic register number */
4270   int id = CHARSET_ID (charset);
4271
4272   /* At first, check designations.  */
4273   for (reg = 0; reg < 4; reg++)
4274     if (id == CODING_ISO_DESIGNATION (coding, reg))
4275       break;
4276
4277   if (reg >= 4)
4278     {
4279       /* CHARSET is not yet designated to any graphic registers.  */
4280       /* At first check the requested designation.  */
4281       reg = CODING_ISO_REQUEST (coding, id);
4282       if (reg < 0)
4283         /* Since CHARSET requests no special designation, designate it
4284            to graphic register 0.  */
4285         reg = 0;
4286
4287       ENCODE_DESIGNATION (charset, reg, coding);
4288     }
4289
4290   if (CODING_ISO_INVOCATION (coding, 0) != reg
4291       && CODING_ISO_INVOCATION (coding, 1) != reg)
4292     {
4293       /* Since the graphic register REG is not invoked to any graphic
4294          planes, invoke it to graphic plane 0.  */
4295       switch (reg)
4296         {
4297         case 0:                 /* graphic register 0 */
4298           ENCODE_SHIFT_IN;
4299           break;
4300
4301         case 1:                 /* graphic register 1 */
4302           ENCODE_SHIFT_OUT;
4303           break;
4304
4305         case 2:                 /* graphic register 2 */
4306           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4307             ENCODE_SINGLE_SHIFT_2;
4308           else
4309             ENCODE_LOCKING_SHIFT_2;
4310           break;
4311
4312         case 3:                 /* graphic register 3 */
4313           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4314             ENCODE_SINGLE_SHIFT_3;
4315           else
4316             ENCODE_LOCKING_SHIFT_3;
4317           break;
4318         }
4319     }
4320
4321   *p_nchars = produced_chars;
4322   return dst;
4323 }
4324
4325
4326 /* Produce codes for designation and invocation to reset the graphic
4327    planes and registers to initial state.
        First, if CODING_ISO_INVOCATION (coding, 0) is not 0, a shift-in
        is emitted.  Then, for each graphic register 0..3 whose initial
        charset (CODING_ISO_INITIAL) is non-negative and differs from
        the current designation, that initial charset is designated
        again with ENCODE_DESIGNATION.  Registers whose initial value is
        negative are left alone.  The macro declares its own `reg' and
        `charset' and uses the variable `coding' found at the expansion
        site.  */
4328 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
4329   do {                                                                  \
4330     int reg;                                                            \
4331     struct charset *charset;                                            \
4332                                                                         \
4333     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
4334       ENCODE_SHIFT_IN;                                                  \
4335     for (reg = 0; reg < 4; reg++)                                       \
4336       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
4337           && (CODING_ISO_DESIGNATION (coding, reg)                      \
4338               != CODING_ISO_INITIAL (coding, reg)))                     \
4339         {                                                               \
4340           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
4341           ENCODE_DESIGNATION (charset, reg, coding);                    \
4342         }                                                               \
4343   } while (0)
4344
4345
4346 /* Produce designation sequences of charsets in the line started from
4347    CHARBUF to a place pointed by DST, and return the number of
4348    produced bytes.  DST should not directly point a buffer text area
4349    which may be relocated by char_charset call.
4350
4351    If the current block ends before any end-of-line, we may fail to
4352    find all the necessary designations.
        The scan of CHARBUF stops at the first '\n', at CHARBUF_END, or
        as soon as a charset has been found for all four graphic
        registers.  For each character scanned, char_charset picks its
        charset from the coding system's charset list
        (Viso_2022_charset_list when that list is Qiso_2022), and
        CODING_ISO_REQUEST gives the register that charset asks for; the
        first charset seen for a register is the one kept.  Afterwards a
        designation is emitted for every such register whose current
        designation differs.
        The return value is DST - ORIG, so ORIG has to capture DST
        before any byte is emitted.
        NOTE(review): `multibytep' and `produced_chars' are not
        referenced directly in this function; presumably the EMIT_*
        macros inside ENCODE_DESIGNATION rely on them -- confirm before
        renaming or removing either.  */
4353
4354 static int
4355 encode_designation_at_bol (struct coding_system *coding,
4356                            int *charbuf, int *charbuf_end,
4357                            unsigned char *dst)
4358 {
4359   unsigned char *orig = dst;    /* start of output, for the byte count */
4360   struct charset *charset;
4361   /* Table of charsets to be designated to each graphic register.  */
4362   int r[4];
4363   int c, found = 0, reg;
4364   EMACS_INT produced_chars = 0;
4365   int multibytep = coding->dst_multibyte;
4366   Lisp_Object attrs;
4367   Lisp_Object charset_list;
4368
4369   attrs = CODING_ID_ATTRS (coding->id);
4370   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4371   if (EQ (charset_list, Qiso_2022))
4372     charset_list = Viso_2022_charset_list;
4373
4374   for (reg = 0; reg < 4; reg++)
4375     r[reg] = -1;
4376
4377   while (charbuf < charbuf_end && found < 4)
4378     {
4379       int id;
4380
4381       c = *charbuf++;
4382       if (c == '\n')
4383         break;
4384       charset = char_charset (c, charset_list, NULL);
4385       id = CHARSET_ID (charset);
4386       reg = CODING_ISO_REQUEST (coding, id);
4387       if (reg >= 0 && r[reg] < 0)
4388         {
4389           found++;
4390           r[reg] = id;
4391         }
4392     }
4393
4394   if (found)
4395     {
4396       for (reg = 0; reg < 4; reg++)
4397         if (r[reg] >= 0
4398             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4399           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4400     }
4401
4402   return dst - orig;
4403 }
4404
4405 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4406
4407 static int
4408 encode_coding_iso_2022 (struct coding_system *coding)
4409 {
4410   int multibytep = coding->dst_multibyte;
4411   int *charbuf = coding->charbuf;
4412   int *charbuf_end = charbuf + coding->charbuf_used;
4413   unsigned char *dst = coding->destination + coding->produced;
4414   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4415   int safe_room = 16;
4416   int bol_designation
4417     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4418        && CODING_ISO_BOL (coding));
4419   EMACS_INT produced_chars = 0;
4420   Lisp_Object attrs, eol_type, charset_list;
4421   int ascii_compatible;
4422   int c;
4423   int preferred_charset_id = -1;
4424
4425   CODING_GET_INFO (coding, attrs, charset_list);
4426   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4427   if (VECTORP (eol_type))
4428     eol_type = Qunix;
4429
4430   setup_iso_safe_charsets (attrs);
4431   /* Charset list may have been changed.  */
4432   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4433   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4434
4435   ascii_compatible
4436     = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
4437        && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
4438                                           | CODING_ISO_FLAG_LOCKING_SHIFT)));
4439
4440   while (charbuf < charbuf_end)
4441     {
4442       ASSURE_DESTINATION (safe_room);
4443
4444       if (bol_designation)
4445         {
4446           /* We have to produce designation sequences if any now.  */
4447           unsigned char desig_buf[16];
4448           int nbytes;
4449           EMACS_INT offset;
4450
4451           charset_map_loaded = 0;
4452           nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end,
4453                                               desig_buf);
4454           if (charset_map_loaded
4455               && (offset = coding_set_destination (coding)))
4456             {
4457               dst += offset;
4458               dst_end += offset;
4459             }
4460           memcpy (dst, desig_buf, nbytes);
4461           dst += nbytes;
4462           /* We are sure that designation sequences are all ASCII bytes.  */
4463           produced_chars += nbytes;
4464           bol_designation = 0;
4465           ASSURE_DESTINATION (safe_room);
4466         }
4467
4468       c = *charbuf++;
4469
4470       if (c < 0)
4471         {
4472           /* Handle an annotation.  */
4473           switch (*charbuf)
4474             {
4475             case CODING_ANNOTATE_COMPOSITION_MASK:
4476               /* Not yet implemented.  */
4477               break;
4478             case CODING_ANNOTATE_CHARSET_MASK:
4479               preferred_charset_id = charbuf[2];
4480               if (preferred_charset_id >= 0
4481                   && NILP (Fmemq (make_number (preferred_charset_id),
4482                                   charset_list)))
4483                 preferred_charset_id = -1;
4484               break;
4485             default:
4486               abort ();
4487             }
4488           charbuf += -c - 1;
4489           continue;
4490         }
4491
4492       /* Now encode the character C.  */
4493       if (c < 0x20 || c == 0x7F)
4494         {
4495           if (c == '\n'
4496               || (c == '\r' && EQ (eol_type, Qmac)))
4497             {
4498               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4499                 ENCODE_RESET_PLANE_AND_REGISTER ();
4500               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4501                 {
4502                   int i;
4503
4504                   for (i = 0; i < 4; i++)
4505                     CODING_ISO_DESIGNATION (coding, i)
4506                       = CODING_ISO_INITIAL (coding, i);
4507                 }
4508               bol_designation
4509                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4510             }
4511           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4512             ENCODE_RESET_PLANE_AND_REGISTER ();
4513           EMIT_ONE_ASCII_BYTE (c);
4514         }
4515       else if (ASCII_CHAR_P (c))
4516         {
4517           if (ascii_compatible)
4518             EMIT_ONE_ASCII_BYTE (c);
4519           else
4520             {
4521               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4522               ENCODE_ISO_CHARACTER (charset, c);
4523             }
4524         }
4525       else if (CHAR_BYTE8_P (c))
4526         {
4527           c = CHAR_TO_BYTE8 (c);
4528           EMIT_ONE_BYTE (c);
4529         }
4530       else
4531         {
4532           struct charset *charset;
4533
4534           if (preferred_charset_id >= 0)
4535             {
4536               int result;
4537
4538               charset = CHARSET_FROM_ID (preferred_charset_id);
4539               CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
4540               if (! result)
4541                 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4542                                      NULL, charset);
4543             }
4544           else
4545             CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4546                                  NULL, charset);
4547           if (!charset)
4548             {
4549               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4550                 {
4551                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4552                   charset = CHARSET_FROM_ID (charset_ascii);
4553                 }
4554               else
4555                 {
4556                   c = coding->default_char;
4557                   CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4558                                        charset_list, NULL, charset);
4559                 }
4560             }
4561           ENCODE_ISO_CHARACTER (charset, c);
4562         }
4563     }
4564
4565   if (coding->mode & CODING_MODE_LAST_BLOCK
4566       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4567     {
4568       ASSURE_DESTINATION (safe_room);
4569       ENCODE_RESET_PLANE_AND_REGISTER ();
4570     }
4571   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4572   CODING_ISO_BOL (coding) = bol_designation;
4573   coding->produced_char += produced_chars;
4574   coding->produced = dst - coding->destination;
4575   return 0;
4576 }
4577
4578 \f
4579 /*** 8. SJIS and BIG5 handlers ***/
4580
4581 /* Although SJIS and BIG5 are not ISO's coding system, they are used
4582    quite widely.  So, for the moment, Emacs supports them in the bare
4583    C code.  But, in the future, they may be supported only by CCL.  */
4584
4585 /* SJIS is a coding system encoding three character sets: ASCII, right
4586    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4587    as is.  A character of charset katakana-jisx0201 is encoded by
4588    "position-code + 0x80".  A character of charset japanese-jisx0208
4589    is encoded in two bytes; its two position-codes are divided and
4590    shifted so that they fit in the ranges below.
4591
4592    --- CODE RANGE of SJIS ---
4593    (character set)      (range)
4594    ASCII                0x00 .. 0x7F
4595    KATAKANA-JISX0201    0xA0 .. 0xDF
4596    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4597             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4598    -------------------------------
4599
4600 */
4601
4602 /* BIG5 is a coding system encoding two character sets: ASCII and
4603    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4604    character set and each of its characters is encoded in two bytes.
4605
4606    --- CODE RANGE of BIG5 ---
4607    (character set)      (range)
4608    ASCII                0x00 .. 0x7F
4609    Big5 (1st byte)      0xA1 .. 0xFE
4610         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4611    --------------------------
4612
4613   */
4614
4615 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4616    Check if a text is encoded in SJIS.  If it is, return
4617    CATEGORY_MASK_SJIS, else return 0.  */
4618
4619 static int
4620 detect_coding_sjis (struct coding_system *coding,
4621                     struct coding_detection_info *detect_info)
4622 {
4623   const unsigned char *src = coding->source, *src_base;
4624   const unsigned char *src_end = coding->source + coding->src_bytes;
4625   int multibytep = coding->src_multibyte;
4626   EMACS_INT consumed_chars = 0;
4627   int found = 0;
4628   int c;
4629   Lisp_Object attrs, charset_list;
4630   int max_first_byte_of_2_byte_code;
4631
4632   CODING_GET_INFO (coding, attrs, charset_list);
4633   max_first_byte_of_2_byte_code
4634     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4635
4636   detect_info->checked |= CATEGORY_MASK_SJIS;
4637   /* A coding system of this category is always ASCII compatible.  */
4638   src += coding->head_ascii;
4639
4640   while (1)
4641     {
4642       src_base = src;
4643       ONE_MORE_BYTE (c);
4644       if (c < 0x80)
4645         continue;
4646       if ((c >= 0x81 && c <= 0x9F)
4647           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4648         {
4649           ONE_MORE_BYTE (c);
4650           if (c < 0x40 || c == 0x7F || c > 0xFC)
4651             break;
4652           found = CATEGORY_MASK_SJIS;
4653         }
4654       else if (c >= 0xA0 && c < 0xE0)
4655         found = CATEGORY_MASK_SJIS;
4656       else
4657         break;
4658     }
4659   detect_info->rejected |= CATEGORY_MASK_SJIS;
4660   return 0;
4661
4662  no_more_source:
4663   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4664     {
4665       detect_info->rejected |= CATEGORY_MASK_SJIS;
4666       return 0;
4667     }
4668   detect_info->found |= found;
4669   return 1;
4670 }
4671
4672 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4673    Check if a text is encoded in BIG5.  If it is, return
4674    CATEGORY_MASK_BIG5, else return 0.  */
4675
4676 static int
4677 detect_coding_big5 (struct coding_system *coding,
4678                     struct coding_detection_info *detect_info)
4679 {
4680   const unsigned char *src = coding->source, *src_base;
4681   const unsigned char *src_end = coding->source + coding->src_bytes;
4682   int multibytep = coding->src_multibyte;
4683   EMACS_INT consumed_chars = 0;
4684   int found = 0;
4685   int c;
4686
4687   detect_info->checked |= CATEGORY_MASK_BIG5;
4688   /* A coding system of this category is always ASCII compatible.  */
4689   src += coding->head_ascii;
4690
4691   while (1)
4692     {
4693       src_base = src;
4694       ONE_MORE_BYTE (c);
4695       if (c < 0x80)
4696         continue;
4697       if (c >= 0xA1)
4698         {
4699           ONE_MORE_BYTE (c);
4700           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4701             return 0;
4702           found = CATEGORY_MASK_BIG5;
4703         }
4704       else
4705         break;
4706     }
4707   detect_info->rejected |= CATEGORY_MASK_BIG5;
4708   return 0;
4709
4710  no_more_source:
4711   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4712     {
4713       detect_info->rejected |= CATEGORY_MASK_BIG5;
4714       return 0;
4715     }
4716   detect_info->found |= found;
4717   return 1;
4718 }
4719
4720 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4721    Decode the SJIS bytes of CODING's source, starting at offset
        coding->consumed, into characters appended to CODING's charbuf.

        The first three elements of the coding system's charset list are
        used as the roman, kana and kanji charsets; an optional fourth
        element is used for two-byte codes whose lead byte is in
        0xF0..0xFC.  A byte that does not start a valid sequence is
        stored as an eight-bit character (BYTE8_TO_CHAR) and counted in
        coding->errors.  Before returning, coding->consumed,
        coding->consumed_char and coding->charbuf_used are updated.  */
4722
4723 static void
4724 decode_coding_sjis (struct coding_system *coding)
4725 {
4726   const unsigned char *src = coding->source + coding->consumed;
4727   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* Start of the character being decoded; consumption is reported
          up to this point only.  */
4728   const unsigned char *src_base;
4729   int *charbuf = coding->charbuf + coding->charbuf_used;
4730   /* We may produce one charset annotation in one loop and one more at
4731      the end.  */
4732   int *charbuf_end
4733     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4734   EMACS_INT consumed_chars = 0, consumed_chars_base;
4735   int multibytep = coding->src_multibyte;
4736   struct charset *charset_roman, *charset_kanji, *charset_kana;
4737   struct charset *charset_kanji2;
4738   Lisp_Object attrs, charset_list, val;
4739   EMACS_INT char_offset = coding->produced_char;
       /* LAST_OFFSET and LAST_ID describe the current run of non-ASCII
          characters that still needs a charset annotation.  */
4740   EMACS_INT last_offset = char_offset;
4741   int last_id = charset_ascii;
4742   int eol_dos =
4743     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte read ahead after a CR (DOS end-of-line only), or -1.  */
4744   int byte_after_cr = -1;
4745
4746   CODING_GET_INFO (coding, attrs, charset_list);
4747
       /* Pick the charsets from the head of the charset list; the fourth
          one is optional.  */
4748   val = charset_list;
4749   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4750   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4751   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4752   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4753
4754   while (1)
4755     {
4756       int c, c1;
4757       struct charset *charset;
4758
4759       src_base = src;
4760       consumed_chars_base = consumed_chars;
4761
4762       if (charbuf >= charbuf_end)
4763         {
               /* No room left in charbuf.  The byte read ahead after a
                  CR has not been decoded yet, so step back over it.  */
4764           if (byte_after_cr >= 0)
4765             src_base--;
4766           break;
4767         }
4768
4769       if (byte_after_cr >= 0)
4770         c = byte_after_cr, byte_after_cr = -1;
4771       else
4772         ONE_MORE_BYTE (c);
4773       if (c < 0)
4774         goto invalid_code;
4775       if (c < 0x80)
4776         {
               /* With DOS end-of-line, read the byte after a CR now; it
                  is decoded on the next iteration.  Presumably this keeps
                  a CR at the very end of the source unconsumed until the
                  following byte is available -- TODO confirm against
                  ONE_MORE_BYTE.  */
4777           if (eol_dos && c == '\r')
4778             ONE_MORE_BYTE (byte_after_cr);
4779           charset = charset_roman;
4780         }
4781       else if (c == 0x80 || c == 0xA0)
4782         goto invalid_code;
4783       else if (c >= 0xA1 && c <= 0xDF)
4784         {
4785           /* SJIS -> JISX0201-Kana */
4786           c &= 0x7F;
4787           charset = charset_kana;
4788         }
4789       else if (c <= 0xEF)
4790         {
4791           /* SJIS -> JISX0208 */
4792           ONE_MORE_BYTE (c1);
4793           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4794             goto invalid_code;
4795           c = (c << 8) | c1;
4796           SJIS_TO_JIS (c);
4797           charset = charset_kanji;
4798         }
4799       else if (c <= 0xFC && charset_kanji2)
4800         {
4801           /* SJIS -> JISX0213-2 */
4802           ONE_MORE_BYTE (c1);
4803           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4804             goto invalid_code;
4805           c = (c << 8) | c1;
4806           SJIS_TO_JIS2 (c);
4807           charset = charset_kanji2;
4808         }
4809       else
4810         goto invalid_code;
           /* A non-ASCII character of a charset other than LAST_ID
              starts a new run: annotate the run that just ended (unless
              LAST_ID is still charset_ascii) and start counting here.  */
4811       if (charset->id != charset_ascii
4812           && last_id != charset->id)
4813         {
4814           if (last_id != charset_ascii)
4815             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4816           last_id = charset->id;
4817           last_offset = char_offset;
4818         }
4819       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4820       *charbuf++ = c;
4821       char_offset++;
4822       continue;
4823
4824     invalid_code:
           /* Rewind to the start of the offending sequence and store
              just one element for it: an eight-bit character for a byte,
              or the negated value when C is negative.
              NOTE(review): when C was taken from byte_after_cr, SRC_BASE
              was set after that byte had already been read, so the
              rewind below re-reads the byte that follows it instead --
              confirm that an invalid byte right after a CR is not
              dropped.  */
4825       src = src_base;
4826       consumed_chars = consumed_chars_base;
4827       ONE_MORE_BYTE (c);
4828       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4829       char_offset++;
4830       coding->errors++;
4831     }
4832
4833  no_more_source:
       /* Annotate the last pending non-ASCII run, then report how far
          decoding got.  Only data before SRC_BASE counts as consumed.  */
4834   if (last_id != charset_ascii)
4835     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4836   coding->consumed_char += consumed_chars_base;
4837   coding->consumed = src_base - coding->source;
4838   coding->charbuf_used = charbuf - coding->charbuf;
4839 }
4840
     /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Decode the BIG5 bytes of CODING's source, starting at offset
        coding->consumed, into characters appended to CODING's charbuf.

        The first two elements of the coding system's charset list are
        used as the roman and Big5 charsets.  A byte that does not start
        a valid sequence is stored as an eight-bit character
        (BYTE8_TO_CHAR) and counted in coding->errors.  Before returning,
        coding->consumed, coding->consumed_char and coding->charbuf_used
        are updated.  */
4841 static void
4842 decode_coding_big5 (struct coding_system *coding)
4843 {
4844   const unsigned char *src = coding->source + coding->consumed;
4845   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* Start of the character being decoded; consumption is reported
          up to this point only.  */
4846   const unsigned char *src_base;
4847   int *charbuf = coding->charbuf + coding->charbuf_used;
4848   /* We may produce one charset annotation in one loop and one more at
4849      the end.  */
4850   int *charbuf_end
4851     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4852   EMACS_INT consumed_chars = 0, consumed_chars_base;
4853   int multibytep = coding->src_multibyte;
4854   struct charset *charset_roman, *charset_big5;
4855   Lisp_Object attrs, charset_list, val;
4856   EMACS_INT char_offset = coding->produced_char;
       /* LAST_OFFSET and LAST_ID describe the current run of non-ASCII
          characters that still needs a charset annotation.  */
4857   EMACS_INT last_offset = char_offset;
4858   int last_id = charset_ascii;
4859   int eol_dos =
4860     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte read ahead after a CR (DOS end-of-line only), or -1.  */
4861   int byte_after_cr = -1;
4862
4863   CODING_GET_INFO (coding, attrs, charset_list);
4864   val = charset_list;
4865   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4866   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4867
4868   while (1)
4869     {
4870       int c, c1;
4871       struct charset *charset;
4872
4873       src_base = src;
4874       consumed_chars_base = consumed_chars;
4875
4876       if (charbuf >= charbuf_end)
4877         {
               /* No room left in charbuf.  The byte read ahead after a
                  CR has not been decoded yet, so step back over it.  */
4878           if (byte_after_cr >= 0)
4879             src_base--;
4880           break;
4881         }
4882
4883       if (byte_after_cr >= 0)
4884         c = byte_after_cr, byte_after_cr = -1;
4885       else
4886         ONE_MORE_BYTE (c);
4887
4888       if (c < 0)
4889         goto invalid_code;
4890       if (c < 0x80)
4891         {
               /* With DOS end-of-line, read the byte after a CR now; it
                  is decoded on the next iteration.  */
4892           if (eol_dos && c == '\r')
4893             ONE_MORE_BYTE (byte_after_cr);
4894           charset = charset_roman;
4895         }
4896       else
4897         {
4898           /* BIG5 -> Big5 */
               /* Lead byte must be in 0xA1..0xFE; trail byte in
                  0x40..0x7E or 0xA1..0xFE.  */
4899           if (c < 0xA1 || c > 0xFE)
4900             goto invalid_code;
4901           ONE_MORE_BYTE (c1);
4902           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4903             goto invalid_code;
4904           c = c << 8 | c1;
4905           charset = charset_big5;
4906         }
           /* A non-ASCII character of a charset other than LAST_ID
              starts a new run: annotate the run that just ended (unless
              LAST_ID is still charset_ascii) and start counting here.  */
4907       if (charset->id != charset_ascii
4908           && last_id != charset->id)
4909         {
4910           if (last_id != charset_ascii)
4911             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4912           last_id = charset->id;
4913           last_offset = char_offset;
4914         }
4915       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4916       *charbuf++ = c;
4917       char_offset++;
4918       continue;
4919
4920     invalid_code:
           /* Rewind to the start of the offending sequence and store
              just one element for it: an eight-bit character for a byte,
              or the negated value when C is negative.
              NOTE(review): when C was taken from byte_after_cr, SRC_BASE
              was set after that byte had already been read, so the
              rewind below re-reads the byte that follows it instead --
              confirm that an invalid byte right after a CR is not
              dropped.  */
4921       src = src_base;
4922       consumed_chars = consumed_chars_base;
4923       ONE_MORE_BYTE (c);
4924       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4925       char_offset++;
4926       coding->errors++;
4927     }
4928
4929  no_more_source:
       /* Annotate the last pending non-ASCII run, then report how far
          decoding got.  Only data before SRC_BASE counts as consumed.  */
4930   if (last_id != charset_ascii)
4931     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4932   coding->consumed_char += consumed_chars_base;
4933   coding->consumed = src_base - coding->source;
4934   coding->charbuf_used = charbuf - coding->charbuf;
4935 }
4936
4937 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4938    This function can encode charsets `ascii', `katakana-jisx0201',
4939    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4940    are sure that all these charsets are registered as official charset
4941    (i.e. do not have extended leading-codes).  Characters of other
4942    charsets are produced without any encoding.  If SJIS_P is 1, encode
4943    SJIS text, else encode BIG5 text.  */
4944
4945 static int
4946 encode_coding_sjis (struct coding_system *coding)
4947 {
4948   int multibytep = coding->dst_multibyte;
4949   int *charbuf = coding->charbuf;
4950   int *charbuf_end = charbuf + coding->charbuf_used;
4951   unsigned char *dst = coding->destination + coding->produced;
4952   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4953   int safe_room = 4;
4954   EMACS_INT produced_chars = 0;
4955   Lisp_Object attrs, charset_list, val;
4956   int ascii_compatible;
4957   struct charset *charset_kanji, *charset_kana;
4958   struct charset *charset_kanji2;
4959   int c;
4960
4961   CODING_GET_INFO (coding, attrs, charset_list);
4962   val = XCDR (charset_list);
4963   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4964   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4965   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4966
4967   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4968
4969   while (charbuf < charbuf_end)
4970     {
4971       ASSURE_DESTINATION (safe_room);
4972       c = *charbuf++;
4973       /* Now encode the character C.  */
4974       if (ASCII_CHAR_P (c) && ascii_compatible)
4975         EMIT_ONE_ASCII_BYTE (c);
4976       else if (CHAR_BYTE8_P (c))
4977         {
4978           c = CHAR_TO_BYTE8 (c);
4979           EMIT_ONE_BYTE (c);
4980         }
4981       else
4982         {
4983           unsigned code;
4984           struct charset *charset;
4985           CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4986                                &code, charset);
4987
4988           if (!charset)
4989             {
4990               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4991                 {
4992                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4993                   charset = CHARSET_FROM_ID (charset_ascii);
4994                 }
4995               else
4996                 {
4997                   c = coding->default_char;
4998                   CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4999                                        charset_list, &code, charset);
5000                 }
5001             }
5002           if (code == CHARSET_INVALID_CODE (charset))
5003             abort ();
5004           if (charset == charset_kanji)
5005             {
5006               int c1, c2;
5007               JIS_TO_SJIS (code);
5008               c1 = code >> 8, c2 = code & 0xFF;
5009               EMIT_TWO_BYTES (c1, c2);
5010             }
5011           else if (charset == charset_kana)
5012             EMIT_ONE_BYTE (code | 0x80);
5013           else if (charset_kanji2 && charset == charset_kanji2)
5014             {
5015               int c1, c2;
5016
5017               c1 = code >> 8;
5018               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5019                   || c1 == 0x28
5020                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5021                 {
5022                   JIS_TO_SJIS2 (code);
5023                   c1 = code >> 8, c2 = code & 0xFF;
5024                   EMIT_TWO_BYTES (c1, c2);
5025                 }
5026               else
5027                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5028             }
5029           else
5030             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5031         }
5032     }
5033   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5034   coding->produced_char += produced_chars;
5035   coding->produced = dst - coding->destination;
5036   return 0;
5037 }
5038
5039 static int
5040 encode_coding_big5 (struct coding_system *coding)
5041 {
5042   int multibytep = coding->dst_multibyte;
5043   int *charbuf = coding->charbuf;
5044   int *charbuf_end = charbuf + coding->charbuf_used;
5045   unsigned char *dst = coding->destination + coding->produced;
5046   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5047   int safe_room = 4;
5048   EMACS_INT produced_chars = 0;
5049   Lisp_Object attrs, charset_list, val;
5050   int ascii_compatible;
5051   struct charset *charset_big5;
5052   int c;
5053
5054   CODING_GET_INFO (coding, attrs, charset_list);
5055   val = XCDR (charset_list);
5056   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5057   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5058
5059   while (charbuf < charbuf_end)
5060     {
5061       ASSURE_DESTINATION (safe_room);
5062       c = *charbuf++;
5063       /* Now encode the character C.  */
5064       if (ASCII_CHAR_P (c) && ascii_compatible)
5065         EMIT_ONE_ASCII_BYTE (c);
5066       else if (CHAR_BYTE8_P (c))
5067         {
5068           c = CHAR_TO_BYTE8 (c);
5069           EMIT_ONE_BYTE (c);
5070         }
5071       else
5072         {
5073           unsigned code;
5074           struct charset *charset;
5075           CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5076                                &code, charset);
5077
5078           if (! charset)
5079             {
5080               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5081                 {
5082                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5083                   charset = CHARSET_FROM_ID (charset_ascii);
5084                 }
5085               else
5086                 {
5087                   c = coding->default_char;
5088                   CODING_CHAR_CHARSET (coding, dst, dst_end, c,
5089                                        charset_list, &code, charset);
5090                 }
5091             }
5092           if (code == CHARSET_INVALID_CODE (charset))
5093             abort ();
5094           if (charset == charset_big5)
5095             {
5096               int c1, c2;
5097
5098               c1 = code >> 8, c2 = code & 0xFF;
5099               EMIT_TWO_BYTES (c1, c2);
5100             }
5101           else
5102             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5103         }
5104     }
5105   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5106   coding->produced_char += produced_chars;
5107   coding->produced = dst - coding->destination;
5108   return 0;
5109 }
5110
5111 \f
5112 /*** 9. CCL handlers ***/
5113
5114 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5115    Check if a text is encoded in a coding system of which
5116    encoder/decoder are written in CCL program.  If it is, return
5117    CATEGORY_MASK_CCL, else return 0.  */
5118
5119 static int
5120 detect_coding_ccl (struct coding_system *coding,
5121                    struct coding_detection_info *detect_info)
5122 {
5123   const unsigned char *src = coding->source, *src_base;
5124   const unsigned char *src_end = coding->source + coding->src_bytes;
5125   int multibytep = coding->src_multibyte;
5126   EMACS_INT consumed_chars = 0;
5127   int found = 0;
5128   unsigned char *valids;
5129   EMACS_INT head_ascii = coding->head_ascii;
5130   Lisp_Object attrs;
5131
5132   detect_info->checked |= CATEGORY_MASK_CCL;
5133
5134   coding = &coding_categories[coding_category_ccl];
5135   valids = CODING_CCL_VALIDS (coding);
5136   attrs = CODING_ID_ATTRS (coding->id);
5137   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5138     src += head_ascii;
5139
5140   while (1)
5141     {
5142       int c;
5143
5144       src_base = src;
5145       ONE_MORE_BYTE (c);
5146       if (c < 0 || ! valids[c])
5147         break;
5148       if ((valids[c] > 1))
5149         found = CATEGORY_MASK_CCL;
5150     }
5151   detect_info->rejected |= CATEGORY_MASK_CCL;
5152   return 0;
5153
5154  no_more_source:
5155   detect_info->found |= found;
5156   return 1;
5157 }
5158
5159 static void
5160 decode_coding_ccl (struct coding_system *coding)
5161 {
5162   const unsigned char *src = coding->source + coding->consumed;
5163   const unsigned char *src_end = coding->source + coding->src_bytes;
5164   int *charbuf = coding->charbuf + coding->charbuf_used;
5165   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5166   EMACS_INT consumed_chars = 0;
5167   int multibytep = coding->src_multibyte;
5168   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5169   int source_charbuf[1024];
5170   int source_byteidx[1025];
5171   Lisp_Object attrs, charset_list;
5172
5173   CODING_GET_INFO (coding, attrs, charset_list);
5174
5175   while (1)
5176     {
5177       const unsigned char *p = src;
5178       int i = 0;
5179
5180       if (multibytep)
5181         {
5182           while (i < 1024 && p < src_end)
5183             {
5184               source_byteidx[i] = p - src;
5185               source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5186             }
5187           source_byteidx[i] = p - src;
5188         }
5189       else
5190         while (i < 1024 && p < src_end)
5191           source_charbuf[i++] = *p++;
5192
5193       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5194         ccl->last_block = 1;
5195       ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5196                   charset_list);
5197       charbuf += ccl->produced;
5198       if (multibytep)
5199         src += source_byteidx[ccl->consumed];
5200       else
5201         src += ccl->consumed;
5202       consumed_chars += ccl->consumed;
5203       if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5204         break;
5205     }
5206
5207   switch (ccl->status)
5208     {
5209     case CCL_STAT_SUSPEND_BY_SRC:
5210       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5211       break;
5212     case CCL_STAT_SUSPEND_BY_DST:
5213       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5214       break;
5215     case CCL_STAT_QUIT:
5216     case CCL_STAT_INVALID_CMD:
5217       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5218       break;
5219     default:
5220       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5221       break;
5222     }
5223   coding->consumed_char += consumed_chars;
5224   coding->consumed = src - coding->source;
5225   coding->charbuf_used = charbuf - coding->charbuf;
5226 }
5227
5228 static int
5229 encode_coding_ccl (struct coding_system *coding)
5230 {
5231   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5232   int multibytep = coding->dst_multibyte;
5233   int *charbuf = coding->charbuf;
5234   int *charbuf_end = charbuf + coding->charbuf_used;
5235   unsigned char *dst = coding->destination + coding->produced;
5236   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5237   int destination_charbuf[1024];
5238   EMACS_INT produced_chars = 0;
5239   int i;
5240   Lisp_Object attrs, charset_list;
5241
5242   CODING_GET_INFO (coding, attrs, charset_list);
5243   if (coding->consumed_char == coding->src_chars
5244       && coding->mode & CODING_MODE_LAST_BLOCK)
5245     ccl->last_block = 1;
5246
5247   while (charbuf < charbuf_end)
5248     {
5249       ccl_driver (ccl, charbuf, destination_charbuf,
5250                   charbuf_end - charbuf, 1024, charset_list);
5251       if (multibytep)
5252         {
5253           ASSURE_DESTINATION (ccl->produced * 2);
5254           for (i = 0; i < ccl->produced; i++)
5255             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5256         }
5257       else
5258         {
5259           ASSURE_DESTINATION (ccl->produced);
5260           for (i = 0; i < ccl->produced; i++)
5261             *dst++ = destination_charbuf[i] & 0xFF;
5262           produced_chars += ccl->produced;
5263         }
5264       charbuf += ccl->consumed;
5265       if (ccl->status == CCL_STAT_QUIT
5266           || ccl->status == CCL_STAT_INVALID_CMD)
5267         break;
5268     }
5269
5270   switch (ccl->status)
5271     {
5272     case CCL_STAT_SUSPEND_BY_SRC:
5273       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5274       break;
5275     case CCL_STAT_SUSPEND_BY_DST:
5276       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5277       break;
5278     case CCL_STAT_QUIT:
5279     case CCL_STAT_INVALID_CMD:
5280       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5281       break;
5282     default:
5283       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5284       break;
5285     }
5286
5287   coding->produced_char += produced_chars;
5288   coding->produced = dst - coding->destination;
5289   return 0;
5290 }
5291
5292
5293 \f
5294 /*** 10, 11. no-conversion handlers ***/
5295
5296 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
5297
5298 static void
5299 decode_coding_raw_text (struct coding_system *coding)
5300 {
5301   int eol_dos =
5302     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5303
5304   coding->chars_at_source = 1;
5305   coding->consumed_char = coding->src_chars;
5306   coding->consumed = coding->src_bytes;
5307   if (eol_dos && coding->source[coding->src_bytes - 1] == '\r')
5308     {
5309       coding->consumed_char--;
5310       coding->consumed--;
5311       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5312     }
5313   else
5314     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5315 }
5316
5317 static int
5318 encode_coding_raw_text (struct coding_system *coding)
5319 {
5320   int multibytep = coding->dst_multibyte;
5321   int *charbuf = coding->charbuf;
5322   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5323   unsigned char *dst = coding->destination + coding->produced;
5324   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5325   EMACS_INT produced_chars = 0;
5326   int c;
5327
5328   if (multibytep)
5329     {
5330       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5331
5332       if (coding->src_multibyte)
5333         while (charbuf < charbuf_end)
5334           {
5335             ASSURE_DESTINATION (safe_room);
5336             c = *charbuf++;
5337             if (ASCII_CHAR_P (c))
5338               EMIT_ONE_ASCII_BYTE (c);
5339             else if (CHAR_BYTE8_P (c))
5340               {
5341                 c = CHAR_TO_BYTE8 (c);
5342                 EMIT_ONE_BYTE (c);
5343               }
5344             else
5345               {
5346                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5347
5348                 CHAR_STRING_ADVANCE (c, p1);
5349                 do
5350                   {
5351                     EMIT_ONE_BYTE (*p0);
5352                     p0++;
5353                   }
5354                 while (p0 < p1);
5355               }
5356           }
5357       else
5358         while (charbuf < charbuf_end)
5359           {
5360             ASSURE_DESTINATION (safe_room);
5361             c = *charbuf++;
5362             EMIT_ONE_BYTE (c);
5363           }
5364     }
5365   else
5366     {
5367       if (coding->src_multibyte)
5368         {
5369           int safe_room = MAX_MULTIBYTE_LENGTH;
5370
5371           while (charbuf < charbuf_end)
5372             {
5373               ASSURE_DESTINATION (safe_room);
5374               c = *charbuf++;
5375               if (ASCII_CHAR_P (c))
5376                 *dst++ = c;
5377               else if (CHAR_BYTE8_P (c))
5378                 *dst++ = CHAR_TO_BYTE8 (c);
5379               else
5380                 CHAR_STRING_ADVANCE (c, dst);
5381             }
5382         }
5383       else
5384         {
5385           ASSURE_DESTINATION (charbuf_end - charbuf);
5386           while (charbuf < charbuf_end && dst < dst_end)
5387             *dst++ = *charbuf++;
5388         }
5389       produced_chars = dst - (coding->destination + coding->produced);
5390     }
5391   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5392   coding->produced_char += produced_chars;
5393   coding->produced = dst - coding->destination;
5394   return 0;
5395 }
5396
5397 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5398    Check if a text is encoded in a charset-based coding system.  If it
5399    is, return 1, else return 0.  */
5400
5401 static int
5402 detect_coding_charset (struct coding_system *coding,
5403                        struct coding_detection_info *detect_info)
5404 {
5405   const unsigned char *src = coding->source, *src_base;
5406   const unsigned char *src_end = coding->source + coding->src_bytes;
5407   int multibytep = coding->src_multibyte;
5408   EMACS_INT consumed_chars = 0;
5409   Lisp_Object attrs, valids, name;
5410   int found = 0;
5411   EMACS_INT head_ascii = coding->head_ascii;
5412   int check_latin_extra = 0;
5413
5414   detect_info->checked |= CATEGORY_MASK_CHARSET;
5415
5416   coding = &coding_categories[coding_category_charset];
5417   attrs = CODING_ID_ATTRS (coding->id);
5418   valids = AREF (attrs, coding_attr_charset_valids);
5419   name = CODING_ID_NAME (coding->id);
5420   if (strncmp (SSDATA (SYMBOL_NAME (name)),
5421                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5422       || strncmp (SSDATA (SYMBOL_NAME (name)),
5423                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5424     check_latin_extra = 1;
5425
5426   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5427     src += head_ascii;
5428
5429   while (1)
5430     {
5431       int c;
5432       Lisp_Object val;
5433       struct charset *charset;
5434       int dim, idx;
5435
5436       src_base = src;
5437       ONE_MORE_BYTE (c);
5438       if (c < 0)
5439         continue;
5440       val = AREF (valids, c);
5441       if (NILP (val))
5442         break;
5443       if (c >= 0x80)
5444         {
5445           if (c < 0xA0
5446               && check_latin_extra
5447               && (!VECTORP (Vlatin_extra_code_table)
5448                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5449             break;
5450           found = CATEGORY_MASK_CHARSET;
5451         }
5452       if (INTEGERP (val))
5453         {
5454           charset = CHARSET_FROM_ID (XFASTINT (val));
5455           dim = CHARSET_DIMENSION (charset);
5456           for (idx = 1; idx < dim; idx++)
5457             {
5458               if (src == src_end)
5459                 goto too_short;
5460               ONE_MORE_BYTE (c);
5461               if (c < charset->code_space[(dim - 1 - idx) * 4]
5462                   || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5463                 break;
5464             }
5465           if (idx < dim)
5466             break;
5467         }
5468       else
5469         {
5470           idx = 1;
5471           for (; CONSP (val); val = XCDR (val))
5472             {
5473               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5474               dim = CHARSET_DIMENSION (charset);
5475               while (idx < dim)
5476                 {
5477                   if (src == src_end)
5478                     goto too_short;
5479                   ONE_MORE_BYTE (c);
5480                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5481                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5482                     break;
5483                   idx++;
5484                 }
5485               if (idx == dim)
5486                 {
5487                   val = Qnil;
5488                   break;
5489                 }
5490             }
5491           if (CONSP (val))
5492             break;
5493         }
5494     }
5495  too_short:
5496   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5497   return 0;
5498
5499  no_more_source:
5500   detect_info->found |= found;
5501   return 1;
5502 }
5503
5504 static void
5505 decode_coding_charset (struct coding_system *coding)
5506 {
5507   const unsigned char *src = coding->source + coding->consumed;
5508   const unsigned char *src_end = coding->source + coding->src_bytes;
5509   const unsigned char *src_base;
5510   int *charbuf = coding->charbuf + coding->charbuf_used;
5511   /* We may produce one charset annotation in one loop and one more at
5512      the end.  */
5513   int *charbuf_end
5514     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5515   EMACS_INT consumed_chars = 0, consumed_chars_base;
5516   int multibytep = coding->src_multibyte;
5517   Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
5518   Lisp_Object valids;
5519   EMACS_INT char_offset = coding->produced_char;
5520   EMACS_INT last_offset = char_offset;
5521   int last_id = charset_ascii;
5522   int eol_dos =
5523     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5524   int byte_after_cr = -1;
5525
5526   valids = AREF (attrs, coding_attr_charset_valids);
5527
5528   while (1)
5529     {
5530       int c;
5531       Lisp_Object val;
5532       struct charset *charset;
5533       int dim;
5534       int len = 1;
5535       unsigned code;
5536
5537       src_base = src;
5538       consumed_chars_base = consumed_chars;
5539
5540       if (charbuf >= charbuf_end)
5541         {
5542           if (byte_after_cr >= 0)
5543             src_base--;
5544           break;
5545         }
5546
5547       if (byte_after_cr >= 0)
5548         {
5549           c = byte_after_cr;
5550           byte_after_cr = -1;
5551         }
5552       else
5553         {
5554           ONE_MORE_BYTE (c);
5555           if (eol_dos && c == '\r')
5556             ONE_MORE_BYTE (byte_after_cr);
5557         }
5558       if (c < 0)
5559         goto invalid_code;
5560       code = c;
5561
5562       val = AREF (valids, c);
5563       if (! INTEGERP (val) && ! CONSP (val))
5564         goto invalid_code;
5565       if (INTEGERP (val))
5566         {
5567           charset = CHARSET_FROM_ID (XFASTINT (val));
5568           dim = CHARSET_DIMENSION (charset);
5569           while (len < dim)
5570             {
5571               ONE_MORE_BYTE (c);
5572               code = (code << 8) | c;
5573               len++;
5574             }
5575           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5576                               charset, code, c);
5577         }
5578       else
5579         {
5580           /* VAL is a list of charset IDs.  It is assured that the
5581              list is sorted by charset dimensions (smaller one
5582              comes first).  */
5583           while (CONSP (val))
5584             {
5585               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5586               dim = CHARSET_DIMENSION (charset);
5587               while (len < dim)
5588                 {
5589                   ONE_MORE_BYTE (c);
5590                   code = (code << 8) | c;
5591                   len++;
5592                 }
5593               CODING_DECODE_CHAR (coding, src, src_base,
5594                                   src_end, charset, code, c);
5595               if (c >= 0)
5596                 break;
5597               val = XCDR (val);
5598             }
5599         }
5600       if (c < 0)
5601         goto invalid_code;
5602       if (charset->id != charset_ascii
5603           && last_id != charset->id)
5604         {
5605           if (last_id != charset_ascii)
5606             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5607           last_id = charset->id;
5608           last_offset = char_offset;
5609         }
5610
5611       *charbuf++ = c;
5612       char_offset++;
5613       continue;
5614
5615     invalid_code:
5616       src = src_base;
5617       consumed_chars = consumed_chars_base;
5618       ONE_MORE_BYTE (c);
5619       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5620       char_offset++;
5621       coding->errors++;
5622     }
5623
5624  no_more_source:
5625   if (last_id != charset_ascii)
5626     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5627   coding->consumed_char += consumed_chars_base;
5628   coding->consumed = src_base - coding->source;
5629   coding->charbuf_used = charbuf - coding->charbuf;
5630 }
5631
5632 static int
5633 encode_coding_charset (struct coding_system *coding)
5634 {
5635   int multibytep = coding->dst_multibyte;
5636   int *charbuf = coding->charbuf;
5637   int *charbuf_end = charbuf + coding->charbuf_used;
5638   unsigned char *dst = coding->destination + coding->produced;
5639   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5640   int safe_room = MAX_MULTIBYTE_LENGTH;
5641   EMACS_INT produced_chars = 0;
5642   Lisp_Object attrs, charset_list;
5643   int ascii_compatible;
5644   int c;
5645
5646   CODING_GET_INFO (coding, attrs, charset_list);
5647   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5648
5649   while (charbuf < charbuf_end)
5650     {
5651       struct charset *charset;
5652       unsigned code;
5653
5654       ASSURE_DESTINATION (safe_room);
5655       c = *charbuf++;
5656       if (ascii_compatible && ASCII_CHAR_P (c))
5657         EMIT_ONE_ASCII_BYTE (c);
5658       else if (CHAR_BYTE8_P (c))
5659         {
5660           c = CHAR_TO_BYTE8 (c);
5661           EMIT_ONE_BYTE (c);
5662         }
5663       else
5664         {
5665           CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5666                                &code, charset);
5667
5668           if (charset)
5669             {
5670               if (CHARSET_DIMENSION (charset) == 1)
5671                 EMIT_ONE_BYTE (code);
5672               else if (CHARSET_DIMENSION (charset) == 2)
5673                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5674               else if (CHARSET_DIMENSION (charset) == 3)
5675                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5676               else
5677                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5678                                  (code >> 8) & 0xFF, code & 0xFF);
5679             }
5680           else
5681             {
5682               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5683                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5684               else
5685                 c = coding->default_char;
5686               EMIT_ONE_BYTE (c);
5687             }
5688         }
5689     }
5690
5691   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5692   coding->produced_char += produced_chars;
5693   coding->produced = dst - coding->destination;
5694   return 0;
5695 }
5696
5697 \f
/*** 10. C library functions ***/
5699
5700 /* Setup coding context CODING from information about CODING_SYSTEM.
5701    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5702    CODING_SYSTEM is invalid, signal an error.  */
5703
5704 void
5705 setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5706 {
5707   Lisp_Object attrs;
5708   Lisp_Object eol_type;
5709   Lisp_Object coding_type;
5710   Lisp_Object val;
5711
5712   if (NILP (coding_system))
5713     coding_system = Qundecided;
5714
5715   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5716
5717   attrs = CODING_ID_ATTRS (coding->id);
5718   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5719
5720   coding->mode = 0;
5721   coding->head_ascii = -1;
5722   if (VECTORP (eol_type))
5723     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5724                             | CODING_REQUIRE_DETECTION_MASK);
5725   else if (! EQ (eol_type, Qunix))
5726     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5727                             | CODING_REQUIRE_ENCODING_MASK);
5728   else
5729     coding->common_flags = 0;
5730   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5731     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5732   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5733     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5734   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5735     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5736
5737   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5738   coding->max_charset_id = SCHARS (val) - 1;
5739   coding->safe_charsets = SDATA (val);
5740   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5741   coding->carryover_bytes = 0;
5742
5743   coding_type = CODING_ATTR_TYPE (attrs);
5744   if (EQ (coding_type, Qundecided))
5745     {
5746       coding->detector = NULL;
5747       coding->decoder = decode_coding_raw_text;
5748       coding->encoder = encode_coding_raw_text;
5749       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5750     }
5751   else if (EQ (coding_type, Qiso_2022))
5752     {
5753       int i;
5754       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5755
5756       /* Invoke graphic register 0 to plane 0.  */
5757       CODING_ISO_INVOCATION (coding, 0) = 0;
5758       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5759       CODING_ISO_INVOCATION (coding, 1)
5760         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5761       /* Setup the initial status of designation.  */
5762       for (i = 0; i < 4; i++)
5763         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5764       /* Not single shifting initially.  */
5765       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5766       /* Beginning of buffer should also be regarded as bol. */
5767       CODING_ISO_BOL (coding) = 1;
5768       coding->detector = detect_coding_iso_2022;
5769       coding->decoder = decode_coding_iso_2022;
5770       coding->encoder = encode_coding_iso_2022;
5771       if (flags & CODING_ISO_FLAG_SAFE)
5772         coding->mode |= CODING_MODE_SAFE_ENCODING;
5773       coding->common_flags
5774         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5775             | CODING_REQUIRE_FLUSHING_MASK);
5776       if (flags & CODING_ISO_FLAG_COMPOSITION)
5777         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5778       if (flags & CODING_ISO_FLAG_DESIGNATION)
5779         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5780       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5781         {
5782           setup_iso_safe_charsets (attrs);
5783           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5784           coding->max_charset_id = SCHARS (val) - 1;
5785           coding->safe_charsets = SDATA (val);
5786         }
5787       CODING_ISO_FLAGS (coding) = flags;
5788       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5789       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5790       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5791       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5792     }
5793   else if (EQ (coding_type, Qcharset))
5794     {
5795       coding->detector = detect_coding_charset;
5796       coding->decoder = decode_coding_charset;
5797       coding->encoder = encode_coding_charset;
5798       coding->common_flags
5799         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5800     }
5801   else if (EQ (coding_type, Qutf_8))
5802     {
5803       val = AREF (attrs, coding_attr_utf_bom);
5804       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5805                                    : EQ (val, Qt) ? utf_with_bom
5806                                    : utf_without_bom);
5807       coding->detector = detect_coding_utf_8;
5808       coding->decoder = decode_coding_utf_8;
5809       coding->encoder = encode_coding_utf_8;
5810       coding->common_flags
5811         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5812       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5813         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5814     }
5815   else if (EQ (coding_type, Qutf_16))
5816     {
5817       val = AREF (attrs, coding_attr_utf_bom);
5818       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5819                                     : EQ (val, Qt) ? utf_with_bom
5820                                     : utf_without_bom);
5821       val = AREF (attrs, coding_attr_utf_16_endian);
5822       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5823                                        : utf_16_little_endian);
5824       CODING_UTF_16_SURROGATE (coding) = 0;
5825       coding->detector = detect_coding_utf_16;
5826       coding->decoder = decode_coding_utf_16;
5827       coding->encoder = encode_coding_utf_16;
5828       coding->common_flags
5829         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5830       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5831         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5832     }
5833   else if (EQ (coding_type, Qccl))
5834     {
5835       coding->detector = detect_coding_ccl;
5836       coding->decoder = decode_coding_ccl;
5837       coding->encoder = encode_coding_ccl;
5838       coding->common_flags
5839         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5840             | CODING_REQUIRE_FLUSHING_MASK);
5841     }
5842   else if (EQ (coding_type, Qemacs_mule))
5843     {
5844       coding->detector = detect_coding_emacs_mule;
5845       coding->decoder = decode_coding_emacs_mule;
5846       coding->encoder = encode_coding_emacs_mule;
5847       coding->common_flags
5848         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5849       coding->spec.emacs_mule.full_support = 1;
5850       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5851           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5852         {
5853           Lisp_Object tail, safe_charsets;
5854           int max_charset_id = 0;
5855
5856           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5857                tail = XCDR (tail))
5858             if (max_charset_id < XFASTINT (XCAR (tail)))
5859               max_charset_id = XFASTINT (XCAR (tail));
5860           safe_charsets = make_uninit_string (max_charset_id + 1);
5861           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5862           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5863                tail = XCDR (tail))
5864             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5865           coding->max_charset_id = max_charset_id;
5866           coding->safe_charsets = SDATA (safe_charsets);
5867           coding->spec.emacs_mule.full_support = 1;
5868         }
5869       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5870       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5871     }
5872   else if (EQ (coding_type, Qshift_jis))
5873     {
5874       coding->detector = detect_coding_sjis;
5875       coding->decoder = decode_coding_sjis;
5876       coding->encoder = encode_coding_sjis;
5877       coding->common_flags
5878         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5879     }
5880   else if (EQ (coding_type, Qbig5))
5881     {
5882       coding->detector = detect_coding_big5;
5883       coding->decoder = decode_coding_big5;
5884       coding->encoder = encode_coding_big5;
5885       coding->common_flags
5886         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5887     }
5888   else                          /* EQ (coding_type, Qraw_text) */
5889     {
5890       coding->detector = NULL;
5891       coding->decoder = decode_coding_raw_text;
5892       coding->encoder = encode_coding_raw_text;
5893       if (! EQ (eol_type, Qunix))
5894         {
5895           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5896           if (! VECTORP (eol_type))
5897             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5898         }
5899
5900     }
5901
5902   return;
5903 }
5904
5905 /* Return a list of charsets supported by CODING.  */
5906
5907 Lisp_Object
5908 coding_charset_list (struct coding_system *coding)
5909 {
5910   Lisp_Object attrs, charset_list;
5911
5912   CODING_GET_INFO (coding, attrs, charset_list);
5913   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5914     {
5915       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5916
5917       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5918         charset_list = Viso_2022_charset_list;
5919     }
5920   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5921     {
5922       charset_list = Vemacs_mule_charset_list;
5923     }
5924   return charset_list;
5925 }
5926
5927
5928 /* Return a list of charsets supported by CODING-SYSTEM.  */
5929
5930 Lisp_Object
5931 coding_system_charset_list (Lisp_Object coding_system)
5932 {
5933   ptrdiff_t id;
5934   Lisp_Object attrs, charset_list;
5935
5936   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5937   attrs = CODING_ID_ATTRS (id);
5938
5939   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5940     {
5941       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5942
5943       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5944         charset_list = Viso_2022_charset_list;
5945       else
5946         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5947     }
5948   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5949     {
5950       charset_list = Vemacs_mule_charset_list;
5951     }
5952   else
5953     {
5954       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5955     }
5956   return charset_list;
5957 }
5958
5959
5960 /* Return raw-text or one of its subsidiaries that has the same
5961    eol_type as CODING-SYSTEM.  */
5962
5963 Lisp_Object
5964 raw_text_coding_system (Lisp_Object coding_system)
5965 {
5966   Lisp_Object spec, attrs;
5967   Lisp_Object eol_type, raw_text_eol_type;
5968
5969   if (NILP (coding_system))
5970     return Qraw_text;
5971   spec = CODING_SYSTEM_SPEC (coding_system);
5972   attrs = AREF (spec, 0);
5973
5974   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5975     return coding_system;
5976
5977   eol_type = AREF (spec, 2);
5978   if (VECTORP (eol_type))
5979     return Qraw_text;
5980   spec = CODING_SYSTEM_SPEC (Qraw_text);
5981   raw_text_eol_type = AREF (spec, 2);
5982   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5983           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5984           : AREF (raw_text_eol_type, 2));
5985 }
5986
5987
5988 /* If CODING_SYSTEM doesn't specify end-of-line format, return one of
5989    the subsidiary that has the same eol-spec as PARENT (if it is not
5990    nil and specifies end-of-line format) or the system's setting
5991    (system_eol_type).  */
5992
5993 Lisp_Object
5994 coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
5995 {
5996   Lisp_Object spec, eol_type;
5997
5998   if (NILP (coding_system))
5999     coding_system = Qraw_text;
6000   spec = CODING_SYSTEM_SPEC (coding_system);
6001   eol_type = AREF (spec, 2);
6002   if (VECTORP (eol_type))
6003     {
6004       Lisp_Object parent_eol_type;
6005
6006       if (! NILP (parent))
6007         {
6008           Lisp_Object parent_spec;
6009
6010           parent_spec = CODING_SYSTEM_SPEC (parent);
6011           parent_eol_type = AREF (parent_spec, 2);
6012           if (VECTORP (parent_eol_type))
6013             parent_eol_type = system_eol_type;
6014         }
6015       else
6016         parent_eol_type = system_eol_type;
6017       if (EQ (parent_eol_type, Qunix))
6018         coding_system = AREF (eol_type, 0);
6019       else if (EQ (parent_eol_type, Qdos))
6020         coding_system = AREF (eol_type, 1);
6021       else if (EQ (parent_eol_type, Qmac))
6022         coding_system = AREF (eol_type, 2);
6023     }
6024   return coding_system;
6025 }
6026
6027
6028 /* Check if text-conversion and eol-conversion of CODING_SYSTEM are
6029    decided for writing to a process.  If not, complement them, and
6030    return a new coding system.  */
6031
6032 Lisp_Object
6033 complement_process_encoding_system (Lisp_Object coding_system)
6034 {
6035   Lisp_Object coding_base = Qnil, eol_base = Qnil;
6036   Lisp_Object spec, attrs;
6037   int i;
6038
6039   for (i = 0; i < 3; i++)
6040     {
6041       if (i == 1)
6042         coding_system = CDR_SAFE (Vdefault_process_coding_system);
6043       else if (i == 2)
6044         coding_system = preferred_coding_system ();
6045       spec = CODING_SYSTEM_SPEC (coding_system);
6046       if (NILP (spec))
6047         continue;
6048       attrs = AREF (spec, 0);
6049       if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6050         coding_base = CODING_ATTR_BASE_NAME (attrs);
6051       if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
6052         eol_base = coding_system;
6053       if (! NILP (coding_base) && ! NILP (eol_base))
6054         break;
6055     }
6056
6057   if (i > 0)
6058     /* The original CODING_SYSTEM didn't specify text-conversion or
6059        eol-conversion.  Be sure that we return a fully complemented
6060        coding system.  */
6061     coding_system = coding_inherit_eol_type (coding_base, eol_base);
6062   return coding_system;
6063 }
6064
6065
/* Emacs has a mechanism to automatically detect the coding system of
   a text if it is encoded in Emacs' internal format, ISO2022, SJIS,
   or BIG5.  However, it is impossible to distinguish some coding
   systems accurately because they use the same range of codes.  So,
   coding systems are first grouped into the following categories:
6071
6072    o coding-category-emacs-mule
6073
6074         The category for a coding system which has the same code range
6075         as Emacs' internal format.  Assigned the coding-system (Lisp
6076         symbol) `emacs-mule' by default.
6077
6078    o coding-category-sjis
6079
6080         The category for a coding system which has the same code range
6081         as SJIS.  Assigned the coding-system (Lisp
6082         symbol) `japanese-shift-jis' by default.
6083
6084    o coding-category-iso-7
6085
6086         The category for a coding system which has the same code range
6087         as ISO2022 of 7-bit environment.  This doesn't use any locking
6088         shift and single shift functions.  This can encode/decode all
6089         charsets.  Assigned the coding-system (Lisp symbol)
6090         `iso-2022-7bit' by default.
6091
6092    o coding-category-iso-7-tight
6093
6094         Same as coding-category-iso-7 except that this can
6095         encode/decode only the specified charsets.
6096
6097    o coding-category-iso-8-1
6098
6099         The category for a coding system which has the same code range
6100         as ISO2022 of 8-bit environment and graphic plane 1 used only
6101         for DIMENSION1 charset.  This doesn't use any locking shift
6102         and single shift functions.  Assigned the coding-system (Lisp
6103         symbol) `iso-latin-1' by default.
6104
6105    o coding-category-iso-8-2
6106
6107         The category for a coding system which has the same code range
6108         as ISO2022 of 8-bit environment and graphic plane 1 used only
6109         for DIMENSION2 charset.  This doesn't use any locking shift
6110         and single shift functions.  Assigned the coding-system (Lisp
6111         symbol) `japanese-iso-8bit' by default.
6112
6113    o coding-category-iso-7-else
6114
6115         The category for a coding system which has the same code range
6116         as ISO2022 of 7-bit environment but uses locking shift or
6117         single shift functions.  Assigned the coding-system (Lisp
6118         symbol) `iso-2022-7bit-lock' by default.
6119
6120    o coding-category-iso-8-else
6121
6122         The category for a coding system which has the same code range
6123         as ISO2022 of 8-bit environment but uses locking shift or
6124         single shift functions.  Assigned the coding-system (Lisp
6125         symbol) `iso-2022-8bit-ss2' by default.
6126
6127    o coding-category-big5
6128
6129         The category for a coding system which has the same code range
6130         as BIG5.  Assigned the coding-system (Lisp symbol)
6131         `cn-big5' by default.
6132
6133    o coding-category-utf-8
6134
6135         The category for a coding system which has the same code range
6136         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6137         symbol) `utf-8' by default.
6138
6139    o coding-category-utf-16-be
6140
6141         The category for a coding system in which a text has an
6142         Unicode signature (cf. Unicode Standard) in the order of BIG
6143         endian at the head.  Assigned the coding-system (Lisp symbol)
6144         `utf-16-be' by default.
6145
6146    o coding-category-utf-16-le
6147
6148         The category for a coding system in which a text has an
6149         Unicode signature (cf. Unicode Standard) in the order of
6150         LITTLE endian at the head.  Assigned the coding-system (Lisp
6151         symbol) `utf-16-le' by default.
6152
6153    o coding-category-ccl
6154
6155         The category for a coding system of which encoder/decoder is
6156         written in CCL programs.  The default value is nil, i.e., no
6157         coding system is assigned.
6158
6159    o coding-category-binary
6160
6161         The category for a coding system not categorized in any of the
6162         above.  Assigned the coding-system (Lisp symbol)
6163         `no-conversion' by default.
6164
   Each of them is a Lisp symbol whose value is the actual
   `coding-system' (itself a Lisp symbol) assigned by a user.  What
   Emacs actually does is detect the category of a coding system and
   then use the `coding-system' assigned to that category.  If Emacs
   cannot narrow the choice down to a single category, it selects the
   category of the highest priority.  The priorities of categories
   are also specified by a user in the Lisp variable
   `coding-category-list'.
6172
6173 */
6174
/* Kinds of end-of-line found by end-of-line detection.  Apart from
   EOL_SEEN_NONE, each kind has a bit of its own.  */
#define EOL_SEEN_NONE   0
#define EOL_SEEN_LF     (1 << 0)
#define EOL_SEEN_CR     (1 << 1)
#define EOL_SEEN_CRLF   (1 << 2)
6179
/* Detect how end-of-line is encoded in the text of length SRC_BYTES
   pointed to by SOURCE.  If CATEGORY is one of
   coding_category_utf_16_XXXX, assume that CR and LF are each encoded
   in two bytes; otherwise assume they are each encoded in one byte.

   Return one of EOL_SEEN_XXX.  */
6186
6187 #define MAX_EOL_CHECK_COUNT 3
6188
6189 static int
6190 detect_eol (const unsigned char *source, EMACS_INT src_bytes,
6191             enum coding_category category)
6192 {
6193   const unsigned char *src = source, *src_end = src + src_bytes;
6194   unsigned char c;
6195   int total  = 0;
6196   int eol_seen = EOL_SEEN_NONE;
6197
6198   if ((1 << category) & CATEGORY_MASK_UTF_16)
6199     {
6200       int msb, lsb;
6201
6202       msb = category == (coding_category_utf_16_le
6203                          | coding_category_utf_16_le_nosig);
6204       lsb = 1 - msb;
6205
6206       while (src + 1 < src_end)
6207         {
6208           c = src[lsb];
6209           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6210             {
6211               int this_eol;
6212
6213               if (c == '\n')
6214                 this_eol = EOL_SEEN_LF;
6215               else if (src + 3 >= src_end
6216                        || src[msb + 2] != 0
6217                        || src[lsb + 2] != '\n')
6218                 this_eol = EOL_SEEN_CR;
6219               else
6220                 {
6221                   this_eol = EOL_SEEN_CRLF;
6222                   src += 2;
6223                 }
6224
6225               if (eol_seen == EOL_SEEN_NONE)
6226                 /* This is the first end-of-line.  */
6227                 eol_seen = this_eol;
6228               else if (eol_seen != this_eol)
6229                 {
6230                   /* The found type is different from what found before.
6231                      Allow for stray ^M characters in DOS EOL files.  */
6232                   if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6233                       || (eol_seen == EOL_SEEN_CRLF
6234                           && this_eol == EOL_SEEN_CR))
6235                     eol_seen = EOL_SEEN_CRLF;
6236                   else
6237                     {
6238                       eol_seen = EOL_SEEN_LF;
6239                       break;
6240                     }
6241                 }
6242               if (++total == MAX_EOL_CHECK_COUNT)
6243                 break;
6244             }
6245           src += 2;
6246         }
6247     }
6248   else
6249     while (src < src_end)
6250       {
6251         c = *src++;
6252         if (c == '\n' || c == '\r')
6253           {
6254             int this_eol;
6255
6256             if (c == '\n')
6257               this_eol = EOL_SEEN_LF;
6258             else if (src >= src_end || *src != '\n')
6259               this_eol = EOL_SEEN_CR;
6260             else
6261               this_eol = EOL_SEEN_CRLF, src++;
6262
6263             if (eol_seen == EOL_SEEN_NONE)
6264               /* This is the first end-of-line.  */
6265               eol_seen = this_eol;
6266             else if (eol_seen != this_eol)
6267               {
6268                 /* The found type is different from what found before.
6269                    Allow for stray ^M characters in DOS EOL files.  */
6270                 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6271                     || (eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR))
6272                   eol_seen = EOL_SEEN_CRLF;
6273                 else
6274                   {
6275                     eol_seen = EOL_SEEN_LF;
6276                     break;
6277                   }
6278               }
6279             if (++total == MAX_EOL_CHECK_COUNT)
6280               break;
6281           }
6282       }
6283   return eol_seen;
6284 }
6285
6286
6287 static Lisp_Object
6288 adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6289 {
6290   Lisp_Object eol_type;
6291
6292   eol_type = CODING_ID_EOL_TYPE (coding->id);
6293   if (eol_seen & EOL_SEEN_LF)
6294     {
6295       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6296       eol_type = Qunix;
6297     }
6298   else if (eol_seen & EOL_SEEN_CRLF)
6299     {
6300       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6301       eol_type = Qdos;
6302     }
6303   else if (eol_seen & EOL_SEEN_CR)
6304     {
6305       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6306       eol_type = Qmac;
6307     }
6308   return eol_type;
6309 }
6310
6311 /* Detect how a text specified in CODING is encoded.  If a coding
6312    system is detected, update fields of CODING by the detected coding
6313    system.  */
6314
6315 static void
6316 detect_coding (struct coding_system *coding)
6317 {
6318   const unsigned char *src, *src_end;
6319   int saved_mode = coding->mode;
6320
6321   coding->consumed = coding->consumed_char = 0;
6322   coding->produced = coding->produced_char = 0;
6323   coding_set_source (coding);
6324
6325   src_end = coding->source + coding->src_bytes;
6326   coding->head_ascii = 0;
6327
6328   /* If we have not yet decided the text encoding type, detect it
6329      now.  */
6330   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6331     {
6332       int c, i;
6333       struct coding_detection_info detect_info;
6334       int null_byte_found = 0, eight_bit_found = 0;
6335
6336       detect_info.checked = detect_info.found = detect_info.rejected = 0;
6337       for (src = coding->source; src < src_end; src++)
6338         {
6339           c = *src;
6340           if (c & 0x80)
6341             {
6342               eight_bit_found = 1;
6343               if (null_byte_found)
6344                 break;
6345             }
6346           else if (c < 0x20)
6347             {
6348               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6349                   && ! inhibit_iso_escape_detection
6350                   && ! detect_info.checked)
6351                 {
6352                   if (detect_coding_iso_2022 (coding, &detect_info))
6353                     {
6354                       /* We have scanned the whole data.  */
6355                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6356                         {
6357                           /* We didn't find an 8-bit code.  We may
6358                              have found a null-byte, but it's very
6359                              rare that a binary file conforms to
6360                              ISO-2022.  */
6361                           src = src_end;
6362                           coding->head_ascii = src - coding->source;
6363                         }
6364                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6365                       break;
6366                     }
6367                 }
6368               else if (! c && !inhibit_null_byte_detection)
6369                 {
6370                   null_byte_found = 1;
6371                   if (eight_bit_found)
6372                     break;
6373                 }
6374               if (! eight_bit_found)
6375                 coding->head_ascii++;
6376             }
6377           else if (! eight_bit_found)
6378             coding->head_ascii++;
6379         }
6380
6381       if (null_byte_found || eight_bit_found
6382           || coding->head_ascii < coding->src_bytes
6383           || detect_info.found)
6384         {
6385           enum coding_category category;
6386           struct coding_system *this;
6387
6388           if (coding->head_ascii == coding->src_bytes)
6389             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6390             for (i = 0; i < coding_category_raw_text; i++)
6391               {
6392                 category = coding_priorities[i];
6393                 this = coding_categories + category;
6394                 if (detect_info.found & (1 << category))
6395                   break;
6396               }
6397           else
6398             {
6399               if (null_byte_found)
6400                 {
6401                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6402                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6403                 }
6404               for (i = 0; i < coding_category_raw_text; i++)
6405                 {
6406                   category = coding_priorities[i];
6407                   this = coding_categories + category;
6408                   if (this->id < 0)
6409                     {
6410                       /* No coding system of this category is defined.  */
6411                       detect_info.rejected |= (1 << category);
6412                     }
6413                   else if (category >= coding_category_raw_text)
6414                     continue;
6415                   else if (detect_info.checked & (1 << category))
6416                     {
6417                       if (detect_info.found & (1 << category))
6418                         break;
6419                     }
6420                   else if ((*(this->detector)) (coding, &detect_info)
6421                            && detect_info.found & (1 << category))
6422                     {
6423                       if (category == coding_category_utf_16_auto)
6424                         {
6425                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6426                             category = coding_category_utf_16_le;
6427                           else
6428                             category = coding_category_utf_16_be;
6429                         }
6430                       break;
6431                     }
6432                 }
6433             }
6434
6435           if (i < coding_category_raw_text)
6436             setup_coding_system (CODING_ID_NAME (this->id), coding);
6437           else if (null_byte_found)
6438             setup_coding_system (Qno_conversion, coding);
6439           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6440                    == CATEGORY_MASK_ANY)
6441             setup_coding_system (Qraw_text, coding);
6442           else if (detect_info.rejected)
6443             for (i = 0; i < coding_category_raw_text; i++)
6444               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6445                 {
6446                   this = coding_categories + coding_priorities[i];
6447                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6448                   break;
6449                 }
6450         }
6451     }
6452   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6453            == coding_category_utf_8_auto)
6454     {
6455       Lisp_Object coding_systems;
6456       struct coding_detection_info detect_info;
6457
6458       coding_systems
6459         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6460       detect_info.found = detect_info.rejected = 0;
6461       coding->head_ascii = 0;
6462       if (CONSP (coding_systems)
6463           && detect_coding_utf_8 (coding, &detect_info))
6464         {
6465           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6466             setup_coding_system (XCAR (coding_systems), coding);
6467           else
6468             setup_coding_system (XCDR (coding_systems), coding);
6469         }
6470     }
6471   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6472            == coding_category_utf_16_auto)
6473     {
6474       Lisp_Object coding_systems;
6475       struct coding_detection_info detect_info;
6476
6477       coding_systems
6478         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6479       detect_info.found = detect_info.rejected = 0;
6480       coding->head_ascii = 0;
6481       if (CONSP (coding_systems)
6482           && detect_coding_utf_16 (coding, &detect_info))
6483         {
6484           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6485             setup_coding_system (XCAR (coding_systems), coding);
6486           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6487             setup_coding_system (XCDR (coding_systems), coding);
6488         }
6489     }
6490   coding->mode = saved_mode;
6491 }
6492
6493
6494 static void
6495 decode_eol (struct coding_system *coding)
6496 {
6497   Lisp_Object eol_type;
6498   unsigned char *p, *pbeg, *pend;
6499
6500   eol_type = CODING_ID_EOL_TYPE (coding->id);
6501   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6502     return;
6503
6504   if (NILP (coding->dst_object))
6505     pbeg = coding->destination;
6506   else
6507     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6508   pend = pbeg + coding->produced;
6509
6510   if (VECTORP (eol_type))
6511     {
6512       int eol_seen = EOL_SEEN_NONE;
6513
6514       for (p = pbeg; p < pend; p++)
6515         {
6516           if (*p == '\n')
6517             eol_seen |= EOL_SEEN_LF;
6518           else if (*p == '\r')
6519             {
6520               if (p + 1 < pend && *(p + 1) == '\n')
6521                 {
6522                   eol_seen |= EOL_SEEN_CRLF;
6523                   p++;
6524                 }
6525               else
6526                 eol_seen |= EOL_SEEN_CR;
6527             }
6528         }
6529       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6530       if ((eol_seen & EOL_SEEN_CRLF) != 0
6531           && (eol_seen & EOL_SEEN_CR) != 0
6532           && (eol_seen & EOL_SEEN_LF) == 0)
6533         eol_seen = EOL_SEEN_CRLF;
6534       else if (eol_seen != EOL_SEEN_NONE
6535           && eol_seen != EOL_SEEN_LF
6536           && eol_seen != EOL_SEEN_CRLF
6537           && eol_seen != EOL_SEEN_CR)
6538         eol_seen = EOL_SEEN_LF;
6539       if (eol_seen != EOL_SEEN_NONE)
6540         eol_type = adjust_coding_eol_type (coding, eol_seen);
6541     }
6542
6543   if (EQ (eol_type, Qmac))
6544     {
6545       for (p = pbeg; p < pend; p++)
6546         if (*p == '\r')
6547           *p = '\n';
6548     }
6549   else if (EQ (eol_type, Qdos))
6550     {
6551       EMACS_INT n = 0;
6552
6553       if (NILP (coding->dst_object))
6554         {
6555           /* Start deleting '\r' from the tail to minimize the memory
6556              movement.  */
6557           for (p = pend - 2; p >= pbeg; p--)
6558             if (*p == '\r')
6559               {
6560                 memmove (p, p + 1, pend-- - p - 1);
6561                 n++;
6562               }
6563         }
6564       else
6565         {
6566           EMACS_INT pos_byte = coding->dst_pos_byte;
6567           EMACS_INT pos = coding->dst_pos;
6568           EMACS_INT pos_end = pos + coding->produced_char - 1;
6569
6570           while (pos < pos_end)
6571             {
6572               p = BYTE_POS_ADDR (pos_byte);
6573               if (*p == '\r' && p[1] == '\n')
6574                 {
6575                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6576                   n++;
6577                   pos_end--;
6578                 }
6579               pos++;
6580               if (coding->dst_multibyte)
6581                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6582               else
6583                 pos_byte++;
6584             }
6585         }
6586       coding->produced -= n;
6587       coding->produced_char -= n;
6588     }
6589 }
6590
6591
6592 /* Return a translation table (or list of them) from coding system
6593    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6594    decoding (ENCODEP is zero). */
6595
6596 static Lisp_Object
6597 get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6598 {
6599   Lisp_Object standard, translation_table;
6600   Lisp_Object val;
6601
6602   if (NILP (Venable_character_translation))
6603     {
6604       if (max_lookup)
6605         *max_lookup = 0;
6606       return Qnil;
6607     }
6608   if (encodep)
6609     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6610       standard = Vstandard_translation_table_for_encode;
6611   else
6612     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6613       standard = Vstandard_translation_table_for_decode;
6614   if (NILP (translation_table))
6615     translation_table = standard;
6616   else
6617     {
6618       if (SYMBOLP (translation_table))
6619         translation_table = Fget (translation_table, Qtranslation_table);
6620       else if (CONSP (translation_table))
6621         {
6622           translation_table = Fcopy_sequence (translation_table);
6623           for (val = translation_table; CONSP (val); val = XCDR (val))
6624             if (SYMBOLP (XCAR (val)))
6625               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6626         }
6627       if (CHAR_TABLE_P (standard))
6628         {
6629           if (CONSP (translation_table))
6630             translation_table = nconc2 (translation_table,
6631                                         Fcons (standard, Qnil));
6632           else
6633             translation_table = Fcons (translation_table,
6634                                        Fcons (standard, Qnil));
6635         }
6636     }
6637
6638   if (max_lookup)
6639     {
6640       *max_lookup = 1;
6641       if (CHAR_TABLE_P (translation_table)
6642           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6643         {
6644           val = XCHAR_TABLE (translation_table)->extras[1];
6645           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6646             *max_lookup = XFASTINT (val);
6647         }
6648       else if (CONSP (translation_table))
6649         {
6650           Lisp_Object tail;
6651
6652           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6653             if (CHAR_TABLE_P (XCAR (tail))
6654                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6655               {
6656                 Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1];
6657                 if (NATNUMP (tailval) && *max_lookup < XFASTINT (tailval))
6658                   *max_lookup = XFASTINT (tailval);
6659               }
6660         }
6661     }
6662   return translation_table;
6663 }
6664
/* Look up character C in TABLE and store the result in C or TRANS.

   TABLE is a char-table, or a list whose char-table elements are
   looked up in order (other elements are skipped).  Whenever a
   lookup yields a character, C is replaced by that character and
   TRANS is set to Qnil; in a list, the following tables are then
   looked up with the new C.  Any other lookup result is left in
   TRANS, and in a list the first such result that is not nil ends
   the search.  If TABLE is neither a char-table nor a cons, TRANS is
   Qnil and C is unchanged.

   C and TRANS are assigned to, so they must be lvalues.  The
   arguments are evaluated more than once, and the expansion declares
   a local variable named `tail'.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
  do {                                                          \
    trans = Qnil;                                               \
    if (CHAR_TABLE_P (table))                                   \
      {                                                         \
        trans = CHAR_TABLE_REF (table, c);                      \
        if (CHARACTERP (trans))                                 \
          c = XFASTINT (trans), trans = Qnil;                   \
      }                                                         \
    else if (CONSP (table))                                     \
      {                                                         \
        Lisp_Object tail;                                       \
                                                                \
        for (tail = table; CONSP (tail); tail = XCDR (tail))    \
          if (CHAR_TABLE_P (XCAR (tail)))                       \
            {                                                   \
              trans = CHAR_TABLE_REF (XCAR (tail), c);          \
              if (CHARACTERP (trans))                           \
                c = XFASTINT (trans), trans = Qnil;             \
              else if (! NILP (trans))                          \
                break;                                          \
            }                                                   \
      }                                                         \
  } while (0)
6689
6690
6691 /* Return a translation of character(s) at BUF according to TRANS.
6692    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6693    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6694    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6695    translation is found, and Qnil if not found..
6696    If BUF is too short to lookup characters in FROM, return Qt.  */
6697
6698 static Lisp_Object
6699 get_translation (Lisp_Object trans, int *buf, int *buf_end)
6700 {
6701
6702   if (INTEGERP (trans))
6703     return trans;
6704   for (; CONSP (trans); trans = XCDR (trans))
6705     {
6706       Lisp_Object val = XCAR (trans);
6707       Lisp_Object from = XCAR (val);
6708       int len = ASIZE (from);
6709       int i;
6710
6711       for (i = 0; i < len; i++)
6712         {
6713           if (buf + i == buf_end)
6714             return Qt;
6715           if (XINT (AREF (from, i)) != buf[i])
6716             break;
6717         }
6718       if (i == len)
6719         return val;
6720     }
6721   return Qnil;
6722 }
6723
6724
/* Store decoded characters in the destination of CODING, after the
   CODING->produced bytes that are already there, and add the number
   of bytes and characters stored to CODING->produced and
   CODING->produced_char.  If the destination is a buffer and at
   least one character was stored, insert_from_gap is called for the
   new text.

   If CODING->chars_at_source is zero, the characters are taken from
   CODING->charbuf and translated through TRANSLATION_TABLE; negative
   elements of the charbuf introduce annotation data, which is
   skipped.  LAST_BLOCK zero means that more source may follow: when
   a translation cannot be decided because the charbuf ends too
   early, the remaining characters are left unprocessed.

   If CODING->chars_at_source is nonzero, the characters are the
   bytes at CODING->source, which are copied, converting between
   multibyte and unibyte representation if source and destination
   differ in that respect.

   The destination is enlarged with alloc_destination as needed.

   Return the number of charbuf elements left unprocessed (always
   zero in the chars_at_source case).  */
static int
produce_chars (struct coding_system *coding, Lisp_Object translation_table,
               int last_block)
{
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  EMACS_INT produced;
  EMACS_INT produced_chars = 0;
  int carryover = 0;

  if (! coding->chars_at_source)
    {
      /* Source characters are in coding->charbuf.  */
      int *buf = coding->charbuf;
      int *buf_end = buf + coding->charbuf_used;

      /* When source and destination are the same object, the
         writable area ends where the consumed part of the source
         ends.  */
      if (EQ (coding->src_object, coding->dst_object))
        {
          coding_set_source (coding);
          dst_end = ((unsigned char *) coding->source) + coding->consumed;
        }

      while (buf < buf_end)
        {
          int c = *buf, i;

          if (c >= 0)
            {
              /* Number of charbuf elements this step consumes, and
                 number of characters it produces.  */
              EMACS_INT from_nchars = 1, to_nchars = 1;
              Lisp_Object trans = Qnil;

              LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
              if (! NILP (trans))
                {
                  trans = get_translation (trans, buf, buf_end);
                  if (INTEGERP (trans))
                    c = XINT (trans);
                  else if (CONSP (trans))
                    {
                      /* TRANS is (FROM . TO): FROM is a vector, TO a
                         character or a vector of characters.  */
                      from_nchars = ASIZE (XCAR (trans));
                      trans = XCDR (trans);
                      if (INTEGERP (trans))
                        c = XINT (trans);
                      else
                        {
                          to_nchars = ASIZE (trans);
                          c = XINT (AREF (trans, 0));
                        }
                    }
                  /* The charbuf is too short to decide; leave the
                     rest for the next call unless this is the last
                     block.  */
                  else if (EQ (trans, Qt) && ! last_block)
                    break;
                }

              /* Make sure there is room for TO_NCHARS characters of
                 at most MAX_MULTIBYTE_LENGTH bytes each.  */
              if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars)
                {
                  /* Give up if the size requested below would
                     overflow.  */
                  if (((min (PTRDIFF_MAX, SIZE_MAX) - (buf_end - buf))
                       / MAX_MULTIBYTE_LENGTH)
                      < to_nchars)
                    memory_full (SIZE_MAX);
                  dst = alloc_destination (coding,
                                           buf_end - buf
                                           + MAX_MULTIBYTE_LENGTH * to_nchars,
                                           dst);
                  if (EQ (coding->src_object, coding->dst_object))
                    {
                      coding_set_source (coding);
                      dst_end = (((unsigned char *) coding->source)
                                 + coding->consumed);
                    }
                  else
                    dst_end = coding->destination + coding->dst_bytes;
                }

              for (i = 0; i < to_nchars; i++)
                {
                  /* C already holds the first character; the
                     following ones come from the vector TRANS.  */
                  if (i > 0)
                    c = XINT (AREF (trans, i));
                  if (coding->dst_multibyte
                      || ! CHAR_BYTE8_P (c))
                    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
                  else
                    *dst++ = CHAR_TO_BYTE8 (c);
                }
              produced_chars += to_nchars;
              buf += from_nchars;
            }
          else
            /* This is an annotation datum.  (-C) is the length.  */
            buf += -c;
        }
      carryover = buf_end - buf;
    }
  else
    {
      /* Source characters are at coding->source.  */
      const unsigned char *src = coding->source;
      const unsigned char *src_end = src + coding->consumed;

      if (EQ (coding->dst_object, coding->src_object))
        dst_end = (unsigned char *) src;
      if (coding->src_multibyte != coding->dst_multibyte)
        {
          if (coding->src_multibyte)
            {
              /* NOTE(review): multibytep, consumed_chars, src_base
                 and the label no_more_source are not referenced
                 directly in this function; presumably the macro
                 ONE_MORE_BYTE, defined elsewhere, uses them.
                 Confirm before renaming or removing any of them.  */
              int multibytep = 1;
              EMACS_INT consumed_chars = 0;

              while (1)
                {
                  const unsigned char *src_base = src;
                  int c;

                  ONE_MORE_BYTE (c);
                  if (dst == dst_end)
                    {
                      /* In the same-object case the source bytes
                         read so far may be overwritten, so try that
                         before enlarging the destination.  */
                      if (EQ (coding->src_object, coding->dst_object))
                        dst_end = (unsigned char *) src;
                      if (dst == dst_end)
                        {
                          EMACS_INT offset = src - coding->source;

                          dst = alloc_destination (coding, src_end - src + 1,
                                                   dst);
                          dst_end = coding->destination + coding->dst_bytes;
                          /* Recompute the source pointers from
                             CODING->source after the reallocation.  */
                          coding_set_source (coding);
                          src = coding->source + offset;
                          src_end = coding->source + coding->src_bytes;
                          if (EQ (coding->src_object, coding->dst_object))
                            dst_end = (unsigned char *) src;
                        }
                    }
                  *dst++ = c;
                  produced_chars++;
                }
            no_more_source:
              ;
            }
          else
            while (src < src_end)
              {
                /* NOTE(review): multibytep is presumably read by the
                   macro EMIT_ONE_BYTE, defined elsewhere -- confirm.  */
                int multibytep = 1;
                int c = *src++;

                /* Two bytes of room are required here.  */
                if (dst >= dst_end - 1)
                  {
                    if (EQ (coding->src_object, coding->dst_object))
                      dst_end = (unsigned char *) src;
                    if (dst >= dst_end - 1)
                      {
                        EMACS_INT offset = src - coding->source;
                        EMACS_INT more_bytes;

                        if (EQ (coding->src_object, coding->dst_object))
                          more_bytes = ((src_end - src) / 2) + 2;
                        else
                          more_bytes = src_end - src + 2;
                        dst = alloc_destination (coding, more_bytes, dst);
                        dst_end = coding->destination + coding->dst_bytes;
                        coding_set_source (coding);
                        src = coding->source + offset;
                        src_end = coding->source + coding->src_bytes;
                        if (EQ (coding->src_object, coding->dst_object))
                          dst_end = (unsigned char *) src;
                      }
                  }
                EMIT_ONE_BYTE (c);
              }
        }
      else
        {
          /* Source and destination use the same representation, so
             the bytes are copied verbatim.  */
          if (!EQ (coding->src_object, coding->dst_object))
            {
              EMACS_INT require = coding->src_bytes - coding->dst_bytes;

              if (require > 0)
                {
                  EMACS_INT offset = src - coding->source;

                  dst = alloc_destination (coding, require, dst);
                  coding_set_source (coding);
                  src = coding->source + offset;
                  src_end = coding->source + coding->src_bytes;
                }
            }
          produced_chars = coding->consumed_char;
          while (src < src_end)
            *dst++ = *src++;
        }
    }

  /* Number of bytes stored by this call.  */
  produced = dst - (coding->destination + coding->produced);
  if (BUFFERP (coding->dst_object) && produced_chars > 0)
    insert_from_gap (produced_chars, produced);
  coding->produced += produced;
  coding->produced_char += produced_chars;
  return carryover;
}
6922
6923 /* Compose text in CODING->object according to the annotation data at
6924    CHARBUF.  CHARBUF is an array:
6925      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6926  */
6927
6928 static inline void
6929 produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6930 {
6931   int len;
6932   EMACS_INT to;
6933   enum composition_method method;
6934   Lisp_Object components;
6935
6936   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6937   to = pos + charbuf[2];
6938   method = (enum composition_method) (charbuf[4]);
6939
6940   if (method == COMPOSITION_RELATIVE)
6941     components = Qnil;
6942   else
6943     {
6944       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6945       int i, j;
6946
6947       if (method == COMPOSITION_WITH_RULE)
6948         len = charbuf[2] * 3 - 2;
6949       charbuf += MAX_ANNOTATION_LENGTH;
6950       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6951       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6952         {
6953           if (charbuf[i] >= 0)
6954             args[j] = make_number (charbuf[i]);
6955           else
6956             {
6957               i++;
6958               args[j] = make_number (charbuf[i] % 0x100);
6959             }
6960         }
6961       components = (i == j ? Fstring (j, args) : Fvector (j, args));
6962     }
6963   compose_text (pos, to, components, Qnil, coding->dst_object);
6964 }
6965
6966
6967 /* Put `charset' property on text in CODING->object according to
6968    the annotation data at CHARBUF.  CHARBUF is an array:
6969      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6970  */
6971
6972 static inline void
6973 produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6974 {
6975   EMACS_INT from = pos - charbuf[2];
6976   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6977
6978   Fput_text_property (make_number (from), make_number (pos),
6979                       Qcharset, CHARSET_NAME (charset),
6980                       coding->dst_object);
6981 }
6982
6983
/* Number of `int' elements first requested for the work buffer
   CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work buffer CODING->charbuf with alloca and record
   the number of its `int' elements in CODING->charbuf_size.

   The request starts at CHARBUF_SIZE elements and is halved as long
   as alloca yields a null pointer and the size is still above 1024.
   If no buffer is obtained, CODING_RESULT_INSUFFICIENT_MEM is
   recorded and the macro executes `return coding->result;', i.e. it
   returns from the function that invokes it.

   As the buffer comes from alloca, it belongs to the stack frame of
   the invoking function and must not be used after that function
   returns.

   NOTE(review): alloca normally does not report failure by returning
   a null pointer, so the halving loop and the error return may be
   unreachable in practice -- confirm for the alloca implementations
   in use.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    coding->charbuf = NULL;                                             \
    while (size > 1024)                                                 \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * size);         \
        if (coding->charbuf)                                            \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = size;                                        \
  } while (0)
7005
7006
/* Apply the annotation records stored in CODING->charbuf to the text
   of CODING->dst_object, where POS is the position corresponding to
   the first element of charbuf.

   A non-negative element is an ordinary character and advances POS by
   one.  A negative element -LEN begins a record that occupies LEN
   elements; when LEN > 2 its second element selects
   produce_composition or produce_charset, and any other record is
   skipped.  Nothing is done when CODING->dst_object is nil.  */
7007 static void
7008 produce_annotation (struct coding_system *coding, EMACS_INT pos)
7009 {
7010   int *charbuf = coding->charbuf;
7011   int *charbuf_end = charbuf + coding->charbuf_used;
7012
7013   if (NILP (coding->dst_object))
7014     return;
7015
7016   while (charbuf < charbuf_end)
7017     {
7018       if (*charbuf >= 0)
7019         pos++, charbuf++;
7020       else
7021         {
               /* The negated element is the length of the whole record,
                  this first element included.  */
7022           int len = -*charbuf;
7023
7024           if (len > 2)
7025             switch (charbuf[1])
7026               {
7027               case CODING_ANNOTATE_COMPOSITION_MASK:
7028                 produce_composition (coding, charbuf, pos);
7029                 break;
7030               case CODING_ANNOTATE_CHARSET_MASK:
7031                 produce_charset (coding, charbuf, pos);
7032                 break;
7033               }
               /* Step over the record; POS is not advanced by it.  */
7034           charbuf += len;
7035         }
7036     }
7037 }
7038
7039 /* Decode the data at CODING->src_object into CODING->dst_object.
7040    CODING->src_object is a buffer, a string, or nil.
7041    CODING->dst_object is a buffer.
7042
7043    If CODING->src_object is a buffer, it must be the current buffer.
7044    In this case, if CODING->src_pos is positive, it is a position of
7045    the source text in the buffer, otherwise, the source text is in the
7046    gap area of the buffer, and CODING->src_pos specifies the offset of
7047    the text from GPT (which must be the same as PT).  If this is the
7048    same buffer as CODING->dst_object, CODING->src_pos must be
7049    negative.
7050
7051    If CODING->src_object is a string, CODING->src_pos is an index to
7052    that string.
7053
7054    If CODING->src_object is nil, CODING->source must already point to
7055    the non-relocatable memory area.  In this case, CODING->src_pos is
7056    an offset from CODING->source.
7057
7058    The decoded data is inserted at the current point of the buffer
7059    CODING->dst_object.

        The return value is CODING->result.  When the destination is a
        buffer, its undo_list is set to Qt for the duration of the
        decoding and restored afterwards, and a single record_insert
        covering CODING->produced_char characters is made at the end.
7060 */
7061
7062 static int
7063 decode_coding (struct coding_system *coding)
7064 {
7065   Lisp_Object attrs;
7066   Lisp_Object undo_list;
7067   Lisp_Object translation_table;
7068   struct ccl_spec cclspec;
7069   int carryover;
7070   int i;
7071
       /* If the gap lies strictly inside the source text of a buffer,
          move it to the start of that text.  */
7072   if (BUFFERP (coding->src_object)
7073       && coding->src_pos > 0
7074       && coding->src_pos < GPT
7075       && coding->src_pos + coding->src_chars > GPT)
7076     move_gap_both (coding->src_pos, coding->src_pos_byte);
7077
       /* Make the destination buffer current with the gap at point, and
          save its undo_list, replacing it with Qt while we work.  */
7078   undo_list = Qt;
7079   if (BUFFERP (coding->dst_object))
7080     {
7081       if (current_buffer != XBUFFER (coding->dst_object))
7082         set_buffer_internal (XBUFFER (coding->dst_object));
7083       if (GPT != PT)
7084         move_gap_both (PT, PT_BYTE);
7085       undo_list = BVAR (current_buffer, undo_list);
7086       BVAR (current_buffer, undo_list) = Qt;
7087     }
7088
7089   coding->consumed = coding->consumed_char = 0;
7090   coding->produced = coding->produced_char = 0;
7091   coding->chars_at_source = 0;
7092   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7093   coding->errors = 0;
7094
       /* This macro returns CODING->result from this function when it
          fails to obtain a work area.  */
7095   ALLOC_CONVERSION_WORK_AREA (coding);
7096
7097   attrs = CODING_ID_ATTRS (coding->id);
7098   translation_table = get_translation_table (attrs, 0, NULL);
7099
7100   carryover = 0;
       /* The CCL decoder keeps its state in CCLSPEC, which lives on this
          function's stack.  */
7101   if (coding->decoder == decode_coding_ccl)
7102     {
7103       coding->spec.ccl = &cclspec;
7104       setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7105     }
       /* Main loop: run the decoder to fill charbuf, then hand charbuf to
          produce_chars.  produce_chars returns a count (CARRYOVER); that
          many elements at the tail of charbuf are copied to its head and
          the next pass starts with charbuf_used set to that count.  */
7106   do
7107     {
           /* Position in the destination of the text produced by this
              pass; used for the annotations below.  */
7108       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7109
7110       coding_set_source (coding);
7111       coding->annotated = 0;
7112       coding->charbuf_used = carryover;
7113       (*(coding->decoder)) (coding);
7114       coding_set_destination (coding);
7115       carryover = produce_chars (coding, translation_table, 0);
7116       if (coding->annotated)
7117         produce_annotation (coding, pos);
7118       for (i = 0; i < carryover; i++)
7119         coding->charbuf[i]
7120           = coding->charbuf[coding->charbuf_used - carryover + i];
7121     }
7122   while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7123          || (coding->consumed < coding->src_bytes
7124              && (coding->result == CODING_RESULT_SUCCESS
7125                  || coding->result == CODING_RESULT_INVALID_SRC)));
7126
       /* Flush the elements carried over from the final pass; the last
          argument of produce_chars is 1 here instead of 0.  */
7127   if (carryover > 0)
7128     {
7129       coding_set_destination (coding);
7130       coding->charbuf_used = carryover;
7131       produce_chars (coding, translation_table, 1);
7132     }
7133
       /* Deal with source bytes the decoder did not consume.  */
7134   coding->carryover_bytes = 0;
7135   if (coding->consumed < coding->src_bytes)
7136     {
7137       int nbytes = coding->src_bytes - coding->consumed;
7138       const unsigned char *src;
7139
7140       coding_set_source (coding);
7141       coding_set_destination (coding);
7142       src = coding->source + coding->consumed;
7143
7144       if (coding->mode & CODING_MODE_LAST_BLOCK)
7145         {
7146           /* Flush out unprocessed data as binary chars.  We are sure
7147              that the number of data is less than the size of
7148              coding->charbuf.  */
7149           coding->charbuf_used = 0;
7150           coding->chars_at_source = 0;
7151
7152           while (nbytes-- > 0)
7153             {
7154               int c = *src++;
7155
                   /* Bytes with the high bit set are stored via
                      BYTE8_TO_CHAR; other bytes are stored unchanged.  */
7156               if (c & 0x80)
7157                 c = BYTE8_TO_CHAR (c);
7158               coding->charbuf[coding->charbuf_used++] = c;
7159             }
7160           produce_chars (coding, Qnil, 1);
7161         }
7162       else
7163         {
7164           /* Record unprocessed bytes in coding->carryover.  We are
7165              sure that the number of data is less than the size of
7166              coding->carryover.  */
7167           unsigned char *p = coding->carryover;
7168
               /* Clamp anyway, so the copy below cannot overrun
                  coding->carryover.  */
7169           if (nbytes > sizeof coding->carryover)
7170             nbytes = sizeof coding->carryover;
7171           coding->carryover_bytes = nbytes;
7172           while (nbytes-- > 0)
7173             *p++ = *src++;
7174         }
           /* Either way, report the whole source as consumed.  */
7175       coding->consumed = coding->src_bytes;
7176     }
7177
7178   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7179       && !inhibit_eol_conversion)
7180     decode_eol (coding);
       /* Put the saved undo_list back and record the whole insertion.  */
7181   if (BUFFERP (coding->dst_object))
7182     {
7183       BVAR (current_buffer, undo_list) = undo_list;
7184       record_insert (coding->dst_pos, coding->produced_char);
7185     }
7186   return coding->result;
7187 }
7188
7189
7190 /* Extract an annotation datum from a composition starting at POS and
7191    ending before LIMIT of CODING->src_object (buffer or string), store
7192    the data in BUF, set *STOP to a starting position of the next
7193    composition (if any) or to LIMIT, and return the address of the
7194    next element of BUF.
7195
7196    If such an annotation is not found, set *STOP to a starting
7197    position of a composition after POS (if any) or to LIMIT, and
7198    return BUF.  */
7199
7200 static inline int *
7201 handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
7202                                struct coding_system *coding, int *buf,
7203                                EMACS_INT *stop)
7204 {
7205   EMACS_INT start, end;
7206   Lisp_Object prop;
7207
       /* No composition found, or the one found extends beyond LIMIT:
          nothing to annotate before LIMIT.  */
7208   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7209       || end > limit)
7210     *stop = limit;
       /* The composition found begins later; stop again at its start.  */
7211   else if (start > pos)
7212     *stop = start;
7213   else
7214     {
7215       if (start == pos)
7216         {
7217           /* We found a composition.  Store the corresponding
7218              annotation data in BUF.  */
               /* HEAD remembers where this record begins so that its
                  first element can be adjusted below.  */
7219           int *head = buf;
7220           enum composition_method method = COMPOSITION_METHOD (prop);
7221           int nchars = COMPOSITION_LENGTH (prop);
7222
7223           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7224           if (method != COMPOSITION_RELATIVE)
7225             {
7226               Lisp_Object components;
7227               int len, i, i_byte;
7228
                   /* Append the components, counting them in LEN.  They
                      may be a vector, a string, a single integer or a
                      list; anything else aborts.  */
7229               components = COMPOSITION_COMPONENTS (prop);
7230               if (VECTORP (components))
7231                 {
7232                   len = ASIZE (components);
7233                   for (i = 0; i < len; i++)
7234                     *buf++ = XINT (AREF (components, i));
7235                 }
7236               else if (STRINGP (components))
7237                 {
7238                   len = SCHARS (components);
7239                   i = i_byte = 0;
                       /* The loop index I is advanced by the macro.  */
7240                   while (i < len)
7241                     {
7242                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7243                       buf++;
7244                     }
7245                 }
7246               else if (INTEGERP (components))
7247                 {
7248                   len = 1;
7249                   *buf++ = XINT (components);
7250                 }
7251               else if (CONSP (components))
7252                 {
7253                   for (len = 0; CONSP (components);
7254                        len++, components = XCDR (components))
7255                     *buf++ = XINT (XCAR (components));
7256                 }
7257               else
7258                 abort ();
                   /* Presumably *HEAD holds the negated record length (as
                      produce_annotation reads it); subtracting LEN makes
                      the record cover the appended components.  */
7259               *head -= len;
7260             }
7261         }
7262
           /* Look for the next composition, starting from the end of the
              one just found.  */
7263       if (find_composition (end, limit, &start, &end, &prop,
7264                             coding->src_object)
7265           && end <= limit)
7266         *stop = start;
7267       else
7268         *stop = limit;
7269     }
7270   return buf;
7271 }
7272
7273
7274 /* Extract an annotation datum from a text property `charset' at POS of
7275    CODING->src_object (buffer or string), store the data in BUF, set
7276    *STOP to the position where the value of `charset' property changes
7277    (limiting by LIMIT), and return the address of the next element of
7278    BUF.
7279
7280    If the property value is nil, set *STOP to the position where the
7281    property value is non-nil (limiting by LIMIT), and return BUF.  */
7282
7283 static inline int *
7284 handle_charset_annotation (EMACS_INT pos, EMACS_INT limit,
7285                            struct coding_system *coding, int *buf,
7286                            EMACS_INT *stop)
7287 {
7288   Lisp_Object val, next;
7289   int id;
7290
       /* Use the charset's ID when the property names a charset, and -1
          when the property is nil or is not a charset.  */
7291   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7292   if (! NILP (val) && CHARSETP (val))
7293     id = XINT (CHARSET_SYMBOL_ID (val));
7294   else
7295     id = -1;
       /* NOTE(review): ADD_CHARSET_DATA is invoked even when ID is -1;
          whether BUF advances in that case depends on that macro, so
          confirm this against the second paragraph of the comment
          above.  */
7296   ADD_CHARSET_DATA (buf, 0, id);
       /* The next stop is where the `charset' property changes, bounded
          by LIMIT.  */
7297   next = Fnext_single_property_change (make_number (pos), Qcharset,
7298                                        coding->src_object,
7299                                        make_number (limit));
7300   *stop = XINT (next);
7301   return buf;
7302 }
7303
7304
/* Fill CODING->charbuf with characters read from CODING's source,
   starting at byte offset CODING->consumed.

   Each character goes through end-of-line conversion and is looked up
   in TRANSLATION_TABLE; MAX_LOOKUP is the number of ints allocated for
   the look-ahead buffer used by that lookup.  Annotation data is
   stored at the positions where the handlers say it is due.  Reading
   stops at the end of the source or when charbuf has no more room.

   On return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used describe what was read and stored, and
   CODING->chars_at_source is 0.  */
7305 static void
7306 consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7307                int max_lookup)
7308 {
7309   int *buf = coding->charbuf;
7310   int *buf_end = coding->charbuf + coding->charbuf_size;
7311   const unsigned char *src = coding->source + coding->consumed;
7312   const unsigned char *src_end = coding->source + coding->src_bytes;
7313   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7314   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7315   int multibytep = coding->src_multibyte;
7316   Lisp_Object eol_type;
7317   int c;
7318   EMACS_INT stop, stop_composition, stop_charset;
7319   int *lookup_buf = NULL;
7320
       /* The look-ahead buffer is needed only when there is a
          translation table.  */
7321   if (! NILP (translation_table))
7322     lookup_buf = alloca (sizeof (int) * max_lookup);
7323
       /* A vector EOL type is handled the same as Qunix here.  */
7324   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7325   if (VECTORP (eol_type))
7326     eol_type = Qunix;
7327
7328   /* Note: composition handling is not yet implemented.  */
7329   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7330
       /* STOP is the next position at which an annotation handler must
          run; with no source object there is none before END_POS.  */
7331   if (NILP (coding->src_object))
7332     stop = stop_composition = stop_charset = end_pos;
7333   else
7334     {
7335       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7336         stop = stop_composition = pos;
7337       else
7338         stop = stop_composition = end_pos;
7339       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7340         stop = stop_charset = pos;
7341       else
7342         stop_charset = end_pos;
7343     }
7344
7345   /* Compensate for CRLF and conversion.  */
7346   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7347   while (buf < buf_end)
7348     {
7349       Lisp_Object trans;
7350
7351       if (pos == stop)
7352         {
7353           if (pos == end_pos)
7354             break;
               /* Run whichever handlers are due here, then stop next at
                  the nearer of the two positions they report.  */
7355           if (pos == stop_composition)
7356             buf = handle_composition_annotation (pos, end_pos, coding,
7357                                                  buf, &stop_composition);
7358           if (pos == stop_charset)
7359             buf = handle_charset_annotation (pos, end_pos, coding,
7360                                              buf, &stop_charset);
7361           stop = (stop_composition < stop_charset
7362                   ? stop_composition : stop_charset);
7363         }
7364
           /* Fetch the next character C and advance SRC and POS.  */
7365       if (! multibytep)
7366         {
7367           EMACS_INT bytes;
7368
               /* Unibyte source.  For the raw-text and CCL encoders each
                  byte is taken as is.  Otherwise a valid multibyte
                  sequence is read as one character, and POS advances by
                  its length in bytes; any other byte is converted with
                  BYTE8_TO_CHAR.  */
7369           if (coding->encoder == encode_coding_raw_text
7370               || coding->encoder == encode_coding_ccl)
7371             c = *src++, pos++;
7372           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7373             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7374           else
7375             c = BYTE8_TO_CHAR (*src), src++, pos++;
7376         }
7377       else
7378         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7379       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7380         c = '\n';
           /* End-of-line conversion: for Qdos a '\r' is stored in front
              of the '\n'; for any other non-unix type the '\n' itself
              becomes '\r'.  */
7381       if (! EQ (eol_type, Qunix))
7382         {
7383           if (c == '\n')
7384             {
7385               if (EQ (eol_type, Qdos))
7386                 *buf++ = '\r';
7387               else
7388                 c = '\r';
7389             }
7390         }
7391
7392       trans = Qnil;
7393       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7394       if (NILP (trans))
7395         *buf++ = c;
7396       else
7397         {
7398           int from_nchars = 1, to_nchars = 1;
7399           int *lookup_buf_end;
7400           const unsigned char *p = src;
7401           int i;
7402
               /* Collect C plus up to MAX_LOOKUP - 1 following characters
                  (without consuming them) for get_translation.  */
7403           lookup_buf[0] = c;
7404           for (i = 1; i < max_lookup && p < src_end; i++)
7405             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7406           lookup_buf_end = lookup_buf + i;
7407           trans = get_translation (trans, lookup_buf, lookup_buf_end);
               /* The result is either one integer (a single replacement
                  character) or a cons whose car is a vector of the
                  matched source characters and whose cdr is an integer
                  or a vector of replacement characters.  */
7408           if (INTEGERP (trans))
7409             c = XINT (trans);
7410           else if (CONSP (trans))
7411             {
7412               from_nchars = ASIZE (XCAR (trans));
7413               trans = XCDR (trans);
7414               if (INTEGERP (trans))
7415                 c = XINT (trans);
7416               else
7417                 {
7418                   to_nchars = ASIZE (trans);
                       /* NOTE(review): SRC and POS were already advanced
                          past C when we get here; confirm that stopping
                          now does not drop that character.  */
7419                   if (buf + to_nchars > buf_end)
7420                     break;
7421                   c = XINT (AREF (trans, 0));
7422                 }
7423             }
7424           else
7425             break;
7426           *buf++ = c;
7427           for (i = 1; i < to_nchars; i++)
7428             *buf++ = XINT (AREF (trans, i));
               /* Consume the rest of the matched source characters; the
                  first one was consumed when C was fetched.  */
7429           for (i = 1; i < from_nchars; i++, pos++)
7430             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7431         }
7432     }
7433
7434   coding->consumed = src - coding->source;
7435   coding->consumed_char = pos - coding->src_pos;
7436   coding->charbuf_used = buf - coding->charbuf;
7437   coding->chars_at_source = 0;
7438 }
7439
7440
7441 /* Encode the text at CODING->src_object into CODING->dst_object.
7442    CODING->src_object is a buffer or a string.
7443    CODING->dst_object is a buffer or nil.
7444
7445    If CODING->src_object is a buffer, it must be the current buffer.
7446    In this case, if CODING->src_pos is positive, it is a position of
7447    the source text in the buffer, otherwise, the source text is in the
7448    gap area of the buffer, and coding->src_pos specifies the offset of
7449    the text from GPT (which must be the same as PT).  If this is the
7450    same buffer as CODING->dst_object, CODING->src_pos must be
7451    negative and CODING should not have `pre-write-conversion'.
7452
7453    If CODING->src_object is a string, CODING should not have
7454    `pre-write-conversion'.
7455
7456    If CODING->dst_object is a buffer, the encoded data is inserted at
7457    the current point of that buffer.
7458
7459    If CODING->dst_object is nil, the encoded data is placed at the
7460    memory area specified by CODING->destination.

        The return value is CODING->result.  */
7461
7462 static int
7463 encode_coding (struct coding_system *coding)
7464 {
7465   Lisp_Object attrs;
7466   Lisp_Object translation_table;
7467   int max_lookup;
7468   struct ccl_spec cclspec;
7469
       /* The raw-text encoder uses no translation table.  */
7470   attrs = CODING_ID_ATTRS (coding->id);
7471   if (coding->encoder == encode_coding_raw_text)
7472     translation_table = Qnil, max_lookup = 0;
7473   else
7474     translation_table = get_translation_table (attrs, 1, &max_lookup);
7475
       /* A buffer destination is made current, and its multibyteness
          decides CODING->dst_multibyte.  */
7476   if (BUFFERP (coding->dst_object))
7477     {
7478       set_buffer_internal (XBUFFER (coding->dst_object));
7479       coding->dst_multibyte
7480         = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7481     }
7482
7483   coding->consumed = coding->consumed_char = 0;
7484   coding->produced = coding->produced_char = 0;
7485   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7486   coding->errors = 0;
7487
       /* This macro returns CODING->result from this function when it
          fails to obtain a work area.  */
7488   ALLOC_CONVERSION_WORK_AREA (coding);
7489
       /* The CCL encoder keeps its state in CCLSPEC, which lives on this
          function's stack.  */
7490   if (coding->encoder == encode_coding_ccl)
7491     {
7492       coding->spec.ccl = &cclspec;
7493       setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7494     }
       /* Alternate between filling charbuf from the source and running
          the encoder on it until every source character is consumed.  */
7495   do {
7496     coding_set_source (coding);
7497     consume_chars (coding, translation_table, max_lookup);
7498     coding_set_destination (coding);
7499     (*(coding->encoder)) (coding);
7500   } while (coding->consumed_char < coding->src_chars);
7501
7502   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7503     insert_from_gap (coding->produced_char, coding->produced);
7504
7505   return (coding->result);
7506 }
7507
7508
7509 /* Name (or base name) of work buffer for code conversion.  */
7510 static Lisp_Object Vcode_conversion_workbuf_name;
7511
7512 /* A working buffer used by the top level conversion.  Once it is
7513    created, it is never destroyed.  It has the name
7514    Vcode_conversion_workbuf_name.  The other working buffers are
7515    destroyed after the use is finished, and their names are modified
7516    versions of Vcode_conversion_workbuf_name.  */
7517 static Lisp_Object Vcode_conversion_reused_workbuf;
7518
7519 /* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
        make_conversion_work_buffer increments it on every call, and
        code_conversion_restore resets it to 0 when the reused buffer is
        released.  */
7520 static int reused_workbuf_in_use;
7521
7522
7523 /* Return a working buffer of code conversion.  MULTIBYTE specifies the
7524    multibyteness of returning buffer.

        The returned buffer is erased, has its undo_list set to Qt, and
        has inhibit-modification-hooks set to t buffer-locally.  The
        current buffer is the same on return as on entry.  */
7525
7526 static Lisp_Object
7527 make_conversion_work_buffer (int multibyte)
7528 {
7529   Lisp_Object name, workbuf;
7530   struct buffer *current;
7531
       /* The counter is tested before it is incremented: a non-zero value
          means the reusable buffer is taken, so create a buffer under a
          freshly generated name instead.  */
7532   if (reused_workbuf_in_use++)
7533     {
7534       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7535       workbuf = Fget_buffer_create (name);
7536     }
7537   else
7538     {
           /* Create the reusable buffer again if it is not live.  */
7539       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7540         Vcode_conversion_reused_workbuf
7541           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7542       workbuf = Vcode_conversion_reused_workbuf;
7543     }
       /* Set the buffer up while it is current, then switch back.  */
7544   current = current_buffer;
7545   set_buffer_internal (XBUFFER (workbuf));
7546   /* We can't allow modification hooks to run in the work buffer.  For
7547      instance, directory_files_internal assumes that file decoding
7548      doesn't compile new regexps.  */
7549   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7550   Ferase_buffer ();
7551   BVAR (current_buffer, undo_list) = Qt;
7552   BVAR (current_buffer, enable_multibyte_characters) = multibyte ? Qt : Qnil;
7553   set_buffer_internal (current);
7554   return workbuf;
7555 }
7556
7557
/* Unwind function that code_conversion_save registers with
   record_unwind_protect.  ARG is the cons (BUFFER . WORKBUF) built
   there: BUFFER is the buffer that was current at that time and
   WORKBUF is the work buffer, or nil if there is none.

   Release WORKBUF (mark the reused buffer as free, or kill any other
   live work buffer), make BUFFER current again, and return Qnil.  */
7558 static Lisp_Object
7559 code_conversion_restore (Lisp_Object arg)
7560 {
7561   Lisp_Object current, workbuf;
7562   struct gcpro gcpro1;
7563
7564   GCPRO1 (arg);
7565   current = XCAR (arg);
7566   workbuf = XCDR (arg);
7567   if (! NILP (workbuf))
7568     {
           /* The reused buffer is kept and only flagged as available;
              every other work buffer is killed if it is still live.  */
7569       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7570         reused_workbuf_in_use = 0;
7571       else if (! NILP (Fbuffer_live_p (workbuf)))
7572         Fkill_buffer (workbuf);
7573     }
7574   set_buffer_internal (XBUFFER (current));
7575   UNGCPRO;
7576   return Qnil;
7577 }
7578
/* Register code_conversion_restore as an unwind function so that the
   buffer current now is made current again when the caller unbinds.

   If WITH_WORK_BUF is nonzero, first obtain a work buffer from
   make_conversion_work_buffer, with multibyteness MULTIBYTE; it is
   passed to the unwind function as well.  Return that work buffer,
   or Qnil if WITH_WORK_BUF is zero.  */
7579 Lisp_Object
7580 code_conversion_save (int with_work_buf, int multibyte)
7581 {
7582   Lisp_Object workbuf = Qnil;
7583
7584   if (with_work_buf)
7585     workbuf = make_conversion_work_buffer (multibyte);
7586   record_unwind_protect (code_conversion_restore,
7587                          Fcons (Fcurrent_buffer (), workbuf));
7588   return workbuf;
7589 }
7590
/* Decode CHARS characters occupying BYTES bytes of the current buffer
   with CODING.  The current buffer is both the source and the
   destination: the source position is given as -CHARS / -BYTES (see
   the comment on decode_coding for the meaning of a non-positive
   source position) and the destination position is point.

   The text is decoded as the last block (CODING_MODE_LAST_BLOCK is
   set in CODING->mode).  If the coding system has a post-read
   function, it is then called with the number of produced characters,
   and the produced counts are adjusted by the resulting change of Z
   and Z_BYTE.

   Return CODING->result.  */
7591 int
7592 decode_coding_gap (struct coding_system *coding,
7593                    EMACS_INT chars, EMACS_INT bytes)
7594 {
7595   int count = SPECPDL_INDEX ();
7596   Lisp_Object attrs;
7597
       /* No work buffer is needed; this only arranges for the current
          buffer to be restored by unbind_to below.  */
7598   code_conversion_save (0, 0);
7599
7600   coding->src_object = Fcurrent_buffer ();
7601   coding->src_chars = chars;
7602   coding->src_bytes = bytes;
7603   coding->src_pos = -chars;
7604   coding->src_pos_byte = -bytes;
7605   coding->src_multibyte = chars < bytes;
7606   coding->dst_object = coding->src_object;
7607   coding->dst_pos = PT;
7608   coding->dst_pos_byte = PT_BYTE;
7609   coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7610
7611   if (CODING_REQUIRE_DETECTION (coding))
7612     detect_coding (coding);
7613
7614   coding->mode |= CODING_MODE_LAST_BLOCK;
       /* inhibit_shrinking is set only around the decoding itself.  */
7615   current_buffer->text->inhibit_shrinking = 1;
7616   decode_coding (coding);
7617   current_buffer->text->inhibit_shrinking = 0;
7618
7619   attrs = CODING_ID_ATTRS (coding->id);
7620   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7621     {
7622       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7623       Lisp_Object val;
7624
           /* Call the post-read function with point at the start of the
              decoded text.  Its return value is only checked to be a
              natural number; the size change it caused is taken from
              Z and Z_BYTE.  */
7625       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7626       val = call1 (CODING_ATTR_POST_READ (attrs),
7627                    make_number (coding->produced_char));
7628       CHECK_NATNUM (val);
7629       coding->produced_char += Z - prev_Z;
7630       coding->produced += Z_BYTE - prev_Z_BYTE;
7631     }
7632
7633   unbind_to (count, Qnil);
7634   return coding->result;
7635 }
7636
7637
7638 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7639    SRC_OBJECT into DST_OBJECT by coding context CODING.
7640
7641    SRC_OBJECT is a buffer, a string, or Qnil.
7642
7643    If it is a buffer, the text is at point of the buffer.  FROM and TO
7644    are positions in the buffer.
7645
7646    If it is a string, the text is at the beginning of the string.
7647    FROM and TO are indices to the string.
7648
7649    If it is nil, the text is at coding->source.  FROM and TO are
7650    indices to coding->source.
7651
7652    DST_OBJECT is a buffer, Qt, or Qnil.
7653
7654    If it is a buffer, the decoded text is inserted at point of the
7655    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7656    is deleted.
7657
7658    If it is Qt, a string is made from the decoded text, and
7659    set in CODING->dst_object.
7660
7661    If it is Qnil, the decoded text is stored at CODING->destination.
7662    The caller must allocate CODING->dst_bytes bytes at
7663    CODING->destination by xmalloc.  If the decoded text is longer than
7664    CODING->dst_bytes, CODING->destination is relocated by xrealloc.

        Vdeactivate_mark is saved on entry and restored before the normal
        return.  When SRC_OBJECT and DST_OBJECT are the same buffer, point
        and the markers that sat at the edges of the replaced range are
        repositioned after the decoding.
7665  */
7666
7667 void
7668 decode_coding_object (struct coding_system *coding,
7669                       Lisp_Object src_object,
7670                       EMACS_INT from, EMACS_INT from_byte,
7671                       EMACS_INT to, EMACS_INT to_byte,
7672                       Lisp_Object dst_object)
7673 {
7674   int count = SPECPDL_INDEX ();
7675   unsigned char *destination IF_LINT (= NULL);
7676   EMACS_INT dst_bytes IF_LINT (= 0);
7677   EMACS_INT chars = to - from;
7678   EMACS_INT bytes = to_byte - from_byte;
7679   Lisp_Object attrs;
       /* Saved point, or -1 when point was not moved.  These hold buffer
          positions, so they are EMACS_INT like FROM and TO; plain int
          would truncate them where EMACS_INT is wider than int.  */
7680   EMACS_INT saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7681   int need_marker_adjustment = 0;
7682   Lisp_Object old_deactivate_mark;
7683
7684   old_deactivate_mark = Vdeactivate_mark;
7685
       /* Remember the caller's destination area; it is used again after
          the decoding, when the result is copied out of a work buffer.  */
7686   if (NILP (dst_object))
7687     {
7688       destination = coding->destination;
7689       dst_bytes = coding->dst_bytes;
7690     }
7691
7692   coding->src_object = src_object;
7693   coding->src_chars = chars;
7694   coding->src_bytes = bytes;
7695   coding->src_multibyte = chars < bytes;
7696
7697   if (STRINGP (src_object))
7698     {
7699       coding->src_pos = from;
7700       coding->src_pos_byte = from_byte;
7701     }
7702   else if (BUFFERP (src_object))
7703     {
7704       set_buffer_internal (XBUFFER (src_object));
7705       if (from != GPT)
7706         move_gap_both (from, from_byte);
7707       if (EQ (src_object, dst_object))
7708         {
7709           struct Lisp_Marker *tail;
7710
               /* Flag the markers sitting exactly at the edge of the range
                  (FROM for insertion-type markers, TO for the others);
                  they are repositioned at the end of this function.  */
7711           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7712             {
7713               tail->need_adjustment
7714                 = tail->charpos == (tail->insertion_type ? from : to);
7715               need_marker_adjustment |= tail->need_adjustment;
7716             }
               /* Decoding in place: delete the source range with
                  inhibit_shrinking set, and give the source position as
                  a negative offset (see the comment on decode_coding).  */
7717           saved_pt = PT, saved_pt_byte = PT_BYTE;
7718           TEMP_SET_PT_BOTH (from, from_byte);
7719           current_buffer->text->inhibit_shrinking = 1;
7720           del_range_both (from, from_byte, to, to_byte, 1);
7721           coding->src_pos = -chars;
7722           coding->src_pos_byte = -bytes;
7723         }
7724       else
7725         {
7726           coding->src_pos = from;
7727           coding->src_pos_byte = from_byte;
7728         }
7729     }
7730
7731   if (CODING_REQUIRE_DETECTION (coding))
7732     detect_coding (coding);
7733   attrs = CODING_ID_ATTRS (coding->id);
7734
       /* Choose the destination.  A work buffer is used when a string is
          wanted, and also when the result goes to memory but a post-read
          function has to run on it first.  */
7735   if (EQ (dst_object, Qt)
7736       || (! NILP (CODING_ATTR_POST_READ (attrs))
7737           && NILP (dst_object)))
7738     {
7739       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7740       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7741       coding->dst_pos = BEG;
7742       coding->dst_pos_byte = BEG_BYTE;
7743     }
7744   else if (BUFFERP (dst_object))
7745     {
7746       code_conversion_save (0, 0);
7747       coding->dst_object = dst_object;
7748       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7749       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7750       coding->dst_multibyte
7751         = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
7752     }
7753   else
7754     {
7755       code_conversion_save (0, 0);
7756       coding->dst_object = Qnil;
7757       /* Most callers presume this will return a multibyte result, and they
7758          won't use `binary' or `raw-text' anyway, so let's not worry about
7759          CODING_FOR_UNIBYTE.  */
7760       coding->dst_multibyte = 1;
7761     }
7762
7763   decode_coding (coding);
7764
7765   if (BUFFERP (coding->dst_object))
7766     set_buffer_internal (XBUFFER (coding->dst_object));
7767
7768   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7769     {
7770       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7771       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7772       Lisp_Object val;
7773
           /* Call the post-read function with point at the start of the
              decoded text.  Its return value is only checked to be a
              natural number; the size change it caused is taken from
              Z and Z_BYTE.  */
7774       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7775       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7776               old_deactivate_mark);
7777       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7778                         make_number (coding->produced_char));
7779       UNGCPRO;
7780       CHECK_NATNUM (val);
7781       coding->produced_char += Z - prev_Z;
7782       coding->produced += Z_BYTE - prev_Z_BYTE;
7783     }
7784
7785   if (EQ (dst_object, Qt))
7786     {
7787       coding->dst_object = Fbuffer_string ();
7788     }
7789   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7790     {
           /* The result was decoded into a work buffer.  If it does not
              fit in the caller's area, enlarge that area and copy the
              result into it from the start of the work buffer.  */
7791       set_buffer_internal (XBUFFER (coding->dst_object));
7792       if (dst_bytes < coding->produced)
7793         {
7794           destination = xrealloc (destination, coding->produced);
7795           if (! destination)
7796             {
7797               record_conversion_result (coding,
7798                                         CODING_RESULT_INSUFFICIENT_MEM);
7799               unbind_to (count, Qnil);
7800               return;
7801             }
               /* Make the text to be copied contiguous in memory.  */
7802           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7803             move_gap_both (BEGV, BEGV_BYTE);
7804           memcpy (destination, BEGV_ADDR, coding->produced);
7805           coding->destination = destination;
7806         }
7807     }
7808
7809   if (saved_pt >= 0)
7810     {
7811       /* This is the case of:
7812          (BUFFERP (src_object) && EQ (src_object, dst_object))
7813          As we have moved PT while replacing the original buffer
7814          contents, we must recover it now.  */
7815       set_buffer_internal (XBUFFER (src_object));
7816       current_buffer->text->inhibit_shrinking = 0;
           /* Point before the range is unchanged, point inside the range
              goes to FROM, and point after the range is shifted by the
              change in size (counted in bytes for a unibyte buffer).  */
7817       if (saved_pt < from)
7818         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7819       else if (saved_pt < from + chars)
7820         TEMP_SET_PT_BOTH (from, from_byte);
7821       else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
7822         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7823                           saved_pt_byte + (coding->produced - bytes));
7824       else
7825         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7826                           saved_pt_byte + (coding->produced - bytes));
7827
7828       if (need_marker_adjustment)
7829         {
7830           struct Lisp_Marker *tail;
7831
               /* Insertion-type markers go to the start of the decoded
                  text and the other flagged markers to its end.  */
7832           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7833             if (tail->need_adjustment)
7834               {
7835                 tail->need_adjustment = 0;
7836                 if (tail->insertion_type)
7837                   {
7838                     tail->bytepos = from_byte;
7839                     tail->charpos = from;
7840                   }
7841                 else
7842                   {
7843                     tail->bytepos = from_byte + coding->produced;
7844                     tail->charpos
7845                       = (NILP (BVAR (current_buffer, enable_multibyte_characters))
7846                          ? tail->bytepos : from + coding->produced_char);
7847                   }
7848               }
7849         }
7850     }
7851
7852   Vdeactivate_mark = old_deactivate_mark;
7853   unbind_to (count, coding->dst_object);
7854 }
7855
7856
7857 void
7858 encode_coding_object (struct coding_system *coding,
7859                       Lisp_Object src_object,
7860                       EMACS_INT from, EMACS_INT from_byte,
7861                       EMACS_INT to, EMACS_INT to_byte,
7862                       Lisp_Object dst_object)
7863 {
7864   int count = SPECPDL_INDEX ();
7865   EMACS_INT chars = to - from;
7866   EMACS_INT bytes = to_byte - from_byte;
7867   Lisp_Object attrs;
7868   int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7869   int need_marker_adjustment = 0;
7870   int kill_src_buffer = 0;
7871   Lisp_Object old_deactivate_mark;
7872
7873   old_deactivate_mark = Vdeactivate_mark;
7874
7875   coding->src_object = src_object;
7876   coding->src_chars = chars;
7877   coding->src_bytes = bytes;
7878   coding->src_multibyte = chars < bytes;
7879
7880   attrs = CODING_ID_ATTRS (coding->id);
7881
7882   if (EQ (src_object, dst_object))
7883     {
7884       struct Lisp_Marker *tail;
7885
7886       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7887         {
7888           tail->need_adjustment
7889             = tail->charpos == (tail->insertion_type ? from : to);
7890           need_marker_adjustment |= tail->need_adjustment;
7891         }
7892     }
7893
7894   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7895     {
7896       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7897       set_buffer_internal (XBUFFER (coding->src_object));
7898       if (STRINGP (src_object))
7899         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7900       else if (BUFFERP (src_object))
7901         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7902       else
7903         insert_1_both ((char *) coding->source + from, chars, bytes, 0, 0, 0);
7904
7905       if (EQ (src_object, dst_object))
7906         {
7907           set_buffer_internal (XBUFFER (src_object));
7908           saved_pt = PT, saved_pt_byte = PT_BYTE;
7909           del_range_both (from, from_byte, to, to_byte, 1);
7910           set_buffer_internal (XBUFFER (coding->src_object));
7911         }
7912
7913       {
7914         Lisp_Object args[3];
7915         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7916
7917         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7918                 old_deactivate_mark);
7919         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7920         args[1] = make_number (BEG);
7921         args[2] = make_number (Z);
7922         safe_call (3, args);
7923         UNGCPRO;
7924       }
7925       if (XBUFFER (coding->src_object) != current_buffer)
7926         kill_src_buffer = 1;
7927       coding->src_object = Fcurrent_buffer ();
7928       if (BEG != GPT)
7929         move_gap_both (BEG, BEG_BYTE);
7930       coding->src_chars = Z - BEG;
7931       coding->src_bytes = Z_BYTE - BEG_BYTE;
7932       coding->src_pos = BEG;
7933       coding->src_pos_byte = BEG_BYTE;
7934       coding->src_multibyte = Z < Z_BYTE;
7935     }
7936   else if (STRINGP (src_object))
7937     {
7938       code_conversion_save (0, 0);
7939       coding->src_pos = from;
7940       coding->src_pos_byte = from_byte;
7941     }
7942   else if (BUFFERP (src_object))
7943     {
7944       code_conversion_save (0, 0);
7945       set_buffer_internal (XBUFFER (src_object));
7946       if (EQ (src_object, dst_object))
7947         {
7948           saved_pt = PT, saved_pt_byte = PT_BYTE;
7949           coding->src_object = del_range_1 (from, to, 1, 1);
7950           coding->src_pos = 0;
7951           coding->src_pos_byte = 0;
7952         }
7953       else
7954         {
7955           if (from < GPT && to >= GPT)
7956             move_gap_both (from, from_byte);
7957           coding->src_pos = from;
7958           coding->src_pos_byte = from_byte;
7959         }
7960     }
7961   else
7962     code_conversion_save (0, 0);
7963
7964   if (BUFFERP (dst_object))
7965     {
7966       coding->dst_object = dst_object;
7967       if (EQ (src_object, dst_object))
7968         {
7969           coding->dst_pos = from;
7970           coding->dst_pos_byte = from_byte;
7971         }
7972       else
7973         {
7974           struct buffer *current = current_buffer;
7975
7976           set_buffer_temp (XBUFFER (dst_object));
7977           coding->dst_pos = PT;
7978           coding->dst_pos_byte = PT_BYTE;
7979           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7980           set_buffer_temp (current);
7981         }
7982       coding->dst_multibyte
7983         = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
7984     }
7985   else if (EQ (dst_object, Qt))
7986     {
7987       ptrdiff_t dst_bytes = max (1, coding->src_chars);
7988       coding->dst_object = Qnil;
7989       coding->destination = (unsigned char *) xmalloc (dst_bytes);
7990       coding->dst_bytes = dst_bytes;
7991       coding->dst_multibyte = 0;
7992     }
7993   else
7994     {
7995       coding->dst_object = Qnil;
7996       coding->dst_multibyte = 0;
7997     }
7998
7999   encode_coding (coding);
8000
8001   if (EQ (dst_object, Qt))
8002     {
8003       if (BUFFERP (coding->dst_object))
8004         coding->dst_object = Fbuffer_string ();
8005       else
8006         {
8007           coding->dst_object
8008             = make_unibyte_string ((char *) coding->destination,
8009                                    coding->produced);
8010           xfree (coding->destination);
8011         }
8012     }
8013
8014   if (saved_pt >= 0)
8015     {
8016       /* This is the case of:
8017          (BUFFERP (src_object) && EQ (src_object, dst_object))
8018          As we have moved PT while replacing the original buffer
8019          contents, we must recover it now.  */
8020       set_buffer_internal (XBUFFER (src_object));
8021       if (saved_pt < from)
8022         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8023       else if (saved_pt < from + chars)
8024         TEMP_SET_PT_BOTH (from, from_byte);
8025       else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
8026         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8027                           saved_pt_byte + (coding->produced - bytes));
8028       else
8029         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8030                           saved_pt_byte + (coding->produced - bytes));
8031
8032       if (need_marker_adjustment)
8033         {
8034           struct Lisp_Marker *tail;
8035
8036           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8037             if (tail->need_adjustment)
8038               {
8039                 tail->need_adjustment = 0;
8040                 if (tail->insertion_type)
8041                   {
8042                     tail->bytepos = from_byte;
8043                     tail->charpos = from;
8044                   }
8045                 else
8046                   {
8047                     tail->bytepos = from_byte + coding->produced;
8048                     tail->charpos
8049                       = (NILP (BVAR (current_buffer, enable_multibyte_characters))
8050                          ? tail->bytepos : from + coding->produced_char);
8051                   }
8052               }
8053         }
8054     }
8055
8056   if (kill_src_buffer)
8057     Fkill_buffer (coding->src_object);
8058
8059   Vdeactivate_mark = old_deactivate_mark;
8060   unbind_to (count, Qnil);
8061 }
8062
8063
8064 Lisp_Object
8065 preferred_coding_system (void)
8066 {
8067   int id = coding_categories[coding_priorities[0]].id;
8068
8069   return CODING_ID_NAME (id);
8070 }
8071
8072 \f
8073 #ifdef emacs
8074 /*** 11. Emacs Lisp library functions ***/
8075
8076 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8077        doc: /* Return t if OBJECT is nil or a coding-system.
8078 See the documentation of `define-coding-system' for information
8079 about coding-system objects.  */)
8080   (Lisp_Object object)
8081 {
8082   if (NILP (object)
8083       || CODING_SYSTEM_ID (object) >= 0)
8084     return Qt;
8085   if (! SYMBOLP (object)
8086       || NILP (Fget (object, Qcoding_system_define_form)))
8087     return Qnil;
8088   return Qt;
8089 }
8090
8091 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8092        Sread_non_nil_coding_system, 1, 1, 0,
8093        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8094   (Lisp_Object prompt)
8095 {
8096   Lisp_Object val;
8097   do
8098     {
8099       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8100                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8101     }
8102   while (SCHARS (val) == 0);
8103   return (Fintern (val, Qnil));
8104 }
8105
8106 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8107        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8108 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8109 Ignores case when completing coding systems (all Emacs coding systems
8110 are lower-case).  */)
8111   (Lisp_Object prompt, Lisp_Object default_coding_system)
8112 {
8113   Lisp_Object val;
8114   int count = SPECPDL_INDEX ();
8115
8116   if (SYMBOLP (default_coding_system))
8117     default_coding_system = SYMBOL_NAME (default_coding_system);
8118   specbind (Qcompletion_ignore_case, Qt);
8119   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8120                           Qt, Qnil, Qcoding_system_history,
8121                           default_coding_system, Qnil);
8122   unbind_to (count, Qnil);
8123   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8124 }
8125
8126 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8127        1, 1, 0,
8128        doc: /* Check validity of CODING-SYSTEM.
8129 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8130 It is valid if it is nil or a symbol defined as a coding system by the
8131 function `define-coding-system'.  */)
8132   (Lisp_Object coding_system)
8133 {
8134   Lisp_Object define_form;
8135
8136   define_form = Fget (coding_system, Qcoding_system_define_form);
8137   if (! NILP (define_form))
8138     {
8139       Fput (coding_system, Qcoding_system_define_form, Qnil);
8140       safe_eval (define_form);
8141     }
8142   if (!NILP (Fcoding_system_p (coding_system)))
8143     return coding_system;
8144   xsignal1 (Qcoding_system_error, coding_system);
8145 }
8146
8147 \f
8148 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8149    HIGHEST is nonzero, return the coding system of the highest
8150    priority among the detected coding systems.  Otherwise return a
8151    list of detected coding systems sorted by their priorities.  If
8152    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8153    multibyte form but contains only ASCII and eight-bit chars.
8154    Otherwise, the bytes are raw bytes.
8155
8156    CODING-SYSTEM controls the detection as below:
8157
8158    If it is nil, detect both text-format and eol-format.  If the
8159    text-format part of CODING-SYSTEM is already specified
8160    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8161    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8162    detect only text-format.  */
     /* Implementation note: the candidates are first collected in VAL
        as a list of coding system ids; the last loop of the function
        replaces each id in place by a coding system name.  With
        HIGHEST nonzero the value is the first element of that list,
        or nil if the list is empty.  */
8163
8164 Lisp_Object
8165 detect_coding_system (const unsigned char *src,
8166                       EMACS_INT src_chars, EMACS_INT src_bytes,
8167                       int highest, int multibytep,
8168                       Lisp_Object coding_system)
8169 {
8170   const unsigned char *src_end = src + src_bytes;
8171   Lisp_Object attrs, eol_type;
8172   Lisp_Object val = Qnil;
8173   struct coding_system coding;
8174   ptrdiff_t id;
8175   struct coding_detection_info detect_info;
8176   enum coding_category base_category;
8177   int null_byte_found = 0, eight_bit_found = 0;
8178
8179   if (NILP (coding_system))
8180     coding_system = Qundecided;
8181   setup_coding_system (coding_system, &coding);
8182   attrs = CODING_ID_ATTRS (coding.id);
8183   eol_type = CODING_ID_EOL_TYPE (coding.id);
8184   coding_system = CODING_ATTR_BASE_NAME (attrs);
8185
8186   coding.source = src;
8187   coding.src_chars = src_chars;
8188   coding.src_bytes = src_bytes;
8189   coding.src_multibyte = multibytep;
8190   coding.consumed = 0;
8191   coding.mode |= CODING_MODE_LAST_BLOCK;
8192   coding.head_ascii = 0;
8193
8194   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8195
8196   /* At first, detect text-format if necessary.  */
8197   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8198   if (base_category == coding_category_undecided)
8199     {
8200       enum coding_category category IF_LINT (= 0);
8201       struct coding_system *this IF_LINT (= NULL);
8202       int c, i;
8203
8204       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
            /* While scanning, coding.head_ascii counts the bytes seen
               before the first byte with the high bit set.  The scan
               stops early once both a null byte and an 8-bit byte
               have been seen.  */
8205       for (; src < src_end; src++)
8206         {
8207           c = *src;
8208           if (c & 0x80)
8209             {
8210               eight_bit_found = 1;
8211               if (null_byte_found)
8212                 break;
8213             }
8214           else if (c < 0x20)
8215             {
8216               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8217                   && ! inhibit_iso_escape_detection
8218                   && ! detect_info.checked)
8219                 {
8220                   if (detect_coding_iso_2022 (&coding, &detect_info))
8221                     {
8222                       /* We have scanned the whole data.  */
8223                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8224                         {
8225                           /* We didn't find an 8-bit code.  We may
8226                              have found a null-byte, but it's very
8227                              rare that a binary file conform to
8228                              ISO-2022.  */
8229                           src = src_end;
8230                           coding.head_ascii = src - coding.source;
8231                         }
8232                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8233                       break;
8234                     }
8235                 }
8236               else if (! c && !inhibit_null_byte_detection)
8237                 {
8238                   null_byte_found = 1;
8239                   if (eight_bit_found)
8240                     break;
8241                 }
8242               if (! eight_bit_found)
8243                 coding.head_ascii++;
8244             }
8245           else if (! eight_bit_found)
8246             coding.head_ascii++;
8247         }
8248
            /* Consult the categories only when the scan above saw
               something besides plain 7-bit text, or when the ISO-2022
               detector already reported a match.  */
8249       if (null_byte_found || eight_bit_found
8250           || coding.head_ascii < coding.src_bytes
8251           || detect_info.found)
8252         {
8253           if (coding.head_ascii == coding.src_bytes)
8254             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
8255             for (i = 0; i < coding_category_raw_text; i++)
8256               {
8257                 category = coding_priorities[i];
8258                 this = coding_categories + category;
8259                 if (detect_info.found & (1 << category))
8260                   break;
8261               }
8262           else
8263             {
8264               if (null_byte_found)
8265                 {
                        /* With a null byte present, every category
                           other than the UTF-16 ones is marked as
                           already checked and rejected.  */
8266                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8267                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8268                 }
                    /* Walk the categories in priority order, running
                       the detector of each one not yet checked.  When
                       HIGHEST is nonzero the walk stops at the first
                       category that is found, leaving it in CATEGORY
                       and THIS for the code below.  */
8269               for (i = 0; i < coding_category_raw_text; i++)
8270                 {
8271                   category = coding_priorities[i];
8272                   this = coding_categories + category;
8273
8274                   if (this->id < 0)
8275                     {
8276                       /* No coding system of this category is defined.  */
8277                       detect_info.rejected |= (1 << category);
8278                     }
8279                   else if (category >= coding_category_raw_text)
8280                     continue;
8281                   else if (detect_info.checked & (1 << category))
8282                     {
8283                       if (highest
8284                           && (detect_info.found & (1 << category)))
8285                         break;
8286                     }
8287                   else if ((*(this->detector)) (&coding, &detect_info)
8288                            && highest
8289                            && (detect_info.found & (1 << category)))
8290                     {
8291                       if (category == coding_category_utf_16_auto)
8292                         {
8293                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8294                             category = coding_category_utf_16_le;
8295                           else
8296                             category = coding_category_utf_16_be;
8297                         }
8298                       break;
8299                     }
8300                 }
8301             }
8302         }
8303
            /* Turn the detection masks into the list VAL of candidate
               ids.  If every category was rejected, or a null byte was
               seen, the only candidate is the id of Qno_conversion.  */
8304       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8305           || null_byte_found)
8306         {
8307           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8308           id = CODING_SYSTEM_ID (Qno_conversion);
8309           val = Fcons (make_number (id), Qnil);
8310         }
            /* Nothing was rejected and nothing was found: report the
               coding system of the undecided category.  */
8311       else if (! detect_info.rejected && ! detect_info.found)
8312         {
8313           detect_info.found = CATEGORY_MASK_ANY;
8314           id = coding_categories[coding_category_undecided].id;
8315           val = Fcons (make_number (id), Qnil);
8316         }
8317       else if (highest)
8318         {
8319           if (detect_info.found)
8320             {
                    /* CATEGORY and THIS are the values left behind by
                       the detection loops above.  */
8321               detect_info.found = 1 << category;
8322               val = Fcons (make_number (this->id), Qnil);
8323             }
8324           else
                  /* Nothing was positively found: take the first
                     category, in priority order, that was not
                     rejected.  */
8325             for (i = 0; i < coding_category_raw_text; i++)
8326               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8327                 {
8328                   detect_info.found = 1 << coding_priorities[i];
8329                   id = coding_categories[coding_priorities[i]].id;
8330                   val = Fcons (make_number (id), Qnil);
8331                   break;
8332                 }
8333         }
8334       else
8335         {
8336           int mask = detect_info.rejected | detect_info.found;
8337           int found = 0;
8338
                /* Both loops run from the lowest priority to the
                   highest and push onto the front of VAL, so each
                   group ends up in priority order.  The first loop
                   adds the categories that were neither rejected nor
                   found; the second puts the found ones in front of
                   them.  */
8339           for (i = coding_category_raw_text - 1; i >= 0; i--)
8340             {
8341               category = coding_priorities[i];
8342               if (! (mask & (1 << category)))
8343                 {
8344                   found |= 1 << category;
8345                   id = coding_categories[category].id;
8346                   if (id >= 0)
8347                     val = Fcons (make_number (id), val);
8348                 }
8349             }
8350           for (i = coding_category_raw_text - 1; i >= 0; i--)
8351             {
8352               category = coding_priorities[i];
8353               if (detect_info.found & (1 << category))
8354                 {
8355                   id = coding_categories[category].id;
8356                   val = Fcons (make_number (id), val);
8357                 }
8358             }
8359           detect_info.found |= found;
8360         }
8361     }
8362   else if (base_category == coding_category_utf_8_auto)
8363     {
          /* Only the choice between the signature and no-signature
             UTF-8 categories is left to detect.  */
8364       if (detect_coding_utf_8 (&coding, &detect_info))
8365         {
8366           struct coding_system *this;
8367
8368           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8369             this = coding_categories + coding_category_utf_8_sig;
8370           else
8371             this = coding_categories + coding_category_utf_8_nosig;
8372           val = Fcons (make_number (this->id), Qnil);
8373         }
8374     }
8375   else if (base_category == coding_category_utf_16_auto)
8376     {
          /* Only the UTF-16 variant is left to detect.  */
8377       if (detect_coding_utf_16 (&coding, &detect_info))
8378         {
8379           struct coding_system *this;
8380
8381           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8382             this = coding_categories + coding_category_utf_16_le;
8383           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8384             this = coding_categories + coding_category_utf_16_be;
8385           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8386             this = coding_categories + coding_category_utf_16_be_nosig;
8387           else
8388             this = coding_categories + coding_category_utf_16_le_nosig;
8389           val = Fcons (make_number (this->id), Qnil);
8390         }
8391     }
8392   else
8393     {
          /* The text format is already fixed by CODING_SYSTEM; it is
             the only candidate.  */
8394       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8395       val = Fcons (make_number (coding.id), Qnil);
8396     }
8397
8398   /* Then, detect eol-format if necessary.  */
8399   {
        /* Each of these stays -1 unless an eol format is determined
           for the corresponding group of categories.  */
8400     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8401     Lisp_Object tail;
8402
        /* When EOL_TYPE is a vector the eol format is detected from
           the data; otherwise it is taken from EOL_TYPE itself.  */
8403     if (VECTORP (eol_type))
8404       {
8405         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8406           {
8407             if (null_byte_found)
8408               normal_eol = EOL_SEEN_LF;
8409             else
8410               normal_eol = detect_eol (coding.source, src_bytes,
8411                                        coding_category_raw_text);
8412           }
8413         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8414                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8415           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8416                                       coding_category_utf_16_be);
8417         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8418                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8419           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8420                                       coding_category_utf_16_le);
8421       }
8422     else
8423       {
8424         if (EQ (eol_type, Qunix))
8425           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8426         else if (EQ (eol_type, Qdos))
8427           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8428         else
8429           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8430       }
8431
        /* Replace each id in VAL by a coding system.  If the
           candidate's own eol type is a vector, its element 0, 1 or 2
           is used for LF, CRLF or CR respectively; in every other
           case the value of CODING_ID_NAME is used.  */
8432     for (tail = val; CONSP (tail); tail = XCDR (tail))
8433       {
8434         enum coding_category category;
8435         int this_eol;
8436
8437         id = XINT (XCAR (tail));
8438         attrs = CODING_ID_ATTRS (id);
8439         category = XINT (CODING_ATTR_CATEGORY (attrs));
8440         eol_type = CODING_ID_EOL_TYPE (id);
8441         if (VECTORP (eol_type))
8442           {
8443             if (category == coding_category_utf_16_be
8444                 || category == coding_category_utf_16_be_nosig)
8445               this_eol = utf_16_be_eol;
8446             else if (category == coding_category_utf_16_le
8447                      || category == coding_category_utf_16_le_nosig)
8448               this_eol = utf_16_le_eol;
8449             else
8450               this_eol = normal_eol;
8451
8452             if (this_eol == EOL_SEEN_LF)
8453               XSETCAR (tail, AREF (eol_type, 0));
8454             else if (this_eol == EOL_SEEN_CRLF)
8455               XSETCAR (tail, AREF (eol_type, 1));
8456             else if (this_eol == EOL_SEEN_CR)
8457               XSETCAR (tail, AREF (eol_type, 2));
8458             else
8459               XSETCAR (tail, CODING_ID_NAME (id));
8460           }
8461         else
8462           XSETCAR (tail, CODING_ID_NAME (id));
8463       }
8464   }
8465
8466   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8467 }
8468
8469
8470 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8471        2, 3, 0,
8472        doc: /* Detect coding system of the text in the region between START and END.
8473 Return a list of possible coding systems ordered by priority.
8474 The coding systems to try and their priorities follows what
8475 the function `coding-system-priority-list' (which see) returns.
8476
8477 If only ASCII characters are found (except for such ISO-2022 control
8478 characters as ESC), it returns a list of single element `undecided'
8479 or its subsidiary coding system according to a detected end-of-line
8480 format.
8481
8482 If optional argument HIGHEST is non-nil, return the coding system of
8483 highest priority.  */)
8484   (Lisp_Object start, Lisp_Object end, Lisp_Object highest)
8485 {
8486   int from, to;
8487   int from_byte, to_byte;
8488
8489   CHECK_NUMBER_COERCE_MARKER (start);
8490   CHECK_NUMBER_COERCE_MARKER (end);
8491
8492   validate_region (&start, &end);
8493   from = XINT (start), to = XINT (end);
8494   from_byte = CHAR_TO_BYTE (from);
8495   to_byte = CHAR_TO_BYTE (to);
8496
8497   if (from < GPT && to >= GPT)
8498     move_gap_both (to, to_byte);
8499
8500   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8501                                to - from, to_byte - from_byte,
8502                                !NILP (highest),
8503                                !NILP (BVAR (current_buffer
8504                                       , enable_multibyte_characters)),
8505                                Qnil);
8506 }
8507
8508 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8509        1, 2, 0,
8510        doc: /* Detect coding system of the text in STRING.
8511 Return a list of possible coding systems ordered by priority.
8512 The coding systems to try and their priorities follows what
8513 the function `coding-system-priority-list' (which see) returns.
8514
8515 If only ASCII characters are found (except for such ISO-2022 control
8516 characters as ESC), it returns a list of single element `undecided'
8517 or its subsidiary coding system according to a detected end-of-line
8518 format.
8519
8520 If optional argument HIGHEST is non-nil, return the coding system of
8521 highest priority.  */)
8522   (Lisp_Object string, Lisp_Object highest)
8523 {
8524   CHECK_STRING (string);
8525
8526   return detect_coding_system (SDATA (string),
8527                                SCHARS (string), SBYTES (string),
8528                                !NILP (highest), STRING_MULTIBYTE (string),
8529                                Qnil);
8530 }
8531
8532
8533 static inline int
8534 char_encodable_p (int c, Lisp_Object attrs)
8535 {
8536   Lisp_Object tail;
8537   struct charset *charset;
8538   Lisp_Object translation_table;
8539
8540   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8541   if (! NILP (translation_table))
8542     c = translate_char (translation_table, c);
8543   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8544        CONSP (tail); tail = XCDR (tail))
8545     {
8546       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8547       if (CHAR_CHARSET_P (c, charset))
8548         break;
8549     }
8550   return (! NILP (tail));
8551 }
8552
8553
8554 /* Return a list of coding systems that safely encode the text between
8555    START and END.  If EXCLUDE is non-nil, it is a list of coding
8556    systems not to check.  The returned list doesn't contain any such
8557    coding systems.  In any case, if the text contains only ASCII or is
8558    unibyte, return t.  */
     /* START may also be a string; in that case the whole string is
        examined and END is not looked at.  The returned list always
        ends with `raw-text' and `no-conversion'.  */
8559
8560 DEFUN ("find-coding-systems-region-internal",
8561        Ffind_coding_systems_region_internal,
8562        Sfind_coding_systems_region_internal, 2, 3, 0,
8563        doc: /* Internal use only.  */)
8564   (Lisp_Object start, Lisp_Object end, Lisp_Object exclude)
8565 {
8566   Lisp_Object coding_attrs_list, safe_codings;
8567   EMACS_INT start_byte, end_byte;
8568   const unsigned char *p, *pbeg, *pend;
8569   int c;
8570   Lisp_Object tail, elt, work_table;
8571
8572   if (STRINGP (start))
8573     {
          /* A unibyte string, or one whose char and byte counts are
             equal, needs no further checking.  */
8574       if (!STRING_MULTIBYTE (start)
8575           || SCHARS (start) == SBYTES (start))
8576         return Qt;
8577       start_byte = 0;
8578       end_byte = SBYTES (start);
8579     }
8580   else
8581     {
8582       CHECK_NUMBER_COERCE_MARKER (start);
8583       CHECK_NUMBER_COERCE_MARKER (end);
8584       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8585         args_out_of_range (start, end);
8586       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
8587         return Qt;
8588       start_byte = CHAR_TO_BYTE (XINT (start));
8589       end_byte = CHAR_TO_BYTE (XINT (end));
8590       if (XINT (end) - XINT (start) == end_byte - start_byte)
8591         return Qt;
8592
          /* The gap lies strictly inside the region: move it to
             whichever end of the region is nearer.  NOTE(review):
             presumably this makes the scanned bytes contiguous in
             memory -- confirm against move_gap_both.  */
8593       if (XINT (start) < GPT && XINT (end) > GPT)
8594         {
8595           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8596             move_gap_both (XINT (start), start_byte);
8597           else
8598             move_gap_both (XINT (end), end_byte);
8599         }
8600     }
8601
        /* Collect the attribute vectors of the candidates: every entry
           of Vcoding_system_list that is not in EXCLUDE, is its own
           base name, and is not of type `undecided'.  The translation
           table slot of each collected vector is overwritten with the
           value of get_translation_table.  */
8602   coding_attrs_list = Qnil;
8603   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8604     if (NILP (exclude)
8605         || NILP (Fmemq (XCAR (tail), exclude)))
8606       {
8607         Lisp_Object attrs;
8608
8609         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8610         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8611             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8612           {
8613             ASET (attrs, coding_attr_trans_tbl,
8614                   get_translation_table (attrs, 1, NULL));
8615             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8616           }
8617       }
8618
8619   if (STRINGP (start))
8620     p = pbeg = SDATA (start);
8621   else
8622     p = pbeg = BYTE_POS_ADDR (start_byte);
8623   pend = p + (end_byte - start_byte);
8624
        /* Trim the ASCII bytes at both ends of the text.  */
8625   while (p < pend && ASCII_BYTE_P (*p)) p++;
8626   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8627
        /* WORK_TABLE records the characters already tested, so that
           each distinct character is checked only once.  */
8628   work_table = Fmake_char_table (Qnil, Qnil);
8629   while (p < pend)
8630     {
8631       if (ASCII_BYTE_P (*p))
8632         p++;
8633       else
8634         {
8635           c = STRING_CHAR_ADVANCE (p);
8636           if (!NILP (char_table_ref (work_table, c)))
8637             /* This character was already checked.  Ignore it.  */
8638             continue;
8639
8640           charset_map_loaded = 0;
                /* Drop from COODING_ATTRS_LIST's cells every candidate
                   that cannot encode C.  A cell is removed in place by
                   copying the next cell into it; the last cell cannot
                   be removed that way, so its car is set to nil.  */
8641           for (tail = coding_attrs_list; CONSP (tail);)
8642             {
8643               elt = XCAR (tail);
8644               if (NILP (elt))
8645                 tail = XCDR (tail);
8646               else if (char_encodable_p (c, elt))
8647                 tail = XCDR (tail);
8648               else if (CONSP (XCDR (tail)))
8649                 {
8650                   XSETCAR (tail, XCAR (XCDR (tail)));
8651                   XSETCDR (tail, XCDR (XCDR (tail)));
8652                 }
8653               else
8654                 {
8655                   XSETCAR (tail, Qnil);
8656                   tail = XCDR (tail);
8657                 }
8658             }
8659           if (charset_map_loaded)
8660             {
                    /* Recompute P and PEND from a freshly fetched base
                       address, keeping their offsets.  NOTE(review):
                       presumably the text may have been relocated
                       while a charset map was loaded -- confirm.  */
8661               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8662
8663               if (STRINGP (start))
8664                 pbeg = SDATA (start);
8665               else
8666                 pbeg = BYTE_POS_ADDR (start_byte);
8667               p = pbeg + p_offset;
8668               pend = pbeg + pend_offset;
8669             }
8670           char_table_set (work_table, c, Qt);
8671         }
8672     }
8673
        /* The candidates that survived are pushed in front of
           `raw-text' and `no-conversion'; cells emptied above are
           skipped.  */
8674   safe_codings = list2 (Qraw_text, Qno_conversion);
8675   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8676     if (! NILP (XCAR (tail)))
8677       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8678
8679   return safe_codings;
8680 }
8681
8682
8683 DEFUN ("unencodable-char-position", Funencodable_char_position,
8684        Sunencodable_char_position, 3, 5, 0,
8685        doc: /*
8686 Return position of first un-encodable character in a region.
8687 START and END specify the region and CODING-SYSTEM specifies the
8688 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8689
8690 If optional 4th argument COUNT is non-nil, it specifies at most how
8691 many un-encodable characters to search.  In this case, the value is a
8692 list of positions.
8693
8694 If optional 5th argument STRING is non-nil, it is a string to search
8695 for un-encodable characters.  In that case, START and END are indexes
8696 to the string.  */)
8697   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
8698 {
8699   int n;
8700   struct coding_system coding;
8701   Lisp_Object attrs, charset_list, translation_table;
8702   Lisp_Object positions;
8703   int from, to;
8704   const unsigned char *p, *stop, *pend;
8705   int ascii_compatible;
8706
8707   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8708   attrs = CODING_ID_ATTRS (coding.id);
8709   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8710     return Qnil;
8711   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8712   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8713   translation_table = get_translation_table (attrs, 1, NULL);
8714
8715   if (NILP (string))
8716     {
8717       validate_region (&start, &end);
8718       from = XINT (start);
8719       to = XINT (end);
8720       if (NILP (BVAR (current_buffer, enable_multibyte_characters))
8721           || (ascii_compatible
8722               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8723         return Qnil;
8724       p = CHAR_POS_ADDR (from);
8725       pend = CHAR_POS_ADDR (to);
8726       if (from < GPT && to >= GPT)
8727         stop = GPT_ADDR;
8728       else
8729         stop = pend;
8730     }
8731   else
8732     {
8733       CHECK_STRING (string);
8734       CHECK_NATNUM (start);
8735       CHECK_NATNUM (end);
8736       from = XINT (start);
8737       to = XINT (end);
8738       if (from > to
8739           || to > SCHARS (string))
8740         args_out_of_range_3 (string, start, end);
8741       if (! STRING_MULTIBYTE (string))
8742         return Qnil;
8743       p = SDATA (string) + string_char_to_byte (string, from);
8744       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8745       if (ascii_compatible && (to - from) == (pend - p))
8746         return Qnil;
8747     }
8748
8749   if (NILP (count))
8750     n = 1;
8751   else
8752     {
8753       CHECK_NATNUM (count);
8754       n = XINT (count);
8755     }
8756
8757   positions = Qnil;
8758   while (1)
8759     {
8760       int c;
8761
8762       if (ascii_compatible)
8763         while (p < stop && ASCII_BYTE_P (*p))
8764           p++, from++;
8765       if (p >= stop)
8766         {
8767           if (p >= pend)
8768             break;
8769           stop = pend;
8770           p = GAP_END_ADDR;
8771         }
8772
8773       c = STRING_CHAR_ADVANCE (p);
8774       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8775           && ! char_charset (translate_char (translation_table, c),
8776                              charset_list, NULL))
8777         {
8778           positions = Fcons (make_number (from), positions);
8779           n--;
8780           if (n == 0)
8781             break;
8782         }
8783
8784       from++;
8785     }
8786
8787   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8788 }
8789
8790
8791 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8792        Scheck_coding_systems_region, 3, 3, 0,
8793        doc: /* Check if the region is encodable by coding systems.
8794
8795 START and END are buffer positions specifying the region.
8796 CODING-SYSTEM-LIST is a list of coding systems to check.
8797
8798 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8799 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8800 whole region, POS0, POS1, ... are buffer positions where non-encodable
8801 characters are found.
8802
8803 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8804 value is nil.
8805
8806 START may be a string.  In that case, check if the string is
8807 encodable, and the value contains indices to the string instead of
8808 buffer positions.  END is ignored.
8809
8810 If the current buffer (or START if it is a string) is unibyte, the value
8811 is nil.  */)
8812   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system_list)
8813 {
8814   Lisp_Object list;
8815   EMACS_INT start_byte, end_byte;
8816   int pos;
8817   const unsigned char *p, *pbeg, *pend;
8818   int c;
8819   Lisp_Object tail, elt, attrs;
8820
8821   if (STRINGP (start))
8822     {
8823       if (!STRING_MULTIBYTE (start)
8824           || SCHARS (start) == SBYTES (start))
8825         return Qnil;
8826       start_byte = 0;
8827       end_byte = SBYTES (start);
8828       pos = 0;
8829     }
8830   else
8831     {
8832       CHECK_NUMBER_COERCE_MARKER (start);
8833       CHECK_NUMBER_COERCE_MARKER (end);
8834       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8835         args_out_of_range (start, end);
8836       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
8837         return Qnil;
8838       start_byte = CHAR_TO_BYTE (XINT (start));
8839       end_byte = CHAR_TO_BYTE (XINT (end));
8840       if (XINT (end) - XINT (start) == end_byte - start_byte)
8841         return Qnil;
8842
8843       if (XINT (start) < GPT && XINT (end) > GPT)
8844         {
8845           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8846             move_gap_both (XINT (start), start_byte);
8847           else
8848             move_gap_both (XINT (end), end_byte);
8849         }
8850       pos = XINT (start);
8851     }
8852
8853   list = Qnil;
8854   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8855     {
8856       elt = XCAR (tail);
8857       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8858       ASET (attrs, coding_attr_trans_tbl,
8859             get_translation_table (attrs, 1, NULL));
8860       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8861     }
8862
8863   if (STRINGP (start))
8864     p = pbeg = SDATA (start);
8865   else
8866     p = pbeg = BYTE_POS_ADDR (start_byte);
8867   pend = p + (end_byte - start_byte);
8868
8869   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8870   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8871
8872   while (p < pend)
8873     {
8874       if (ASCII_BYTE_P (*p))
8875         p++;
8876       else
8877         {
8878           c = STRING_CHAR_ADVANCE (p);
8879
8880           charset_map_loaded = 0;
8881           for (tail = list; CONSP (tail); tail = XCDR (tail))
8882             {
8883               elt = XCDR (XCAR (tail));
8884               if (! char_encodable_p (c, XCAR (elt)))
8885                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8886             }
8887           if (charset_map_loaded)
8888             {
8889               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8890
8891               if (STRINGP (start))
8892                 pbeg = SDATA (start);
8893               else
8894                 pbeg = BYTE_POS_ADDR (start_byte);
8895               p = pbeg + p_offset;
8896               pend = pbeg + pend_offset;
8897             }
8898         }
8899       pos++;
8900     }
8901
8902   tail = list;
8903   list = Qnil;
8904   for (; CONSP (tail); tail = XCDR (tail))
8905     {
8906       elt = XCAR (tail);
8907       if (CONSP (XCDR (XCDR (elt))))
8908         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8909                       list);
8910     }
8911
8912   return list;
8913 }
8914
8915
8916 static Lisp_Object
8917 code_convert_region (Lisp_Object start, Lisp_Object end,
8918                      Lisp_Object coding_system, Lisp_Object dst_object,
8919                      int encodep, int norecord)
8920 {
8921   struct coding_system coding;
8922   EMACS_INT from, from_byte, to, to_byte;
8923   Lisp_Object src_object;
8924
8925   CHECK_NUMBER_COERCE_MARKER (start);
8926   CHECK_NUMBER_COERCE_MARKER (end);
8927   if (NILP (coding_system))
8928     coding_system = Qno_conversion;
8929   else
8930     CHECK_CODING_SYSTEM (coding_system);
8931   src_object = Fcurrent_buffer ();
8932   if (NILP (dst_object))
8933     dst_object = src_object;
8934   else if (! EQ (dst_object, Qt))
8935     CHECK_BUFFER (dst_object);
8936
8937   validate_region (&start, &end);
8938   from = XFASTINT (start);
8939   from_byte = CHAR_TO_BYTE (from);
8940   to = XFASTINT (end);
8941   to_byte = CHAR_TO_BYTE (to);
8942
8943   setup_coding_system (coding_system, &coding);
8944   coding.mode |= CODING_MODE_LAST_BLOCK;
8945
8946   if (encodep)
8947     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8948                           dst_object);
8949   else
8950     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8951                           dst_object);
8952   if (! norecord)
8953     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8954
8955   return (BUFFERP (dst_object)
8956           ? make_number (coding.produced_char)
8957           : coding.dst_object);
8958 }
8959
8960
8961 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8962        3, 4, "r\nzCoding system: ",
8963        doc: /* Decode the current region from the specified coding system.
8964 When called from a program, takes four arguments:
8965         START, END, CODING-SYSTEM, and DESTINATION.
8966 START and END are buffer positions.
8967
8968 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8969 If nil, the region between START and END is replaced by the decoded text.
8970 If buffer, the decoded text is inserted in that buffer after point (point
8971 does not move).
8972 In those cases, the length of the decoded text is returned.
8973 If DESTINATION is t, the decoded text is returned.
8974
8975 This function sets `last-coding-system-used' to the precise coding system
8976 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8977 not fully specified.)  */)
8978   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
8979 {
8980   return code_convert_region (start, end, coding_system, destination, 0, 0);
8981 }
8982
8983 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8984        3, 4, "r\nzCoding system: ",
8985        doc: /* Encode the current region by specified coding system.
8986 When called from a program, takes four arguments:
8987         START, END, CODING-SYSTEM and DESTINATION.
8988 START and END are buffer positions.
8989
8990 Optional 4th arguments DESTINATION specifies where the encoded text goes.
8991 If nil, the region between START and END is replace by the encoded text.
8992 If buffer, the encoded text is inserted in that buffer after point (point
8993 does not move).
8994 In those cases, the length of the encoded text is returned.
8995 If DESTINATION is t, the encoded text is returned.
8996
8997 This function sets `last-coding-system-used' to the precise coding system
8998 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8999 not fully specified.)  */)
9000   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
9001 {
9002   return code_convert_region (start, end, coding_system, destination, 1, 0);
9003 }
9004
9005 Lisp_Object
9006 code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9007                      Lisp_Object dst_object, int encodep, int nocopy, int norecord)
9008 {
9009   struct coding_system coding;
9010   EMACS_INT chars, bytes;
9011
9012   CHECK_STRING (string);
9013   if (NILP (coding_system))
9014     {
9015       if (! norecord)
9016         Vlast_coding_system_used = Qno_conversion;
9017       if (NILP (dst_object))
9018         return (nocopy ? Fcopy_sequence (string) : string);
9019     }
9020
9021   if (NILP (coding_system))
9022     coding_system = Qno_conversion;
9023   else
9024     CHECK_CODING_SYSTEM (coding_system);
9025   if (NILP (dst_object))
9026     dst_object = Qt;
9027   else if (! EQ (dst_object, Qt))
9028     CHECK_BUFFER (dst_object);
9029
9030   setup_coding_system (coding_system, &coding);
9031   coding.mode |= CODING_MODE_LAST_BLOCK;
9032   chars = SCHARS (string);
9033   bytes = SBYTES (string);
9034   if (encodep)
9035     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9036   else
9037     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9038   if (! norecord)
9039     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9040
9041   return (BUFFERP (dst_object)
9042           ? make_number (coding.produced_char)
9043           : coding.dst_object);
9044 }
9045
9046
9047 /* Encode or decode STRING according to CODING_SYSTEM.
9048    Do not set Vlast_coding_system_used.
9049
9050    This function is called only from macros DECODE_FILE and
9051    ENCODE_FILE, thus we ignore character composition.  */
9052
9053 Lisp_Object
9054 code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9055                               int encodep)
9056 {
9057   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9058 }
9059
9060
9061 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9062        2, 4, 0,
9063        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9064
9065 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9066 if the decoding operation is trivial.
9067
9068 Optional fourth arg BUFFER non-nil means that the decoded text is
9069 inserted in that buffer after point (point does not move).  In this
9070 case, the return value is the length of the decoded text.
9071
9072 This function sets `last-coding-system-used' to the precise coding system
9073 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9074 not fully specified.)  */)
9075   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9076 {
9077   return code_convert_string (string, coding_system, buffer,
9078                               0, ! NILP (nocopy), 0);
9079 }
9080
9081 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9082        2, 4, 0,
9083        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9084
9085 Optional third arg NOCOPY non-nil means it is OK to return STRING
9086 itself if the encoding operation is trivial.
9087
9088 Optional fourth arg BUFFER non-nil means that the encoded text is
9089 inserted in that buffer after point (point does not move).  In this
9090 case, the return value is the length of the encoded text.
9091
9092 This function sets `last-coding-system-used' to the precise coding system
9093 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9094 not fully specified.)  */)
9095   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9096 {
9097   return code_convert_string (string, coding_system, buffer,
9098                               1, ! NILP (nocopy), 0);
9099 }
9100
9101 \f
9102 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9103        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9104 Return the corresponding character.  */)
9105   (Lisp_Object code)
9106 {
9107   Lisp_Object spec, attrs, val;
9108   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9109   EMACS_INT ch;
9110   int c;
9111
9112   CHECK_NATNUM (code);
9113   ch = XFASTINT (code);
9114   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9115   attrs = AREF (spec, 0);
9116
9117   if (ASCII_BYTE_P (ch)
9118       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9119     return code;
9120
9121   val = CODING_ATTR_CHARSET_LIST (attrs);
9122   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9123   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9124   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9125
9126   if (ch <= 0x7F)
9127     {
9128       c = ch;
9129       charset = charset_roman;
9130     }
9131   else if (ch >= 0xA0 && ch < 0xDF)
9132     {
9133       c = ch - 0x80;
9134       charset = charset_kana;
9135     }
9136   else
9137     {
9138       EMACS_INT c1 = ch >> 8;
9139       int c2 = ch & 0xFF;
9140
9141       if (c1 < 0x81 || (c1 > 0x9F && c1 < 0xE0) || c1 > 0xEF
9142           || c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
9143         error ("Invalid code: %"pI"d", ch);
9144       c = ch;
9145       SJIS_TO_JIS (c);
9146       charset = charset_kanji;
9147     }
9148   c = DECODE_CHAR (charset, c);
9149   if (c < 0)
9150     error ("Invalid code: %"pI"d", ch);
9151   return make_number (c);
9152 }
9153
9154
9155 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9156        doc: /* Encode a Japanese character CH to shift_jis encoding.
9157 Return the corresponding code in SJIS.  */)
9158   (Lisp_Object ch)
9159 {
9160   Lisp_Object spec, attrs, charset_list;
9161   int c;
9162   struct charset *charset;
9163   unsigned code;
9164
9165   CHECK_CHARACTER (ch);
9166   c = XFASTINT (ch);
9167   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9168   attrs = AREF (spec, 0);
9169
9170   if (ASCII_CHAR_P (c)
9171       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9172     return ch;
9173
9174   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9175   charset = char_charset (c, charset_list, &code);
9176   if (code == CHARSET_INVALID_CODE (charset))
9177     error ("Can't encode by shift_jis encoding: %c", c);
9178   JIS_TO_SJIS (code);
9179
9180   return make_number (code);
9181 }
9182
9183 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9184        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9185 Return the corresponding character.  */)
9186   (Lisp_Object code)
9187 {
9188   Lisp_Object spec, attrs, val;
9189   struct charset *charset_roman, *charset_big5, *charset;
9190   EMACS_INT ch;
9191   int c;
9192
9193   CHECK_NATNUM (code);
9194   ch = XFASTINT (code);
9195   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9196   attrs = AREF (spec, 0);
9197
9198   if (ASCII_BYTE_P (ch)
9199       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9200     return code;
9201
9202   val = CODING_ATTR_CHARSET_LIST (attrs);
9203   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9204   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9205
9206   if (ch <= 0x7F)
9207     {
9208       c = ch;
9209       charset = charset_roman;
9210     }
9211   else
9212     {
9213       EMACS_INT b1 = ch >> 8;
9214       int b2 = ch & 0x7F;
9215       if (b1 < 0xA1 || b1 > 0xFE
9216           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9217         error ("Invalid code: %"pI"d", ch);
9218       c = ch;
9219       charset = charset_big5;
9220     }
9221   c = DECODE_CHAR (charset, c);
9222   if (c < 0)
9223     error ("Invalid code: %"pI"d", ch);
9224   return make_number (c);
9225 }
9226
9227 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9228        doc: /* Encode the Big5 character CH to BIG5 coding system.
9229 Return the corresponding character code in Big5.  */)
9230   (Lisp_Object ch)
9231 {
9232   Lisp_Object spec, attrs, charset_list;
9233   struct charset *charset;
9234   int c;
9235   unsigned code;
9236
9237   CHECK_CHARACTER (ch);
9238   c = XFASTINT (ch);
9239   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9240   attrs = AREF (spec, 0);
9241   if (ASCII_CHAR_P (c)
9242       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9243     return ch;
9244
9245   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9246   charset = char_charset (c, charset_list, &code);
9247   if (code == CHARSET_INVALID_CODE (charset))
9248     error ("Can't encode by Big5 encoding: %c", c);
9249
9250   return make_number (code);
9251 }
9252
9253 \f
9254 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9255        Sset_terminal_coding_system_internal, 1, 2, 0,
9256        doc: /* Internal use only.  */)
9257   (Lisp_Object coding_system, Lisp_Object terminal)
9258 {
9259   struct terminal *term = get_terminal (terminal, 1);
9260   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (term);
9261   CHECK_SYMBOL (coding_system);
9262   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9263   /* We had better not send unsafe characters to terminal.  */
9264   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9265   /* Character composition should be disabled.  */
9266   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9267   terminal_coding->src_multibyte = 1;
9268   terminal_coding->dst_multibyte = 0;
9269   if (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK)
9270     term->charset_list = coding_charset_list (terminal_coding);
9271   else
9272     term->charset_list = Fcons (make_number (charset_ascii), Qnil);
9273   return Qnil;
9274 }
9275
9276 DEFUN ("set-safe-terminal-coding-system-internal",
9277        Fset_safe_terminal_coding_system_internal,
9278        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9279        doc: /* Internal use only.  */)
9280   (Lisp_Object coding_system)
9281 {
9282   CHECK_SYMBOL (coding_system);
9283   setup_coding_system (Fcheck_coding_system (coding_system),
9284                        &safe_terminal_coding);
9285   /* Character composition should be disabled.  */
9286   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9287   safe_terminal_coding.src_multibyte = 1;
9288   safe_terminal_coding.dst_multibyte = 0;
9289   return Qnil;
9290 }
9291
9292 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9293        Sterminal_coding_system, 0, 1, 0,
9294        doc: /* Return coding system specified for terminal output on the given terminal.
9295 TERMINAL may be a terminal object, a frame, or nil for the selected
9296 frame's terminal device.  */)
9297   (Lisp_Object terminal)
9298 {
9299   struct coding_system *terminal_coding
9300     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9301   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9302
9303   /* For backward compatibility, return nil if it is `undecided'. */
9304   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9305 }
9306
9307 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9308        Sset_keyboard_coding_system_internal, 1, 2, 0,
9309        doc: /* Internal use only.  */)
9310   (Lisp_Object coding_system, Lisp_Object terminal)
9311 {
9312   struct terminal *t = get_terminal (terminal, 1);
9313   CHECK_SYMBOL (coding_system);
9314   if (NILP (coding_system))
9315     coding_system = Qno_conversion;
9316   else
9317     Fcheck_coding_system (coding_system);
9318   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9319   /* Character composition should be disabled.  */
9320   TERMINAL_KEYBOARD_CODING (t)->common_flags
9321     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9322   return Qnil;
9323 }
9324
9325 DEFUN ("keyboard-coding-system",
9326        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9327        doc: /* Return coding system specified for decoding keyboard input.  */)
9328   (Lisp_Object terminal)
9329 {
9330   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9331                          (get_terminal (terminal, 1))->id);
9332 }
9333
9334 \f
9335 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9336        Sfind_operation_coding_system,  1, MANY, 0,
9337        doc: /* Choose a coding system for an operation based on the target name.
9338 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9339 DECODING-SYSTEM is the coding system to use for decoding
9340 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9341 for encoding (in case OPERATION does encoding).
9342
9343 The first argument OPERATION specifies an I/O primitive:
9344   For file I/O, `insert-file-contents' or `write-region'.
9345   For process I/O, `call-process', `call-process-region', or `start-process'.
9346   For network I/O, `open-network-stream'.
9347
9348 The remaining arguments should be the same arguments that were passed
9349 to the primitive.  Depending on which primitive, one of those arguments
9350 is selected as the TARGET.  For example, if OPERATION does file I/O,
9351 whichever argument specifies the file name is TARGET.
9352
9353 TARGET has a meaning which depends on OPERATION:
9354   For file I/O, TARGET is a file name (except for the special case below).
9355   For process I/O, TARGET is a process name.
9356   For network I/O, TARGET is a service name or a port number.
9357
9358 This function looks up what is specified for TARGET in
9359 `file-coding-system-alist', `process-coding-system-alist',
9360 or `network-coding-system-alist' depending on OPERATION.
9361 They may specify a coding system, a cons of coding systems,
9362 or a function symbol to call.
9363 In the last case, we call the function with one argument,
9364 which is a list of all the arguments given to this function.
9365 If the function can't decide a coding system, it can return
9366 `undecided' so that the normal code-detection is performed.
9367
9368 If OPERATION is `insert-file-contents', the argument corresponding to
9369 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9370 file name to look up, and BUFFER is a buffer that contains the file's
9371 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9372 function to call for FILENAME, that function should examine the
9373 contents of BUFFER instead of reading the file.
9374
9375 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9376   (ptrdiff_t nargs, Lisp_Object *args)
9377 {
9378   Lisp_Object operation, target_idx, target, val;
9379   register Lisp_Object chain;
9380
9381   if (nargs < 2)
9382     error ("Too few arguments");
9383   operation = args[0];
9384   if (!SYMBOLP (operation)
9385       || !NATNUMP (target_idx = Fget (operation, Qtarget_idx)))
9386     error ("Invalid first argument");
9387   if (nargs < 1 + XFASTINT (target_idx))
9388     error ("Too few arguments for operation `%s'",
9389            SDATA (SYMBOL_NAME (operation)));
9390   target = args[XFASTINT (target_idx) + 1];
9391   if (!(STRINGP (target)
9392         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9393             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9394         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9395     error ("Invalid argument %"pI"d of operation `%s'",
9396            XFASTINT (target_idx) + 1, SDATA (SYMBOL_NAME (operation)));
9397   if (CONSP (target))
9398     target = XCAR (target);
9399
9400   chain = ((EQ (operation, Qinsert_file_contents)
9401             || EQ (operation, Qwrite_region))
9402            ? Vfile_coding_system_alist
9403            : (EQ (operation, Qopen_network_stream)
9404               ? Vnetwork_coding_system_alist
9405               : Vprocess_coding_system_alist));
9406   if (NILP (chain))
9407     return Qnil;
9408
9409   for (; CONSP (chain); chain = XCDR (chain))
9410     {
9411       Lisp_Object elt;
9412
9413       elt = XCAR (chain);
9414       if (CONSP (elt)
9415           && ((STRINGP (target)
9416                && STRINGP (XCAR (elt))
9417                && fast_string_match (XCAR (elt), target) >= 0)
9418               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9419         {
9420           val = XCDR (elt);
9421           /* Here, if VAL is both a valid coding system and a valid
9422              function symbol, we return VAL as a coding system.  */
9423           if (CONSP (val))
9424             return val;
9425           if (! SYMBOLP (val))
9426             return Qnil;
9427           if (! NILP (Fcoding_system_p (val)))
9428             return Fcons (val, val);
9429           if (! NILP (Ffboundp (val)))
9430             {
9431               /* We use call1 rather than safe_call1
9432                  so as to get bug reports about functions called here
9433                  which don't handle the current interface.  */
9434               val = call1 (val, Flist (nargs, args));
9435               if (CONSP (val))
9436                 return val;
9437               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9438                 return Fcons (val, val);
9439             }
9440           return Qnil;
9441         }
9442     }
9443   return Qnil;
9444 }
9445
9446 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9447        Sset_coding_system_priority, 0, MANY, 0,
9448        doc: /* Assign higher priority to the coding systems given as arguments.
9449 If multiple coding systems belong to the same category,
9450 all but the first one are ignored.
9451
9452 usage: (set-coding-system-priority &rest coding-systems)  */)
9453   (ptrdiff_t nargs, Lisp_Object *args)
9454 {
9455   ptrdiff_t i, j;
9456   int changed[coding_category_max];
9457   enum coding_category priorities[coding_category_max];
9458
9459   memset (changed, 0, sizeof changed);
9460
9461   for (i = j = 0; i < nargs; i++)
9462     {
9463       enum coding_category category;
9464       Lisp_Object spec, attrs;
9465
9466       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9467       attrs = AREF (spec, 0);
9468       category = XINT (CODING_ATTR_CATEGORY (attrs));
9469       if (changed[category])
9470         /* Ignore this coding system because a coding system of the
9471            same category already had a higher priority.  */
9472         continue;
9473       changed[category] = 1;
9474       priorities[j++] = category;
9475       if (coding_categories[category].id >= 0
9476           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9477         setup_coding_system (args[i], &coding_categories[category]);
9478       Fset (AREF (Vcoding_category_table, category), args[i]);
9479     }
9480
9481   /* Now we have decided top J priorities.  Reflect the order of the
9482      original priorities to the remaining priorities.  */
9483
9484   for (i = j, j = 0; i < coding_category_max; i++, j++)
9485     {
9486       while (j < coding_category_max
9487              && changed[coding_priorities[j]])
9488         j++;
9489       if (j == coding_category_max)
9490         abort ();
9491       priorities[i] = coding_priorities[j];
9492     }
9493
9494   memcpy (coding_priorities, priorities, sizeof priorities);
9495
9496   /* Update `coding-category-list'.  */
9497   Vcoding_category_list = Qnil;
9498   for (i = coding_category_max; i-- > 0; )
9499     Vcoding_category_list
9500       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9501                Vcoding_category_list);
9502
9503   return Qnil;
9504 }
9505
9506 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9507        Scoding_system_priority_list, 0, 1, 0,
9508        doc: /* Return a list of coding systems ordered by their priorities.
9509 The list contains a subset of coding systems; i.e. coding systems
9510 assigned to each coding category (see `coding-category-list').
9511
9512 HIGHESTP non-nil means just return the highest priority one.  */)
9513   (Lisp_Object highestp)
9514 {
9515   int i;
9516   Lisp_Object val;
9517
9518   for (i = 0, val = Qnil; i < coding_category_max; i++)
9519     {
9520       enum coding_category category = coding_priorities[i];
9521       int id = coding_categories[category].id;
9522       Lisp_Object attrs;
9523
9524       if (id < 0)
9525         continue;
9526       attrs = CODING_ID_ATTRS (id);
9527       if (! NILP (highestp))
9528         return CODING_ATTR_BASE_NAME (attrs);
9529       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9530     }
9531   return Fnreverse (val);
9532 }
9533
9534 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9535
9536 static Lisp_Object
9537 make_subsidiaries (Lisp_Object base)
9538 {
9539   Lisp_Object subsidiaries;
9540   ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base));
9541   char *buf = (char *) alloca (base_name_len + 6);
9542   int i;
9543
9544   memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
9545   subsidiaries = Fmake_vector (make_number (3), Qnil);
9546   for (i = 0; i < 3; i++)
9547     {
9548       strcpy (buf + base_name_len, suffixes[i]);
9549       ASET (subsidiaries, i, intern (buf));
9550     }
9551   return subsidiaries;
9552 }
9553
9554
/* Build the attribute vector and the spec vector of a new coding
   system from ARGS and register it under its name.

   ARGS is indexed by the coding_arg_* constants: a common prefix of
   coding_arg_max arguments, followed by extra arguments whose number
   depends on the coding type (checked below against
   coding_arg_ccl_max, coding_arg_utf16_max, coding_arg_iso2022_max or
   coding_arg_utf8_max).  Supplying too few arguments signals
   `wrong-number-of-arguments'; invalid argument values signal an
   error.  Returns nil.  */
9555 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9556        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9557        doc: /* For internal use only.
9558 usage: (define-coding-system-internal ...)  */)
9559   (ptrdiff_t nargs, Lisp_Object *args)
9560 {
9561   Lisp_Object name;
9562   Lisp_Object spec_vec;         /* [ ATTRS ALIASES EOL_TYPE ] */
9563   Lisp_Object attrs;            /* Vector of attributes.  */
9564   Lisp_Object eol_type;
9565   Lisp_Object aliases;
9566   Lisp_Object coding_type, charset_list, safe_charsets;
9567   enum coding_category category;
9568   Lisp_Object tail, val;
9569   int max_charset_id = 0;
9570   int i;
9571
       /* The DEFUN above already declares coding_arg_max as the minimum
          argument count; this check guards the args[] accesses below
          regardless.  */
9572   if (nargs < coding_arg_max)
9573     goto short_args;
9574
9575   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9576
9577   name = args[coding_arg_name];
9578   CHECK_SYMBOL (name);
9579   CODING_ATTR_BASE_NAME (attrs) = name;
9580
       /* The mnemonic is either a string or a character.  */
9581   val = args[coding_arg_mnemonic];
9582   if (! STRINGP (val))
9583     CHECK_CHARACTER (val);
9584   CODING_ATTR_MNEMONIC (attrs) = val;
9585
9586   coding_type = args[coding_arg_coding_type];
9587   CHECK_SYMBOL (coding_type);
9588   CODING_ATTR_TYPE (attrs) = coding_type;
9589
       /* Normalize the charset list to a list of charset IDs and find
          the largest ID.  The symbols `iso-2022' and `emacs-mule' stand
          for the corresponding global lists and are accepted only for
          the coding type of the same name.  Any other symbol is left as
          is, and the CONSP loop below then does nothing.  */
9590   charset_list = args[coding_arg_charset_list];
9591   if (SYMBOLP (charset_list))
9592     {
9593       if (EQ (charset_list, Qiso_2022))
9594         {
9595           if (! EQ (coding_type, Qiso_2022))
9596             error ("Invalid charset-list");
9597           charset_list = Viso_2022_charset_list;
9598         }
9599       else if (EQ (charset_list, Qemacs_mule))
9600         {
9601           if (! EQ (coding_type, Qemacs_mule))
9602             error ("Invalid charset-list");
9603           charset_list = Vemacs_mule_charset_list;
9604         }
9605       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9606         if (max_charset_id < XFASTINT (XCAR (tail)))
9607           max_charset_id = XFASTINT (XCAR (tail));
9608     }
9609   else
9610     {
           /* Work on a copy, because each charset in the list is
              replaced by its ID below.  */
9611       charset_list = Fcopy_sequence (charset_list);
9612       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9613         {
9614           struct charset *charset;
9615
9616           val = XCAR (tail);
9617           CHECK_CHARSET_GET_CHARSET (val, charset);
9618           if (EQ (coding_type, Qiso_2022)
9619               ? CHARSET_ISO_FINAL (charset) < 0
9620               : EQ (coding_type, Qemacs_mule)
9621               ? CHARSET_EMACS_MULE_ID (charset) < 0
9622               : 0)
9623             error ("Can't handle charset `%s'",
9624                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9625
9626           XSETCAR (tail, make_number (charset->id));
9627           if (max_charset_id < charset->id)
9628             max_charset_id = charset->id;
9629         }
9630     }
9631   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9632
       /* SAFE_CHARSETS is a string indexed by charset ID: byte 0 for
          each charset in CHARSET_LIST, 255 for every other ID up to the
          largest one found above.  */
9633   safe_charsets = make_uninit_string (max_charset_id + 1);
9634   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9635   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9636     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9637   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9638
9639   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9640
       /* A translation table is a char-table, a cons, or a symbol.  */
9641   val = args[coding_arg_decode_translation_table];
9642   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9643     CHECK_SYMBOL (val);
9644   CODING_ATTR_DECODE_TBL (attrs) = val;
9645
9646   val = args[coding_arg_encode_translation_table];
9647   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9648     CHECK_SYMBOL (val);
9649   CODING_ATTR_ENCODE_TBL (attrs) = val;
9650
9651   val = args[coding_arg_post_read_conversion];
9652   CHECK_SYMBOL (val);
9653   CODING_ATTR_POST_READ (attrs) = val;
9654
9655   val = args[coding_arg_pre_write_conversion];
9656   CHECK_SYMBOL (val);
9657   CODING_ATTR_PRE_WRITE (attrs) = val;
9658
       /* A nil default character means a space.  */
9659   val = args[coding_arg_default_char];
9660   if (NILP (val))
9661     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9662   else
9663     {
9664       CHECK_CHARACTER (val);
9665       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9666     }
9667
9668   val = args[coding_arg_for_unibyte];
9669   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9670
9671   val = args[coding_arg_plist];
9672   CHECK_LIST (val);
9673   CODING_ATTR_PLIST (attrs) = val;
9674
       /* The rest of ATTRS and the coding category depend on the coding
          type.  Every branch below either sets CATEGORY or signals.  */
9675   if (EQ (coding_type, Qcharset))
9676     {
9677       /* Generate a lisp vector of 256 elements.  Each element is nil,
9678          integer, or a list of charset IDs.
9679
9680          If Nth element is nil, the byte code N is invalid in this
9681          coding system.
9682
9683          If Nth element is a number NUM, N is the first byte of a
9684          charset whose ID is NUM.
9685
9686          If Nth element is a list of charset IDs, N is the first byte
9687          of one of them.  The list is sorted by dimensions of the
9688          charsets.  A charset of smaller dimension comes first. */
9689       val = Fmake_vector (make_number (256), Qnil);
9690
9691       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9692         {
9693           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9694           int dim = CHARSET_DIMENSION (charset);
9695           int idx = (dim - 1) * 4;
9696
9697           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9698             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9699
9700           for (i = charset->code_space[idx];
9701                i <= charset->code_space[idx + 1]; i++)
9702             {
9703               Lisp_Object tmp, tmp2;
9704               int dim2;
9705
9706               tmp = AREF (val, i);
9707               if (NILP (tmp))
9708                 tmp = XCAR (tail);
9709               else if (NUMBERP (tmp))
9710                 {
9711                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9712                   if (dim < dim2)
9713                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9714                   else
9715                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9716                 }
9717               else
9718                 {
                       /* Find the first entry of larger dimension and
                          insert this charset in front of it, or append
                          at the end when there is none.  */
9719                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9720                     {
9721                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9722                       if (dim < dim2)
9723                         break;
9724                     }
9725                   if (NILP (tmp2))
9726                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9727                   else
9728                     {
9729                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9730                       XSETCAR (tmp2, XCAR (tail));
9731                     }
9732                 }
9733               ASET (val, i, tmp);
9734             }
9735         }
9736       ASET (attrs, coding_attr_charset_valids, val);
9737       category = coding_category_charset;
9738     }
9739   else if (EQ (coding_type, Qccl))
9740     {
9741       Lisp_Object valids;
9742
9743       if (nargs < coding_arg_ccl_max)
9744         goto short_args;
9745
9746       val = args[coding_arg_ccl_decoder];
9747       CHECK_CCL_PROGRAM (val);
9748       if (VECTORP (val))
9749         val = Fcopy_sequence (val);
9750       ASET (attrs, coding_attr_ccl_decoder, val);
9751
9752       val = args[coding_arg_ccl_encoder];
9753       CHECK_CCL_PROGRAM (val);
9754       if (VECTORP (val))
9755         val = Fcopy_sequence (val);
9756       ASET (attrs, coding_attr_ccl_encoder, val);
9757
           /* VALIDS is a 256-byte string; byte N is set to 1 when N is
              covered by the valid-codes argument, each element of which
              is either an integer or a (FROM . TO) range within
              0..255.  */
9758       val = args[coding_arg_ccl_valids];
9759       valids = Fmake_string (make_number (256), make_number (0));
9760       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9761         {
9762           int from, to;
9763
9764           val = Fcar (tail);
9765           if (INTEGERP (val))
9766             {
9767               from = to = XINT (val);
9768               if (from < 0 || from > 255)
9769                 args_out_of_range_3 (val, make_number (0), make_number (255));
9770             }
9771           else
9772             {
9773               CHECK_CONS (val);
9774               CHECK_NATNUM_CAR (val);
9775               CHECK_NATNUM_CDR (val);
9776               from = XINT (XCAR (val));
9777               if (from > 255)
9778                 args_out_of_range_3 (XCAR (val),
9779                                      make_number (0), make_number (255));
9780               to = XINT (XCDR (val));
9781               if (to < from || to > 255)
9782                 args_out_of_range_3 (XCDR (val),
9783                                      XCAR (val), make_number (255));
9784             }
9785           for (i = from; i <= to; i++)
9786             SSET (valids, i, 1);
9787         }
9788       ASET (attrs, coding_attr_ccl_valids, valids);
9789
9790       category = coding_category_ccl;
9791     }
9792   else if (EQ (coding_type, Qutf_16))
9793     {
9794       Lisp_Object bom, endian;
9795
9796       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9797
9798       if (nargs < coding_arg_utf16_max)
9799         goto short_args;
9800
           /* BOM is nil, t, or a cons of two coding systems.  */
9801       bom = args[coding_arg_utf16_bom];
9802       if (! NILP (bom) && ! EQ (bom, Qt))
9803         {
9804           CHECK_CONS (bom);
9805           val = XCAR (bom);
9806           CHECK_CODING_SYSTEM (val);
9807           val = XCDR (bom);
9808           CHECK_CODING_SYSTEM (val);
9809         }
9810       ASET (attrs, coding_attr_utf_bom, bom);
9811
           /* A nil endian defaults to `big'.  */
9812       endian = args[coding_arg_utf16_endian];
9813       CHECK_SYMBOL (endian);
9814       if (NILP (endian))
9815         endian = Qbig;
9816       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9817         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9818       ASET (attrs, coding_attr_utf_16_endian, endian);
9819
           /* A cons BOM selects the `auto' category; otherwise the
              category depends on the endianness and on whether BOM is
              nil.  */
9820       category = (CONSP (bom)
9821                   ? coding_category_utf_16_auto
9822                   : NILP (bom)
9823                   ? (EQ (endian, Qbig)
9824                      ? coding_category_utf_16_be_nosig
9825                      : coding_category_utf_16_le_nosig)
9826                   : (EQ (endian, Qbig)
9827                      ? coding_category_utf_16_be
9828                      : coding_category_utf_16_le));
9829     }
9830   else if (EQ (coding_type, Qiso_2022))
9831     {
9832       Lisp_Object initial, reg_usage, request, flags;
9833
9834       if (nargs < coding_arg_iso2022_max)
9835         goto short_args;
9836
           /* In a copy of INITIAL, replace each of the first four
              elements by a charset ID, or by -1 when it is nil.  */
9837       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9838       CHECK_VECTOR (initial);
9839       for (i = 0; i < 4; i++)
9840         {
9841           val = Faref (initial, make_number (i));
9842           if (! NILP (val))
9843             {
9844               struct charset *charset;
9845
9846               CHECK_CHARSET_GET_CHARSET (val, charset);
9847               ASET (initial, i, make_number (CHARSET_ID (charset)));
9848               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9849                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9850             }
9851           else
9852             ASET (initial, i, make_number (-1));
9853         }
9854
9855       reg_usage = args[coding_arg_iso2022_reg_usage];
9856       CHECK_CONS (reg_usage);
9857       CHECK_NUMBER_CAR (reg_usage);
9858       CHECK_NUMBER_CDR (reg_usage);
9859
           /* REQUEST is a list of (CHARSET . REGISTER) pairs with
              REGISTER below 4; each CHARSET is replaced by its ID.
              NOTE(review): Fcopy_sequence presumably copies only the
              list spine, so the XSETCAR below would also modify the
              pairs of the caller's list -- confirm this is intended.  */
9860       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9861       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9862         {
9863           int id;
9864           Lisp_Object tmp1;
9865
9866           val = Fcar (tail);
9867           CHECK_CONS (val);
9868           tmp1 = XCAR (val);
9869           CHECK_CHARSET_GET_ID (tmp1, id);
9870           CHECK_NATNUM_CDR (val);
9871           if (XINT (XCDR (val)) >= 4)
9872             error ("Invalid graphic register number: %"pI"d", XINT (XCDR (val)));
9873           XSETCAR (val, make_number (id));
9874         }
9875
           /* I keeps the flag bits as passed by the caller;
              CODING_ISO_FLAG_FULL_SUPPORT is added only to the stored
              FLAGS, so the category tests below see the original
              bits.  */
9876       flags = args[coding_arg_iso2022_flags];
9877       CHECK_NATNUM (flags);
9878       i = XINT (flags);
9879       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9880         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9881
9882       ASET (attrs, coding_attr_iso_initial, initial);
9883       ASET (attrs, coding_attr_iso_usage, reg_usage);
9884       ASET (attrs, coding_attr_iso_request, request);
9885       ASET (attrs, coding_attr_iso_flags, flags);
9886       setup_iso_safe_charsets (attrs);
9887
9888       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9889         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9890                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9891                     ? coding_category_iso_7_else
9892                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9893                     ? coding_category_iso_7
9894                     : coding_category_iso_7_tight);
9895       else
9896         {
9897           int id = XINT (AREF (initial, 1));
9898
9899           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9900                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9901                        || id < 0)
9902                       ? coding_category_iso_8_else
9903                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9904                       ? coding_category_iso_8_1
9905                       : coding_category_iso_8_2);
9906         }
           /* Only the iso-8-1 and iso-8-2 categories keep the
              ASCII-compatible setting made above.  */
9907       if (category != coding_category_iso_8_1
9908           && category != coding_category_iso_8_2)
9909         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9910     }
9911   else if (EQ (coding_type, Qemacs_mule))
9912     {
9913       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9914         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9915       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9916       category = coding_category_emacs_mule;
9917     }
9918   else if (EQ (coding_type, Qshift_jis))
9919     {
9920
9921       struct charset *charset;
9922
           /* Expect charsets of dimension 1, 1, 2 and optionally 2, in
              that order.  CHARSET_LIST is advanced while checking; the
              full list was already stored in ATTRS above.  */
9923       if (XINT (Flength (charset_list)) != 3
9924           && XINT (Flength (charset_list)) != 4)
9925         error ("There should be three or four charsets");
9926
9927       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9928       if (CHARSET_DIMENSION (charset) != 1)
9929         error ("Dimension of charset %s is not one",
9930                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9931       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9932         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9933
9934       charset_list = XCDR (charset_list);
9935       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9936       if (CHARSET_DIMENSION (charset) != 1)
9937         error ("Dimension of charset %s is not one",
9938                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9939
9940       charset_list = XCDR (charset_list);
9941       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9942       if (CHARSET_DIMENSION (charset) != 2)
9943         error ("Dimension of charset %s is not two",
9944                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9945
9946       charset_list = XCDR (charset_list);
9947       if (! NILP (charset_list))
9948         {
9949           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9950           if (CHARSET_DIMENSION (charset) != 2)
9951             error ("Dimension of charset %s is not two",
9952                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9953         }
9954
9955       category = coding_category_sjis;
9956       Vsjis_coding_system = name;
9957     }
9958   else if (EQ (coding_type, Qbig5))
9959     {
9960       struct charset *charset;
9961
           /* Expect exactly two charsets, of dimension 1 and 2.  */
9962       if (XINT (Flength (charset_list)) != 2)
9963         error ("There should be just two charsets");
9964
9965       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9966       if (CHARSET_DIMENSION (charset) != 1)
9967         error ("Dimension of charset %s is not one",
9968                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9969       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9970         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9971
9972       charset_list = XCDR (charset_list);
9973       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9974       if (CHARSET_DIMENSION (charset) != 2)
9975         error ("Dimension of charset %s is not two",
9976                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9977
9978       category = coding_category_big5;
9979       Vbig5_coding_system = name;
9980     }
9981   else if (EQ (coding_type, Qraw_text))
9982     {
9983       category = coding_category_raw_text;
9984       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9985     }
9986   else if (EQ (coding_type, Qutf_8))
9987     {
9988       Lisp_Object bom;
9989
9990       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9991
9992       if (nargs < coding_arg_utf8_max)
9993         goto short_args;
9994
           /* BOM is nil, t, or a cons of two coding systems.  */
9995       bom = args[coding_arg_utf8_bom];
9996       if (! NILP (bom) && ! EQ (bom, Qt))
9997         {
9998           CHECK_CONS (bom);
9999           val = XCAR (bom);
10000           CHECK_CODING_SYSTEM (val);
10001           val = XCDR (bom);
10002           CHECK_CODING_SYSTEM (val);
10003         }
10004       ASET (attrs, coding_attr_utf_bom, bom);
10005
10006       category = (CONSP (bom) ? coding_category_utf_8_auto
10007                   : NILP (bom) ? coding_category_utf_8_nosig
10008                   : coding_category_utf_8_sig);
10009     }
10010   else if (EQ (coding_type, Qundecided))
10011     category = coding_category_undecided;
10012   else
10013     error ("Invalid coding system type: %s",
10014            SDATA (SYMBOL_NAME (coding_type)));
10015
        /* Record the category in ATTRS and push :category and
           :ascii-compatible-p onto the front of the property list.  */
10016   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10017   CODING_ATTR_PLIST (attrs)
10018     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10019                                 CODING_ATTR_PLIST (attrs)));
10020   CODING_ATTR_PLIST (attrs)
10021     = Fcons (QCascii_compatible_p,
10022              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10023                     CODING_ATTR_PLIST (attrs)));
10024
10025   eol_type = args[coding_arg_eol_type];
10026   if (! NILP (eol_type)
10027       && ! EQ (eol_type, Qunix)
10028       && ! EQ (eol_type, Qdos)
10029       && ! EQ (eol_type, Qmac))
10030     error ("Invalid eol-type");
10031
10032   aliases = Fcons (name, Qnil);
10033
        /* With no explicit eol-type, EOL_TYPE becomes a vector of three
           subsidiary coding systems (NAME-unix, NAME-dos, NAME-mac).
           Each one is registered with its own spec vector, which shares
           ATTRS with the base coding system.  */
10034   if (NILP (eol_type))
10035     {
10036       eol_type = make_subsidiaries (name);
10037       for (i = 0; i < 3; i++)
10038         {
10039           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10040
10041           this_name = AREF (eol_type, i);
10042           this_aliases = Fcons (this_name, Qnil);
10043           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10044           this_spec = Fmake_vector (make_number (3), attrs);
10045           ASET (this_spec, 1, this_aliases);
10046           ASET (this_spec, 2, this_eol_type);
10047           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10048           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10049           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10050           if (NILP (val))
10051             Vcoding_system_alist
10052               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10053                        Vcoding_system_alist);
10054         }
10055     }
10056
        /* Slot 0 gets ATTRS from the fill value; slots 1 and 2 are then
           overwritten, giving [ ATTRS ALIASES EOL_TYPE ].  */
10057   spec_vec = Fmake_vector (make_number (3), attrs);
10058   ASET (spec_vec, 1, aliases);
10059   ASET (spec_vec, 2, eol_type);
10060
10061   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10062   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10063   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10064   if (NILP (val))
10065     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10066                                   Vcoding_system_alist);
10067
        /* Set up the category's entry in coding_categories when it has
           no coding system yet (id < 0) or when its current coding
           system already has this name.  */
10068   {
10069     int id = coding_categories[category].id;
10070
10071     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10072       setup_coding_system (name, &coding_categories[category]);
10073   }
10074
10075   return Qnil;
10076
        /* Reached when fewer arguments were supplied than the coding
           type requires.  */
10077  short_args:
10078   return Fsignal (Qwrong_number_of_arguments,
10079                   Fcons (intern ("define-coding-system-internal"),
10080                          make_number (nargs)));
10081 }
10082
10083
/* Set property PROP of CODING-SYSTEM to VAL and return the value
   stored.  Properties that also have a dedicated slot in the attribute
   vector are validated and written to that slot first; every PROP,
   recognized or not, is then stored in the property list.  */
10084 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10085        3, 3, 0,
10086        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10087   (Lisp_Object coding_system, Lisp_Object prop, Lisp_Object val)
10088 {
10089   Lisp_Object spec, attrs;
10090
10091   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10092   attrs = AREF (spec, 0);
10093   if (EQ (prop, QCmnemonic))
10094     {
            /* A mnemonic is either a string or a character.  */
10095       if (! STRINGP (val))
10096         CHECK_CHARACTER (val);
10097       CODING_ATTR_MNEMONIC (attrs) = val;
10098     }
10099   else if (EQ (prop, QCdefault_char))
10100     {
            /* nil means a space.  VAL itself is replaced, so the
               property list and the return value get the space too.  */
10101       if (NILP (val))
10102         val = make_number (' ');
10103       else
10104         CHECK_CHARACTER (val);
10105       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10106     }
10107   else if (EQ (prop, QCdecode_translation_table))
10108     {
            /* A translation table is a char-table, a cons, or a
               symbol.  */
10109       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10110         CHECK_SYMBOL (val);
10111       CODING_ATTR_DECODE_TBL (attrs) = val;
10112     }
10113   else if (EQ (prop, QCencode_translation_table))
10114     {
10115       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10116         CHECK_SYMBOL (val);
10117       CODING_ATTR_ENCODE_TBL (attrs) = val;
10118     }
10119   else if (EQ (prop, QCpost_read_conversion))
10120     {
10121       CHECK_SYMBOL (val);
10122       CODING_ATTR_POST_READ (attrs) = val;
10123     }
10124   else if (EQ (prop, QCpre_write_conversion))
10125     {
10126       CHECK_SYMBOL (val);
10127       CODING_ATTR_PRE_WRITE (attrs) = val;
10128     }
10129   else if (EQ (prop, QCascii_compatible_p))
10130     {
10131       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10132     }
10133
        /* Done for every PROP, including ones not handled above.  */
10134   CODING_ATTR_PLIST (attrs)
10135     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10136   return val;
10137 }
10138
10139
/* Register the symbol ALIAS as another name for CODING-SYSTEM.  ALIAS
   is appended destructively to the alias list kept in the coding
   system's spec and is entered in the coding system hash table with
   that same spec object.  When the eol-type of the spec is a vector
   of subsidiaries, ALIAS-unix, ALIAS-dos and ALIAS-mac are defined
   recursively as aliases of the corresponding subsidiaries.
   Returns nil.  */
10140 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10141        Sdefine_coding_system_alias, 2, 2, 0,
10142        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10143   (Lisp_Object alias, Lisp_Object coding_system)
10144 {
10145   Lisp_Object spec, aliases, eol_type, val;
10146
10147   CHECK_SYMBOL (alias);
10148   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10149   aliases = AREF (spec, 1);
10150   /* ALIASES should be a list of length more than zero, and the first
10151      element is a base coding system.  Append ALIAS at the tail of the
10152      list.  */
10153   while (!NILP (XCDR (aliases)))
10154     aliases = XCDR (aliases);
10155   XSETCDR (aliases, Fcons (alias, Qnil));
10156
10157   eol_type = AREF (spec, 2);
10158   if (VECTORP (eol_type))
10159     {
10160       Lisp_Object subsidiaries;
10161       int i;
10162
            /* Pair each ALIAS-<suffix> symbol with the subsidiary stored
               at the same index of EOL_TYPE.  */
10163       subsidiaries = make_subsidiaries (alias);
10164       for (i = 0; i < 3; i++)
10165         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10166                                      AREF (eol_type, i));
10167     }
10168
        /* The alias maps to the very same spec vector, not to a copy.  */
10169   Fputhash (alias, spec, Vcoding_system_hash_table);
10170   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10171   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10172   if (NILP (val))
10173     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10174                                   Vcoding_system_alist);
10175
10176   return Qnil;
10177 }
10178
/* Return the base name recorded in the attribute vector of
   CODING-SYSTEM.  A nil argument yields `no-conversion' directly,
   without any lookup; any other argument must be a coding system.  */
10179 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10180        1, 1, 0,
10181        doc: /* Return the base of CODING-SYSTEM.
10182 Any alias or subsidiary coding system is not a base coding system.  */)
10183   (Lisp_Object coding_system)
10184 {
10185   Lisp_Object spec, attrs;
10186
10187   if (NILP (coding_system))
10188     return (Qno_conversion);
10189   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
        /* Slot 0 of the spec vector is the attribute vector.  */
10190   attrs = AREF (spec, 0);
10191   return CODING_ATTR_BASE_NAME (attrs);
10192 }
10193
/* Return the property list stored in the attribute vector of
   CODING-SYSTEM.  A nil argument is treated as `no-conversion'.  The
   list is returned as stored, not copied.

   The documentation is written as a `doc:' comment, like every other
   DEFUN in this file, rather than as a C string literal.  */
10194 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10195        1, 1, 0,
10196        doc: /* Return the property list of CODING-SYSTEM.  */)
10197   (Lisp_Object coding_system)
10198 {
10199   Lisp_Object spec, attrs;
10200
10201   if (NILP (coding_system))
10202     coding_system = Qno_conversion;
10203   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
        /* Slot 0 of the spec vector is the attribute vector.  */
10204   attrs = AREF (spec, 0);
10205   return CODING_ATTR_PLIST (attrs);
10206 }
10207
10208
/* Return the alias list kept in slot 1 of the spec of CODING-SYSTEM.
   A nil argument is treated as `no-conversion'.  The list is returned
   as stored, not copied.  */
10209 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10210        1, 1, 0,
10211        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10212   (Lisp_Object coding_system)
10213 {
10214   Lisp_Object spec;
10215
10216   if (NILP (coding_system))
10217     coding_system = Qno_conversion;
10218   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10219   return AREF (spec, 1);
10220 }
10221
10222 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10223        Scoding_system_eol_type, 1, 1, 0,
10224        doc: /* Return eol-type of CODING-SYSTEM.
10225 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10226
10227 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10228 and CR respectively.
10229
10230 A vector value indicates that a format of end-of-line should be
10231 detected automatically.  Nth element of the vector is the subsidiary
10232 coding system whose eol-type is N.  */)
10233   (Lisp_Object coding_system)
10234 {
10235   Lisp_Object spec, eol_type;
10236   int n;
10237
        /* nil is treated as `no-conversion'.  An argument that is not a
           coding system yields nil instead of signaling an error.  */
10238   if (NILP (coding_system))
10239     coding_system = Qno_conversion;
10240   if (! CODING_SYSTEM_P (coding_system))
10241     return Qnil;
10242   spec = CODING_SYSTEM_SPEC (coding_system);
10243   eol_type = AREF (spec, 2);
        /* Hand out a copy of the subsidiary vector, not the vector
           stored in the spec.  */
10244   if (VECTORP (eol_type))
10245     return Fcopy_sequence (eol_type);
        /* `unix' maps to 0 and `dos' to 1; any other value maps to 2.  */
10246   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10247   return make_number (n);
10248 }
10249
10250 #endif /* emacs */
10251
10252 \f
10253 /*** 12. Postamble ***/
10254
/* Initialize the coding category tables (coding_categories and
   coding_priorities) and the per-byte tables iso_code_class and
   emacs_mule_bytes.  Takes no arguments and returns nothing.  */
10255 void
10256 init_coding_once (void)
10257 {
10258   int i;
10259
        /* No category has a coding system yet (id -1), and the initial
           priority order is simply the order of the category values.  */
10260   for (i = 0; i < coding_category_max; i++)
10261     {
10262       coding_categories[i].id = -1;
10263       coding_priorities[i] = i;
10264     }
10265
10266   /* ISO2022 specific initialize routine.  */
        /* The range loops skip 0x20, 0x7F, 0xA0 and 0xFF, which get
           their own classes right after the loops.  */
10267   for (i = 0; i < 0x20; i++)
10268     iso_code_class[i] = ISO_control_0;
10269   for (i = 0x21; i < 0x7F; i++)
10270     iso_code_class[i] = ISO_graphic_plane_0;
10271   for (i = 0x80; i < 0xA0; i++)
10272     iso_code_class[i] = ISO_control_1;
10273   for (i = 0xA1; i < 0xFF; i++)
10274     iso_code_class[i] = ISO_graphic_plane_1;
10275   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10276   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
        /* Assigned last, so these take precedence over the range
           defaults set above.  */
10277   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10278   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10279   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10280   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10281   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10282   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10283   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10284
        /* Every entry defaults to 1; only the four private leading
           codes get 3 or 4.  NOTE(review): presumably the byte length
           of an emacs-mule sequence starting with that byte --
           confirm against the users of emacs_mule_bytes.  */
10285   for (i = 0; i < 256; i++)
10286     {
10287       emacs_mule_bytes[i] = 1;
10288     }
10289   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10290   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10291   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10292   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10293 }
10294
10295 #ifdef emacs
10296
10297 void
10298 syms_of_coding (void)
10299 {
10300   staticpro (&Vcoding_system_hash_table);
10301   {
10302     Lisp_Object args[2];
10303     args[0] = QCtest;
10304     args[1] = Qeq;
10305     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10306   }
10307
10308   staticpro (&Vsjis_coding_system);
10309   Vsjis_coding_system = Qnil;
10310
10311   staticpro (&Vbig5_coding_system);
10312   Vbig5_coding_system = Qnil;
10313
10314   staticpro (&Vcode_conversion_reused_workbuf);
10315   Vcode_conversion_reused_workbuf = Qnil;
10316
10317   staticpro (&Vcode_conversion_workbuf_name);
10318   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10319
10320   reused_workbuf_in_use = 0;
10321
10322   DEFSYM (Qcharset, "charset");
10323   DEFSYM (Qtarget_idx, "target-idx");
10324   DEFSYM (Qcoding_system_history, "coding-system-history");
10325   Fset (Qcoding_system_history, Qnil);
10326
10327   /* Target FILENAME is the first argument.  */
10328   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10329   /* Target FILENAME is the third argument.  */
10330   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10331
10332   DEFSYM (Qcall_process, "call-process");
10333   /* Target PROGRAM is the first argument.  */
10334   Fput (Qcall_process, Qtarget_idx, make_number (0));
10335
10336   DEFSYM (Qcall_process_region, "call-process-region");
10337   /* Target PROGRAM is the third argument.  */
10338   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10339
10340   DEFSYM (Qstart_process, "start-process");
10341   /* Target PROGRAM is the third argument.  */
10342   Fput (Qstart_process, Qtarget_idx, make_number (2));
10343
10344   DEFSYM (Qopen_network_stream, "open-network-stream");
10345   /* Target SERVICE is the fourth argument.  */
10346   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10347
10348   DEFSYM (Qcoding_system, "coding-system");
10349   DEFSYM (Qcoding_aliases, "coding-aliases");
10350
10351   DEFSYM (Qeol_type, "eol-type");
10352   DEFSYM (Qunix, "unix");
10353   DEFSYM (Qdos, "dos");
10354
10355   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10356   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10357   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10358   DEFSYM (Qdefault_char, "default-char");
10359   DEFSYM (Qundecided, "undecided");
10360   DEFSYM (Qno_conversion, "no-conversion");
10361   DEFSYM (Qraw_text, "raw-text");
10362
10363   DEFSYM (Qiso_2022, "iso-2022");
10364
10365   DEFSYM (Qutf_8, "utf-8");
10366   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10367
10368   DEFSYM (Qutf_16, "utf-16");
10369   DEFSYM (Qbig, "big");
10370   DEFSYM (Qlittle, "little");
10371
10372   DEFSYM (Qshift_jis, "shift-jis");
10373   DEFSYM (Qbig5, "big5");
10374
10375   DEFSYM (Qcoding_system_p, "coding-system-p");
10376
10377   DEFSYM (Qcoding_system_error, "coding-system-error");
10378   Fput (Qcoding_system_error, Qerror_conditions,
10379         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10380   Fput (Qcoding_system_error, Qerror_message,
10381         make_pure_c_string ("Invalid coding system"));
10382
10383   /* Intern this now in case it isn't already done.
10384      Setting this variable twice is harmless.
10385      But don't staticpro it here--that is done in alloc.c.  */
10386   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10387
10388   DEFSYM (Qtranslation_table, "translation-table");
10389   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10390   DEFSYM (Qtranslation_table_id, "translation-table-id");
10391   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10392   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10393
10394   DEFSYM (Qvalid_codes, "valid-codes");
10395
10396   DEFSYM (Qemacs_mule, "emacs-mule");
10397
10398   DEFSYM (QCcategory, ":category");
10399   DEFSYM (QCmnemonic, ":mnemonic");
10400   DEFSYM (QCdefault_char, ":default-char");
10401   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10402   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10403   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10404   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10405   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10406
10407   Vcoding_category_table
10408     = Fmake_vector (make_number (coding_category_max), Qnil);
10409   staticpro (&Vcoding_category_table);
10410   /* Followings are target of code detection.  */
10411   ASET (Vcoding_category_table, coding_category_iso_7,
10412         intern_c_string ("coding-category-iso-7"));
10413   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10414         intern_c_string ("coding-category-iso-7-tight"));
10415   ASET (Vcoding_category_table, coding_category_iso_8_1,
10416         intern_c_string ("coding-category-iso-8-1"));
10417   ASET (Vcoding_category_table, coding_category_iso_8_2,
10418         intern_c_string ("coding-category-iso-8-2"));
10419   ASET (Vcoding_category_table, coding_category_iso_7_else,
10420         intern_c_string ("coding-category-iso-7-else"));
10421   ASET (Vcoding_category_table, coding_category_iso_8_else,
10422         intern_c_string ("coding-category-iso-8-else"));
10423   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10424         intern_c_string ("coding-category-utf-8-auto"));
10425   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10426         intern_c_string ("coding-category-utf-8"));
10427   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10428         intern_c_string ("coding-category-utf-8-sig"));
10429   ASET (Vcoding_category_table, coding_category_utf_16_be,
10430         intern_c_string ("coding-category-utf-16-be"));
10431   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10432         intern_c_string ("coding-category-utf-16-auto"));
10433   ASET (Vcoding_category_table, coding_category_utf_16_le,
10434         intern_c_string ("coding-category-utf-16-le"));
10435   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10436         intern_c_string ("coding-category-utf-16-be-nosig"));
10437   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10438         intern_c_string ("coding-category-utf-16-le-nosig"));
10439   ASET (Vcoding_category_table, coding_category_charset,
10440         intern_c_string ("coding-category-charset"));
10441   ASET (Vcoding_category_table, coding_category_sjis,
10442         intern_c_string ("coding-category-sjis"));
10443   ASET (Vcoding_category_table, coding_category_big5,
10444         intern_c_string ("coding-category-big5"));
10445   ASET (Vcoding_category_table, coding_category_ccl,
10446         intern_c_string ("coding-category-ccl"));
10447   ASET (Vcoding_category_table, coding_category_emacs_mule,
10448         intern_c_string ("coding-category-emacs-mule"));
10449   /* Followings are NOT target of code detection.  */
10450   ASET (Vcoding_category_table, coding_category_raw_text,
10451         intern_c_string ("coding-category-raw-text"));
10452   ASET (Vcoding_category_table, coding_category_undecided,
10453         intern_c_string ("coding-category-undecided"));
10454
10455   DEFSYM (Qinsufficient_source, "insufficient-source");
10456   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10457   DEFSYM (Qinvalid_source, "invalid-source");
10458   DEFSYM (Qinterrupted, "interrupted");
10459   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10460   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10461
10462   defsubr (&Scoding_system_p);
10463   defsubr (&Sread_coding_system);
10464   defsubr (&Sread_non_nil_coding_system);
10465   defsubr (&Scheck_coding_system);
10466   defsubr (&Sdetect_coding_region);
10467   defsubr (&Sdetect_coding_string);
10468   defsubr (&Sfind_coding_systems_region_internal);
10469   defsubr (&Sunencodable_char_position);
10470   defsubr (&Scheck_coding_systems_region);
10471   defsubr (&Sdecode_coding_region);
10472   defsubr (&Sencode_coding_region);
10473   defsubr (&Sdecode_coding_string);
10474   defsubr (&Sencode_coding_string);
10475   defsubr (&Sdecode_sjis_char);
10476   defsubr (&Sencode_sjis_char);
10477   defsubr (&Sdecode_big5_char);
10478   defsubr (&Sencode_big5_char);
10479   defsubr (&Sset_terminal_coding_system_internal);
10480   defsubr (&Sset_safe_terminal_coding_system_internal);
10481   defsubr (&Sterminal_coding_system);
10482   defsubr (&Sset_keyboard_coding_system_internal);
10483   defsubr (&Skeyboard_coding_system);
10484   defsubr (&Sfind_operation_coding_system);
10485   defsubr (&Sset_coding_system_priority);
10486   defsubr (&Sdefine_coding_system_internal);
10487   defsubr (&Sdefine_coding_system_alias);
10488   defsubr (&Scoding_system_put);
10489   defsubr (&Scoding_system_base);
10490   defsubr (&Scoding_system_plist);
10491   defsubr (&Scoding_system_aliases);
10492   defsubr (&Scoding_system_eol_type);
10493   defsubr (&Scoding_system_priority_list);
10494
10495   DEFVAR_LISP ("coding-system-list", Vcoding_system_list,
10496                doc: /* List of coding systems.
10497
10498 Do not alter the value of this variable manually.  This variable should be
10499 updated by the functions `define-coding-system' and
10500 `define-coding-system-alias'.  */);
10501   Vcoding_system_list = Qnil;
10502
10503   DEFVAR_LISP ("coding-system-alist", Vcoding_system_alist,
10504                doc: /* Alist of coding system names.
10505 Each element is one element list of coding system name.
10506 This variable is given to `completing-read' as COLLECTION argument.
10507
10508 Do not alter the value of this variable manually.  This variable should be
10509 updated by the functions `make-coding-system' and
10510 `define-coding-system-alias'.  */);
10511   Vcoding_system_alist = Qnil;
10512
10513   DEFVAR_LISP ("coding-category-list", Vcoding_category_list,
10514                doc: /* List of coding-categories (symbols) ordered by priority.
10515
10516 On detecting a coding system, Emacs tries code detection algorithms
10517 associated with each coding-category one by one in this order.  When
10518 one algorithm agrees with a byte sequence of source text, the coding
10519 system bound to the corresponding coding-category is selected.
10520
10521 Don't modify this variable directly, but use `set-coding-system-priority'.  */);
10522   {
10523     int i;
10524
10525     Vcoding_category_list = Qnil;
10526     for (i = coding_category_max - 1; i >= 0; i--)
10527       Vcoding_category_list
10528         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10529                  Vcoding_category_list);
10530   }
10531
10532   DEFVAR_LISP ("coding-system-for-read", Vcoding_system_for_read,
10533                doc: /* Specify the coding system for read operations.
10534 It is useful to bind this variable with `let', but do not set it globally.
10535 If the value is a coding system, it is used for decoding on read operation.
10536 If not, an appropriate element is used from one of the coding system alists.
10537 There are three such tables: `file-coding-system-alist',
10538 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10539   Vcoding_system_for_read = Qnil;
10540
10541   DEFVAR_LISP ("coding-system-for-write", Vcoding_system_for_write,
10542                doc: /* Specify the coding system for write operations.
10543 Programs bind this variable with `let', but you should not set it globally.
10544 If the value is a coding system, it is used for encoding of output,
10545 when writing it to a file and when sending it to a file or subprocess.
10546
10547 If this does not specify a coding system, an appropriate element
10548 is used from one of the coding system alists.
10549 There are three such tables: `file-coding-system-alist',
10550 `process-coding-system-alist', and `network-coding-system-alist'.
10551 For output to files, if the above procedure does not specify a coding system,
10552 the value of `buffer-file-coding-system' is used.  */);
10553   Vcoding_system_for_write = Qnil;
10554
10555   DEFVAR_LISP ("last-coding-system-used", Vlast_coding_system_used,
10556                doc: /*
10557 Coding system used in the latest file or process I/O.  */);
10558   Vlast_coding_system_used = Qnil;
10559
10560   DEFVAR_LISP ("last-code-conversion-error", Vlast_code_conversion_error,
10561                doc: /*
10562 Error status of the last code conversion.
10563
10564 When an error was detected in the last code conversion, this variable
10565 is set to one of the following symbols.
10566   `insufficient-source'
10567   `inconsistent-eol'
10568   `invalid-source'
10569   `interrupted'
10570   `insufficient-memory'
10571 When no error was detected, the value doesn't change.  So, to check
10572 the error status of a code conversion by this variable, you must
10573 explicitly set this variable to nil before performing code
10574 conversion.  */);
10575   Vlast_code_conversion_error = Qnil;
10576
10577   DEFVAR_BOOL ("inhibit-eol-conversion", inhibit_eol_conversion,
10578                doc: /*
10579 *Non-nil means always inhibit code conversion of end-of-line format.
10580 See info node `Coding Systems' and info node `Text and Binary' concerning
10581 such conversion.  */);
10582   inhibit_eol_conversion = 0;
10583
10584   DEFVAR_BOOL ("inherit-process-coding-system", inherit_process_coding_system,
10585                doc: /*
10586 Non-nil means process buffer inherits coding system of process output.
10587 Bind it to t if the process output is to be treated as if it were a file
10588 read from some filesystem.  */);
10589   inherit_process_coding_system = 0;
10590
10591   DEFVAR_LISP ("file-coding-system-alist", Vfile_coding_system_alist,
10592                doc: /*
10593 Alist to decide a coding system to use for a file I/O operation.
10594 The format is ((PATTERN . VAL) ...),
10595 where PATTERN is a regular expression matching a file name,
10596 VAL is a coding system, a cons of coding systems, or a function symbol.
10597 If VAL is a coding system, it is used for both decoding and encoding
10598 the file contents.
10599 If VAL is a cons of coding systems, the car part is used for decoding,
10600 and the cdr part is used for encoding.
10601 If VAL is a function symbol, the function must return a coding system
10602 or a cons of coding systems which are used as above.  The function is
10603 called with an argument that is a list of the arguments with which
10604 `find-operation-coding-system' was called.  If the function can't decide
10605 a coding system, it can return `undecided' so that the normal
10606 code-detection is performed.
10607
10608 See also the function `find-operation-coding-system'
10609 and the variable `auto-coding-alist'.  */);
10610   Vfile_coding_system_alist = Qnil;
10611
10612   DEFVAR_LISP ("process-coding-system-alist", Vprocess_coding_system_alist,
10613                doc: /*
10614 Alist to decide a coding system to use for a process I/O operation.
10615 The format is ((PATTERN . VAL) ...),
10616 where PATTERN is a regular expression matching a program name,
10617 VAL is a coding system, a cons of coding systems, or a function symbol.
10618 If VAL is a coding system, it is used for both decoding what received
10619 from the program and encoding what sent to the program.
10620 If VAL is a cons of coding systems, the car part is used for decoding,
10621 and the cdr part is used for encoding.
10622 If VAL is a function symbol, the function must return a coding system
10623 or a cons of coding systems which are used as above.
10624
10625 See also the function `find-operation-coding-system'.  */);
10626   Vprocess_coding_system_alist = Qnil;
10627
10628   DEFVAR_LISP ("network-coding-system-alist", Vnetwork_coding_system_alist,
10629                doc: /*
10630 Alist to decide a coding system to use for a network I/O operation.
10631 The format is ((PATTERN . VAL) ...),
10632 where PATTERN is a regular expression matching a network service name
10633 or is a port number to connect to,
10634 VAL is a coding system, a cons of coding systems, or a function symbol.
10635 If VAL is a coding system, it is used for both decoding what received
10636 from the network stream and encoding what sent to the network stream.
10637 If VAL is a cons of coding systems, the car part is used for decoding,
10638 and the cdr part is used for encoding.
10639 If VAL is a function symbol, the function must return a coding system
10640 or a cons of coding systems which are used as above.
10641
10642 See also the function `find-operation-coding-system'.  */);
10643   Vnetwork_coding_system_alist = Qnil;
10644
10645   DEFVAR_LISP ("locale-coding-system", Vlocale_coding_system,
10646                doc: /* Coding system to use with system messages.
10647 Also used for decoding keyboard input on X Window system.  */);
10648   Vlocale_coding_system = Qnil;
10649
10650   /* The eol mnemonics are reset in startup.el system-dependently.  */
10651   DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix,
10652                doc: /*
10653 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10654   eol_mnemonic_unix = make_pure_c_string (":");
10655
10656   DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos,
10657                doc: /*
10658 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10659   eol_mnemonic_dos = make_pure_c_string ("\\");
10660
10661   DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac,
10662                doc: /*
10663 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10664   eol_mnemonic_mac = make_pure_c_string ("/");
10665
10666   DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided,
10667                doc: /*
10668 *String displayed in mode line when end-of-line format is not yet determined.  */);
10669   eol_mnemonic_undecided = make_pure_c_string (":");
10670
10671   DEFVAR_LISP ("enable-character-translation", Venable_character_translation,
10672                doc: /*
10673 *Non-nil enables character translation while encoding and decoding.  */);
10674   Venable_character_translation = Qt;
10675
10676   DEFVAR_LISP ("standard-translation-table-for-decode",
10677                Vstandard_translation_table_for_decode,
10678                doc: /* Table for translating characters while decoding.  */);
10679   Vstandard_translation_table_for_decode = Qnil;
10680
10681   DEFVAR_LISP ("standard-translation-table-for-encode",
10682                Vstandard_translation_table_for_encode,
10683                doc: /* Table for translating characters while encoding.  */);
10684   Vstandard_translation_table_for_encode = Qnil;
10685
10686   DEFVAR_LISP ("charset-revision-table", Vcharset_revision_table,
10687                doc: /* Alist of charsets vs revision numbers.
10688 While encoding, if a charset (car part of an element) is found,
10689 designate it with the escape sequence identifying revision (cdr part
10690 of the element).  */);
10691   Vcharset_revision_table = Qnil;
10692
10693   DEFVAR_LISP ("default-process-coding-system",
10694                Vdefault_process_coding_system,
10695                doc: /* Cons of coding systems used for process I/O by default.
10696 The car part is used for decoding a process output,
10697 the cdr part is used for encoding a text to be sent to a process.  */);
10698   Vdefault_process_coding_system = Qnil;
10699
10700   DEFVAR_LISP ("latin-extra-code-table", Vlatin_extra_code_table,
10701                doc: /*
10702 Table of extra Latin codes in the range 128..159 (inclusive).
10703 This is a vector of length 256.
10704 If Nth element is non-nil, the existence of code N in a file
10705 \(or output of subprocess) doesn't prevent it to be detected as
10706 a coding system of ISO 2022 variant which has a flag
10707 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10708 or reading output of a subprocess.
10709 Only 128th through 159th elements have a meaning.  */);
10710   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10711
10712   DEFVAR_LISP ("select-safe-coding-system-function",
10713                Vselect_safe_coding_system_function,
10714                doc: /*
10715 Function to call to select safe coding system for encoding a text.
10716
10717 If set, this function is called to force a user to select a proper
10718 coding system which can encode the text in the case that a default
10719 coding system used in each operation can't encode the text.  The
10720 function should take care that the buffer is not modified while
10721 the coding system is being selected.
10722
10723 The default value is `select-safe-coding-system' (which see).  */);
10724   Vselect_safe_coding_system_function = Qnil;
10725
10726   DEFVAR_BOOL ("coding-system-require-warning",
10727                coding_system_require_warning,
10728                doc: /* Internal use only.
10729 If non-nil, on writing a file, `select-safe-coding-system-function' is
10730 called even if `coding-system-for-write' is non-nil.  The command
10731 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10732   coding_system_require_warning = 0;
10733
10734
10735   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10736                inhibit_iso_escape_detection,
10737                doc: /*
10738 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10739
10740 When Emacs reads text, it tries to detect how the text is encoded.
10741 This code detection is sensitive to escape sequences.  If Emacs sees
10742 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10743 of the ISO2022 encodings, and decodes text by the corresponding coding
10744 system (e.g. `iso-2022-7bit').
10745
10746 However, there may be a case that you want to read escape sequences in
10747 a file as is.  In such a case, you can set this variable to non-nil.
10748 Then the code detection will ignore any escape sequences, and no text is
10749 detected as encoded in some ISO-2022 encoding.  The result is that all
10750 escape sequences become visible in a buffer.
10751
10752 The default value is nil, and it is strongly recommended not to change
10753 it.  That is because many Emacs Lisp source files that contain
10754 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10755 in Emacs's distribution, and they won't be decoded correctly on
10756 reading if you suppress escape sequence detection.
10757
10758 The other way to read escape sequences in a file without decoding is
10759 to explicitly specify some coding system that doesn't use ISO-2022
10760 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10761   inhibit_iso_escape_detection = 0;
10762
10763   DEFVAR_BOOL ("inhibit-null-byte-detection",
10764                inhibit_null_byte_detection,
10765                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10766 By default, Emacs treats it as binary data, and does not attempt to
10767 decode it.  The effect is as if you specified `no-conversion' for
10768 reading that text.
10769
10770 Set this to non-nil when a regular text happens to include null bytes.
10771 Examples are Index nodes of Info files and null-byte delimited output
10772 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10773 decode text as usual.  */);
10774   inhibit_null_byte_detection = 0;
10775
10776   DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
10777                doc: /* Char table for translating self-inserting characters.
10778 This is applied to the result of input methods, not their input.
10779 See also `keyboard-translate-table'.
10780
10781 Use of this variable for character code unification was rendered
10782 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10783 internal character representation.  */);
10784     Vtranslation_table_for_input = Qnil;
10785
10786   {
10787     Lisp_Object args[coding_arg_max];
10788     Lisp_Object plist[16];
10789     int i;
10790
10791     for (i = 0; i < coding_arg_max; i++)
10792       args[i] = Qnil;
10793
10794     plist[0] = intern_c_string (":name");
10795     plist[1] = args[coding_arg_name] = Qno_conversion;
10796     plist[2] = intern_c_string (":mnemonic");
10797     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10798     plist[4] = intern_c_string (":coding-type");
10799     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10800     plist[6] = intern_c_string (":ascii-compatible-p");
10801     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10802     plist[8] = intern_c_string (":default-char");
10803     plist[9] = args[coding_arg_default_char] = make_number (0);
10804     plist[10] = intern_c_string (":for-unibyte");
10805     plist[11] = args[coding_arg_for_unibyte] = Qt;
10806     plist[12] = intern_c_string (":docstring");
10807     plist[13] = make_pure_c_string ("Do no conversion.\n\
10808 \n\
10809 When you visit a file with this coding, the file is read into a\n\
10810 unibyte buffer as is, thus each byte of a file is treated as a\n\
10811 character.");
10812     plist[14] = intern_c_string (":eol-type");
10813     plist[15] = args[coding_arg_eol_type] = Qunix;
10814     args[coding_arg_plist] = Flist (16, plist);
10815     Fdefine_coding_system_internal (coding_arg_max, args);
10816
10817     plist[1] = args[coding_arg_name] = Qundecided;
10818     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10819     plist[5] = args[coding_arg_coding_type] = Qundecided;
10820     /* This is already set.
10821        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10822     plist[8] = intern_c_string (":charset-list");
10823     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10824     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10825     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10826     plist[15] = args[coding_arg_eol_type] = Qnil;
10827     args[coding_arg_plist] = Flist (16, plist);
10828     Fdefine_coding_system_internal (coding_arg_max, args);
10829   }
10830
10831   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10832
10833   {
10834     int i;
10835
10836     for (i = 0; i < coding_category_max; i++)
10837       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10838   }
10839 #if defined (DOS_NT)
10840   system_eol_type = Qdos;
10841 #else
10842   system_eol_type = Qunix;
10843 #endif
10844   staticpro (&system_eol_type);
10845 }
10846
10847 char *
10848 emacs_strerror (int error_number)
10849 {
10850   char *str;
10851
10852   synchronize_system_messages_locale ();
10853   str = strerror (error_number);
10854
10855   if (! NILP (Vlocale_coding_system))
10856     {
10857       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10858                                                       Vlocale_coding_system,
10859                                                       0);
10860       str = SSDATA (dec);
10861     }
10862
10863   return str;
10864 }
10865
10866 #endif /* emacs */