src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009, 2010
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from coding system symbol to attributes vector is done by looking up
  63   Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we setup a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;
 166   ...;
 167   /* ONE_MORE_BYTE below also expects `c' and `src_base' to be in scope.  */
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the source is exhausted, jump
 171          to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173       /* Reject on a nonconforming byte; note strong evidence for XXX.  */
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (__C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source was exhausted successfully.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size.
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source code, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219   /* NOTE: `c' and `consumed_chars', used by ONE_MORE_BYTE, are omitted.  */
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf >= charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that source area and destination area are
 258   overlapped, which means that we can produce an encoded text until it
 259   reaches the head of the not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = coding->charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274   /* Stop early enough that one iteration cannot write past DST_END.  */
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292 #include <setjmp.h>
 293
 294 #include "lisp.h"
 295 #include "buffer.h"
 296 #include "character.h"
 297 #include "charset.h"
 298 #include "ccl.h"
 299 #include "composite.h"
 300 #include "coding.h"
 301 #include "window.h"
 302 #include "frame.h"
 303 #include "termhooks.h"
 304
 305 Lisp_Object Vcoding_system_hash_table;  /* Presumably the symbol -> attribute-vector table of "CODING SYSTEM" above; TODO confirm.  */
 306 /* NOTE(review): the Q.../QC... objects below have no description here.  */
 307 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 308 Lisp_Object Qunix, Qdos;        /* Values taken by system_eol_type (see below).  */
 309 extern Lisp_Object Qmac;        /* frame.c */
 310 Lisp_Object Qbuffer_file_coding_system;
 311 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 312 Lisp_Object Qdefault_char;
 313 Lisp_Object Qno_conversion, Qundecided;
 314 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 315 Lisp_Object Qbig, Qlittle;
 316 Lisp_Object Qcoding_system_history;
 317 Lisp_Object Qvalid_codes;
 318 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 319 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 320 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 321 Lisp_Object QCascii_compatible_p;
 322
 323 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 324 Lisp_Object Qcall_process, Qcall_process_region;
 325 Lisp_Object Qstart_process, Qopen_network_stream;
 326 Lisp_Object Qtarget_idx;
 327
 328 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 329 Lisp_Object Qinterrupted, Qinsufficient_memory;
 330
 331 extern Lisp_Object Qcompletion_ignore_case;
 332
 333 /* If a symbol has this property, evaluate the value to define the
 334    symbol as a coding system.  */
 335 static Lisp_Object Qcoding_system_define_form;
 336
 337 int coding_system_require_warning;
 338
 339 Lisp_Object Vselect_safe_coding_system_function;
 340
 341 /* Mnemonic string for each format of end-of-line.  */
 342 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 343 /* Mnemonic string to indicate that the format of end-of-line is not
 344    yet decided.  */
 345 Lisp_Object eol_mnemonic_undecided;
 346
 347 /* Format of end-of-line decided by system.  This is Qunix on
 348    Unix and Mac, Qdos on DOS/Windows.
 349    This has an effect only for external encoding (i.e. for output to
 350    file and process), not for in-buffer or Lisp string encoding.  */
 351 static Lisp_Object system_eol_type;
 352
 353 #ifdef emacs
 354
 355 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 356
 357 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 358
 359 /* Coding systems emacs-mule and raw-text are for converting only
 360    end-of-line format.  */
 361 Lisp_Object Qemacs_mule, Qraw_text;
 362 Lisp_Object Qutf_8_emacs;
 363
 364 /* Coding-systems are passed between Emacs Lisp programs and C internal
 365    routines by the following three variables.  */
 366 /* Coding-system for reading files and receiving data from process.  */
 367 Lisp_Object Vcoding_system_for_read;
 368 /* Coding-system for writing files and sending data to process.  */
 369 Lisp_Object Vcoding_system_for_write;
 370 /* Coding-system actually used in the latest I/O.  */
 371 Lisp_Object Vlast_coding_system_used;
 372 /* Set to non-nil when an error is detected during code conversion.  */
 373 Lisp_Object Vlast_code_conversion_error;
 374 /* A vector of length 256 which contains information about special
 375    Latin codes (especially for dealing with Microsoft codes).  */
 376 Lisp_Object Vlatin_extra_code_table;
 377
 378 /* Flag to inhibit code conversion of end-of-line format.  */
 379 int inhibit_eol_conversion;
 380
 381 /* Flag to inhibit ISO2022 escape sequence detection.  */
 382 int inhibit_iso_escape_detection;
 383
 384 /* Flag to inhibit detection of binary files through null bytes.  */
 385 int inhibit_null_byte_detection;
 386
 387 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 388 int inherit_process_coding_system;
 389
 390 /* Coding system to be used to encode text for terminal display when
 391    terminal coding system is nil.  */
 392 struct coding_system safe_terminal_coding;
 393
 394 Lisp_Object Vfile_coding_system_alist;
 395 Lisp_Object Vprocess_coding_system_alist;
 396 Lisp_Object Vnetwork_coding_system_alist;
 397
 398 Lisp_Object Vlocale_coding_system;
 399
 400 #endif /* emacs */
 401
 402 /* Flag telling whether translation tables are looked up during
 403    character code conversion.  */
 404 Lisp_Object Venable_character_translation;
 405 /* Standard translation table to look up on decoding (reading).  */
 406 Lisp_Object Vstandard_translation_table_for_decode;
 407 /* Standard translation table to look up on encoding (writing).  */
 408 Lisp_Object Vstandard_translation_table_for_encode;
 409
 410 Lisp_Object Qtranslation_table;
 411 Lisp_Object Qtranslation_table_id;
 412 Lisp_Object Qtranslation_table_for_decode;
 413 Lisp_Object Qtranslation_table_for_encode;
 414
 415 /* Alist associating charsets with revision numbers.  */
 416 static Lisp_Object Vcharset_revision_table;
 417
 418 /* Default coding systems used for process I/O.  */
 419 Lisp_Object Vdefault_process_coding_system;
 420
 421 /* Char table for translating Quail and self-inserting input.  */
 422 Lisp_Object Vtranslation_table_for_input;
 423
 424 /* Two special coding systems (presumably those of section 8).  */
 425 Lisp_Object Vsjis_coding_system;
 426 Lisp_Object Vbig5_coding_system;
 427
 428 /* ISO2022 section */
 429 /* Element REG of CODING's `coding_attr_iso_initial' attribute, via XINT.  */
 430 #define CODING_ISO_INITIAL(coding, reg)                 \
 431   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 432                      coding_attr_iso_initial),          \
 433                reg)))
 434 /* CODING->safe_charsets[CHARSET_ID], or -1 if CHARSET_ID is greater than
 435    CODING->max_charset_id or that entry is 255.  */
 436 #define CODING_ISO_REQUEST(coding, charset_id)          \
 437   (((charset_id) <= (coding)->max_charset_id            \
 438     ? ((coding)->safe_charsets[charset_id] != 255       \
 439        ? (coding)->safe_charsets[charset_id]            \
 440        : -1)                                            \
 441     : -1))
 442 /* Accessors for members of CODING->spec.iso_2022.  CODING_ISO_INVOKED_CHARSET
 443    is the designation of the register currently invoked on PLANE.  */
 444 #define CODING_ISO_FLAGS(coding)        \
 445   ((coding)->spec.iso_2022.flags)
 446 #define CODING_ISO_DESIGNATION(coding, reg)     \
 447   ((coding)->spec.iso_2022.current_designation[reg])
 448 #define CODING_ISO_INVOCATION(coding, plane)    \
 449   ((coding)->spec.iso_2022.current_invocation[plane])
 450 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 451   ((coding)->spec.iso_2022.single_shifting)
 452 #define CODING_ISO_BOL(coding)  \
 453   ((coding)->spec.iso_2022.bol)
 454 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 455   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 456 #define CODING_ISO_CMP_STATUS(coding)   \
 457   (&(coding)->spec.iso_2022.cmp_status)
 458 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 459   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 460 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 461   ((coding)->spec.iso_2022.embedded_utf_8)
 462
 463 /* Control characters of ISO2022.  */
 464                         /* code */      /* function */
 465 #define ISO_CODE_LF     0x0A            /* line-feed */
 466 #define ISO_CODE_CR     0x0D            /* carriage-return */
 467 #define ISO_CODE_SO     0x0E            /* shift-out */
 468 #define ISO_CODE_SI     0x0F            /* shift-in */
 469 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 470 #define ISO_CODE_ESC    0x1B            /* escape */
 471 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 472 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 473 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 474
 475 /* Every (1-byte) code of ISO2022 is classified into one of the
 476    following classes.  */
 477 enum iso_code_class_type
 478   {
 479     ISO_control_0,              /* Control codes in the range
 480                                    0x00..0x1F and 0x7F, except for the
 481                                    following 4 codes.  */
 482     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 483     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 484     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 485     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 486     ISO_control_1,              /* Control codes in the range
 487                                    0x80..0x9F, except for the
 488                                    following 3 codes.  */
 489     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 490     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 491     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 492     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 493     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 494     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 495     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 496   };
 497
 498 /** Each CODING_ISO_FLAG_XXX macro defines a flag bit of the
 499     `iso-flags' attribute of an iso2022 coding system.  */
 500
 501 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 502    instead of the correct short-form sequence (e.g. ESC $ A).  */
 503 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 504
 505 /* If set, reset graphic planes and registers at end-of-line to the
 506    initial state.  */
 507 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 508
 509 /* If set, reset graphic planes and registers before any control
 510    characters to the initial state.  */
 511 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 512
 513 /* If set, encode by 7-bit environment.  */
 514 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 515
 516 /* If set, use locking-shift function.  */
 517 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 518
 519 /* If set, use single-shift function.  Overrides
 520    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 521 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 522
 523 /* If set, use designation escape sequence.  */
 524 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 525
 526 /* If set, produce revision number sequence.  */
 527 #define CODING_ISO_FLAG_REVISION        0x0080
 528
 529 /* If set, produce ISO6429's direction specifying sequence.  */
 530 #define CODING_ISO_FLAG_DIRECTION       0x0100
 531
 532 /* If set, assume designation states are reset at beginning of line on
 533    output.  */
 534 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 535
 536 /* If set, designation sequence should be placed at beginning of line
 537    on output.  */
 538 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 539
 540 /* If set, do not encode unsafe characters on output.  */
 541 #define CODING_ISO_FLAG_SAFE            0x0800
 542
 543 /* If set, extra latin codes (128..159) are accepted as a valid code
 544    on input.  */
 545 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 546 /* NOTE(review): the remaining flags carry no description here.  */
 547 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 548
 549 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 550
 551 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 552
 553 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 554
 555 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 556
 557 /* A character to be produced on output if encoding of the original
 558    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 559 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 560
 561 /* UTF-8 section: accessor for CODING->spec.utf_8_bom.  */
 562 #define CODING_UTF_8_BOM(coding)        \
 563   ((coding)->spec.utf_8_bom)
 564
 565 /* UTF-16 section: accessors for the members of CODING->spec.utf_16.  */
 566 #define CODING_UTF_16_BOM(coding)       \
 567   ((coding)->spec.utf_16.bom)
 568
 569 #define CODING_UTF_16_ENDIAN(coding)    \
 570   ((coding)->spec.utf_16.endian)
 571
 572 #define CODING_UTF_16_SURROGATE(coding) \
 573   ((coding)->spec.utf_16.surrogate)
 574
 575
 576 /* CCL section: accessors for the CCL slots of CODING's attribute vector.  */
 577 #define CODING_CCL_DECODER(coding)      \
 578   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 579 #define CODING_CCL_ENCODER(coding)      \
 580   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 581 #define CODING_CCL_VALIDS(coding)                                          \
 582   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 583
 584 /* Index for each coding category in `coding_categories'.  Where a
 585    CATEGORY_MASK_XXX flag exists, it is bit (1 << N) for index N.  */
 586 enum coding_category
 587   {
 588     coding_category_iso_7,
 589     coding_category_iso_7_tight,
 590     coding_category_iso_8_1,
 591     coding_category_iso_8_2,
 592     coding_category_iso_7_else,
 593     coding_category_iso_8_else,
 594     coding_category_utf_8_auto,
 595     coding_category_utf_8_nosig,
 596     coding_category_utf_8_sig,
 597     coding_category_utf_16_auto,
 598     coding_category_utf_16_be,
 599     coding_category_utf_16_le,
 600     coding_category_utf_16_be_nosig,
 601     coding_category_utf_16_le_nosig,
 602     coding_category_charset,
 603     coding_category_sjis,
 604     coding_category_big5,
 605     coding_category_ccl,
 606     coding_category_emacs_mule,
 607     /* All above are targets of code detection.  */
 608     coding_category_raw_text,
 609     coding_category_undecided,
 610     coding_category_max         /* Number of categories; sizes the tables.  */
 611   };
 612
 613 /* Definitions of flag bits used in detect_coding_XXXX.  */
 614 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 615 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 616 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 617 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 618 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 619 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 620 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 621 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 622 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 623 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 624 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 625 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 626 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 627 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 628 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 629 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 630 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 631 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 632 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 633 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 634 /* This value is returned if detect_coding_mask () finds nothing other
 635    than ASCII characters.  It is the union of the masks of all
 636    categories that are targets of code detection (raw-text excluded).  */
 637 #define CATEGORY_MASK_ANY               \
 638   (CATEGORY_MASK_ISO_7                  \
 639    | CATEGORY_MASK_ISO_7_TIGHT          \
 640    | CATEGORY_MASK_ISO_8_1              \
 641    | CATEGORY_MASK_ISO_8_2              \
 642    | CATEGORY_MASK_ISO_7_ELSE           \
 643    | CATEGORY_MASK_ISO_8_ELSE           \
 644    | CATEGORY_MASK_UTF_8_AUTO           \
 645    | CATEGORY_MASK_UTF_8_NOSIG          \
 646    | CATEGORY_MASK_UTF_8_SIG            \
 647    | CATEGORY_MASK_UTF_16_AUTO          \
 648    | CATEGORY_MASK_UTF_16_BE            \
 649    | CATEGORY_MASK_UTF_16_LE            \
 650    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 651    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 652    | CATEGORY_MASK_CHARSET              \
 653    | CATEGORY_MASK_SJIS                 \
 654    | CATEGORY_MASK_BIG5                 \
 655    | CATEGORY_MASK_CCL                  \
 656    | CATEGORY_MASK_EMACS_MULE)
 657
 658 /* Unions of related category masks.  */
 659 #define CATEGORY_MASK_ISO_7BIT \
 660   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 661
 662 #define CATEGORY_MASK_ISO_8BIT \
 663   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 664
 665 #define CATEGORY_MASK_ISO_ELSE \
 666   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 667
 668 #define CATEGORY_MASK_ISO_ESCAPE        \
 669   (CATEGORY_MASK_ISO_7                  \
 670    | CATEGORY_MASK_ISO_7_TIGHT          \
 671    | CATEGORY_MASK_ISO_7_ELSE           \
 672    | CATEGORY_MASK_ISO_8_ELSE)
 673
 674 #define CATEGORY_MASK_ISO       \
 675   (  CATEGORY_MASK_ISO_7BIT     \
 676      | CATEGORY_MASK_ISO_8BIT   \
 677      | CATEGORY_MASK_ISO_ELSE)
 678
 679 #define CATEGORY_MASK_UTF_16            \
 680   (CATEGORY_MASK_UTF_16_AUTO            \
 681    | CATEGORY_MASK_UTF_16_BE            \
 682    | CATEGORY_MASK_UTF_16_LE            \
 683    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 684    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 685
 686 #define CATEGORY_MASK_UTF_8     \
 687   (CATEGORY_MASK_UTF_8_AUTO     \
 688    | CATEGORY_MASK_UTF_8_NOSIG  \
 689    | CATEGORY_MASK_UTF_8_SIG)
 690
 691 /* List of symbols `coding-category-xxx' ordered by priority.  This
 692    variable is exposed to Emacs Lisp.  */
 693 static Lisp_Object Vcoding_category_list;
 694
 695 /* Table of coding categories (Lisp symbols).  This variable is for
 696    internal use only.  */
 697 static Lisp_Object Vcoding_category_table;
 698
 699 /* Table of coding-categories ordered by priority.  */
 700 static enum coding_category coding_priorities[coding_category_max];
 701
 702 /* Nth element is a coding context for the coding system bound to the
 703    Nth coding category.  */
 704 static struct coding_system coding_categories[coding_category_max];
 705
 706 /*** Commonly used macros and functions ***/
 707 /* Fallback definitions; each argument may be evaluated twice.  */
 708 #ifndef min
 709 #define min(a, b) ((a) < (b) ? (a) : (b))
 710 #endif
 711 #ifndef max
 712 #define max(a, b) ((a) > (b) ? (a) : (b))
 713 #endif
 714 /* Set ATTRS to CODING's attributes and CHARSET_LIST to their charset list.  */
 715 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 716   do {                                                  \
 717     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 718     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 719   } while (0)
 720
 721 /* Safely get one byte from the source text pointed by SRC which ends
 722    at SRC_END, set C to that byte, and increment consumed_chars.  If
 723    SRC has already reached SRC_END, jump to `no_more_source', first
 724    recording CODING_RESULT_INSUFFICIENT_SRC when SRC_BASE < SRC.  If
 725    multibytep is nonzero and the byte has bit 0x80 set, then a byte
 726    0xC0 or 0xC1 is merged with the following byte into a single value;
 727    otherwise C is set to the negative value of the character code and
 728    CODING_RESULT_INVALID_SRC is recorded.  The caller should provide:
 729         coding, src, src_base, src_end, multibytep, consumed_chars */
 730 #define ONE_MORE_BYTE(c)                                \
 731   do {                                                  \
 732     if (src == src_end)                                 \
 733       {                                                 \
 734         if (src_base < src)                             \
 735           record_conversion_result                      \
 736             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 737         goto no_more_source;                            \
 738       }                                                 \
 739     c = *src++;                                         \
 740     if (multibytep && (c & 0x80))                       \
 741       {                                                 \
 742         if ((c & 0xFE) == 0xC0)                         \
 743           c = ((c & 1) << 6) | *src++;                  \
 744         else                                            \
 745           {                                             \
 746             src--;                                      \
 747             c = - string_char (src, &src, NULL);        \
 748             record_conversion_result                    \
 749               (coding, CODING_RESULT_INVALID_SRC);      \
 750           }                                             \
 751       }                                                 \
 752     consumed_chars++;                                   \
 753   } while (0)
 754
 755 /* Safely get two bytes from the source text pointed by SRC which ends
 756    at SRC_END, and set C1 and C2 to those bytes while skipping the
 757    heading multibyte characters.  If there are not enough bytes in the
 758    source, it jumps to `no_more_source'.  If multibytep is nonzero and
 759    a multibyte character is found for C2, C2 is set to -1 (SRC is not
 760    advanced past the rest of that character).  Unlike ONE_MORE_BYTE,
 761    nothing is recorded in CODING and consumed_chars is not updated.
 762    The caller should declare and set these variables in advance:
 763         src, src_end, multibytep
 764    It is intended that this macro is used in detect_coding_utf_16.  */
 765 #define TWO_MORE_BYTES(c1, c2)                          \
 766   do {                                                  \
 767     do {                                                \
 768       if (src == src_end)                               \
 769         goto no_more_source;                            \
 770       c1 = *src++;                                      \
 771       if (multibytep && (c1 & 0x80))                    \
 772         {                                               \
 773           if ((c1 & 0xFE) == 0xC0)                      \
 774             c1 = ((c1 & 1) << 6) | *src++;              \
 775           else                                          \
 776             {                                           \
 777               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 778               c1 = -1;                                  \
 779             }                                           \
 780         }                                               \
 781     } while (c1 < 0);                                   \
 782     if (src == src_end)                                 \
 783       goto no_more_source;                              \
 784     c2 = *src++;                                        \
 785     if (multibytep && (c2 & 0x80))                      \
 786       {                                                 \
 787         if ((c2 & 0xFE) == 0xC0)                        \
 788           c2 = ((c2 & 1) << 6) | *src++;                \
 789         else                                            \
 790           c2 = -1;                                      \
 791       }                                                 \
 792   } while (0)
 793
 794
 795 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 796   do {                                                  \
 797     c = *src++;                                         \
 798     if (multibytep && (c & 0x80))                       \
 799       {                                                 \
 800         if ((c & 0xFE) == 0xC0)                         \
 801           c = ((c & 1) << 6) | *src++;                  \
 802         else                                            \
 803           {                                             \
 804             src--;                                      \
 805             c = - string_char (src, &src, NULL);        \
 806             record_conversion_result                    \
 807               (coding, CODING_RESULT_INVALID_SRC);      \
 808           }                                             \
 809       }                                                 \
 810     consumed_chars++;                                   \
 811   } while (0)
 812
 813
 814 /* Store a byte C in the place pointed by DST and increment DST to the
 815    next free point, and increment PRODUCED_CHARS.  The caller should
 816    assure that C is 0..127, and declare and set the variable `dst'
 817    appropriately in advance.
 818 */
 819
 820
 821 #define EMIT_ONE_ASCII_BYTE(c)  \
 822   do {                          \
 823     produced_chars++;           \
 824     *dst++ = (c);               \
 825   } while (0)
 826
 827
 828 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 829
 830 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 831   do {                                  \
 832     produced_chars += 2;                \
 833     *dst++ = (c1), *dst++ = (c2);       \
 834   } while (0)
 835
 836
 837 /* Store a byte C in the place pointed by DST and increment DST to the
 838    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 839    nonzero, store in an appropriate multibyte from.  The caller should
 840    declare and set the variables `dst' and `multibytep' appropriately
 841    in advance.  */
 842
 843 #define EMIT_ONE_BYTE(c)                \
 844   do {                                  \
 845     produced_chars++;                   \
 846     if (multibytep)                     \
 847       {                                 \
 848         int ch = (c);                   \
 849         if (ch >= 0x80)                 \
 850           ch = BYTE8_TO_CHAR (ch);      \
 851         CHAR_STRING_ADVANCE (ch, dst);  \
 852       }                                 \
 853     else                                \
 854       *dst++ = (c);                     \
 855   } while (0)
 856
 857
 858 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 859
 860 #define EMIT_TWO_BYTES(c1, c2)          \
 861   do {                                  \
 862     produced_chars += 2;                \
 863     if (multibytep)                     \
 864       {                                 \
 865         int ch;                         \
 866                                         \
 867         ch = (c1);                      \
 868         if (ch >= 0x80)                 \
 869           ch = BYTE8_TO_CHAR (ch);      \
 870         CHAR_STRING_ADVANCE (ch, dst);  \
 871         ch = (c2);                      \
 872         if (ch >= 0x80)                 \
 873           ch = BYTE8_TO_CHAR (ch);      \
 874         CHAR_STRING_ADVANCE (ch, dst);  \
 875       }                                 \
 876     else                                \
 877       {                                 \
 878         *dst++ = (c1);                  \
 879         *dst++ = (c2);                  \
 880       }                                 \
 881   } while (0)
 882
 883
 884 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 885   do {                                  \
 886     EMIT_ONE_BYTE (c1);                 \
 887     EMIT_TWO_BYTES (c2, c3);            \
 888   } while (0)
 889
 890
 891 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 892   do {                                          \
 893     EMIT_TWO_BYTES (c1, c2);                    \
 894     EMIT_TWO_BYTES (c3, c4);                    \
 895   } while (0)
 896
 897
 898 /* Prototypes for static functions.  */
 899 static void record_conversion_result P_ ((struct coding_system *coding,
 900                                           enum coding_result_code result));
 901 static int detect_coding_utf_8 P_ ((struct coding_system *,
 902                                     struct coding_detection_info *info));
 903 static void decode_coding_utf_8 P_ ((struct coding_system *));
 904 static int encode_coding_utf_8 P_ ((struct coding_system *));
 905
 906 static int detect_coding_utf_16 P_ ((struct coding_system *,
 907                                      struct coding_detection_info *info));
 908 static void decode_coding_utf_16 P_ ((struct coding_system *));
 909 static int encode_coding_utf_16 P_ ((struct coding_system *));
 910
 911 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 912                                        struct coding_detection_info *info));
 913 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 914 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 915
 916 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 917                                          struct coding_detection_info *info));
 918 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 919 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 920
 921 static int detect_coding_sjis P_ ((struct coding_system *,
 922                                    struct coding_detection_info *info));
 923 static void decode_coding_sjis P_ ((struct coding_system *));
 924 static int encode_coding_sjis P_ ((struct coding_system *));
 925
 926 static int detect_coding_big5 P_ ((struct coding_system *,
 927                                    struct coding_detection_info *info));
 928 static void decode_coding_big5 P_ ((struct coding_system *));
 929 static int encode_coding_big5 P_ ((struct coding_system *));
 930
 931 static int detect_coding_ccl P_ ((struct coding_system *,
 932                                   struct coding_detection_info *info));
 933 static void decode_coding_ccl P_ ((struct coding_system *));
 934 static int encode_coding_ccl P_ ((struct coding_system *));
 935
 936 static void decode_coding_raw_text P_ ((struct coding_system *));
 937 static int encode_coding_raw_text P_ ((struct coding_system *));
 938
 939 static void coding_set_source P_ ((struct coding_system *));
 940 static void coding_set_destination P_ ((struct coding_system *));
 941 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 942 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 943                                             EMACS_INT, EMACS_INT));
 944 static unsigned char *alloc_destination P_ ((struct coding_system *,
 945                                              EMACS_INT, unsigned char *));
 946 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 947 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 948                                                      int *, int *,
 949                                                      unsigned char *));
 950 static int detect_eol P_ ((const unsigned char *,
 951                            EMACS_INT, enum coding_category));
 952 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 953 static void decode_eol P_ ((struct coding_system *));
 954 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 955 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *));
 956 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 957 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 958                                         EMACS_INT));
 959 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 960 static int decode_coding P_ ((struct coding_system *));
 961 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 962                                                       struct coding_system *,
 963                                                       int *, EMACS_INT *));
 964 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 965                                                   struct coding_system *,
 966                                                   int *, EMACS_INT *));
 967 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 968 static int encode_coding P_ ((struct coding_system *));
 969 static Lisp_Object make_conversion_work_buffer P_ ((int));
 970 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 971 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 972 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 973
 974 static void
 975 record_conversion_result (struct coding_system *coding,
 976                           enum coding_result_code result)
 977 {
 978   coding->result = result;
 979   switch (result)
 980     {
 981     case CODING_RESULT_INSUFFICIENT_SRC:
 982       Vlast_code_conversion_error = Qinsufficient_source;
 983       break;
 984     case CODING_RESULT_INCONSISTENT_EOL:
 985       Vlast_code_conversion_error = Qinconsistent_eol;
 986       break;
 987     case CODING_RESULT_INVALID_SRC:
 988       Vlast_code_conversion_error = Qinvalid_source;
 989       break;
 990     case CODING_RESULT_INTERRUPT:
 991       Vlast_code_conversion_error = Qinterrupted;
 992       break;
 993     case CODING_RESULT_INSUFFICIENT_MEM:
 994       Vlast_code_conversion_error = Qinsufficient_memory;
 995       break;
 996     case CODING_RESULT_INSUFFICIENT_DST:
 997       /* Don't record this error in Vlast_code_conversion_error
 998          because it happens just temporarily and is resolved when the
 999          whole conversion is finished.  */
1000       break;
1001     case CODING_RESULT_SUCCESS:
1002       break;
1003     default:
1004       Vlast_code_conversion_error = intern ("Unknown error");
1005     }
1006 }
1007
/* Decode CODE of CHARSET into C with DECODE_CHAR while keeping
   pointers into the source text valid.  decode_char could cause
   relocation of buffers if it loads a charset map, because loading a
   charset map allocates large structures.  So when
   charset_map_loaded is set by the call, recompute CODING->source
   with coding_set_source and move SRC, SRC_BASE and SRC_END by the
   distance the source moved.  */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    c = DECODE_CHAR (charset, code);                                         \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *relocated_from = coding->source;                \
        EMACS_INT shift;                                                     \
                                                                             \
        coding_set_source (coding);                                          \
        shift = coding->source - relocated_from;                             \
        src_end += shift;                                                    \
        src_base += shift;                                                   \
        src += shift;                                                        \
      }                                                                      \
  } while (0)
1028
1029
/* Unless more than BYTES bytes of room remain between dst and
   dst_end, enlarge coding->destination with alloc_destination (by
   BYTES plus the number of elements left in charbuf) and update dst
   and dst_end.  We don't have to take care of coding->source which
   will be relocated.  It is handled by calling coding_set_source in
   encode_coding.  */

#define ASSURE_DESTINATION(bytes)                               \
  do {                                                          \
    if (dst + (bytes) >= dst_end)                               \
      {                                                         \
        int room_needed = charbuf_end - charbuf + (bytes);      \
                                                                \
        dst = alloc_destination (coding, room_needed, dst);     \
        dst_end = coding->destination + coding->dst_bytes;      \
      }                                                         \
  } while (0)
1045
1046
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   Characters up to MAX_5_BYTE_CHAR are written as one to five bytes
   according to their magnitude; anything larger is handed to
   BYTE8_STRING after subtracting 0x3FFF80.

   C is evaluated several times, so it must be free of side effects.
   Every use of C is parenthesized so that an expression argument
   (e.g. `hi | lo') is encoded the same way as a plain variable.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1076
1077
/* Return the character code of character whose multibyte form is at
   P, and advance P to the end of the multibyte form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length (1 to 5 bytes) is chosen from the high bits of the
   leading byte.  A two-byte form whose leading byte is below 0xC2
   yields a code with 0x3FFF80 OR-ed in.  P is evaluated several
   times and is modified.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  (((p)[0] & 0x80) == 0                                         \
   ? *(p)++                                                     \
   : ((p)[0] & 0x20) == 0                                       \
   ? ((p) += 2,                                                 \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)        \
       | (((p)[-2] & 0x1F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p)[0] & 0x10) == 0                                       \
   ? ((p) += 3,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x0F) << 12)))                             \
   : ((p)[0] & 0x08) == 0                                       \
   ? ((p) += 4,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-4] & 0xF) << 18)))                              \
   : ((p) += 5,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-4] & 0x3F) << 18))))
1106
1107
1108 static void
1109 coding_set_source (coding)
1110      struct coding_system *coding;
1111 {
1112   if (BUFFERP (coding->src_object))
1113     {
1114       struct buffer *buf = XBUFFER (coding->src_object);
1115
1116       if (coding->src_pos < 0)
1117         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1118       else
1119         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1120     }
1121   else if (STRINGP (coding->src_object))
1122     {
1123       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1124     }
1125   else
1126     /* Otherwise, the source is C string and is never relocated
1127        automatically.  Thus we don't have to update anything.  */
1128     ;
1129 }
1130
1131 static void
1132 coding_set_destination (coding)
1133      struct coding_system *coding;
1134 {
1135   if (BUFFERP (coding->dst_object))
1136     {
1137       if (coding->src_pos < 0)
1138         {
1139           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1140           coding->dst_bytes = (GAP_END_ADDR
1141                                - (coding->src_bytes - coding->consumed)
1142                                - coding->destination);
1143         }
1144       else
1145         {
1146           /* We are sure that coding->dst_pos_byte is before the gap
1147              of the buffer. */
1148           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1149                                  + coding->dst_pos_byte - BEG_BYTE);
1150           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1151                                - coding->destination);
1152         }
1153     }
1154   else
1155     /* Otherwise, the destination is C string and is never relocated
1156        automatically.  Thus we don't have to update anything.  */
1157     ;
1158 }
1159
1160
1161 static void
1162 coding_alloc_by_realloc (coding, bytes)
1163      struct coding_system *coding;
1164      EMACS_INT bytes;
1165 {
1166   coding->destination = (unsigned char *) xrealloc (coding->destination,
1167                                                     coding->dst_bytes + bytes);
1168   coding->dst_bytes += bytes;
1169 }
1170
1171 static void
1172 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1173      struct coding_system *coding;
1174      EMACS_INT gap_head_used, bytes;
1175 {
1176   if (EQ (coding->src_object, coding->dst_object))
1177     {
1178       /* The gap may contain the produced data at the head and not-yet
1179          consumed data at the tail.  To preserve those data, we at
1180          first make the gap size to zero, then increase the gap
1181          size.  */
1182       EMACS_INT add = GAP_SIZE;
1183
1184       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1185       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1186       make_gap (bytes);
1187       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1188       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1189     }
1190   else
1191     {
1192       Lisp_Object this_buffer;
1193
1194       this_buffer = Fcurrent_buffer ();
1195       set_buffer_internal (XBUFFER (coding->dst_object));
1196       make_gap (bytes);
1197       set_buffer_internal (XBUFFER (this_buffer));
1198     }
1199 }
1200
1201
1202 static unsigned char *
1203 alloc_destination (coding, nbytes, dst)
1204      struct coding_system *coding;
1205      EMACS_INT nbytes;
1206      unsigned char *dst;
1207 {
1208   EMACS_INT offset = dst - coding->destination;
1209
1210   if (BUFFERP (coding->dst_object))
1211     {
1212       struct buffer *buf = XBUFFER (coding->dst_object);
1213
1214       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1215     }
1216   else
1217     coding_alloc_by_realloc (coding, nbytes);
1218   coding_set_destination (coding);
1219   dst = coding->destination + offset;
1220   return dst;
1221 }
1222
1223 /** Macros for annotations.  */
1224
1225 /* An annotation data is stored in the array coding->charbuf in this
1226    format:
1227      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1228    LENGTH is the number of elements in the annotation.
1229    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1230    NCHARS is the number of characters in the text annotated.
1231
   The format of the following elements depends on ANNOTATION_MASK.
1233
   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
1236      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1237
1238    NBYTES is the number of bytes specified in the header part of
1239    old-style emacs-mule encoding, or 0 for the other kind of
1240    composition.
1241
1242    METHOD is one of enum composition_method.
1243
   Optional COMPOSITION-COMPONENTS are characters and composition
1245    rules.
1246
1247    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1248    follows.
1249
1250    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1251    recover from an invalid annotation, and should be skipped by
1252    produce_annotation.  */
1253
/* Maximum length of the header of annotation data.  */
#define MAX_ANNOTATION_LENGTH 5

/* Store the three common header elements of an annotation at BUF:
   the negated length LEN, the annotation MASK and the character
   count NCHARS.  BUF is advanced past them and coding->annotated is
   set to 1.  The caller must have `coding' in scope.

   The expansion deliberately has no trailing semicolon, so that the
   macro behaves as a single statement (e.g. in an unbraced
   if/else).  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)

/* Store a composition annotation header (five elements) at BUF: the
   common header followed by NBYTES and METHOD.  BUF is advanced past
   all of them.  */

#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes);                                                    \
    *(buf)++ = (method);                                                    \
  } while (0)


/* Store a charset annotation (four elements) at BUF: the common
   header followed by the charset ID.  BUF is advanced past all of
   them.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1278
1279 \f
1280 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1281
1282
1283
1284 \f
1285 /*** 3. UTF-8 ***/
1286
1287 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1288    Check if a text is encoded in UTF-8.  If it is, return 1, else
1289    return 0.  */
1290
/* Classification of an octet C of UTF-8 text.  */

/* C is a complete one-octet sequence (below 0x80).  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* C is a trailing octet (bit pattern 10xxxxxx).  */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
/* C leads a sequence of two octets (110xxxxx).  */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* C leads a sequence of three octets (1110xxxx).  */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* C leads a sequence of four octets (11110xxx).  */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* C leads a sequence of five octets (111110xx).  */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)

/* The byte order mark as a character code, and the three octets
   (0xEF 0xBB 0xBF) that represent it in UTF-8.  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1302
1303 static int
1304 detect_coding_utf_8 (coding, detect_info)
1305      struct coding_system *coding;
1306      struct coding_detection_info *detect_info;
1307 {
1308   const unsigned char *src = coding->source, *src_base;
1309   const unsigned char *src_end = coding->source + coding->src_bytes;
1310   int multibytep = coding->src_multibyte;
1311   int consumed_chars = 0;
1312   int bom_found = 0;
1313   int found = 0;
1314
1315   detect_info->checked |= CATEGORY_MASK_UTF_8;
1316   /* A coding system of this category is always ASCII compatible.  */
1317   src += coding->head_ascii;
1318
1319   while (1)
1320     {
1321       int c, c1, c2, c3, c4;
1322
1323       src_base = src;
1324       ONE_MORE_BYTE (c);
1325       if (c < 0 || UTF_8_1_OCTET_P (c))
1326         continue;
1327       ONE_MORE_BYTE (c1);
1328       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1329         break;
1330       if (UTF_8_2_OCTET_LEADING_P (c))
1331         {
1332           found = 1;
1333           continue;
1334         }
1335       ONE_MORE_BYTE (c2);
1336       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1337         break;
1338       if (UTF_8_3_OCTET_LEADING_P (c))
1339         {
1340           found = 1;
1341           if (src_base == coding->source
1342               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1343             bom_found = 1;
1344           continue;
1345         }
1346       ONE_MORE_BYTE (c3);
1347       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1348         break;
1349       if (UTF_8_4_OCTET_LEADING_P (c))
1350         {
1351           found = 1;
1352           continue;
1353         }
1354       ONE_MORE_BYTE (c4);
1355       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1356         break;
1357       if (UTF_8_5_OCTET_LEADING_P (c))
1358         {
1359           found = 1;
1360           continue;
1361         }
1362       break;
1363     }
1364   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1365   return 0;
1366
1367  no_more_source:
1368   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1369     {
1370       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1371       return 0;
1372     }
1373   if (bom_found)
1374     {
1375       /* The first character 0xFFFE doesn't necessarily mean a BOM.  */
1376       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1377     }
1378   else
1379     {
1380       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1381       if (found)
1382         detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1383     }
1384   return 1;
1385 }
1386
1387
1388 static void
1389 decode_coding_utf_8 (coding)
1390      struct coding_system *coding;
1391 {
1392   const unsigned char *src = coding->source + coding->consumed;
1393   const unsigned char *src_end = coding->source + coding->src_bytes;
1394   const unsigned char *src_base;
1395   int *charbuf = coding->charbuf + coding->charbuf_used;
1396   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1397   int consumed_chars = 0, consumed_chars_base = 0;
1398   int multibytep = coding->src_multibyte;
1399   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1400   Lisp_Object attr, charset_list;
1401   int eol_crlf =
1402     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1403   int byte_after_cr = -1;
1404
1405   CODING_GET_INFO (coding, attr, charset_list);
1406
1407   if (bom != utf_without_bom)
1408     {
1409       int c1, c2, c3;
1410
1411       src_base = src;
1412       ONE_MORE_BYTE (c1);
1413       if (! UTF_8_3_OCTET_LEADING_P (c1))
1414         src = src_base;
1415       else
1416         {
1417           ONE_MORE_BYTE (c2);
1418           if (! UTF_8_EXTRA_OCTET_P (c2))
1419             src = src_base;
1420           else
1421             {
1422               ONE_MORE_BYTE (c3);
1423               if (! UTF_8_EXTRA_OCTET_P (c3))
1424                 src = src_base;
1425               else
1426                 {
1427                   if ((c1 != UTF_8_BOM_1)
1428                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1429                     src = src_base;
1430                   else
1431                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1432                 }
1433             }
1434         }
1435     }
1436   CODING_UTF_8_BOM (coding) = utf_without_bom;
1437
1438
1439
1440   while (1)
1441     {
1442       int c, c1, c2, c3, c4, c5;
1443
1444       src_base = src;
1445       consumed_chars_base = consumed_chars;
1446
1447       if (charbuf >= charbuf_end)
1448         {
1449           if (byte_after_cr >= 0)
1450             src_base--;
1451           break;
1452         }
1453
1454       if (byte_after_cr >= 0)
1455         c1 = byte_after_cr, byte_after_cr = -1;
1456       else
1457         ONE_MORE_BYTE (c1);
1458       if (c1 < 0)
1459         {
1460           c = - c1;
1461         }
1462       else if (UTF_8_1_OCTET_P(c1))
1463         {
1464           if (eol_crlf && c1 == '\r')
1465             ONE_MORE_BYTE (byte_after_cr);
1466           c = c1;
1467         }
1468       else
1469         {
1470           ONE_MORE_BYTE (c2);
1471           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1472             goto invalid_code;
1473           if (UTF_8_2_OCTET_LEADING_P (c1))
1474             {
1475               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1476               /* Reject overlong sequences here and below.  Encoders
1477                  producing them are incorrect, they can be misleading,
1478                  and they mess up read/write invariance.  */
1479               if (c < 128)
1480                 goto invalid_code;
1481             }
1482           else
1483             {
1484               ONE_MORE_BYTE (c3);
1485               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1486                 goto invalid_code;
1487               if (UTF_8_3_OCTET_LEADING_P (c1))
1488                 {
1489                   c = (((c1 & 0xF) << 12)
1490                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1491                   if (c < 0x800
1492                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1493                     goto invalid_code;
1494                 }
1495               else
1496                 {
1497                   ONE_MORE_BYTE (c4);
1498                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1499                     goto invalid_code;
1500                   if (UTF_8_4_OCTET_LEADING_P (c1))
1501                     {
1502                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1503                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1504                     if (c < 0x10000)
1505                       goto invalid_code;
1506                     }
1507                   else
1508                     {
1509                       ONE_MORE_BYTE (c5);
1510                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1511                         goto invalid_code;
1512                       if (UTF_8_5_OCTET_LEADING_P (c1))
1513                         {
1514                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1515                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1516                                | (c5 & 0x3F));
1517                           if ((c > MAX_CHAR) || (c < 0x200000))
1518                             goto invalid_code;
1519                         }
1520                       else
1521                         goto invalid_code;
1522                     }
1523                 }
1524             }
1525         }
1526
1527       *charbuf++ = c;
1528       continue;
1529
1530     invalid_code:
1531       src = src_base;
1532       consumed_chars = consumed_chars_base;
1533       ONE_MORE_BYTE (c);
1534       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1535       coding->errors++;
1536     }
1537
1538  no_more_source:
1539   coding->consumed_char += consumed_chars_base;
1540   coding->consumed = src_base - coding->source;
1541   coding->charbuf_used = charbuf - coding->charbuf;
1542 }
1543
1544
1545 static int
1546 encode_coding_utf_8 (coding)
1547      struct coding_system *coding;
1548 {
1549   int multibytep = coding->dst_multibyte;
1550   int *charbuf = coding->charbuf;
1551   int *charbuf_end = charbuf + coding->charbuf_used;
1552   unsigned char *dst = coding->destination + coding->produced;
1553   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1554   int produced_chars = 0;
1555   int c;
1556
1557   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1558     {
1559       ASSURE_DESTINATION (3);
1560       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1561       CODING_UTF_8_BOM (coding) = utf_without_bom;
1562     }
1563
1564   if (multibytep)
1565     {
1566       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1567
1568       while (charbuf < charbuf_end)
1569         {
1570           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1571
1572           ASSURE_DESTINATION (safe_room);
1573           c = *charbuf++;
1574           if (CHAR_BYTE8_P (c))
1575             {
1576               c = CHAR_TO_BYTE8 (c);
1577               EMIT_ONE_BYTE (c);
1578             }
1579           else
1580             {
1581               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1582               for (p = str; p < pend; p++)
1583                 EMIT_ONE_BYTE (*p);
1584             }
1585         }
1586     }
1587   else
1588     {
1589       int safe_room = MAX_MULTIBYTE_LENGTH;
1590
1591       while (charbuf < charbuf_end)
1592         {
1593           ASSURE_DESTINATION (safe_room);
1594           c = *charbuf++;
1595           if (CHAR_BYTE8_P (c))
1596             *dst++ = CHAR_TO_BYTE8 (c);
1597           else
1598             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1599           produced_chars++;
1600         }
1601     }
1602   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1603   coding->produced_char += produced_chars;
1604   coding->produced = dst - coding->destination;
1605   return 0;
1606 }
1607
1608
1609 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1610    Check if a text is encoded in one of UTF-16 based coding systems.
1611    If it is, return 1, else return 0.  */
1612
/* Nonzero if the 16-bit code unit VAL is a high (leading) surrogate,
   i.e. 0xD800..0xDBFF.  Only the bits selected by 0xFC00 are
   examined.  */
#define UTF_16_HIGH_SURROGATE_P(val) (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit code unit VAL is a low (trailing) surrogate,
   i.e. 0xDC00..0xDFFF.  Only the bits selected by 0xFC00 are
   examined.  */
#define UTF_16_LOW_SURROGATE_P(val) (((val) & 0xFC00) == 0xDC00)

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  VAL may be
   evaluated more than once.  */
#define UTF_16_INVALID_P(val)           \
  ((val) == 0xFFFE                      \
   || (val) == 0xFFFF                   \
   || UTF_16_LOW_SURROGATE_P (val))
1623
1624
1625 static int
1626 detect_coding_utf_16 (coding, detect_info)
1627      struct coding_system *coding;
1628      struct coding_detection_info *detect_info;
1629 {
1630   const unsigned char *src = coding->source, *src_base = src;
1631   const unsigned char *src_end = coding->source + coding->src_bytes;
1632   int multibytep = coding->src_multibyte;
1633   int consumed_chars = 0;
1634   int c1, c2;
1635
1636   detect_info->checked |= CATEGORY_MASK_UTF_16;
1637   if (coding->mode & CODING_MODE_LAST_BLOCK
1638       && (coding->src_chars & 1))
1639     {
1640       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1641       return 0;
1642     }
1643
1644   TWO_MORE_BYTES (c1, c2);
1645   if ((c1 == 0xFF) && (c2 == 0xFE))
1646     {
1647       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1648                              | CATEGORY_MASK_UTF_16_AUTO);
1649       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1650                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1651                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1652     }
1653   else if ((c1 == 0xFE) && (c2 == 0xFF))
1654     {
1655       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1656                              | CATEGORY_MASK_UTF_16_AUTO);
1657       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1658                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1659                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1660     }
1661   else if (c2 < 0)
1662     {
1663       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1664       return 0;
1665     }
1666   else
1667     {
1668       /* We check the dispersion of Eth and Oth bytes where E is even and
1669          O is odd.  If both are high, we assume binary data.*/
1670       unsigned char e[256], o[256];
1671       unsigned e_num = 1, o_num = 1;
1672
1673       memset (e, 0, 256);
1674       memset (o, 0, 256);
1675       e[c1] = 1;
1676       o[c2] = 1;
1677
1678       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1679                                 |CATEGORY_MASK_UTF_16_BE
1680                                 | CATEGORY_MASK_UTF_16_LE);
1681
1682       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1683              != CATEGORY_MASK_UTF_16)
1684         {
1685           TWO_MORE_BYTES (c1, c2);
1686           if (c2 < 0)
1687             break;
1688           if (! e[c1])
1689             {
1690               e[c1] = 1;
1691               e_num++;
1692               if (e_num >= 128)
1693                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1694             }
1695           if (! o[c2])
1696             {
1697               o[c2] = 1;
1698               o_num++;
1699               if (o_num >= 128)
1700                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1701             }
1702         }
1703       return 0;
1704     }
1705
1706  no_more_source:
1707   return 1;
1708 }
1709
1710 static void
1711 decode_coding_utf_16 (coding)
1712      struct coding_system *coding;
1713 {
1714   const unsigned char *src = coding->source + coding->consumed;
1715   const unsigned char *src_end = coding->source + coding->src_bytes;
1716   const unsigned char *src_base;
1717   int *charbuf = coding->charbuf + coding->charbuf_used;
1718   /* We may produces at most 3 chars in one loop.  */
1719   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1720   int consumed_chars = 0, consumed_chars_base = 0;
1721   int multibytep = coding->src_multibyte;
1722   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1723   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1724   int surrogate = CODING_UTF_16_SURROGATE (coding);
1725   Lisp_Object attr, charset_list;
1726   int eol_crlf =
1727     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1728   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1729
1730   CODING_GET_INFO (coding, attr, charset_list);
1731
1732   if (bom == utf_with_bom)
1733     {
1734       int c, c1, c2;
1735
1736       src_base = src;
1737       ONE_MORE_BYTE (c1);
1738       ONE_MORE_BYTE (c2);
1739       c = (c1 << 8) | c2;
1740
1741       if (endian == utf_16_big_endian
1742           ? c != 0xFEFF : c != 0xFFFE)
1743         {
1744           /* The first two bytes are not BOM.  Treat them as bytes
1745              for a normal character.  */
1746           src = src_base;
1747           coding->errors++;
1748         }
1749       CODING_UTF_16_BOM (coding) = utf_without_bom;
1750     }
1751   else if (bom == utf_detect_bom)
1752     {
1753       /* We have already tried to detect BOM and failed in
1754          detect_coding.  */
1755       CODING_UTF_16_BOM (coding) = utf_without_bom;
1756     }
1757
1758   while (1)
1759     {
1760       int c, c1, c2;
1761
1762       src_base = src;
1763       consumed_chars_base = consumed_chars;
1764
1765       if (charbuf >= charbuf_end)
1766         {
1767           if (byte_after_cr1 >= 0)
1768             src_base -= 2;
1769           break;
1770         }
1771
1772       if (byte_after_cr1 >= 0)
1773         c1 = byte_after_cr1, byte_after_cr1 = -1;
1774       else
1775         ONE_MORE_BYTE (c1);
1776       if (c1 < 0)
1777         {
1778           *charbuf++ = -c1;
1779           continue;
1780         }
1781       if (byte_after_cr2 >= 0)
1782         c2 = byte_after_cr2, byte_after_cr2 = -1;
1783       else
1784         ONE_MORE_BYTE (c2);
1785       if (c2 < 0)
1786         {
1787           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1788           *charbuf++ = -c2;
1789           continue;
1790         }
1791       c = (endian == utf_16_big_endian
1792            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1793
1794       if (surrogate)
1795         {
1796           if (! UTF_16_LOW_SURROGATE_P (c))
1797             {
1798               if (endian == utf_16_big_endian)
1799                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1800               else
1801                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1802               *charbuf++ = c1;
1803               *charbuf++ = c2;
1804               coding->errors++;
1805               if (UTF_16_HIGH_SURROGATE_P (c))
1806                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1807               else
1808                 *charbuf++ = c;
1809             }
1810           else
1811             {
1812               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1813               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1814               *charbuf++ = 0x10000 + c;
1815             }
1816         }
1817       else
1818         {
1819           if (UTF_16_HIGH_SURROGATE_P (c))
1820             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1821           else
1822             {
1823               if (eol_crlf && c == '\r')
1824                 {
1825                   ONE_MORE_BYTE (byte_after_cr1);
1826                   ONE_MORE_BYTE (byte_after_cr2);
1827                 }
1828               *charbuf++ = c;
1829             }
1830         }
1831     }
1832
1833  no_more_source:
1834   coding->consumed_char += consumed_chars_base;
1835   coding->consumed = src_base - coding->source;
1836   coding->charbuf_used = charbuf - coding->charbuf;
1837 }
1838
1839 static int
1840 encode_coding_utf_16 (coding)
1841      struct coding_system *coding;
1842 {
1843   int multibytep = coding->dst_multibyte;
1844   int *charbuf = coding->charbuf;
1845   int *charbuf_end = charbuf + coding->charbuf_used;
1846   unsigned char *dst = coding->destination + coding->produced;
1847   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1848   int safe_room = 8;
1849   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1850   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1851   int produced_chars = 0;
1852   Lisp_Object attrs, charset_list;
1853   int c;
1854
1855   CODING_GET_INFO (coding, attrs, charset_list);
1856
1857   if (bom != utf_without_bom)
1858     {
1859       ASSURE_DESTINATION (safe_room);
1860       if (big_endian)
1861         EMIT_TWO_BYTES (0xFE, 0xFF);
1862       else
1863         EMIT_TWO_BYTES (0xFF, 0xFE);
1864       CODING_UTF_16_BOM (coding) = utf_without_bom;
1865     }
1866
1867   while (charbuf < charbuf_end)
1868     {
1869       ASSURE_DESTINATION (safe_room);
1870       c = *charbuf++;
1871       if (c > MAX_UNICODE_CHAR)
1872         c = coding->default_char;
1873
1874       if (c < 0x10000)
1875         {
1876           if (big_endian)
1877             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1878           else
1879             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1880         }
1881       else
1882         {
1883           int c1, c2;
1884
1885           c -= 0x10000;
1886           c1 = (c >> 10) + 0xD800;
1887           c2 = (c & 0x3FF) + 0xDC00;
1888           if (big_endian)
1889             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1890           else
1891             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1892         }
1893     }
1894   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1895   coding->produced = dst - coding->destination;
1896   coding->produced_char += produced_chars;
1897   return 0;
1898 }
1899
1900 \f
1901 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1902
1903 /* Emacs' internal format for representation of multiple character
1904    sets is a kind of multi-byte encoding, i.e. characters are
1905    represented by variable-length sequences of one-byte codes.
1906
1907    ASCII characters and control characters (e.g. `tab', `newline') are
1908    represented by one-byte sequences which are their ASCII codes, in
1909    the range 0x00 through 0x7F.
1910
1911    8-bit characters of the range 0x80..0x9F are represented by
1912    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1913    code + 0x20).
1914
1915    8-bit characters of the range 0xA0..0xFF are represented by
1916    one-byte sequences which are their 8-bit code.
1917
1918    The other characters are represented by a sequence of `base
1919    leading-code', optional `extended leading-code', and one or two
1920    `position-code's.  The length of the sequence is determined by the
1921    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1922    whereas extended leading-code and position-code take the range 0xA0
1923    through 0xFF.  See `charset.h' for more details about leading-code
1924    and position-code.
1925
1926    --- CODE RANGE of Emacs' internal format ---
1927    character set        range
1928    -------------        -----
1929    ascii                0x00..0x7F
1930    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1931    eight-bit-graphic    0xA0..0xFF
1932    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1933    ---------------------------------------------
1934
1935    As this is the internal character representation, the format is
1936    usually not used externally (i.e. in a file or in a data sent to a
1937    process).  But, it is possible to have a text externally in this
1938    format (i.e. by encoding by the coding system `emacs-mule').
1939
1940    In that case, a sequence of one-byte codes has a slightly different
1941    form.
1942
1943    At first, all characters in eight-bit-control are represented by
1944    one-byte sequences which are their 8-bit code.
1945
1946    Next, character composition data are represented by the byte
1947    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1948    where,
1949         METHOD is 0xF2 plus one of composition method (enum
1950         composition_method),
1951
1952         BYTES is 0xA0 plus a byte length of this composition data,
1953
1954         CHARS is 0xA0 plus a number of characters composed by this
1955         data,
1956
1957         COMPONENTs are characters in multibyte form or composition
1958         rules encoded as two bytes of ASCII codes.
1959
1960    In addition, for backward compatibility, the following formats are
1961    also recognized as composition data on decoding.
1962
1963    0x80 MSEQ ...
1964    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1965
1966    Here,
1967         MSEQ is a multibyte form, but in one of these special formats:
1968           ASCII: 0xA0 ASCII_CODE+0x80,
1969           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1970         RULE is a one byte code of the range 0xA0..0xF0 that
1971         represents a composition rule.
1972   */
1973
1974 char emacs_mule_bytes[256];  /* Indexed by a byte value: the byte length (1..4 in the code below)
                                     of the emacs-mule sequence that the byte begins.  Not initialized here.  */
1975
1976
1977 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1978    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1979    else return 0.  */
1980
1981 static int
1982 detect_coding_emacs_mule (coding, detect_info)
1983      struct coding_system *coding;
1984      struct coding_detection_info *detect_info;
1985 {
1986   const unsigned char *src = coding->source, *src_base;
1987   const unsigned char *src_end = coding->source + coding->src_bytes;
1988   int multibytep = coding->src_multibyte;
1989   int consumed_chars = 0;
1990   int c;
1991   int found = 0;
1992
1993   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1994   /* A coding system of this category is always ASCII compatible.  */
1995   src += coding->head_ascii;
1996
1997   while (1)
1998     {
1999       src_base = src;
2000       ONE_MORE_BYTE (c);
2001       if (c < 0)
2002         continue;
2003       if (c == 0x80)
2004         {
2005           /* Perhaps the start of composite character.  We simply skip
2006              it because analyzing it is too heavy for detecting.  But,
2007              at least, we check that the composite character
2008              constitutes of more than 4 bytes.  */
2009           const unsigned char *src_base;
2010
2011         repeat:
2012           src_base = src;
2013           do
2014             {
2015               ONE_MORE_BYTE (c);
2016             }
2017           while (c >= 0xA0);
2018
2019           if (src - src_base <= 4)
2020             break;
2021           found = CATEGORY_MASK_EMACS_MULE;
2022           if (c == 0x80)
2023             goto repeat;
2024         }
2025
2026       if (c < 0x80)
2027         {
2028           if (c < 0x20
2029               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2030             break;
2031         }
2032       else
2033         {
2034           int more_bytes = emacs_mule_bytes[c] - 1;
2035
2036           while (more_bytes > 0)
2037             {
2038               ONE_MORE_BYTE (c);
2039               if (c < 0xA0)
2040                 {
2041                   src--;        /* Unread the last byte.  */
2042                   break;
2043                 }
2044               more_bytes--;
2045             }
2046           if (more_bytes != 0)
2047             break;
2048           found = CATEGORY_MASK_EMACS_MULE;
2049         }
2050     }
2051   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2052   return 0;
2053
2054  no_more_source:
2055   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2056     {
2057       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2058       return 0;
2059     }
2060   detect_info->found |= found;
2061   return 1;
2062 }
2063
2064
2065 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2066    character.  If CMP_STATUS indicates that we must expect MSEQ or
2067    RULE described above, decode it and return the negative value of
2068    the decoded character or rule.  If an invalid byte is found, return
2069    -1.  If SRC is too short, return -2.  */
2070
2071 int
2072 emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
2073      struct coding_system *coding;
2074      const unsigned char *src;
2075      int *nbytes, *nchars, *id;
2076      struct composition_status *cmp_status;
2077 {
2078   const unsigned char *src_end = coding->source + coding->src_bytes;
2079   const unsigned char *src_base = src;
2080   int multibytep = coding->src_multibyte;
2081   struct charset *charset;
2082   unsigned code;
2083   int c;
2084   int consumed_chars = 0;
2085   int mseq_found = 0;
2086
2087   ONE_MORE_BYTE (c);
2088   if (c < 0)
2089     {
2090       c = -c;
2091       charset = emacs_mule_charset[0];
2092     }
2093   else
2094     {
2095       if (c >= 0xA0)
2096         {
2097           if (cmp_status->state != COMPOSING_NO
2098               && cmp_status->old_form)
2099             {
2100               if (cmp_status->state == COMPOSING_CHAR)
2101                 {
2102                   if (c == 0xA0)
2103                     {
2104                       ONE_MORE_BYTE (c);
2105                       c -= 0x80;
2106                       if (c < 0)
2107                         goto invalid_code;
2108                     }
2109                   else
2110                     c -= 0x20;
2111                   mseq_found = 1;
2112                 }
2113               else
2114                 {
2115                   *nbytes = src - src_base;
2116                   *nchars = consumed_chars;
2117                   return -c;
2118                 }
2119             }
2120           else
2121             goto invalid_code;
2122         }
2123
2124       switch (emacs_mule_bytes[c])
2125         {
2126         case 2:
2127           if (! (charset = emacs_mule_charset[c]))
2128             goto invalid_code;
2129           ONE_MORE_BYTE (c);
2130           if (c < 0xA0)
2131             goto invalid_code;
2132           code = c & 0x7F;
2133           break;
2134
2135         case 3:
2136           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2137               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2138             {
2139               ONE_MORE_BYTE (c);
2140               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
2141                 goto invalid_code;
2142               ONE_MORE_BYTE (c);
2143               if (c < 0xA0)
2144                 goto invalid_code;
2145               code = c & 0x7F;
2146             }
2147           else
2148             {
2149               if (! (charset = emacs_mule_charset[c]))
2150                 goto invalid_code;
2151               ONE_MORE_BYTE (c);
2152               if (c < 0xA0)
2153                 goto invalid_code;
2154               code = (c & 0x7F) << 8;
2155               ONE_MORE_BYTE (c);
2156               if (c < 0xA0)
2157                 goto invalid_code;
2158               code |= c & 0x7F;
2159             }
2160           break;
2161
2162         case 4:
2163           ONE_MORE_BYTE (c);
2164           if (c < 0 || ! (charset = emacs_mule_charset[c]))
2165             goto invalid_code;
2166           ONE_MORE_BYTE (c);
2167           if (c < 0xA0)
2168             goto invalid_code;
2169           code = (c & 0x7F) << 8;
2170           ONE_MORE_BYTE (c);
2171           if (c < 0xA0)
2172             goto invalid_code;
2173           code |= c & 0x7F;
2174           break;
2175
2176         case 1:
2177           code = c;
2178           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2179                                      ? charset_ascii : charset_eight_bit);
2180           break;
2181
2182         default:
2183           abort ();
2184         }
2185       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, code, c);
2186       if (c < 0)
2187         goto invalid_code;
2188     }
2189   *nbytes = src - src_base;
2190   *nchars = consumed_chars;
2191   if (id)
2192     *id = charset->id;
2193   return (mseq_found ? -c : c);
2194
2195  no_more_source:
2196   return -2;
2197
2198  invalid_code:
2199   return -1;
2200 }
2201
2202
2203 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2204
2205 /* Handle these composition sequences ('|': the end of header elements,
2206    BYTES and CHARS >= 0xA0):
2207
2208    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2209    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2210    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2211
2212    and these old forms:
2213
2214    (4) relative composition: 0x80 | MSEQ ... MSEQ
2215    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2216
2217    When the starter 0x80 and the following header elements are found,
2218    this annotation header is produced.
2219
2220         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2221
2222    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2223    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2224
2225    Then, upon reading the following elements, these codes are produced
2226    until the composition end is found:
2227
2228    (1) CHAR ... CHAR
2229    (2) ALT ... ALT CHAR ... CHAR
2230    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2231    (4) CHAR ... CHAR
2232    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2233
2234    When the composition end is found, LENGTH and NCHARS in the
2235    annotation header are updated as below:
2236
2237    (1) LENGTH: unchanged, NCHARS: unchanged
2238    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2239    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2240    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2241    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2242
2243    If an error is found while composing, the annotation header is
2244    changed to the original composition header (plus filler -1s) as
2245    below:
2246
2247    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2248    (5)          [ 0x80 0xFF -1 -1 -1 ]
2249
2250    and the sequence [ -2 DECODED-RULE ] is changed to the original
2251    byte sequence as below:
2252         o the original byte sequence is B: [ B -1 ]
2253         o the original byte sequence is B1 B2: [ B1 B2 ]
2254
2255    Most of the routines are implemented by macros because many
2256    variables and labels in the caller decode_coding_emacs_mule must be
2257    accessible, and they are usually called just once (thus doesn't
2258    increase the size of compiled object).  */
2259
/* Decode C, one byte of an Emacs 20 style composition sequence, as a
   composition rule and store the result in RULE.

   C is first reduced by 0xA0 (the caller's variable is modified) and
   must then lie in 0..80; otherwise control jumps to the caller's
   `invalid_code' label.  The reduced value is split into C / 9 and
   C % 9, a 4 in either part is replaced by 10, and the two parts are
   packed with COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    c -= 0xA0;                                          \
    if (c < 0 || c >= 81)                               \
      goto invalid_code;                                \
    gref = c / 9;                                       \
    nref = c % 9;                                       \
    if (gref == 4)                                      \
      gref = 10;                                        \
    if (nref == 4)                                      \
      nref = 10;                                        \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2276
2277
/* Decode a composition rule of an Emacs 21 style composition
   sequence and store the result in RULE.  The rule is given by C and
   by the byte that follows at SRC, which is read into C with
   ONE_MORE_BYTE.

   Each of the two bytes, reduced by 0x20, must lie in 0..80;
   otherwise control jumps to the caller's `invalid_code' label.  The
   two reduced values are packed with COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    /* First byte.  */                                  \
    gref = c - 0x20;                                    \
    if (gref < 0 || gref >= 81)                         \
      goto invalid_code;                                \
                                                        \
    /* Second byte.  */                                 \
    ONE_MORE_BYTE (c);                                  \
    nref = c - 0x20;                                    \
    if (nref < 0 || nref >= 81)                         \
      goto invalid_code;                                \
                                                        \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2295
2296
/* Start of Emacs 21 style format.  On entry C holds the method byte
   (METHOD + 0xF2); the next two bytes at SRC are (BYTES + 0xA0) and
   (CHARS + 0xA0), where BYTES is the byte length of this composition
   information and CHARS is the number of characters composed by this
   composition.

   Control jumps to the caller's `invalid_code' label if a header
   byte is out of range.  Otherwise the caller's CMP_STATUS is set up
   for the new composition and an annotation header is added to
   CHARBUF with ADD_COMPOSITION_DATA.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int nbytes, nchars;                                                 \
                                                                        \
    /* BYTES.  */                                                       \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3                                                      \
        || (method == COMPOSITION_RELATIVE && nbytes != 4))             \
      goto invalid_code;                                                \
                                                                        \
    /* CHARS.  */                                                       \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
      goto invalid_code;                                                \
                                                                        \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    cmp_status->state = (method == COMPOSITION_RELATIVE                 \
                         ? COMPOSING_CHAR                               \
                         : COMPOSING_COMPONENT_CHAR);                   \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
2329
2330
/* Start of Emacs 20 style format for relative composition.  Sets up
   the caller's CMP_STATUS as an old-form COMPOSITION_RELATIVE
   composition with no characters or components yet, and adds an
   annotation header with zero NCHARS and NBYTES to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_RELATIVE;                  \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2342
2343
/* Start of Emacs 20 style format for rule-base composition.  Sets up
   the caller's CMP_STATUS as an old-form COMPOSITION_WITH_RULE
   composition with no characters or components yet, and adds an
   annotation header with zero NCHARS and NBYTES to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_WITH_RULE;                 \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2355
2356
/* Read the byte that follows a composition starter into C and begin
   the composition it announces:

     0xF2 + a composition method  -> Emacs 21 style header follows;
     0xA0..0xBF                   -> Emacs 20 relative composition; the
                                     byte is itself the first component
                                     and is read again by the caller;
     0xFF                         -> Emacs 20 rule-base composition.

   Any other byte makes control jump to the caller's `invalid_code'
   label.  */

#define DECODE_EMACS_MULE_COMPOSITION_START()           \
  do {                                                  \
    const unsigned char *current_src = src;             \
                                                        \
    ONE_MORE_BYTE (c);                                  \
    if (c < 0)                                          \
      goto invalid_code;                                \
    if (c - 0xF2 >= COMPOSITION_RELATIVE                \
        && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
      {                                                 \
        DECODE_EMACS_MULE_21_COMPOSITION ();            \
      }                                                 \
    else if (c < 0xA0)                                  \
      {                                                 \
        goto invalid_code;                              \
      }                                                 \
    else if (c < 0xC0)                                  \
      {                                                 \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
        /* Re-read C as a composition component.  */    \
        src = current_src;                              \
      }                                                 \
    else if (c == 0xFF)                                 \
      {                                                 \
        DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();   \
      }                                                 \
    else                                                \
      {                                                 \
        goto invalid_code;                              \
      }                                                 \
  } while (0)
2380
/* Close a composition that was decoded completely.  The composition
   data occupies the last `cmp_status->length' elements of CHARBUF.
   For an old form composition the number of composed characters is
   stored in the third element of that data; for a new form composition
   whose method is greater than COMPOSITION_RELATIVE the first element
   is replaced by the third element minus the data length.  In every
   case the state goes back to COMPOSING_NO.  Expects `cmp_status' and
   `charbuf' to be in scope where the macro is expanded.  */
#define EMACS_MULE_COMPOSITION_END()                                    \
  do {                                                                  \
    const int head = - cmp_status->length;                              \
                                                                        \
    if (! cmp_status->old_form)                                         \
      {                                                                 \
        if (cmp_status->method > COMPOSITION_RELATIVE)                  \
          charbuf[head] = charbuf[head + 2] - cmp_status->length;       \
      }                                                                 \
    else                                                                \
      charbuf[head + 2] = cmp_status->nchars;                           \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
2391
2392
2393 static int
2394 emacs_mule_finish_composition (charbuf, cmp_status)
2395      int *charbuf;
2396      struct composition_status *cmp_status;
2397 {
2398   int idx = - cmp_status->length;
2399   int new_chars;
2400
2401   if (cmp_status->old_form && cmp_status->nchars > 0)
2402     {
2403       charbuf[idx + 2] = cmp_status->nchars;
2404       new_chars = 0;
2405       if (cmp_status->method == COMPOSITION_WITH_RULE
2406           && cmp_status->state == COMPOSING_CHAR)
2407         {
2408           /* The last rule was invalid.  */
2409           int rule = charbuf[-1] + 0xA0;
2410
2411           charbuf[-2] = BYTE8_TO_CHAR (rule);
2412           charbuf[-1] = -1;
2413           new_chars = 1;
2414         }
2415     }
2416   else
2417     {
2418       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2419
2420       if (cmp_status->method == COMPOSITION_WITH_RULE)
2421         {
2422           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2423           charbuf[idx++] = -3;
2424           charbuf[idx++] = 0;
2425           new_chars = 1;
2426         }
2427       else
2428         {
2429           int nchars = charbuf[idx + 1] + 0xA0;
2430           int nbytes = charbuf[idx + 2] + 0xA0;
2431
2432           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2433           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2434           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2435           charbuf[idx++] = -1;
2436           new_chars = 4;
2437         }
2438     }
2439   cmp_status->state = COMPOSING_NO;
2440   return new_chars;
2441 }
2442
/* If a composition is still open, close it with
   emacs_mule_finish_composition and add the count it returns to
   `char_offset'.  Does nothing when no composition is in progress.
   Expects `cmp_status', `charbuf' and `char_offset' to be in scope
   where the macro is expanded.  */
#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
  do {                                                                    \
    if (cmp_status->state == COMPOSING_NO)                                \
      break;                                                              \
    char_offset += emacs_mule_finish_composition (charbuf, cmp_status);   \
  } while (0)
2448
2449
/* Decode emacs-mule (old Emacs internal format) text.

   Input is read from CODING->source, starting at offset
   CODING->consumed and ending at CODING->src_bytes.  Decoded
   characters and annotations are appended as integers to
   CODING->charbuf, starting at index CODING->charbuf_used; decoding
   stops early once the buffer is within 3 * MAX_ANNOTATION_LENGTH
   elements of its end.

   Composition sequences (introduced by the byte 0x80) are tracked in
   CODING->spec.emacs_mule.cmp_status.  A composition that is still
   open when the input runs out is saved in its `carryover' array,
   unless this is the last block, and is replayed at the start of the
   next call.

   An invalid sequence is not fatal: its first byte is emitted as a
   single character and CODING->errors is incremented.

   On return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used describe the last completely processed
   position.

   NOTE(review): ONE_MORE_BYTE is defined elsewhere; it presumably
   uses `src', `src_end', `consumed_chars' and `multibytep' and jumps
   to `no_more_source' at end of input -- confirm before renaming any
   local variable or label.  */
2450 static void
2451 decode_coding_emacs_mule (coding)
2452      struct coding_system *coding;
2453 {
2454   const unsigned char *src = coding->source + coding->consumed;
2455   const unsigned char *src_end = coding->source + coding->src_bytes;
2456   const unsigned char *src_base;
2457   int *charbuf = coding->charbuf + coding->charbuf_used;
2458   /* We may produce two annotations (charset and composition) in one
2459      loop and one more charset annotation at the end.  */
2460   int *charbuf_end
2461     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2462   int consumed_chars = 0, consumed_chars_base;
2463   int multibytep = coding->src_multibyte;
2464   Lisp_Object attrs, charset_list;
2465   int char_offset = coding->produced_char;
2466   int last_offset = char_offset;
2467   int last_id = charset_ascii;
2468   int eol_crlf =
2469     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte prefetched after a CR when EOL_CRLF is set, or -1 if none.  */
2470   int byte_after_cr = -1;
2471   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2472
2473   CODING_GET_INFO (coding, attrs, charset_list);
2474
       /* A composition was still open when the previous call returned:
          put the saved composition data back into CHARBUF so that
          decoding resumes where it stopped.  */
2475   if (cmp_status->state != COMPOSING_NO)
2476     {
2477       int i;
2478
2479       for (i = 0; i < cmp_status->length; i++)
2480         *charbuf++ = cmp_status->carryover[i];
2481       coding->annotated = 1;
2482     }
2483
2484   while (1)
2485     {
2486       int c, id;
2487
           /* Remember where this sequence starts so that it can be
              re-read, or left unconsumed, if it cannot be completed.  */
2488       src_base = src;
2489       consumed_chars_base = consumed_chars;
2490
2491       if (charbuf >= charbuf_end)
2492         {
               /* No room left.  Un-read the byte prefetched after a CR
                  so that it is processed by the next call.  */
2493           if (byte_after_cr >= 0)
2494             src_base--;
2495           break;
2496         }
2497
2498       if (byte_after_cr >= 0)
2499         c = byte_after_cr, byte_after_cr = -1;
2500       else
2501         ONE_MORE_BYTE (c);
2502
           /* A negative C is stored negated as a character of its own;
              0x80 introduces a composition.  Either one terminates a
              composition that is still open.  */
2503       if (c < 0 || c == 0x80)
2504         {
2505           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2506           if (c < 0)
2507             {
2508               *charbuf++ = -c;
2509               char_offset++;
2510             }
2511           else
2512             DECODE_EMACS_MULE_COMPOSITION_START ();
2513           continue;
2514         }
2515
2516       if (c < 0x80)
2517         {
               /* With CRLF end-of-line decoding, fetch the byte after
                  a CR now; it is consumed at the top of the next
                  iteration.  */
2518           if (eol_crlf && c == '\r')
2519             ONE_MORE_BYTE (byte_after_cr);
2520           id = charset_ascii;
2521           if (cmp_status->state != COMPOSING_NO)
2522             {
2523               if (cmp_status->old_form)
2524                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2525               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2526                 cmp_status->ncomps--;
2527             }
2528         }
2529       else
2530         {
2531           int nchars, nbytes;
2532           /* emacs_mule_char can load a charset map from a file, which
2533              allocates a large structure and might cause buffer text
2534              to be relocated as result.  Thus, we need to remember the
2535              original pointer to buffer text, and fixup all related
2536              pointers after the call.  */
2537           const unsigned char *orig = coding->source;
2538           EMACS_INT offset;
2539
2540           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2541                                cmp_status);
2542           offset = coding->source - orig;
2543           if (offset)
2544             {
2545               src += offset;
2546               src_base += offset;
2547               src_end += offset;
2548             }
               /* -1 marks an invalid sequence, -2 stops decoding for
                  this call; any other negative value is handled by the
                  composition states below.  */
2549           if (c < 0)
2550             {
2551               if (c == -1)
2552                 goto invalid_code;
2553               if (c == -2)
2554                 break;
2555             }
2556           src = src_base + nbytes;
2557           consumed_chars = consumed_chars_base + nchars;
2558           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2559             cmp_status->ncomps -= nchars;
2560         }
2561
2562       /* Now if C >= 0, we found a normally encoded character, if C <
2563          0, we found an old-style composition component character or
2564          rule.  */
2565
2566       if (cmp_status->state == COMPOSING_NO)
2567         {
               /* Plain character.  When the charset changes, close the
                  run of the previous (non-ASCII) charset with a charset
                  annotation.  */
2568           if (last_id != id)
2569             {
2570               if (last_id != charset_ascii)
2571                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2572                                   last_id);
2573               last_id = id;
2574               last_offset = char_offset;
2575             }
2576           *charbuf++ = c;
2577           char_offset++;
2578         }
2579       else if (cmp_status->state == COMPOSING_CHAR)
2580         {
2581           if (cmp_status->old_form)
2582             {
                   /* In the old form a normally encoded character ends
                      the composition; a negative C is one more
                      component.  */
2583               if (c >= 0)
2584                 {
2585                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2586                   *charbuf++ = c;
2587                   char_offset++;
2588                 }
2589               else
2590                 {
2591                   *charbuf++ = -c;
2592                   cmp_status->nchars++;
2593                   cmp_status->length++;
2594                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2595                     EMACS_MULE_COMPOSITION_END ();
2596                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2597                     cmp_status->state = COMPOSING_RULE;
2598                 }
2599             }
2600           else
2601             {
                   /* In the new form NCHARS counts down the characters
                      still expected.  */
2602               *charbuf++ = c;
2603               cmp_status->length++;
2604               cmp_status->nchars--;
2605               if (cmp_status->nchars == 0)
2606                 EMACS_MULE_COMPOSITION_END ();
2607             }
2608         }
2609       else if (cmp_status->state == COMPOSING_RULE)
2610         {
2611           int rule;
2612
2613           if (c >= 0)
2614             {
2615               EMACS_MULE_COMPOSITION_END ();
2616               *charbuf++ = c;
2617               char_offset++;
2618             }
2619           else
2620             {
2621               c = -c;
2622               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2623               if (rule < 0)
2624                 goto invalid_code;
                   /* A rule is stored as the pair -2, RULE.  */
2625               *charbuf++ = -2;
2626               *charbuf++ = rule;
2627               cmp_status->length += 2;
2628               cmp_status->state = COMPOSING_CHAR;
2629             }
2630         }
2631       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2632         {
2633           *charbuf++ = c;
2634           cmp_status->length++;
2635           if (cmp_status->ncomps == 0)
2636             cmp_status->state = COMPOSING_CHAR;
2637           else if (cmp_status->ncomps > 0)
2638             {
2639               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2640                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2641             }
2642           else
                 /* NCOMPS went negative: more components were seen
                    than expected, so give up on this composition.  */
2643             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2644         }
2645       else                      /* COMPOSING_COMPONENT_RULE */
2646         {
2647           int rule;
2648
2649           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2650           if (rule < 0)
2651             goto invalid_code;
2652           *charbuf++ = -2;
2653           *charbuf++ = rule;
2654           cmp_status->length += 2;
2655           cmp_status->ncomps--;
2656           if (cmp_status->ncomps > 0)
2657             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2658           else
2659             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2660         }
2661       continue;
2662
           /* NOTE(review): no `goto retry' appears in this function
              itself; the label is either the target of a macro defined
              elsewhere or unused -- confirm.  */
2663     retry:
2664       src = src_base;
2665       consumed_chars = consumed_chars_base;
2666       continue;
2667
           /* Give up on any open composition, rewind to the start of
              the offending sequence and emit its first byte as one
              character (ASCII as is, anything else as a raw byte
              character), counting it as an error.  */
2668     invalid_code:
2669       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2670       src = src_base;
2671       consumed_chars = consumed_chars_base;
2672       ONE_MORE_BYTE (c);
2673       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2674       char_offset++;
2675       coding->errors++;
2676     }
2677
2678  no_more_source:
2679   if (cmp_status->state != COMPOSING_NO)
2680     {
2681       if (coding->mode & CODING_MODE_LAST_BLOCK)
2682         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2683       else
2684         {
2685           int i;
2686
               /* More input may follow: take the unfinished
                  composition data back out of CHARBUF and keep it in
                  `carryover' for the next call.  */
2687           charbuf -= cmp_status->length;
2688           for (i = 0; i < cmp_status->length; i++)
2689             cmp_status->carryover[i] = charbuf[i];
2690         }
2691     }
2692   if (last_id != charset_ascii)
2693     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
       /* Report only what was processed completely: the *_base values
          mark the start of the last sequence that was begun.  */
2694   coding->consumed_char += consumed_chars_base;
2695   coding->consumed = src_base - coding->source;
2696   coding->charbuf_used = charbuf - coding->charbuf;
2697 }
2698
2699
/* Store in CODES (an array of at least two bytes) the leading code(s)
   used for a charset whose emacs-mule id is ID:

     ID < 0xA0        CODES[0] = ID,   CODES[1] = 0 (no second code)
     0xA0 .. 0xDF     CODES[0] = 0x9A, CODES[1] = ID
     0xE0 .. 0xEF     CODES[0] = 0x9B, CODES[1] = ID
     0xF0 .. 0xF4     CODES[0] = 0x9C, CODES[1] = ID
     otherwise        CODES[0] = 0x9D, CODES[1] = ID

   The expansion is a single statement WITHOUT a trailing semicolon, so
   the usual `EMACS_MULE_LEADING_CODES (id, codes);' also works as the
   body of an `if' that is followed by `else'.  Both arguments are
   parenthesized in the expansion but are evaluated more than once, so
   they must not have side effects.  */
#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
2713
2714
/* Encode the contents of CODING->charbuf (CODING->charbuf_used
   elements) in emacs-mule format, appending the bytes to
   CODING->destination at offset CODING->produced.

   A negative element starts an annotation, which is consumed rather
   than emitted: a charset annotation selects the charset preferred
   for the following characters, a composition annotation is skipped
   (composition is not yet implemented here), and any other kind
   aborts.  ASCII characters and raw byte characters are emitted as a
   single byte; every other character is emitted as the leading
   code(s) of its charset followed by one or two code bytes with the
   high bit set.

   As a side effect the charset list attribute of the coding system
   is replaced by Vemacs_mule_charset_list if it is a different
   object.  Records CODING_RESULT_SUCCESS, updates
   CODING->produced_char and CODING->produced, and always returns 0.

   NOTE(review): `multibytep', `dst_end' and `produced_chars' are not
   modified directly in this function; presumably ASSURE_DESTINATION
   and the EMIT_* macros use them (and `dst', `charbuf', `charbuf_end')
   by name -- confirm before renaming any local variable.  */
2715 static int
2716 encode_coding_emacs_mule (coding)
2717      struct coding_system *coding;
2718 {
2719   int multibytep = coding->dst_multibyte;
2720   int *charbuf = coding->charbuf;
2721   int *charbuf_end = charbuf + coding->charbuf_used;
2722   unsigned char *dst = coding->destination + coding->produced;
2723   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2724   int safe_room = 8;
2725   int produced_chars = 0;
2726   Lisp_Object attrs, charset_list;
2727   int c;
       /* Charset id requested by the last charset annotation, or -1.  */
2728   int preferred_charset_id = -1;
2729
2730   CODING_GET_INFO (coding, attrs, charset_list);
2731   if (! EQ (charset_list, Vemacs_mule_charset_list))
2732     {
2733       CODING_ATTR_CHARSET_LIST (attrs)
2734         = charset_list = Vemacs_mule_charset_list;
2735     }
2736
2737   while (charbuf < charbuf_end)
2738     {
2739       ASSURE_DESTINATION (safe_room);
2740       c = *charbuf++;
2741
2742       if (c < 0)
2743         {
2744           /* Handle an annotation.  */
               /* C is the negated annotation length; CHARBUF now points
                  at the element after it, which holds the kind.  */
2745           switch (*charbuf)
2746             {
2747             case CODING_ANNOTATE_COMPOSITION_MASK:
2748               /* Not yet implemented.  */
2749               break;
2750             case CODING_ANNOTATE_CHARSET_MASK:
                   /* NOTE(review): CHARBUF has already been advanced
                      past the length element, so index 3 is relative
                      to the kind element -- confirm that this matches
                      the layout written by ADD_CHARSET_DATA.  */
2751               preferred_charset_id = charbuf[3];
                   /* Ignore a preferred charset that is not in the
                      charset list of this coding system.  */
2752               if (preferred_charset_id >= 0
2753                   && NILP (Fmemq (make_number (preferred_charset_id),
2754                                   charset_list)))
2755                 preferred_charset_id = -1;
2756               break;
2757             default:
2758               abort ();
2759             }
               /* Skip the remaining elements of the annotation.  */
2760           charbuf += -c - 1;
2761           continue;
2762         }
2763
2764       if (ASCII_CHAR_P (c))
2765         EMIT_ONE_ASCII_BYTE (c);
2766       else if (CHAR_BYTE8_P (c))
2767         {
2768           c = CHAR_TO_BYTE8 (c);
2769           EMIT_ONE_BYTE (c);
2770         }
2771       else
2772         {
2773           struct charset *charset;
2774           unsigned code;
2775           int dimension;
2776           int emacs_mule_id;
2777           unsigned char leading_codes[2];
2778
               /* Use the preferred charset if it contains C, otherwise
                  search the charset list.  */
2779           if (preferred_charset_id >= 0)
2780             {
2781               charset = CHARSET_FROM_ID (preferred_charset_id);
2782               if (CHAR_CHARSET_P (c, charset))
2783                 code = ENCODE_CHAR (charset, c);
2784               else
2785                 charset = char_charset (c, charset_list, &code);
2786             }
2787           else
2788             charset = char_charset (c, charset_list, &code);
2789           if (! charset)
2790             {
                   /* C cannot be encoded; substitute the default
                      character of the coding system.  */
2791               c = coding->default_char;
2792               if (ASCII_CHAR_P (c))
2793                 {
2794                   EMIT_ONE_ASCII_BYTE (c);
2795                   continue;
2796                 }
                   /* NOTE(review): the result is not checked again;
                      presumably the default character is always
                      encodable -- confirm.  */
2797               charset = char_charset (c, charset_list, &code);
2798             }
2799           dimension = CHARSET_DIMENSION (charset);
2800           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2801           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2802           EMIT_ONE_BYTE (leading_codes[0]);
               /* A zero second leading code means there is none.  */
2803           if (leading_codes[1])
2804             EMIT_ONE_BYTE (leading_codes[1]);
2805           if (dimension == 1)
2806             EMIT_ONE_BYTE (code | 0x80);
2807           else
2808             {
2809               code |= 0x8080;
2810               EMIT_ONE_BYTE (code >> 8);
2811               EMIT_ONE_BYTE (code & 0xFF);
2812             }
2813         }
2814     }
2815   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2816   coding->produced_char += produced_chars;
2817   coding->produced = dst - coding->destination;
2818   return 0;
2819 }
2820
2821 \f
2822 /*** 7. ISO2022 handlers ***/
2823
2824 /* The following note describes the coding system ISO2022 briefly.
2825    Since the intention of this note is to help understand the
2826    functions in this file, some parts are NOT ACCURATE or are OVERLY
2827    SIMPLIFIED.  For thorough understanding, please refer to the
2828    original document of ISO2022.  This is equivalent to the standard
2829    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2830
2831    ISO2022 provides many mechanisms to encode several character sets
2832    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2833    is encoded using bytes less than 128.  This may make the encoded
2834    text a little bit longer, but the text passes more easily through
2835    several types of gateway, some of which strip off the MSB (Most
2836    Significant Bit).
2837
2838    There are two kinds of character sets: control character sets and
2839    graphic character sets.  The former contain control characters such
2840    as `newline' and `escape' to provide control functions (control
2841    functions are also provided by escape sequences).  The latter
2842    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2843    two control character sets and many graphic character sets.
2844
2845    Graphic character sets are classified into one of the following
2846    four classes, according to the number of bytes (DIMENSION) and
2847    number of characters in one dimension (CHARS) of the set:
2848    - DIMENSION1_CHARS94
2849    - DIMENSION1_CHARS96
2850    - DIMENSION2_CHARS94
2851    - DIMENSION2_CHARS96
2852
2853    In addition, each character set is assigned an identification tag,
2854    unique for each set, called the "final character" (denoted as <F>
2855    hereafter).  The <F> of each character set is decided by ECMA(*)
2856    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2857    (0x30..0x3F are for private use only).
2858
2859    Note (*): ECMA = European Computer Manufacturers Association
2860
2861    Here are examples of graphic character sets [NAME(<F>)]:
2862         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2863         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2864         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2865         o DIMENSION2_CHARS96 -- none for the moment
2866
2867    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2868         C0 [0x00..0x1F] -- control character plane 0
2869         GL [0x20..0x7F] -- graphic character plane 0
2870         C1 [0x80..0x9F] -- control character plane 1
2871         GR [0xA0..0xFF] -- graphic character plane 1
2872
2873    A control character set is directly designated and invoked to C0 or
2874    C1 by an escape sequence.  The most common case is that:
2875    - ISO646's  control character set is designated/invoked to C0, and
2876    - ISO6429's control character set is designated/invoked to C1,
2877    and usually these designations/invocations are omitted in encoded
2878    text.  In a 7-bit environment, only C0 can be used, and a control
2879    character for C1 is encoded by an appropriate escape sequence to
2880    fit into the environment.  All control characters for C1 are
2881    defined to have corresponding escape sequences.
2882
2883    A graphic character set is at first designated to one of four
2884    graphic registers (G0 through G3), then these graphic registers are
2885    invoked to GL or GR.  These designations and invocations can be
2886    done independently.  The most common case is that G0 is invoked to
2887    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2888    these invocations and designations are omitted in encoded text.
2889    In a 7-bit environment, only GL can be used.
2890
2891    When a graphic character set of CHARS94 is invoked to GL, codes
2892    0x20 and 0x7F of the GL area work as control characters SPACE and
2893    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2894    be used.
2895
2896    There are two ways of invocation: locking-shift and single-shift.
2897    With locking-shift, the invocation lasts until the next different
2898    invocation, whereas with single-shift, the invocation affects the
2899    following character only and doesn't affect the locking-shift
2900    state.  Invocations are done by the following control characters or
2901    escape sequences:
2902
2903    ----------------------------------------------------------------------
2904    abbrev  function                  cntrl escape seq   description
2905    ----------------------------------------------------------------------
2906    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2907    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2908    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2909    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2910    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2911    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2912    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2913    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2914    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2915    ----------------------------------------------------------------------
2916    (*) These are not used by any known coding system.
2917
2918    Control characters for these functions are defined by macros
2919    ISO_CODE_XXX in `coding.h'.
2920
2921    Designations are done by the following escape sequences:
2922    ----------------------------------------------------------------------
2923    escape sequence      description
2924    ----------------------------------------------------------------------
2925    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2926    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2927    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2928    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2929    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2930    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2931    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2932    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2933    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2934    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2935    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2936    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2937    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2938    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2939    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2940    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2941    ----------------------------------------------------------------------
2942
2943    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2944    of dimension 1, chars 94, and final character <F>, etc...
2945
2946    Note (*): Although these designations are not allowed in ISO2022,
2947    Emacs accepts them on decoding, and produces them on encoding
2948    CHARS96 character sets in a coding system which is characterized as
2949    7-bit environment, non-locking-shift, and non-single-shift.
2950
2951    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2952    '(' must be omitted.  We refer to this as "short-form" hereafter.
2953
2954    Now you may notice that there are a lot of ways of encoding the
2955    same multilingual text in ISO2022.  Actually, there exist many
2956    coding systems such as Compound Text (used in X11's inter-client
2957    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2958    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2959    localized platforms), and all of these are variants of ISO2022.
2960
2961    In addition to the above, Emacs handles two more kinds of escape
2962    sequences: ISO6429's direction specification and Emacs' private
2963    sequence for specifying character composition.
2964
2965    ISO6429's direction specification takes the following form:
2966         o CSI ']'      -- end of the current direction
2967         o CSI '0' ']'  -- end of the current direction
2968         o CSI '1' ']'  -- start of left-to-right text
2969         o CSI '2' ']'  -- start of right-to-left text
2970    The control character CSI (0x9B: control sequence introducer) is
2971    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2972
2973    Character composition specification takes the following form:
2974         o ESC '0' -- start relative composition
2975         o ESC '1' -- end composition
2976         o ESC '2' -- start rule-base composition (*)
2977         o ESC '3' -- start relative composition with alternate chars  (**)
2978         o ESC '4' -- start rule-base composition with alternate chars  (**)
2979   Since these are not standard escape sequences of any ISO standard,
2980   the use of them with these meanings is restricted to Emacs only.
2981
2982   (*) This form is used only in Emacs 20.7 and older versions,
2983   but newer versions can safely decode it.
2984   (**) This form is used only in Emacs 21.1 and newer versions,
2985   and older versions can't decode it.
2986
2987   Here's a list of example usages of these composition escape
2988   sequences (categorized by `enum composition_method').
2989
2990   COMPOSITION_RELATIVE:
2991         ESC 0 CHAR [ CHAR ] ESC 1
2992   COMPOSITION_WITH_RULE:
2993         ESC 2 CHAR [ RULE CHAR ] ESC 1
2994   COMPOSITION_WITH_ALTCHARS:
2995         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2996   COMPOSITION_WITH_RULE_ALTCHARS:
2997         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2998
/* Table with one `enum iso_code_class_type' entry for each of the 256
   byte values.  NOTE(review): it is not initialized here; presumably
   it is filled in elsewhere with the ISO 2022 class of each byte --
   confirm.  */
2999 enum iso_code_class_type iso_code_class[256];
3000
/* Non-zero if the charset whose id is ID is marked safe in CODING's
   safe-charsets table, i.e. ID lies within 0 .. CODING->max_charset_id
   and its table entry is not 255.

   A negative ID is reported as not safe instead of being used as an
   index in front of the table; not every caller validates ID before
   testing it.  Both arguments are evaluated more than once, so they
   must not have side effects.  */
#define SAFE_CHARSET_P(coding, id)              \
  ((id) >= 0                                    \
   && (id) <= (coding)->max_charset_id          \
   && (coding)->safe_charsets[(id)] != 255)
3004
3005
/* Non-zero if CODING_ISO_INITIAL for index 1 of the coding system
   registered for CATEGORY in `coding_categories' is not negative.
   NOTE(review): presumably this means a charset is initially
   designated to graphic register G1, so that shift-out can be used --
   confirm against the definition of CODING_ISO_INITIAL.  */
#define SHIFT_OUT_OK(category)  \
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
3008
3009 static void
3010 setup_iso_safe_charsets (attrs)
3011      Lisp_Object attrs;
3012 {
3013   Lisp_Object charset_list, safe_charsets;
3014   Lisp_Object request;
3015   Lisp_Object reg_usage;
3016   Lisp_Object tail;
3017   int reg94, reg96;
3018   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
3019   int max_charset_id;
3020
3021   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3022   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
3023       && ! EQ (charset_list, Viso_2022_charset_list))
3024     {
3025       CODING_ATTR_CHARSET_LIST (attrs)
3026         = charset_list = Viso_2022_charset_list;
3027       ASET (attrs, coding_attr_safe_charsets, Qnil);
3028     }
3029
3030   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3031     return;
3032
3033   max_charset_id = 0;
3034   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3035     {
3036       int id = XINT (XCAR (tail));
3037       if (max_charset_id < id)
3038         max_charset_id = id;
3039     }
3040
3041   safe_charsets = make_uninit_string (max_charset_id + 1);
3042   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
3043   request = AREF (attrs, coding_attr_iso_request);
3044   reg_usage = AREF (attrs, coding_attr_iso_usage);
3045   reg94 = XINT (XCAR (reg_usage));
3046   reg96 = XINT (XCDR (reg_usage));
3047
3048   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3049     {
3050       Lisp_Object id;
3051       Lisp_Object reg;
3052       struct charset *charset;
3053
3054       id = XCAR (tail);
3055       charset = CHARSET_FROM_ID (XINT (id));
3056       reg = Fcdr (Fassq (id, request));
3057       if (! NILP (reg))
3058         SSET (safe_charsets, XINT (id), XINT (reg));
3059       else if (charset->iso_chars_96)
3060         {
3061           if (reg96 < 4)
3062             SSET (safe_charsets, XINT (id), reg96);
3063         }
3064       else
3065         {
3066           if (reg94 < 4)
3067             SSET (safe_charsets, XINT (id), reg94);
3068         }
3069     }
3070   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3071 }
3072
3073
3074 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3075    Check if a text is encoded in one of the ISO-2022 based coding systems.
3076    If it is, return 1, else return 0.  */
3077
3078 static int
3079 detect_coding_iso_2022 (coding, detect_info)
3080      struct coding_system *coding;
3081      struct coding_detection_info *detect_info;
3082 {
3083   const unsigned char *src = coding->source, *src_base = src;
3084   const unsigned char *src_end = coding->source + coding->src_bytes;
3085   int multibytep = coding->src_multibyte;
3086   int single_shifting = 0;
3087   int id;
3088   int c, c1;
3089   int consumed_chars = 0;
3090   int i;
3091   int rejected = 0;
3092   int found = 0;
3093   int composition_count = -1;
3094
3095   detect_info->checked |= CATEGORY_MASK_ISO;
3096
3097   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3098     {
3099       struct coding_system *this = &(coding_categories[i]);
3100       Lisp_Object attrs, val;
3101
3102       if (this->id < 0)
3103         continue;
3104       attrs = CODING_ID_ATTRS (this->id);
3105       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3106           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3107         setup_iso_safe_charsets (attrs);
3108       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3109       this->max_charset_id = SCHARS (val) - 1;
3110       this->safe_charsets = SDATA (val);
3111     }
3112
3113   /* A coding system of this category is always ASCII compatible.  */
3114   src += coding->head_ascii;
3115
3116   while (rejected != CATEGORY_MASK_ISO)
3117     {
3118       src_base = src;
3119       ONE_MORE_BYTE (c);
3120       switch (c)
3121         {
3122         case ISO_CODE_ESC:
3123           if (inhibit_iso_escape_detection)
3124             break;
3125           single_shifting = 0;
3126           ONE_MORE_BYTE (c);
3127           if (c >= '(' && c <= '/')
3128             {
3129               /* Designation sequence for a charset of dimension 1.  */
3130               ONE_MORE_BYTE (c1);
3131               if (c1 < ' ' || c1 >= 0x80
3132                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3133                 /* Invalid designation sequence.  Just ignore.  */
3134                 break;
3135             }
3136           else if (c == '$')
3137             {
3138               /* Designation sequence for a charset of dimension 2.  */
3139               ONE_MORE_BYTE (c);
3140               if (c >= '@' && c <= 'B')
3141                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3142                 id = iso_charset_table[1][0][c];
3143               else if (c >= '(' && c <= '/')
3144                 {
3145                   ONE_MORE_BYTE (c1);
3146                   if (c1 < ' ' || c1 >= 0x80
3147                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3148                     /* Invalid designation sequence.  Just ignore.  */
3149                     break;
3150                 }
3151               else
3152                 /* Invalid designation sequence.  Just ignore it.  */
3153                 break;
3154             }
3155           else if (c == 'N' || c == 'O')
3156             {
3157               /* ESC <Fe> for SS2 or SS3.  */
3158               single_shifting = 1;
3159               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3160               break;
3161             }
3162           else if (c == '1')
3163             {
3164               /* End of composition.  */
3165               if (composition_count < 0
3166                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3167                 /* Invalid */
3168                 break;
3169               composition_count = -1;
3170               found |= CATEGORY_MASK_ISO;
3171             }
3172           else if (c >= '0' && c <= '4')
3173             {
3174               /* ESC <Fp> for start/end composition.  */
3175               composition_count = 0;
3176               break;
3177             }
3178           else
3179             {
3180               /* Invalid escape sequence.  Just ignore it.  */
3181               break;
3182             }
3183
3184           /* We found a valid designation sequence for CHARSET.  */
3185           rejected |= CATEGORY_MASK_ISO_8BIT;
3186           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3187                               id))
3188             found |= CATEGORY_MASK_ISO_7;
3189           else
3190             rejected |= CATEGORY_MASK_ISO_7;
3191           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3192                               id))
3193             found |= CATEGORY_MASK_ISO_7_TIGHT;
3194           else
3195             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3196           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3197                               id))
3198             found |= CATEGORY_MASK_ISO_7_ELSE;
3199           else
3200             rejected |= CATEGORY_MASK_ISO_7_ELSE;
3201           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3202                               id))
3203             found |= CATEGORY_MASK_ISO_8_ELSE;
3204           else
3205             rejected |= CATEGORY_MASK_ISO_8_ELSE;
3206           break;
3207
3208         case ISO_CODE_SO:
3209         case ISO_CODE_SI:
3210           /* Locking shift out/in.  */
3211           if (inhibit_iso_escape_detection)
3212             break;
3213           single_shifting = 0;
3214           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3215           break;
3216
3217         case ISO_CODE_CSI:
3218           /* Control sequence introducer.  */
3219           single_shifting = 0;
3220           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3221           found |= CATEGORY_MASK_ISO_8_ELSE;
3222           goto check_extra_latin;
3223
3224         case ISO_CODE_SS2:
3225         case ISO_CODE_SS3:
3226           /* Single shift.   */
3227           if (inhibit_iso_escape_detection)
3228             break;
3229           single_shifting = 0;
3230           rejected |= CATEGORY_MASK_ISO_7BIT;
3231           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3232               & CODING_ISO_FLAG_SINGLE_SHIFT)
3233             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
3234           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3235               & CODING_ISO_FLAG_SINGLE_SHIFT)
3236             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3237           if (single_shifting)
3238             break;
3239           goto check_extra_latin;
3240
3241         default:
3242           if (c < 0)
3243             continue;
3244           if (c < 0x80)
3245             {
3246               if (composition_count >= 0)
3247                 composition_count++;
3248               single_shifting = 0;
3249               break;
3250             }
3251           if (c >= 0xA0)
3252             {
3253               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3254               found |= CATEGORY_MASK_ISO_8_1;
3255               /* Check the length of succeeding codes of the range
3256                  0xA0..0FF.  If the byte length is even, we include
3257                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3258                  only when we are not single shifting.  */
3259               if (! single_shifting
3260                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3261                 {
3262                   int i = 1;
3263                   while (src < src_end)
3264                     {
3265                       src_base = src;
3266                       ONE_MORE_BYTE (c);
3267                       if (c < 0xA0)
3268                         {
3269                           src = src_base;
3270                           break;
3271                         }
3272                       i++;
3273                     }
3274
3275                   if (i & 1 && src < src_end)
3276                     {
3277                       rejected |= CATEGORY_MASK_ISO_8_2;
3278                       if (composition_count >= 0)
3279                         composition_count += i;
3280                     }
3281                   else
3282                     {
3283                       found |= CATEGORY_MASK_ISO_8_2;
3284                       if (composition_count >= 0)
3285                         composition_count += i / 2;
3286                     }
3287                 }
3288               break;
3289             }
3290         check_extra_latin:
3291           single_shifting = 0;
3292           if (! VECTORP (Vlatin_extra_code_table)
3293               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3294             {
3295               rejected = CATEGORY_MASK_ISO;
3296               break;
3297             }
3298           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3299               & CODING_ISO_FLAG_LATIN_EXTRA)
3300             found |= CATEGORY_MASK_ISO_8_1;
3301           else
3302             rejected |= CATEGORY_MASK_ISO_8_1;
3303           rejected |= CATEGORY_MASK_ISO_8_2;
3304         }
3305     }
3306   detect_info->rejected |= CATEGORY_MASK_ISO;
3307   return 0;
3308
3309  no_more_source:
3310   detect_info->rejected |= rejected;
3311   detect_info->found |= (found & ~rejected);
3312   return 1;
3313 }
3314
3315
3316 /* Set designation state into CODING: designate to REG the charset found
3317    for DIM, CHARS_96 and FINAL.  Set CHARS_96 to -1 if the escape sequence should be kept.  */
3318 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3319   do { /* The variable coding comes from the expansion site.  */        \
3320     int id, prev;                                                       \
3321     /* Reject FINAL if out of range, not in the table, or not safe.  */ \
3322     if (final < '0' || final >= 128                                     \
3323         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
3324         || !SAFE_CHARSET_P (coding, id))                                \
3325       {                                                                 \
3326         CODING_ISO_DESIGNATION (coding, reg) = -2; /* -2: invalid */    \
3327         chars_96 = -1;  /* keep the escape sequence */                  \
3328         break;  /* leaves this do-while only */                         \
3329       }                                                                 \
3330     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
3331     if (id == charset_jisx0201_roman)                                   \
3332       { /* With USE_ROMAN, designate ASCII instead.  */                 \
3333         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
3334           id = charset_ascii;                                           \
3335       }                                                                 \
3336     else if (id == charset_jisx0208_1978)                               \
3337       { /* With USE_OLDJIS, designate JISX0208 instead.  */             \
3338         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3339           id = charset_jisx0208;                                        \
3340       }                                                                 \
3341     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3342     /* If there was an invalid designation to REG previously, and this  \
3343        designation is ASCII to REG, we should keep this designation     \
3344        sequence.  */                                                    \
3345     if (prev == -2 && id == charset_ascii)                              \
3346       chars_96 = -1;                                                    \
3347   } while (0)
3348
3349
3350 /* Handle these composition sequences (ALT: alternate char):
3351
3352    (1) relative composition: ESC 0 CHAR ... ESC 1
3353    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3354    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3355    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3356
3357    When the start sequence (ESC 0/2/3/4) is found, this annotation
3358    header is produced.
3359
3360         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3361
3362    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3363    produced until the end sequence (ESC 1) is found:
3364
3365    (1) CHAR ... CHAR
3366    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3367    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3368    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3369
3370    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3371    annotation header are updated as below:
3372
3373    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3374    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3375    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3376    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3377
3378    If an error is found while composing, the annotation header is
3379    changed to:
3380
3381         [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3382
3383    and the sequence [ -2 DECODED-RULE ] is changed to the original
3384    byte sequence as below:
3385         o the original byte sequence is B: [ B -1 ]
3386         o the original byte sequence is B1 B2: [ B1 B2 ]
3387    and the sequence [ -1 -1 ] is changed to the original byte
3388    sequence:
3389         [ ESC '0' ]
3390 */
3391
3392 /* Decode a composition rule from C1 (the variable c1 at the expansion
3393    site) and maybe one more byte from the source; set RULE to the encoded
3394    composition rule and NBYTES to its length in bytes.  If the rule is
3395    invalid, set RULE to some negative value (NBYTES may then be unset).  */
3396
3397 #define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
3398   do {                                                                  \
3399     rule = c1 - 32;             /* rule bytes are biased by 32 */       \
3400     if (rule < 0)                                                       \
3401       break;                    /* invalid: RULE stays negative */      \
3402     if (rule < 81)              /* old format (before ver.21) */        \
3403       { /* One byte: 32 + GREF * 9 + NREF, with 4 standing for 10.  */  \
3404         int gref = (rule) / 9;                                          \
3405         int nref = (rule) % 9;                                          \
3406         if (gref == 4) gref = 10;                                       \
3407         if (nref == 4) nref = 10;                                       \
3408         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
3409         nbytes = 1;                                                     \
3410       }                                                                 \
3411     else                        /* new format (after ver.21) */         \
3412       { /* Two bytes: 32 + 81 + GREF, then 32 + NREF.  */               \
3413         int c;                                                          \
3414                                                                         \
3415         ONE_MORE_BYTE (c);                                              \
3416         rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32);             \
3417         if (rule >= 0)                                                  \
3418           rule += 0x100;   /* to distinguish it from the old format */  \
3419         nbytes = 2;                                                     \
3420       }                                                                 \
3421   } while (0)
3422
3423 #define ENCODE_COMPOSITION_RULE(rule)                           \
3424   do { /* Rewrite RULE at charbuf[idx] as source bytes.  */     \
3425     int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3426     /* Caller supplies charbuf, idx and new_chars.  */          \
3427     if (rule < 0x100)           /* old format: one byte */      \
3428       {                                                         \
3429         if (gref == 10) gref = 4;                               \
3430         if (nref == 10) nref = 4;                               \
3431         charbuf[idx] = 32 + gref * 9 + nref;                    \
3432         charbuf[idx + 1] = -1;  /* pad the second slot */       \
3433         new_chars++;                                            \
3434       }                                                         \
3435     else                                /* new format */        \
3436       {                                                         \
3437         charbuf[idx] = 32 + 81 + gref;                          \
3438         charbuf[idx + 1] = 32 + nref;                           \
3439         new_chars += 2;                                         \
3440       }                                                         \
3441   } while (0)
3442
3443 /* Finish the current composition as invalid by restoring its original byte sequence in CHARBUF.  */
3444
3445 static int finish_composition P_ ((int *, struct composition_status *));
3446 /* CHARBUF must point just past the CMP_STATUS->length elements of the composition.  */
3447 static int
3448 finish_composition (charbuf, cmp_status)
3449      int *charbuf;
3450      struct composition_status *cmp_status;
3451 {
3452   int idx = - cmp_status->length;       /* index of the annotation header */
3453   int new_chars;        /* result; ENCODE_COMPOSITION_RULE adds to it */
3454
3455   /* Recover the original ESC sequence over the five header slots.  */
3456   charbuf[idx++] = ISO_CODE_ESC;
3457   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3458                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3459                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3460                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3461                     : '4');
3462   charbuf[idx++] = -2;          /* the remaining three header slots */
3463   charbuf[idx++] = 0;
3464   charbuf[idx++] = -1;
3465   new_chars = cmp_status->nchars;
3466   if (cmp_status->method >= COMPOSITION_WITH_RULE)  /* presumably: not COMPOSITION_RELATIVE -- confirm enum order */
3467     for (; idx < 0; idx++)      /* walk the data after the header */
3468       {
3469         int elt = charbuf[idx];
3470
3471         if (elt == -2)          /* a [ -2 DECODED-RULE ] pair */
3472           {
3473             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3474             idx++;              /* step over the second slot of the pair */
3475           }
3476         else if (elt == -1)     /* a [ -1 -1 ] pair, restored to ESC 0 */
3477           {
3478             charbuf[idx++] = ISO_CODE_ESC;
3479             charbuf[idx] = '0';
3480             new_chars += 2;
3481           }
3482       }
3483   cmp_status->state = COMPOSING_NO;
3484   return new_chars;
3485 }
3486
3487 /* If characters are under composition, finish the composition.  */
3488 #define MAYBE_FINISH_COMPOSITION()                              \
3489   do { /* Uses cmp_status, charbuf and char_offset.  */         \
3490     if (cmp_status->state != COMPOSING_NO)                      \
3491       char_offset += finish_composition (charbuf, cmp_status);  \
3492   } while (0)
3493
3494 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3495
3496    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3497    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3498    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3499    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3500
3501    Produce this annotation sequence now:
3502
3503    [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3504    (ESC 0 that ends the ALT part of ESC 3 or ESC 4 stores [ -1 -1 ] instead.)  */
3505
3506 #define DECODE_COMPOSITION_START(c1)                                       \
3507   do {                                                                     \
3508     if (c1 == '0'                                                          \
3509         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
3510              && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
3511             || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
3512                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3513       { /* ESC 0 ends the ALT part of an ESC 3/4 composition.  */          \
3514         *charbuf++ = -1;                                                   \
3515         *charbuf++= -1;                                                    \
3516         cmp_status->state = COMPOSING_CHAR;                                \
3517         cmp_status->length += 2;                                           \
3518       }                                                                    \
3519     else                                                                   \
3520       { /* Otherwise start a new composition from scratch.  */             \
3521         MAYBE_FINISH_COMPOSITION ();                                       \
3522         cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE             \
3523                               : c1 == '2' ? COMPOSITION_WITH_RULE          \
3524                               : c1 == '3' ? COMPOSITION_WITH_ALTCHARS      \
3525                               : COMPOSITION_WITH_RULE_ALTCHARS);           \
3526         cmp_status->state                                                  \
3527           = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);       \
3528         ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
3529         cmp_status->length = MAX_ANNOTATION_LENGTH; /* header only */      \
3530         cmp_status->nchars = cmp_status->ncomps = 0;                       \
3531         coding->annotated = 1;                                             \
3532       }                                                                    \
3533   } while (0)
3534
3535
3536 /* Handle composition end sequence ESC 1: complete the annotation header.  */
3537
3538 #define DECODE_COMPOSITION_END()                                        \
3539   do { /* May jump to invalid_code at the expansion site.  */           \
3540     if (cmp_status->nchars == 0                                         \
3541         || ((cmp_status->state == COMPOSING_CHAR)                       \
3542             == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
3543       { /* No CHAR yet, or ESC 1 arrived in a wrong state.  */          \
3544         MAYBE_FINISH_COMPOSITION ();                                    \
3545         goto invalid_code;                                              \
3546       } /* The header stores -LENGTH, so "-=" lengthens it.  */         \
3547     if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
3548       charbuf[- cmp_status->length] -= cmp_status->ncomps + 2;          \
3549     else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
3550       charbuf[- cmp_status->length] -= cmp_status->ncomps * 3;          \
3551     charbuf[- cmp_status->length + 2] = cmp_status->nchars; /* NCHARS */\
3552     char_offset += cmp_status->nchars;                                  \
3553     cmp_status->state = COMPOSING_NO;                                   \
3554   } while (0)
3555
3556 /* Store a composition rule RULE in charbuf as [ -2 RULE ], and update cmp_status.  */
3557
3558 #define STORE_COMPOSITION_RULE(rule)    \
3559   do {                                  \
3560     *charbuf++ = -2;                    \
3561     *charbuf++ = rule;                  \
3562     cmp_status->length += 2;            \
3563     cmp_status->state--;  /* undoes the ++ of STORE_COMPOSITION_CHAR */ \
3564   } while (0)
3565
3566 /* Store a composed char or a component char C in charbuf, and update
3567    cmp_status: count it in nchars or ncomps and, where a rule must follow, advance the state.  */
3568
3569 #define STORE_COMPOSITION_CHAR(c)                                       \
3570   do {                                                                  \
3571     *charbuf++ = (c);                                                   \
3572     cmp_status->length++;                                               \
3573     if (cmp_status->state == COMPOSING_CHAR)                            \
3574       cmp_status->nchars++;             /* a composed char */           \
3575     else                                                                \
3576       cmp_status->ncomps++;             /* a component char */          \
3577     if (cmp_status->method == COMPOSITION_WITH_RULE                     \
3578         || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS        \
3579             && cmp_status->state == COMPOSING_COMPONENT_CHAR))          \
3580       cmp_status->state++;  /* presumably to the matching RULE state */ \
3581   } while (0)
3582
3583
3584 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3585
3586 static void
3587 decode_coding_iso_2022 (coding)
3588      struct coding_system *coding;
3589 {
3590   const unsigned char *src = coding->source + coding->consumed;
3591   const unsigned char *src_end = coding->source + coding->src_bytes;
3592   const unsigned char *src_base;
3593   int *charbuf = coding->charbuf + coding->charbuf_used;
3594   /* We may produce two annocations (charset and composition) in one
3595      loop and one more charset annocation at the end.  */
3596   int *charbuf_end
3597     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3598   int consumed_chars = 0, consumed_chars_base;
3599   int multibytep = coding->src_multibyte;
3600   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3601   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3602   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3603   int charset_id_2, charset_id_3;
3604   struct charset *charset;
3605   int c;
3606   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3607   Lisp_Object attrs, charset_list;
3608   int char_offset = coding->produced_char;
3609   int last_offset = char_offset;
3610   int last_id = charset_ascii;
3611   int eol_crlf =
3612     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3613   int byte_after_cr = -1;
3614   int i;
3615
3616   CODING_GET_INFO (coding, attrs, charset_list);
3617   setup_iso_safe_charsets (attrs);
3618   /* Charset list may have been changed.  */
3619   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3620   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3621
3622   if (cmp_status->state != COMPOSING_NO)
3623     {
3624       for (i = 0; i < cmp_status->length; i++)
3625         *charbuf++ = cmp_status->carryover[i];
3626       coding->annotated = 1;
3627     }
3628
3629   while (1)
3630     {
3631       int c1, c2, c3;
3632
3633       src_base = src;
3634       consumed_chars_base = consumed_chars;
3635
3636       if (charbuf >= charbuf_end)
3637         {
3638           if (byte_after_cr >= 0)
3639             src_base--;
3640           break;
3641         }
3642
3643       if (byte_after_cr >= 0)
3644         c1 = byte_after_cr, byte_after_cr = -1;
3645       else
3646         ONE_MORE_BYTE (c1);
3647       if (c1 < 0)
3648         goto invalid_code;
3649
3650       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3651         {
3652           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3653           char_offset++;
3654           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3655           continue;
3656         }
3657
3658       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3659         {
3660           if (c1 == ISO_CODE_ESC)
3661             {
3662               if (src + 1 >= src_end)
3663                 goto no_more_source;
3664               *charbuf++ = ISO_CODE_ESC;
3665               char_offset++;
3666               if (src[0] == '%' && src[1] == '@')
3667                 {
3668                   src += 2;
3669                   consumed_chars += 2;
3670                   char_offset += 2;
3671                   /* We are sure charbuf can contain two more chars. */
3672                   *charbuf++ = '%';
3673                   *charbuf++ = '@';
3674                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3675                 }
3676             }
3677           else
3678             {
3679               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3680               char_offset++;
3681             }
3682           continue;
3683         }
3684
3685       if ((cmp_status->state == COMPOSING_RULE
3686            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3687           && c1 != ISO_CODE_ESC)
3688         {
3689           int rule, nbytes;
3690
3691           DECODE_COMPOSITION_RULE (rule, nbytes);
3692           if (rule < 0)
3693             goto invalid_code;
3694           STORE_COMPOSITION_RULE (rule);
3695           continue;
3696         }
3697
3698       /* We produce at most one character.  */
3699       switch (iso_code_class [c1])
3700         {
3701         case ISO_0x20_or_0x7F:
3702           if (charset_id_0 < 0
3703               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3704             /* This is SPACE or DEL.  */
3705             charset = CHARSET_FROM_ID (charset_ascii);
3706           else
3707             charset = CHARSET_FROM_ID (charset_id_0);
3708           break;
3709
3710         case ISO_graphic_plane_0:
3711           if (charset_id_0 < 0)
3712             charset = CHARSET_FROM_ID (charset_ascii);
3713           else
3714             charset = CHARSET_FROM_ID (charset_id_0);
3715           break;
3716
3717         case ISO_0xA0_or_0xFF:
3718           if (charset_id_1 < 0
3719               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3720               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3721             goto invalid_code;
3722           /* This is a graphic character, we fall down ... */
3723
3724         case ISO_graphic_plane_1:
3725           if (charset_id_1 < 0)
3726             goto invalid_code;
3727           charset = CHARSET_FROM_ID (charset_id_1);
3728           break;
3729
3730         case ISO_control_0:
3731           if (eol_crlf && c1 == '\r')
3732             ONE_MORE_BYTE (byte_after_cr);
3733           MAYBE_FINISH_COMPOSITION ();
3734           charset = CHARSET_FROM_ID (charset_ascii);
3735           break;
3736
3737         case ISO_control_1:
3738           goto invalid_code;
3739
3740         case ISO_shift_out:
3741           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3742               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3743             goto invalid_code;
3744           CODING_ISO_INVOCATION (coding, 0) = 1;
3745           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3746           continue;
3747
3748         case ISO_shift_in:
3749           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3750             goto invalid_code;
3751           CODING_ISO_INVOCATION (coding, 0) = 0;
3752           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3753           continue;
3754
3755         case ISO_single_shift_2_7:
3756           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3757             goto invalid_code;
3758         case ISO_single_shift_2:
3759           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3760             goto invalid_code;
3761           /* SS2 is handled as an escape sequence of ESC 'N' */
3762           c1 = 'N';
3763           goto label_escape_sequence;
3764
3765         case ISO_single_shift_3:
3766           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3767             goto invalid_code;
3768           /* SS2 is handled as an escape sequence of ESC 'O' */
3769           c1 = 'O';
3770           goto label_escape_sequence;
3771
3772         case ISO_control_sequence_introducer:
3773           /* CSI is handled as an escape sequence of ESC '[' ...  */
3774           c1 = '[';
3775           goto label_escape_sequence;
3776
3777         case ISO_escape:
3778           ONE_MORE_BYTE (c1);
3779         label_escape_sequence:
3780           /* Escape sequences handled here are invocation,
3781              designation, direction specification, and character
3782              composition specification.  */
3783           switch (c1)
3784             {
3785             case '&':           /* revision of following character set */
3786               ONE_MORE_BYTE (c1);
3787               if (!(c1 >= '@' && c1 <= '~'))
3788                 goto invalid_code;
3789               ONE_MORE_BYTE (c1);
3790               if (c1 != ISO_CODE_ESC)
3791                 goto invalid_code;
3792               ONE_MORE_BYTE (c1);
3793               goto label_escape_sequence;
3794
3795             case '$':           /* designation of 2-byte character set */
3796               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3797                 goto invalid_code;
3798               {
3799                 int reg, chars96;
3800
3801                 ONE_MORE_BYTE (c1);
3802                 if (c1 >= '@' && c1 <= 'B')
3803                   {     /* designation of JISX0208.1978, GB2312.1980,
3804                            or JISX0208.1980 */
3805                     reg = 0, chars96 = 0;
3806                   }
3807                 else if (c1 >= 0x28 && c1 <= 0x2B)
3808                   { /* designation of DIMENSION2_CHARS94 character set */
3809                     reg = c1 - 0x28, chars96 = 0;
3810                     ONE_MORE_BYTE (c1);
3811                   }
3812                 else if (c1 >= 0x2C && c1 <= 0x2F)
3813                   { /* designation of DIMENSION2_CHARS96 character set */
3814                     reg = c1 - 0x2C, chars96 = 1;
3815                     ONE_MORE_BYTE (c1);
3816                   }
3817                 else
3818                   goto invalid_code;
3819                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3820                 /* We must update these variables now.  */
3821                 if (reg == 0)
3822                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3823                 else if (reg == 1)
3824                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3825                 if (chars96 < 0)
3826                   goto invalid_code;
3827               }
3828               continue;
3829
3830             case 'n':           /* invocation of locking-shift-2 */
3831               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3832                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3833                 goto invalid_code;
3834               CODING_ISO_INVOCATION (coding, 0) = 2;
3835               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3836               continue;
3837
3838             case 'o':           /* invocation of locking-shift-3 */
3839               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3840                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3841                 goto invalid_code;
3842               CODING_ISO_INVOCATION (coding, 0) = 3;
3843               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3844               continue;
3845
3846             case 'N':           /* invocation of single-shift-2 */
3847               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3848                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3849                 goto invalid_code;
3850               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3851               if (charset_id_2 < 0)
3852                 charset = CHARSET_FROM_ID (charset_ascii);
3853               else
3854                 charset = CHARSET_FROM_ID (charset_id_2);
3855               ONE_MORE_BYTE (c1);
3856               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3857                 goto invalid_code;
3858               break;
3859
3860             case 'O':           /* invocation of single-shift-3 */
3861               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3862                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3863                 goto invalid_code;
3864               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3865               if (charset_id_3 < 0)
3866                 charset = CHARSET_FROM_ID (charset_ascii);
3867               else
3868                 charset = CHARSET_FROM_ID (charset_id_3);
3869               ONE_MORE_BYTE (c1);
3870               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3871                 goto invalid_code;
3872               break;
3873
3874             case '0': case '2': case '3': case '4': /* start composition */
3875               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3876                 goto invalid_code;
3877               if (last_id != charset_ascii)
3878                 {
3879                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3880                   last_id = charset_ascii;
3881                   last_offset = char_offset;
3882                 }
3883               DECODE_COMPOSITION_START (c1);
3884               continue;
3885
3886             case '1':           /* end composition */
3887               if (cmp_status->state == COMPOSING_NO)
3888                 goto invalid_code;
3889               DECODE_COMPOSITION_END ();
3890               continue;
3891
3892             case '[':           /* specification of direction */
3893               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3894                 goto invalid_code;
3895               /* For the moment, nested direction is not supported.
3896                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3897                  left-to-right, and nonzero means right-to-left.  */
3898               ONE_MORE_BYTE (c1);
3899               switch (c1)
3900                 {
3901                 case ']':       /* end of the current direction */
3902                   coding->mode &= ~CODING_MODE_DIRECTION;
3903
3904                 case '0':       /* end of the current direction */
3905                 case '1':       /* start of left-to-right direction */
3906                   ONE_MORE_BYTE (c1);
3907                   if (c1 == ']')
3908                     coding->mode &= ~CODING_MODE_DIRECTION;
3909                   else
3910                     goto invalid_code;
3911                   break;
3912
3913                 case '2':       /* start of right-to-left direction */
3914                   ONE_MORE_BYTE (c1);
3915                   if (c1 == ']')
3916                     coding->mode |= CODING_MODE_DIRECTION;
3917                   else
3918                     goto invalid_code;
3919                   break;
3920
3921                 default:
3922                   goto invalid_code;
3923                 }
3924               continue;
3925
3926             case '%':
3927               ONE_MORE_BYTE (c1);
3928               if (c1 == '/')
3929                 {
3930                   /* CTEXT extended segment:
3931                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3932                      We keep these bytes as is for the moment.
3933                      They may be decoded by post-read-conversion.  */
3934                   int dim, M, L;
3935                   int size;
3936
3937                   ONE_MORE_BYTE (dim);
3938                   if (dim < '0' || dim > '4')
3939                     goto invalid_code;
3940                   ONE_MORE_BYTE (M);
3941                   if (M < 128)
3942                     goto invalid_code;
3943                   ONE_MORE_BYTE (L);
3944                   if (L < 128)
3945                     goto invalid_code;
3946                   size = ((M - 128) * 128) + (L - 128);
3947                   if (charbuf + 6 > charbuf_end)
3948                     goto break_loop;
3949                   *charbuf++ = ISO_CODE_ESC;
3950                   *charbuf++ = '%';
3951                   *charbuf++ = '/';
3952                   *charbuf++ = dim;
3953                   *charbuf++ = BYTE8_TO_CHAR (M);
3954                   *charbuf++ = BYTE8_TO_CHAR (L);
3955                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3956                 }
3957               else if (c1 == 'G')
3958                 {
3959                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3960                      ESC % G --UTF-8-BYTES-- ESC % @
3961                      We keep these bytes as is for the moment.
3962                      They may be decoded by post-read-conversion.  */
3963                   if (charbuf + 3 > charbuf_end)
3964                     goto break_loop;
3965                   *charbuf++ = ISO_CODE_ESC;
3966                   *charbuf++ = '%';
3967                   *charbuf++ = 'G';
3968                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3969                 }
3970               else
3971                 goto invalid_code;
3972               continue;
3973               break;
3974
3975             default:
3976               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3977                 goto invalid_code;
3978               {
3979                 int reg, chars96;
3980
3981                 if (c1 >= 0x28 && c1 <= 0x2B)
3982                   { /* designation of DIMENSION1_CHARS94 character set */
3983                     reg = c1 - 0x28, chars96 = 0;
3984                     ONE_MORE_BYTE (c1);
3985                   }
3986                 else if (c1 >= 0x2C && c1 <= 0x2F)
3987                   { /* designation of DIMENSION1_CHARS96 character set */
3988                     reg = c1 - 0x2C, chars96 = 1;
3989                     ONE_MORE_BYTE (c1);
3990                   }
3991                 else
3992                   goto invalid_code;
3993                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3994                 /* We must update these variables now.  */
3995                 if (reg == 0)
3996                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3997                 else if (reg == 1)
3998                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3999                 if (chars96 < 0)
4000                   goto invalid_code;
4001               }
4002               continue;
4003             }
4004         }
4005
4006       if (cmp_status->state == COMPOSING_NO
4007           && charset->id != charset_ascii
4008           && last_id != charset->id)
4009         {
4010           if (last_id != charset_ascii)
4011             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4012           last_id = charset->id;
4013           last_offset = char_offset;
4014         }
4015
4016       /* Now we know CHARSET and 1st position code C1 of a character.
4017          Produce a decoded character while getting 2nd and 3rd
4018          position codes C2, C3 if necessary.  */
4019       if (CHARSET_DIMENSION (charset) > 1)
4020         {
4021           ONE_MORE_BYTE (c2);
4022           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
4023               || ((c1 & 0x80) != (c2 & 0x80)))
4024             /* C2 is not in a valid range.  */
4025             goto invalid_code;
4026           if (CHARSET_DIMENSION (charset) == 2)
4027             c1 = (c1 << 8) | c2;
4028           else
4029             {
4030               ONE_MORE_BYTE (c3);
4031               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
4032                   || ((c1 & 0x80) != (c3 & 0x80)))
4033                 /* C3 is not in a valid range.  */
4034                 goto invalid_code;
4035               c1 = (c1 << 16) | (c2 << 8) | c2;
4036             }
4037         }
4038       c1 &= 0x7F7F7F;
4039       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4040       if (c < 0)
4041         {
4042           MAYBE_FINISH_COMPOSITION ();
4043           for (; src_base < src; src_base++, char_offset++)
4044             {
4045               if (ASCII_BYTE_P (*src_base))
4046                 *charbuf++ = *src_base;
4047               else
4048                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4049             }
4050         }
4051       else if (cmp_status->state == COMPOSING_NO)
4052         {
4053           *charbuf++ = c;
4054           char_offset++;
4055         }
4056       else if ((cmp_status->state == COMPOSING_CHAR
4057                 ? cmp_status->nchars
4058                 : cmp_status->ncomps)
4059                >= MAX_COMPOSITION_COMPONENTS)
4060         {
4061           /* Too long composition.  */
4062           MAYBE_FINISH_COMPOSITION ();
4063           *charbuf++ = c;
4064           char_offset++;
4065         }
4066       else
4067         STORE_COMPOSITION_CHAR (c);
4068       continue;
4069
4070     invalid_code:
4071       MAYBE_FINISH_COMPOSITION ();
4072       src = src_base;
4073       consumed_chars = consumed_chars_base;
4074       ONE_MORE_BYTE (c);
4075       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4076       char_offset++;
4077       coding->errors++;
4078       continue;
4079
4080     break_loop:
4081       break;
4082     }
4083
4084  no_more_source:
4085   if (cmp_status->state != COMPOSING_NO)
4086     {
4087       if (coding->mode & CODING_MODE_LAST_BLOCK)
4088         MAYBE_FINISH_COMPOSITION ();
4089       else
4090         {
4091           charbuf -= cmp_status->length;
4092           for (i = 0; i < cmp_status->length; i++)
4093             cmp_status->carryover[i] = charbuf[i];
4094         }
4095     }
4096   else if (last_id != charset_ascii)
4097     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4098   coding->consumed_char += consumed_chars_base;
4099   coding->consumed = src_base - coding->source;
4100   coding->charbuf_used = charbuf - coding->charbuf;
4101 }
4102
4103
4104 /* ISO2022 encoding stuff.  */
4105
4106 /*
4107    It is not enough to say just "ISO2022" when encoding; we have to
4108    specify more details.  In Emacs, each coding system of ISO2022
4109    variant has the following specifications:
4110         1. Initial designation to G0 thru G3.
4111         2. Allows short-form designation?
4112         3. ASCII should be designated to G0 before control characters?
4113         4. ASCII should be designated to G0 at end of line?
4114         5. 7-bit environment or 8-bit environment?
4115         6. Use locking-shift?
4116         7. Use Single-shift?
4117    And the following two are only for Japanese:
4118         8. Use ASCII in place of JISX0201-1976-Roman?
4119         9. Use JISX0208-1983 in place of JISX0208-1978?
4120    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4121    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4122    details.
4123 */
4124
4125 /* Produce codes (escape sequence) for designating CHARSET to graphic
4126    register REG at DST, and increment DST.  If <final-char> of CHARSET is
4127    '@', 'A', or 'B' and the coding system CODING allows, produce
4128    designation sequence of short-form.  */
     /* Bytes are emitted in this order:
          1. ESC '&' <'@' + revision>, only when CODING has
             CODING_ISO_FLAG_REVISION set and CHARSET_ISO_REVISION (charset)
             is not negative.
          2. ESC.
          3. For a charset of dimension 1: one intermediate byte taken from
             "()*+" (94-character sets) or ",-./" (96-character sets),
             indexed by REG.
             For any other dimension: '$' followed by the intermediate
             byte.  For 94-character sets the intermediate byte is left
             out (short form) when CODING_ISO_FLAG_LONG_FORM is clear, REG
             is 0 and the final character lies in '@'..'B'.
          4. The final character of CHARSET.
        Afterwards CODING_ISO_DESIGNATION (coding, reg) is set to the ID of
        CHARSET.

        REG indexes 4-character strings, so it must be in the range 0..3.
        The macro declares locals named final_char, intermediate_char_94,
        intermediate_char_96, revision and c; argument expressions must
        not refer to variables with those names.
        NOTE(review): the EMIT_* helpers are defined outside this part of
        the file; they presumably write through a variable named `dst' at
        the expansion site -- confirm against their definitions.  */
4129
4130 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
4131   do {                                                                  \
4132     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
4133     char *intermediate_char_94 = "()*+";                                \
4134     char *intermediate_char_96 = ",-./";                                \
4135     int revision = -1;                                                  \
4136     int c;                                                              \
4137                                                                         \
4138     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
4139       revision = CHARSET_ISO_REVISION (charset);                        \
4140                                                                         \
       /* Revision announcement precedes the designation itself.  */       \
4141     if (revision >= 0)                                                  \
4142       {                                                                 \
4143         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
4144         EMIT_ONE_BYTE ('@' + revision);                                 \
4145       }                                                                 \
4146     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
4147     if (CHARSET_DIMENSION (charset) == 1)                               \
4148       {                                                                 \
4149         if (! CHARSET_ISO_CHARS_96 (charset))                           \
4150           c = intermediate_char_94[reg];                                \
4151         else                                                            \
4152           c = intermediate_char_96[reg];                                \
4153         EMIT_ONE_ASCII_BYTE (c);                                        \
4154       }                                                                 \
4155     else                                                                \
4156       {                                                                 \
4157         EMIT_ONE_ASCII_BYTE ('$');                                      \
4158         if (! CHARSET_ISO_CHARS_96 (charset))                           \
4159           {                                                             \
               /* Skip the intermediate byte only in the short form.  */   \
4160             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
4161                 || reg != 0                                             \
4162                 || final_char < '@' || final_char > 'B')                \
4163               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
4164           }                                                             \
4165         else                                                            \
4166           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
4167       }                                                                 \
4168     EMIT_ONE_ASCII_BYTE (final_char);                                   \
4169                                                                         \
       /* Remember what REG now holds.  */                                 \
4170     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
4171   } while (0)
4172
4173
4174 /* The following two macros produce codes (control character or escape
4175    sequence) for ISO2022 single-shift functions (single-shift-2 and
4176    single-shift-3).  */
4177
     /* ENCODE_SINGLE_SHIFT_2 emits ESC 'N' when the coding system has
        CODING_ISO_FLAG_SEVEN_BITS set, and the single byte ISO_CODE_SS2
        otherwise.  It then sets CODING_ISO_SINGLE_SHIFTING (coding) to 1;
        ENCODE_ISO_CHARACTER_DIMENSION1/2 test that state and clear it once
        the shifted character has been emitted.
        The macro takes no arguments and uses the variable `coding' of the
        expansion site.  */
4178 #define ENCODE_SINGLE_SHIFT_2                                           \
4179   do {                                                                  \
4180     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
4181       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
4182     else                                                                \
4183       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
4184     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
4185   } while (0)
4186
4187
     /* ENCODE_SINGLE_SHIFT_3 emits ESC 'O' when the coding system has
        CODING_ISO_FLAG_SEVEN_BITS set, and the single byte ISO_CODE_SS3
        otherwise.  It then sets CODING_ISO_SINGLE_SHIFTING (coding) to 1.
        The macro takes no arguments and uses the variable `coding' of the
        expansion site.  */
4188 #define ENCODE_SINGLE_SHIFT_3                                           \
4189   do {                                                                  \
4190     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
4191       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
4192     else                                                                \
4193       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
4194     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
4195   } while (0)
4196
4197
4198 /* The following four macros produce codes (control character or
4199    escape sequence) for ISO2022 locking-shift functions (shift-in,
4200    shift-out, locking-shift-2, and locking-shift-3).  */
4201
     /* ENCODE_SHIFT_IN emits ISO_CODE_SI and records in the coding system
        that graphic plane 0 now invokes graphic register 0.  Uses the
        variable `coding' of the expansion site.  */
4202 #define ENCODE_SHIFT_IN                                 \
4203   do {                                                  \
4204     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
4205     CODING_ISO_INVOCATION (coding, 0) = 0;              \
4206   } while (0)
4207
4208
     /* ENCODE_SHIFT_OUT emits ISO_CODE_SO and records in the coding system
        that graphic plane 0 now invokes graphic register 1.  Uses the
        variable `coding' of the expansion site.  */
4209 #define ENCODE_SHIFT_OUT                                \
4210   do {                                                  \
4211     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
4212     CODING_ISO_INVOCATION (coding, 0) = 1;              \
4213   } while (0)
4214
4215
     /* ENCODE_LOCKING_SHIFT_2 emits ESC 'n' and records in the coding
        system that graphic plane 0 now invokes graphic register 2.  Uses
        the variable `coding' of the expansion site.  */
4216 #define ENCODE_LOCKING_SHIFT_2                          \
4217   do {                                                  \
4218     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
4219     CODING_ISO_INVOCATION (coding, 0) = 2;              \
4220   } while (0)
4221
4222
     /* ENCODE_LOCKING_SHIFT_3 emits ESC 'o' and records in the coding
        system that graphic plane 0 now invokes graphic register 3.  Uses
        the variable `coding' of the expansion site.

        The final byte must be 'o': ESC 'n' is locking-shift-2, and the
        ISO-2022 decoder in this file invokes register 3 only on ESC 'o'.
        Emitting 'n' here would make a reader invoke register 2 while the
        encoder believes register 3 is active.  */
4223 #define ENCODE_LOCKING_SHIFT_3                          \
4224   do {                                                  \
4225     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
4226     CODING_ISO_INVOCATION (coding, 0) = 3;              \
4227   } while (0)
4228
4229
4230 /* Produce codes for a DIMENSION1 character whose character set is
4231    CHARSET and whose position-code is C1.  Designation and invocation
4232    sequences are also produced in advance if necessary.  */
     /* The macro is a retry loop.  On each pass it emits C1 and leaves the
        loop when one of these holds:
          - a single shift is pending in the coding system: C1 is emitted
            with the high bit cleared if CODING_ISO_FLAG_SEVEN_BITS is set,
            otherwise with the high bit set, and the pending state is
            cleared;
          - CHARSET is invoked to graphic plane 0: C1 is emitted with the
            high bit cleared;
          - CHARSET is invoked to graphic plane 1: C1 is emitted with the
            high bit set.
        Otherwise encode_invocation_designation is called to designate
        and/or invoke CHARSET, and the loop runs again.

        When the coding system has CODING_ISO_FLAG_USE_ROMAN set and
        CHARSET is ASCII, CHARSET is replaced by the JISX0201-Roman
        charset; CHARSET must therefore be an assignable lvalue.
        C1 is parenthesized at every use so that an argument expression
        containing operators of lower precedence than `&' or `|' is masked
        as a whole (matching ENCODE_ISO_CHARACTER_DIMENSION2).
        Uses the variables `coding', `dst' and `produced_chars' of the
        expansion site.  */
4233
4234 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
4235   do {                                                                  \
4236     int id = CHARSET_ID (charset);                                      \
4237                                                                         \
4238     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
4239         && id == charset_ascii)                                         \
4240       {                                                                 \
4241         id = charset_jisx0201_roman;                                    \
4242         charset = CHARSET_FROM_ID (id);                                 \
4243       }                                                                 \
4244                                                                         \
4245     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
4246       {                                                                 \
4247         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
4248           EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
4249         else                                                            \
4250           EMIT_ONE_BYTE ((c1) | 0x80);                                  \
4251         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
4252         break;                                                          \
4253       }                                                                 \
4254     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
4255       {                                                                 \
4256         EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
4257         break;                                                          \
4258       }                                                                 \
4259     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
4260       {                                                                 \
4261         EMIT_ONE_BYTE ((c1) | 0x80);                                    \
4262         break;                                                          \
4263       }                                                                 \
4264     else                                                                \
4265       /* Since CHARSET is not yet invoked to any graphic planes, we     \
4266          must invoke it, or, at first, designate it to some graphic     \
4267          register.  Then repeat the loop to actually produce the        \
4268          character.  */                                                 \
4269       dst = encode_invocation_designation (charset, coding, dst,        \
4270                                            &produced_chars);            \
4271   } while (1)
4272
4273
4274 /* Produce codes for a DIMENSION2 character whose character set is
4275    CHARSET and whose position-codes are C1 and C2.  Designation and
4276    invocation codes are also produced in advance if necessary.  */
     /* The macro is a retry loop.  On each pass it emits C1 and C2 and
        leaves the loop when one of these holds:
          - a single shift is pending in the coding system: both bytes are
            emitted with the high bit cleared if CODING_ISO_FLAG_SEVEN_BITS
            is set, otherwise with the high bit set, and the pending state
            is cleared;
          - CHARSET is invoked to graphic plane 0: both bytes are emitted
            with the high bit cleared;
          - CHARSET is invoked to graphic plane 1: both bytes are emitted
            with the high bit set.
        Otherwise encode_invocation_designation is called to designate
        and/or invoke CHARSET, and the loop runs again.

        When the coding system has CODING_ISO_FLAG_USE_OLDJIS set and
        CHARSET is charset_jisx0208, CHARSET is replaced by
        charset_jisx0208_1978; CHARSET must therefore be an assignable
        lvalue.
        Uses the variables `coding', `dst' and `produced_chars' of the
        expansion site.  */
4277
4278 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
4279   do {                                                                  \
4280     int id = CHARSET_ID (charset);                                      \
4281                                                                         \
4282     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
4283         && id == charset_jisx0208)                                      \
4284       {                                                                 \
4285         id = charset_jisx0208_1978;                                     \
4286         charset = CHARSET_FROM_ID (id);                                 \
4287       }                                                                 \
4288                                                                         \
4289     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
4290       {                                                                 \
4291         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
4292           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
4293         else                                                            \
4294           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
4295         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
4296         break;                                                          \
4297       }                                                                 \
4298     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
4299       {                                                                 \
4300         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
4301         break;                                                          \
4302       }                                                                 \
4303     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
4304       {                                                                 \
4305         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
4306         break;                                                          \
4307       }                                                                 \
4308     else                                                                \
4309       /* Since CHARSET is not yet invoked to any graphic planes, we     \
4310          must invoke it, or, at first, designate it to some graphic     \
4311          register.  Then repeat the loop to actually produce the        \
4312          character.  */                                                 \
4313       dst = encode_invocation_designation (charset, coding, dst,        \
4314                                            &produced_chars);            \
4315   } while (1)
4316
4317
     /* Encode character C of CHARSET.  The code point of C in CHARSET is
        obtained with ENCODE_CHAR and then emitted through
        ENCODE_ISO_CHARACTER_DIMENSION1 when CHARSET_DIMENSION (charset) is
        1, and otherwise through ENCODE_ISO_CHARACTER_DIMENSION2 with the
        code split into (code >> 8) and (code & 0xFF).
        NOTE(review): every dimension other than 1 takes the two-byte
        path, and the result of ENCODE_CHAR is not checked here -- confirm
        that callers only pass characters CHARSET can encode and charsets
        of dimension 1 or 2.  */
4318 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
4319   do {                                                                     \
4320     int code = ENCODE_CHAR ((charset),(c));                                \
4321                                                                            \
4322     if (CHARSET_DIMENSION (charset) == 1)                                  \
4323       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
4324     else                                                                   \
4325       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
4326   } while (0)
4327
4328
4329 /* Produce designation and invocation codes at a place pointed by DST
4330    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4331    Return new DST.  */
     /* Steps:
          1. Look for a graphic register (0..3) to which CHARSET is already
             designated.  If there is none, take the register requested for
             CHARSET by the coding system (CODING_ISO_REQUEST), falling
             back to register 0 when no request exists, and emit a
             designation sequence for it.
          2. If that register is invoked to neither graphic plane 0 nor
             plane 1, emit a shift: shift-in for register 0, shift-out for
             register 1, and for registers 2 and 3 a single shift when the
             coding system has CODING_ISO_FLAG_SINGLE_SHIFT set, otherwise
             a locking shift.

        *P_NCHARS is copied into the local `produced_chars' on entry and
        written back on exit.
        NOTE(review): `multibytep' and `produced_chars' are not referenced
        directly in this body; they are presumably read and updated by the
        EMIT_* macros used inside ENCODE_DESIGNATION and the shift macros
        -- confirm against those definitions.  */
4332
4333 unsigned char *
4334 encode_invocation_designation (charset, coding, dst, p_nchars)
4335      struct charset *charset;
4336      struct coding_system *coding;
4337      unsigned char *dst;
4338      int *p_nchars;
4339 {
4340   int multibytep = coding->dst_multibyte;
4341   int produced_chars = *p_nchars;
4342   int reg;                      /* graphic register number */
4343   int id = CHARSET_ID (charset);
4344
4345   /* At first, check designations.  */
4346   for (reg = 0; reg < 4; reg++)
4347     if (id == CODING_ISO_DESIGNATION (coding, reg))
4348       break;
4349
       /* reg == 4 here means the search above found no register.  */
4350   if (reg >= 4)
4351     {
4352       /* CHARSET is not yet designated to any graphic registers.  */
4353       /* At first check the requested designation.  */
4354       reg = CODING_ISO_REQUEST (coding, id);
4355       if (reg < 0)
4356         /* Since CHARSET requests no special designation, designate it
4357            to graphic register 0.  */
4358         reg = 0;
4359
4360       ENCODE_DESIGNATION (charset, reg, coding);
4361     }
4362
4363   if (CODING_ISO_INVOCATION (coding, 0) != reg
4364       && CODING_ISO_INVOCATION (coding, 1) != reg)
4365     {
4366       /* Since the graphic register REG is not invoked to any graphic
4367          planes, invoke it to graphic plane 0.  */
4368       switch (reg)
4369         {
4370         case 0:                 /* graphic register 0 */
4371           ENCODE_SHIFT_IN;
4372           break;
4373
4374         case 1:                 /* graphic register 1 */
4375           ENCODE_SHIFT_OUT;
4376           break;
4377
4378         case 2:                 /* graphic register 2 */
               /* A single shift does not change the recorded invocation;
                  it only marks the next character as shifted.  */
4379           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4380             ENCODE_SINGLE_SHIFT_2;
4381           else
4382             ENCODE_LOCKING_SHIFT_2;
4383           break;
4384
4385         case 3:                 /* graphic register 3 */
4386           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4387             ENCODE_SINGLE_SHIFT_3;
4388           else
4389             ENCODE_LOCKING_SHIFT_3;
4390           break;
4391         }
4392     }
4393
4394   *p_nchars = produced_chars;
4395   return dst;
4396 }
4397
4398 /* The following three macros produce codes for indicating direction
4399    of text.  */
     /* ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the control sequence
        introducer: ESC '[' when the coding system has
        CODING_ISO_FLAG_SEVEN_BITS set, and the single byte ISO_CODE_CSI
        otherwise.  It is an object-like macro (invoke it without an
        argument list) and uses the variable `coding' of the expansion
        site.

        The flag is tested with `&', like every other SEVEN_BITS test in
        this file.  Comparing the whole flag word with `==' would select
        the 7-bit form only when SEVEN_BITS is the sole flag set, and emit
        an 8-bit byte for 7-bit coding systems that carry other flags.  */
4400 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
4401   do {                                                                  \
4402     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
4403       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
4404     else                                                                \
4405       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
4406   } while (0)
4407
4408
     /* Emit the sequence that starts right-to-left text: the control
        sequence introducer followed by '2' and ']'.
        ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
        is invoked without an argument list; writing `(dst)' after it
        would leave a stray `(dst)' behind its `while (0)' and fail to
        compile.  */
4409 #define ENCODE_DIRECTION_R2L()                  \
4410   do {                                          \
4411     ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
4412     EMIT_TWO_ASCII_BYTES ('2', ']');            \
4413   } while (0)
4414
4415
     /* Emit the sequence that returns to left-to-right text: the control
        sequence introducer followed by '0' and ']'.
        ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
        is invoked without an argument list; writing `(dst)' after it
        would leave a stray `(dst)' behind its `while (0)' and fail to
        compile.  */
4416 #define ENCODE_DIRECTION_L2R()                  \
4417   do {                                          \
4418     ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
4419     EMIT_TWO_ASCII_BYTES ('0', ']');            \
4420   } while (0)
4421
4422
4423 /* Produce codes for designation and invocation to reset the graphic
4424    planes and registers to initial state.  */
     /* First, if graphic plane 0 is not invoking register 0, emit
        shift-in.  Then, for each register 0..3 whose initial designation
        is not negative and differs from the current designation, emit a
        designation sequence for the initial charset (which also records
        the new designation in the coding system).
        Only the invocation of graphic plane 0 is reset here; plane 1 is
        not touched.
        The macro declares locals named `reg' and `charset', which shadow
        variables of the same names at the expansion site, and uses the
        variable `coding' of the expansion site.  */
4425 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
4426   do {                                                                  \
4427     int reg;                                                            \
4428     struct charset *charset;                                            \
4429                                                                         \
4430     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
4431       ENCODE_SHIFT_IN;                                                  \
4432     for (reg = 0; reg < 4; reg++)                                       \
4433       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
4434           && (CODING_ISO_DESIGNATION (coding, reg)                      \
4435               != CODING_ISO_INITIAL (coding, reg)))                     \
4436         {                                                               \
4437           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
4438           ENCODE_DESIGNATION (charset, reg, coding);                    \
4439         }                                                               \
4440   } while (0)
4441
4442
4443 /* Produce designation sequences of charsets in the line started from
4444    CHARBUF to a place pointed by DST, and return updated DST.
4445
4446    If the current block ends before any end-of-line, we may fail to
4447    find all the necessary designations.  */
     /* CHARBUF .. CHARBUF_END is the range of characters still to be
        encoded.  The scan walks it until a newline, the end of the range,
        or until all four graphic registers have a candidate charset.  For
        each character, its charset is looked up in the charset list of
        the coding system (Viso_2022_charset_list when that list is the
        symbol iso-2022).  If the coding system requests a register for
        that charset (CODING_ISO_REQUEST is not negative) and no earlier
        character on the line claimed that register, the charset is
        recorded for it.  Finally a designation sequence is emitted for
        each recorded register whose current designation differs.

        The scan is bounded by CHARBUF_END because the range is not
        guaranteed to contain a newline; without the bound the loop would
        read past the end of the buffer.

        NOTE(review): negative elements of CHARBUF (annotations, which the
        caller's main loop treats specially) are passed to char_charset
        unfiltered -- confirm that this is intended.
        NOTE(review): `produced_chars' and `multibytep' are not referenced
        directly here; they are presumably used by the EMIT_* macros
        inside ENCODE_DESIGNATION.  The count is not returned; the visible
        caller derives it from the change in DST.  */
4448
4449 static unsigned char *
4450 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
4451      struct coding_system *coding;
4452      int *charbuf, *charbuf_end;
4453      unsigned char *dst;
4454 {
4455   struct charset *charset;
4456   /* Table of charsets to be designated to each graphic register.  */
4457   int r[4];
4458   int c, found = 0, reg;
4459   int produced_chars = 0;
4460   int multibytep = coding->dst_multibyte;
4461   Lisp_Object attrs;
4462   Lisp_Object charset_list;
4463
4464   attrs = CODING_ID_ATTRS (coding->id);
4465   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4466   if (EQ (charset_list, Qiso_2022))
4467     charset_list = Viso_2022_charset_list;
4468
4469   for (reg = 0; reg < 4; reg++)
4470     r[reg] = -1;
4471
       /* Stop at the end of the buffer as well as at a newline.  */
4472   while (charbuf < charbuf_end && found < 4)
4473     {
4474       int id;
4475
4476       c = *charbuf++;
4477       if (c == '\n')
4478         break;
4479       charset = char_charset (c, charset_list, NULL);
4480       id = CHARSET_ID (charset);
4481       reg = CODING_ISO_REQUEST (coding, id);
           /* The first charset seen for a register wins.  */
4482       if (reg >= 0 && r[reg] < 0)
4483         {
4484           found++;
4485           r[reg] = id;
4486         }
4487     }
4488
4489   if (found)
4490     {
4491       for (reg = 0; reg < 4; reg++)
4492         if (r[reg] >= 0
4493             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4494           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4495     }
4496
4497   return dst;
4498 }
4499
4500 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4501
4502 static int
4503 encode_coding_iso_2022 (coding)
4504      struct coding_system *coding;
4505 {
4506   int multibytep = coding->dst_multibyte;
4507   int *charbuf = coding->charbuf;
4508   int *charbuf_end = charbuf + coding->charbuf_used;
4509   unsigned char *dst = coding->destination + coding->produced;
4510   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4511   int safe_room = 16;
4512   int bol_designation
4513     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4514        && CODING_ISO_BOL (coding));
4515   int produced_chars = 0;
4516   Lisp_Object attrs, eol_type, charset_list;
4517   int ascii_compatible;
4518   int c;
4519   int preferred_charset_id = -1;
4520
4521   CODING_GET_INFO (coding, attrs, charset_list);
4522   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4523   if (VECTORP (eol_type))
4524     eol_type = Qunix;
4525
4526   setup_iso_safe_charsets (attrs);
4527   /* Charset list may have been changed.  */
4528   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4529   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4530
4531   ascii_compatible
4532     = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
4533        && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
4534                                           | CODING_ISO_FLAG_LOCKING_SHIFT)));
4535
4536   while (charbuf < charbuf_end)
4537     {
4538       ASSURE_DESTINATION (safe_room);
4539
4540       if (bol_designation)
4541         {
4542           unsigned char *dst_prev = dst;
4543
4544           /* We have to produce designation sequences if any now.  */
4545           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4546           bol_designation = 0;
4547           /* We are sure that designation sequences are all ASCII bytes.  */
4548           produced_chars += dst - dst_prev;
4549         }
4550
4551       c = *charbuf++;
4552
4553       if (c < 0)
4554         {
4555           /* Handle an annotation.  */
4556           switch (*charbuf)
4557             {
4558             case CODING_ANNOTATE_COMPOSITION_MASK:
4559               /* Not yet implemented.  */
4560               break;
4561             case CODING_ANNOTATE_CHARSET_MASK:
4562               preferred_charset_id = charbuf[2];
4563               if (preferred_charset_id >= 0
4564                   && NILP (Fmemq (make_number (preferred_charset_id),
4565                                   charset_list)))
4566                 preferred_charset_id = -1;
4567               break;
4568             default:
4569               abort ();
4570             }
4571           charbuf += -c - 1;
4572           continue;
4573         }
4574
4575       /* Now encode the character C.  */
4576       if (c < 0x20 || c == 0x7F)
4577         {
4578           if (c == '\n'
4579               || (c == '\r' && EQ (eol_type, Qmac)))
4580             {
4581               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4582                 ENCODE_RESET_PLANE_AND_REGISTER ();
4583               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4584                 {
4585                   int i;
4586
4587                   for (i = 0; i < 4; i++)
4588                     CODING_ISO_DESIGNATION (coding, i)
4589                       = CODING_ISO_INITIAL (coding, i);
4590                 }
4591               bol_designation
4592                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4593             }
4594           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4595             ENCODE_RESET_PLANE_AND_REGISTER ();
4596           EMIT_ONE_ASCII_BYTE (c);
4597         }
4598       else if (ASCII_CHAR_P (c))
4599         {
4600           if (ascii_compatible)
4601             EMIT_ONE_ASCII_BYTE (c);
4602           else
4603             {
4604               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4605               ENCODE_ISO_CHARACTER (charset, c);
4606             }
4607         }
4608       else if (CHAR_BYTE8_P (c))
4609         {
4610           c = CHAR_TO_BYTE8 (c);
4611           EMIT_ONE_BYTE (c);
4612         }
4613       else
4614         {
4615           struct charset *charset;
4616
4617           if (preferred_charset_id >= 0)
4618             {
4619               charset = CHARSET_FROM_ID (preferred_charset_id);
4620               if (! CHAR_CHARSET_P (c, charset))
4621                 charset = char_charset (c, charset_list, NULL);
4622             }
4623           else
4624             charset = char_charset (c, charset_list, NULL);
4625           if (!charset)
4626             {
4627               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4628                 {
4629                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4630                   charset = CHARSET_FROM_ID (charset_ascii);
4631                 }
4632               else
4633                 {
4634                   c = coding->default_char;
4635                   charset = char_charset (c, charset_list, NULL);
4636                 }
4637             }
4638           ENCODE_ISO_CHARACTER (charset, c);
4639         }
4640     }
4641
4642   if (coding->mode & CODING_MODE_LAST_BLOCK
4643       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4644     {
4645       ASSURE_DESTINATION (safe_room);
4646       ENCODE_RESET_PLANE_AND_REGISTER ();
4647     }
4648   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4649   CODING_ISO_BOL (coding) = bol_designation;
4650   coding->produced_char += produced_chars;
4651   coding->produced = dst - coding->destination;
4652   return 0;
4653 }
4654
4655 \f
/*** 8. SJIS and BIG5 handlers ***/
4657
/* Although SJIS and BIG5 are not ISO coding systems, they are used
   quite widely.  So, for the moment, Emacs supports them in bare
   C code.  But, in the future, they may be supported only by CCL.  */
4661
/* SJIS is a coding system encoding three character sets: ASCII, the
   right half of JISX0201-Kana, and JISX0208.  An ASCII character is
   encoded as is.  A character of charset katakana-jisx0201 is encoded
   as "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, with the two position-codes divided and
   shifted so that they fit in the ranges below.
4668
4669    --- CODE RANGE of SJIS ---
4670    (character set)      (range)
4671    ASCII                0x00 .. 0x7F
4672    KATAKANA-JISX0201    0xA0 .. 0xDF
4673    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4674             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4675    -------------------------------
4676
4677 */
4678
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and each of its characters is encoded in two bytes.
4682
4683    --- CODE RANGE of BIG5 ---
4684    (character set)      (range)
4685    ASCII                0x00 .. 0x7F
4686    Big5 (1st byte)      0xA1 .. 0xFE
4687         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4688    --------------------------
4689
4690   */
4691
4692 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4693    Check if a text is encoded in SJIS.  If it is, return
4694    CATEGORY_MASK_SJIS, else return 0.  */
4695
4696 static int
4697 detect_coding_sjis (coding, detect_info)
4698      struct coding_system *coding;
4699      struct coding_detection_info *detect_info;
4700 {
4701   const unsigned char *src = coding->source, *src_base;
4702   const unsigned char *src_end = coding->source + coding->src_bytes;
4703   int multibytep = coding->src_multibyte;
4704   int consumed_chars = 0;
4705   int found = 0;
4706   int c;
4707   Lisp_Object attrs, charset_list;
4708   int max_first_byte_of_2_byte_code;
4709
4710   CODING_GET_INFO (coding, attrs, charset_list);
4711   max_first_byte_of_2_byte_code
4712     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4713
4714   detect_info->checked |= CATEGORY_MASK_SJIS;
4715   /* A coding system of this category is always ASCII compatible.  */
4716   src += coding->head_ascii;
4717
4718   while (1)
4719     {
4720       src_base = src;
4721       ONE_MORE_BYTE (c);
4722       if (c < 0x80)
4723         continue;
4724       if ((c >= 0x81 && c <= 0x9F)
4725           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4726         {
4727           ONE_MORE_BYTE (c);
4728           if (c < 0x40 || c == 0x7F || c > 0xFC)
4729             break;
4730           found = CATEGORY_MASK_SJIS;
4731         }
4732       else if (c >= 0xA0 && c < 0xE0)
4733         found = CATEGORY_MASK_SJIS;
4734       else
4735         break;
4736     }
4737   detect_info->rejected |= CATEGORY_MASK_SJIS;
4738   return 0;
4739
4740  no_more_source:
4741   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4742     {
4743       detect_info->rejected |= CATEGORY_MASK_SJIS;
4744       return 0;
4745     }
4746   detect_info->found |= found;
4747   return 1;
4748 }
4749
4750 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4751    Check if a text is encoded in BIG5.  If it is, return
4752    CATEGORY_MASK_BIG5, else return 0.  */
4753
4754 static int
4755 detect_coding_big5 (coding, detect_info)
4756      struct coding_system *coding;
4757      struct coding_detection_info *detect_info;
4758 {
4759   const unsigned char *src = coding->source, *src_base;
4760   const unsigned char *src_end = coding->source + coding->src_bytes;
4761   int multibytep = coding->src_multibyte;
4762   int consumed_chars = 0;
4763   int found = 0;
4764   int c;
4765
4766   detect_info->checked |= CATEGORY_MASK_BIG5;
4767   /* A coding system of this category is always ASCII compatible.  */
4768   src += coding->head_ascii;
4769
4770   while (1)
4771     {
4772       src_base = src;
4773       ONE_MORE_BYTE (c);
4774       if (c < 0x80)
4775         continue;
4776       if (c >= 0xA1)
4777         {
4778           ONE_MORE_BYTE (c);
4779           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4780             return 0;
4781           found = CATEGORY_MASK_BIG5;
4782         }
4783       else
4784         break;
4785     }
4786   detect_info->rejected |= CATEGORY_MASK_BIG5;
4787   return 0;
4788
4789  no_more_source:
4790   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4791     {
4792       detect_info->rejected |= CATEGORY_MASK_BIG5;
4793       return 0;
4794     }
4795   detect_info->found |= found;
4796   return 1;
4797 }
4798
4799 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4800    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4801
4802 static void
4803 decode_coding_sjis (coding)
4804      struct coding_system *coding;
4805 {
4806   const unsigned char *src = coding->source + coding->consumed;
4807   const unsigned char *src_end = coding->source + coding->src_bytes;
4808   const unsigned char *src_base;
4809   int *charbuf = coding->charbuf + coding->charbuf_used;
4810   /* We may produce one charset annocation in one loop and one more at
4811      the end.  */
4812   int *charbuf_end
4813     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4814   int consumed_chars = 0, consumed_chars_base;
4815   int multibytep = coding->src_multibyte;
4816   struct charset *charset_roman, *charset_kanji, *charset_kana;
4817   struct charset *charset_kanji2;
4818   Lisp_Object attrs, charset_list, val;
4819   int char_offset = coding->produced_char;
4820   int last_offset = char_offset;
4821   int last_id = charset_ascii;
4822   int eol_crlf =
4823     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4824   int byte_after_cr = -1;
4825
4826   CODING_GET_INFO (coding, attrs, charset_list);
4827
4828   val = charset_list;
4829   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4830   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4831   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4832   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4833
4834   while (1)
4835     {
4836       int c, c1;
4837       struct charset *charset;
4838
4839       src_base = src;
4840       consumed_chars_base = consumed_chars;
4841
4842       if (charbuf >= charbuf_end)
4843         {
4844           if (byte_after_cr >= 0)
4845             src_base--;
4846           break;
4847         }
4848
4849       if (byte_after_cr >= 0)
4850         c = byte_after_cr, byte_after_cr = -1;
4851       else
4852         ONE_MORE_BYTE (c);
4853       if (c < 0)
4854         goto invalid_code;
4855       if (c < 0x80)
4856         {
4857           if (eol_crlf && c == '\r')
4858             ONE_MORE_BYTE (byte_after_cr);
4859           charset = charset_roman;
4860         }
4861       else if (c == 0x80 || c == 0xA0)
4862         goto invalid_code;
4863       else if (c >= 0xA1 && c <= 0xDF)
4864         {
4865           /* SJIS -> JISX0201-Kana */
4866           c &= 0x7F;
4867           charset = charset_kana;
4868         }
4869       else if (c <= 0xEF)
4870         {
4871           /* SJIS -> JISX0208 */
4872           ONE_MORE_BYTE (c1);
4873           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4874             goto invalid_code;
4875           c = (c << 8) | c1;
4876           SJIS_TO_JIS (c);
4877           charset = charset_kanji;
4878         }
4879       else if (c <= 0xFC && charset_kanji2)
4880         {
4881           /* SJIS -> JISX0213-2 */
4882           ONE_MORE_BYTE (c1);
4883           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4884             goto invalid_code;
4885           c = (c << 8) | c1;
4886           SJIS_TO_JIS2 (c);
4887           charset = charset_kanji2;
4888         }
4889       else
4890         goto invalid_code;
4891       if (charset->id != charset_ascii
4892           && last_id != charset->id)
4893         {
4894           if (last_id != charset_ascii)
4895             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4896           last_id = charset->id;
4897           last_offset = char_offset;
4898         }
4899       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4900       *charbuf++ = c;
4901       char_offset++;
4902       continue;
4903
4904     invalid_code:
4905       src = src_base;
4906       consumed_chars = consumed_chars_base;
4907       ONE_MORE_BYTE (c);
4908       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4909       char_offset++;
4910       coding->errors++;
4911     }
4912
4913  no_more_source:
4914   if (last_id != charset_ascii)
4915     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4916   coding->consumed_char += consumed_chars_base;
4917   coding->consumed = src_base - coding->source;
4918   coding->charbuf_used = charbuf - coding->charbuf;
4919 }
4920
4921 static void
4922 decode_coding_big5 (coding)
4923      struct coding_system *coding;
4924 {
4925   const unsigned char *src = coding->source + coding->consumed;
4926   const unsigned char *src_end = coding->source + coding->src_bytes;
4927   const unsigned char *src_base;
4928   int *charbuf = coding->charbuf + coding->charbuf_used;
4929   /* We may produce one charset annocation in one loop and one more at
4930      the end.  */
4931   int *charbuf_end
4932     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4933   int consumed_chars = 0, consumed_chars_base;
4934   int multibytep = coding->src_multibyte;
4935   struct charset *charset_roman, *charset_big5;
4936   Lisp_Object attrs, charset_list, val;
4937   int char_offset = coding->produced_char;
4938   int last_offset = char_offset;
4939   int last_id = charset_ascii;
4940   int eol_crlf =
4941     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4942   int byte_after_cr = -1;
4943
4944   CODING_GET_INFO (coding, attrs, charset_list);
4945   val = charset_list;
4946   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4947   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4948
4949   while (1)
4950     {
4951       int c, c1;
4952       struct charset *charset;
4953
4954       src_base = src;
4955       consumed_chars_base = consumed_chars;
4956
4957       if (charbuf >= charbuf_end)
4958         {
4959           if (byte_after_cr >= 0)
4960             src_base--;
4961           break;
4962         }
4963
4964       if (byte_after_cr >= 0)
4965         c = byte_after_cr, byte_after_cr = -1;
4966       else
4967         ONE_MORE_BYTE (c);
4968
4969       if (c < 0)
4970         goto invalid_code;
4971       if (c < 0x80)
4972         {
4973           if (eol_crlf && c == '\r')
4974             ONE_MORE_BYTE (byte_after_cr);
4975           charset = charset_roman;
4976         }
4977       else
4978         {
4979           /* BIG5 -> Big5 */
4980           if (c < 0xA1 || c > 0xFE)
4981             goto invalid_code;
4982           ONE_MORE_BYTE (c1);
4983           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4984             goto invalid_code;
4985           c = c << 8 | c1;
4986           charset = charset_big5;
4987         }
4988       if (charset->id != charset_ascii
4989           && last_id != charset->id)
4990         {
4991           if (last_id != charset_ascii)
4992             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4993           last_id = charset->id;
4994           last_offset = char_offset;
4995         }
4996       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4997       *charbuf++ = c;
4998       char_offset++;
4999       continue;
5000
5001     invalid_code:
5002       src = src_base;
5003       consumed_chars = consumed_chars_base;
5004       ONE_MORE_BYTE (c);
5005       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
5006       char_offset++;
5007       coding->errors++;
5008     }
5009
5010  no_more_source:
5011   if (last_id != charset_ascii)
5012     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5013   coding->consumed_char += consumed_chars_base;
5014   coding->consumed = src_base - coding->source;
5015   coding->charbuf_used = charbuf - coding->charbuf;
5016 }
5017
5018 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
5019    This function can encode charsets `ascii', `katakana-jisx0201',
5020    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
5021    are sure that all these charsets are registered as official charset
5022    (i.e. do not have extended leading-codes).  Characters of other
5023    charsets are produced without any encoding.  If SJIS_P is 1, encode
5024    SJIS text, else encode BIG5 text.  */
5025
5026 static int
5027 encode_coding_sjis (coding)
5028      struct coding_system *coding;
5029 {
5030   int multibytep = coding->dst_multibyte;
5031   int *charbuf = coding->charbuf;
5032   int *charbuf_end = charbuf + coding->charbuf_used;
5033   unsigned char *dst = coding->destination + coding->produced;
5034   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5035   int safe_room = 4;
5036   int produced_chars = 0;
5037   Lisp_Object attrs, charset_list, val;
5038   int ascii_compatible;
5039   struct charset *charset_roman, *charset_kanji, *charset_kana;
5040   struct charset *charset_kanji2;
5041   int c;
5042
5043   CODING_GET_INFO (coding, attrs, charset_list);
5044   val = charset_list;
5045   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5046   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5047   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5048   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
5049
5050   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5051
5052   while (charbuf < charbuf_end)
5053     {
5054       ASSURE_DESTINATION (safe_room);
5055       c = *charbuf++;
5056       /* Now encode the character C.  */
5057       if (ASCII_CHAR_P (c) && ascii_compatible)
5058         EMIT_ONE_ASCII_BYTE (c);
5059       else if (CHAR_BYTE8_P (c))
5060         {
5061           c = CHAR_TO_BYTE8 (c);
5062           EMIT_ONE_BYTE (c);
5063         }
5064       else
5065         {
5066           unsigned code;
5067           struct charset *charset = char_charset (c, charset_list, &code);
5068
5069           if (!charset)
5070             {
5071               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5072                 {
5073                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5074                   charset = CHARSET_FROM_ID (charset_ascii);
5075                 }
5076               else
5077                 {
5078                   c = coding->default_char;
5079                   charset = char_charset (c, charset_list, &code);
5080                 }
5081             }
5082           if (code == CHARSET_INVALID_CODE (charset))
5083             abort ();
5084           if (charset == charset_kanji)
5085             {
5086               int c1, c2;
5087               JIS_TO_SJIS (code);
5088               c1 = code >> 8, c2 = code & 0xFF;
5089               EMIT_TWO_BYTES (c1, c2);
5090             }
5091           else if (charset == charset_kana)
5092             EMIT_ONE_BYTE (code | 0x80);
5093           else if (charset_kanji2 && charset == charset_kanji2)
5094             {
5095               int c1, c2;
5096
5097               c1 = code >> 8;
5098               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5099                   || c1 == 0x28
5100                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5101                 {
5102                   JIS_TO_SJIS2 (code);
5103                   c1 = code >> 8, c2 = code & 0xFF;
5104                   EMIT_TWO_BYTES (c1, c2);
5105                 }
5106               else
5107                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5108             }
5109           else
5110             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5111         }
5112     }
5113   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5114   coding->produced_char += produced_chars;
5115   coding->produced = dst - coding->destination;
5116   return 0;
5117 }
5118
5119 static int
5120 encode_coding_big5 (coding)
5121      struct coding_system *coding;
5122 {
5123   int multibytep = coding->dst_multibyte;
5124   int *charbuf = coding->charbuf;
5125   int *charbuf_end = charbuf + coding->charbuf_used;
5126   unsigned char *dst = coding->destination + coding->produced;
5127   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5128   int safe_room = 4;
5129   int produced_chars = 0;
5130   Lisp_Object attrs, charset_list, val;
5131   int ascii_compatible;
5132   struct charset *charset_roman, *charset_big5;
5133   int c;
5134
5135   CODING_GET_INFO (coding, attrs, charset_list);
5136   val = charset_list;
5137   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5138   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5139   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5140
5141   while (charbuf < charbuf_end)
5142     {
5143       ASSURE_DESTINATION (safe_room);
5144       c = *charbuf++;
5145       /* Now encode the character C.  */
5146       if (ASCII_CHAR_P (c) && ascii_compatible)
5147         EMIT_ONE_ASCII_BYTE (c);
5148       else if (CHAR_BYTE8_P (c))
5149         {
5150           c = CHAR_TO_BYTE8 (c);
5151           EMIT_ONE_BYTE (c);
5152         }
5153       else
5154         {
5155           unsigned code;
5156           struct charset *charset = char_charset (c, charset_list, &code);
5157
5158           if (! charset)
5159             {
5160               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5161                 {
5162                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5163                   charset = CHARSET_FROM_ID (charset_ascii);
5164                 }
5165               else
5166                 {
5167                   c = coding->default_char;
5168                   charset = char_charset (c, charset_list, &code);
5169                 }
5170             }
5171           if (code == CHARSET_INVALID_CODE (charset))
5172             abort ();
5173           if (charset == charset_big5)
5174             {
5175               int c1, c2;
5176
5177               c1 = code >> 8, c2 = code & 0xFF;
5178               EMIT_TWO_BYTES (c1, c2);
5179             }
5180           else
5181             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5182         }
5183     }
5184   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5185   coding->produced_char += produced_chars;
5186   coding->produced = dst - coding->destination;
5187   return 0;
5188 }
5189
5190 \f
/*** 9. CCL handlers ***/
5192
5193 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5194    Check if a text is encoded in a coding system of which
5195    encoder/decoder are written in CCL program.  If it is, return
5196    CATEGORY_MASK_CCL, else return 0.  */
5197
5198 static int
5199 detect_coding_ccl (coding, detect_info)
5200      struct coding_system *coding;
5201      struct coding_detection_info *detect_info;
5202 {
5203   const unsigned char *src = coding->source, *src_base;
5204   const unsigned char *src_end = coding->source + coding->src_bytes;
5205   int multibytep = coding->src_multibyte;
5206   int consumed_chars = 0;
5207   int found = 0;
5208   unsigned char *valids;
5209   int head_ascii = coding->head_ascii;
5210   Lisp_Object attrs;
5211
5212   detect_info->checked |= CATEGORY_MASK_CCL;
5213
5214   coding = &coding_categories[coding_category_ccl];
5215   valids = CODING_CCL_VALIDS (coding);
5216   attrs = CODING_ID_ATTRS (coding->id);
5217   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5218     src += head_ascii;
5219
5220   while (1)
5221     {
5222       int c;
5223
5224       src_base = src;
5225       ONE_MORE_BYTE (c);
5226       if (c < 0 || ! valids[c])
5227         break;
5228       if ((valids[c] > 1))
5229         found = CATEGORY_MASK_CCL;
5230     }
5231   detect_info->rejected |= CATEGORY_MASK_CCL;
5232   return 0;
5233
5234  no_more_source:
5235   detect_info->found |= found;
5236   return 1;
5237 }
5238
5239 static void
5240 decode_coding_ccl (coding)
5241      struct coding_system *coding;
5242 {
5243   const unsigned char *src = coding->source + coding->consumed;
5244   const unsigned char *src_end = coding->source + coding->src_bytes;
5245   int *charbuf = coding->charbuf + coding->charbuf_used;
5246   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5247   int consumed_chars = 0;
5248   int multibytep = coding->src_multibyte;
5249   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5250   int source_charbuf[1024];
5251   int source_byteidx[1025];
5252   Lisp_Object attrs, charset_list;
5253
5254   CODING_GET_INFO (coding, attrs, charset_list);
5255
5256   while (1)
5257     {
5258       const unsigned char *p = src;
5259       int i = 0;
5260
5261       if (multibytep)
5262         {
5263           while (i < 1024 && p < src_end)
5264             {
5265               source_byteidx[i] = p - src;
5266               source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5267             }
5268           source_byteidx[i] = p - src;
5269         }
5270       else
5271         while (i < 1024 && p < src_end)
5272           source_charbuf[i++] = *p++;
5273
5274       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5275         ccl->last_block = 1;
5276       ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5277                   charset_list);
5278       charbuf += ccl->produced;
5279       if (multibytep)
5280         src += source_byteidx[ccl->consumed];
5281       else
5282         src += ccl->consumed;
5283       consumed_chars += ccl->consumed;
5284       if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5285         break;
5286     }
5287
5288   switch (ccl->status)
5289     {
5290     case CCL_STAT_SUSPEND_BY_SRC:
5291       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5292       break;
5293     case CCL_STAT_SUSPEND_BY_DST:
5294       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5295       break;
5296     case CCL_STAT_QUIT:
5297     case CCL_STAT_INVALID_CMD:
5298       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5299       break;
5300     default:
5301       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5302       break;
5303     }
5304   coding->consumed_char += consumed_chars;
5305   coding->consumed = src - coding->source;
5306   coding->charbuf_used = charbuf - coding->charbuf;
5307 }
5308
5309 static int
5310 encode_coding_ccl (coding)
5311      struct coding_system *coding;
5312 {
5313   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5314   int multibytep = coding->dst_multibyte;
5315   int *charbuf = coding->charbuf;
5316   int *charbuf_end = charbuf + coding->charbuf_used;
5317   unsigned char *dst = coding->destination + coding->produced;
5318   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5319   int destination_charbuf[1024];
5320   int i, produced_chars = 0;
5321   Lisp_Object attrs, charset_list;
5322
5323   CODING_GET_INFO (coding, attrs, charset_list);
5324   if (coding->consumed_char == coding->src_chars
5325       && coding->mode & CODING_MODE_LAST_BLOCK)
5326     ccl->last_block = 1;
5327
5328   while (charbuf < charbuf_end)
5329     {
5330       ccl_driver (ccl, charbuf, destination_charbuf,
5331                   charbuf_end - charbuf, 1024, charset_list);
5332       if (multibytep)
5333         {
5334           ASSURE_DESTINATION (ccl->produced * 2);
5335           for (i = 0; i < ccl->produced; i++)
5336             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5337         }
5338       else
5339         {
5340           ASSURE_DESTINATION (ccl->produced);
5341           for (i = 0; i < ccl->produced; i++)
5342             *dst++ = destination_charbuf[i] & 0xFF;
5343           produced_chars += ccl->produced;
5344         }
5345       charbuf += ccl->consumed;
5346       if (ccl->status == CCL_STAT_QUIT
5347           || ccl->status == CCL_STAT_INVALID_CMD)
5348         break;
5349     }
5350
5351   switch (ccl->status)
5352     {
5353     case CCL_STAT_SUSPEND_BY_SRC:
5354       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5355       break;
5356     case CCL_STAT_SUSPEND_BY_DST:
5357       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5358       break;
5359     case CCL_STAT_QUIT:
5360     case CCL_STAT_INVALID_CMD:
5361       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5362       break;
5363     default:
5364       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5365       break;
5366     }
5367
5368   coding->produced_char += produced_chars;
5369   coding->produced = dst - coding->destination;
5370   return 0;
5371 }
5372
5373
5374 \f
5375 /*** 10, 11. no-conversion handlers ***/
5376
5377 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
5378
5379 static void
5380 decode_coding_raw_text (coding)
5381      struct coding_system *coding;
5382 {
5383   int eol_crlf =
5384     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5385
5386   coding->chars_at_source = 1;
5387   coding->consumed_char = coding->src_chars;
5388   coding->consumed = coding->src_bytes;
5389   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
5390     {
5391       coding->consumed_char--;
5392       coding->consumed--;
5393       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5394     }
5395   else
5396     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5397 }
5398
5399 static int
5400 encode_coding_raw_text (coding)
5401      struct coding_system *coding;
5402 {
5403   int multibytep = coding->dst_multibyte;
5404   int *charbuf = coding->charbuf;
5405   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5406   unsigned char *dst = coding->destination + coding->produced;
5407   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5408   int produced_chars = 0;
5409   int c;
5410
5411   if (multibytep)
5412     {
5413       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5414
5415       if (coding->src_multibyte)
5416         while (charbuf < charbuf_end)
5417           {
5418             ASSURE_DESTINATION (safe_room);
5419             c = *charbuf++;
5420             if (ASCII_CHAR_P (c))
5421               EMIT_ONE_ASCII_BYTE (c);
5422             else if (CHAR_BYTE8_P (c))
5423               {
5424                 c = CHAR_TO_BYTE8 (c);
5425                 EMIT_ONE_BYTE (c);
5426               }
5427             else
5428               {
5429                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5430
5431                 CHAR_STRING_ADVANCE (c, p1);
5432                 while (p0 < p1)
5433                   {
5434                     EMIT_ONE_BYTE (*p0);
5435                     p0++;
5436                   }
5437               }
5438           }
5439       else
5440         while (charbuf < charbuf_end)
5441           {
5442             ASSURE_DESTINATION (safe_room);
5443             c = *charbuf++;
5444             EMIT_ONE_BYTE (c);
5445           }
5446     }
5447   else
5448     {
5449       if (coding->src_multibyte)
5450         {
5451           int safe_room = MAX_MULTIBYTE_LENGTH;
5452
5453           while (charbuf < charbuf_end)
5454             {
5455               ASSURE_DESTINATION (safe_room);
5456               c = *charbuf++;
5457               if (ASCII_CHAR_P (c))
5458                 *dst++ = c;
5459               else if (CHAR_BYTE8_P (c))
5460                 *dst++ = CHAR_TO_BYTE8 (c);
5461               else
5462                 CHAR_STRING_ADVANCE (c, dst);
5463             }
5464         }
5465       else
5466         {
5467           ASSURE_DESTINATION (charbuf_end - charbuf);
5468           while (charbuf < charbuf_end && dst < dst_end)
5469             *dst++ = *charbuf++;
5470         }
5471       produced_chars = dst - (coding->destination + coding->produced);
5472     }
5473   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5474   coding->produced_char += produced_chars;
5475   coding->produced = dst - coding->destination;
5476   return 0;
5477 }
5478
5479 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5480    Check if a text is encoded in a charset-based coding system.  If it
5481    is, return 1, else return 0.  */
5482
5483 static int
5484 detect_coding_charset (coding, detect_info)
5485      struct coding_system *coding;
5486      struct coding_detection_info *detect_info;
5487 {
5488   const unsigned char *src = coding->source, *src_base;
5489   const unsigned char *src_end = coding->source + coding->src_bytes;
5490   int multibytep = coding->src_multibyte;
5491   int consumed_chars = 0;
5492   Lisp_Object attrs, valids, name;
5493   int found = 0;
5494   int head_ascii = coding->head_ascii;
5495   int check_latin_extra = 0;
5496
5497   detect_info->checked |= CATEGORY_MASK_CHARSET;
5498
5499   coding = &coding_categories[coding_category_charset];
5500   attrs = CODING_ID_ATTRS (coding->id);
5501   valids = AREF (attrs, coding_attr_charset_valids);
5502   name = CODING_ID_NAME (coding->id);
5503   if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5504                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5505       || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5506                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5507     check_latin_extra = 1;
5508
5509   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5510     src += head_ascii;
5511
5512   while (1)
5513     {
5514       int c;
5515       Lisp_Object val;
5516       struct charset *charset;
5517       int dim, idx;
5518
5519       src_base = src;
5520       ONE_MORE_BYTE (c);
5521       if (c < 0)
5522         continue;
5523       val = AREF (valids, c);
5524       if (NILP (val))
5525         break;
5526       if (c >= 0x80)
5527         {
5528           if (c < 0xA0
5529               && check_latin_extra
5530               && (!VECTORP (Vlatin_extra_code_table)
5531                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5532             break;
5533           found = CATEGORY_MASK_CHARSET;
5534         }
5535       if (INTEGERP (val))
5536         {
5537           charset = CHARSET_FROM_ID (XFASTINT (val));
5538           dim = CHARSET_DIMENSION (charset);
5539           for (idx = 1; idx < dim; idx++)
5540             {
5541               if (src == src_end)
5542                 goto too_short;
5543               ONE_MORE_BYTE (c);
5544               if (c < charset->code_space[(dim - 1 - idx) * 2]
5545                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5546                 break;
5547             }
5548           if (idx < dim)
5549             break;
5550         }
5551       else
5552         {
5553           idx = 1;
5554           for (; CONSP (val); val = XCDR (val))
5555             {
5556               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5557               dim = CHARSET_DIMENSION (charset);
5558               while (idx < dim)
5559                 {
5560                   if (src == src_end)
5561                     goto too_short;
5562                   ONE_MORE_BYTE (c);
5563                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5564                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5565                     break;
5566                   idx++;
5567                 }
5568               if (idx == dim)
5569                 {
5570                   val = Qnil;
5571                   break;
5572                 }
5573             }
5574           if (CONSP (val))
5575             break;
5576         }
5577     }
5578  too_short:
5579   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5580   return 0;
5581
5582  no_more_source:
5583   detect_info->found |= found;
5584   return 1;
5585 }
5586
5587 static void
5588 decode_coding_charset (coding)
5589      struct coding_system *coding;
5590 {
5591   const unsigned char *src = coding->source + coding->consumed;
5592   const unsigned char *src_end = coding->source + coding->src_bytes;
5593   const unsigned char *src_base;
5594   int *charbuf = coding->charbuf + coding->charbuf_used;
5595   /* We may produce one charset annocation in one loop and one more at
5596      the end.  */
5597   int *charbuf_end
5598     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5599   int consumed_chars = 0, consumed_chars_base;
5600   int multibytep = coding->src_multibyte;
5601   Lisp_Object attrs, charset_list, valids;
5602   int char_offset = coding->produced_char;
5603   int last_offset = char_offset;
5604   int last_id = charset_ascii;
5605   int eol_crlf =
5606     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5607   int byte_after_cr = -1;
5608
5609   CODING_GET_INFO (coding, attrs, charset_list);
5610   valids = AREF (attrs, coding_attr_charset_valids);
5611
5612   while (1)
5613     {
5614       int c;
5615       Lisp_Object val;
5616       struct charset *charset;
5617       int dim;
5618       int len = 1;
5619       unsigned code;
5620
5621       src_base = src;
5622       consumed_chars_base = consumed_chars;
5623
5624       if (charbuf >= charbuf_end)
5625         {
5626           if (byte_after_cr >= 0)
5627             src_base--;
5628           break;
5629         }
5630
5631       if (byte_after_cr >= 0)
5632         {
5633           c = byte_after_cr;
5634           byte_after_cr = -1;
5635         }
5636       else
5637         {
5638           ONE_MORE_BYTE (c);
5639           if (eol_crlf && c == '\r')
5640             ONE_MORE_BYTE (byte_after_cr);
5641         }
5642       if (c < 0)
5643         goto invalid_code;
5644       code = c;
5645
5646       val = AREF (valids, c);
5647       if (! INTEGERP (val) && ! CONSP (val))
5648         goto invalid_code;
5649       if (INTEGERP (val))
5650         {
5651           charset = CHARSET_FROM_ID (XFASTINT (val));
5652           dim = CHARSET_DIMENSION (charset);
5653           while (len < dim)
5654             {
5655               ONE_MORE_BYTE (c);
5656               code = (code << 8) | c;
5657               len++;
5658             }
5659           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5660                               charset, code, c);
5661         }
5662       else
5663         {
5664           /* VAL is a list of charset IDs.  It is assured that the
5665              list is sorted by charset dimensions (smaller one
5666              comes first).  */
5667           while (CONSP (val))
5668             {
5669               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5670               dim = CHARSET_DIMENSION (charset);
5671               while (len < dim)
5672                 {
5673                   ONE_MORE_BYTE (c);
5674                   code = (code << 8) | c;
5675                   len++;
5676                 }
5677               CODING_DECODE_CHAR (coding, src, src_base,
5678                                   src_end, charset, code, c);
5679               if (c >= 0)
5680                 break;
5681               val = XCDR (val);
5682             }
5683         }
5684       if (c < 0)
5685         goto invalid_code;
5686       if (charset->id != charset_ascii
5687           && last_id != charset->id)
5688         {
5689           if (last_id != charset_ascii)
5690             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5691           last_id = charset->id;
5692           last_offset = char_offset;
5693         }
5694
5695       *charbuf++ = c;
5696       char_offset++;
5697       continue;
5698
5699     invalid_code:
5700       src = src_base;
5701       consumed_chars = consumed_chars_base;
5702       ONE_MORE_BYTE (c);
5703       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5704       char_offset++;
5705       coding->errors++;
5706     }
5707
5708  no_more_source:
5709   if (last_id != charset_ascii)
5710     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5711   coding->consumed_char += consumed_chars_base;
5712   coding->consumed = src_base - coding->source;
5713   coding->charbuf_used = charbuf - coding->charbuf;
5714 }
5715
5716 static int
5717 encode_coding_charset (coding)
5718      struct coding_system *coding;
5719 {
5720   int multibytep = coding->dst_multibyte;
5721   int *charbuf = coding->charbuf;
5722   int *charbuf_end = charbuf + coding->charbuf_used;
5723   unsigned char *dst = coding->destination + coding->produced;
5724   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5725   int safe_room = MAX_MULTIBYTE_LENGTH;
5726   int produced_chars = 0;
5727   Lisp_Object attrs, charset_list;
5728   int ascii_compatible;
5729   int c;
5730
5731   CODING_GET_INFO (coding, attrs, charset_list);
5732   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5733
5734   while (charbuf < charbuf_end)
5735     {
5736       struct charset *charset;
5737       unsigned code;
5738
5739       ASSURE_DESTINATION (safe_room);
5740       c = *charbuf++;
5741       if (ascii_compatible && ASCII_CHAR_P (c))
5742         EMIT_ONE_ASCII_BYTE (c);
5743       else if (CHAR_BYTE8_P (c))
5744         {
5745           c = CHAR_TO_BYTE8 (c);
5746           EMIT_ONE_BYTE (c);
5747         }
5748       else
5749         {
5750           charset = char_charset (c, charset_list, &code);
5751           if (charset)
5752             {
5753               if (CHARSET_DIMENSION (charset) == 1)
5754                 EMIT_ONE_BYTE (code);
5755               else if (CHARSET_DIMENSION (charset) == 2)
5756                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5757               else if (CHARSET_DIMENSION (charset) == 3)
5758                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5759               else
5760                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5761                                  (code >> 8) & 0xFF, code & 0xFF);
5762             }
5763           else
5764             {
5765               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5766                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5767               else
5768                 c = coding->default_char;
5769               EMIT_ONE_BYTE (c);
5770             }
5771         }
5772     }
5773
5774   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5775   coding->produced_char += produced_chars;
5776   coding->produced = dst - coding->destination;
5777   return 0;
5778 }
5779
5780 \f
5781 /*** 10. C library functions ***/
5782
5783 /* Setup coding context CODING from information about CODING_SYSTEM.
5784    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5785    CODING_SYSTEM is invalid, signal an error.  */
5786
5787 void
5788 setup_coding_system (coding_system, coding)
5789      Lisp_Object coding_system;
5790      struct coding_system *coding;
5791 {
5792   Lisp_Object attrs;
5793   Lisp_Object eol_type;
5794   Lisp_Object coding_type;
5795   Lisp_Object val;
5796
5797   if (NILP (coding_system))
5798     coding_system = Qundecided;
5799
5800   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5801
5802   attrs = CODING_ID_ATTRS (coding->id);
5803   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5804
5805   coding->mode = 0;
5806   coding->head_ascii = -1;
5807   if (VECTORP (eol_type))
5808     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5809                             | CODING_REQUIRE_DETECTION_MASK);
5810   else if (! EQ (eol_type, Qunix))
5811     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5812                             | CODING_REQUIRE_ENCODING_MASK);
5813   else
5814     coding->common_flags = 0;
5815   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5816     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5817   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5818     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5819   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5820     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5821
5822   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5823   coding->max_charset_id = SCHARS (val) - 1;
5824   coding->safe_charsets = SDATA (val);
5825   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5826   coding->carryover_bytes = 0;
5827
5828   coding_type = CODING_ATTR_TYPE (attrs);
5829   if (EQ (coding_type, Qundecided))
5830     {
5831       coding->detector = NULL;
5832       coding->decoder = decode_coding_raw_text;
5833       coding->encoder = encode_coding_raw_text;
5834       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5835     }
5836   else if (EQ (coding_type, Qiso_2022))
5837     {
5838       int i;
5839       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5840
5841       /* Invoke graphic register 0 to plane 0.  */
5842       CODING_ISO_INVOCATION (coding, 0) = 0;
5843       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5844       CODING_ISO_INVOCATION (coding, 1)
5845         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5846       /* Setup the initial status of designation.  */
5847       for (i = 0; i < 4; i++)
5848         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5849       /* Not single shifting initially.  */
5850       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5851       /* Beginning of buffer should also be regarded as bol. */
5852       CODING_ISO_BOL (coding) = 1;
5853       coding->detector = detect_coding_iso_2022;
5854       coding->decoder = decode_coding_iso_2022;
5855       coding->encoder = encode_coding_iso_2022;
5856       if (flags & CODING_ISO_FLAG_SAFE)
5857         coding->mode |= CODING_MODE_SAFE_ENCODING;
5858       coding->common_flags
5859         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5860             | CODING_REQUIRE_FLUSHING_MASK);
5861       if (flags & CODING_ISO_FLAG_COMPOSITION)
5862         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5863       if (flags & CODING_ISO_FLAG_DESIGNATION)
5864         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5865       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5866         {
5867           setup_iso_safe_charsets (attrs);
5868           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5869           coding->max_charset_id = SCHARS (val) - 1;
5870           coding->safe_charsets = SDATA (val);
5871         }
5872       CODING_ISO_FLAGS (coding) = flags;
5873       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5874       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5875       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5876       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5877     }
5878   else if (EQ (coding_type, Qcharset))
5879     {
5880       coding->detector = detect_coding_charset;
5881       coding->decoder = decode_coding_charset;
5882       coding->encoder = encode_coding_charset;
5883       coding->common_flags
5884         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5885     }
5886   else if (EQ (coding_type, Qutf_8))
5887     {
5888       val = AREF (attrs, coding_attr_utf_bom);
5889       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5890                                    : EQ (val, Qt) ? utf_with_bom
5891                                    : utf_without_bom);
5892       coding->detector = detect_coding_utf_8;
5893       coding->decoder = decode_coding_utf_8;
5894       coding->encoder = encode_coding_utf_8;
5895       coding->common_flags
5896         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5897       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5898         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5899     }
5900   else if (EQ (coding_type, Qutf_16))
5901     {
5902       val = AREF (attrs, coding_attr_utf_bom);
5903       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5904                                     : EQ (val, Qt) ? utf_with_bom
5905                                     : utf_without_bom);
5906       val = AREF (attrs, coding_attr_utf_16_endian);
5907       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5908                                        : utf_16_little_endian);
5909       CODING_UTF_16_SURROGATE (coding) = 0;
5910       coding->detector = detect_coding_utf_16;
5911       coding->decoder = decode_coding_utf_16;
5912       coding->encoder = encode_coding_utf_16;
5913       coding->common_flags
5914         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5915       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5916         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5917     }
5918   else if (EQ (coding_type, Qccl))
5919     {
5920       coding->detector = detect_coding_ccl;
5921       coding->decoder = decode_coding_ccl;
5922       coding->encoder = encode_coding_ccl;
5923       coding->common_flags
5924         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5925             | CODING_REQUIRE_FLUSHING_MASK);
5926     }
5927   else if (EQ (coding_type, Qemacs_mule))
5928     {
5929       coding->detector = detect_coding_emacs_mule;
5930       coding->decoder = decode_coding_emacs_mule;
5931       coding->encoder = encode_coding_emacs_mule;
5932       coding->common_flags
5933         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5934       coding->spec.emacs_mule.full_support = 1;
5935       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5936           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5937         {
5938           Lisp_Object tail, safe_charsets;
5939           int max_charset_id = 0;
5940
5941           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5942                tail = XCDR (tail))
5943             if (max_charset_id < XFASTINT (XCAR (tail)))
5944               max_charset_id = XFASTINT (XCAR (tail));
5945           safe_charsets = make_uninit_string (max_charset_id + 1);
5946           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5947           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5948                tail = XCDR (tail))
5949             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5950           coding->max_charset_id = max_charset_id;
5951           coding->safe_charsets = SDATA (safe_charsets);
5952           coding->spec.emacs_mule.full_support = 1;
5953         }
5954       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5955       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5956     }
5957   else if (EQ (coding_type, Qshift_jis))
5958     {
5959       coding->detector = detect_coding_sjis;
5960       coding->decoder = decode_coding_sjis;
5961       coding->encoder = encode_coding_sjis;
5962       coding->common_flags
5963         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5964     }
5965   else if (EQ (coding_type, Qbig5))
5966     {
5967       coding->detector = detect_coding_big5;
5968       coding->decoder = decode_coding_big5;
5969       coding->encoder = encode_coding_big5;
5970       coding->common_flags
5971         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5972     }
5973   else                          /* EQ (coding_type, Qraw_text) */
5974     {
5975       coding->detector = NULL;
5976       coding->decoder = decode_coding_raw_text;
5977       coding->encoder = encode_coding_raw_text;
5978       if (! EQ (eol_type, Qunix))
5979         {
5980           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5981           if (! VECTORP (eol_type))
5982             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5983         }
5984
5985     }
5986
5987   return;
5988 }
5989
5990 /* Return a list of charsets supported by CODING.  */
5991
5992 Lisp_Object
5993 coding_charset_list (coding)
5994      struct coding_system *coding;
5995 {
5996   Lisp_Object attrs, charset_list;
5997
5998   CODING_GET_INFO (coding, attrs, charset_list);
5999   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
6000     {
6001       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
6002
6003       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
6004         charset_list = Viso_2022_charset_list;
6005     }
6006   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
6007     {
6008       charset_list = Vemacs_mule_charset_list;
6009     }
6010   return charset_list;
6011 }
6012
6013
6014 /* Return a list of charsets supported by CODING-SYSTEM.  */
6015
6016 Lisp_Object
6017 coding_system_charset_list (coding_system)
6018      Lisp_Object coding_system;
6019 {
6020   int id;
6021   Lisp_Object attrs, charset_list;
6022
6023   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
6024   attrs = CODING_ID_ATTRS (id);
6025
6026   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
6027     {
6028       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
6029
6030       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
6031         charset_list = Viso_2022_charset_list;
6032       else
6033         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6034     }
6035   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
6036     {
6037       charset_list = Vemacs_mule_charset_list;
6038     }
6039   else
6040     {
6041       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6042     }
6043   return charset_list;
6044 }
6045
6046
6047 /* Return raw-text or one of its subsidiaries that has the same
6048    eol_type as CODING-SYSTEM.  */
6049
6050 Lisp_Object
6051 raw_text_coding_system (coding_system)
6052      Lisp_Object coding_system;
6053 {
6054   Lisp_Object spec, attrs;
6055   Lisp_Object eol_type, raw_text_eol_type;
6056
6057   if (NILP (coding_system))
6058     return Qraw_text;
6059   spec = CODING_SYSTEM_SPEC (coding_system);
6060   attrs = AREF (spec, 0);
6061
6062   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6063     return coding_system;
6064
6065   eol_type = AREF (spec, 2);
6066   if (VECTORP (eol_type))
6067     return Qraw_text;
6068   spec = CODING_SYSTEM_SPEC (Qraw_text);
6069   raw_text_eol_type = AREF (spec, 2);
6070   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6071           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6072           : AREF (raw_text_eol_type, 2));
6073 }
6074
6075
6076 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6077    does, return one of the subsidiary that has the same eol-spec as
6078    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
6079    inherit end-of-line format from the system's setting
6080    (system_eol_type).  */
6081
6082 Lisp_Object
6083 coding_inherit_eol_type (coding_system, parent)
6084      Lisp_Object coding_system, parent;
6085 {
6086   Lisp_Object spec, eol_type;
6087
6088   if (NILP (coding_system))
6089     coding_system = Qraw_text;
6090   spec = CODING_SYSTEM_SPEC (coding_system);
6091   eol_type = AREF (spec, 2);
6092   if (VECTORP (eol_type))
6093     {
6094       Lisp_Object parent_eol_type;
6095
6096       if (! NILP (parent))
6097         {
6098           Lisp_Object parent_spec;
6099
6100           parent_spec = CODING_SYSTEM_SPEC (parent);
6101           parent_eol_type = AREF (parent_spec, 2);
6102         }
6103       else
6104         parent_eol_type = system_eol_type;
6105       if (EQ (parent_eol_type, Qunix))
6106         coding_system = AREF (eol_type, 0);
6107       else if (EQ (parent_eol_type, Qdos))
6108         coding_system = AREF (eol_type, 1);
6109       else if (EQ (parent_eol_type, Qmac))
6110         coding_system = AREF (eol_type, 2);
6111     }
6112   return coding_system;
6113 }
6114
6115
6116 /* Check if text-conversion and eol-conversion of CODING_SYSTEM are
6117    decided for writing to a process.  If not, complement them, and
6118    return a new coding system.  */
6119
6120 Lisp_Object
6121 complement_process_encoding_system (coding_system)
6122      Lisp_Object coding_system;
6123 {
6124   Lisp_Object spec, attrs, coding_type, eol_type;
6125
6126   if (NILP (coding_system))
6127     coding_system = Qundecided;
6128   spec = CODING_SYSTEM_SPEC (coding_system);
6129   attrs = AREF (spec, 0);
6130   coding_type = CODING_ATTR_TYPE (attrs);
6131   eol_type = AREF (spec, 2);
6132
6133   if (EQ (coding_type, Qundecided))
6134     {
6135       /* We must decide the text-conversion part.  */
6136       if (CONSP (Vdefault_process_coding_system))
6137         {
6138           coding_system = XCDR (Vdefault_process_coding_system);
6139           if (! NILP (coding_system))
6140             {
6141               spec = CODING_SYSTEM_SPEC (coding_system);
6142               attrs = AREF (spec, 0);
6143               coding_type = CODING_ATTR_TYPE (attrs);
6144               eol_type = AREF (spec, 2);
6145             }
6146         }
6147       if (EQ (coding_type, Qundecided))
6148         {
6149           coding_system = preferred_coding_system ();
6150           spec = CODING_SYSTEM_SPEC (coding_system);
6151           attrs = AREF (spec, 0);
6152           coding_type = CODING_ATTR_TYPE (attrs);
6153           eol_type = AREF (spec, 2);
6154         }
6155       if (EQ (coding_type, Qundecided))
6156         {
6157           coding_system = Qraw_text;
6158           coding_type = Qraw_text;
6159           eol_type = Qnil;
6160         }
6161     }
6162   if (NILP (eol_type) || VECTORP (eol_type))
6163     {
6164       /* We must decide the eol-conversion part.  */
6165       coding_system = coding_inherit_eol_type (coding_system, Qnil);
6166     }
6167
6168   return coding_system;
6169 }
6170
6171
6172 /* Emacs has a mechanism to automatically detect a coding system if it
6173    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
6174    it's impossible to distinguish some coding systems accurately
6175    because they use the same range of codes.  So, at first, coding
6176    systems are grouped into the following categories:
6177
6178    o coding-category-emacs-mule
6179
6180         The category for a coding system which has the same code range
6181         as Emacs' internal format.  Assigned the coding-system (Lisp
6182         symbol) `emacs-mule' by default.
6183
6184    o coding-category-sjis
6185
6186         The category for a coding system which has the same code range
6187         as SJIS.  Assigned the coding-system (Lisp
6188         symbol) `japanese-shift-jis' by default.
6189
6190    o coding-category-iso-7
6191
6192         The category for a coding system which has the same code range
6193         as ISO2022 of 7-bit environment.  This doesn't use any locking
6194         shift and single shift functions.  This can encode/decode all
6195         charsets.  Assigned the coding-system (Lisp symbol)
6196         `iso-2022-7bit' by default.
6197
6198    o coding-category-iso-7-tight
6199
6200         Same as coding-category-iso-7 except that this can
6201         encode/decode only the specified charsets.
6202
6203    o coding-category-iso-8-1
6204
6205         The category for a coding system which has the same code range
6206         as ISO2022 of 8-bit environment and graphic plane 1 used only
6207         for DIMENSION1 charset.  This doesn't use any locking shift
6208         and single shift functions.  Assigned the coding-system (Lisp
6209         symbol) `iso-latin-1' by default.
6210
6211    o coding-category-iso-8-2
6212
6213         The category for a coding system which has the same code range
6214         as ISO2022 of 8-bit environment and graphic plane 1 used only
6215         for DIMENSION2 charset.  This doesn't use any locking shift
6216         and single shift functions.  Assigned the coding-system (Lisp
6217         symbol) `japanese-iso-8bit' by default.
6218
6219    o coding-category-iso-7-else
6220
6221         The category for a coding system which has the same code range
6222         as ISO2022 of 7-bit environment but uses locking shift or
6223         single shift functions.  Assigned the coding-system (Lisp
6224         symbol) `iso-2022-7bit-lock' by default.
6225
6226    o coding-category-iso-8-else
6227
6228         The category for a coding system which has the same code range
6229         as ISO2022 of 8-bit environment but uses locking shift or
6230         single shift functions.  Assigned the coding-system (Lisp
6231         symbol) `iso-2022-8bit-ss2' by default.
6232
6233    o coding-category-big5
6234
6235         The category for a coding system which has the same code range
6236         as BIG5.  Assigned the coding-system (Lisp symbol)
6237         `cn-big5' by default.
6238
6239    o coding-category-utf-8
6240
6241         The category for a coding system which has the same code range
6242         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6243         symbol) `utf-8' by default.
6244
6245    o coding-category-utf-16-be
6246
6247         The category for a coding system in which a text has an
6248         Unicode signature (cf. Unicode Standard) in the order of BIG
6249         endian at the head.  Assigned the coding-system (Lisp symbol)
6250         `utf-16-be' by default.
6251
6252    o coding-category-utf-16-le
6253
6254         The category for a coding system in which a text has an
6255         Unicode signature (cf. Unicode Standard) in the order of
6256         LITTLE endian at the head.  Assigned the coding-system (Lisp
6257         symbol) `utf-16-le' by default.
6258
6259    o coding-category-ccl
6260
6261         The category for a coding system of which encoder/decoder is
6262         written in CCL programs.  The default value is nil, i.e., no
6263         coding system is assigned.
6264
6265    o coding-category-binary
6266
6267         The category for a coding system not categorized in any of the
6268         above.  Assigned the coding-system (Lisp symbol)
6269         `no-conversion' by default.
6270
6271    Each of them is a Lisp symbol whose value is an actual
6272    `coding-system' (also a Lisp symbol) assigned by a user.
6273    What Emacs does actually is to detect a category of coding system.
6274    Then, it uses a `coding-system' assigned to it.  If Emacs can't
6275    decide only one possible category, it selects a category of the
6276    highest priority.  Priorities of categories are also specified by a
6277    user in a Lisp variable `coding-category-list'.
6278
6279 */
6280
6281 #define EOL_SEEN_NONE   0   /* No end-of-line has been seen.  */
6282 #define EOL_SEEN_LF     1   /* A lone LF ('\n') was seen.  */
6283 #define EOL_SEEN_CR     2   /* A CR ('\r') not followed by LF was seen.  */
6284 #define EOL_SEEN_CRLF   4   /* A CR LF pair was seen.  Values are distinct bits so they can be OR'ed.  */
6285
6286 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6287    SOURCE is encoded.  If CATEGORY is one of
6288    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6289    two-byte, else they are encoded by one-byte.
6290
6291    Return one of EOL_SEEN_XXX.  */
6292
6293 #define MAX_EOL_CHECK_COUNT 3
6294
6295 static int
6296 detect_eol (source, src_bytes, category)
6297      const unsigned char *source;
6298      EMACS_INT src_bytes;
6299      enum coding_category category;
6300 {
6301   const unsigned char *src = source, *src_end = src + src_bytes;
6302   unsigned char c;
6303   int total  = 0;
6304   int eol_seen = EOL_SEEN_NONE;
6305
6306   if ((1 << category) & CATEGORY_MASK_UTF_16)
6307     {
6308       int msb, lsb;
6309
6310       msb = category == (coding_category_utf_16_le
6311                          | coding_category_utf_16_le_nosig);
6312       lsb = 1 - msb;
6313
6314       while (src + 1 < src_end)
6315         {
6316           c = src[lsb];
6317           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6318             {
6319               int this_eol;
6320
6321               if (c == '\n')
6322                 this_eol = EOL_SEEN_LF;
6323               else if (src + 3 >= src_end
6324                        || src[msb + 2] != 0
6325                        || src[lsb + 2] != '\n')
6326                 this_eol = EOL_SEEN_CR;
6327               else
6328                 {
6329                   this_eol = EOL_SEEN_CRLF;
6330                   src += 2;
6331                 }
6332
6333               if (eol_seen == EOL_SEEN_NONE)
6334                 /* This is the first end-of-line.  */
6335                 eol_seen = this_eol;
6336               else if (eol_seen != this_eol)
6337                 {
6338                   /* The found type is different from what found before.
6339                      Allow for stray ^M characters in DOS EOL files.  */
6340                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6341                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6342                     eol_seen = EOL_SEEN_CRLF;
6343                   else
6344                     {
6345                       eol_seen = EOL_SEEN_LF;
6346                       break;
6347                     }
6348                 }
6349               if (++total == MAX_EOL_CHECK_COUNT)
6350                 break;
6351             }
6352           src += 2;
6353         }
6354     }
6355   else
6356     {
6357       while (src < src_end)
6358         {
6359           c = *src++;
6360           if (c == '\n' || c == '\r')
6361             {
6362               int this_eol;
6363
6364               if (c == '\n')
6365                 this_eol = EOL_SEEN_LF;
6366               else if (src >= src_end || *src != '\n')
6367                 this_eol = EOL_SEEN_CR;
6368               else
6369                 this_eol = EOL_SEEN_CRLF, src++;
6370
6371               if (eol_seen == EOL_SEEN_NONE)
6372                 /* This is the first end-of-line.  */
6373                 eol_seen = this_eol;
6374               else if (eol_seen != this_eol)
6375                 {
6376                   /* The found type is different from what found before.
6377                      Allow for stray ^M characters in DOS EOL files.  */
6378                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6379                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6380                     eol_seen = EOL_SEEN_CRLF;
6381                   else
6382                     {
6383                       eol_seen = EOL_SEEN_LF;
6384                       break;
6385                     }
6386                 }
6387               if (++total == MAX_EOL_CHECK_COUNT)
6388                 break;
6389             }
6390         }
6391     }
6392   return eol_seen;
6393 }
6394
6395
6396 static Lisp_Object
6397 adjust_coding_eol_type (coding, eol_seen)
6398      struct coding_system *coding;
6399      int eol_seen;
6400 {
6401   Lisp_Object eol_type;
6402
6403   eol_type = CODING_ID_EOL_TYPE (coding->id);
6404   if (eol_seen & EOL_SEEN_LF)
6405     {
6406       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6407       eol_type = Qunix;
6408     }
6409   else if (eol_seen & EOL_SEEN_CRLF)
6410     {
6411       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6412       eol_type = Qdos;
6413     }
6414   else if (eol_seen & EOL_SEEN_CR)
6415     {
6416       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6417       eol_type = Qmac;
6418     }
6419   return eol_type;
6420 }
6421
6422 /* Detect how a text specified in CODING is encoded.  If a coding
6423    system is detected, update fields of CODING by the detected coding
6424    system.

        What is done depends on the coding system CODING->id names:

        - type `undecided': the source bytes are pre-scanned, then the
          detectors of the categories are tried in the order given by
          coding_priorities;

        - category utf-8-auto: detect_coding_utf_8 chooses between the
          car (signature found) and the cdr of the coding_attr_utf_bom
          attribute, when that attribute is a cons;

        - category utf-16-auto: likewise with detect_coding_utf_16, the
          car being used for little endian and the cdr for big endian.

        Any other coding system is left unchanged.  CODING->mode is saved
        on entry and restored before returning.  */
6425
6426 void
6427 detect_coding (coding)
6428      struct coding_system *coding;
6429 {
6430   const unsigned char *src, *src_end;
6431   int saved_mode = coding->mode;
6432
6433   coding->consumed = coding->consumed_char = 0;
6434   coding->produced = coding->produced_char = 0;
6435   coding_set_source (coding);
6436
6437   src_end = coding->source + coding->src_bytes;
6438   coding->head_ascii = 0;
6439
6440   /* If we have not yet decided the text encoding type, detect it
6441      now.  */
6442   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6443     {
6444       int c, i;
6445       struct coding_detection_info detect_info;
6446       int null_byte_found = 0, eight_bit_found = 0;
6447
6448       detect_info.checked = detect_info.found = detect_info.rejected = 0;
            /* Pre-scan.  coding->head_ascii counts the bytes that precede
               the first 8-bit byte.  The scan stops early once both a null
               byte and an 8-bit byte have been seen, or when the ISO-2022
               detector reports that it has scanned the whole data.  */
6449       for (src = coding->source; src < src_end; src++)
6450         {
6451           c = *src;
6452           if (c & 0x80)
6453             {
6454               eight_bit_found = 1;
6455               if (null_byte_found)
6456                 break;
6457             }
6458           else if (c < 0x20)
6459             {
                    /* The ISO-2022 detector is tried at most once: it is
                       skipped as soon as detect_info.checked is nonzero.  */
6460               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6461                   && ! inhibit_iso_escape_detection
6462                   && ! detect_info.checked)
6463                 {
6464                   if (detect_coding_iso_2022 (coding, &detect_info))
6465                     {
6466                       /* We have scanned the whole data.  */
6467                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6468                         {
6469                           /* We didn't find an 8-bit code.  We may
6470                              have found a null-byte, but it's very
6471                              rare that a binary file conforms to
6472                              ISO-2022.  */
6473                           src = src_end;
6474                           coding->head_ascii = src - coding->source;
6475                         }
                            /* Only the ISO escape categories stay candidates.  */
6476                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6477                       break;
6478                     }
6479                 }
6480               else if (! c && !inhibit_null_byte_detection)
6481                 {
6482                   null_byte_found = 1;
6483                   if (eight_bit_found)
6484                     break;
6485                 }
6486               if (! eight_bit_found)
6487                 coding->head_ascii++;
6488             }
6489           else if (! eight_bit_found)
6490             coding->head_ascii++;
6491         }
6492
            /* Nothing more to do when every byte was counted in head_ascii
               and neither a null byte nor any category was found.  */
6493       if (null_byte_found || eight_bit_found
6494           || coding->head_ascii < coding->src_bytes
6495           || detect_info.found)
6496         {
6497           enum coding_category category;
6498           struct coding_system *this;
6499
6500           if (coding->head_ascii == coding->src_bytes)
6501             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6502             for (i = 0; i < coding_category_raw_text; i++)
6503               {
6504                 category = coding_priorities[i];
6505                 this = coding_categories + category;
6506                 if (detect_info.found & (1 << category))
6507                   break;
6508               }
6509           else
6510             {
6511               if (null_byte_found)
6512                 {
                        /* Leave only the UTF-16 categories as candidates:
                           all others are marked checked and rejected.  */
6513                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6514                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6515                 }
                    /* Try the categories in priority order and stop at the
                       first one that is found.  A detector is run only for
                       a category that has not been checked yet.  */
6516               for (i = 0; i < coding_category_raw_text; i++)
6517                 {
6518                   category = coding_priorities[i];
6519                   this = coding_categories + category;
6520                   if (this->id < 0)
6521                     {
6522                       /* No coding system of this category is defined.  */
6523                       detect_info.rejected |= (1 << category);
6524                     }
6525                   else if (category >= coding_category_raw_text)
6526                     continue;
6527                   else if (detect_info.checked & (1 << category))
6528                     {
6529                       if (detect_info.found & (1 << category))
6530                         break;
6531                     }
6532                   else if ((*(this->detector)) (coding, &detect_info)
6533                            && detect_info.found & (1 << category))
6534                     {
                            /* NOTE(review): CATEGORY is reassigned here but
                               THIS is not, and CATEGORY is not read again
                               in this function, so the reassignment has no
                               visible effect -- confirm the intent.  */
6535                       if (category == coding_category_utf_16_auto)
6536                         {
6537                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6538                             category = coding_category_utf_16_le;
6539                           else
6540                             category = coding_category_utf_16_be;
6541                         }
6542                       break;
6543                     }
6544                 }
6545             }
6546
                /* I is below coding_category_raw_text only when one of the
                   loops above stopped at a found category; THIS is then the
                   entry of that category.  Otherwise fall back to
                   no-conversion, to raw-text, or to the first category in
                   priority order that has not been rejected.  */
6547           if (i < coding_category_raw_text)
6548             setup_coding_system (CODING_ID_NAME (this->id), coding);
6549           else if (null_byte_found)
6550             setup_coding_system (Qno_conversion, coding);
6551           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6552                    == CATEGORY_MASK_ANY)
6553             setup_coding_system (Qraw_text, coding);
6554           else if (detect_info.rejected)
6555             for (i = 0; i < coding_category_raw_text; i++)
6556               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6557                 {
6558                   this = coding_categories + coding_priorities[i];
6559                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6560                   break;
6561                 }
6562         }
6563     }
6564   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6565            == coding_category_utf_8_auto)
6566     {
6567       Lisp_Object coding_systems;
6568       struct coding_detection_info detect_info;
6569
            /* CODING_SYSTEMS is expected to be a cons: its car is used
               when a UTF-8 signature is found, its cdr otherwise.  */
6570       coding_systems
6571         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6572       detect_info.found = detect_info.rejected = 0;
6573       coding->head_ascii = 0;
6574       if (CONSP (coding_systems)
6575           && detect_coding_utf_8 (coding, &detect_info))
6576         {
6577           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6578             setup_coding_system (XCAR (coding_systems), coding);
6579           else
6580             setup_coding_system (XCDR (coding_systems), coding);
6581         }
6582     }
6583   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6584            == coding_category_utf_16_auto)
6585     {
6586       Lisp_Object coding_systems;
6587       struct coding_detection_info detect_info;
6588
            /* CODING_SYSTEMS is expected to be a cons: its car is used for
               little endian, its cdr for big endian.  CODING is left
               unchanged when neither is found.  */
6589       coding_systems
6590         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6591       detect_info.found = detect_info.rejected = 0;
6592       coding->head_ascii = 0;
6593       if (CONSP (coding_systems)
6594           && detect_coding_utf_16 (coding, &detect_info))
6595         {
6596           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6597             setup_coding_system (XCAR (coding_systems), coding);
6598           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6599             setup_coding_system (XCDR (coding_systems), coding);
6600         }
6601     }
6602   coding->mode = saved_mode;
6603 }
6604
6605
6606 static void
6607 decode_eol (coding)
6608      struct coding_system *coding;
6609 {
6610   Lisp_Object eol_type;
6611   unsigned char *p, *pbeg, *pend;
6612
6613   eol_type = CODING_ID_EOL_TYPE (coding->id);
6614   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6615     return;
6616
6617   if (NILP (coding->dst_object))
6618     pbeg = coding->destination;
6619   else
6620     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6621   pend = pbeg + coding->produced;
6622
6623   if (VECTORP (eol_type))
6624     {
6625       int eol_seen = EOL_SEEN_NONE;
6626
6627       for (p = pbeg; p < pend; p++)
6628         {
6629           if (*p == '\n')
6630             eol_seen |= EOL_SEEN_LF;
6631           else if (*p == '\r')
6632             {
6633               if (p + 1 < pend && *(p + 1) == '\n')
6634                 {
6635                   eol_seen |= EOL_SEEN_CRLF;
6636                   p++;
6637                 }
6638               else
6639                 eol_seen |= EOL_SEEN_CR;
6640             }
6641         }
6642       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6643       if ((eol_seen & EOL_SEEN_CRLF) != 0
6644           && (eol_seen & EOL_SEEN_CR) != 0
6645           && (eol_seen & EOL_SEEN_LF) == 0)
6646         eol_seen = EOL_SEEN_CRLF;
6647       else if (eol_seen != EOL_SEEN_NONE
6648           && eol_seen != EOL_SEEN_LF
6649           && eol_seen != EOL_SEEN_CRLF
6650           && eol_seen != EOL_SEEN_CR)
6651         eol_seen = EOL_SEEN_LF;
6652       if (eol_seen != EOL_SEEN_NONE)
6653         eol_type = adjust_coding_eol_type (coding, eol_seen);
6654     }
6655
6656   if (EQ (eol_type, Qmac))
6657     {
6658       for (p = pbeg; p < pend; p++)
6659         if (*p == '\r')
6660           *p = '\n';
6661     }
6662   else if (EQ (eol_type, Qdos))
6663     {
6664       int n = 0;
6665
6666       if (NILP (coding->dst_object))
6667         {
6668           /* Start deleting '\r' from the tail to minimize the memory
6669              movement.  */
6670           for (p = pend - 2; p >= pbeg; p--)
6671             if (*p == '\r')
6672               {
6673                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6674                 n++;
6675               }
6676         }
6677       else
6678         {
6679           int pos_byte = coding->dst_pos_byte;
6680           int pos = coding->dst_pos;
6681           int pos_end = pos + coding->produced_char - 1;
6682
6683           while (pos < pos_end)
6684             {
6685               p = BYTE_POS_ADDR (pos_byte);
6686               if (*p == '\r' && p[1] == '\n')
6687                 {
6688                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6689                   n++;
6690                   pos_end--;
6691                 }
6692               pos++;
6693               if (coding->dst_multibyte)
6694                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6695               else
6696                 pos_byte++;
6697             }
6698         }
6699       coding->produced -= n;
6700       coding->produced_char -= n;
6701     }
6702 }
6703
6704
6705 /* Return a translation table (or list of them) from coding system
6706    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6707    decoding (ENCODEP is zero). */
6708
6709 static Lisp_Object
6710 get_translation_table (attrs, encodep, max_lookup)
6711      Lisp_Object attrs;
6712      int encodep, *max_lookup;
6713 {
6714   Lisp_Object standard, translation_table;
6715   Lisp_Object val;
6716
6717   if (NILP (Venable_character_translation))
6718     {
6719       if (max_lookup)
6720         *max_lookup = 0;
6721       return Qnil;
6722     }
6723   if (encodep)
6724     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6725       standard = Vstandard_translation_table_for_encode;
6726   else
6727     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6728       standard = Vstandard_translation_table_for_decode;
6729   if (NILP (translation_table))
6730     translation_table = standard;
6731   else
6732     {
6733       if (SYMBOLP (translation_table))
6734         translation_table = Fget (translation_table, Qtranslation_table);
6735       else if (CONSP (translation_table))
6736         {
6737           translation_table = Fcopy_sequence (translation_table);
6738           for (val = translation_table; CONSP (val); val = XCDR (val))
6739             if (SYMBOLP (XCAR (val)))
6740               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6741         }
6742       if (CHAR_TABLE_P (standard))
6743         {
6744           if (CONSP (translation_table))
6745             translation_table = nconc2 (translation_table,
6746                                         Fcons (standard, Qnil));
6747           else
6748             translation_table = Fcons (translation_table,
6749                                        Fcons (standard, Qnil));
6750         }
6751     }
6752
6753   if (max_lookup)
6754     {
6755       *max_lookup = 1;
6756       if (CHAR_TABLE_P (translation_table)
6757           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6758         {
6759           val = XCHAR_TABLE (translation_table)->extras[1];
6760           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6761             *max_lookup = XFASTINT (val);
6762         }
6763       else if (CONSP (translation_table))
6764         {
6765           Lisp_Object tail, val;
6766
6767           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6768             if (CHAR_TABLE_P (XCAR (tail))
6769                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6770               {
6771                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6772                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6773                   *max_lookup = XFASTINT (val);
6774               }
6775         }
6776     }
6777   return translation_table;
6778 }
6779 /* Look up character C in TABLE and set TRANS to the result.

        TABLE is a char-table or a list; any other value leaves C alone
        and sets TRANS to Qnil.  If the entry found for C is a character,
        C is replaced by it and TRANS is set to Qnil.  Otherwise TRANS is
        the entry itself, Qnil when there is none.

        For a list, elements that are char-tables are consulted in order,
        each one seeing C as updated by the previous ones; the search
        stops at the first entry that is neither nil nor a character.

        TABLE, C and TRANS are evaluated more than once, and C and TRANS
        must be lvalues.  The expansion declares a local named `tail'.  */
6780 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
6781   do {                                                          \
6782     trans = Qnil;                                               \
6783     if (CHAR_TABLE_P (table))                                   \
6784       {                                                         \
6785         trans = CHAR_TABLE_REF (table, c);                      \
6786         if (CHARACTERP (trans))                                 \
6787           c = XFASTINT (trans), trans = Qnil;                   \
6788       }                                                         \
6789     else if (CONSP (table))                                     \
6790       {                                                         \
6791         Lisp_Object tail;                                       \
6792                                                                 \
6793         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
6794           if (CHAR_TABLE_P (XCAR (tail)))                       \
6795             {                                                   \
6796               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
6797               if (CHARACTERP (trans))                           \
6798                 c = XFASTINT (trans), trans = Qnil;             \
6799               else if (! NILP (trans))                          \
6800                 break;                                          \
6801             }                                                   \
6802       }                                                         \
6803   } while (0)
6804
6805
6806 /* Return a translation of character(s) at BUF according to TRANS.
6807    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6808    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6809    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6810    translation is found, and Qnil if not found..
6811    If BUF is too short to lookup characters in FROM, return Qt.  */
6812
6813 static Lisp_Object
6814 get_translation (trans, buf, buf_end)
6815      Lisp_Object trans;
6816      int *buf, *buf_end;
6817 {
6818
6819   if (INTEGERP (trans))
6820     return trans;
6821   for (; CONSP (trans); trans = XCDR (trans))
6822     {
6823       Lisp_Object val = XCAR (trans);
6824       Lisp_Object from = XCAR (val);
6825       int len = ASIZE (from);
6826       int i;
6827
6828       for (i = 0; i < len; i++)
6829         {
6830           if (buf + i == buf_end)
6831             return Qt;
6832           if (XINT (AREF (from, i)) != buf[i])
6833             break;
6834         }
6835       if (i == len)
6836         return val;
6837     }
6838   return Qnil;
6839 }
6840
6841
6842 static int
6843 produce_chars (coding, translation_table, last_block)
6844      struct coding_system *coding;
6845      Lisp_Object translation_table;
6846      int last_block;
6847 {
6848   unsigned char *dst = coding->destination + coding->produced;
6849   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6850   EMACS_INT produced;
6851   EMACS_INT produced_chars = 0;
6852   int carryover = 0;
6853
6854   if (! coding->chars_at_source)
6855     {
6856       /* Source characters are in coding->charbuf.  */
6857       int *buf = coding->charbuf;
6858       int *buf_end = buf + coding->charbuf_used;
6859
6860       if (EQ (coding->src_object, coding->dst_object))
6861         {
6862           coding_set_source (coding);
6863           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6864         }
6865
6866       while (buf < buf_end)
6867         {
6868           int c = *buf, i;
6869
6870           if (c >= 0)
6871             {
6872               int from_nchars = 1, to_nchars = 1;
6873               Lisp_Object trans = Qnil;
6874
6875               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6876               if (! NILP (trans))
6877                 {
6878                   trans = get_translation (trans, buf, buf_end);
6879                   if (INTEGERP (trans))
6880                     c = XINT (trans);
6881                   else if (CONSP (trans))
6882                     {
6883                       from_nchars = ASIZE (XCAR (trans));
6884                       trans = XCDR (trans);
6885                       if (INTEGERP (trans))
6886                         c = XINT (trans);
6887                       else
6888                         {
6889                           to_nchars = ASIZE (trans);
6890                           c = XINT (AREF (trans, 0));
6891                         }
6892                     }
6893                   else if (EQ (trans, Qt) && ! last_block)
6894                     break;
6895                 }
6896
6897               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6898                 {
6899                   dst = alloc_destination (coding,
6900                                            buf_end - buf
6901                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6902                                            dst);
6903                   if (EQ (coding->src_object, coding->dst_object))
6904                     {
6905                       coding_set_source (coding);
6906                       dst_end = (((unsigned char *) coding->source)
6907                                  + coding->consumed);
6908                     }
6909                   else
6910                     dst_end = coding->destination + coding->dst_bytes;
6911                 }
6912
6913               for (i = 0; i < to_nchars; i++)
6914                 {
6915                   if (i > 0)
6916                     c = XINT (AREF (trans, i));
6917                   if (coding->dst_multibyte
6918                       || ! CHAR_BYTE8_P (c))
6919                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6920                   else
6921                     *dst++ = CHAR_TO_BYTE8 (c);
6922                 }
6923               produced_chars += to_nchars;
6924               buf += from_nchars;
6925             }
6926           else
6927             /* This is an annotation datum.  (-C) is the length.  */
6928             buf += -c;
6929         }
6930       carryover = buf_end - buf;
6931     }
6932   else
6933     {
6934       /* Source characters are at coding->source.  */
6935       const unsigned char *src = coding->source;
6936       const unsigned char *src_end = src + coding->consumed;
6937
6938       if (EQ (coding->dst_object, coding->src_object))
6939         dst_end = (unsigned char *) src;
6940       if (coding->src_multibyte != coding->dst_multibyte)
6941         {
6942           if (coding->src_multibyte)
6943             {
6944               int multibytep = 1;
6945               EMACS_INT consumed_chars = 0;
6946
6947               while (1)
6948                 {
6949                   const unsigned char *src_base = src;
6950                   int c;
6951
6952                   ONE_MORE_BYTE (c);
6953                   if (dst == dst_end)
6954                     {
6955                       if (EQ (coding->src_object, coding->dst_object))
6956                         dst_end = (unsigned char *) src;
6957                       if (dst == dst_end)
6958                         {
6959                           EMACS_INT offset = src - coding->source;
6960
6961                           dst = alloc_destination (coding, src_end - src + 1,
6962                                                    dst);
6963                           dst_end = coding->destination + coding->dst_bytes;
6964                           coding_set_source (coding);
6965                           src = coding->source + offset;
6966                           src_end = coding->source + coding->src_bytes;
6967                           if (EQ (coding->src_object, coding->dst_object))
6968                             dst_end = (unsigned char *) src;
6969                         }
6970                     }
6971                   *dst++ = c;
6972                   produced_chars++;
6973                 }
6974             no_more_source:
6975               ;
6976             }
6977           else
6978             while (src < src_end)
6979               {
6980                 int multibytep = 1;
6981                 int c = *src++;
6982
6983                 if (dst >= dst_end - 1)
6984                   {
6985                     if (EQ (coding->src_object, coding->dst_object))
6986                       dst_end = (unsigned char *) src;
6987                     if (dst >= dst_end - 1)
6988                       {
6989                         EMACS_INT offset = src - coding->source;
6990                         EMACS_INT more_bytes;
6991
6992                         if (EQ (coding->src_object, coding->dst_object))
6993                           more_bytes = ((src_end - src) / 2) + 2;
6994                         else
6995                           more_bytes = src_end - src + 2;
6996                         dst = alloc_destination (coding, more_bytes, dst);
6997                         dst_end = coding->destination + coding->dst_bytes;
6998                         coding_set_source (coding);
6999                         src = coding->source + offset;
7000                         src_end = coding->source + coding->src_bytes;
7001                         if (EQ (coding->src_object, coding->dst_object))
7002                           dst_end = (unsigned char *) src;
7003                       }
7004                   }
7005                 EMIT_ONE_BYTE (c);
7006               }
7007         }
7008       else
7009         {
7010           if (!EQ (coding->src_object, coding->dst_object))
7011             {
7012               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
7013
7014               if (require > 0)
7015                 {
7016                   EMACS_INT offset = src - coding->source;
7017
7018                   dst = alloc_destination (coding, require, dst);
7019                   coding_set_source (coding);
7020                   src = coding->source + offset;
7021                   src_end = coding->source + coding->src_bytes;
7022                 }
7023             }
7024           produced_chars = coding->consumed_char;
7025           while (src < src_end)
7026             *dst++ = *src++;
7027         }
7028     }
7029
7030   produced = dst - (coding->destination + coding->produced);
7031   if (BUFFERP (coding->dst_object) && produced_chars > 0)
7032     insert_from_gap (produced_chars, produced);
7033   coding->produced += produced;
7034   coding->produced_char += produced_chars;
7035   return carryover;
7036 }
7037
7038 /* Compose text in CODING->object according to the annotation data at
7039    CHARBUF.  CHARBUF is an array:
7040      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
7041  */
7042
7043 static INLINE void
7044 produce_composition (coding, charbuf, pos)
7045      struct coding_system *coding;
7046      int *charbuf;
7047      EMACS_INT pos;
7048 {
7049   int len;
7050   EMACS_INT to;
7051   enum composition_method method;
7052   Lisp_Object components;
7053
7054   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
7055   to = pos + charbuf[2];
7056   method = (enum composition_method) (charbuf[4]);
7057
7058   if (method == COMPOSITION_RELATIVE)
7059     components = Qnil;
7060   else
7061     {
7062       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
7063       int i, j;
7064
7065       if (method == COMPOSITION_WITH_RULE)
7066         len = charbuf[2] * 3 - 2;
7067       charbuf += MAX_ANNOTATION_LENGTH;
7068       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
7069       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
7070         {
7071           if (charbuf[i] >= 0)
7072             args[j] = make_number (charbuf[i]);
7073           else
7074             {
7075               i++;
7076               args[j] = make_number (charbuf[i] % 0x100);
7077             }
7078         }
7079       components = (i == j ? Fstring (j, args) : Fvector (j, args));
7080     }
7081   compose_text (pos, to, components, Qnil, coding->dst_object);
7082 }
7083
7084
7085 /* Put `charset' property on text in CODING->object according to
7086    the annotation data at CHARBUF.  CHARBUF is an array:
7087      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
7088  */
7089
7090 static INLINE void
7091 produce_charset (coding, charbuf, pos)
7092      struct coding_system *coding;
7093      int *charbuf;
7094      EMACS_INT pos;
7095 {
7096   EMACS_INT from = pos - charbuf[2];
7097   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
7098
7099   Fput_text_property (make_number (from), make_number (pos),
7100                       Qcharset, CHARSET_NAME (charset),
7101                       coding->dst_object);
7102 }
7103
7104
7105 #define CHARBUF_SIZE 0x4000   /* Number of ints first tried for CODING->charbuf.  */
7106 /* Set CODING->charbuf to an array of ints obtained from alloca, and
        CODING->charbuf_size to its length.  The length tried first is
        CHARBUF_SIZE; it is halved after each alloca that yields NULL, as
        long as it stays above 1024.

        If no array was obtained, CODING_RESULT_INSUFFICIENT_MEM is
        recorded and the macro executes `return CODING->result', so it
        can be used only in a function that returns such a value.

        NOTE(review): alloca normally does not report failure by
        returning NULL, so the retry loop and the failure path may never
        be taken -- confirm on the supported platforms.  */
7107 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
7108   do {                                                                  \
7109     int size = CHARBUF_SIZE;                                            \
7110                                                                         \
7111     coding->charbuf = NULL;                                             \
7112     while (size > 1024)                                                 \
7113       {                                                                 \
7114         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
7115         if (coding->charbuf)                                            \
7116           break;                                                        \
7117         size >>= 1;                                                     \
7118       }                                                                 \
7119     if (! coding->charbuf)                                              \
7120       {                                                                 \
7121         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
7122         return coding->result;                                          \
7123       }                                                                 \
7124     coding->charbuf_size = size;                                        \
7125   } while (0)
7126
7127
7128 static void
7129 produce_annotation (coding, pos)
7130      struct coding_system *coding;
7131      EMACS_INT pos;
7132 {
7133   int *charbuf = coding->charbuf;
7134   int *charbuf_end = charbuf + coding->charbuf_used;
7135
7136   if (NILP (coding->dst_object))
7137     return;
7138
7139   while (charbuf < charbuf_end)
7140     {
7141       if (*charbuf >= 0)
7142         pos++, charbuf++;
7143       else
7144         {
7145           int len = -*charbuf;
7146
7147           if (len > 2)
7148             switch (charbuf[1])
7149               {
7150               case CODING_ANNOTATE_COMPOSITION_MASK:
7151                 produce_composition (coding, charbuf, pos);
7152                 break;
7153               case CODING_ANNOTATE_CHARSET_MASK:
7154                 produce_charset (coding, charbuf, pos);
7155                 break;
7156               }
7157           charbuf += len;
7158         }
7159     }
7160 }
7161
7162 /* Decode the data at CODING->src_object into CODING->dst_object.
7163    CODING->src_object is a buffer, a string, or nil.
7164    CODING->dst_object is a buffer.
7165
7166    If CODING->src_object is a buffer, it must be the current buffer.
7167    In this case, if CODING->src_pos is positive, it is a position of
7168    the source text in the buffer, otherwise, the source text is in the
7169    gap area of the buffer, and CODING->src_pos specifies the offset of
7170    the text from GPT (which must be the same as PT).  If this is the
7171    same buffer as CODING->dst_object, CODING->src_pos must be
7172    negative.
7173
7174    If CODING->src_object is a string, CODING->src_pos is an index to
7175    that string.
7176
7177    If CODING->src_object is nil, CODING->source must already point to
7178    the non-relocatable memory area.  In this case, CODING->src_pos is
7179    an offset from CODING->source.
7180
7181    The decoded data is inserted at the current point of the buffer
7182    CODING->dst_object.
7183 */
7184
7185 static int
7186 decode_coding (coding)
7187      struct coding_system *coding;
7188 {
7189   Lisp_Object attrs;
7190   Lisp_Object undo_list;
7191   Lisp_Object translation_table;
7192   struct ccl_spec cclspec;
7193   int carryover;
7194   int i;
7195
7196   if (BUFFERP (coding->src_object)
7197       && coding->src_pos > 0
7198       && coding->src_pos < GPT
7199       && coding->src_pos + coding->src_chars > GPT)
7200     move_gap_both (coding->src_pos, coding->src_pos_byte);
7201
7202   undo_list = Qt;
7203   if (BUFFERP (coding->dst_object))
7204     {
7205       if (current_buffer != XBUFFER (coding->dst_object))
7206         set_buffer_internal (XBUFFER (coding->dst_object));
7207       if (GPT != PT)
7208         move_gap_both (PT, PT_BYTE);
7209       undo_list = current_buffer->undo_list;
7210       current_buffer->undo_list = Qt;
7211     }
7212
7213   coding->consumed = coding->consumed_char = 0;
7214   coding->produced = coding->produced_char = 0;
7215   coding->chars_at_source = 0;
7216   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7217   coding->errors = 0;
7218
7219   ALLOC_CONVERSION_WORK_AREA (coding);
7220
7221   attrs = CODING_ID_ATTRS (coding->id);
7222   translation_table = get_translation_table (attrs, 0, NULL);
7223
7224   carryover = 0;
7225   if (coding->decoder == decode_coding_ccl)
7226     {
7227       coding->spec.ccl = &cclspec;
7228       setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7229     }
7230   do
7231     {
7232       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7233
7234       coding_set_source (coding);
7235       coding->annotated = 0;
7236       coding->charbuf_used = carryover;
7237       (*(coding->decoder)) (coding);
7238       coding_set_destination (coding);
7239       carryover = produce_chars (coding, translation_table, 0);
7240       if (coding->annotated)
7241         produce_annotation (coding, pos);
7242       for (i = 0; i < carryover; i++)
7243         coding->charbuf[i]
7244           = coding->charbuf[coding->charbuf_used - carryover + i];
7245     }
7246   while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7247          || (coding->consumed < coding->src_bytes
7248              && (coding->result == CODING_RESULT_SUCCESS
7249                  || coding->result == CODING_RESULT_INVALID_SRC)));
7250
7251   if (carryover > 0)
7252     {
7253       coding_set_destination (coding);
7254       coding->charbuf_used = carryover;
7255       produce_chars (coding, translation_table, 1);
7256     }
7257
7258   coding->carryover_bytes = 0;
7259   if (coding->consumed < coding->src_bytes)
7260     {
7261       int nbytes = coding->src_bytes - coding->consumed;
7262       const unsigned char *src;
7263
7264       coding_set_source (coding);
7265       coding_set_destination (coding);
7266       src = coding->source + coding->consumed;
7267
7268       if (coding->mode & CODING_MODE_LAST_BLOCK)
7269         {
7270           /* Flush out unprocessed data as binary chars.  We are sure
7271              that the number of data is less than the size of
7272              coding->charbuf.  */
7273           coding->charbuf_used = 0;
7274           coding->chars_at_source = 0;
7275
7276           while (nbytes-- > 0)
7277             {
7278               int c = *src++;
7279
7280               if (c & 0x80)
7281                 c = BYTE8_TO_CHAR (c);
7282               coding->charbuf[coding->charbuf_used++] = c;
7283             }
7284           produce_chars (coding, Qnil, 1);
7285         }
7286       else
7287         {
7288           /* Record unprocessed bytes in coding->carryover.  We are
7289              sure that the number of data is less than the size of
7290              coding->carryover.  */
7291           unsigned char *p = coding->carryover;
7292
7293           if (nbytes > sizeof coding->carryover)
7294             nbytes = sizeof coding->carryover;
7295           coding->carryover_bytes = nbytes;
7296           while (nbytes-- > 0)
7297             *p++ = *src++;
7298         }
7299       coding->consumed = coding->src_bytes;
7300     }
7301
7302   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7303       && !inhibit_eol_conversion)
7304     decode_eol (coding);
7305   if (BUFFERP (coding->dst_object))
7306     {
7307       current_buffer->undo_list = undo_list;
7308       record_insert (coding->dst_pos, coding->produced_char);
7309     }
7310   return coding->result;
7311 }
7312
7313
7314 /* Extract an annotation datum from a composition starting at POS and
7315    ending before LIMIT of CODING->src_object (buffer or string), store
7316    the data in BUF, set *STOP to a starting position of the next
7317    composition (if any) or to LIMIT, and return the address of the
7318    next element of BUF.
7319
7320    If such an annotation is not found, set *STOP to a starting
7321    position of a composition after POS (if any) or to LIMIT, and
7322    return BUF.  */
7323
7324 static INLINE int *
7325 handle_composition_annotation (pos, limit, coding, buf, stop)
7326      EMACS_INT pos, limit;
7327      struct coding_system *coding;
7328      int *buf;
7329      EMACS_INT *stop;
7330 {
7331   EMACS_INT start, end;
7332   Lisp_Object prop;
7333
7334   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7335       || end > limit)
7336     *stop = limit;
7337   else if (start > pos)
7338     *stop = start;
7339   else
7340     {
7341       if (start == pos)
7342         {
7343           /* We found a composition.  Store the corresponding
7344              annotation data in BUF.  */
7345           int *head = buf;
7346           enum composition_method method = COMPOSITION_METHOD (prop);
7347           int nchars = COMPOSITION_LENGTH (prop);
7348
7349           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7350           if (method != COMPOSITION_RELATIVE)
7351             {
7352               Lisp_Object components;
7353               int len, i, i_byte;
7354
7355               components = COMPOSITION_COMPONENTS (prop);
7356               if (VECTORP (components))
7357                 {
7358                   len = XVECTOR (components)->size;
7359                   for (i = 0; i < len; i++)
7360                     *buf++ = XINT (AREF (components, i));
7361                 }
7362               else if (STRINGP (components))
7363                 {
7364                   len = SCHARS (components);
7365                   i = i_byte = 0;
7366                   while (i < len)
7367                     {
7368                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7369                       buf++;
7370                     }
7371                 }
7372               else if (INTEGERP (components))
7373                 {
7374                   len = 1;
7375                   *buf++ = XINT (components);
7376                 }
7377               else if (CONSP (components))
7378                 {
7379                   for (len = 0; CONSP (components);
7380                        len++, components = XCDR (components))
7381                     *buf++ = XINT (XCAR (components));
7382                 }
7383               else
7384                 abort ();
7385               *head -= len;
7386             }
7387         }
7388
7389       if (find_composition (end, limit, &start, &end, &prop,
7390                             coding->src_object)
7391           && end <= limit)
7392         *stop = start;
7393       else
7394         *stop = limit;
7395     }
7396   return buf;
7397 }
7398
7399
7400 /* Extract an annotation datum from a text property `charset' at POS of
7401    CODING->src_object (buffer of string), store the data in BUF, set
7402    *STOP to the position where the value of `charset' property changes
7403    (limiting by LIMIT), and return the address of the next element of
7404    BUF.
7405
7406    If the property value is nil, set *STOP to the position where the
7407    property value is non-nil (limiting by LIMIT), and return BUF.  */
7408
7409 static INLINE int *
7410 handle_charset_annotation (pos, limit, coding, buf, stop)
7411      EMACS_INT pos, limit;
7412      struct coding_system *coding;
7413      int *buf;
7414      EMACS_INT *stop;
7415 {
7416   Lisp_Object val, next;
7417   int id;
7418
7419   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7420   if (! NILP (val) && CHARSETP (val))
7421     id = XINT (CHARSET_SYMBOL_ID (val));
7422   else
7423     id = -1;
7424   ADD_CHARSET_DATA (buf, 0, id);
7425   next = Fnext_single_property_change (make_number (pos), Qcharset,
7426                                        coding->src_object,
7427                                        make_number (limit));
7428   *stop = XINT (next);
7429   return buf;
7430 }
7431
7432
7433 static void
7434 consume_chars (coding, translation_table, max_lookup)
7435      struct coding_system *coding;
7436      Lisp_Object translation_table;
7437      int max_lookup;
7438 {
7439   int *buf = coding->charbuf;
7440   int *buf_end = coding->charbuf + coding->charbuf_size;
7441   const unsigned char *src = coding->source + coding->consumed;
7442   const unsigned char *src_end = coding->source + coding->src_bytes;
7443   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7444   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7445   int multibytep = coding->src_multibyte;
7446   Lisp_Object eol_type;
7447   int c;
7448   EMACS_INT stop, stop_composition, stop_charset;
7449   int *lookup_buf = NULL;
7450
7451   if (! NILP (translation_table))
7452     lookup_buf = alloca (sizeof (int) * max_lookup);
7453
7454   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7455   if (VECTORP (eol_type))
7456     eol_type = Qunix;
7457
7458   /* Note: composition handling is not yet implemented.  */
7459   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7460
7461   if (NILP (coding->src_object))
7462     stop = stop_composition = stop_charset = end_pos;
7463   else
7464     {
7465       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7466         stop = stop_composition = pos;
7467       else
7468         stop = stop_composition = end_pos;
7469       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7470         stop = stop_charset = pos;
7471       else
7472         stop_charset = end_pos;
7473     }
7474
7475   /* Compensate for CRLF and conversion.  */
7476   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7477   while (buf < buf_end)
7478     {
7479       Lisp_Object trans;
7480
7481       if (pos == stop)
7482         {
7483           if (pos == end_pos)
7484             break;
7485           if (pos == stop_composition)
7486             buf = handle_composition_annotation (pos, end_pos, coding,
7487                                                  buf, &stop_composition);
7488           if (pos == stop_charset)
7489             buf = handle_charset_annotation (pos, end_pos, coding,
7490                                              buf, &stop_charset);
7491           stop = (stop_composition < stop_charset
7492                   ? stop_composition : stop_charset);
7493         }
7494
7495       if (! multibytep)
7496         {
7497           EMACS_INT bytes;
7498
7499           if (coding->encoder == encode_coding_raw_text
7500               || coding->encoder == encode_coding_ccl)
7501             c = *src++, pos++;
7502           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7503             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7504           else
7505             c = BYTE8_TO_CHAR (*src), src++, pos++;
7506         }
7507       else
7508         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7509       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7510         c = '\n';
7511       if (! EQ (eol_type, Qunix))
7512         {
7513           if (c == '\n')
7514             {
7515               if (EQ (eol_type, Qdos))
7516                 *buf++ = '\r';
7517               else
7518                 c = '\r';
7519             }
7520         }
7521
7522       trans = Qnil;
7523       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7524       if (NILP (trans))
7525         *buf++ = c;
7526       else
7527         {
7528           int from_nchars = 1, to_nchars = 1;
7529           int *lookup_buf_end;
7530           const unsigned char *p = src;
7531           int i;
7532
7533           lookup_buf[0] = c;
7534           for (i = 1; i < max_lookup && p < src_end; i++)
7535             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7536           lookup_buf_end = lookup_buf + i;
7537           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7538           if (INTEGERP (trans))
7539             c = XINT (trans);
7540           else if (CONSP (trans))
7541             {
7542               from_nchars = ASIZE (XCAR (trans));
7543               trans = XCDR (trans);
7544               if (INTEGERP (trans))
7545                 c = XINT (trans);
7546               else
7547                 {
7548                   to_nchars = ASIZE (trans);
7549                   if (buf + to_nchars > buf_end)
7550                     break;
7551                   c = XINT (AREF (trans, 0));
7552                 }
7553             }
7554           else
7555             break;
7556           *buf++ = c;
7557           for (i = 1; i < to_nchars; i++)
7558             *buf++ = XINT (AREF (trans, i));
7559           for (i = 1; i < from_nchars; i++, pos++)
7560             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7561         }
7562     }
7563
7564   coding->consumed = src - coding->source;
7565   coding->consumed_char = pos - coding->src_pos;
7566   coding->charbuf_used = buf - coding->charbuf;
7567   coding->chars_at_source = 0;
7568 }
7569
7570
7571 /* Encode the text at CODING->src_object into CODING->dst_object.
7572    CODING->src_object is a buffer or a string.
7573    CODING->dst_object is a buffer or nil.
7574
7575    If CODING->src_object is a buffer, it must be the current buffer.
7576    In this case, if CODING->src_pos is positive, it is a position of
7577    the source text in the buffer, otherwise. the source text is in the
7578    gap area of the buffer, and coding->src_pos specifies the offset of
7579    the text from GPT (which must be the same as PT).  If this is the
7580    same buffer as CODING->dst_object, CODING->src_pos must be
7581    negative and CODING should not have `pre-write-conversion'.
7582
7583    If CODING->src_object is a string, CODING should not have
7584    `pre-write-conversion'.
7585
7586    If CODING->dst_object is a buffer, the encoded data is inserted at
7587    the current point of that buffer.
7588
7589    If CODING->dst_object is nil, the encoded data is placed at the
7590    memory area specified by CODING->destination.  */
7591
7592 static int
7593 encode_coding (coding)
7594      struct coding_system *coding;
7595 {
7596   Lisp_Object attrs;
7597   Lisp_Object translation_table;
7598   int max_lookup;
7599   struct ccl_spec cclspec;
7600
7601   attrs = CODING_ID_ATTRS (coding->id);
7602   if (coding->encoder == encode_coding_raw_text)
7603     translation_table = Qnil, max_lookup = 0;
7604   else
7605     translation_table = get_translation_table (attrs, 1, &max_lookup);
7606
7607   if (BUFFERP (coding->dst_object))
7608     {
7609       set_buffer_internal (XBUFFER (coding->dst_object));
7610       coding->dst_multibyte
7611         = ! NILP (current_buffer->enable_multibyte_characters);
7612     }
7613
7614   coding->consumed = coding->consumed_char = 0;
7615   coding->produced = coding->produced_char = 0;
7616   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7617   coding->errors = 0;
7618
7619   ALLOC_CONVERSION_WORK_AREA (coding);
7620
7621   if (coding->encoder == encode_coding_ccl)
7622     {
7623       coding->spec.ccl = &cclspec;
7624       setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7625     }
7626   do {
7627     coding_set_source (coding);
7628     consume_chars (coding, translation_table, max_lookup);
7629     coding_set_destination (coding);
7630     (*(coding->encoder)) (coding);
7631   } while (coding->consumed_char < coding->src_chars);
7632
7633   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7634     insert_from_gap (coding->produced_char, coding->produced);
7635
7636   return (coding->result);
7637 }
7638
7639
7640 /* Name (or base name) of work buffer for code conversion.  */
7641 static Lisp_Object Vcode_conversion_workbuf_name;
7642
7643 /* A working buffer used by the top level conversion.  Once it is
7644    created, it is never destroyed.  It has the name
7645    Vcode_conversion_workbuf_name.  The other working buffers are
7646    destroyed after the use is finished, and their names are modified
7647    versions of Vcode_conversion_workbuf_name.  */
7648 static Lisp_Object Vcode_conversion_reused_workbuf;
7649
7650 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
7651 static int reused_workbuf_in_use;
7652
7653
7654 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7655    multibyteness of returning buffer.  */
7656
7657 static Lisp_Object
7658 make_conversion_work_buffer (multibyte)
7659      int multibyte;
7660 {
7661   Lisp_Object name, workbuf;
7662   struct buffer *current;
7663
7664   if (reused_workbuf_in_use++)
7665     {
7666       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7667       workbuf = Fget_buffer_create (name);
7668     }
7669   else
7670     {
7671       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7672         Vcode_conversion_reused_workbuf
7673           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7674       workbuf = Vcode_conversion_reused_workbuf;
7675     }
7676   current = current_buffer;
7677   set_buffer_internal (XBUFFER (workbuf));
7678   /* We can't allow modification hooks to run in the work buffer.  For
7679      instance, directory_files_internal assumes that file decoding
7680      doesn't compile new regexps.  */
7681   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7682   Ferase_buffer ();
7683   current_buffer->undo_list = Qt;
7684   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7685   set_buffer_internal (current);
7686   return workbuf;
7687 }
7688
7689
7690 static Lisp_Object
7691 code_conversion_restore (arg)
7692      Lisp_Object arg;
7693 {
7694   Lisp_Object current, workbuf;
7695   struct gcpro gcpro1;
7696
7697   GCPRO1 (arg);
7698   current = XCAR (arg);
7699   workbuf = XCDR (arg);
7700   if (! NILP (workbuf))
7701     {
7702       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7703         reused_workbuf_in_use = 0;
7704       else if (! NILP (Fbuffer_live_p (workbuf)))
7705         Fkill_buffer (workbuf);
7706     }
7707   set_buffer_internal (XBUFFER (current));
7708   UNGCPRO;
7709   return Qnil;
7710 }
7711
7712 Lisp_Object
7713 code_conversion_save (with_work_buf, multibyte)
7714      int with_work_buf, multibyte;
7715 {
7716   Lisp_Object workbuf = Qnil;
7717
7718   if (with_work_buf)
7719     workbuf = make_conversion_work_buffer (multibyte);
7720   record_unwind_protect (code_conversion_restore,
7721                          Fcons (Fcurrent_buffer (), workbuf));
7722   return workbuf;
7723 }
7724
7725 int
7726 decode_coding_gap (coding, chars, bytes)
7727      struct coding_system *coding;
7728      EMACS_INT chars, bytes;
7729 {
7730   int count = specpdl_ptr - specpdl;
7731   Lisp_Object attrs;
7732
7733   code_conversion_save (0, 0);
7734
7735   coding->src_object = Fcurrent_buffer ();
7736   coding->src_chars = chars;
7737   coding->src_bytes = bytes;
7738   coding->src_pos = -chars;
7739   coding->src_pos_byte = -bytes;
7740   coding->src_multibyte = chars < bytes;
7741   coding->dst_object = coding->src_object;
7742   coding->dst_pos = PT;
7743   coding->dst_pos_byte = PT_BYTE;
7744   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7745
7746   if (CODING_REQUIRE_DETECTION (coding))
7747     detect_coding (coding);
7748
7749   coding->mode |= CODING_MODE_LAST_BLOCK;
7750   current_buffer->text->inhibit_shrinking = 1;
7751   decode_coding (coding);
7752   current_buffer->text->inhibit_shrinking = 0;
7753
7754   attrs = CODING_ID_ATTRS (coding->id);
7755   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7756     {
7757       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7758       Lisp_Object val;
7759
7760       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7761       val = call1 (CODING_ATTR_POST_READ (attrs),
7762                    make_number (coding->produced_char));
7763       CHECK_NATNUM (val);
7764       coding->produced_char += Z - prev_Z;
7765       coding->produced += Z_BYTE - prev_Z_BYTE;
7766     }
7767
7768   unbind_to (count, Qnil);
7769   return coding->result;
7770 }
7771
7772 int
7773 encode_coding_gap (coding, chars, bytes)
7774      struct coding_system *coding;
7775      EMACS_INT chars, bytes;
7776 {
7777   int count = specpdl_ptr - specpdl;
7778
7779   code_conversion_save (0, 0);
7780
7781   coding->src_object = Fcurrent_buffer ();
7782   coding->src_chars = chars;
7783   coding->src_bytes = bytes;
7784   coding->src_pos = -chars;
7785   coding->src_pos_byte = -bytes;
7786   coding->src_multibyte = chars < bytes;
7787   coding->dst_object = coding->src_object;
7788   coding->dst_pos = PT;
7789   coding->dst_pos_byte = PT_BYTE;
7790
7791   encode_coding (coding);
7792
7793   unbind_to (count, Qnil);
7794   return coding->result;
7795 }
7796
7797
7798 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7799    SRC_OBJECT into DST_OBJECT by coding context CODING.
7800
7801    SRC_OBJECT is a buffer, a string, or Qnil.
7802
7803    If it is a buffer, the text is at point of the buffer.  FROM and TO
7804    are positions in the buffer.
7805
7806    If it is a string, the text is at the beginning of the string.
7807    FROM and TO are indices to the string.
7808
7809    If it is nil, the text is at coding->source.  FROM and TO are
7810    indices to coding->source.
7811
7812    DST_OBJECT is a buffer, Qt, or Qnil.
7813
7814    If it is a buffer, the decoded text is inserted at point of the
7815    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7816    is deleted.
7817
7818    If it is Qt, a string is made from the decoded text, and
7819    set in CODING->dst_object.
7820
7821    If it is Qnil, the decoded text is stored at CODING->destination.
7822    The caller must allocate CODING->dst_bytes bytes at
7823    CODING->destination by xmalloc.  If the decoded text is longer than
7824    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7825  */
7826
7827 void
7828 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7829                       dst_object)
7830      struct coding_system *coding;
7831      Lisp_Object src_object;
7832      EMACS_INT from, from_byte, to, to_byte;
7833      Lisp_Object dst_object;
7834 {
7835   int count = specpdl_ptr - specpdl;
7836   unsigned char *destination;
7837   EMACS_INT dst_bytes;
7838   EMACS_INT chars = to - from;
7839   EMACS_INT bytes = to_byte - from_byte;
7840   Lisp_Object attrs;
7841   int saved_pt = -1, saved_pt_byte;
7842   int need_marker_adjustment = 0;
7843   Lisp_Object old_deactivate_mark;
7844
7845   old_deactivate_mark = Vdeactivate_mark;
7846
7847   if (NILP (dst_object))
7848     {
7849       destination = coding->destination;
7850       dst_bytes = coding->dst_bytes;
7851     }
7852
7853   coding->src_object = src_object;
7854   coding->src_chars = chars;
7855   coding->src_bytes = bytes;
7856   coding->src_multibyte = chars < bytes;
7857
7858   if (STRINGP (src_object))
7859     {
7860       coding->src_pos = from;
7861       coding->src_pos_byte = from_byte;
7862     }
7863   else if (BUFFERP (src_object))
7864     {
7865       set_buffer_internal (XBUFFER (src_object));
7866       if (from != GPT)
7867         move_gap_both (from, from_byte);
7868       if (EQ (src_object, dst_object))
7869         {
7870           struct Lisp_Marker *tail;
7871
7872           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7873             {
7874               tail->need_adjustment
7875                 = tail->charpos == (tail->insertion_type ? from : to);
7876               need_marker_adjustment |= tail->need_adjustment;
7877             }
7878           saved_pt = PT, saved_pt_byte = PT_BYTE;
7879           TEMP_SET_PT_BOTH (from, from_byte);
7880           current_buffer->text->inhibit_shrinking = 1;
7881           del_range_both (from, from_byte, to, to_byte, 1);
7882           coding->src_pos = -chars;
7883           coding->src_pos_byte = -bytes;
7884         }
7885       else
7886         {
7887           coding->src_pos = from;
7888           coding->src_pos_byte = from_byte;
7889         }
7890     }
7891
7892   if (CODING_REQUIRE_DETECTION (coding))
7893     detect_coding (coding);
7894   attrs = CODING_ID_ATTRS (coding->id);
7895
7896   if (EQ (dst_object, Qt)
7897       || (! NILP (CODING_ATTR_POST_READ (attrs))
7898           && NILP (dst_object)))
7899     {
7900       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7901       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7902       coding->dst_pos = BEG;
7903       coding->dst_pos_byte = BEG_BYTE;
7904     }
7905   else if (BUFFERP (dst_object))
7906     {
7907       code_conversion_save (0, 0);
7908       coding->dst_object = dst_object;
7909       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7910       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7911       coding->dst_multibyte
7912         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7913     }
7914   else
7915     {
7916       code_conversion_save (0, 0);
7917       coding->dst_object = Qnil;
7918       /* Most callers presume this will return a multibyte result, and they
7919          won't use `binary' or `raw-text' anyway, so let's not worry about
7920          CODING_FOR_UNIBYTE.  */
7921       coding->dst_multibyte = 1;
7922     }
7923
7924   decode_coding (coding);
7925
7926   if (BUFFERP (coding->dst_object))
7927     set_buffer_internal (XBUFFER (coding->dst_object));
7928
7929   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7930     {
7931       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7932       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7933       Lisp_Object val;
7934
7935       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7936       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7937               old_deactivate_mark);
7938       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7939                         make_number (coding->produced_char));
7940       UNGCPRO;
7941       CHECK_NATNUM (val);
7942       coding->produced_char += Z - prev_Z;
7943       coding->produced += Z_BYTE - prev_Z_BYTE;
7944     }
7945
7946   if (EQ (dst_object, Qt))
7947     {
7948       coding->dst_object = Fbuffer_string ();
7949     }
7950   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7951     {
7952       set_buffer_internal (XBUFFER (coding->dst_object));
7953       if (dst_bytes < coding->produced)
7954         {
7955           destination = xrealloc (destination, coding->produced);
7956           if (! destination)
7957             {
7958               record_conversion_result (coding,
7959                                         CODING_RESULT_INSUFFICIENT_MEM);
7960               unbind_to (count, Qnil);
7961               return;
7962             }
7963           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7964             move_gap_both (BEGV, BEGV_BYTE);
7965           bcopy (BEGV_ADDR, destination, coding->produced);
7966           coding->destination = destination;
7967         }
7968     }
7969
7970   if (saved_pt >= 0)
7971     {
7972       /* This is the case of:
7973          (BUFFERP (src_object) && EQ (src_object, dst_object))
7974          As we have moved PT while replacing the original buffer
7975          contents, we must recover it now.  */
7976       set_buffer_internal (XBUFFER (src_object));
7977       current_buffer->text->inhibit_shrinking = 0;
7978       if (saved_pt < from)
7979         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7980       else if (saved_pt < from + chars)
7981         TEMP_SET_PT_BOTH (from, from_byte);
7982       else if (! NILP (current_buffer->enable_multibyte_characters))
7983         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7984                           saved_pt_byte + (coding->produced - bytes));
7985       else
7986         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7987                           saved_pt_byte + (coding->produced - bytes));
7988
7989       if (need_marker_adjustment)
7990         {
7991           struct Lisp_Marker *tail;
7992
7993           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7994             if (tail->need_adjustment)
7995               {
7996                 tail->need_adjustment = 0;
7997                 if (tail->insertion_type)
7998                   {
7999                     tail->bytepos = from_byte;
8000                     tail->charpos = from;
8001                   }
8002                 else
8003                   {
8004                     tail->bytepos = from_byte + coding->produced;
8005                     tail->charpos
8006                       = (NILP (current_buffer->enable_multibyte_characters)
8007                          ? tail->bytepos : from + coding->produced_char);
8008                   }
8009               }
8010         }
8011     }
8012
8013   Vdeactivate_mark = old_deactivate_mark;
8014   unbind_to (count, coding->dst_object);
8015 }
8016
8017
     /* Encode the text of SRC_OBJECT with CODING and deliver the result
        as requested by DST_OBJECT.

        FROM and TO delimit the source text in characters; FROM_BYTE and
        TO_BYTE are the corresponding byte positions.  SRC_OBJECT is
        normally a string or a buffer.

        DST_OBJECT selects the destination:
          - a buffer: CODING->dst_object is set to it.  If it is also
            SRC_OBJECT, the source range is deleted first and the
            destination position is FROM; otherwise the destination
            position is that buffer's point.
          - Qt: a scratch byte array is allocated here and, after
            encoding, CODING->dst_object holds the result (a unibyte
            string made from that array, or the contents of the buffer
            that is current if CODING->dst_object was turned into a
            buffer during encoding).
          - anything else: CODING->dst_object is set to Qnil and
            CODING->destination / CODING->dst_bytes are left as the
            caller set them.

        If CODING has a pre-write function, the source text is first
        copied into a work buffer and that function is called on it; the
        buffer that is current afterwards becomes the real source.

        When source and destination are the same buffer, point and the
        markers that sat on the edges of the region are put back around
        the encoded text before returning.  Vdeactivate_mark is restored
        to its value on entry.  */
8018 void
8019 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
8020                       dst_object)
8021      struct coding_system *coding;
8022      Lisp_Object src_object;
8023      EMACS_INT from, from_byte, to, to_byte;
8024      Lisp_Object dst_object;
8025 {
8026   int count = SPECPDL_INDEX ();
8027   EMACS_INT chars = to - from;
8028   EMACS_INT bytes = to_byte - from_byte;
8029   Lisp_Object attrs;
       /* Saved point; -1 means "nothing to restore".  These hold buffer
          positions, so they must be as wide as FROM and TO.  */
8030   EMACS_INT saved_pt = -1, saved_pt_byte = 0;
8031   int need_marker_adjustment = 0;
8032   int kill_src_buffer = 0;
8033   Lisp_Object old_deactivate_mark;
8034
8035   old_deactivate_mark = Vdeactivate_mark;
8036
8037   coding->src_object = src_object;
8038   coding->src_chars = chars;
8039   coding->src_bytes = bytes;
8040   coding->src_multibyte = chars < bytes;
8041
8042   attrs = CODING_ID_ATTRS (coding->id);
8043
8044   if (EQ (src_object, dst_object))
8045     {
8046       struct Lisp_Marker *tail;
8047
           /* Flag the markers sitting exactly on an edge of the region
              (insertion-type markers at FROM, the others at TO) so they
              can be repositioned once the text has been replaced.
              NOTE(review): this walks the markers of the current
              buffer, so it assumes SRC_OBJECT is current here.  */
8048       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8049         {
8050           tail->need_adjustment
8051             = tail->charpos == (tail->insertion_type ? from : to);
8052           need_marker_adjustment |= tail->need_adjustment;
8053         }
8054     }
8055
8056   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
8057     {
           /* Copy the source text into a work buffer for the pre-write
              function to operate on.  */
8058       coding->src_object = code_conversion_save (1, coding->src_multibyte);
8059       set_buffer_internal (XBUFFER (coding->src_object));
8060       if (STRINGP (src_object))
8061         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
8062       else if (BUFFERP (src_object))
8063         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
8064       else
8065         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
8066
8067       if (EQ (src_object, dst_object))
8068         {
               /* In-place encoding: remember point, then remove the
                  original text now that the work buffer has a copy.  */
8069           set_buffer_internal (XBUFFER (src_object));
8070           saved_pt = PT, saved_pt_byte = PT_BYTE;
8071           del_range_both (from, from_byte, to, to_byte, 1);
8072           set_buffer_internal (XBUFFER (coding->src_object));
8073         }
8074
8075       {
8076         Lisp_Object args[3];
8077         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
8078
             /* Call the pre-write function on the whole work buffer.  */
8079         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
8080                 old_deactivate_mark);
8081         args[0] = CODING_ATTR_PRE_WRITE (attrs);
8082         args[1] = make_number (BEG);
8083         args[2] = make_number (Z);
8084         safe_call (3, args);
8085         UNGCPRO;
8086       }
           /* If a different buffer is current after the call, use it as
              the source and kill it when encoding is done.  */
8087       if (XBUFFER (coding->src_object) != current_buffer)
8088         kill_src_buffer = 1;
8089       coding->src_object = Fcurrent_buffer ();
8090       if (BEG != GPT)
8091         move_gap_both (BEG, BEG_BYTE);
8092       coding->src_chars = Z - BEG;
8093       coding->src_bytes = Z_BYTE - BEG_BYTE;
8094       coding->src_pos = BEG;
8095       coding->src_pos_byte = BEG_BYTE;
8096       coding->src_multibyte = Z < Z_BYTE;
8097     }
8098   else if (STRINGP (src_object))
8099     {
8100       code_conversion_save (0, 0);
8101       coding->src_pos = from;
8102       coding->src_pos_byte = from_byte;
8103     }
8104   else if (BUFFERP (src_object))
8105     {
8106       code_conversion_save (0, 0);
8107       set_buffer_internal (XBUFFER (src_object));
8108       if (EQ (src_object, dst_object))
8109         {
               /* In-place encoding: the deleted text returned by
                  del_range_1 becomes the source.  */
8110           saved_pt = PT, saved_pt_byte = PT_BYTE;
8111           coding->src_object = del_range_1 (from, to, 1, 1);
8112           coding->src_pos = 0;
8113           coding->src_pos_byte = 0;
8114         }
8115       else
8116         {
               /* Move the gap out of the source range if it is inside.  */
8117           if (from < GPT && to >= GPT)
8118             move_gap_both (from, from_byte);
8119           coding->src_pos = from;
8120           coding->src_pos_byte = from_byte;
8121         }
8122     }
8123   else
8124     code_conversion_save (0, 0);
8125
       /* Set up the destination.  */
8126   if (BUFFERP (dst_object))
8127     {
8128       coding->dst_object = dst_object;
8129       if (EQ (src_object, dst_object))
8130         {
8131           coding->dst_pos = from;
8132           coding->dst_pos_byte = from_byte;
8133         }
8134       else
8135         {
8136           struct buffer *current = current_buffer;
8137
               /* The destination position is point of the destination
                  buffer; put that buffer's gap there.  */
8138           set_buffer_temp (XBUFFER (dst_object));
8139           coding->dst_pos = PT;
8140           coding->dst_pos_byte = PT_BYTE;
8141           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
8142           set_buffer_temp (current);
8143         }
8144       coding->dst_multibyte
8145         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
8146     }
8147   else if (EQ (dst_object, Qt))
8148     {
           /* Start with one byte per source character (at least one
              byte) as the initial size of the output array.  */
8149       coding->dst_object = Qnil;
8150       coding->dst_bytes = coding->src_chars;
8151       if (coding->dst_bytes == 0)
8152         coding->dst_bytes = 1;
8153       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
8154       coding->dst_multibyte = 0;
8155     }
8156   else
8157     {
8158       coding->dst_object = Qnil;
8159       coding->dst_multibyte = 0;
8160     }
8161
8162   encode_coding (coding);
8163
8164   if (EQ (dst_object, Qt))
8165     {
           /* Turn the produced bytes into the string result.  */
8166       if (BUFFERP (coding->dst_object))
8167         coding->dst_object = Fbuffer_string ();
8168       else
8169         {
8170           coding->dst_object
8171             = make_unibyte_string ((char *) coding->destination,
8172                                    coding->produced);
8173           xfree (coding->destination);
8174         }
8175     }
8176
8177   if (saved_pt >= 0)
8178     {
8179       /* This is the case of:
8180          (BUFFERP (src_object) && EQ (src_object, dst_object))
8181          As we have moved PT while replacing the original buffer
8182          contents, we must recover it now.  */
8183       set_buffer_internal (XBUFFER (src_object));
           /* Point before the region stays put; point inside the region
              goes to FROM; point after it is shifted by the change in
              size (counted in bytes for a unibyte buffer).  */
8184       if (saved_pt < from)
8185         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8186       else if (saved_pt < from + chars)
8187         TEMP_SET_PT_BOTH (from, from_byte);
8188       else if (! NILP (current_buffer->enable_multibyte_characters))
8189         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8190                           saved_pt_byte + (coding->produced - bytes));
8191       else
8192         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8193                           saved_pt_byte + (coding->produced - bytes));
8194
8195       if (need_marker_adjustment)
8196         {
8197           struct Lisp_Marker *tail;
8198
               /* Flagged insertion-type markers go to the start of the
                  encoded text, the other flagged markers to its end.  */
8199           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8200             if (tail->need_adjustment)
8201               {
8202                 tail->need_adjustment = 0;
8203                 if (tail->insertion_type)
8204                   {
8205                     tail->bytepos = from_byte;
8206                     tail->charpos = from;
8207                   }
8208                 else
8209                   {
8210                     tail->bytepos = from_byte + coding->produced;
8211                     tail->charpos
8212                       = (NILP (current_buffer->enable_multibyte_characters)
8213                          ? tail->bytepos : from + coding->produced_char);
8214                   }
8215               }
8216         }
8217     }
8218
8219   if (kill_src_buffer)
8220     Fkill_buffer (coding->src_object);
8221
8222   Vdeactivate_mark = old_deactivate_mark;
8223   unbind_to (count, Qnil);
8224 }
8225
8226
     /* Return the name of the coding system assigned to the coding
        category that comes first in coding_priorities.  */
8227 Lisp_Object
8228 preferred_coding_system ()
8229 {
       const struct coding_system *top
         = coding_categories + coding_priorities[0];
       int top_id = top->id;

       return CODING_ID_NAME (top_id);
8233 }
8234
8235 \f
8236 #ifdef emacs
8237 /*** 8. Emacs Lisp library functions ***/
8238
8239 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8240        doc: /* Return t if OBJECT is nil or a coding-system.
8241 See the documentation of `define-coding-system' for information
8242 about coding-system objects.  */)
8243      (object)
8244      Lisp_Object object;
8245 {
       /* nil is always accepted.  */
       if (NILP (object))
         return Qt;

       /* So is anything that already has a coding system id.  */
       if (CODING_SYSTEM_ID (object) >= 0)
         return Qt;

       /* Otherwise accept only a symbol that still carries a
          non-nil `coding-system-define-form' property.  */
       if (SYMBOLP (object)
           && ! NILP (Fget (object, Qcoding_system_define_form)))
         return Qt;

       return Qnil;
8253 }
8254
8255 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8256        Sread_non_nil_coding_system, 1, 1, 0,
8257        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8258      (prompt)
8259      Lisp_Object prompt;
8260 {
       Lisp_Object input;

       /* Ask again for as long as the answer is the empty string.  */
       for (;;)
         {
           input = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
                                     Qt, Qnil, Qcoding_system_history,
                                     Qnil, Qnil);
           if (SCHARS (input) != 0)
             break;
         }

       return Fintern (input, Qnil);
8269 }
8270
8271 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8272        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8273 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8274 Ignores case when completing coding systems (all Emacs coding systems
8275 are lower-case).  */)
8276      (prompt, default_coding_system)
8277      Lisp_Object prompt, default_coding_system;
8278 {
       int specpdl_mark = SPECPDL_INDEX ();
       Lisp_Object answer;

       /* A symbol given as the default is passed on as its name.  */
       if (SYMBOLP (default_coding_system))
         default_coding_system = SYMBOL_NAME (default_coding_system);

       /* Bind completion-ignore-case to t only around the read.  */
       specbind (Qcompletion_ignore_case, Qt);
       answer = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
                                  Qt, Qnil, Qcoding_system_history,
                                  default_coding_system, Qnil);
       unbind_to (specpdl_mark, Qnil);

       /* An empty answer yields nil; anything else is interned.  */
       if (SCHARS (answer) == 0)
         return Qnil;
       return Fintern (answer, Qnil);
8290 }
8291
8292 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8293        1, 1, 0,
8294        doc: /* Check validity of CODING-SYSTEM.
8295 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8296 It is valid if it is nil or a symbol defined as a coding system by the
8297 function `define-coding-system'.  */)
8298   (coding_system)
8299      Lisp_Object coding_system;
8300 {
       Lisp_Object pending_form
         = Fget (coding_system, Qcoding_system_define_form);

       /* A pending definition form is evaluated now.  The property is
          cleared before the evaluation.  */
       if (! NILP (pending_form))
         {
           Fput (coding_system, Qcoding_system_define_form, Qnil);
           safe_eval (pending_form);
         }

       /* NOTE(review): relies on xsignal1 not returning -- confirm.  */
       if (NILP (Fcoding_system_p (coding_system)))
         xsignal1 (Qcoding_system_error, coding_system);
       return coding_system;
8312 }
8313
8314 \f
8315 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8316    HIGHEST is nonzero, return the coding system of the highest
8317    priority among the detected coding systems.  Otherwise return a
8318    list of detected coding systems sorted by their priorities.  If
8319    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8320    multibyte form but contain only ASCII and eight-bit chars.
8321    Otherwise, the bytes are raw bytes.
8322
8323    CODING-SYSTEM controls the detection as below:
8324
8325    If it is nil, detect both text-format and eol-format.  If the
8326    text-format part of CODING-SYSTEM is already specified
8327    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8328    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8329    detect only text-format.  */
8330
8331 Lisp_Object
8332 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
8333                       coding_system)
8334      const unsigned char *src;
8335      EMACS_INT src_chars, src_bytes;
8336      int highest;
8337      int multibytep;
8338      Lisp_Object coding_system;
8339 {
8340   const unsigned char *src_end = src + src_bytes;
8341   Lisp_Object attrs, eol_type;
8342   Lisp_Object val = Qnil;
8343   struct coding_system coding;
8344   int id;
8345   struct coding_detection_info detect_info;
8346   enum coding_category base_category;
8347   int null_byte_found = 0, eight_bit_found = 0;
8348
       /* A nil CODING_SYSTEM is treated as `undecided'.  */
8349   if (NILP (coding_system))
8350     coding_system = Qundecided;
8351   setup_coding_system (coding_system, &coding);
8352   attrs = CODING_ID_ATTRS (coding.id);
8353   eol_type = CODING_ID_EOL_TYPE (coding.id);
8354   coding_system = CODING_ATTR_BASE_NAME (attrs);
8355
       /* Present the whole input to the detectors as one final block.  */
8356   coding.source = src;
8357   coding.src_chars = src_chars;
8358   coding.src_bytes = src_bytes;
8359   coding.src_multibyte = multibytep;
8360   coding.consumed = 0;
8361   coding.mode |= CODING_MODE_LAST_BLOCK;
8362   coding.head_ascii = 0;
8363
8364   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8365
8366   /* At first, detect text-format if necessary.  */
8367   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8368   if (base_category == coding_category_undecided)
8369     {
8370       enum coding_category category;
8371       struct coding_system *this;
8372       int c, i;
8373
8374       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
           /* While scanning, note whether a null byte or an 8-bit byte
              occurs; the scan stops once both have been seen.
              coding.head_ascii counts the 7-bit bytes passed before the
              first 8-bit byte.  */
8375       for (; src < src_end; src++)
8376         {
8377           c = *src;
8378           if (c & 0x80)
8379             {
8380               eight_bit_found = 1;
8381               if (null_byte_found)
8382                 break;
8383             }
8384           else if (c < 0x20)
8385             {
                   /* The ISO-2022 detector is tried at the first ESC, SI
                      or SO only (detect_info.checked is still zero).  */
8386               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8387                   && ! inhibit_iso_escape_detection
8388                   && ! detect_info.checked)
8389                 {
8390                   if (detect_coding_iso_2022 (&coding, &detect_info))
8391                     {
8392                       /* We have scanned the whole data.  */
8393                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8394                         {
8395                           /* We didn't find an 8-bit code.  We may
8396                              have found a null-byte, but it's very
8397                              rare that a binary file conforms to
8398                              ISO-2022.  */
8399                           src = src_end;
8400                           coding.head_ascii = src - coding.source;
8401                         }
8402                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8403                       break;
8404                     }
8405                 }
8406               else if (! c && !inhibit_null_byte_detection)
8407                 {
8408                   null_byte_found = 1;
8409                   if (eight_bit_found)
8410                     break;
8411                 }
8412               if (! eight_bit_found)
8413                 coding.head_ascii++;
8414             }
8415           else if (! eight_bit_found)
8416             coding.head_ascii++;
8417         }
8418
           /* Consult the per-category results only when the scan met
              something other than plain leading ASCII, or the ISO-2022
              detector already found a category.  */
8419       if (null_byte_found || eight_bit_found
8420           || coding.head_ascii < coding.src_bytes
8421           || detect_info.found)
8422         {
8423           if (coding.head_ascii == coding.src_bytes)
8424             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
8425             for (i = 0; i < coding_category_raw_text; i++)
8426               {
8427                 category = coding_priorities[i];
8428                 this = coding_categories + category;
8429                 if (detect_info.found & (1 << category))
8430                   break;
8431               }
8432           else
8433             {
                   /* After a null byte, every category except the
                      UTF-16 ones is marked as checked and rejected.  */
8434               if (null_byte_found)
8435                 {
8436                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8437                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8438                 }
                   /* Run the detector of each category not checked yet,
                      in priority order.  With HIGHEST, stop at the first
                      category that is found.  */
8439               for (i = 0; i < coding_category_raw_text; i++)
8440                 {
8441                   category = coding_priorities[i];
8442                   this = coding_categories + category;
8443
8444                   if (this->id < 0)
8445                     {
8446                       /* No coding system of this category is defined.  */
8447                       detect_info.rejected |= (1 << category);
8448                     }
8449                   else if (category >= coding_category_raw_text)
8450                     continue;
8451                   else if (detect_info.checked & (1 << category))
8452                     {
8453                       if (highest
8454                           && (detect_info.found & (1 << category)))
8455                         break;
8456                     }
8457                   else if ((*(this->detector)) (&coding, &detect_info)
8458                            && highest
8459                            && (detect_info.found & (1 << category)))
8460                     {
                           /* Narrow an automatic UTF-16 hit down to the
                              byte order that was found.  */
8461                       if (category == coding_category_utf_16_auto)
8462                         {
8463                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8464                             category = coding_category_utf_16_le;
8465                           else
8466                             category = coding_category_utf_16_be;
8467                         }
8468                       break;
8469                     }
8470                 }
8471             }
8472         }
8473
           /* Build VAL, a list of coding system ids, from the result.  */
8474       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8475           || null_byte_found)
8476         {
               /* Everything was rejected, or a null byte was seen:
                  report `no-conversion'.  */
8477           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8478           id = CODING_SYSTEM_ID (Qno_conversion);
8479           val = Fcons (make_number (id), Qnil);
8480         }
8481       else if (! detect_info.rejected && ! detect_info.found)
8482         {
               /* Nothing was rejected or found: report the coding
                  system of the `undecided' category.  */
8483           detect_info.found = CATEGORY_MASK_ANY;
8484           id = coding_categories[coding_category_undecided].id;
8485           val = Fcons (make_number (id), Qnil);
8486         }
8487       else if (highest)
8488         {
               /* One answer only: the category the loops above stopped
                  at, or else the first category in priority order that
                  was not rejected.  */
8489           if (detect_info.found)
8490             {
8491               detect_info.found = 1 << category;
8492               val = Fcons (make_number (this->id), Qnil);
8493             }
8494           else
8495             for (i = 0; i < coding_category_raw_text; i++)
8496               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8497                 {
8498                   detect_info.found = 1 << coding_priorities[i];
8499                   id = coding_categories[coding_priorities[i]].id;
8500                   val = Fcons (make_number (id), Qnil);
8501                   break;
8502                 }
8503         }
8504       else
8505         {
               /* Full list.  Both loops run from the lowest priority up
                  and push onto the front of VAL, so the result is: the
                  found categories in priority order, followed by the
                  categories neither found nor rejected, also in priority
                  order.  */
8506           int mask = detect_info.rejected | detect_info.found;
8507           int found = 0;
8508
8509           for (i = coding_category_raw_text - 1; i >= 0; i--)
8510             {
8511               category = coding_priorities[i];
8512               if (! (mask & (1 << category)))
8513                 {
8514                   found |= 1 << category;
8515                   id = coding_categories[category].id;
8516                   if (id >= 0)
8517                     val = Fcons (make_number (id), val);
8518                 }
8519             }
8520           for (i = coding_category_raw_text - 1; i >= 0; i--)
8521             {
8522               category = coding_priorities[i];
8523               if (detect_info.found & (1 << category))
8524                 {
8525                   id = coding_categories[category].id;
8526                   val = Fcons (make_number (id), val);
8527                 }
8528             }
8529           detect_info.found |= found;
8530         }
8531     }
8532   else if (base_category == coding_category_utf_8_auto)
8533     {
           /* Only decide between UTF-8 with and without signature.  */
8534       if (detect_coding_utf_8 (&coding, &detect_info))
8535         {
8536           struct coding_system *this;
8537
8538           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8539             this = coding_categories + coding_category_utf_8_sig;
8540           else
8541             this = coding_categories + coding_category_utf_8_nosig;
8542           val = Fcons (make_number (this->id), Qnil);
8543         }
8544     }
8545   else if (base_category == coding_category_utf_16_auto)
8546     {
           /* Only decide among the UTF-16 variants.  */
8547       if (detect_coding_utf_16 (&coding, &detect_info))
8548         {
8549           struct coding_system *this;
8550
8551           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8552             this = coding_categories + coding_category_utf_16_le;
8553           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8554             this = coding_categories + coding_category_utf_16_be;
8555           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8556             this = coding_categories + coding_category_utf_16_be_nosig;
8557           else
8558             this = coding_categories + coding_category_utf_16_le_nosig;
8559           val = Fcons (make_number (this->id), Qnil);
8560         }
8561     }
8562   else
8563     {
           /* The text format is already given by CODING_SYSTEM; take it
              as the only candidate.  */
8564       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8565       val = Fcons (make_number (coding.id), Qnil);
8566     }
8567
8568   /* Then, detect eol-format if necessary.  */
8569   {
         /* -1 means that the eol format was not determined.  */
8570     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8571     Lisp_Object tail;
8572
8573     if (VECTORP (eol_type))
8574       {
             /* The eol format is not fixed by CODING_SYSTEM: scan the
                data once for each family of candidates that was found.
                When a null byte was seen, LF is assumed for the
                non-UTF-16 candidates without scanning.  */
8575         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8576           {
8577             if (null_byte_found)
8578               normal_eol = EOL_SEEN_LF;
8579             else
8580               normal_eol = detect_eol (coding.source, src_bytes,
8581                                        coding_category_raw_text);
8582           }
8583         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8584                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8585           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8586                                       coding_category_utf_16_be);
8587         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8588                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8589           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8590                                       coding_category_utf_16_le);
8591       }
8592     else
8593       {
             /* The eol format is fixed by CODING_SYSTEM.  */
8594         if (EQ (eol_type, Qunix))
8595           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8596         else if (EQ (eol_type, Qdos))
8597           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8598         else
8599           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8600       }
8601
         /* Replace each coding system id in VAL by a coding system
            name.  If that system's own eol type is still a vector and
            an eol format is known for its family, use element 0, 1 or 2
            of the vector for LF, CRLF or CR (presumably the
            corresponding subsidiary coding systems); otherwise use the
            name of the system itself.  */
8602     for (tail = val; CONSP (tail); tail = XCDR (tail))
8603       {
8604         enum coding_category category;
8605         int this_eol;
8606
8607         id = XINT (XCAR (tail));
8608         attrs = CODING_ID_ATTRS (id);
8609         category = XINT (CODING_ATTR_CATEGORY (attrs));
8610         eol_type = CODING_ID_EOL_TYPE (id);
8611         if (VECTORP (eol_type))
8612           {
8613             if (category == coding_category_utf_16_be
8614                 || category == coding_category_utf_16_be_nosig)
8615               this_eol = utf_16_be_eol;
8616             else if (category == coding_category_utf_16_le
8617                      || category == coding_category_utf_16_le_nosig)
8618               this_eol = utf_16_le_eol;
8619             else
8620               this_eol = normal_eol;
8621
8622             if (this_eol == EOL_SEEN_LF)
8623               XSETCAR (tail, AREF (eol_type, 0));
8624             else if (this_eol == EOL_SEEN_CRLF)
8625               XSETCAR (tail, AREF (eol_type, 1));
8626             else if (this_eol == EOL_SEEN_CR)
8627               XSETCAR (tail, AREF (eol_type, 2));
8628             else
8629               XSETCAR (tail, CODING_ID_NAME (id));
8630           }
8631         else
8632           XSETCAR (tail, CODING_ID_NAME (id));
8633       }
8634   }
8635
       /* With HIGHEST, return the first element only (nil if VAL is
          empty); otherwise return the whole list.  */
8636   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8637 }
8638
8639
8640 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8641        2, 3, 0,
8642        doc: /* Detect coding system of the text in the region between START and END.
8643 Return a list of possible coding systems ordered by priority.
8644 The coding systems to try and their priorities follows what
8645 the function `coding-system-priority-list' (which see) returns.
8646
8647 If only ASCII characters are found (except for such ISO-2022 control
8648 characters as ESC), it returns a list of single element `undecided'
8649 or its subsidiary coding system according to a detected end-of-line
8650 format.
8651
8652 If optional argument HIGHEST is non-nil, return the coding system of
8653 highest priority.  */)
8654      (start, end, highest)
8655      Lisp_Object start, end, highest;
8656 {
       /* Buffer positions: use EMACS_INT, the type detect_coding_system
          takes for its character and byte counts, so that positions are
          not truncated where EMACS_INT is wider than int.  */
8657   EMACS_INT from, to;
8658   EMACS_INT from_byte, to_byte;
8659
8660   CHECK_NUMBER_COERCE_MARKER (start);
8661   CHECK_NUMBER_COERCE_MARKER (end);
8662
8663   validate_region (&start, &end);
8664   from = XINT (start), to = XINT (end);
8665   from_byte = CHAR_TO_BYTE (from);
8666   to_byte = CHAR_TO_BYTE (to);
8667
       /* If the gap is inside the region, move it to the end of the
          region (presumably so that the bytes starting at FROM_BYTE
          form one contiguous block for the detector).  */
8668   if (from < GPT && to >= GPT)
8669     move_gap_both (to, to_byte);
8670
       /* Detect both text format and eol format (CODING-SYSTEM nil).  */
8671   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8672                                to - from, to_byte - from_byte,
8673                                !NILP (highest),
8674                                !NILP (current_buffer
8675                                       ->enable_multibyte_characters),
8676                                Qnil);
8677 }
8678
8679 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8680        1, 2, 0,
8681        doc: /* Detect coding system of the text in STRING.
8682 Return a list of possible coding systems ordered by priority.
8683 The coding systems to try and their priorities follows what
8684 the function `coding-system-priority-list' (which see) returns.
8685
8686 If only ASCII characters are found (except for such ISO-2022 control
8687 characters as ESC), it returns a list of single element `undecided'
8688 or its subsidiary coding system according to a detected end-of-line
8689 format.
8690
8691 If optional argument HIGHEST is non-nil, return the coding system of
8692 highest priority.  */)
8693      (string, highest)
8694      Lisp_Object string, highest;
8695 {
       int want_highest, multibyte;

       CHECK_STRING (string);

       want_highest = ! NILP (highest);
       multibyte = STRING_MULTIBYTE (string);

       /* Detect both text format and eol format (CODING-SYSTEM nil)
          over the whole string.  */
       return detect_coding_system (SDATA (string),
                                    SCHARS (string), SBYTES (string),
                                    want_highest, multibyte,
                                    Qnil);
8702 }
8703
8704
     /* Return nonzero if character C, after being mapped through the
        translation table stored in ATTRS (when there is one),
        satisfies CHAR_CHARSET_P for some charset in the charset list
        of ATTRS.  */
8705 static INLINE int
8706 char_encodable_p (c, attrs)
8707      int c;
8708      Lisp_Object attrs;
8709 {
       Lisp_Object table, rest;

       table = CODING_ATTR_TRANS_TBL (attrs);
       if (! NILP (table))
         c = translate_char (table, c);

       rest = CODING_ATTR_CHARSET_LIST (attrs);
       while (CONSP (rest))
         {
           struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (rest)));

           if (CHAR_CHARSET_P (c, cs))
             return 1;
           rest = XCDR (rest);
         }

       /* No charset matched; what is left of the list decides.  */
       return ! NILP (rest);
8725 }
8726
8727
8728 /* Return a list of coding systems that safely encode the text between
8729    START and END.  If EXCLUDE is non-nil, it is a list of coding
8730    systems not to check.  The returned list doesn't contain any such
8731    coding systems.  In any case, if the text contains only ASCII or is
8732    unibyte, return t.
        START may also be a string; then the whole string is checked and
        END is not consulted.  Otherwise the returned list always ends
        with `raw-text' and `no-conversion'.  */
8733
8734 DEFUN ("find-coding-systems-region-internal",
8735        Ffind_coding_systems_region_internal,
8736        Sfind_coding_systems_region_internal, 2, 3, 0,
8737        doc: /* Internal use only.  */)
8738      (start, end, exclude)
8739      Lisp_Object start, end, exclude;
8740 {
8741   Lisp_Object coding_attrs_list, safe_codings;
8742   EMACS_INT start_byte, end_byte;
8743   const unsigned char *p, *pbeg, *pend;
8744   int c;
8745   Lisp_Object tail, elt, work_table;
8746
8747   if (STRINGP (start))
8748     {
           /* A unibyte string, or a multibyte one with as many bytes as
              characters, needs no check.  */
8749       if (!STRING_MULTIBYTE (start)
8750           || SCHARS (start) == SBYTES (start))
8751         return Qt;
8752       start_byte = 0;
8753       end_byte = SBYTES (start);
8754     }
8755   else
8756     {
8757       CHECK_NUMBER_COERCE_MARKER (start);
8758       CHECK_NUMBER_COERCE_MARKER (end);
8759       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8760         args_out_of_range (start, end);
8761       if (NILP (current_buffer->enable_multibyte_characters))
8762         return Qt;
8763       start_byte = CHAR_TO_BYTE (XINT (start));
8764       end_byte = CHAR_TO_BYTE (XINT (end));
           /* As many bytes as characters: nothing to check.  */
8765       if (XINT (end) - XINT (start) == end_byte - start_byte)
8766         return Qt;
8767
           /* If the gap is strictly inside the region, move it to the
              nearer end of the region.  */
8768       if (XINT (start) < GPT && XINT (end) > GPT)
8769         {
8770           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8771             move_gap_both (XINT (start), start_byte);
8772           else
8773             move_gap_both (XINT (end), end_byte);
8774         }
8775     }
8776
       /* Collect the attribute vectors of the candidate coding systems:
          every element of Vcoding_system_list that is not in EXCLUDE,
          is its own base name, and is not of type `undecided'.  The
          translation table returned by get_translation_table is stored
          in each candidate's coding_attr_trans_tbl slot (the second
          argument 1 presumably selects the table used for encoding).  */
8777   coding_attrs_list = Qnil;
8778   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8779     if (NILP (exclude)
8780         || NILP (Fmemq (XCAR (tail), exclude)))
8781       {
8782         Lisp_Object attrs;
8783
8784         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8785         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8786             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8787           {
8788             ASET (attrs, coding_attr_trans_tbl,
8789                   get_translation_table (attrs, 1, NULL));
8790             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8791           }
8792       }
8793
8794   if (STRINGP (start))
8795     p = pbeg = SDATA (start);
8796   else
8797     p = pbeg = BYTE_POS_ADDR (start_byte);
8798   pend = p + (end_byte - start_byte);
8799
       /* Trim the ASCII bytes at both ends of the text.  */
8800   while (p < pend && ASCII_BYTE_P (*p)) p++;
8801   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8802
       /* WORK_TABLE records the characters that were checked already.  */
8803   work_table = Fmake_char_table (Qnil, Qnil);
8804   while (p < pend)
8805     {
8806       if (ASCII_BYTE_P (*p))
8807         p++;
8808       else
8809         {
8810           c = STRING_CHAR_ADVANCE (p);
8811           if (!NILP (char_table_ref (work_table, c)))
8812             /* This character was already checked.  Ignore it.  */
8813             continue;
8814
               /* Drop from the candidate list every coding system that
                  cannot encode C.  A cell is removed by copying the next
                  cell into it; TAIL is then not advanced, so the copied
                  element is examined next.  The last cell cannot be
                  removed that way; its car is set to nil instead, and
                  such nil elements are skipped.  */
8815           charset_map_loaded = 0;
8816           for (tail = coding_attrs_list; CONSP (tail);)
8817             {
8818               elt = XCAR (tail);
8819               if (NILP (elt))
8820                 tail = XCDR (tail);
8821               else if (char_encodable_p (c, elt))
8822                 tail = XCDR (tail);
8823               else if (CONSP (XCDR (tail)))
8824                 {
8825                   XSETCAR (tail, XCAR (XCDR (tail)));
8826                   XSETCDR (tail, XCDR (XCDR (tail)));
8827                 }
8828               else
8829                 {
8830                   XSETCAR (tail, Qnil);
8831                   tail = XCDR (tail);
8832                 }
8833             }
               /* If charset_map_loaded became nonzero during the checks,
                  recompute P and PEND from a fresh base address
                  (presumably because loading a charset map can relocate
                  the string or buffer text -- confirm).  */
8834           if (charset_map_loaded)
8835             {
8836               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8837
8838               if (STRINGP (start))
8839                 pbeg = SDATA (start);
8840               else
8841                 pbeg = BYTE_POS_ADDR (start_byte);
8842               p = pbeg + p_offset;
8843               pend = pbeg + pend_offset;
8844             }
8845           char_table_set (work_table, c, Qt);
8846         }
8847     }
8848
       /* The result is the base names of the surviving candidates,
          followed by `raw-text' and `no-conversion'.  */
8849   safe_codings = list2 (Qraw_text, Qno_conversion);
8850   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8851     if (! NILP (XCAR (tail)))
8852       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8853
8854   return safe_codings;
8855 }
8856
8857
8858 DEFUN ("unencodable-char-position", Funencodable_char_position,
8859        Sunencodable_char_position, 3, 5, 0,
8860        doc: /*
8861 Return position of first un-encodable character in a region.
8862 START and END specify the region and CODING-SYSTEM specifies the
8863 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8864
8865 If optional 4th argument COUNT is non-nil, it specifies at most how
8866 many un-encodable characters to search.  In this case, the value is a
8867 list of positions.
8868
8869 If optional 5th argument STRING is non-nil, it is a string to search
8870 for un-encodable characters.  In that case, START and END are indexes
8871 to the string.  */)
8872      (start, end, coding_system, count, string)
8873      Lisp_Object start, end, coding_system, count, string;
8874 {
8875   int n;
8876   struct coding_system coding;
8877   Lisp_Object attrs, charset_list, translation_table;
8878   Lisp_Object positions;
8879   int from, to;
8880   const unsigned char *p, *stop, *pend;
8881   int ascii_compatible;
8882
8883   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8884   attrs = CODING_ID_ATTRS (coding.id);
8885   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8886     return Qnil;
8887   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8888   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8889   translation_table = get_translation_table (attrs, 1, NULL);
8890
8891   if (NILP (string))
8892     {
8893       validate_region (&start, &end);
8894       from = XINT (start);
8895       to = XINT (end);
8896       if (NILP (current_buffer->enable_multibyte_characters)
8897           || (ascii_compatible
8898               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8899         return Qnil;
8900       p = CHAR_POS_ADDR (from);
8901       pend = CHAR_POS_ADDR (to);
8902       if (from < GPT && to >= GPT)
8903         stop = GPT_ADDR;
8904       else
8905         stop = pend;
8906     }
8907   else
8908     {
8909       CHECK_STRING (string);
8910       CHECK_NATNUM (start);
8911       CHECK_NATNUM (end);
8912       from = XINT (start);
8913       to = XINT (end);
8914       if (from > to
8915           || to > SCHARS (string))
8916         args_out_of_range_3 (string, start, end);
8917       if (! STRING_MULTIBYTE (string))
8918         return Qnil;
8919       p = SDATA (string) + string_char_to_byte (string, from);
8920       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8921       if (ascii_compatible && (to - from) == (pend - p))
8922         return Qnil;
8923     }
8924
8925   if (NILP (count))
8926     n = 1;
8927   else
8928     {
8929       CHECK_NATNUM (count);
8930       n = XINT (count);
8931     }
8932
8933   positions = Qnil;
8934   while (1)
8935     {
8936       int c;
8937
8938       if (ascii_compatible)
8939         while (p < stop && ASCII_BYTE_P (*p))
8940           p++, from++;
8941       if (p >= stop)
8942         {
8943           if (p >= pend)
8944             break;
8945           stop = pend;
8946           p = GAP_END_ADDR;
8947         }
8948
8949       c = STRING_CHAR_ADVANCE (p);
8950       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8951           && ! char_charset (translate_char (translation_table, c),
8952                              charset_list, NULL))
8953         {
8954           positions = Fcons (make_number (from), positions);
8955           n--;
8956           if (n == 0)
8957             break;
8958         }
8959
8960       from++;
8961     }
8962
8963   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8964 }
8965
8966
8967 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8968        Scheck_coding_systems_region, 3, 3, 0,
8969        doc: /* Check if the region is encodable by coding systems.
8970
8971 START and END are buffer positions specifying the region.
8972 CODING-SYSTEM-LIST is a list of coding systems to check.
8973
8974 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8975 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8976 whole region, POS0, POS1, ... are buffer positions where non-encodable
8977 characters are found.
8978
8979 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8980 value is nil.
8981
8982 START may be a string.  In that case, check if the string is
8983 encodable, and the value contains indices to the string instead of
8984 buffer positions.  END is ignored.
8985
8986 If the current buffer (or START if it is a string) is unibyte, the value
8987 is nil.  */)
8988      (start, end, coding_system_list)
8989      Lisp_Object start, end, coding_system_list;
8990 {
8991   Lisp_Object list;
8992   EMACS_INT start_byte, end_byte;
8993   int pos;
8994   const unsigned char *p, *pbeg, *pend;
8995   int c;
8996   Lisp_Object tail, elt, attrs;
8997
8998   if (STRINGP (start))
8999     {
9000       if (!STRING_MULTIBYTE (start)
9001           || SCHARS (start) == SBYTES (start))
9002         return Qnil;
9003       start_byte = 0;
9004       end_byte = SBYTES (start);
9005       pos = 0;
9006     }
9007   else
9008     {
9009       CHECK_NUMBER_COERCE_MARKER (start);
9010       CHECK_NUMBER_COERCE_MARKER (end);
9011       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
9012         args_out_of_range (start, end);
9013       if (NILP (current_buffer->enable_multibyte_characters))
9014         return Qnil;
9015       start_byte = CHAR_TO_BYTE (XINT (start));
9016       end_byte = CHAR_TO_BYTE (XINT (end));
9017       if (XINT (end) - XINT (start) == end_byte - start_byte)
9018         return Qnil;
9019
9020       if (XINT (start) < GPT && XINT (end) > GPT)
9021         {
9022           if ((GPT - XINT (start)) < (XINT (end) - GPT))
9023             move_gap_both (XINT (start), start_byte);
9024           else
9025             move_gap_both (XINT (end), end_byte);
9026         }
9027       pos = XINT (start);
9028     }
9029
9030   list = Qnil;
9031   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
9032     {
9033       elt = XCAR (tail);
9034       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
9035       ASET (attrs, coding_attr_trans_tbl,
9036             get_translation_table (attrs, 1, NULL));
9037       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
9038     }
9039
9040   if (STRINGP (start))
9041     p = pbeg = SDATA (start);
9042   else
9043     p = pbeg = BYTE_POS_ADDR (start_byte);
9044   pend = p + (end_byte - start_byte);
9045
9046   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
9047   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
9048
9049   while (p < pend)
9050     {
9051       if (ASCII_BYTE_P (*p))
9052         p++;
9053       else
9054         {
9055           c = STRING_CHAR_ADVANCE (p);
9056
9057           charset_map_loaded = 0;
9058           for (tail = list; CONSP (tail); tail = XCDR (tail))
9059             {
9060               elt = XCDR (XCAR (tail));
9061               if (! char_encodable_p (c, XCAR (elt)))
9062                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
9063             }
9064           if (charset_map_loaded)
9065             {
9066               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
9067
9068               if (STRINGP (start))
9069                 pbeg = SDATA (start);
9070               else
9071                 pbeg = BYTE_POS_ADDR (start_byte);
9072               p = pbeg + p_offset;
9073               pend = pbeg + pend_offset;
9074             }
9075         }
9076       pos++;
9077     }
9078
9079   tail = list;
9080   list = Qnil;
9081   for (; CONSP (tail); tail = XCDR (tail))
9082     {
9083       elt = XCAR (tail);
9084       if (CONSP (XCDR (XCDR (elt))))
9085         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
9086                       list);
9087     }
9088
9089   return list;
9090 }
9091
9092
9093 Lisp_Object
9094 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
9095      Lisp_Object start, end, coding_system, dst_object;
9096      int encodep, norecord;
9097 {
9098   struct coding_system coding;
9099   EMACS_INT from, from_byte, to, to_byte;
9100   Lisp_Object src_object;
9101
9102   CHECK_NUMBER_COERCE_MARKER (start);
9103   CHECK_NUMBER_COERCE_MARKER (end);
9104   if (NILP (coding_system))
9105     coding_system = Qno_conversion;
9106   else
9107     CHECK_CODING_SYSTEM (coding_system);
9108   src_object = Fcurrent_buffer ();
9109   if (NILP (dst_object))
9110     dst_object = src_object;
9111   else if (! EQ (dst_object, Qt))
9112     CHECK_BUFFER (dst_object);
9113
9114   validate_region (&start, &end);
9115   from = XFASTINT (start);
9116   from_byte = CHAR_TO_BYTE (from);
9117   to = XFASTINT (end);
9118   to_byte = CHAR_TO_BYTE (to);
9119
9120   setup_coding_system (coding_system, &coding);
9121   coding.mode |= CODING_MODE_LAST_BLOCK;
9122
9123   if (encodep)
9124     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9125                           dst_object);
9126   else
9127     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9128                           dst_object);
9129   if (! norecord)
9130     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9131
9132   return (BUFFERP (dst_object)
9133           ? make_number (coding.produced_char)
9134           : coding.dst_object);
9135 }
9136
9137
9138 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
9139        3, 4, "r\nzCoding system: ",
9140        doc: /* Decode the current region from the specified coding system.
9141 When called from a program, takes four arguments:
9142         START, END, CODING-SYSTEM, and DESTINATION.
9143 START and END are buffer positions.
9144
9145 Optional 4th arguments DESTINATION specifies where the decoded text goes.
9146 If nil, the region between START and END is replaced by the decoded text.
9147 If buffer, the decoded text is inserted in that buffer after point (point
9148 does not move).
9149 In those cases, the length of the decoded text is returned.
9150 If DESTINATION is t, the decoded text is returned.
9151
9152 This function sets `last-coding-system-used' to the precise coding system
9153 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9154 not fully specified.)  */)
9155      (start, end, coding_system, destination)
9156      Lisp_Object start, end, coding_system, destination;
9157 {
9158   return code_convert_region (start, end, coding_system, destination, 0, 0);
9159 }
9160
9161 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
9162        3, 4, "r\nzCoding system: ",
9163        doc: /* Encode the current region by specified coding system.
9164 When called from a program, takes four arguments:
9165         START, END, CODING-SYSTEM and DESTINATION.
9166 START and END are buffer positions.
9167
9168 Optional 4th arguments DESTINATION specifies where the encoded text goes.
9169 If nil, the region between START and END is replace by the encoded text.
9170 If buffer, the encoded text is inserted in that buffer after point (point
9171 does not move).
9172 In those cases, the length of the encoded text is returned.
9173 If DESTINATION is t, the encoded text is returned.
9174
9175 This function sets `last-coding-system-used' to the precise coding system
9176 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9177 not fully specified.)  */)
9178   (start, end, coding_system, destination)
9179      Lisp_Object start, end, coding_system, destination;
9180 {
9181   return code_convert_region (start, end, coding_system, destination, 1, 0);
9182 }
9183
9184 Lisp_Object
9185 code_convert_string (string, coding_system, dst_object,
9186                      encodep, nocopy, norecord)
9187      Lisp_Object string, coding_system, dst_object;
9188      int encodep, nocopy, norecord;
9189 {
9190   struct coding_system coding;
9191   EMACS_INT chars, bytes;
9192
9193   CHECK_STRING (string);
9194   if (NILP (coding_system))
9195     {
9196       if (! norecord)
9197         Vlast_coding_system_used = Qno_conversion;
9198       if (NILP (dst_object))
9199         return (nocopy ? Fcopy_sequence (string) : string);
9200     }
9201
9202   if (NILP (coding_system))
9203     coding_system = Qno_conversion;
9204   else
9205     CHECK_CODING_SYSTEM (coding_system);
9206   if (NILP (dst_object))
9207     dst_object = Qt;
9208   else if (! EQ (dst_object, Qt))
9209     CHECK_BUFFER (dst_object);
9210
9211   setup_coding_system (coding_system, &coding);
9212   coding.mode |= CODING_MODE_LAST_BLOCK;
9213   chars = SCHARS (string);
9214   bytes = SBYTES (string);
9215   if (encodep)
9216     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9217   else
9218     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9219   if (! norecord)
9220     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9221
9222   return (BUFFERP (dst_object)
9223           ? make_number (coding.produced_char)
9224           : coding.dst_object);
9225 }
9226
9227
9228 /* Encode or decode STRING according to CODING_SYSTEM.
9229    Do not set Vlast_coding_system_used.
9230
9231    This function is called only from macros DECODE_FILE and
9232    ENCODE_FILE, thus we ignore character composition.  */
9233
9234 Lisp_Object
9235 code_convert_string_norecord (string, coding_system, encodep)
9236      Lisp_Object string, coding_system;
9237      int encodep;
9238 {
9239   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9240 }
9241
9242
9243 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9244        2, 4, 0,
9245        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9246
9247 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9248 if the decoding operation is trivial.
9249
9250 Optional fourth arg BUFFER non-nil means that the decoded text is
9251 inserted in that buffer after point (point does not move).  In this
9252 case, the return value is the length of the decoded text.
9253
9254 This function sets `last-coding-system-used' to the precise coding system
9255 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9256 not fully specified.)  */)
9257   (string, coding_system, nocopy, buffer)
9258      Lisp_Object string, coding_system, nocopy, buffer;
9259 {
9260   return code_convert_string (string, coding_system, buffer,
9261                               0, ! NILP (nocopy), 0);
9262 }
9263
9264 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9265        2, 4, 0,
9266        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9267
9268 Optional third arg NOCOPY non-nil means it is OK to return STRING
9269 itself if the encoding operation is trivial.
9270
9271 Optional fourth arg BUFFER non-nil means that the encoded text is
9272 inserted in that buffer after point (point does not move).  In this
9273 case, the return value is the length of the encoded text.
9274
9275 This function sets `last-coding-system-used' to the precise coding system
9276 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9277 not fully specified.)  */)
9278      (string, coding_system, nocopy, buffer)
9279      Lisp_Object string, coding_system, nocopy, buffer;
9280 {
9281   return code_convert_string (string, coding_system, buffer,
9282                               1, ! NILP (nocopy), 1);
9283 }
9284
9285 \f
9286 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9287        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9288 Return the corresponding character.  */)
9289      (code)
9290      Lisp_Object code;
9291 {
9292   Lisp_Object spec, attrs, val;
9293   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9294   int c;
9295
9296   CHECK_NATNUM (code);
9297   c = XFASTINT (code);
9298   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9299   attrs = AREF (spec, 0);
9300
9301   if (ASCII_BYTE_P (c)
9302       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9303     return code;
9304
9305   val = CODING_ATTR_CHARSET_LIST (attrs);
9306   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9307   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9308   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9309
9310   if (c <= 0x7F)
9311     charset = charset_roman;
9312   else if (c >= 0xA0 && c < 0xDF)
9313     {
9314       charset = charset_kana;
9315       c -= 0x80;
9316     }
9317   else
9318     {
9319       int s1 = c >> 8, s2 = c & 0xFF;
9320
9321       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9322           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9323         error ("Invalid code: %d", code);
9324       SJIS_TO_JIS (c);
9325       charset = charset_kanji;
9326     }
9327   c = DECODE_CHAR (charset, c);
9328   if (c < 0)
9329     error ("Invalid code: %d", code);
9330   return make_number (c);
9331 }
9332
9333
9334 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9335        doc: /* Encode a Japanese character CH to shift_jis encoding.
9336 Return the corresponding code in SJIS.  */)
9337      (ch)
9338     Lisp_Object ch;
9339 {
9340   Lisp_Object spec, attrs, charset_list;
9341   int c;
9342   struct charset *charset;
9343   unsigned code;
9344
9345   CHECK_CHARACTER (ch);
9346   c = XFASTINT (ch);
9347   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9348   attrs = AREF (spec, 0);
9349
9350   if (ASCII_CHAR_P (c)
9351       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9352     return ch;
9353
9354   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9355   charset = char_charset (c, charset_list, &code);
9356   if (code == CHARSET_INVALID_CODE (charset))
9357     error ("Can't encode by shift_jis encoding: %d", c);
9358   JIS_TO_SJIS (code);
9359
9360   return make_number (code);
9361 }
9362
9363 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9364        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9365 Return the corresponding character.  */)
9366      (code)
9367      Lisp_Object code;
9368 {
9369   Lisp_Object spec, attrs, val;
9370   struct charset *charset_roman, *charset_big5, *charset;
9371   int c;
9372
9373   CHECK_NATNUM (code);
9374   c = XFASTINT (code);
9375   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9376   attrs = AREF (spec, 0);
9377
9378   if (ASCII_BYTE_P (c)
9379       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9380     return code;
9381
9382   val = CODING_ATTR_CHARSET_LIST (attrs);
9383   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9384   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9385
9386   if (c <= 0x7F)
9387     charset = charset_roman;
9388   else
9389     {
9390       int b1 = c >> 8, b2 = c & 0x7F;
9391       if (b1 < 0xA1 || b1 > 0xFE
9392           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9393         error ("Invalid code: %d", code);
9394       charset = charset_big5;
9395     }
9396   c = DECODE_CHAR (charset, (unsigned )c);
9397   if (c < 0)
9398     error ("Invalid code: %d", code);
9399   return make_number (c);
9400 }
9401
9402 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9403        doc: /* Encode the Big5 character CH to BIG5 coding system.
9404 Return the corresponding character code in Big5.  */)
9405      (ch)
9406      Lisp_Object ch;
9407 {
9408   Lisp_Object spec, attrs, charset_list;
9409   struct charset *charset;
9410   int c;
9411   unsigned code;
9412
9413   CHECK_CHARACTER (ch);
9414   c = XFASTINT (ch);
9415   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9416   attrs = AREF (spec, 0);
9417   if (ASCII_CHAR_P (c)
9418       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9419     return ch;
9420
9421   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9422   charset = char_charset (c, charset_list, &code);
9423   if (code == CHARSET_INVALID_CODE (charset))
9424     error ("Can't encode by Big5 encoding: %d", c);
9425
9426   return make_number (code);
9427 }
9428
9429 \f
9430 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9431        Sset_terminal_coding_system_internal, 1, 2, 0,
9432        doc: /* Internal use only.  */)
9433      (coding_system, terminal)
9434      Lisp_Object coding_system;
9435      Lisp_Object terminal;
9436 {
9437   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9438   CHECK_SYMBOL (coding_system);
9439   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9440   /* We had better not send unsafe characters to terminal.  */
9441   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9442   /* Characer composition should be disabled.  */
9443   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9444   terminal_coding->src_multibyte = 1;
9445   terminal_coding->dst_multibyte = 0;
9446   return Qnil;
9447 }
9448
9449 DEFUN ("set-safe-terminal-coding-system-internal",
9450        Fset_safe_terminal_coding_system_internal,
9451        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9452        doc: /* Internal use only.  */)
9453      (coding_system)
9454      Lisp_Object coding_system;
9455 {
9456   CHECK_SYMBOL (coding_system);
9457   setup_coding_system (Fcheck_coding_system (coding_system),
9458                        &safe_terminal_coding);
9459   /* Characer composition should be disabled.  */
9460   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9461   safe_terminal_coding.src_multibyte = 1;
9462   safe_terminal_coding.dst_multibyte = 0;
9463   return Qnil;
9464 }
9465
9466 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9467        Sterminal_coding_system, 0, 1, 0,
9468        doc: /* Return coding system specified for terminal output on the given terminal.
9469 TERMINAL may be a terminal object, a frame, or nil for the selected
9470 frame's terminal device.  */)
9471      (terminal)
9472      Lisp_Object terminal;
9473 {
9474   struct coding_system *terminal_coding
9475     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9476   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9477
9478   /* For backward compatibility, return nil if it is `undecided'. */
9479   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9480 }
9481
9482 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9483        Sset_keyboard_coding_system_internal, 1, 2, 0,
9484        doc: /* Internal use only.  */)
9485      (coding_system, terminal)
9486      Lisp_Object coding_system;
9487      Lisp_Object terminal;
9488 {
9489   struct terminal *t = get_terminal (terminal, 1);
9490   CHECK_SYMBOL (coding_system);
9491   if (NILP (coding_system))
9492     coding_system = Qno_conversion;
9493   else
9494     Fcheck_coding_system (coding_system);
9495   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9496   /* Characer composition should be disabled.  */
9497   TERMINAL_KEYBOARD_CODING (t)->common_flags
9498     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9499   return Qnil;
9500 }
9501
9502 DEFUN ("keyboard-coding-system",
9503        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9504        doc: /* Return coding system specified for decoding keyboard input.  */)
9505      (terminal)
9506      Lisp_Object terminal;
9507 {
9508   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9509                          (get_terminal (terminal, 1))->id);
9510 }
9511
9512 \f
9513 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9514        Sfind_operation_coding_system,  1, MANY, 0,
9515        doc: /* Choose a coding system for an operation based on the target name.
9516 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9517 DECODING-SYSTEM is the coding system to use for decoding
9518 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9519 for encoding (in case OPERATION does encoding).
9520
9521 The first argument OPERATION specifies an I/O primitive:
9522   For file I/O, `insert-file-contents' or `write-region'.
9523   For process I/O, `call-process', `call-process-region', or `start-process'.
9524   For network I/O, `open-network-stream'.
9525
9526 The remaining arguments should be the same arguments that were passed
9527 to the primitive.  Depending on which primitive, one of those arguments
9528 is selected as the TARGET.  For example, if OPERATION does file I/O,
9529 whichever argument specifies the file name is TARGET.
9530
9531 TARGET has a meaning which depends on OPERATION:
9532   For file I/O, TARGET is a file name (except for the special case below).
9533   For process I/O, TARGET is a process name.
9534   For network I/O, TARGET is a service name or a port number.
9535
9536 This function looks up what is specified for TARGET in
9537 `file-coding-system-alist', `process-coding-system-alist',
9538 or `network-coding-system-alist' depending on OPERATION.
9539 They may specify a coding system, a cons of coding systems,
9540 or a function symbol to call.
9541 In the last case, we call the function with one argument,
9542 which is a list of all the arguments given to this function.
9543 If the function can't decide a coding system, it can return
9544 `undecided' so that the normal code-detection is performed.
9545
9546 If OPERATION is `insert-file-contents', the argument corresponding to
9547 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9548 file name to look up, and BUFFER is a buffer that contains the file's
9549 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9550 function to call for FILENAME, that function should examine the
9551 contents of BUFFER instead of reading the file.
9552
9553 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9554      (nargs, args)
9555      int nargs;
9556      Lisp_Object *args;
9557 {
9558   Lisp_Object operation, target_idx, target, val;
9559   register Lisp_Object chain;
9560
9561   if (nargs < 2)
9562     error ("Too few arguments");
9563   operation = args[0];
9564   if (!SYMBOLP (operation)
9565       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9566     error ("Invalid first argument");
9567   if (nargs < 1 + XINT (target_idx))
9568     error ("Too few arguments for operation: %s",
9569            SDATA (SYMBOL_NAME (operation)));
9570   target = args[XINT (target_idx) + 1];
9571   if (!(STRINGP (target)
9572         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9573             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9574         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9575     error ("Invalid %dth argument", XINT (target_idx) + 1);
9576   if (CONSP (target))
9577     target = XCAR (target);
9578
9579   chain = ((EQ (operation, Qinsert_file_contents)
9580             || EQ (operation, Qwrite_region))
9581            ? Vfile_coding_system_alist
9582            : (EQ (operation, Qopen_network_stream)
9583               ? Vnetwork_coding_system_alist
9584               : Vprocess_coding_system_alist));
9585   if (NILP (chain))
9586     return Qnil;
9587
9588   for (; CONSP (chain); chain = XCDR (chain))
9589     {
9590       Lisp_Object elt;
9591
9592       elt = XCAR (chain);
9593       if (CONSP (elt)
9594           && ((STRINGP (target)
9595                && STRINGP (XCAR (elt))
9596                && fast_string_match (XCAR (elt), target) >= 0)
9597               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9598         {
9599           val = XCDR (elt);
9600           /* Here, if VAL is both a valid coding system and a valid
9601              function symbol, we return VAL as a coding system.  */
9602           if (CONSP (val))
9603             return val;
9604           if (! SYMBOLP (val))
9605             return Qnil;
9606           if (! NILP (Fcoding_system_p (val)))
9607             return Fcons (val, val);
9608           if (! NILP (Ffboundp (val)))
9609             {
9610               /* We use call1 rather than safe_call1
9611                  so as to get bug reports about functions called here
9612                  which don't handle the current interface.  */
9613               val = call1 (val, Flist (nargs, args));
9614               if (CONSP (val))
9615                 return val;
9616               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9617                 return Fcons (val, val);
9618             }
9619           return Qnil;
9620         }
9621     }
9622   return Qnil;
9623 }
9624
9625 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9626        Sset_coding_system_priority, 0, MANY, 0,
9627        doc: /* Assign higher priority to the coding systems given as arguments.
9628 If multiple coding systems belong to the same category,
9629 all but the first one are ignored.
9630
9631 usage: (set-coding-system-priority &rest coding-systems)  */)
9632      (nargs, args)
9633      int nargs;
9634      Lisp_Object *args;
9635 {
9636   int i, j;
9637   int changed[coding_category_max];
9638   enum coding_category priorities[coding_category_max];
9639
9640   bzero (changed, sizeof changed);
9641
9642   for (i = j = 0; i < nargs; i++)
9643     {
9644       enum coding_category category;
9645       Lisp_Object spec, attrs;
9646
9647       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9648       attrs = AREF (spec, 0);
9649       category = XINT (CODING_ATTR_CATEGORY (attrs));
9650       if (changed[category])
9651         /* Ignore this coding system because a coding system of the
9652            same category already had a higher priority.  */
9653         continue;
9654       changed[category] = 1;
9655       priorities[j++] = category;
9656       if (coding_categories[category].id >= 0
9657           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9658         setup_coding_system (args[i], &coding_categories[category]);
9659       Fset (AREF (Vcoding_category_table, category), args[i]);
9660     }
9661
9662   /* Now we have decided top J priorities.  Reflect the order of the
9663      original priorities to the remaining priorities.  */
9664
9665   for (i = j, j = 0; i < coding_category_max; i++, j++)
9666     {
9667       while (j < coding_category_max
9668              && changed[coding_priorities[j]])
9669         j++;
9670       if (j == coding_category_max)
9671         abort ();
9672       priorities[i] = coding_priorities[j];
9673     }
9674
9675   bcopy (priorities, coding_priorities, sizeof priorities);
9676
9677   /* Update `coding-category-list'.  */
9678   Vcoding_category_list = Qnil;
9679   for (i = coding_category_max - 1; i >= 0; i--)
9680     Vcoding_category_list
9681       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9682                Vcoding_category_list);
9683
9684   return Qnil;
9685 }
9686
9687 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9688        Scoding_system_priority_list, 0, 1, 0,
9689        doc: /* Return a list of coding systems ordered by their priorities.
9690 The list contains a subset of coding systems; i.e. coding systems
9691 assigned to each coding category (see `coding-category-list').
9692
9693 HIGHESTP non-nil means just return the highest priority one.  */)
9694      (highestp)
9695      Lisp_Object highestp;
9696 {
9697   int i;
9698   Lisp_Object val;
9699
9700   for (i = 0, val = Qnil; i < coding_category_max; i++)
9701     {
9702       enum coding_category category = coding_priorities[i];
9703       int id = coding_categories[category].id;
9704       Lisp_Object attrs;
9705
9706       if (id < 0)
9707         continue;
9708       attrs = CODING_ID_ATTRS (id);
9709       if (! NILP (highestp))
9710         return CODING_ATTR_BASE_NAME (attrs);
9711       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9712     }
9713   return Fnreverse (val);
9714 }
9715
9716 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9717
9718 static Lisp_Object
9719 make_subsidiaries (base)
9720      Lisp_Object base;
9721 {
9722   Lisp_Object subsidiaries;
9723   int base_name_len = SBYTES (SYMBOL_NAME (base));
9724   char *buf = (char *) alloca (base_name_len + 6);
9725   int i;
9726
9727   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9728   subsidiaries = Fmake_vector (make_number (3), Qnil);
9729   for (i = 0; i < 3; i++)
9730     {
9731       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9732       ASET (subsidiaries, i, intern (buf));
9733     }
9734   return subsidiaries;
9735 }
9736
9737
/* Lisp primitive `define-coding-system-internal'.

   Build the attribute vector of a new coding system from ARGS
   (indexed by the coding_arg_* constants), validate the arguments
   according to the coding type, choose the coding category, and
   register the coding system in Vcoding_system_hash_table,
   Vcoding_system_list and Vcoding_system_alist.  When the eol-type
   argument is nil, the three subsidiaries made by make_subsidiaries
   are registered as well.

   Signal wrong-number-of-arguments if NARGS is smaller than what the
   coding type requires.  Return nil.  */
9738 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9739        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9740        doc: /* For internal use only.
9741 usage: (define-coding-system-internal ...)  */)
9742      (nargs, args)
9743      int nargs;
9744      Lisp_Object *args;
9745 {
9746   Lisp_Object name;
9747   Lisp_Object spec_vec;         /* [ ATTRS ALIASES EOL_TYPE ] */
9748   Lisp_Object attrs;            /* Vector of attributes.  */
9749   Lisp_Object eol_type;
9750   Lisp_Object aliases;
9751   Lisp_Object coding_type, charset_list, safe_charsets;
9752   enum coding_category category;
9753   Lisp_Object tail, val;
9754   int max_charset_id = 0;
9755   int i;
9756
       /* Every coding type needs at least the common arguments.  */
9757   if (nargs < coding_arg_max)
9758     goto short_args;
9759
9760   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9761
9762   name = args[coding_arg_name];
9763   CHECK_SYMBOL (name);
9764   CODING_ATTR_BASE_NAME (attrs) = name;
9765
       /* The mnemonic is either a string or a character.  */
9766   val = args[coding_arg_mnemonic];
9767   if (! STRINGP (val))
9768     CHECK_CHARACTER (val);
9769   CODING_ATTR_MNEMONIC (attrs) = val;
9770
9771   coding_type = args[coding_arg_coding_type];
9772   CHECK_SYMBOL (coding_type);
9773   CODING_ATTR_TYPE (attrs) = coding_type;
9774
       /* The charset-list argument is either a symbol or a list of
          charsets.  The symbols `iso-2022' and `emacs-mule' select
          Viso_2022_charset_list and Vemacs_mule_charset_list and are
          valid only for the coding type of the same name.  A list is
          copied and its elements are replaced by charset IDs.
          MAX_CHARSET_ID tracks the largest ID seen.  */
9775   charset_list = args[coding_arg_charset_list];
9776   if (SYMBOLP (charset_list))
9777     {
9778       if (EQ (charset_list, Qiso_2022))
9779         {
9780           if (! EQ (coding_type, Qiso_2022))
9781             error ("Invalid charset-list");
9782           charset_list = Viso_2022_charset_list;
9783         }
9784       else if (EQ (charset_list, Qemacs_mule))
9785         {
9786           if (! EQ (coding_type, Qemacs_mule))
9787             error ("Invalid charset-list");
9788           charset_list = Vemacs_mule_charset_list;
9789         }
9790       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9791         if (max_charset_id < XFASTINT (XCAR (tail)))
9792           max_charset_id = XFASTINT (XCAR (tail));
9793     }
9794   else
9795     {
9796       charset_list = Fcopy_sequence (charset_list);
9797       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9798         {
9799           struct charset *charset;
9800
9801           val = XCAR (tail);
9802           CHECK_CHARSET_GET_CHARSET (val, charset);
               /* Reject a charset with a negative ISO final (for type
                  iso-2022) or a negative emacs-mule ID (for type
                  emacs-mule).  */
9803           if (EQ (coding_type, Qiso_2022)
9804               ? CHARSET_ISO_FINAL (charset) < 0
9805               : EQ (coding_type, Qemacs_mule)
9806               ? CHARSET_EMACS_MULE_ID (charset) < 0
9807               : 0)
9808             error ("Can't handle charset `%s'",
9809                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9810
9811           XSETCAR (tail, make_number (charset->id));
9812           if (max_charset_id < charset->id)
9813             max_charset_id = charset->id;
9814         }
9815     }
9816   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9817
       /* SAFE_CHARSETS is a string of MAX_CHARSET_ID + 1 bytes indexed
          by charset ID: 0 for each charset in CHARSET_LIST, 255 for
          every other ID.  */
9818   safe_charsets = make_uninit_string (max_charset_id + 1);
9819   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9820   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9821     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9822   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9823
9824   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9825
       /* A translation table is a char-table, a cons, or a symbol.  */
9826   val = args[coding_arg_decode_translation_table];
9827   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9828     CHECK_SYMBOL (val);
9829   CODING_ATTR_DECODE_TBL (attrs) = val;
9830
9831   val = args[coding_arg_encode_translation_table];
9832   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9833     CHECK_SYMBOL (val);
9834   CODING_ATTR_ENCODE_TBL (attrs) = val;
9835
9836   val = args[coding_arg_post_read_conversion];
9837   CHECK_SYMBOL (val);
9838   CODING_ATTR_POST_READ (attrs) = val;
9839
9840   val = args[coding_arg_pre_write_conversion];
9841   CHECK_SYMBOL (val);
9842   CODING_ATTR_PRE_WRITE (attrs) = val;
9843
       /* A nil default character means a space.  */
9844   val = args[coding_arg_default_char];
9845   if (NILP (val))
9846     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9847   else
9848     {
9849       CHECK_CHARACTER (val);
9850       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9851     }
9852
       /* Normalize the for-unibyte flag to nil or t.  */
9853   val = args[coding_arg_for_unibyte];
9854   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9855
9856   val = args[coding_arg_plist];
9857   CHECK_LIST (val);
9858   CODING_ATTR_PLIST (attrs) = val;
9859
       /* From here on, handle the attributes specific to each coding
          type and decide CATEGORY.  */
9860   if (EQ (coding_type, Qcharset))
9861     {
9862       /* Generate a lisp vector of 256 elements.  Each element is nil,
9863          integer, or a list of charset IDs.
9864
9865          If Nth element is nil, the byte code N is invalid in this
9866          coding system.
9867
9868          If Nth element is a number NUM, N is the first byte of a
9869          charset whose ID is NUM.
9870
9871          If Nth element is a list of charset IDs, N is the first byte
9872          of one of them.  The list is sorted by dimensions of the
9873          charsets.  A charset of smaller dimension comes first. */
9874       val = Fmake_vector (make_number (256), Qnil);
9875
9876       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9877         {
9878           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9879           int dim = CHARSET_DIMENSION (charset);
9880           int idx = (dim - 1) * 4;
9881
9882           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9883             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9884
9885           for (i = charset->code_space[idx];
9886                i <= charset->code_space[idx + 1]; i++)
9887             {
9888               Lisp_Object tmp, tmp2;
9889               int dim2;
9890
9891               tmp = AREF (val, i);
9892               if (NILP (tmp))
9893                 tmp = XCAR (tail);
9894               else if (NUMBERP (tmp))
9895                 {
                       /* Second charset for this byte: make a
                          two-element list, smaller dimension first.  */
9896                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9897                   if (dim < dim2)
9898                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9899                   else
9900                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9901                 }
9902               else
9903                 {
                       /* Find the first element whose dimension is
                          larger than DIM and insert before it, or
                          append at the end if there is none.  */
9904                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9905                     {
9906                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9907                       if (dim < dim2)
9908                         break;
9909                     }
9910                   if (NILP (tmp2))
9911                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9912                   else
9913                     {
9914                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9915                       XSETCAR (tmp2, XCAR (tail));
9916                     }
9917                 }
9918               ASET (val, i, tmp);
9919             }
9920         }
9921       ASET (attrs, coding_attr_charset_valids, val);
9922       category = coding_category_charset;
9923     }
9924   else if (EQ (coding_type, Qccl))
9925     {
9926       Lisp_Object valids;
9927
9928       if (nargs < coding_arg_ccl_max)
9929         goto short_args;
9930
           /* A CCL program given as a vector is copied before being
              stored.  */
9931       val = args[coding_arg_ccl_decoder];
9932       CHECK_CCL_PROGRAM (val);
9933       if (VECTORP (val))
9934         val = Fcopy_sequence (val);
9935       ASET (attrs, coding_attr_ccl_decoder, val);
9936
9937       val = args[coding_arg_ccl_encoder];
9938       CHECK_CCL_PROGRAM (val);
9939       if (VECTORP (val))
9940         val = Fcopy_sequence (val);
9941       ASET (attrs, coding_attr_ccl_encoder, val);
9942
           /* VALIDS is a 256-byte string.  Byte N is set to 1 when the
              ccl-valids argument lists N, either as an integer or
              within a (FROM . TO) range; the other bytes stay 0.  */
9943       val = args[coding_arg_ccl_valids];
9944       valids = Fmake_string (make_number (256), make_number (0));
9945       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9946         {
9947           int from, to;
9948
9949           val = Fcar (tail);
9950           if (INTEGERP (val))
9951             {
9952               from = to = XINT (val);
9953               if (from < 0 || from > 255)
9954                 args_out_of_range_3 (val, make_number (0), make_number (255));
9955             }
9956           else
9957             {
9958               CHECK_CONS (val);
9959               CHECK_NATNUM_CAR (val);
9960               CHECK_NATNUM_CDR (val);
9961               from = XINT (XCAR (val));
9962               if (from > 255)
9963                 args_out_of_range_3 (XCAR (val),
9964                                      make_number (0), make_number (255));
9965               to = XINT (XCDR (val));
9966               if (to < from || to > 255)
9967                 args_out_of_range_3 (XCDR (val),
9968                                      XCAR (val), make_number (255));
9969             }
9970           for (i = from; i <= to; i++)
9971             SSET (valids, i, 1);
9972         }
9973       ASET (attrs, coding_attr_ccl_valids, valids);
9974
9975       category = coding_category_ccl;
9976     }
9977   else if (EQ (coding_type, Qutf_16))
9978     {
9979       Lisp_Object bom, endian;
9980
9981       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9982
9983       if (nargs < coding_arg_utf16_max)
9984         goto short_args;
9985
           /* BOM is nil, t, or a cons of two coding systems.  */
9986       bom = args[coding_arg_utf16_bom];
9987       if (! NILP (bom) && ! EQ (bom, Qt))
9988         {
9989           CHECK_CONS (bom);
9990           val = XCAR (bom);
9991           CHECK_CODING_SYSTEM (val);
9992           val = XCDR (bom);
9993           CHECK_CODING_SYSTEM (val);
9994         }
9995       ASET (attrs, coding_attr_utf_bom, bom);
9996
           /* A nil endian defaults to `big'.  */
9997       endian = args[coding_arg_utf16_endian];
9998       CHECK_SYMBOL (endian);
9999       if (NILP (endian))
10000         endian = Qbig;
10001       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
10002         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
10003       ASET (attrs, coding_attr_utf_16_endian, endian);
10004
            /* A cons BOM selects the auto category; otherwise the
               category follows from BOM (nil or t) and ENDIAN.  */
10005       category = (CONSP (bom)
10006                   ? coding_category_utf_16_auto
10007                   : NILP (bom)
10008                   ? (EQ (endian, Qbig)
10009                      ? coding_category_utf_16_be_nosig
10010                      : coding_category_utf_16_le_nosig)
10011                   : (EQ (endian, Qbig)
10012                      ? coding_category_utf_16_be
10013                      : coding_category_utf_16_le));
10014     }
10015   else if (EQ (coding_type, Qiso_2022))
10016     {
10017       Lisp_Object initial, reg_usage, request, flags;
10018       int i;
10019
10020       if (nargs < coding_arg_iso2022_max)
10021         goto short_args;
10022
            /* In a copy of the INITIAL vector, replace each of the
               first four elements by a charset ID, or by -1 if the
               element is nil.  */
10023       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
10024       CHECK_VECTOR (initial);
10025       for (i = 0; i < 4; i++)
10026         {
10027           val = Faref (initial, make_number (i));
10028           if (! NILP (val))
10029             {
10030               struct charset *charset;
10031
10032               CHECK_CHARSET_GET_CHARSET (val, charset);
10033               ASET (initial, i, make_number (CHARSET_ID (charset)));
10034               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
10035                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10036             }
10037           else
10038             ASET (initial, i, make_number (-1));
10039         }
10040
10041       reg_usage = args[coding_arg_iso2022_reg_usage];
10042       CHECK_CONS (reg_usage);
10043       CHECK_NUMBER_CAR (reg_usage);
10044       CHECK_NUMBER_CDR (reg_usage);
10045
            /* Each element of REQUEST is a cons of a charset and a
               graphic register number below 4.  The charset in the
               car is replaced by its ID.  Fcopy_sequence copies only
               the list itself, so the element conses modified here
               are those of the argument.  */
10046       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
10047       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
10048         {
10049           int id;
10050           Lisp_Object tmp;
10051
10052           val = Fcar (tail);
10053           CHECK_CONS (val);
10054           tmp = XCAR (val);
10055           CHECK_CHARSET_GET_ID (tmp, id);
10056           CHECK_NATNUM_CDR (val);
10057           if (XINT (XCDR (val)) >= 4)
10058             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
10059           XSETCAR (val, make_number (id));
10060         }
10061
            /* I keeps the flags as given; the stored FLAGS additionally
               gets CODING_ISO_FLAG_FULL_SUPPORT when the charset-list
               argument is the symbol `iso-2022'.  */
10062       flags = args[coding_arg_iso2022_flags];
10063       CHECK_NATNUM (flags);
10064       i = XINT (flags);
10065       if (EQ (args[coding_arg_charset_list], Qiso_2022))
10066         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
10067
10068       ASET (attrs, coding_attr_iso_initial, initial);
10069       ASET (attrs, coding_attr_iso_usage, reg_usage);
10070       ASET (attrs, coding_attr_iso_request, request);
10071       ASET (attrs, coding_attr_iso_flags, flags);
10072       setup_iso_safe_charsets (attrs);
10073
10074       if (i & CODING_ISO_FLAG_SEVEN_BITS)
10075         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
10076                           | CODING_ISO_FLAG_SINGLE_SHIFT))
10077                     ? coding_category_iso_7_else
10078                     : EQ (args[coding_arg_charset_list], Qiso_2022)
10079                     ? coding_category_iso_7
10080                     : coding_category_iso_7_tight);
10081       else
10082         {
10083           int id = XINT (AREF (initial, 1));
10084
10085           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
10086                        || EQ (args[coding_arg_charset_list], Qiso_2022)
10087                        || id < 0)
10088                       ? coding_category_iso_8_else
10089                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
10090                       ? coding_category_iso_8_1
10091                       : coding_category_iso_8_2);
10092         }
            /* Only the iso-8-1 and iso-8-2 categories keep the
               ascii-compatible attribute.  */
10093       if (category != coding_category_iso_8_1
10094           && category != coding_category_iso_8_2)
10095         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
10096     }
10097   else if (EQ (coding_type, Qemacs_mule))
10098     {
10099       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
10100         ASET (attrs, coding_attr_emacs_mule_full, Qt);
10101       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10102       category = coding_category_emacs_mule;
10103     }
10104   else if (EQ (coding_type, Qshift_jis))
10105     {
10106
10107       struct charset *charset;
10108
            /* The charsets must have dimensions 1, 1, 2 and, when a
               fourth one is given, 2.  CHARSET_LIST is advanced along
               the list here; ATTRS already holds the full list.  */
10109       if (XINT (Flength (charset_list)) != 3
10110           && XINT (Flength (charset_list)) != 4)
10111         error ("There should be three or four charsets");
10112
10113       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10114       if (CHARSET_DIMENSION (charset) != 1)
10115         error ("Dimension of charset %s is not one",
10116                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10117       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10118         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10119
10120       charset_list = XCDR (charset_list);
10121       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10122       if (CHARSET_DIMENSION (charset) != 1)
10123         error ("Dimension of charset %s is not one",
10124                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10125
10126       charset_list = XCDR (charset_list);
10127       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10128       if (CHARSET_DIMENSION (charset) != 2)
10129         error ("Dimension of charset %s is not two",
10130                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10131
10132       charset_list = XCDR (charset_list);
10133       if (! NILP (charset_list))
10134         {
10135           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10136           if (CHARSET_DIMENSION (charset) != 2)
10137             error ("Dimension of charset %s is not two",
10138                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10139         }
10140
10141       category = coding_category_sjis;
10142       Vsjis_coding_system = name;
10143     }
10144   else if (EQ (coding_type, Qbig5))
10145     {
10146       struct charset *charset;
10147
            /* Exactly two charsets, of dimensions 1 and 2.  */
10148       if (XINT (Flength (charset_list)) != 2)
10149         error ("There should be just two charsets");
10150
10151       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10152       if (CHARSET_DIMENSION (charset) != 1)
10153         error ("Dimension of charset %s is not one",
10154                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10155       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10156         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10157
10158       charset_list = XCDR (charset_list);
10159       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10160       if (CHARSET_DIMENSION (charset) != 2)
10161         error ("Dimension of charset %s is not two",
10162                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10163
10164       category = coding_category_big5;
10165       Vbig5_coding_system = name;
10166     }
10167   else if (EQ (coding_type, Qraw_text))
10168     {
10169       category = coding_category_raw_text;
10170       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10171     }
10172   else if (EQ (coding_type, Qutf_8))
10173     {
10174       Lisp_Object bom;
10175
10176       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10177
10178       if (nargs < coding_arg_utf8_max)
10179         goto short_args;
10180
            /* BOM is nil, t, or a cons of two coding systems.  */
10181       bom = args[coding_arg_utf8_bom];
10182       if (! NILP (bom) && ! EQ (bom, Qt))
10183         {
10184           CHECK_CONS (bom);
10185           val = XCAR (bom);
10186           CHECK_CODING_SYSTEM (val);
10187           val = XCDR (bom);
10188           CHECK_CODING_SYSTEM (val);
10189         }
10190       ASET (attrs, coding_attr_utf_bom, bom);
10191
10192       category = (CONSP (bom) ? coding_category_utf_8_auto
10193                   : NILP (bom) ? coding_category_utf_8_nosig
10194                   : coding_category_utf_8_sig);
10195     }
10196   else if (EQ (coding_type, Qundecided))
10197     category = coding_category_undecided;
10198   else
10199     error ("Invalid coding system type: %s",
10200            SDATA (SYMBOL_NAME (coding_type)));
10201
        /* Record the category, and push the category symbol and the
           final ascii-compatible value onto the front of the plist.  */
10202   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10203   CODING_ATTR_PLIST (attrs)
10204     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10205                                 CODING_ATTR_PLIST (attrs)));
10206   CODING_ATTR_PLIST (attrs)
10207     = Fcons (QCascii_compatible_p,
10208              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10209                     CODING_ATTR_PLIST (attrs)));
10210
        /* The eol-type argument must be nil, `unix', `dos' or `mac'.  */
10211   eol_type = args[coding_arg_eol_type];
10212   if (! NILP (eol_type)
10213       && ! EQ (eol_type, Qunix)
10214       && ! EQ (eol_type, Qdos)
10215       && ! EQ (eol_type, Qmac))
10216     error ("Invalid eol-type");
10217
10218   aliases = Fcons (name, Qnil);
10219
        /* With a nil eol-type, register the three subsidiaries.  Each
           gets its own spec vector sharing ATTRS, and the vector of
           their names becomes the eol-type of NAME itself.  */
10220   if (NILP (eol_type))
10221     {
10222       eol_type = make_subsidiaries (name);
10223       for (i = 0; i < 3; i++)
10224         {
10225           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10226
10227           this_name = AREF (eol_type, i);
10228           this_aliases = Fcons (this_name, Qnil);
10229           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10230           this_spec = Fmake_vector (make_number (3), attrs);
10231           ASET (this_spec, 1, this_aliases);
10232           ASET (this_spec, 2, this_eol_type);
10233           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10234           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10235           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10236           if (NILP (val))
10237             Vcoding_system_alist
10238               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10239                        Vcoding_system_alist);
10240         }
10241     }
10242
        /* Fmake_vector fills all three slots with ATTRS; slots 1 and 2
           are then overwritten.  */
10243   spec_vec = Fmake_vector (make_number (3), attrs);
10244   ASET (spec_vec, 1, aliases);
10245   ASET (spec_vec, 2, eol_type);
10246
10247   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10248   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10249   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10250   if (NILP (val))
10251     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10252                                   Vcoding_system_alist);
10253
        /* Set up the category's coding structure from NAME when the
           category is still unset (negative id) or is already set to
           NAME.  */
10254   {
10255     int id = coding_categories[category].id;
10256
10257     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10258       setup_coding_system (name, &coding_categories[category]);
10259   }
10260
10261   return Qnil;
10262
10263  short_args:
10264   return Fsignal (Qwrong_number_of_arguments,
10265                   Fcons (intern ("define-coding-system-internal"),
10266                          make_number (nargs)));
10267 }
10268
10269
10270 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10271        3, 3, 0,
10272        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10273   (coding_system, prop, val)
10274      Lisp_Object coding_system, prop, val;
10275 {
10276   Lisp_Object spec, attrs;
10277
10278   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10279   attrs = AREF (spec, 0);
10280   if (EQ (prop, QCmnemonic))
10281     {
10282       if (! STRINGP (val))
10283         CHECK_CHARACTER (val);
10284       CODING_ATTR_MNEMONIC (attrs) = val;
10285     }
10286   else if (EQ (prop, QCdefault_char))
10287     {
10288       if (NILP (val))
10289         val = make_number (' ');
10290       else
10291         CHECK_CHARACTER (val);
10292       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10293     }
10294   else if (EQ (prop, QCdecode_translation_table))
10295     {
10296       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10297         CHECK_SYMBOL (val);
10298       CODING_ATTR_DECODE_TBL (attrs) = val;
10299     }
10300   else if (EQ (prop, QCencode_translation_table))
10301     {
10302       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10303         CHECK_SYMBOL (val);
10304       CODING_ATTR_ENCODE_TBL (attrs) = val;
10305     }
10306   else if (EQ (prop, QCpost_read_conversion))
10307     {
10308       CHECK_SYMBOL (val);
10309       CODING_ATTR_POST_READ (attrs) = val;
10310     }
10311   else if (EQ (prop, QCpre_write_conversion))
10312     {
10313       CHECK_SYMBOL (val);
10314       CODING_ATTR_PRE_WRITE (attrs) = val;
10315     }
10316   else if (EQ (prop, QCascii_compatible_p))
10317     {
10318       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10319     }
10320
10321   CODING_ATTR_PLIST (attrs)
10322     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10323   return val;
10324 }
10325
10326
10327 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10328        Sdefine_coding_system_alias, 2, 2, 0,
10329        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10330      (alias, coding_system)
10331      Lisp_Object alias, coding_system;
10332 {
10333   Lisp_Object spec, aliases, eol_type, val;
10334
10335   CHECK_SYMBOL (alias);
10336   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10337   aliases = AREF (spec, 1);
10338   /* ALIASES should be a list of length more than zero, and the first
10339      element is a base coding system.  Append ALIAS at the tail of the
10340      list.  */
10341   while (!NILP (XCDR (aliases)))
10342     aliases = XCDR (aliases);
10343   XSETCDR (aliases, Fcons (alias, Qnil));
10344
10345   eol_type = AREF (spec, 2);
10346   if (VECTORP (eol_type))
10347     {
10348       Lisp_Object subsidiaries;
10349       int i;
10350
10351       subsidiaries = make_subsidiaries (alias);
10352       for (i = 0; i < 3; i++)
10353         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10354                                      AREF (eol_type, i));
10355     }
10356
10357   Fputhash (alias, spec, Vcoding_system_hash_table);
10358   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10359   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10360   if (NILP (val))
10361     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10362                                   Vcoding_system_alist);
10363
10364   return Qnil;
10365 }
10366
10367 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10368        1, 1, 0,
10369        doc: /* Return the base of CODING-SYSTEM.
10370 Any alias or subsidiary coding system is not a base coding system.  */)
10371   (coding_system)
10372      Lisp_Object coding_system;
10373 {
10374   Lisp_Object spec, attrs;
10375
10376   if (NILP (coding_system))
10377     return (Qno_conversion);
10378   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10379   attrs = AREF (spec, 0);
10380   return CODING_ATTR_BASE_NAME (attrs);
10381 }
10382
10383 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10384        1, 1, 0,
10385        doc: "Return the property list of CODING-SYSTEM.")
10386      (coding_system)
10387      Lisp_Object coding_system;
10388 {
10389   Lisp_Object spec, attrs;
10390
10391   if (NILP (coding_system))
10392     coding_system = Qno_conversion;
10393   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10394   attrs = AREF (spec, 0);
10395   return CODING_ATTR_PLIST (attrs);
10396 }
10397
10398
10399 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10400        1, 1, 0,
10401        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10402      (coding_system)
10403      Lisp_Object coding_system;
10404 {
10405   Lisp_Object spec;
10406
10407   if (NILP (coding_system))
10408     coding_system = Qno_conversion;
10409   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10410   return AREF (spec, 1);
10411 }
10412
10413 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10414        Scoding_system_eol_type, 1, 1, 0,
10415        doc: /* Return eol-type of CODING-SYSTEM.
10416 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10417
10418 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10419 and CR respectively.
10420
10421 A vector value indicates that a format of end-of-line should be
10422 detected automatically.  Nth element of the vector is the subsidiary
10423 coding system whose eol-type is N.  */)
10424      (coding_system)
10425      Lisp_Object coding_system;
10426 {
10427   Lisp_Object spec, eol_type;
10428   int n;
10429
10430   if (NILP (coding_system))
10431     coding_system = Qno_conversion;
10432   if (! CODING_SYSTEM_P (coding_system))
10433     return Qnil;
10434   spec = CODING_SYSTEM_SPEC (coding_system);
10435   eol_type = AREF (spec, 2);
10436   if (VECTORP (eol_type))
10437     return Fcopy_sequence (eol_type);
10438   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10439   return make_number (n);
10440 }
10441
10442 #endif /* emacs */
10443
10444 \f
10445 /*** 12. Postamble ***/
10446
10447 void
10448 init_coding_once ()
10449 {
10450   int i;
10451
10452   for (i = 0; i < coding_category_max; i++)
10453     {
10454       coding_categories[i].id = -1;
10455       coding_priorities[i] = i;
10456     }
10457
10458   /* ISO2022 specific initialize routine.  */
10459   for (i = 0; i < 0x20; i++)
10460     iso_code_class[i] = ISO_control_0;
10461   for (i = 0x21; i < 0x7F; i++)
10462     iso_code_class[i] = ISO_graphic_plane_0;
10463   for (i = 0x80; i < 0xA0; i++)
10464     iso_code_class[i] = ISO_control_1;
10465   for (i = 0xA1; i < 0xFF; i++)
10466     iso_code_class[i] = ISO_graphic_plane_1;
10467   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10468   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10469   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10470   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10471   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10472   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10473   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10474   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10475   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10476
10477   for (i = 0; i < 256; i++)
10478     {
10479       emacs_mule_bytes[i] = 1;
10480     }
10481   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10482   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10483   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10484   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10485 }
10486
10487 #ifdef emacs
10488
/* Set up the Lisp-visible state of this file: staticpro the file's
   Lisp globals, define its symbols (DEFSYM), subrs (defsubr) and
   variables (DEFVAR_LISP, DEFVAR_BOOL), and define the two built-in
   coding systems `no-conversion' and `undecided'.  */
10489 void
10490 syms_of_coding ()
10491 {
10492   staticpro (&Vcoding_system_hash_table);
        /* Create the table with QCtest set to Qeq.  */
10493   {
10494     Lisp_Object args[2];
10495     args[0] = QCtest;
10496     args[1] = Qeq;
10497     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10498   }
10499
10500   staticpro (&Vsjis_coding_system);
10501   Vsjis_coding_system = Qnil;
10502
10503   staticpro (&Vbig5_coding_system);
10504   Vbig5_coding_system = Qnil;
10505
10506   staticpro (&Vcode_conversion_reused_workbuf);
10507   Vcode_conversion_reused_workbuf = Qnil;
10508
10509   staticpro (&Vcode_conversion_workbuf_name);
10510   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10511
10512   reused_workbuf_in_use = 0;
10513
10514   DEFSYM (Qcharset, "charset");
10515   DEFSYM (Qtarget_idx, "target-idx");
10516   DEFSYM (Qcoding_system_history, "coding-system-history");
10517   Fset (Qcoding_system_history, Qnil);
10518
        /* Record on each operation symbol, as its `target-idx' property,
           the zero-based position of its target argument.  */
10519   /* Target FILENAME is the first argument.  */
10520   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10521   /* Target FILENAME is the third argument.  */
10522   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10523
10524   DEFSYM (Qcall_process, "call-process");
10525   /* Target PROGRAM is the first argument.  */
10526   Fput (Qcall_process, Qtarget_idx, make_number (0));
10527
10528   DEFSYM (Qcall_process_region, "call-process-region");
10529   /* Target PROGRAM is the third argument.  */
10530   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10531
10532   DEFSYM (Qstart_process, "start-process");
10533   /* Target PROGRAM is the third argument.  */
10534   Fput (Qstart_process, Qtarget_idx, make_number (2));
10535
10536   DEFSYM (Qopen_network_stream, "open-network-stream");
10537   /* Target SERVICE is the fourth argument.  */
10538   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10539
10540   DEFSYM (Qcoding_system, "coding-system");
10541   DEFSYM (Qcoding_aliases, "coding-aliases");
10542
10543   DEFSYM (Qeol_type, "eol-type");
10544   DEFSYM (Qunix, "unix");
10545   DEFSYM (Qdos, "dos");
10546
10547   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10548   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10549   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10550   DEFSYM (Qdefault_char, "default-char");
10551   DEFSYM (Qundecided, "undecided");
10552   DEFSYM (Qno_conversion, "no-conversion");
10553   DEFSYM (Qraw_text, "raw-text");
10554
10555   DEFSYM (Qiso_2022, "iso-2022");
10556
10557   DEFSYM (Qutf_8, "utf-8");
10558   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10559
10560   DEFSYM (Qutf_16, "utf-16");
10561   DEFSYM (Qbig, "big");
10562   DEFSYM (Qlittle, "little");
10563
10564   DEFSYM (Qshift_jis, "shift-jis");
10565   DEFSYM (Qbig5, "big5");
10566
10567   DEFSYM (Qcoding_system_p, "coding-system-p");
10568
10569   DEFSYM (Qcoding_system_error, "coding-system-error");
10570   Fput (Qcoding_system_error, Qerror_conditions,
10571         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10572   Fput (Qcoding_system_error, Qerror_message,
10573         make_pure_c_string ("Invalid coding system"));
10574
10575   /* Intern this now in case it isn't already done.
10576      Setting this variable twice is harmless.
10577      But don't staticpro it here--that is done in alloc.c.  */
10578   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10579
10580   DEFSYM (Qtranslation_table, "translation-table");
10581   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10582   DEFSYM (Qtranslation_table_id, "translation-table-id");
10583   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10584   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10585
10586   DEFSYM (Qvalid_codes, "valid-codes");
10587
10588   DEFSYM (Qemacs_mule, "emacs-mule");
10589
10590   DEFSYM (QCcategory, ":category");
10591   DEFSYM (QCmnemonic, ":mnemonic");
10592   DEFSYM (QCdefault_char, ":default-char");
10593   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10594   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10595   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10596   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10597   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10598
        /* Vector indexed by coding category; each element is the symbol
           naming that category.  */
10599   Vcoding_category_table
10600     = Fmake_vector (make_number (coding_category_max), Qnil);
10601   staticpro (&Vcoding_category_table);
10602   /* The following are targets of code detection.  */
10603   ASET (Vcoding_category_table, coding_category_iso_7,
10604         intern_c_string ("coding-category-iso-7"));
10605   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10606         intern_c_string ("coding-category-iso-7-tight"));
10607   ASET (Vcoding_category_table, coding_category_iso_8_1,
10608         intern_c_string ("coding-category-iso-8-1"));
10609   ASET (Vcoding_category_table, coding_category_iso_8_2,
10610         intern_c_string ("coding-category-iso-8-2"));
10611   ASET (Vcoding_category_table, coding_category_iso_7_else,
10612         intern_c_string ("coding-category-iso-7-else"));
10613   ASET (Vcoding_category_table, coding_category_iso_8_else,
10614         intern_c_string ("coding-category-iso-8-else"));
10615   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10616         intern_c_string ("coding-category-utf-8-auto"));
10617   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10618         intern_c_string ("coding-category-utf-8"));
10619   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10620         intern_c_string ("coding-category-utf-8-sig"));
10621   ASET (Vcoding_category_table, coding_category_utf_16_be,
10622         intern_c_string ("coding-category-utf-16-be"));
10623   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10624         intern_c_string ("coding-category-utf-16-auto"));
10625   ASET (Vcoding_category_table, coding_category_utf_16_le,
10626         intern_c_string ("coding-category-utf-16-le"));
10627   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10628         intern_c_string ("coding-category-utf-16-be-nosig"));
10629   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10630         intern_c_string ("coding-category-utf-16-le-nosig"));
10631   ASET (Vcoding_category_table, coding_category_charset,
10632         intern_c_string ("coding-category-charset"));
10633   ASET (Vcoding_category_table, coding_category_sjis,
10634         intern_c_string ("coding-category-sjis"));
10635   ASET (Vcoding_category_table, coding_category_big5,
10636         intern_c_string ("coding-category-big5"));
10637   ASET (Vcoding_category_table, coding_category_ccl,
10638         intern_c_string ("coding-category-ccl"));
10639   ASET (Vcoding_category_table, coding_category_emacs_mule,
10640         intern_c_string ("coding-category-emacs-mule"));
10641   /* The following are NOT targets of code detection.  */
10642   ASET (Vcoding_category_table, coding_category_raw_text,
10643         intern_c_string ("coding-category-raw-text"));
10644   ASET (Vcoding_category_table, coding_category_undecided,
10645         intern_c_string ("coding-category-undecided"));
10646
10647   DEFSYM (Qinsufficient_source, "insufficient-source");
10648   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10649   DEFSYM (Qinvalid_source, "invalid-source");
10650   DEFSYM (Qinterrupted, "interrupted");
10651   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10652   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10653
10654   defsubr (&Scoding_system_p);
10655   defsubr (&Sread_coding_system);
10656   defsubr (&Sread_non_nil_coding_system);
10657   defsubr (&Scheck_coding_system);
10658   defsubr (&Sdetect_coding_region);
10659   defsubr (&Sdetect_coding_string);
10660   defsubr (&Sfind_coding_systems_region_internal);
10661   defsubr (&Sunencodable_char_position);
10662   defsubr (&Scheck_coding_systems_region);
10663   defsubr (&Sdecode_coding_region);
10664   defsubr (&Sencode_coding_region);
10665   defsubr (&Sdecode_coding_string);
10666   defsubr (&Sencode_coding_string);
10667   defsubr (&Sdecode_sjis_char);
10668   defsubr (&Sencode_sjis_char);
10669   defsubr (&Sdecode_big5_char);
10670   defsubr (&Sencode_big5_char);
10671   defsubr (&Sset_terminal_coding_system_internal);
10672   defsubr (&Sset_safe_terminal_coding_system_internal);
10673   defsubr (&Sterminal_coding_system);
10674   defsubr (&Sset_keyboard_coding_system_internal);
10675   defsubr (&Skeyboard_coding_system);
10676   defsubr (&Sfind_operation_coding_system);
10677   defsubr (&Sset_coding_system_priority);
10678   defsubr (&Sdefine_coding_system_internal);
10679   defsubr (&Sdefine_coding_system_alias);
10680   defsubr (&Scoding_system_put);
10681   defsubr (&Scoding_system_base);
10682   defsubr (&Scoding_system_plist);
10683   defsubr (&Scoding_system_aliases);
10684   defsubr (&Scoding_system_eol_type);
10685   defsubr (&Scoding_system_priority_list);
10686
10687   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10688                doc: /* List of coding systems.
10689
10690 Do not alter the value of this variable manually.  This variable should be
10691 updated by the functions `define-coding-system' and
10692 `define-coding-system-alias'.  */);
10693   Vcoding_system_list = Qnil;
10694
10695   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10696                doc: /* Alist of coding system names.
10697 Each element is one element list of coding system name.
10698 This variable is given to `completing-read' as COLLECTION argument.
10699
10700 Do not alter the value of this variable manually.  This variable should be
10701 updated by the functions `define-coding-system' and
10702 `define-coding-system-alias'.  */);
10703   Vcoding_system_alist = Qnil;
10704
10705   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10706                doc: /* List of coding-categories (symbols) ordered by priority.
10707
10708 On detecting a coding system, Emacs tries code detection algorithms
10709 associated with each coding-category one by one in this order.  When
10710 one algorithm agrees with a byte sequence of source text, the coding
10711 system bound to the corresponding coding-category is selected.
10712
10713 Don't modify this variable directly, but use `set-coding-system-priority'.  */);
10714   {
10715     int i;
10716
10717     Vcoding_category_list = Qnil;
          /* Walk the table backwards so that consing onto the front
             leaves the list in table order.  */
10718     for (i = coding_category_max - 1; i >= 0; i--)
10719       Vcoding_category_list
10720         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10721                  Vcoding_category_list);
10722   }
10723
10724   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10725                doc: /* Specify the coding system for read operations.
10726 It is useful to bind this variable with `let', but do not set it globally.
10727 If the value is a coding system, it is used for decoding on read operation.
10728 If not, an appropriate element is used from one of the coding system alists.
10729 There are three such tables: `file-coding-system-alist',
10730 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10731   Vcoding_system_for_read = Qnil;
10732
10733   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10734                doc: /* Specify the coding system for write operations.
10735 Programs bind this variable with `let', but you should not set it globally.
10736 If the value is a coding system, it is used for encoding of output,
10737 when writing it to a file and when sending it to a file or subprocess.
10738
10739 If this does not specify a coding system, an appropriate element
10740 is used from one of the coding system alists.
10741 There are three such tables: `file-coding-system-alist',
10742 `process-coding-system-alist', and `network-coding-system-alist'.
10743 For output to files, if the above procedure does not specify a coding system,
10744 the value of `buffer-file-coding-system' is used.  */);
10745   Vcoding_system_for_write = Qnil;
10746
10747   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10748                doc: /*
10749 Coding system used in the latest file or process I/O.  */);
10750   Vlast_coding_system_used = Qnil;
10751
10752   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10753                doc: /*
10754 Error status of the last code conversion.
10755
10756 When an error was detected in the last code conversion, this variable
10757 is set to one of the following symbols.
10758   `insufficient-source'
10759   `inconsistent-eol'
10760   `invalid-source'
10761   `interrupted'
10762   `insufficient-memory'
10763 When no error was detected, the value doesn't change.  So, to check
10764 the error status of a code conversion by this variable, you must
10765 explicitly set this variable to nil before performing code
10766 conversion.  */);
10767   Vlast_code_conversion_error = Qnil;
10768
10769   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10770                doc: /*
10771 *Non-nil means always inhibit code conversion of end-of-line format.
10772 See info node `Coding Systems' and info node `Text and Binary' concerning
10773 such conversion.  */);
10774   inhibit_eol_conversion = 0;
10775
10776   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10777                doc: /*
10778 Non-nil means process buffer inherits coding system of process output.
10779 Bind it to t if the process output is to be treated as if it were a file
10780 read from some filesystem.  */);
10781   inherit_process_coding_system = 0;
10782
10783   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10784                doc: /*
10785 Alist to decide a coding system to use for a file I/O operation.
10786 The format is ((PATTERN . VAL) ...),
10787 where PATTERN is a regular expression matching a file name,
10788 VAL is a coding system, a cons of coding systems, or a function symbol.
10789 If VAL is a coding system, it is used for both decoding and encoding
10790 the file contents.
10791 If VAL is a cons of coding systems, the car part is used for decoding,
10792 and the cdr part is used for encoding.
10793 If VAL is a function symbol, the function must return a coding system
10794 or a cons of coding systems which are used as above.  The function is
10795 called with an argument that is a list of the arguments with which
10796 `find-operation-coding-system' was called.  If the function can't decide
10797 a coding system, it can return `undecided' so that the normal
10798 code-detection is performed.
10799
10800 See also the function `find-operation-coding-system'
10801 and the variable `auto-coding-alist'.  */);
10802   Vfile_coding_system_alist = Qnil;
10803
10804   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10805                doc: /*
10806 Alist to decide a coding system to use for a process I/O operation.
10807 The format is ((PATTERN . VAL) ...),
10808 where PATTERN is a regular expression matching a program name,
10809 VAL is a coding system, a cons of coding systems, or a function symbol.
10810 If VAL is a coding system, it is used for both decoding what is received
10811 from the program and encoding what is sent to the program.
10812 If VAL is a cons of coding systems, the car part is used for decoding,
10813 and the cdr part is used for encoding.
10814 If VAL is a function symbol, the function must return a coding system
10815 or a cons of coding systems which are used as above.
10816
10817 See also the function `find-operation-coding-system'.  */);
10818   Vprocess_coding_system_alist = Qnil;
10819
10820   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10821                doc: /*
10822 Alist to decide a coding system to use for a network I/O operation.
10823 The format is ((PATTERN . VAL) ...),
10824 where PATTERN is a regular expression matching a network service name
10825 or is a port number to connect to,
10826 VAL is a coding system, a cons of coding systems, or a function symbol.
10827 If VAL is a coding system, it is used for both decoding what is received
10828 from the network stream and encoding what is sent to the network stream.
10829 If VAL is a cons of coding systems, the car part is used for decoding,
10830 and the cdr part is used for encoding.
10831 If VAL is a function symbol, the function must return a coding system
10832 or a cons of coding systems which are used as above.
10833
10834 See also the function `find-operation-coding-system'.  */);
10835   Vnetwork_coding_system_alist = Qnil;
10836
10837   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10838                doc: /* Coding system to use with system messages.
10839 Also used for decoding keyboard input on X Window system.  */);
10840   Vlocale_coding_system = Qnil;
10841
10842   /* The eol mnemonics are reset in startup.el system-dependently.  */
10843   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10844                doc: /*
10845 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10846   eol_mnemonic_unix = make_pure_c_string (":");
10847
10848   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10849                doc: /*
10850 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10851   eol_mnemonic_dos = make_pure_c_string ("\\");
10852
10853   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10854                doc: /*
10855 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10856   eol_mnemonic_mac = make_pure_c_string ("/");
10857
10858   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10859                doc: /*
10860 *String displayed in mode line when end-of-line format is not yet determined.  */);
10861   eol_mnemonic_undecided = make_pure_c_string (":");
10862
10863   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10864                doc: /*
10865 *Non-nil enables character translation while encoding and decoding.  */);
10866   Venable_character_translation = Qt;
10867
10868   DEFVAR_LISP ("standard-translation-table-for-decode",
10869                &Vstandard_translation_table_for_decode,
10870                doc: /* Table for translating characters while decoding.  */);
10871   Vstandard_translation_table_for_decode = Qnil;
10872
10873   DEFVAR_LISP ("standard-translation-table-for-encode",
10874                &Vstandard_translation_table_for_encode,
10875                doc: /* Table for translating characters while encoding.  */);
10876   Vstandard_translation_table_for_encode = Qnil;
10877
10878   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10879                doc: /* Alist of charsets vs revision numbers.
10880 While encoding, if a charset (car part of an element) is found,
10881 designate it with the escape sequence identifying revision (cdr part
10882 of the element).  */);
10883   Vcharset_revision_table = Qnil;
10884
10885   DEFVAR_LISP ("default-process-coding-system",
10886                &Vdefault_process_coding_system,
10887                doc: /* Cons of coding systems used for process I/O by default.
10888 The car part is used for decoding a process output,
10889 the cdr part is used for encoding a text to be sent to a process.  */);
10890   Vdefault_process_coding_system = Qnil;
10891
10892   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10893                doc: /*
10894 Table of extra Latin codes in the range 128..159 (inclusive).
10895 This is a vector of length 256.
10896 If Nth element is non-nil, the existence of code N in a file
10897 \(or output of subprocess) doesn't prevent it from being detected as
10898 a coding system of ISO 2022 variant which has a flag
10899 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10900 or reading output of a subprocess.
10901 Only 128th through 159th elements have a meaning.  */);
10902   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10903
10904   DEFVAR_LISP ("select-safe-coding-system-function",
10905                &Vselect_safe_coding_system_function,
10906                doc: /*
10907 Function to call to select safe coding system for encoding a text.
10908
10909 If set, this function is called to force a user to select a proper
10910 coding system which can encode the text in the case that a default
10911 coding system used in each operation can't encode the text.  The
10912 function should take care that the buffer is not modified while
10913 the coding system is being selected.
10914
10915 The default value is `select-safe-coding-system' (which see).  */);
10916   Vselect_safe_coding_system_function = Qnil;
10917
10918   DEFVAR_BOOL ("coding-system-require-warning",
10919                &coding_system_require_warning,
10920                doc: /* Internal use only.
10921 If non-nil, on writing a file, `select-safe-coding-system-function' is
10922 called even if `coding-system-for-write' is non-nil.  The command
10923 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10924   coding_system_require_warning = 0;
10925
10926
10927   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10928                &inhibit_iso_escape_detection,
10929                doc: /*
10930 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10931
10932 When Emacs reads text, it tries to detect how the text is encoded.
10933 This code detection is sensitive to escape sequences.  If Emacs sees
10934 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10935 of the ISO2022 encodings, and decodes text by the corresponding coding
10936 system (e.g. `iso-2022-7bit').
10937
10938 However, there may be a case that you want to read escape sequences in
10939 a file as is.  In such a case, you can set this variable to non-nil.
10940 Then the code detection will ignore any escape sequences, and no text is
10941 detected as encoded in some ISO-2022 encoding.  The result is that all
10942 escape sequences become visible in a buffer.
10943
10944 The default value is nil, and it is strongly recommended not to change
10945 it.  That is because many Emacs Lisp source files that contain
10946 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10947 in Emacs's distribution, and they won't be decoded correctly on
10948 reading if you suppress escape sequence detection.
10949
10950 The other way to read escape sequences in a file without decoding is
10951 to explicitly specify some coding system that doesn't use ISO-2022
10952 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument].  */);
10953   inhibit_iso_escape_detection = 0;
10954
10955   DEFVAR_BOOL ("inhibit-null-byte-detection",
10956                &inhibit_null_byte_detection,
10957                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10958 By default, Emacs treats it as binary data, and does not attempt to
10959 decode it.  The effect is as if you specified `no-conversion' for
10960 reading that text.
10961
10962 Set this to non-nil when a regular text happens to include null bytes.
10963 Examples are Index nodes of Info files and null-byte delimited output
10964 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10965 decode text as usual.  */);
10966   inhibit_null_byte_detection = 0;
10967
10968   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10969                doc: /* Char table for translating self-inserting characters.
10970 This is applied to the result of input methods, not their input.
10971 See also `keyboard-translate-table'.
10972
10973 Use of this variable for character code unification was rendered
10974 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10975 internal character representation.  */);
10976     Vtranslation_table_for_input = Qnil;
10977
        /* Define the built-in coding systems `no-conversion' and
           `undecided'.  Most values are stored twice: in PLIST as a
           keyword/value pair, and in the matching positional slot of
           ARGS.  */
10978   {
10979     Lisp_Object args[coding_arg_max];
10980     Lisp_Object plist[16];
10981     int i;
10982
10983     for (i = 0; i < coding_arg_max; i++)
10984       args[i] = Qnil;
10985
10986     plist[0] = intern_c_string (":name");
10987     plist[1] = args[coding_arg_name] = Qno_conversion;
10988     plist[2] = intern_c_string (":mnemonic");
10989     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10990     plist[4] = intern_c_string (":coding-type");
10991     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10992     plist[6] = intern_c_string (":ascii-compatible-p");
10993     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10994     plist[8] = intern_c_string (":default-char");
10995     plist[9] = args[coding_arg_default_char] = make_number (0);
10996     plist[10] = intern_c_string (":for-unibyte");
10997     plist[11] = args[coding_arg_for_unibyte] = Qt;
10998     plist[12] = intern_c_string (":docstring");
10999     plist[13] = make_pure_c_string ("Do no conversion.\n\
11000 \n\
11001 When you visit a file with this coding, the file is read into a\n\
11002 unibyte buffer as is, thus each byte of a file is treated as a\n\
11003 character.");
11004     plist[14] = intern_c_string (":eol-type");
11005     plist[15] = args[coding_arg_eol_type] = Qunix;
11006     args[coding_arg_plist] = Flist (16, plist);
11007     Fdefine_coding_system_internal (coding_arg_max, args);
11008
          /* Reuse PLIST and ARGS for `undecided', overwriting only the
             entries that differ.  */
11009     plist[1] = args[coding_arg_name] = Qundecided;
11010     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
11011     plist[5] = args[coding_arg_coding_type] = Qundecided;
11012     /* plist[7] and args[coding_arg_ascii_compatible_p] are already Qt
11013        from the definition above, so they are left as is.  */
11014     plist[8] = intern_c_string (":charset-list");
11015     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
11016     plist[11] = args[coding_arg_for_unibyte] = Qnil;
11017     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
11018     plist[15] = args[coding_arg_eol_type] = Qnil;
11019     args[coding_arg_plist] = Flist (16, plist);
11020     Fdefine_coding_system_internal (coding_arg_max, args);
11021   }
11022
11023   setup_coding_system (Qno_conversion, &safe_terminal_coding);
11024
        /* Give every coding-category symbol the initial value
           `no-conversion'.  */
11025   {
11026     int i;
11027
11028     for (i = 0; i < coding_category_max; i++)
11029       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
11030   }
        /* system_eol_type is Qdos on MSDOS and WINDOWSNT builds, Qunix
           otherwise.  */
11031 #if defined (MSDOS) || defined (WINDOWSNT)
11032   system_eol_type = Qdos;
11033 #else
11034   system_eol_type = Qunix;
11035 #endif
11036   staticpro (&system_eol_type);
11037 }
11038
11039 char *
11040 emacs_strerror (error_number)
11041      int error_number;
11042 {
11043   /* Return strerror's message for ERROR_NUMBER; if Vlocale_coding_system
11044      is non-nil, return that message converted with it instead.  */
11045   Lisp_Object decoded;
11046   char *message;
11047
11048   /* Runs before strerror; presumably it selects the messages locale.  */
11049   synchronize_system_messages_locale ();
11050   message = strerror (error_number);
11051   if (NILP (Vlocale_coding_system))
11052     return message;
11053
11054   decoded = code_convert_string_norecord (build_string (message),
11055                                           Vlocale_coding_system, 0);
11056   return (char *) SDATA (decoded);
11057 }
11058
11059 #endif /* emacs */
11060
11061 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
11062    (do not change this comment) */