src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009, 2010
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
   61   stored in the hash table Vcoding_system_hash_table.  The conversion
   62   from coding system symbol to attributes vector is done by looking up
   63   Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
  123   How end-of-line is encoded in text depends on the operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
  136   encoding), we set up a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;
 166   ...;
 167
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the souce is exausted, jump
 171          to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (! __C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source exausted successfully.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size;
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source code, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf < charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
  257   DST_BYTES zero means that the source area and the destination area
  258   overlap, which means that we can produce encoded text until it
  259   reaches the head of the not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292 #include <setjmp.h>
 293
 294 #include "lisp.h"
 295 #include "buffer.h"
 296 #include "character.h"
 297 #include "charset.h"
 298 #include "ccl.h"
 299 #include "composite.h"
 300 #include "coding.h"
 301 #include "window.h"
 302 #include "frame.h"
 303 #include "termhooks.h"
 304
 305 Lisp_Object Vcoding_system_hash_table;
 306
 307 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 308 Lisp_Object Qunix, Qdos;
 309 extern Lisp_Object Qmac;        /* frame.c */
 310 Lisp_Object Qbuffer_file_coding_system;
 311 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 312 Lisp_Object Qdefault_char;
 313 Lisp_Object Qno_conversion, Qundecided;
 314 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 315 Lisp_Object Qbig, Qlittle;
 316 Lisp_Object Qcoding_system_history;
 317 Lisp_Object Qvalid_codes;
 318 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 319 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 320 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 321 Lisp_Object QCascii_compatible_p;
 322
 323 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 324 Lisp_Object Qcall_process, Qcall_process_region;
 325 Lisp_Object Qstart_process, Qopen_network_stream;
 326 Lisp_Object Qtarget_idx;
 327
 328 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 329 Lisp_Object Qinterrupted, Qinsufficient_memory;
 330
 331 extern Lisp_Object Qcompletion_ignore_case;
 332
 333 /* If a symbol has this property, evaluate the value to define the
 334    symbol as a coding system.  */
 335 static Lisp_Object Qcoding_system_define_form;
 336
 337 int coding_system_require_warning;
 338
 339 Lisp_Object Vselect_safe_coding_system_function;
 340
 341 /* Mnemonic string for each format of end-of-line.  */
 342 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 343 /* Mnemonic string to indicate format of end-of-line is not yet
 344    decided.  */
 345 Lisp_Object eol_mnemonic_undecided;
 346
 347 /* Format of end-of-line decided by system.  This is Qunix on
 348    Unix and Mac, Qdos on DOS/Windows.
 349    This has an effect only for external encoding (i.e. for output to
 350    file and process), not for in-buffer or Lisp string encoding.  */
 351 static Lisp_Object system_eol_type;
 352
 353 #ifdef emacs
 354
 355 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 356
 357 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 358
 359 /* Coding system emacs-mule and raw-text are for converting only
 360    end-of-line format.  */
 361 Lisp_Object Qemacs_mule, Qraw_text;
 362 Lisp_Object Qutf_8_emacs;
 363
 364 /* Coding-systems are handed between Emacs Lisp programs and C internal
 365    routines by the following three variables.  */
 366 /* Coding-system for reading files and receiving data from process.  */
 367 Lisp_Object Vcoding_system_for_read;
 368 /* Coding-system for writing files and sending data to process.  */
 369 Lisp_Object Vcoding_system_for_write;
 370 /* Coding-system actually used in the latest I/O.  */
 371 Lisp_Object Vlast_coding_system_used;
 372 /* Set to non-nil when an error is detected while code conversion.  */
 373 Lisp_Object Vlast_code_conversion_error;
 374 /* A vector of length 256 which contains information about special
 375    Latin codes (especially for dealing with Microsoft codes).  */
 376 Lisp_Object Vlatin_extra_code_table;
 377
 378 /* Flag to inhibit code conversion of end-of-line format.  */
 379 int inhibit_eol_conversion;
 380
 381 /* Flag to inhibit ISO2022 escape sequence detection.  */
 382 int inhibit_iso_escape_detection;
 383
 384 /* Flag to inhibit detection of binary files through null bytes.  */
 385 int inhibit_null_byte_detection;
 386
 387 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 388 int inherit_process_coding_system;
 389
 390 /* Coding system to be used to encode text for terminal display when
 391    terminal coding system is nil.  */
 392 struct coding_system safe_terminal_coding;
 393
 394 Lisp_Object Vfile_coding_system_alist;
 395 Lisp_Object Vprocess_coding_system_alist;
 396 Lisp_Object Vnetwork_coding_system_alist;
 397
 398 Lisp_Object Vlocale_coding_system;
 399
 400 #endif /* emacs */
 401
 402 /* Flag to tell if we look up translation table on character code
 403    conversion.  */
 404 Lisp_Object Venable_character_translation;
 405 /* Standard translation table to look up on decoding (reading).  */
 406 Lisp_Object Vstandard_translation_table_for_decode;
 407 /* Standard translation table to look up on encoding (writing).  */
 408 Lisp_Object Vstandard_translation_table_for_encode;
 409
 410 Lisp_Object Qtranslation_table;
 411 Lisp_Object Qtranslation_table_id;
 412 Lisp_Object Qtranslation_table_for_decode;
 413 Lisp_Object Qtranslation_table_for_encode;
 414
 415 /* Alist of charsets vs revision number.  */
 416 static Lisp_Object Vcharset_revision_table;
 417
 418 /* Default coding systems used for process I/O.  */
 419 Lisp_Object Vdefault_process_coding_system;
 420
 421 /* Char table for translating Quail and self-inserting input.  */
 422 Lisp_Object Vtranslation_table_for_input;
 423
 424 /* Two special coding systems.  */
 425 Lisp_Object Vsjis_coding_system;
 426 Lisp_Object Vbig5_coding_system;
 427
 428 /* ISO2022 section */
 429
 430 #define CODING_ISO_INITIAL(coding, reg)                 \
 431   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 432                      coding_attr_iso_initial),          \
 433                reg)))
 434
 435
 436 #define CODING_ISO_REQUEST(coding, charset_id)          \
 437   (((charset_id) <= (coding)->max_charset_id            \
 438     ? ((coding)->safe_charsets[charset_id] != 255       \
 439        ? (coding)->safe_charsets[charset_id]            \
 440        : -1)                                            \
 441     : -1))
 442
 443
 444 #define CODING_ISO_FLAGS(coding)        \
 445   ((coding)->spec.iso_2022.flags)
 446 #define CODING_ISO_DESIGNATION(coding, reg)     \
 447   ((coding)->spec.iso_2022.current_designation[reg])
 448 #define CODING_ISO_INVOCATION(coding, plane)    \
 449   ((coding)->spec.iso_2022.current_invocation[plane])
 450 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 451   ((coding)->spec.iso_2022.single_shifting)
 452 #define CODING_ISO_BOL(coding)  \
 453   ((coding)->spec.iso_2022.bol)
 454 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 455   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 456 #define CODING_ISO_CMP_STATUS(coding)   \
 457   (&(coding)->spec.iso_2022.cmp_status)
 458 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 459   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 460 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 461   ((coding)->spec.iso_2022.embedded_utf_8)
 462
 463 /* Control characters of ISO2022.  */
 464                         /* code */      /* function */
 465 #define ISO_CODE_LF     0x0A            /* line-feed */
 466 #define ISO_CODE_CR     0x0D            /* carriage-return */
 467 #define ISO_CODE_SO     0x0E            /* shift-out */
 468 #define ISO_CODE_SI     0x0F            /* shift-in */
 469 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 470 #define ISO_CODE_ESC    0x1B            /* escape */
 471 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 472 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 473 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 474
  475 /* Every one-byte code of ISO2022 is classified into one of the
  476    following classes.  */
  477 enum iso_code_class_type
  478   {
  479     ISO_control_0,              /* Control codes in the range
  480                                    0x00..0x1F and 0x7F, except for the
  481                                    following 4 codes.  */
  482     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
  483     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
  484     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
  485     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
  486     ISO_control_1,              /* Control codes in the range
  487                                    0x80..0x9F, except for the
  488                                    following 3 codes.  */
  489     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
  490     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
  491     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
  492     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
  493     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
  494     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
  495     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
  496   };
 497
  498 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
  499     `iso-flags' attribute of an iso2022 coding system.  */
 500
 501 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 502    instead of the correct short-form sequence (e.g. ESC $ A).  */
 503 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 504
 505 /* If set, reset graphic planes and registers at end-of-line to the
 506    initial state.  */
 507 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 508
 509 /* If set, reset graphic planes and registers before any control
 510    characters to the initial state.  */
 511 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 512
 513 /* If set, encode by 7-bit environment.  */
 514 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 515
 516 /* If set, use locking-shift function.  */
 517 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 518
 519 /* If set, use single-shift function.  Overwrite
 520    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 521 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 522
 523 /* If set, use designation escape sequence.  */
 524 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 525
 526 /* If set, produce revision number sequence.  */
 527 #define CODING_ISO_FLAG_REVISION        0x0080
 528
 529 /* If set, produce ISO6429's direction specifying sequence.  */
 530 #define CODING_ISO_FLAG_DIRECTION       0x0100
 531
 532 /* If set, assume designation states are reset at beginning of line on
 533    output.  */
 534 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 535
 536 /* If set, designation sequence should be placed at beginning of line
 537    on output.  */
 538 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 539
  540 /* If set, do not encode unsafe characters on output.  */
 541 #define CODING_ISO_FLAG_SAFE            0x0800
 542
 543 /* If set, extra latin codes (128..159) are accepted as a valid code
 544    on input.  */
 545 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 546
 547 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 548
 549 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 550
 551 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 552
 553 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 554
 555 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 556
 557 /* A character to be produced on output if encoding of the original
 558    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 559 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 560
 561 /* UTF-8 section */
 562 #define CODING_UTF_8_BOM(coding)        \
 563   ((coding)->spec.utf_8_bom)
 564
 565 /* UTF-16 section */
 566 #define CODING_UTF_16_BOM(coding)       \
 567   ((coding)->spec.utf_16.bom)
 568
 569 #define CODING_UTF_16_ENDIAN(coding)    \
 570   ((coding)->spec.utf_16.endian)
 571
 572 #define CODING_UTF_16_SURROGATE(coding) \
 573   ((coding)->spec.utf_16.surrogate)
 574
 575
 576 /* CCL section */
 577 #define CODING_CCL_DECODER(coding)      \
 578   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 579 #define CODING_CCL_ENCODER(coding)      \
 580   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 581 #define CODING_CCL_VALIDS(coding)                                          \
 582   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 583
  584 /* Index for each coding category in `coding_categories'.  */
  585 /* Values up to coding_category_raw_text double as CATEGORY_MASK_XXX bit numbers.  */
  586 enum coding_category
  587   {
  588     coding_category_iso_7,
  589     coding_category_iso_7_tight,
  590     coding_category_iso_8_1,
  591     coding_category_iso_8_2,
  592     coding_category_iso_7_else,
  593     coding_category_iso_8_else,
  594     coding_category_utf_8_auto,
  595     coding_category_utf_8_nosig,
  596     coding_category_utf_8_sig,
  597     coding_category_utf_16_auto,
  598     coding_category_utf_16_be,
  599     coding_category_utf_16_le,
  600     coding_category_utf_16_be_nosig,
  601     coding_category_utf_16_le_nosig,
  602     coding_category_charset,
  603     coding_category_sjis,
  604     coding_category_big5,
  605     coding_category_ccl,
  606     coding_category_emacs_mule,
  607     /* All above are targets of code detection.  */
  608     coding_category_raw_text,
  609     coding_category_undecided,
  610     coding_category_max         /* Dimensions coding_priorities and coding_categories.  */
  611   };
 612
 613 /* Definitions of flag bits used in detect_coding_XXXX.  */
 614 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 615 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 616 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 617 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 618 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 619 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 620 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 621 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 622 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 623 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 624 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 625 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 626 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 627 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 628 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 629 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 630 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 631 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 632 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 633 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 634
  635 /* This value is returned if detect_coding_mask () finds nothing other
  636    than ASCII characters.  */
 637 #define CATEGORY_MASK_ANY               \
 638   (CATEGORY_MASK_ISO_7                  \
 639    | CATEGORY_MASK_ISO_7_TIGHT          \
 640    | CATEGORY_MASK_ISO_8_1              \
 641    | CATEGORY_MASK_ISO_8_2              \
 642    | CATEGORY_MASK_ISO_7_ELSE           \
 643    | CATEGORY_MASK_ISO_8_ELSE           \
 644    | CATEGORY_MASK_UTF_8_AUTO           \
 645    | CATEGORY_MASK_UTF_8_NOSIG          \
 646    | CATEGORY_MASK_UTF_8_SIG            \
 647    | CATEGORY_MASK_UTF_16_AUTO          \
 648    | CATEGORY_MASK_UTF_16_BE            \
 649    | CATEGORY_MASK_UTF_16_LE            \
 650    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 651    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 652    | CATEGORY_MASK_CHARSET              \
 653    | CATEGORY_MASK_SJIS                 \
 654    | CATEGORY_MASK_BIG5                 \
 655    | CATEGORY_MASK_CCL                  \
 656    | CATEGORY_MASK_EMACS_MULE)
 657
 658
 659 #define CATEGORY_MASK_ISO_7BIT \
 660   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 661
 662 #define CATEGORY_MASK_ISO_8BIT \
 663   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 664
 665 #define CATEGORY_MASK_ISO_ELSE \
 666   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 667
 668 #define CATEGORY_MASK_ISO_ESCAPE        \
 669   (CATEGORY_MASK_ISO_7                  \
 670    | CATEGORY_MASK_ISO_7_TIGHT          \
 671    | CATEGORY_MASK_ISO_7_ELSE           \
 672    | CATEGORY_MASK_ISO_8_ELSE)
 673
 674 #define CATEGORY_MASK_ISO       \
 675   (  CATEGORY_MASK_ISO_7BIT     \
 676      | CATEGORY_MASK_ISO_8BIT   \
 677      | CATEGORY_MASK_ISO_ELSE)
 678
 679 #define CATEGORY_MASK_UTF_16            \
 680   (CATEGORY_MASK_UTF_16_AUTO            \
 681    | CATEGORY_MASK_UTF_16_BE            \
 682    | CATEGORY_MASK_UTF_16_LE            \
 683    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 684    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 685
 686 #define CATEGORY_MASK_UTF_8     \
 687   (CATEGORY_MASK_UTF_8_AUTO     \
 688    | CATEGORY_MASK_UTF_8_NOSIG  \
 689    | CATEGORY_MASK_UTF_8_SIG)
 690
 691 /* List of symbols `coding-category-xxx' ordered by priority.  This
 692    variable is exposed to Emacs Lisp.  */
 693 static Lisp_Object Vcoding_category_list;
 694
  695 /* Table of coding categories (Lisp symbols).  This variable is for
  696    internal use only.  */
 697 static Lisp_Object Vcoding_category_table;
 698
 699 /* Table of coding-categories ordered by priority.  */
 700 static enum coding_category coding_priorities[coding_category_max];
 701
 702 /* Nth element is a coding context for the coding system bound to the
 703    Nth coding category.  */
 704 static struct coding_system coding_categories[coding_category_max];
 705
 706 /*** Commonly used macros and functions ***/
 707
 708 #ifndef min
 709 #define min(a, b) ((a) < (b) ? (a) : (b))
 710 #endif
 711 #ifndef max
 712 #define max(a, b) ((a) > (b) ? (a) : (b))
 713 #endif
 714
 715 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 716   do {                                                  \
 717     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 718     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 719   } while (0)
 720
 721
  722 /* Safely get one byte from the source text pointed to by SRC, which
  723    ends at SRC_END, and set C to that byte.  If no byte is left, jump to
  724    `no_more_source' (recording CODING_RESULT_INSUFFICIENT_SRC first when
  725    SRC_BASE < SRC).  If multibytep is nonzero, a lead byte 0xC0 or 0xC1 is
  726    merged with the next byte, and any other byte with bit 0x80 set yields
  727    the negated character code and records CODING_RESULT_INVALID_SRC.
  728    Caller must set up: coding, src, src_base, src_end, multibytep, consumed_chars */
  729
  730 #define ONE_MORE_BYTE(c)                                \
  731   do {                                                  \
  732     if (src == src_end)                                 \
  733       {                                                 \
  734         if (src_base < src)                             \
  735           record_conversion_result                      \
  736             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
  737         goto no_more_source;                            \
  738       }                                                 \
  739     c = *src++;                                         \
  740     if (multibytep && (c & 0x80))                       \
  741       {                                                 \
  742         if ((c & 0xFE) == 0xC0)                         \
  743           c = ((c & 1) << 6) | *src++;                  \
  744         else                                            \
  745           {                                             \
  746             src--;                                      \
  747             c = - string_char (src, &src, NULL);        \
  748             record_conversion_result                    \
  749               (coding, CODING_RESULT_INVALID_SRC);      \
  750           }                                             \
  751       }                                                 \
  752     consumed_chars++;                                   \
  753   } while (0)
 754
  755 /* Safely get two bytes from the source text pointed to by SRC, which
  756    ends at SRC_END, and set C1 and C2 to those bytes while skipping the
  757    heading multibyte characters.  If there are not enough bytes in the
  758    source, it jumps to `no_more_source'.  If multibytep is nonzero and
  759    a multibyte character is found for C2, set C2 to -1 (SRC is left just
  760    after that character's first byte).  The caller should declare and
  761    set these variables appropriately in advance:
  762         src, src_end, multibytep
  763    It is intended that this macro is used in detect_coding_utf_16.  */
  764
  765 #define TWO_MORE_BYTES(c1, c2)                          \
  766   do {                                                  \
  767     do {                                                \
  768       if (src == src_end)                               \
  769         goto no_more_source;                            \
  770       c1 = *src++;                                      \
  771       if (multibytep && (c1 & 0x80))                    \
  772         {                                               \
  773           if ((c1 & 0xFE) == 0xC0)                      \
  774             c1 = ((c1 & 1) << 6) | *src++;              \
  775           else                                          \
  776             {                                           \
  777               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
  778               c1 = -1;                                  \
  779             }                                           \
  780         }                                               \
  781     } while (c1 < 0);                                   \
  782     if (src == src_end)                                 \
  783       goto no_more_source;                              \
  784     c2 = *src++;                                        \
  785     if (multibytep && (c2 & 0x80))                      \
  786       {                                                 \
  787         if ((c2 & 0xFE) == 0xC0)                        \
  788           c2 = ((c2 & 1) << 6) | *src++;                \
  789         else                                            \
  790           c2 = -1;                                      \
  791       }                                                 \
  792   } while (0)
 793
 794
     /* Fetch one byte from SRC into C without comparing SRC against any
        end pointer, advance SRC, and increment CONSUMED_CHARS.  If
        multibytep is nonzero and the byte has its high bit set: a lead
        byte 0xC0 or 0xC1 is combined with the next byte as
        ((lead & 1) << 6) | next; otherwise SRC is backed up by one, the
        character is read with string_char (which is handed &src,
        presumably to advance SRC past the character), C is set to the
        negative of the returned value, and CODING_RESULT_INVALID_SRC is
        recorded.  The caller should declare and set these variables
        appropriately in advance:
             src, multibytep, coding, consumed_chars  */

 795 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 796   do {                                                  \
 797     c = *src++;                                         \
 798     if (multibytep && (c & 0x80))                       \
 799       {                                                 \
 800         if ((c & 0xFE) == 0xC0)                         \
 801           c = ((c & 1) << 6) | *src++;                  \
 802         else                                            \
 803           {                                             \
 804             src--;                                      \
 805             c = - string_char (src, &src, NULL);        \
 806             record_conversion_result                    \
 807               (coding, CODING_RESULT_INVALID_SRC);      \
 808           }                                             \
 809       }                                                 \
 810     consumed_chars++;                                   \
 811   } while (0)
 812
 813
 814 /* Store a byte C in the place pointed by DST and increment DST to the
 815    next free point, and increment PRODUCED_CHARS.  The caller should
 816    ensure that C is 0..127 (the byte is stored as is, with no
 817    multibyte handling), and declare and set the variables `dst' and
 818    `produced_chars' appropriately in advance.  */
 819
 820
 821 #define EMIT_ONE_ASCII_BYTE(c)  \
 822   do {                          \
 823     produced_chars++;           \
 824     *dst++ = (c);               \
 825   } while (0)
 826
 827
 828 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2.
        PRODUCED_CHARS is increased by 2 and C1 is stored before C2.  */
 829
 830 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 831   do {                                  \
 832     produced_chars += 2;                \
 833     *dst++ = (c1), *dst++ = (c2);       \
 834   } while (0)
 835
 836
 837 /* Store a byte C in the place pointed by DST and increment DST to the
 838    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 839    nonzero, store in an appropriate multibyte form: a value of 0x80 or
 840    above is first converted with BYTE8_TO_CHAR and the result is
 841    written with CHAR_STRING_ADVANCE.  The caller should declare and set
        the variables `dst', `multibytep' and `produced_chars'
        appropriately in advance.  */
 842
 843 #define EMIT_ONE_BYTE(c)                \
 844   do {                                  \
 845     produced_chars++;                   \
 846     if (multibytep)                     \
 847       {                                 \
 848         int ch = (c);                   \
 849         if (ch >= 0x80)                 \
 850           ch = BYTE8_TO_CHAR (ch);      \
 851         CHAR_STRING_ADVANCE (ch, dst);  \
 852       }                                 \
 853     else                                \
 854       *dst++ = (c);                     \
 855   } while (0)
 856
 857
 858 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  PRODUCED_CHARS
        is increased by 2, C1 is emitted before C2, and in the multibyte
        case each value of 0x80 or above goes through BYTE8_TO_CHAR
        separately.  */
 859
 860 #define EMIT_TWO_BYTES(c1, c2)          \
 861   do {                                  \
 862     produced_chars += 2;                \
 863     if (multibytep)                     \
 864       {                                 \
 865         int ch;                         \
 866                                         \
 867         ch = (c1);                      \
 868         if (ch >= 0x80)                 \
 869           ch = BYTE8_TO_CHAR (ch);      \
 870         CHAR_STRING_ADVANCE (ch, dst);  \
 871         ch = (c2);                      \
 872         if (ch >= 0x80)                 \
 873           ch = BYTE8_TO_CHAR (ch);      \
 874         CHAR_STRING_ADVANCE (ch, dst);  \
 875       }                                 \
 876     else                                \
 877       {                                 \
 878         *dst++ = (c1);                  \
 879         *dst++ = (c2);                  \
 880       }                                 \
 881   } while (0)
 882
 883
     /* Emit the three bytes C1, C2 and C3 in that order, as one
        EMIT_ONE_BYTE followed by one EMIT_TWO_BYTES; the requirements on
        the caller are those of these two macros.  */

 884 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 885   do {                                  \
 886     EMIT_ONE_BYTE (c1);                 \
 887     EMIT_TWO_BYTES (c2, c3);            \
 888   } while (0)
 889
 890
     /* Emit the four bytes C1, C2, C3 and C4 in that order, as two
        consecutive EMIT_TWO_BYTES; the requirements on the caller are
        those of EMIT_TWO_BYTES.  */

 891 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 892   do {                                          \
 893     EMIT_TWO_BYTES (c1, c2);                    \
 894     EMIT_TWO_BYTES (c3, c4);                    \
 895   } while (0)
 896
 897
 898 /* Prototypes for static functions.  Each parameter list is wrapped
        in P_ (( )); presumably this lets compilers without prototype
        support drop the list -- confirm against the definition of P_.  */
 899 static void record_conversion_result P_ ((struct coding_system *coding,
 900                                           enum coding_result_code result));
     /* One detect / decode / encode triple per coding-system family
        (raw-text has no detector).  */
 901 static int detect_coding_utf_8 P_ ((struct coding_system *,
 902                                     struct coding_detection_info *info));
 903 static void decode_coding_utf_8 P_ ((struct coding_system *));
 904 static int encode_coding_utf_8 P_ ((struct coding_system *));
 905
 906 static int detect_coding_utf_16 P_ ((struct coding_system *,
 907                                      struct coding_detection_info *info));
 908 static void decode_coding_utf_16 P_ ((struct coding_system *));
 909 static int encode_coding_utf_16 P_ ((struct coding_system *));
 910
 911 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 912                                        struct coding_detection_info *info));
 913 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 914 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 915
 916 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 917                                          struct coding_detection_info *info));
 918 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 919 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 920
 921 static int detect_coding_sjis P_ ((struct coding_system *,
 922                                    struct coding_detection_info *info));
 923 static void decode_coding_sjis P_ ((struct coding_system *));
 924 static int encode_coding_sjis P_ ((struct coding_system *));
 925
 926 static int detect_coding_big5 P_ ((struct coding_system *,
 927                                    struct coding_detection_info *info));
 928 static void decode_coding_big5 P_ ((struct coding_system *));
 929 static int encode_coding_big5 P_ ((struct coding_system *));
 930
 931 static int detect_coding_ccl P_ ((struct coding_system *,
 932                                   struct coding_detection_info *info));
 933 static void decode_coding_ccl P_ ((struct coding_system *));
 934 static int encode_coding_ccl P_ ((struct coding_system *));
 935
 936 static void decode_coding_raw_text P_ ((struct coding_system *));
 937 static int encode_coding_raw_text P_ ((struct coding_system *));
 938
     /* Helpers that maintain the source/destination pointers of a
        coding_system and enlarge the destination.  */
 939 static void coding_set_source P_ ((struct coding_system *));
 940 static void coding_set_destination P_ ((struct coding_system *));
 941 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 942 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 943                                             EMACS_INT, EMACS_INT));
 944 static unsigned char *alloc_destination P_ ((struct coding_system *,
 945                                              EMACS_INT, unsigned char *));
     /* Remaining helpers; they are defined outside the part of the file
        documented here.  */
 946 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 947 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 948                                                      int *, int *,
 949                                                      unsigned char *));
 950 static int detect_eol P_ ((const unsigned char *,
 951                            EMACS_INT, enum coding_category));
 952 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 953 static void decode_eol P_ ((struct coding_system *));
 954 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 955 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *));
 956 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 957 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 958                                         EMACS_INT));
 959 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 960 static int decode_coding P_ ((struct coding_system *));
 961 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 962                                                       struct coding_system *,
 963                                                       int *, EMACS_INT *));
 964 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 965                                                   struct coding_system *,
 966                                                   int *, EMACS_INT *));
 967 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 968 static int encode_coding P_ ((struct coding_system *));
 969 static Lisp_Object make_conversion_work_buffer P_ ((int));
 970 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 971 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 972 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 973
     /* Store RESULT in CODING->result and set Vlast_code_conversion_error
        to the symbol that corresponds to RESULT.
        CODING_RESULT_SUCCESS leaves Vlast_code_conversion_error as it
        was; a code not listed below sets it to the symbol interned from
        "Unknown error".  */
 974 static void
 975 record_conversion_result (struct coding_system *coding,
 976                           enum coding_result_code result)
 977 {
 978   coding->result = result;
 979   switch (result)
 980     {
 981     case CODING_RESULT_INSUFFICIENT_SRC:
 982       Vlast_code_conversion_error = Qinsufficient_source;
 983       break;
 984     case CODING_RESULT_INCONSISTENT_EOL:
 985       Vlast_code_conversion_error = Qinconsistent_eol;
 986       break;
 987     case CODING_RESULT_INVALID_SRC:
 988       Vlast_code_conversion_error = Qinvalid_source;
 989       break;
 990     case CODING_RESULT_INTERRUPT:
 991       Vlast_code_conversion_error = Qinterrupted;
 992       break;
 993     case CODING_RESULT_INSUFFICIENT_MEM:
 994       Vlast_code_conversion_error = Qinsufficient_memory;
 995       break;
 996     case CODING_RESULT_SUCCESS:
            /* Success does not overwrite a previously recorded error.  */
 997       break;
 998     default:
 999       Vlast_code_conversion_error = intern ("Unknown error");
1000     }
1001 }
1002
     /* Set C to the result of DECODE_CHAR (CHARSET, CODE).
        charset_map_loaded is cleared before the call; if it is nonzero
        afterwards, CODING->source is recomputed with coding_set_source
        and SRC, SRC_BASE and SRC_END are all shifted by the distance the
        source moved (presumably because loading a charset map can
        relocate the source text -- confirm).  SRC, SRC_BASE and SRC_END
        must be modifiable lvalues.  */
1003 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
1004   do {                                                                       \
1005     charset_map_loaded = 0;                                                  \
1006     c = DECODE_CHAR (charset, code);                                         \
1007     if (charset_map_loaded)                                                  \
1008       {                                                                      \
1009         const unsigned char *orig = coding->source;                          \
1010         EMACS_INT offset;                                                    \
1011                                                                              \
1012         coding_set_source (coding);                                          \
1013         offset = coding->source - orig;                                      \
1014         src += offset;                                                       \
1015         src_base += offset;                                                  \
1016         src_end += offset;                                                   \
1017       }                                                                      \
1018   } while (0)
1019
1020
1021 /* Unless there are more than BYTES bytes of room left at dst (that
1022    is, when dst + BYTES reaches or passes dst_end), allocate memory
1023    for coding->destination and update dst and dst_end.  The amount
1024    requested is BYTES plus the number of elements still unread in
        charbuf (charbuf_end - charbuf).  We don't have to take care of
        coding->source which will be relocated.  It is handled by calling
        coding_set_source in encode_coding.  The caller should declare
        and set these variables appropriately in advance:
             coding, dst, dst_end, charbuf, charbuf_end  */
1025
1026 #define ASSURE_DESTINATION(bytes)                               \
1027   do {                                                          \
1028     if (dst + (bytes) >= dst_end)                               \
1029       {                                                         \
1030         int more_bytes = charbuf_end - charbuf + (bytes);       \
1031                                                                 \
1032         dst = alloc_destination (coding, more_bytes, dst);      \
1033         dst_end = coding->destination + coding->dst_bytes;      \
1034       }                                                         \
1035   } while (0)
1036
1037
1038 /* Store multibyte form of the character C in P, and advance P to the
1039    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
1040    never calls MAYBE_UNIFY_CHAR.

        The form is chosen by comparing C with MAX_1_BYTE_CHAR ...
        MAX_5_BYTE_CHAR; anything larger is written by BYTE8_STRING after
        subtracting 0x3FFF80.  C and P are each evaluated several times,
        so neither may have side effects.  Every use of C is
        parenthesized so that an expression argument such as `a | b' is
        shifted and masked as a whole.  */
1041
1042 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
1043   do {                                          \
1044     if ((c) <= MAX_1_BYTE_CHAR)                 \
1045       *(p)++ = (c);                             \
1046     else if ((c) <= MAX_2_BYTE_CHAR)            \
1047       *(p)++ = (0xC0 | ((c) >> 6)),             \
1048         *(p)++ = (0x80 | ((c) & 0x3F));         \
1049     else if ((c) <= MAX_3_BYTE_CHAR)            \
1050       *(p)++ = (0xE0 | ((c) >> 12)),            \
1051         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1052         *(p)++ = (0x80 | ((c) & 0x3F));         \
1053     else if ((c) <= MAX_4_BYTE_CHAR)            \
1054       *(p)++ = (0xF0 | ((c) >> 18)),            \
1055         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)), \
1056         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1057         *(p)++ = (0x80 | ((c) & 0x3F));         \
1058     else if ((c) <= MAX_5_BYTE_CHAR)            \
1059       *(p)++ = 0xF8,                            \
1060         *(p)++ = (0x80 | (((c) >> 18) & 0x0F)), \
1061         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)), \
1062         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1063         *(p)++ = (0x80 | ((c) & 0x3F));         \
1064     else                                        \
1065       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1066   } while (0)
1067
1068
1069 /* Return the character code of character whose multibyte form is at
1070    P, and advance P to the end of the multibyte form.  This is like
1071    STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

        The length (1 to 5 bytes) is chosen from the bits 0x80, 0x20,
        0x10 and 0x08 of the first byte.  A two-byte form whose first
        byte is below 0xC2 gets 0x3FFF80 OR-ed into the result
        (presumably the code range of raw 8-bit bytes).  In the five-byte
        form the first byte contributes no bits to the result.  P is
        evaluated many times and must be a modifiable pointer without
        side effects; the trailing bytes are not validated.  */
1072
1073 #define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
1074   (!((p)[0] & 0x80)                                             \
1075    ? *(p)++                                                     \
1076    : ! ((p)[0] & 0x20)                                          \
1077    ? ((p) += 2,                                                 \
1078       ((((p)[-2] & 0x1F) << 6)                                  \
1079        | ((p)[-1] & 0x3F)                                       \
1080        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
1081    : ! ((p)[0] & 0x10)                                          \
1082    ? ((p) += 3,                                                 \
1083       ((((p)[-3] & 0x0F) << 12)                                 \
1084        | (((p)[-2] & 0x3F) << 6)                                \
1085        | ((p)[-1] & 0x3F)))                                     \
1086    : ! ((p)[0] & 0x08)                                          \
1087    ? ((p) += 4,                                                 \
1088       ((((p)[-4] & 0xF) << 18)                                  \
1089        | (((p)[-3] & 0x3F) << 12)                               \
1090        | (((p)[-2] & 0x3F) << 6)                                \
1091        | ((p)[-1] & 0x3F)))                                     \
1092    : ((p) += 5,                                                 \
1093       ((((p)[-4] & 0x3F) << 18)                                 \
1094        | (((p)[-3] & 0x3F) << 12)                               \
1095        | (((p)[-2] & 0x3F) << 6)                                \
1096        | ((p)[-1] & 0x3F))))
1097
1098
     /* Recompute CODING->source from CODING->src_object and
        CODING->src_pos_byte.  Nothing is changed unless the source
        object is a buffer or a Lisp string.  */
1099 static void
1100 coding_set_source (coding)
1101      struct coding_system *coding;
1102 {
1103   if (BUFFERP (coding->src_object))
1104     {
1105       struct buffer *buf = XBUFFER (coding->src_object);
1106
1107       if (coding->src_pos < 0)
              /* A negative src_pos selects addressing relative to the end
                 of the gap (presumably src_pos_byte is then a negative
                 offset -- confirm against the callers).  */
1108         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1109       else
1110         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1111     }
1112   else if (STRINGP (coding->src_object))
1113     {
1114       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1115     }
1116   else
1117     /* Otherwise, the source is C string and is never relocated
1118        automatically.  Thus we don't have to update anything.  */
1119     ;
1120 }
1121
     /* Recompute CODING->destination and CODING->dst_bytes when the
        destination object is a buffer; otherwise change nothing.  */
1122 static void
1123 coding_set_destination (coding)
1124      struct coding_system *coding;
1125 {
1126   if (BUFFERP (coding->dst_object))
1127     {
1128       if (coding->src_pos < 0)
1129         {
              /* This branch uses BEG_ADDR and GAP_END_ADDR rather than
                 the BUF_* forms used below (presumably they refer to the
                 current buffer -- confirm).  The source bytes not yet
                 consumed are subtracted from the available room.  */
1130           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1131           coding->dst_bytes = (GAP_END_ADDR
1132                                - (coding->src_bytes - coding->consumed)
1133                                - coding->destination);
1134         }
1135       else
1136         {
1137           /* We are sure that coding->dst_pos_byte is before the gap
1138              of the buffer. */
1139           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1140                                  + coding->dst_pos_byte - BEG_BYTE);
1141           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1142                                - coding->destination);
1143         }
1144     }
1145   else
1146     /* Otherwise, the destination is C string and is never relocated
1147        automatically.  Thus we don't have to update anything.  */
1148     ;
1149 }
1150
1151
     /* Enlarge CODING->destination by BYTES bytes with xrealloc and add
        BYTES to CODING->dst_bytes.  The result of xrealloc is stored
        without a check here (presumably xrealloc does not return on
        failure -- confirm).  */
1152 static void
1153 coding_alloc_by_realloc (coding, bytes)
1154      struct coding_system *coding;
1155      EMACS_INT bytes;
1156 {
1157   coding->destination = (unsigned char *) xrealloc (coding->destination,
1158                                                     coding->dst_bytes + bytes);
1159   coding->dst_bytes += bytes;
1160 }
1161
     /* Call make_gap (BYTES) for the destination buffer of CODING.
        GAP_HEAD_USED is the number of bytes at the head of the gap that
        already hold produced data; it is only used when source and
        destination are the same object.  */
1162 static void
1163 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1164      struct coding_system *coding;
1165      EMACS_INT gap_head_used, bytes;
1166 {
1167   if (EQ (coding->src_object, coding->dst_object))
1168     {
1169       /* The gap may contain the produced data at the head and not-yet
1170          consumed data at the tail.  To preserve those data, we at
1171          first make the gap size to zero, then increase the gap
1172          size.  */
1173       EMACS_INT add = GAP_SIZE;
1174
          /* Move the gap position past the produced data and count the
             old gap as text around the make_gap call; the two statements
             after make_gap undo exactly these adjustments.  */
1175       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1176       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1177       make_gap (bytes);
1178       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1179       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1180     }
1181   else
1182     {
1183       Lisp_Object this_buffer;
1184
          /* make_gap takes no buffer argument, so switch to the
             destination buffer for the call and switch back afterwards.  */
1185       this_buffer = Fcurrent_buffer ();
1186       set_buffer_internal (XBUFFER (coding->dst_object));
1187       make_gap (bytes);
1188       set_buffer_internal (XBUFFER (this_buffer));
1189     }
1190 }
1191
1192
     /* Make NBYTES more bytes available in the destination of CODING.
        DST points into the current CODING->destination.  The destination
        is enlarged by making a gap when it is a buffer and by realloc
        otherwise, then CODING->destination and CODING->dst_bytes are
        refreshed with coding_set_destination.  Return the pointer at the
        same offset as DST within the new CODING->destination.  */
1193 static unsigned char *
1194 alloc_destination (coding, nbytes, dst)
1195      struct coding_system *coding;
1196      EMACS_INT nbytes;
1197      unsigned char *dst;
1198 {
        /* Remember DST as an offset, because the destination may move.  */
1199   EMACS_INT offset = dst - coding->destination;
1200
1201   if (BUFFERP (coding->dst_object))
1202     {
1203       struct buffer *buf = XBUFFER (coding->dst_object);
1204
1205       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1206     }
1207   else
1208     coding_alloc_by_realloc (coding, nbytes);
1209   coding_set_destination (coding);
1210   dst = coding->destination + offset;
1211   return dst;
1212 }
1213
1214 /** Macros for annotations.  */
1215
1216 /* An annotation data is stored in the array coding->charbuf in this
1217    format:
1218      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1219    LENGTH is the number of elements in the annotation.
1220    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1221    NCHARS is the number of characters in the text annotated.
1222
1223    The format of the following elements depend on ANNOTATION_MASK.
1224
1225    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1226    follow:
1227      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1228
1229    NBYTES is the number of bytes specified in the header part of
1230    old-style emacs-mule encoding, or 0 for the other kind of
1231    composition.
1232
1233    METHOD is one of enum composition_method.
1234
1235    Optional COMPOSITION-COMPONENTS are characters and composition
1236    rules.
1237
1238    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1239    follows.
1240
1241    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1242    recover from an invalid annotation, and should be skipped by
1243    produce_annotation.  */
1244
1245 /* Maximum length of the header of annotation data.  */
1246 #define MAX_ANNOTATION_LENGTH 5
1247
     /* Store the three leading elements of an annotation at BUF, namely
        -LEN, MASK and NCHARS, advance BUF past them, and set
        coding->annotated to 1.  The caller should have the variable
        `coding' in scope.  The expansion does not end in a semicolon, so
        the macro behaves as one statement, also before `else'.  */
1248 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1249   do {                                                  \
1250     *(buf)++ = -(len);                                  \
1251     *(buf)++ = (mask);                                  \
1252     *(buf)++ = (nchars);                                \
1253     coding->annotated = 1;                              \
1254   } while (0)
1255
     /* Store a composition annotation at BUF and advance BUF past it:
        the common header (with length 5 and
        CODING_ANNOTATE_COMPOSITION_MASK) followed by NBYTES and METHOD.
        The length 5 counts the three header elements plus these two.
        Arguments are parenthesized so that expressions can be passed.  */
1256 #define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
1257   do {                                                                      \
1258     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1259     *(buf)++ = (nbytes);                                                    \
1260     *(buf)++ = (method);                                                    \
1261   } while (0)
1262
1263
     /* Store a charset annotation at BUF and advance BUF past it: the
        common header (with length 4 and CODING_ANNOTATE_CHARSET_MASK)
        followed by ID.  The length 4 counts the three header elements
        plus ID.  Arguments are parenthesized so that expressions can be
        passed.  */
1264 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1265   do {                                                                  \
1266     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1267     *(buf)++ = (id);                                                    \
1268   } while (0)
1269
1270 \f
1271 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1272
1273
1274
1275 \f
1276 /*** 3. UTF-8 ***/
1277
1278 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1279    Check if a text is encoded in UTF-8.  If it is, return 1, else
1280    return 0.  */
1281
     /* Classify one byte C of a UTF-8 sequence by its high bits:
          0xxxxxxx  a complete one-byte sequence,
          10xxxxxx  a trailing ("extra") byte,
          110xxxxx, 1110xxxx, 11110xxx, 111110xx
                    the first byte of a 2-, 3-, 4- or 5-byte sequence.
        UTF_8_1_OCTET_P uses a plain comparison, so a negative C also
        satisfies it; callers in this file test for C < 0 first.  */
1282 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1283 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1284 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1285 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1286 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1287 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1288
     /* The byte order mark U+FEFF, and the three bytes that encode it in
        UTF-8.  */
1289 #define UTF_BOM 0xFEFF
1290 #define UTF_8_BOM_1 0xEF
1291 #define UTF_8_BOM_2 0xBB
1292 #define UTF_8_BOM_3 0xBF
1293
     /* Check whether the source text of CODING is structurally valid
        UTF-8 and record the outcome in DETECT_INFO.

        CATEGORY_MASK_UTF_8 is always added to DETECT_INFO->checked.  The
        first CODING->head_ascii bytes are skipped.  Each remaining
        sequence must be a one-byte character or a leading byte followed
        by the right number of trailing bytes (sequences of up to five
        bytes are accepted); only this structure is checked, not the
        decoded value.

        Return 0 and add CATEGORY_MASK_UTF_8 to DETECT_INFO->rejected if
        a malformed sequence is met, or if the text ends inside a
        sequence while CODING_MODE_LAST_BLOCK is set.  Otherwise return 1
        after updating the found/rejected bits of the with-signature and
        without-signature categories.  */
1294 static int
1295 detect_coding_utf_8 (coding, detect_info)
1296      struct coding_system *coding;
1297      struct coding_detection_info *detect_info;
1298 {
1299   const unsigned char *src = coding->source, *src_base;
1300   const unsigned char *src_end = coding->source + coding->src_bytes;
1301   int multibytep = coding->src_multibyte;
1302   int consumed_chars = 0;
1303   int bom_found = 0;
1304   int found = 0;
1305
1306   detect_info->checked |= CATEGORY_MASK_UTF_8;
1307   /* A coding system of this category is always ASCII compatible.  */
1308   src += coding->head_ascii;
1309
1310   while (1)
1311     {
1312       int c, c1, c2, c3, c4;
1313
          /* src_base marks the start of the sequence being examined.  The
             loop is left either by `break' (malformed sequence) or by
             ONE_MORE_BYTE jumping to no_more_source.  */
1314       src_base = src;
1315       ONE_MORE_BYTE (c);
1316       if (c < 0 || UTF_8_1_OCTET_P (c))
1317         continue;
1318       ONE_MORE_BYTE (c1);
1319       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1320         break;
1321       if (UTF_8_2_OCTET_LEADING_P (c))
1322         {
1323           found = 1;
1324           continue;
1325         }
1326       ONE_MORE_BYTE (c2);
1327       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1328         break;
1329       if (UTF_8_3_OCTET_LEADING_P (c))
1330         {
1331           found = 1;
              /* Only a sequence that starts at the very beginning of the
                 source can be a BOM.  */
1332           if (src_base == coding->source
1333               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1334             bom_found = 1;
1335           continue;
1336         }
1337       ONE_MORE_BYTE (c3);
1338       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1339         break;
1340       if (UTF_8_4_OCTET_LEADING_P (c))
1341         {
1342           found = 1;
1343           continue;
1344         }
1345       ONE_MORE_BYTE (c4);
1346       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1347         break;
1348       if (UTF_8_5_OCTET_LEADING_P (c))
1349         {
1350           found = 1;
1351           continue;
1352         }
1353       break;
1354     }
        /* Reached only by `break': the text is not UTF-8.  */
1355   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1356   return 0;
1357
1358  no_more_source:
        /* src_base < src means the source ended inside a sequence; that
           is an error only if no further block will follow.  */
1359   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1360     {
1361       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1362       return 0;
1363     }
1364   if (bom_found)
1365     {
1366       /* A leading U+FEFF (bytes EF BB BF) doesn't necessarily mean a
             BOM, so both the with-signature and the without-signature
             categories are reported as found.  */
1367       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1368     }
1369   else
1370     {
1371       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1372       if (found)
1373         detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1374     }
1375   return 1;
1376 }
1377
1378
     /* Decode the UTF-8 source of CODING, starting CODING->consumed bytes
        into CODING->source, and append one character code per decoded
        item to CODING->charbuf.

        Decoding stops when charbuf is full or the source is exhausted.
        On return CODING->consumed, CODING->consumed_char and
        CODING->charbuf_used describe what was completely processed; a
        sequence cut off by the end of the source is left unconsumed.

        A byte that does not start a valid sequence is stored as a single
        raw byte (BYTE8_TO_CHAR unless it is ASCII) and counted in
        CODING->errors; decoding then resumes at the byte after it.
        Overlong forms, surrogates and values above MAX_CHAR are treated
        as invalid.  */
1379 static void
1380 decode_coding_utf_8 (coding)
1381      struct coding_system *coding;
1382 {
1383   const unsigned char *src = coding->source + coding->consumed;
1384   const unsigned char *src_end = coding->source + coding->src_bytes;
1385   const unsigned char *src_base;
1386   int *charbuf = coding->charbuf + coding->charbuf_used;
1387   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1388   int consumed_chars = 0, consumed_chars_base = 0;
1389   int multibytep = coding->src_multibyte;
1390   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1391   Lisp_Object attr, charset_list;
1392   int eol_crlf =
1393     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
        /* Holds the byte read ahead after a CR when eol_crlf is set, or
           -1 when there is none.  */
1394   int byte_after_cr = -1;
1395
1396   CODING_GET_INFO (coding, attr, charset_list);
1397
1398   if (bom != utf_without_bom)
1399     {
1400       int c1, c2, c3;
1401
          /* Look for the three BOM bytes and skip them when all three
             match; on any mismatch rewind SRC to where it was.
             NOTE(review): consumed_chars, which ONE_MORE_BYTE increments,
             is not rewound together with SRC -- confirm this is
             intended.  */
1402       src_base = src;
1403       ONE_MORE_BYTE (c1);
1404       if (! UTF_8_3_OCTET_LEADING_P (c1))
1405         src = src_base;
1406       else
1407         {
1408           ONE_MORE_BYTE (c2);
1409           if (! UTF_8_EXTRA_OCTET_P (c2))
1410             src = src_base;
1411           else
1412             {
1413               ONE_MORE_BYTE (c3);
1414               if (! UTF_8_EXTRA_OCTET_P (c3))
1415                 src = src_base;
1416               else
1417                 {
1418                   if ((c1 != UTF_8_BOM_1)
1419                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1420                     src = src_base;
1421                   else
1422                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1423                 }
1424             }
1425         }
1426     }
        /* Whatever was found, mark the coding system as BOM-less so that
           the check above is skipped the next time this function reads
           CODING_UTF_8_BOM.  */
1427   CODING_UTF_8_BOM (coding) = utf_without_bom;
1428
1429
1430
1431   while (1)
1432     {
1433       int c, c1, c2, c3, c4, c5;
1434
          /* Remember where this item starts, both for the invalid_code
             recovery and for the bookkeeping at no_more_source.  */
1435       src_base = src;
1436       consumed_chars_base = consumed_chars;
1437
1438       if (charbuf >= charbuf_end)
1439         {
              /* The byte read ahead after a CR has not been stored yet;
                 step back so that it is not counted as consumed.  */
1440           if (byte_after_cr >= 0)
1441             src_base--;
1442           break;
1443         }
1444
1445       if (byte_after_cr >= 0)
1446         c1 = byte_after_cr, byte_after_cr = -1;
1447       else
1448         ONE_MORE_BYTE (c1);
1449       if (c1 < 0)
1450         {
              /* ONE_MORE_BYTE delivered a negated multibyte character;
                 store the character itself.  */
1451           c = - c1;
1452         }
1453       else if (UTF_8_1_OCTET_P(c1))
1454         {
              /* After a CR, read the following byte ahead; it becomes C1
                 of the next iteration.  */
1455           if (eol_crlf && c1 == '\r')
1456             ONE_MORE_BYTE (byte_after_cr);
1457           c = c1;
1458         }
1459       else
1460         {
1461           ONE_MORE_BYTE (c2);
1462           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1463             goto invalid_code;
1464           if (UTF_8_2_OCTET_LEADING_P (c1))
1465             {
1466               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1467               /* Reject overlong sequences here and below.  Encoders
1468                  producing them are incorrect, they can be misleading,
1469                  and they mess up read/write invariance.  */
1470               if (c < 128)
1471                 goto invalid_code;
1472             }
1473           else
1474             {
1475               ONE_MORE_BYTE (c3);
1476               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1477                 goto invalid_code;
1478               if (UTF_8_3_OCTET_LEADING_P (c1))
1479                 {
1480                   c = (((c1 & 0xF) << 12)
1481                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1482                   if (c < 0x800
1483                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1484                     goto invalid_code;
1485                 }
1486               else
1487                 {
1488                   ONE_MORE_BYTE (c4);
1489                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1490                     goto invalid_code;
1491                   if (UTF_8_4_OCTET_LEADING_P (c1))
1492                     {
1493                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1494                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1495                     if (c < 0x10000)
1496                       goto invalid_code;
1497                     }
1498                   else
1499                     {
1500                       ONE_MORE_BYTE (c5);
1501                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1502                         goto invalid_code;
1503                       if (UTF_8_5_OCTET_LEADING_P (c1))
1504                         {
1505                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1506                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1507                                | (c5 & 0x3F));
1508                           if ((c > MAX_CHAR) || (c < 0x200000))
1509                             goto invalid_code;
1510                         }
1511                       else
1512                         goto invalid_code;
1513                     }
1514                 }
1515             }
1516         }
1517
1518       *charbuf++ = c;
1519       continue;
1520
1521     invalid_code:
          /* Go back to the start of the bad sequence, take just its first
             byte as a raw byte, and let the next iteration rescan from
             the byte after it.  */
1522       src = src_base;
1523       consumed_chars = consumed_chars_base;
1524       ONE_MORE_BYTE (c);
1525       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1526       coding->errors++;
1527     }
1528
1529  no_more_source:
        /* Report progress up to the start of the last unfinished item
           (src_base / consumed_chars_base), not up to SRC.  */
1530   coding->consumed_char += consumed_chars_base;
1531   coding->consumed = src_base - coding->source;
1532   coding->charbuf_used = charbuf - coding->charbuf;
1533 }
1534
1535
     /* Encode the character codes in CODING->charbuf as UTF-8 and append
        the resulting bytes to CODING->destination, starting at offset
        CODING->produced.

        A character for which CHAR_BYTE8_P holds is written as the single
        raw byte given by CHAR_TO_BYTE8; every other character is written
        with CHAR_STRING_ADVANCE_NO_UNIFY.  When CODING asks for a BOM, the
        three BOM bytes are emitted first and the setting is switched to
        utf_without_bom, so the BOM is not emitted a second time.

        At the end the result is recorded as CODING_RESULT_SUCCESS,
        CODING->produced and CODING->produced_char are updated, and 0 is
        returned.

        ASSURE_DESTINATION and the EMIT_* macros are defined outside this
        part of the file.  Presumably they work on the locals dst, dst_end,
        multibytep and produced_chars, and ASSURE_DESTINATION may leave
        the function early when the destination is too small -- confirm
        against their definitions.  */
1536 static int
1537 encode_coding_utf_8 (coding)
1538      struct coding_system *coding;
1539 {
1540   int multibytep = coding->dst_multibyte;
1541   int *charbuf = coding->charbuf;
1542   int *charbuf_end = charbuf + coding->charbuf_used;
1543   unsigned char *dst = coding->destination + coding->produced;
1544   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1545   int produced_chars = 0;
1546   int c;
1547
       /* Emit the BOM once, then remember that it has been written.  */
1548   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1549     {
1550       ASSURE_DESTINATION (3);
1551       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1552       CODING_UTF_8_BOM (coding) = utf_without_bom;
1553     }
1554
1555   if (multibytep)
1556     {
           /* Multibyte destination: each character is first assembled in
              STR and then handed to EMIT_ONE_BYTE one byte at a time.
              produced_chars is not touched directly in this branch;
              presumably EMIT_ONE_BYTE maintains it (TODO confirm).  */
1557       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1558
1559       while (charbuf < charbuf_end)
1560         {
1561           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1562
1563           ASSURE_DESTINATION (safe_room);
1564           c = *charbuf++;
1565           if (CHAR_BYTE8_P (c))
1566             {
1567               c = CHAR_TO_BYTE8 (c);
1568               EMIT_ONE_BYTE (c);
1569             }
1570           else
1571             {
1572               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1573               for (p = str; p < pend; p++)
1574                 EMIT_ONE_BYTE (*p);
1575             }
1576         }
1577     }
1578   else
1579     {
           /* Unibyte destination: bytes are stored straight through DST,
              and produced_chars is counted here, once per character.  */
1580       int safe_room = MAX_MULTIBYTE_LENGTH;
1581
1582       while (charbuf < charbuf_end)
1583         {
1584           ASSURE_DESTINATION (safe_room);
1585           c = *charbuf++;
1586           if (CHAR_BYTE8_P (c))
1587             *dst++ = CHAR_TO_BYTE8 (c);
1588           else
1589             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1590           produced_chars++;
1591         }
1592     }
1593   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1594   coding->produced_char += produced_chars;
1595   coding->produced = dst - coding->destination;
1596   return 0;
1597 }
1598
1599
1600 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1601    Check if a text is encoded in one of UTF-16 based coding systems.
1602    If it is, return 1, else return 0.  */
1603
     /* Nonzero if the bits of VAL selected by the mask 0xFC00 equal
        0xD800.  For a 16-bit code unit this means VAL is a high (leading)
        surrogate, 0xD800..0xDBFF.  */
1604 #define UTF_16_HIGH_SURROGATE_P(val) \
1605   (((val) & 0xFC00) == 0xD800)
1606
     /* Nonzero if the bits of VAL selected by the mask 0xFC00 equal
        0xDC00.  For a 16-bit code unit this means VAL is a low (trailing)
        surrogate, 0xDC00..0xDFFF.  */
1607 #define UTF_16_LOW_SURROGATE_P(val) \
1608   (((val) & 0xFC00) == 0xDC00)
1609
     /* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  VAL is
        evaluated more than once.  */
1610 #define UTF_16_INVALID_P(val)   \
1611   (((val) == 0xFFFE)            \
1612    || ((val) == 0xFFFF)         \
1613    || UTF_16_LOW_SURROGATE_P (val))
1614
1615
     /* Examine CODING's source for UTF-16 and record the verdict in
        DETECT_INFO.

        A leading FF FE marks little endian and a leading FE FF marks big
        endian.  In either case the matching category and
        CATEGORY_MASK_UTF_16_AUTO are added to DETECT_INFO->found, and the
        other endianness plus both NOSIG categories are added to
        DETECT_INFO->rejected.  Without a BOM, the AUTO, BE and LE
        categories are rejected and the two NOSIG categories are judged by
        how many distinct byte values occur at even and at odd offsets.

        Return 1 through the `no_more_source' label.  That label is
        reached after a BOM was recognized, and presumably also when
        TWO_MORE_BYTES runs out of input (the macro is defined elsewhere
        -- confirm).  Return 0 when the text is rejected outright, and
        also when the BOM-less scan stops because every UTF-16 category
        has been rejected or C2 is negative.  */
1616 static int
1617 detect_coding_utf_16 (coding, detect_info)
1618      struct coding_system *coding;
1619      struct coding_detection_info *detect_info;
1620 {
1621   const unsigned char *src = coding->source, *src_base = src;
1622   const unsigned char *src_end = coding->source + coding->src_bytes;
1623   int multibytep = coding->src_multibyte;
1624   int consumed_chars = 0;
1625   int c1, c2;
1626
1627   detect_info->checked |= CATEGORY_MASK_UTF_16;
       /* In the last block, an odd value of src_chars rules out UTF-16.  */
1628   if (coding->mode & CODING_MODE_LAST_BLOCK
1629       && (coding->src_chars & 1))
1630     {
1631       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1632       return 0;
1633     }
1634
1635   TWO_MORE_BYTES (c1, c2);
1636   if ((c1 == 0xFF) && (c2 == 0xFE))
1637     {
           /* Little-endian BOM.  */
1638       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1639                              | CATEGORY_MASK_UTF_16_AUTO);
1640       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1641                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1642                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1643     }
1644   else if ((c1 == 0xFE) && (c2 == 0xFF))
1645     {
           /* Big-endian BOM.  */
1646       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1647                              | CATEGORY_MASK_UTF_16_AUTO);
1648       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1649                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1650                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1651     }
1652   else if (c2 < 0)
1653     {
1654       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1655       return 0;
1656     }
1657   else
1658     {
1659       /* We check the dispersion of Eth and Oth bytes where E is even and
1660          O is odd.  If both are high, we assume binary data.*/
           /* E and O flag the byte values already seen at even and odd
              offsets; E_NUM and O_NUM count the distinct values.
              NOTE(review): only C2 is tested for a negative value before
              C1 is used as an index into E, both here and in the loop
              below.  Confirm that TWO_MORE_BYTES cannot deliver a
              negative C1 together with a non-negative C2.  */
1661       unsigned char e[256], o[256];
1662       unsigned e_num = 1, o_num = 1;
1663
1664       memset (e, 0, 256);
1665       memset (o, 0, 256);
1666       e[c1] = 1;
1667       o[c2] = 1;
1668
           /* No BOM was found, so the categories that need one are out.  */
1669       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1670                                 |CATEGORY_MASK_UTF_16_BE
1671                                 | CATEGORY_MASK_UTF_16_LE);
1672
           /* Keep scanning until both NOSIG categories are rejected too.  */
1673       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1674              != CATEGORY_MASK_UTF_16)
1675         {
1676           TWO_MORE_BYTES (c1, c2);
1677           if (c2 < 0)
1678             break;
1679           if (! e[c1])
1680             {
1681               e[c1] = 1;
1682               e_num++;
                   /* 128 or more distinct even-offset bytes: reject the
                      BOM-less big-endian category.  */
1683               if (e_num >= 128)
1684                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1685             }
1686           if (! o[c2])
1687             {
1688               o[c2] = 1;
1689               o_num++;
                   /* 128 or more distinct odd-offset bytes: reject the
                      BOM-less little-endian category.  */
1690               if (o_num >= 128)
1691                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1692             }
1693         }
1694       return 0;
1695     }
1696
1697  no_more_source:
1698   return 1;
1699 }
1700
     /* Decode the UTF-16 bytes of CODING->source, starting at offset
        CODING->consumed, into character codes appended to
        CODING->charbuf.

        The BOM setting and the byte order are taken from CODING.  A high
        surrogate that is still waiting for its partner is kept in
        CODING_UTF_16_SURROGATE (coding) as well as in the local
        SURROGATE.  When EOL conversion is not inhibited and the EOL type
        of CODING is Qdos, the two bytes following a CR are read ahead
        before the CR is stored.

        On exit CODING->consumed, CODING->consumed_char and
        CODING->charbuf_used are updated.

        ONE_MORE_BYTE is defined outside this part of the file.
        Presumably it jumps to `no_more_source' when the input is
        exhausted, advances consumed_chars, and yields a negative value
        for a source character that is not a plain byte -- confirm
        against its definition.  */
1701 static void
1702 decode_coding_utf_16 (coding)
1703      struct coding_system *coding;
1704 {
1705   const unsigned char *src = coding->source + coding->consumed;
1706   const unsigned char *src_end = coding->source + coding->src_bytes;
1707   const unsigned char *src_base;
1708   int *charbuf = coding->charbuf + coding->charbuf_used;
1709   /* We may produce at most 3 chars in one loop.  */
1710   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1711   int consumed_chars = 0, consumed_chars_base = 0;
1712   int multibytep = coding->src_multibyte;
1713   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1714   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1715   int surrogate = CODING_UTF_16_SURROGATE (coding);
1716   Lisp_Object attr, charset_list;
1717   int eol_crlf =
1718     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The two bytes read ahead after a CR, or -1 when nothing is
          pending.  */
1719   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1720
1721   CODING_GET_INFO (coding, attr, charset_list);
1722
1723   if (bom == utf_with_bom)
1724     {
1725       int c, c1, c2;
1726
1727       src_base = src;
1728       ONE_MORE_BYTE (c1);
1729       ONE_MORE_BYTE (c2);
           /* The two bytes are always combined first-byte-high, so the
              value expected for little endian is 0xFFFE.  */
1730       c = (c1 << 8) | c2;
1731
1732       if (endian == utf_16_big_endian
1733           ? c != 0xFEFF : c != 0xFFFE)
1734         {
1735           /* The first two bytes are not BOM.  Treat them as bytes
1736              for a normal character.  */
               /* NOTE(review): only SRC is rewound here; consumed_chars
                  keeps whatever the two ONE_MORE_BYTE calls did to it.
                  Confirm that this is intended.  */
1737           src = src_base;
1738           coding->errors++;
1739         }
1740       CODING_UTF_16_BOM (coding) = utf_without_bom;
1741     }
1742   else if (bom == utf_detect_bom)
1743     {
1744       /* We have already tried to detect BOM and failed in
1745          detect_coding.  */
1746       CODING_UTF_16_BOM (coding) = utf_without_bom;
1747     }
1748
1749   while (1)
1750     {
1751       int c, c1, c2;
1752
           /* Remember where this code unit starts; these values are what
              gets reported as consumed at `no_more_source'.  */
1753       src_base = src;
1754       consumed_chars_base = consumed_chars;
1755
1756       if (charbuf >= charbuf_end)
1757         {
               /* The two bytes read ahead after a CR have not been
                  decoded yet; step back over them so that they are not
                  reported as consumed.  */
1758           if (byte_after_cr1 >= 0)
1759             src_base -= 2;
1760           break;
1761         }
1762
           /* Take the first byte from the read-ahead if there is one.  */
1763       if (byte_after_cr1 >= 0)
1764         c1 = byte_after_cr1, byte_after_cr1 = -1;
1765       else
1766         ONE_MORE_BYTE (c1);
1767       if (c1 < 0)
1768         {
               /* A negative value is stored with its sign flipped.  */
1769           *charbuf++ = -c1;
1770           continue;
1771         }
1772       if (byte_after_cr2 >= 0)
1773         c2 = byte_after_cr2, byte_after_cr2 = -1;
1774       else
1775         ONE_MORE_BYTE (c2);
1776       if (c2 < 0)
1777         {
               /* C1 has no partner byte: store it on its own (as a raw
                  byte character unless it is ASCII), then store -C2.  */
1778           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1779           *charbuf++ = -c2;
1780           continue;
1781         }
1782       c = (endian == utf_16_big_endian
1783            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1784
1785       if (surrogate)
1786         {
1787           if (! UTF_16_LOW_SURROGATE_P (c))
1788             {
                   /* The pending high surrogate is not followed by a low
                      surrogate.  Store its two bytes, in source byte
                      order, as two separate values and count an error.
                      This is the path that can store three values in one
                      iteration.  */
1789               if (endian == utf_16_big_endian)
1790                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1791               else
1792                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1793               *charbuf++ = c1;
1794               *charbuf++ = c2;
1795               coding->errors++;
                   /* NOTE(review): when C is not a high surrogate it is
                      stored, but SURROGATE and
                      CODING_UTF_16_SURROGATE (coding) keep their old
                      value, so the next code unit takes this error path
                      again.  Confirm whether the pending surrogate should
                      be cleared here.  */
1796               if (UTF_16_HIGH_SURROGATE_P (c))
1797                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1798               else
1799                 *charbuf++ = c;
1800             }
1801           else
1802             {
                   /* Combine the surrogate pair into one code point at or
                      above 0x10000 and clear the pending state.  */
1803               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1804               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1805               *charbuf++ = 0x10000 + c;
1806             }
1807         }
1808       else
1809         {
               /* A high surrogate is only remembered; nothing is stored
                  until the next code unit has been seen.  */
1810           if (UTF_16_HIGH_SURROGATE_P (c))
1811             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1812           else
1813             {
1814               if (eol_crlf && c == '\r')
1815                 {
                       /* Read the next two bytes before storing the CR;
                          the next iteration takes them from
                          byte_after_cr1 and byte_after_cr2 instead of the
                          source.  */
1816                   ONE_MORE_BYTE (byte_after_cr1);
1817                   ONE_MORE_BYTE (byte_after_cr2);
1818                 }
1819               *charbuf++ = c;
1820             }
1821         }
1822     }
1823
1824  no_more_source:
1825   coding->consumed_char += consumed_chars_base;
1826   coding->consumed = src_base - coding->source;
1827   coding->charbuf_used = charbuf - coding->charbuf;
1828 }
1829
     /* Encode the character codes in CODING->charbuf as UTF-16 and append
        the resulting bytes to CODING->destination, starting at offset
        CODING->produced.

        The byte order comes from CODING_UTF_16_ENDIAN (coding).  Unless
        the BOM setting is utf_without_bom, a BOM in that byte order is
        emitted first and the setting is switched to utf_without_bom.  A
        character above MAX_UNICODE_CHAR is replaced by
        CODING->default_char.  A character below 0x10000 becomes one
        16-bit unit; any other character becomes a surrogate pair.

        At the end the result is recorded as CODING_RESULT_SUCCESS,
        CODING->produced and CODING->produced_char are updated, and 0 is
        returned.

        ASSURE_DESTINATION and the EMIT_* macros are defined outside this
        part of the file.  produced_chars is never changed directly here;
        presumably the EMIT_* macros maintain it, and ASSURE_DESTINATION
        may leave the function early when the destination is too small --
        confirm against their definitions.  */
1830 static int
1831 encode_coding_utf_16 (coding)
1832      struct coding_system *coding;
1833 {
1834   int multibytep = coding->dst_multibyte;
1835   int *charbuf = coding->charbuf;
1836   int *charbuf_end = charbuf + coding->charbuf_used;
1837   unsigned char *dst = coding->destination + coding->produced;
1838   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Room requested from ASSURE_DESTINATION before each emission.  */
1839   int safe_room = 8;
1840   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1841   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1842   int produced_chars = 0;
1843   Lisp_Object attrs, charset_list;
1844   int c;
1845
1846   CODING_GET_INFO (coding, attrs, charset_list);
1847
       /* Emit the BOM once, then remember that it has been written.  */
1848   if (bom != utf_without_bom)
1849     {
1850       ASSURE_DESTINATION (safe_room);
1851       if (big_endian)
1852         EMIT_TWO_BYTES (0xFE, 0xFF);
1853       else
1854         EMIT_TWO_BYTES (0xFF, 0xFE);
1855       CODING_UTF_16_BOM (coding) = utf_without_bom;
1856     }
1857
1858   while (charbuf < charbuf_end)
1859     {
1860       ASSURE_DESTINATION (safe_room);
1861       c = *charbuf++;
1862       if (c > MAX_UNICODE_CHAR)
1863         c = coding->default_char;
1864
1865       if (c < 0x10000)
1866         {
               /* One 16-bit unit, high byte first for big endian.  */
1867           if (big_endian)
1868             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1869           else
1870             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1871         }
1872       else
1873         {
               /* Surrogate pair: C1 is the high surrogate built from the
                  bits above the low ten, C2 the low surrogate built from
                  the low ten bits.  */
1874           int c1, c2;
1875
1876           c -= 0x10000;
1877           c1 = (c >> 10) + 0xD800;
1878           c2 = (c & 0x3FF) + 0xDC00;
1879           if (big_endian)
1880             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1881           else
1882             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1883         }
1884     }
1885   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1886   coding->produced = dst - coding->destination;
1887   coding->produced_char += produced_chars;
1888   return 0;
1889 }
1890
1891 \f
1892 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1893
1894 /* Emacs' internal format for representation of multiple character
1895    sets is a kind of multi-byte encoding, i.e. characters are
1896    represented by variable-length sequences of one-byte codes.
1897
1898    ASCII characters and control characters (e.g. `tab', `newline') are
1899    represented by one-byte sequences which are their ASCII codes, in
1900    the range 0x00 through 0x7F.
1901
1902    8-bit characters of the range 0x80..0x9F are represented by
1903    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1904    code + 0x20).
1905
1906    8-bit characters of the range 0xA0..0xFF are represented by
1907    one-byte sequences which are their 8-bit code.
1908
1909    The other characters are represented by a sequence of `base
1910    leading-code', optional `extended leading-code', and one or two
1911    `position-code's.  The length of the sequence is determined by the
1912    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1913    whereas extended leading-code and position-code take the range 0xA0
1914    through 0xFF.  See `charset.h' for more details about leading-code
1915    and position-code.
1916
1917    --- CODE RANGE of Emacs' internal format ---
1918    character set        range
1919    -------------        -----
1920    ascii                0x00..0x7F
1921    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1922    eight-bit-graphic    0xA0..0xBF
1923    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1924    ---------------------------------------------
1925
1926    As this is the internal character representation, the format is
1927    usually not used externally (i.e. in a file or in a data sent to a
1928    process).  But, it is possible to have a text externally in this
1929    format (i.e. by encoding by the coding system `emacs-mule').
1930
1931    In that case, a sequence of one-byte codes has a slightly different
1932    form.
1933
1934    At first, all characters in eight-bit-control are represented by
1935    one-byte sequences which are their 8-bit code.
1936
1937    Next, character composition data are represented by the byte
1938    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1939    where,
1940         METHOD is 0xF2 plus one of composition method (enum
1941         composition_method),
1942
1943         BYTES is 0xA0 plus a byte length of this composition data,
1944
1945         CHARS is 0xA0 plus a number of characters composed by this
1946         data,
1947
1948         COMPONENTs are characters of multibyte form or composition
1949         rules encoded by two-byte of ASCII codes.
1950
1951    In addition, for backward compatibility, the following formats are
1952    also recognized as composition data on decoding.
1953
1954    0x80 MSEQ ...
1955    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1956
1957    Here,
1958         MSEQ is a multibyte form but in these special format:
1959           ASCII: 0xA0 ASCII_CODE+0x80,
1960           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1961         RULE is a one byte code of the range 0xA0..0xF0 that
1962         represents a composition rule.
1963   */
1964
     /* Table indexed by a byte value.  The code below reads the entry for
        a leading byte as the total length in bytes (1 to 4) of the
        emacs-mule sequence which that byte starts.  The table is not
        filled in within this part of the file.  */
1965 char emacs_mule_bytes[256];
1966
1967
1968 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1969    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1970    else return 0.

        The category is marked as checked in DETECT_INFO.  On a 0 return
        CATEGORY_MASK_EMACS_MULE is added to DETECT_INFO->rejected.  On a 1
        return it is added to DETECT_INFO->found only if at least one
        multibyte sequence or composition was seen.

        ONE_MORE_BYTE is defined outside this part of the file; presumably
        it jumps to `no_more_source' when the input is exhausted --
        confirm against its definition.  */
1971
1972 static int
1973 detect_coding_emacs_mule (coding, detect_info)
1974      struct coding_system *coding;
1975      struct coding_detection_info *detect_info;
1976 {
1977   const unsigned char *src = coding->source, *src_base;
1978   const unsigned char *src_end = coding->source + coding->src_bytes;
1979   int multibytep = coding->src_multibyte;
1980   int consumed_chars = 0;
1981   int c;
1982   int found = 0;
1983
1984   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1985   /* A coding system of this category is always ASCII compatible.  */
1986   src += coding->head_ascii;
1987
1988   while (1)
1989     {
1990       src_base = src;
1991       ONE_MORE_BYTE (c);
1992       if (c < 0)
1993         continue;
1994       if (c == 0x80)
1995         {
1996           /* Perhaps the start of composite character.  We simply skip
1997              it because analyzing it is too heavy for detecting.  But,
1998              at least, we check that the composite character
1999              consists of more than 4 bytes.  */
               /* NOTE(review): this declaration shadows the function-level
                  src_base inside this block.  The code after the block,
                  and the test at `no_more_source', therefore use the
                  outer src_base, which still points at the 0x80 read
                  above.  Confirm that this is intended.  */
2000           const unsigned char *src_base;
2001
2002         repeat:
2003           src_base = src;
               /* Skip every byte that is 0xA0 or above.  */
2004           do
2005             {
2006               ONE_MORE_BYTE (c);
2007             }
2008           while (c >= 0xA0);
2009
               /* Too short for composition data: leave the loop, which
                  rejects the category.  */
2010           if (src - src_base <= 4)
2011             break;
2012           found = CATEGORY_MASK_EMACS_MULE;
               /* Another composition follows immediately.  */
2013           if (c == 0x80)
2014             goto repeat;
2015         }
2016
2017       if (c < 0x80)
2018         {
               /* ESC, SI and SO make the loop stop, which rejects the
                  category.  */
2019           if (c < 0x20
2020               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2021             break;
2022         }
2023       else
2024         {
               /* The number of bytes still expected is taken from the
                  table entry for the byte at the outer src_base.  */
2025           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2026
2027           while (more_bytes > 0)
2028             {
2029               ONE_MORE_BYTE (c);
2030               if (c < 0xA0)
2031                 {
2032                   src--;        /* Unread the last byte.  */
2033                   break;
2034                 }
2035               more_bytes--;
2036             }
               /* The sequence ended early: reject.  */
2037           if (more_bytes != 0)
2038             break;
2039           found = CATEGORY_MASK_EMACS_MULE;
2040         }
2041     }
       /* Reached only through one of the `break' statements above.  */
2042   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2043   return 0;
2044
2045  no_more_source:
       /* Bytes left over after src_base in the last block mean that the
          text ends in the middle of a sequence.  */
2046   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2047     {
2048       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2049       return 0;
2050     }
2051   detect_info->found |= found;
2052   return 1;
2053 }
2054
2055
2056 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2057    character.  If CMP_STATUS indicates that we must expect MSEQ or
2058    RULE described above, decode it and return the negative value of
2059    the decoded character or rule.  If an invalid byte is found, return
2060    -1.  If SRC is too short, return -2.

        On success *NBYTES receives the number of bytes read from SRC and
        *NCHARS the value of the local consumed_chars (presumably
        maintained by ONE_MORE_BYTE -- confirm).  If ID is non-null, *ID
        receives the id of the charset of the decoded character; it is
        not stored when a RULE is returned.  Nothing is stored on the -1
        and -2 returns.

        ONE_MORE_BYTE is defined outside this part of the file; presumably
        it jumps to `no_more_source' when the input is exhausted and may
        yield a negative value for a source character that is not a plain
        byte -- confirm against its definition.  */
2061
2062 int
2063 emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
2064      struct coding_system *coding;
2065      const unsigned char *src;
2066      int *nbytes, *nchars, *id;
2067      struct composition_status *cmp_status;
2068 {
2069   const unsigned char *src_end = coding->source + coding->src_bytes;
2070   const unsigned char *src_base = src;
2071   int multibytep = coding->src_multibyte;
2072   struct charset *charset;
2073   unsigned code;
2074   int c;
2075   int consumed_chars = 0;
       /* Set when the character was read in the old-form MSEQ encoding;
          the result is then returned negated.  */
2076   int mseq_found = 0;
2077
2078   ONE_MORE_BYTE (c);
2079   if (c < 0)
2080     {
           /* A negative value is taken as a character already: flip the
              sign and use the first entry of emacs_mule_charset.  */
2081       c = -c;
2082       charset = emacs_mule_charset[0];
2083     }
2084   else
2085     {
2086       if (c >= 0xA0)
2087         {
               /* A first byte of 0xA0 or above is accepted only inside an
                  old-form composition.  */
2088           if (cmp_status->state != COMPOSING_NO
2089               && cmp_status->old_form)
2090             {
2091               if (cmp_status->state == COMPOSING_CHAR)
2092                 {
                       /* MSEQ: 0xA0 introduces an ASCII code stored with
                          0x80 added; any other byte is a leading code
                          stored with 0x20 added.  */
2093                   if (c == 0xA0)
2094                     {
2095                       ONE_MORE_BYTE (c);
2096                       c -= 0x80;
2097                       if (c < 0)
2098                         goto invalid_code;
2099                     }
2100                   else
2101                     c -= 0x20;
2102                   mseq_found = 1;
2103                 }
2104               else
2105                 {
                       /* In any other composing state the byte is
                          returned negated without decoding a character.  */
2106                   *nbytes = src - src_base;
2107                   *nchars = consumed_chars;
2108                   return -c;
2109                 }
2110             }
2111           else
2112             goto invalid_code;
2113         }
2114
           /* Dispatch on the table entry for the leading byte C.  */
2115       switch (emacs_mule_bytes[c])
2116         {
2117         case 2:
               /* The charset comes from the leading byte; one more byte
                  of 0xA0 or above supplies the code.  */
2118           if (! (charset = emacs_mule_charset[c]))
2119             goto invalid_code;
2120           ONE_MORE_BYTE (c);
2121           if (c < 0xA0)
2122             goto invalid_code;
2123           code = c & 0x7F;
2124           break;
2125
2126         case 3:
2127           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2128               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2129             {
                   /* After one of these two leading codes the next byte
                      selects the charset and one more byte supplies the
                      code.  */
2130               ONE_MORE_BYTE (c);
2131               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
2132                 goto invalid_code;
2133               ONE_MORE_BYTE (c);
2134               if (c < 0xA0)
2135                 goto invalid_code;
2136               code = c & 0x7F;
2137             }
2138           else
2139             {
                   /* The charset comes from the leading byte; two more
                      bytes supply a two-byte code.  */
2140               if (! (charset = emacs_mule_charset[c]))
2141                 goto invalid_code;
2142               ONE_MORE_BYTE (c);
2143               if (c < 0xA0)
2144                 goto invalid_code;
2145               code = (c & 0x7F) << 8;
2146               ONE_MORE_BYTE (c);
2147               if (c < 0xA0)
2148                 goto invalid_code;
2149               code |= c & 0x7F;
2150             }
2151           break;
2152
2153         case 4:
               /* The next byte selects the charset; two more bytes supply
                  a two-byte code.  */
2154           ONE_MORE_BYTE (c);
2155           if (c < 0 || ! (charset = emacs_mule_charset[c]))
2156             goto invalid_code;
2157           ONE_MORE_BYTE (c);
2158           if (c < 0xA0)
2159             goto invalid_code;
2160           code = (c & 0x7F) << 8;
2161           ONE_MORE_BYTE (c);
2162           if (c < 0xA0)
2163             goto invalid_code;
2164           code |= c & 0x7F;
2165           break;
2166
2167         case 1:
               /* A single byte: the ASCII charset when ASCII_BYTE_P holds,
                  otherwise the eight-bit charset.  */
2168           code = c;
2169           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2170                                      ? charset_ascii : charset_eight_bit);
2171           break;
2172
2173         default:
2174           abort ();
2175         }
2176       c = DECODE_CHAR (charset, code);
2177       if (c < 0)
2178         goto invalid_code;
2179     }
2180   *nbytes = src - src_base;
2181   *nchars = consumed_chars;
2182   if (id)
2183     *id = charset->id;
2184   return (mseq_found ? -c : c);
2185
2186  no_more_source:
2187   return -2;
2188
2189  invalid_code:
2190   return -1;
2191 }
2192
2193
2194 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2195
2196 /* Handle these composition sequence ('|': the end of header elements,
2197    BYTES and CHARS >= 0xA0):
2198
2199    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2200    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2201    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2202
2203    and these old form:
2204
2205    (4) relative composition: 0x80 | MSEQ ... MSEQ
2206    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2207
2208    When the starter 0x80 and the following header elements are found,
2209    this annotation header is produced.
2210
2211         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2212
2213    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2214    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2215
2216    Then, upon reading the following elements, these codes are produced
2217    until the composition end is found:
2218
2219    (1) CHAR ... CHAR
2220    (2) ALT ... ALT CHAR ... CHAR
2221    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2222    (4) CHAR ... CHAR
2223    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2224
2225    When the composition end is found, LENGTH and NCHARS in the
2226    annotation header is updated as below:
2227
2228    (1) LENGTH: unchanged, NCHARS: unchanged
2229    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2230    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2231    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2232    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2233
2234    If an error is found while composing, the annotation header is
2235    changed to the original composition header (plus filler -1s) as
2236    below:
2237
2238    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2239    (5)          [ 0x80 0xFF -1 -1 -1 ]
2240
2241    and the sequence [ -2 DECODED-RULE ] is changed to the original
2242    byte sequence as below:
2243         o the original byte sequence is B: [ B -1 ]
2244         o the original byte sequence is B1 B2: [ B1 B2 ]
2245
2246    Most of the routines are implemented by macros because many
2247    variables and labels in the caller decode_coding_emacs_mule must be
2248    accessible, and they are usually called just once (thus doesn't
2249    increase the size of compiled object).  */
2250
2251 /* Decode a composition rule represented by C as a component of
2252    composition sequence of Emacs 20 style.  Set RULE to the decoded
2253    rule.

        C is modified: 0xA0 is subtracted from it.  The result must lie in
        0..80, otherwise control jumps to the label `invalid_code' of the
        enclosing function.  The value is split into C / 9 and C % 9; a 4
        in either part is replaced by 10 before the two parts are
        combined with COMPOSITION_ENCODE_RULE.  */
2254
2255 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
2256   do {                                                  \
2257     int gref, nref;                                     \
2258                                                         \
2259     c -= 0xA0;                                          \
2260     if (c < 0 || c >= 81)                               \
2261       goto invalid_code;                                \
2262     gref = c / 9, nref = c % 9;                         \
2263     if (gref == 4) gref = 10;                           \
2264     if (nref == 4) nref = 10;                           \
2265     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2266   } while (0)
2267
2268
2269 /* Decode a composition rule represented by C and the following byte
2270    at SRC as a component of composition sequence of Emacs 21 style.
2271    Set RULE to the decoded rule.

        C is overwritten with the following byte, which is fetched with
        ONE_MORE_BYTE.  0x20 is subtracted from each of the two bytes and
        each result must lie in 0..80, otherwise control jumps to the
        label `invalid_code' of the enclosing function.  The two results
        are combined with COMPOSITION_ENCODE_RULE.  */
2272
2273 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
2274   do {                                                  \
2275     int gref, nref;                                     \
2276                                                         \
2277     gref = c - 0x20;                                    \
2278     if (gref < 0 || gref >= 81)                         \
2279       goto invalid_code;                                \
2280     ONE_MORE_BYTE (c);                                  \
2281     nref = c - 0x20;                                    \
2282     if (nref < 0 || nref >= 81)                         \
2283       goto invalid_code;                                \
2284     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2285   } while (0)
2286
2287
2288 /* Start of Emacs 21 style format.  On entry C holds the METHOD byte,
2289    which is 0xF2 plus a composition method.  The next two bytes at SRC
2290    are BYTES and CHARS, each 0xA0 plus a count: the byte length of this
2291    composition information and the number of characters composed by
        this composition.

        Control jumps to the label `invalid_code' of the enclosing function
        when the BYTES byte is negative, when the byte length is below 3,
        when a relative composition has a byte length other than 4, or
        when the character count is not in the range
        1..MAX_COMPOSITION_COMPONENTS-1.  Otherwise CMP_STATUS is set up
        for a new-form composition: the state becomes COMPOSING_CHAR for a
        relative composition and COMPOSING_COMPONENT_CHAR for the other
        methods, ncomps becomes the byte length minus 4, and
        ADD_COMPOSITION_DATA is invoked on CHARBUF.  */
2292
2293 #define DECODE_EMACS_MULE_21_COMPOSITION()                              \
2294   do {                                                                  \
2295     enum composition_method method = c - 0xF2;                          \
2296     int *charbuf_base = charbuf;                                        \
2297     int nbytes, nchars;                                                 \
2298                                                                         \
2299     ONE_MORE_BYTE (c);                                                  \
2300     if (c < 0)                                                          \
2301       goto invalid_code;                                                \
2302     nbytes = c - 0xA0;                                                  \
2303     if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4))  \
2304       goto invalid_code;                                                \
2305     ONE_MORE_BYTE (c);                                                  \
2306     nchars = c - 0xA0;                                                  \
2307     if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
2308       goto invalid_code;                                                \
2309     cmp_status->old_form = 0;                                           \
2310     cmp_status->method = method;                                        \
2311     if (method == COMPOSITION_RELATIVE)                                 \
2312       cmp_status->state = COMPOSING_CHAR;                               \
2313     else                                                                \
2314       cmp_status->state = COMPOSING_COMPONENT_CHAR;                     \
2315     cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
2316     cmp_status->nchars = nchars;                                        \
2317     cmp_status->ncomps = nbytes - 4;                                    \
2318     ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
2319   } while (0)
2320
2321
2322 /* Start of Emacs 20 style format for relative composition.

        Marks CMP_STATUS as old form with method COMPOSITION_RELATIVE and
        state COMPOSING_CHAR, resets its character and component counts
        to 0, and invokes ADD_COMPOSITION_DATA on CHARBUF with 0 for both
        NCHARS and NBYTES.  */
2323
2324 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
2325   do {                                                          \
2326     cmp_status->old_form = 1;                                   \
2327     cmp_status->method = COMPOSITION_RELATIVE;                  \
2328     cmp_status->state = COMPOSING_CHAR;                         \
2329     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2330     cmp_status->nchars = cmp_status->ncomps = 0;                \
2331     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2332   } while (0)
2333
2334
2335 /* Start of Emacs 20 style format for rule-base composition.

        Marks CMP_STATUS as old form with method COMPOSITION_WITH_RULE and
        state COMPOSING_CHAR, resets its character and component counts
        to 0, and invokes ADD_COMPOSITION_DATA on CHARBUF with 0 for both
        NCHARS and NBYTES.  */
2336
2337 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
2338   do {                                                          \
2339     cmp_status->old_form = 1;                                   \
2340     cmp_status->method = COMPOSITION_WITH_RULE;                 \
2341     cmp_status->state = COMPOSING_CHAR;                         \
2342     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2343     cmp_status->nchars = cmp_status->ncomps = 0;                \
2344     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2345   } while (0)
2346
2347
     /* Read the byte that follows the composition starter and begin the
        matching kind of composition.

        If the byte minus 0xF2 lies in the range COMPOSITION_RELATIVE ..
        COMPOSITION_WITH_RULE_ALTCHARS, the Emacs 21 style header is
        decoded.  Otherwise a byte in 0xA0..0xBF starts an Emacs 20 style
        relative composition, and SRC is moved back so that the byte is
        read again as a composition component.  A byte of 0xFF starts an
        Emacs 20 style rule-base composition.  A negative value or any
        other byte makes control jump to the label `invalid_code' of the
        enclosing function.  */
2348 #define DECODE_EMACS_MULE_COMPOSITION_START()           \
2349   do {                                                  \
2350     const unsigned char *current_src = src;             \
2351                                                         \
2352     ONE_MORE_BYTE (c);                                  \
2353     if (c < 0)                                          \
2354       goto invalid_code;                                \
2355     if (c - 0xF2 >= COMPOSITION_RELATIVE                \
2356         && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
2357       DECODE_EMACS_MULE_21_COMPOSITION ();              \
2358     else if (c < 0xA0)                                  \
2359       goto invalid_code;                                \
2360     else if (c < 0xC0)                                  \
2361       {                                                 \
2362         DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
2363         /* Re-read C as a composition component.  */    \
2364         src = current_src;                              \
2365       }                                                 \
2366     else if (c == 0xFF)                                 \
2367       DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();     \
2368     else                                                \
2369       goto invalid_code;                                \
2370   } while (0)
2371
/* Close the composition that is being decoded.  Uses `cmp_status' and
   `charbuf' from the expansion site.  IDX addresses the element that
   lies CMP_STATUS->length elements before `charbuf'.  For the old form
   the element at IDX + 2 is set to the number of characters collected;
   otherwise, for a method greater than COMPOSITION_RELATIVE, the
   element at IDX is set to the element at IDX + 2 minus the length.
   In every case the state goes back to COMPOSING_NO.  */
2372 #define EMACS_MULE_COMPOSITION_END()                            \
2373   do {                                                          \
2374     int idx = - cmp_status->length;                             \
2375                                                                 \
2376     if (cmp_status->old_form)                                   \
2377       charbuf[idx + 2] = cmp_status->nchars;                    \
2378     else if (cmp_status->method > COMPOSITION_RELATIVE)         \
2379       charbuf[idx] = charbuf[idx + 2] - cmp_status->length;     \
2380     cmp_status->state = COMPOSING_NO;                           \
2381   } while (0)
2382
2383
2384 static int
2385 emacs_mule_finish_composition (charbuf, cmp_status)
2386      int *charbuf;
2387      struct composition_status *cmp_status;
2388 {
2389   int idx = - cmp_status->length;
2390   int new_chars;
2391
2392   if (cmp_status->old_form && cmp_status->nchars > 0)
2393     {
2394       charbuf[idx + 2] = cmp_status->nchars;
2395       new_chars = 0;
2396       if (cmp_status->method == COMPOSITION_WITH_RULE
2397           && cmp_status->state == COMPOSING_CHAR)
2398         {
2399           /* The last rule was invalid.  */
2400           int rule = charbuf[-1] + 0xA0;
2401
2402           charbuf[-2] = BYTE8_TO_CHAR (rule);
2403           charbuf[-1] = -1;
2404           new_chars = 1;
2405         }
2406     }
2407   else
2408     {
2409       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2410
2411       if (cmp_status->method == COMPOSITION_WITH_RULE)
2412         {
2413           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2414           charbuf[idx++] = -3;
2415           charbuf[idx++] = 0;
2416           new_chars = 1;
2417         }
2418       else
2419         {
2420           int nchars = charbuf[idx + 1] + 0xA0;
2421           int nbytes = charbuf[idx + 2] + 0xA0;
2422
2423           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2424           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2425           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2426           charbuf[idx++] = -1;
2427           new_chars = 4;
2428         }
2429     }
2430   cmp_status->state = COMPOSING_NO;
2431   return new_chars;
2432 }
2433
/* If a composition is in progress (state other than COMPOSING_NO),
   terminate it with emacs_mule_finish_composition and add the count
   that function returns to `char_offset'.  Uses `cmp_status',
   `charbuf' and `char_offset' from the expansion site.  */
2434 #define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
2435   do {                                                                    \
2436     if (cmp_status->state != COMPOSING_NO)                                \
2437       char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
2438   } while (0)
2439
2440
/* Decode emacs-mule encoded bytes from CODING->source, starting at
   offset CODING->consumed, and append the result to CODING->charbuf
   after the CODING->charbuf_used elements already there.

   The byte 0x80 introduces a composition; its progress is tracked in
   CODING->spec.emacs_mule.cmp_status.  A composition that is still
   open when decoding stops is terminated if this is the last block
   (CODING_MODE_LAST_BLOCK), otherwise its elements are moved to
   cmp_status->carryover and put back at the start of the next call.

   A charset annotation is added whenever the charset of the decoded
   characters changes away from a non-ASCII charset, and once more at
   the end if the last run was not ASCII.

   On return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated.

   NOTE(review): the label `no_more_source' is not referenced in this
   function body; presumably ONE_MORE_BYTE jumps to it when the source
   is exhausted -- confirm against that macro.  Several locals (`src',
   `src_end', `consumed_chars', `multibytep', ...) also look as if
   they are used by macros rather than directly.  */
2441 static void
2442 decode_coding_emacs_mule (coding)
2443      struct coding_system *coding;
2444 {
2445   const unsigned char *src = coding->source + coding->consumed;
2446   const unsigned char *src_end = coding->source + coding->src_bytes;
2447   const unsigned char *src_base;
2448   int *charbuf = coding->charbuf + coding->charbuf_used;
2449   /* We may produce two annotations (charset and composition) in one
2450      loop and one more charset annotation at the end.  */
2451   int *charbuf_end
2452     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2453   int consumed_chars = 0, consumed_chars_base;
2454   int multibytep = coding->src_multibyte;
2455   Lisp_Object attrs, charset_list;
2456   int char_offset = coding->produced_char;
2457   int last_offset = char_offset;
2458   int last_id = charset_ascii;
2459   int eol_crlf =
2460     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2461   int byte_after_cr = -1;
2462   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2463
2464   CODING_GET_INFO (coding, attrs, charset_list);
2465
       /* A composition was left open by the previous call: put its saved
          elements back at the head of the output so it can be resumed.  */
2466   if (cmp_status->state != COMPOSING_NO)
2467     {
2468       int i;
2469
2470       for (i = 0; i < cmp_status->length; i++)
2471         *charbuf++ = cmp_status->carryover[i];
2472       coding->annotated = 1;
2473     }
2474
2475   while (1)
2476     {
2477       int c, id;
2478
           /* Remember where this iteration started; the invalid_code and
              retry paths rewind to this point, and it is what gets
              reported as consumed when the loop is left.  */
2479       src_base = src;
2480       consumed_chars_base = consumed_chars;
2481
           /* Stop when the output buffer (less the room reserved for
              annotations) is full.  If a byte read after a CR is still
              pending, step SRC_BASE back so that byte is not reported as
              consumed.  */
2482       if (charbuf >= charbuf_end)
2483         {
2484           if (byte_after_cr >= 0)
2485             src_base--;
2486           break;
2487         }
2488
2489       if (byte_after_cr >= 0)
2490         c = byte_after_cr, byte_after_cr = -1;
2491       else
2492         ONE_MORE_BYTE (c);
2493
           /* A negative C is stored negated as a plain element; 0x80
              introduces a new composition.  Either way any composition
              still in progress is terminated first.  */
2494       if (c < 0 || c == 0x80)
2495         {
2496           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2497           if (c < 0)
2498             {
2499               *charbuf++ = -c;
2500               char_offset++;
2501             }
2502           else
2503             DECODE_EMACS_MULE_COMPOSITION_START ();
2504           continue;
2505         }
2506
2507       if (c < 0x80)
2508         {
               /* With DOS end-of-line conversion, fetch the byte after a
                  CR right away; it is handled on the next iteration.  */
2509           if (eol_crlf && c == '\r')
2510             ONE_MORE_BYTE (byte_after_cr);
2511           id = charset_ascii;
2512           if (cmp_status->state != COMPOSING_NO)
2513             {
2514               if (cmp_status->old_form)
2515                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2516               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2517                 cmp_status->ncomps--;
2518             }
2519         }
2520       else
2521         {
2522           int nchars, nbytes;
2523
               /* Decode one multibyte sequence starting at SRC_BASE.  A
                  result of -1 is treated as invalid input and -2 ends
                  the loop; other negative results fall through to the
                  composition handling below.  */
2524           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2525                                cmp_status);
2526           if (c < 0)
2527             {
2528               if (c == -1)
2529                 goto invalid_code;
2530               if (c == -2)
2531                 break;
2532             }
2533           src = src_base + nbytes;
2534           consumed_chars = consumed_chars_base + nchars;
2535           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2536             cmp_status->ncomps -= nchars;
2537         }
2538
2539       /* Now if C >= 0, we found a normally encoded character, if C <
2540          0, we found an old-style composition component character or
2541          rule.  */
2542
2543       if (cmp_status->state == COMPOSING_NO)
2544         {
               /* Outside a composition: emit a charset annotation for the
                  run that just ended (unless it was ASCII) whenever the
                  charset changes, then store the character.  */
2545           if (last_id != id)
2546             {
2547               if (last_id != charset_ascii)
2548                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2549                                   last_id);
2550               last_id = id;
2551               last_offset = char_offset;
2552             }
2553           *charbuf++ = c;
2554           char_offset++;
2555         }
2556       else if (cmp_status->state == COMPOSING_CHAR)
2557         {
2558           if (cmp_status->old_form)
2559             {
                   /* Old form: a normally encoded character ends the
                      composition; a component (C < 0) is appended, and
                      the composition is closed once
                      MAX_COMPOSITION_COMPONENTS have been collected.  */
2560               if (c >= 0)
2561                 {
2562                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2563                   *charbuf++ = c;
2564                   char_offset++;
2565                 }
2566               else
2567                 {
2568                   *charbuf++ = -c;
2569                   cmp_status->nchars++;
2570                   cmp_status->length++;
2571                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2572                     EMACS_MULE_COMPOSITION_END ();
2573                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2574                     cmp_status->state = COMPOSING_RULE;
2575                 }
2576             }
2577           else
2578             {
                   /* New form: NCHARS counts down the characters still
                      expected; the composition ends when it reaches 0.  */
2579               *charbuf++ = c;
2580               cmp_status->length++;
2581               cmp_status->nchars--;
2582               if (cmp_status->nchars == 0)
2583                 EMACS_MULE_COMPOSITION_END ();
2584             }
2585         }
2586       else if (cmp_status->state == COMPOSING_RULE)
2587         {
2588           int rule;
2589
2590           if (c >= 0)
2591             {
2592               EMACS_MULE_COMPOSITION_END ();
2593               *charbuf++ = c;
2594               char_offset++;
2595             }
2596           else
2597             {
                   /* Decode the old-style rule and store it as the pair
                      (-2, RULE); a character is expected next.  */
2598               c = -c;
2599               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2600               if (rule < 0)
2601                 goto invalid_code;
2602               *charbuf++ = -2;
2603               *charbuf++ = rule;
2604               cmp_status->length += 2;
2605               cmp_status->state = COMPOSING_CHAR;
2606             }
2607         }
2608       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2609         {
               /* NCOMPS was already decremented above for this character.
                  Zero means the components are complete, a positive value
                  means more follow (preceded by a rule for
                  COMPOSITION_WITH_RULE_ALTCHARS), and a negative value
                  makes the composition be abandoned.  */
2610           *charbuf++ = c;
2611           cmp_status->length++;
2612           if (cmp_status->ncomps == 0)
2613             cmp_status->state = COMPOSING_CHAR;
2614           else if (cmp_status->ncomps > 0)
2615             {
2616               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2617                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2618             }
2619           else
2620             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2621         }
2622       else                      /* COMPOSING_COMPONENT_RULE */
2623         {
2624           int rule;
2625
2626           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2627           if (rule < 0)
2628             goto invalid_code;
2629           *charbuf++ = -2;
2630           *charbuf++ = rule;
2631           cmp_status->length += 2;
2632           cmp_status->ncomps--;
2633           if (cmp_status->ncomps > 0)
2634             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2635           else
2636             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2637         }
2638       continue;
2639
2640     retry:
2641       src = src_base;
2642       consumed_chars = consumed_chars_base;
2643       continue;
2644
2645     invalid_code:
           /* Terminate any pending composition, rewind to the start of
              the offending sequence and emit just its first byte, as is
              when ASCII or as a raw 8-bit character otherwise, counting
              one error.  Decoding resumes with the following byte.  */
2646       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2647       src = src_base;
2648       consumed_chars = consumed_chars_base;
2649       ONE_MORE_BYTE (c);
2650       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2651       char_offset++;
2652       coding->errors++;
2653     }
2654
2655  no_more_source:
2656   if (cmp_status->state != COMPOSING_NO)
2657     {
2658       if (coding->mode & CODING_MODE_LAST_BLOCK)
2659         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2660       else
2661         {
2662           int i;
2663
               /* More input will follow: remove the partial composition
                  from the output and save it for the next call.  */
2664           charbuf -= cmp_status->length;
2665           for (i = 0; i < cmp_status->length; i++)
2666             cmp_status->carryover[i] = charbuf[i];
2667         }
2668     }
2669   if (last_id != charset_ascii)
2670     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2671   coding->consumed_char += consumed_chars_base;
2672   coding->consumed = src_base - coding->source;
2673   coding->charbuf_used = charbuf - coding->charbuf;
2674 }
2675
2676
/* Store in CODES[0] and CODES[1] the leading code(s) that introduce a
   character of the charset whose emacs-mule id is ID.  An ID below
   0xA0 is itself the only leading code and CODES[1] is set to 0.  A
   larger ID is stored in CODES[1], preceded in CODES[0] by
   0x9A (ID < 0xE0), 0x9B (ID < 0xF0), 0x9C (ID < 0xF5) or
   0x9D (any other ID).

   ID is evaluated more than once, so it must have no side effects.
   The expansion does not end with a semicolon: the caller supplies
   it, which keeps the macro usable as a single statement (for
   instance as the body of an `if' that has an `else').  */

#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
2690
2691
/* Encode the characters held in CODING->charbuf
   (CODING->charbuf_used elements) into emacs-mule byte sequences,
   written at CODING->destination + CODING->produced.

   A negative element starts an annotation whose total length is the
   negated value.  Composition annotations are skipped (not yet
   implemented); a charset annotation selects the charset that is
   tried first for the characters that follow.  ASCII characters are
   emitted as one byte, raw 8-bit characters as their byte value, and
   every other character as the leading code(s) of its charset
   followed by its one or two code bytes with the high bit set.  A
   character that belongs to none of the charsets is replaced by
   CODING->default_char.

   When the loop runs to completion the result CODING_RESULT_SUCCESS
   is recorded, CODING->produced_char and CODING->produced are
   updated, and 0 is returned.

   NOTE(review): `multibytep', `dst_end' and `produced_chars' are not
   referenced directly; presumably ASSURE_DESTINATION and the EMIT_*
   macros use them (and may leave the function early) -- confirm
   before renaming or removing any of these locals.  */
2692 static int
2693 encode_coding_emacs_mule (coding)
2694      struct coding_system *coding;
2695 {
2696   int multibytep = coding->dst_multibyte;
2697   int *charbuf = coding->charbuf;
2698   int *charbuf_end = charbuf + coding->charbuf_used;
2699   unsigned char *dst = coding->destination + coding->produced;
2700   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2701   int safe_room = 8;
2702   int produced_chars = 0;
2703   Lisp_Object attrs, charset_list;
2704   int c;
2705   int preferred_charset_id = -1;
2706
2707   CODING_GET_INFO (coding, attrs, charset_list);
       /* Force the coding system's charset list to be
          Vemacs_mule_charset_list.  */
2708   if (! EQ (charset_list, Vemacs_mule_charset_list))
2709     {
2710       CODING_ATTR_CHARSET_LIST (attrs)
2711         = charset_list = Vemacs_mule_charset_list;
2712     }
2713
2714   while (charbuf < charbuf_end)
2715     {
2716       ASSURE_DESTINATION (safe_room);
2717       c = *charbuf++;
2718
2719       if (c < 0)
2720         {
2721           /* Handle an annotation.  */
               /* CHARBUF now points just past the (negative) length
                  element, i.e. at the annotation's type mask.  */
2722           switch (*charbuf)
2723             {
2724             case CODING_ANNOTATE_COMPOSITION_MASK:
2725               /* Not yet implemented.  */
2726               break;
2727             case CODING_ANNOTATE_CHARSET_MASK:
                   /* NOTE(review): index 3 is relative to CHARBUF after
                      the length element was consumed; confirm it matches
                      the layout produced by ADD_CHARSET_DATA.  */
2728               preferred_charset_id = charbuf[3];
                   /* Ignore a preferred charset that is not in the
                      charset list.  */
2729               if (preferred_charset_id >= 0
2730                   && NILP (Fmemq (make_number (preferred_charset_id),
2731                                   charset_list)))
2732                 preferred_charset_id = -1;
2733               break;
2734             default:
2735               abort ();
2736             }
               /* Skip the rest of the annotation (-C elements in total,
                  one of which was already consumed).  */
2737           charbuf += -c - 1;
2738           continue;
2739         }
2740
2741       if (ASCII_CHAR_P (c))
2742         EMIT_ONE_ASCII_BYTE (c);
2743       else if (CHAR_BYTE8_P (c))
2744         {
2745           c = CHAR_TO_BYTE8 (c);
2746           EMIT_ONE_BYTE (c);
2747         }
2748       else
2749         {
2750           struct charset *charset;
2751           unsigned code;
2752           int dimension;
2753           int emacs_mule_id;
2754           unsigned char leading_codes[2];
2755
               /* Try the preferred charset first, falling back to a
                  search through the charset list.  */
2756           if (preferred_charset_id >= 0)
2757             {
2758               charset = CHARSET_FROM_ID (preferred_charset_id);
2759               if (CHAR_CHARSET_P (c, charset))
2760                 code = ENCODE_CHAR (charset, c);
2761               else
2762                 charset = char_charset (c, charset_list, &code);
2763             }
2764           else
2765             charset = char_charset (c, charset_list, &code);
2766           if (! charset)
2767             {
                   /* No charset can encode C: substitute the default
                      character.  NOTE(review): the result of this second
                      char_charset call is not checked for a null
                      pointer before it is used below.  */
2768               c = coding->default_char;
2769               if (ASCII_CHAR_P (c))
2770                 {
2771                   EMIT_ONE_ASCII_BYTE (c);
2772                   continue;
2773                 }
2774               charset = char_charset (c, charset_list, &code);
2775             }
2776           dimension = CHARSET_DIMENSION (charset);
2777           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2778           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2779           EMIT_ONE_BYTE (leading_codes[0]);
2780           if (leading_codes[1])
2781             EMIT_ONE_BYTE (leading_codes[1]);
               /* Code bytes are emitted with their high bit set.  */
2782           if (dimension == 1)
2783             EMIT_ONE_BYTE (code | 0x80);
2784           else
2785             {
2786               code |= 0x8080;
2787               EMIT_ONE_BYTE (code >> 8);
2788               EMIT_ONE_BYTE (code & 0xFF);
2789             }
2790         }
2791     }
2792   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2793   coding->produced_char += produced_chars;
2794   coding->produced = dst - coding->destination;
2795   return 0;
2796 }
2797
2798 \f
2799 /*** 7. ISO2022 handlers ***/
2800
2801 /* The following note describes the coding system ISO2022 briefly.
2802    Since the intention of this note is to help understand the
2803    functions in this file, some parts are NOT ACCURATE or are OVERLY
2804    SIMPLIFIED.  For thorough understanding, please refer to the
2805    original document of ISO2022.  This is equivalent to the standard
2806    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2807
2808    ISO2022 provides many mechanisms to encode several character sets
2809    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2810    is encoded using bytes less than 128.  This may make the encoded
2811    text a little bit longer, but the text passes more easily through
2812    several types of gateway, some of which strip off the MSB (Most
2813    Significant Bit).
2814
2815    There are two kinds of character sets: control character sets and
2816    graphic character sets.  The former contain control characters such
2817    as `newline' and `escape' to provide control functions (control
2818    functions are also provided by escape sequences).  The latter
2819    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2820    two control character sets and many graphic character sets.
2821
2822    Graphic character sets are classified into one of the following
2823    four classes, according to the number of bytes (DIMENSION) and
2824    number of characters in one dimension (CHARS) of the set:
2825    - DIMENSION1_CHARS94
2826    - DIMENSION1_CHARS96
2827    - DIMENSION2_CHARS94
2828    - DIMENSION2_CHARS96
2829
2830    In addition, each character set is assigned an identification tag,
2831    unique for each set, called the "final character" (denoted as <F>
2832    hereafter).  The <F> of each character set is decided by ECMA(*)
2833    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2834    (0x30..0x3F are for private use only).
2835
2836    Note (*): ECMA = European Computer Manufacturers Association
2837
2838    Here are examples of graphic character sets [NAME(<F>)]:
2839         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2840         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2841         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2842         o DIMENSION2_CHARS96 -- none for the moment
2843
2844    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2845         C0 [0x00..0x1F] -- control character plane 0
2846         GL [0x20..0x7F] -- graphic character plane 0
2847         C1 [0x80..0x9F] -- control character plane 1
2848         GR [0xA0..0xFF] -- graphic character plane 1
2849
2850    A control character set is directly designated and invoked to C0 or
2851    C1 by an escape sequence.  The most common case is that:
2852    - ISO646's  control character set is designated/invoked to C0, and
2853    - ISO6429's control character set is designated/invoked to C1,
2854    and usually these designations/invocations are omitted in encoded
2855    text.  In a 7-bit environment, only C0 can be used, and a control
2856    character for C1 is encoded by an appropriate escape sequence to
2857    fit into the environment.  All control characters for C1 are
2858    defined to have corresponding escape sequences.
2859
2860    A graphic character set is at first designated to one of four
2861    graphic registers (G0 through G3), then these graphic registers are
2862    invoked to GL or GR.  These designations and invocations can be
2863    done independently.  The most common case is that G0 is invoked to
2864    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2865    these invocations and designations are omitted in encoded text.
2866    In a 7-bit environment, only GL can be used.
2867
2868    When a graphic character set of CHARS94 is invoked to GL, codes
2869    0x20 and 0x7F of the GL area work as control characters SPACE and
2870    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2871    be used.
2872
2873    There are two ways of invocation: locking-shift and single-shift.
2874    With locking-shift, the invocation lasts until the next different
2875    invocation, whereas with single-shift, the invocation affects the
2876    following character only and doesn't affect the locking-shift
2877    state.  Invocations are done by the following control characters or
2878    escape sequences:
2879
2880    ----------------------------------------------------------------------
2881    abbrev  function                  cntrl escape seq   description
2882    ----------------------------------------------------------------------
2883    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2884    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2885    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2886    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2887    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2888    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2889    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2890    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2891    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2892    ----------------------------------------------------------------------
2893    (*) These are not used by any known coding system.
2894
2895    Control characters for these functions are defined by macros
2896    ISO_CODE_XXX in `coding.h'.
2897
2898    Designations are done by the following escape sequences:
2899    ----------------------------------------------------------------------
2900    escape sequence      description
2901    ----------------------------------------------------------------------
2902    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2903    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2904    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2905    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2906    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2907    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2908    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2909    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2910    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2911    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2912    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2913    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2914    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2915    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2916    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2917    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2918    ----------------------------------------------------------------------
2919
2920    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2921    of dimension 1, chars 94, and final character <F>, etc...
2922
2923    Note (*): Although these designations are not allowed in ISO2022,
2924    Emacs accepts them on decoding, and produces them on encoding
2925    CHARS96 character sets in a coding system which is characterized as
2926    7-bit environment, non-locking-shift, and non-single-shift.
2927
2928    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2929    '(' must be omitted.  We refer to this as "short-form" hereafter.
2930
2931    Now you may notice that there are a lot of ways of encoding the
2932    same multilingual text in ISO2022.  Actually, there exist many
2933    coding systems such as Compound Text (used in X11's inter client
2934    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2935    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2936    localized platforms), and all of these are variants of ISO2022.
2937
2938    In addition to the above, Emacs handles two more kinds of escape
2939    sequences: ISO6429's direction specification and Emacs' private
2940    sequence for specifying character composition.
2941
2942    ISO6429's direction specification takes the following form:
2943         o CSI ']'      -- end of the current direction
2944         o CSI '0' ']'  -- end of the current direction
2945         o CSI '1' ']'  -- start of left-to-right text
2946         o CSI '2' ']'  -- start of right-to-left text
2947    The control character CSI (0x9B: control sequence introducer) is
2948    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2949
2950    Character composition specification takes the following form:
2951         o ESC '0' -- start relative composition
2952         o ESC '1' -- end composition
2953         o ESC '2' -- start rule-base composition (*)
2954         o ESC '3' -- start relative composition with alternate chars  (**)
2955         o ESC '4' -- start rule-base composition with alternate chars  (**)
2956   Since these are not standard escape sequences of any ISO standard,
2957   the use of them with these meanings is restricted to Emacs only.
2958
2959   (*) This form is used only in Emacs 20.7 and older versions,
2960   but newer versions can safely decode it.
2961   (**) This form is used only in Emacs 21.1 and newer versions,
2962   and older versions can't decode it.
2963
2964   Here's a list of example usages of these composition escape
2965   sequences (categorized by `enum composition_method').
2966
2967   COMPOSITION_RELATIVE:
2968         ESC 0 CHAR [ CHAR ] ESC 1
2969   COMPOSITION_WITH_RULE:
2970         ESC 2 CHAR [ RULE CHAR ] ESC 1
2971   COMPOSITION_WITH_ALTCHARS:
2972         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2973   COMPOSITION_WITH_RULE_ALTCHARS:
2974         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2975
/* Table of 256 `enum iso_code_class_type' values.
   NOTE(review): presumably indexed by byte value to give the ISO-2022
   class of each code -- confirm against the code that fills it in.  */
2976 enum iso_code_class_type iso_code_class[256];
2977
/* Return nonzero if the charset whose id is ID is usable with CODING:
   ID must lie in the range 0 .. CODING->max_charset_id and its entry
   in CODING->safe_charsets must not be 255, the value that marks a
   charset as not safe.

   The lower-bound test matters because a charset-table lookup can
   yield a negative id; without it such an id would index before the
   start of the safe_charsets table.  ID is evaluated more than once,
   so it must have no side effects.  */

#define SAFE_CHARSET_P(coding, id)      \
  ((id) >= 0                            \
   && (id) <= (coding)->max_charset_id  \
   && (coding)->safe_charsets[id] != 255)
2981
2982
/* Nonzero if CODING_ISO_INITIAL, applied to the coding system stored
   in coding_categories[CATEGORY] with index 1, yields a non-negative
   value.
   NOTE(review): presumably this means a charset is initially
   designated to graphic register G1, so a shift-out is meaningful for
   that category -- confirm against CODING_ISO_INITIAL.  */
2983 #define SHIFT_OUT_OK(category)  \
2984   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2985
2986 static void
2987 setup_iso_safe_charsets (attrs)
2988      Lisp_Object attrs;
2989 {
2990   Lisp_Object charset_list, safe_charsets;
2991   Lisp_Object request;
2992   Lisp_Object reg_usage;
2993   Lisp_Object tail;
2994   int reg94, reg96;
2995   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2996   int max_charset_id;
2997
2998   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2999   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
3000       && ! EQ (charset_list, Viso_2022_charset_list))
3001     {
3002       CODING_ATTR_CHARSET_LIST (attrs)
3003         = charset_list = Viso_2022_charset_list;
3004       ASET (attrs, coding_attr_safe_charsets, Qnil);
3005     }
3006
3007   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3008     return;
3009
3010   max_charset_id = 0;
3011   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3012     {
3013       int id = XINT (XCAR (tail));
3014       if (max_charset_id < id)
3015         max_charset_id = id;
3016     }
3017
3018   safe_charsets = make_uninit_string (max_charset_id + 1);
3019   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
3020   request = AREF (attrs, coding_attr_iso_request);
3021   reg_usage = AREF (attrs, coding_attr_iso_usage);
3022   reg94 = XINT (XCAR (reg_usage));
3023   reg96 = XINT (XCDR (reg_usage));
3024
3025   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3026     {
3027       Lisp_Object id;
3028       Lisp_Object reg;
3029       struct charset *charset;
3030
3031       id = XCAR (tail);
3032       charset = CHARSET_FROM_ID (XINT (id));
3033       reg = Fcdr (Fassq (id, request));
3034       if (! NILP (reg))
3035         SSET (safe_charsets, XINT (id), XINT (reg));
3036       else if (charset->iso_chars_96)
3037         {
3038           if (reg96 < 4)
3039             SSET (safe_charsets, XINT (id), reg96);
3040         }
3041       else
3042         {
3043           if (reg94 < 4)
3044             SSET (safe_charsets, XINT (id), reg94);
3045         }
3046     }
3047   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3048 }
3049
3050
3051 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3052    Check if a text is encoded in one of ISO-2022 based coding systems.
3053    If it is, return 1, else return 0.  */
3054
3055 static int
3056 detect_coding_iso_2022 (coding, detect_info)
3057      struct coding_system *coding;
3058      struct coding_detection_info *detect_info;
3059 {
3060   const unsigned char *src = coding->source, *src_base = src;
3061   const unsigned char *src_end = coding->source + coding->src_bytes;
3062   int multibytep = coding->src_multibyte;
3063   int single_shifting = 0;
3064   int id;
3065   int c, c1;
3066   int consumed_chars = 0;
3067   int i;
3068   int rejected = 0;
3069   int found = 0;
3070   int composition_count = -1;
3071
3072   detect_info->checked |= CATEGORY_MASK_ISO;
3073
3074   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3075     {
3076       struct coding_system *this = &(coding_categories[i]);
3077       Lisp_Object attrs, val;
3078
3079       if (this->id < 0)
3080         continue;
3081       attrs = CODING_ID_ATTRS (this->id);
3082       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3083           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3084         setup_iso_safe_charsets (attrs);
3085       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3086       this->max_charset_id = SCHARS (val) - 1;
3087       this->safe_charsets = SDATA (val);
3088     }
3089
3090   /* A coding system of this category is always ASCII compatible.  */
3091   src += coding->head_ascii;
3092
3093   while (rejected != CATEGORY_MASK_ISO)
3094     {
3095       src_base = src;
3096       ONE_MORE_BYTE (c);
3097       switch (c)
3098         {
3099         case ISO_CODE_ESC:
3100           if (inhibit_iso_escape_detection)
3101             break;
3102           single_shifting = 0;
3103           ONE_MORE_BYTE (c);
3104           if (c >= '(' && c <= '/')
3105             {
3106               /* Designation sequence for a charset of dimension 1.  */
3107               ONE_MORE_BYTE (c1);
3108               if (c1 < ' ' || c1 >= 0x80
3109                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3110                 /* Invalid designation sequence.  Just ignore.  */
3111                 break;
3112             }
3113           else if (c == '$')
3114             {
3115               /* Designation sequence for a charset of dimension 2.  */
3116               ONE_MORE_BYTE (c);
3117               if (c >= '@' && c <= 'B')
3118                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3119                 id = iso_charset_table[1][0][c];
3120               else if (c >= '(' && c <= '/')
3121                 {
3122                   ONE_MORE_BYTE (c1);
3123                   if (c1 < ' ' || c1 >= 0x80
3124                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3125                     /* Invalid designation sequence.  Just ignore.  */
3126                     break;
3127                 }
3128               else
3129                 /* Invalid designation sequence.  Just ignore it.  */
3130                 break;
3131             }
3132           else if (c == 'N' || c == 'O')
3133             {
3134               /* ESC <Fe> for SS2 or SS3.  */
3135               single_shifting = 1;
3136               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3137               break;
3138             }
3139           else if (c == '1')
3140             {
3141               /* End of composition.  */
3142               if (composition_count < 0
3143                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3144                 /* Invalid */
3145                 break;
3146               composition_count = -1;
3147               found |= CATEGORY_MASK_ISO;
3148             }
3149           else if (c >= '0' && c <= '4')
3150             {
3151               /* ESC <Fp> for start/end composition.  */
3152               composition_count = 0;
3153               break;
3154             }
3155           else
3156             {
3157               /* Invalid escape sequence.  Just ignore it.  */
3158               break;
3159             }
3160
3161           /* We found a valid designation sequence for CHARSET.  */
3162           rejected |= CATEGORY_MASK_ISO_8BIT;
3163           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3164                               id))
3165             found |= CATEGORY_MASK_ISO_7;
3166           else
3167             rejected |= CATEGORY_MASK_ISO_7;
3168           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3169                               id))
3170             found |= CATEGORY_MASK_ISO_7_TIGHT;
3171           else
3172             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3173           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3174                               id))
3175             found |= CATEGORY_MASK_ISO_7_ELSE;
3176           else
3177             rejected |= CATEGORY_MASK_ISO_7_ELSE;
3178           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3179                               id))
3180             found |= CATEGORY_MASK_ISO_8_ELSE;
3181           else
3182             rejected |= CATEGORY_MASK_ISO_8_ELSE;
3183           break;
3184
3185         case ISO_CODE_SO:
3186         case ISO_CODE_SI:
3187           /* Locking shift out/in.  */
3188           if (inhibit_iso_escape_detection)
3189             break;
3190           single_shifting = 0;
3191           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3192           break;
3193
3194         case ISO_CODE_CSI:
3195           /* Control sequence introducer.  */
3196           single_shifting = 0;
3197           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3198           found |= CATEGORY_MASK_ISO_8_ELSE;
3199           goto check_extra_latin;
3200
3201         case ISO_CODE_SS2:
3202         case ISO_CODE_SS3:
3203           /* Single shift.   */
3204           if (inhibit_iso_escape_detection)
3205             break;
3206           single_shifting = 0;
3207           rejected |= CATEGORY_MASK_ISO_7BIT;
3208           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3209               & CODING_ISO_FLAG_SINGLE_SHIFT)
3210             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
3211           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3212               & CODING_ISO_FLAG_SINGLE_SHIFT)
3213             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3214           if (single_shifting)
3215             break;
3216           goto check_extra_latin;
3217
3218         default:
3219           if (c < 0)
3220             continue;
3221           if (c < 0x80)
3222             {
3223               if (composition_count >= 0)
3224                 composition_count++;
3225               single_shifting = 0;
3226               break;
3227             }
3228           if (c >= 0xA0)
3229             {
3230               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3231               found |= CATEGORY_MASK_ISO_8_1;
3232               /* Check the length of succeeding codes of the range
3233                  0xA0..0FF.  If the byte length is even, we include
3234                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3235                  only when we are not single shifting.  */
3236               if (! single_shifting
3237                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3238                 {
3239                   int i = 1;
3240                   while (src < src_end)
3241                     {
3242                       src_base = src;
3243                       ONE_MORE_BYTE (c);
3244                       if (c < 0xA0)
3245                         {
3246                           src = src_base;
3247                           break;
3248                         }
3249                       i++;
3250                     }
3251
3252                   if (i & 1 && src < src_end)
3253                     {
3254                       rejected |= CATEGORY_MASK_ISO_8_2;
3255                       if (composition_count >= 0)
3256                         composition_count += i;
3257                     }
3258                   else
3259                     {
3260                       found |= CATEGORY_MASK_ISO_8_2;
3261                       if (composition_count >= 0)
3262                         composition_count += i / 2;
3263                     }
3264                 }
3265               break;
3266             }
3267         check_extra_latin:
3268           single_shifting = 0;
3269           if (! VECTORP (Vlatin_extra_code_table)
3270               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3271             {
3272               rejected = CATEGORY_MASK_ISO;
3273               break;
3274             }
3275           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3276               & CODING_ISO_FLAG_LATIN_EXTRA)
3277             found |= CATEGORY_MASK_ISO_8_1;
3278           else
3279             rejected |= CATEGORY_MASK_ISO_8_1;
3280           rejected |= CATEGORY_MASK_ISO_8_2;
3281         }
3282     }
3283   detect_info->rejected |= CATEGORY_MASK_ISO;
3284   return 0;
3285
3286  no_more_source:
3287   detect_info->rejected |= rejected;
3288   detect_info->found |= (found & ~rejected);
3289   return 1;
3290 }
3291
3292
/* Designate to graphic register REG of CODING the charset that the
   ISO charset table gives for DIM, CHARS_96 and the final byte FINAL.

   When FINAL is outside the range '0'..127, the table has no charset
   for it, or the charset is not safe for CODING, store -2 as the
   designation of REG and set CHARS_96 to -1; a CHARS_96 of -1 tells
   the caller that the escape sequence should be kept.  CHARS_96 is
   also set to -1 when REG held such an invalid designation (-2) and
   is now designated ASCII.

   charset_jisx0201_roman is replaced by charset_ascii when CODING has
   CODING_ISO_FLAG_USE_ROMAN, and charset_jisx0208_1978 by
   charset_jisx0208 when it has CODING_ISO_FLAG_USE_OLDJIS.

   CHARS_96 must be an lvalue.  The macro refers to the variable
   `coding' of the expansion site.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int designated = -1;                                                \
                                                                        \
    if ((final) >= '0' && (final) < 128)                                \
      designated = ISO_CHARSET_TABLE (dim, chars_96, final);            \
    if (designated < 0 || ! SAFE_CHARSET_P (coding, designated))        \
      {                                                                 \
        /* Invalid or unsafe designation.  */                           \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        int formerly = CODING_ISO_DESIGNATION (coding, reg);            \
                                                                        \
        if (designated == charset_jisx0201_roman)                       \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)  \
              designated = charset_ascii;                               \
          }                                                             \
        else if (designated == charset_jisx0208_1978)                   \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
              designated = charset_jisx0208;                            \
          }                                                             \
        CODING_ISO_DESIGNATION (coding, reg) = designated;              \
        /* A valid ASCII designation that follows an invalid one must   \
           be kept as well.  */                                         \
        if (formerly == -2 && designated == charset_ascii)              \
          chars_96 = -1;                                                \
      }                                                                 \
  } while (0)
3325
3326
/* Handle these composition sequences (ALT: alternate char):
3328
3329    (1) relative composition: ESC 0 CHAR ... ESC 1
3330    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3331    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3332    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3333
3334    When the start sequence (ESC 0/2/3/4) is found, this annotation
3335    header is produced.
3336
3337         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3338
3339    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3340    produced until the end sequence (ESC 1) is found:
3341
3342    (1) CHAR ... CHAR
3343    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3344    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3345    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3346
   When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
   annotation header are updated as below:
3349
3350    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3351    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3352    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3353    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3354
   If an error is found while composing, the annotation header is
   changed to:

        [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3359
3360    and the sequence [ -2 DECODED-RULE ] is changed to the original
3361    byte sequence as below:
3362         o the original byte sequence is B: [ B -1 ]
3363         o the original byte sequence is B1 B2: [ B1 B2 ]
3364    and the sequence [ -1 -1 ] is changed to the original byte
3365    sequence:
3366         [ ESC '0' ]
3367 */
3368
/* Decode the composition rule that starts with the byte in C1,
   reading one more source byte when the rule uses the two-byte
   format.  Store the encoded rule in RULE and the number of bytes the
   rule occupied in NBYTES.  RULE is negative when the rule is
   invalid; when C1 itself is below 32, NBYTES is left untouched.

   RULE and NBYTES must be lvalues.  The macro refers to `c1' of the
   expansion site and fetches the second byte with ONE_MORE_BYTE.  */

#define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
  do {                                                                  \
    rule = c1 - 32;                                                     \
    if (rule >= 81)             /* new format (after ver.21) */         \
      {                                                                 \
        int second_byte;                                                \
                                                                        \
        ONE_MORE_BYTE (second_byte);                                    \
        rule = COMPOSITION_ENCODE_RULE (rule - 81, second_byte - 32);   \
        if (rule >= 0)                                                  \
          rule += 0x100;   /* to distinguish it from the old format */  \
        nbytes = 2;                                                     \
      }                                                                 \
    else if (rule >= 0)         /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (rule) / 9;                                          \
        int nref = (rule) % 9;                                          \
                                                                        \
        /* Reference point 4 of the old format is stored as 10.  */     \
        if (gref == 4)                                                  \
          gref = 10;                                                    \
        if (nref == 4)                                                  \
          nref = 10;                                                    \
        rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
        nbytes = 1;                                                     \
      }                                                                 \
  } while (0)
3399
/* Convert the decoded composition rule RULE back to its source bytes
   and store them in charbuf[idx] and charbuf[idx + 1].  A rule below
   0x100 uses the old one-byte format and is stored as [ BYTE -1 ];
   any other rule uses the new two-byte format and is stored as
   [ BYTE1 BYTE2 ].  `new_chars' is advanced by the number of bytes
   recovered (1 or 2).

   RULE is evaluated exactly once, before either slot is written, so
   it may be an arbitrary expression, including one that reads
   charbuf[idx + 1].  The macro refers to `charbuf', `idx' and
   `new_chars' of the expansion site.  */
#define ENCODE_COMPOSITION_RULE(rule)                           \
  do {                                                          \
    int encoded_rule = (rule);                                  \
    int gref = (encoded_rule % 0x100) / 12;                     \
    int nref = (encoded_rule % 0x100) % 12;                     \
                                                                \
    if (encoded_rule < 0x100)   /* old format */                \
      {                                                         \
        /* Reference point 10 is written as 4 in this format. */ \
        if (gref == 10) gref = 4;                               \
        if (nref == 10) nref = 4;                               \
        charbuf[idx] = 32 + gref * 9 + nref;                    \
        charbuf[idx + 1] = -1;                                  \
        new_chars++;                                            \
      }                                                         \
    else                                /* new format */        \
      {                                                         \
        charbuf[idx] = 32 + 81 + gref;                          \
        charbuf[idx + 1] = 32 + nref;                           \
        new_chars += 2;                                         \
      }                                                         \
  } while (0)
3419
3420 /* Finish the current composition as invalid.  */
3421
3422 static int finish_composition P_ ((int *, struct composition_status *));
3423
3424 static int
3425 finish_composition (charbuf, cmp_status)
3426      int *charbuf;
3427      struct composition_status *cmp_status;
3428 {
3429   int idx = - cmp_status->length;
3430   int new_chars;
3431
3432   /* Recover the original ESC sequence */
3433   charbuf[idx++] = ISO_CODE_ESC;
3434   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3435                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3436                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3437                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3438                     : '4');
3439   charbuf[idx++] = -2;
3440   charbuf[idx++] = 0;
3441   charbuf[idx++] = -1;
3442   new_chars = cmp_status->nchars;
3443   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3444     for (; idx < 0; idx++)
3445       {
3446         int elt = charbuf[idx];
3447
3448         if (elt == -2)
3449           {
3450             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3451             idx++;
3452           }
3453         else if (elt == -1)
3454           {
3455             charbuf[idx++] = ISO_CODE_ESC;
3456             charbuf[idx] = '0';
3457             new_chars += 2;
3458           }
3459       }
3460   cmp_status->state = COMPOSING_NO;
3461   return new_chars;
3462 }
3463
/* If characters are under composition (the state is not
   COMPOSING_NO), finish the composition with finish_composition and
   add the count it returns to `char_offset'.  The macro refers to
   `cmp_status', `charbuf' and `char_offset' of the expansion site.  */
#define MAYBE_FINISH_COMPOSITION()                              \
  do {                                                          \
    if (! (cmp_status->state == COMPOSING_NO))                  \
      char_offset += finish_composition (charbuf, cmp_status);  \
  } while (0)
3470
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   C1 is the byte that followed ESC.

   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   An ESC 0 that arrives while the components of an altchar or
   alt&rule composition are being read does not start a new
   composition: [ -1 -1 ] is stored, the recorded length grows by 2,
   and the state becomes COMPOSING_CHAR.

   Otherwise any composition still in progress is finished, and the
   annotation header of a new composition is produced with
   ADD_COMPOSITION_DATA (NCHARS is not known yet, so 0 is passed).
   Following the header format described with the composition
   sequences earlier in this file, that is:

   [ -LENGTH CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]

   The length recorded for the header is MAX_ANNOTATION_LENGTH
   (presumably 5, matching the five slots above -- confirm against
   its definition).

   The macro refers to `cmp_status', `charbuf', `char_offset' and
   `coding' of the expansion site.  */

#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    if ((c1) == '0'                                                        \
        && ((cmp_status->method == COMPOSITION_WITH_ALTCHARS               \
             && cmp_status->state == COMPOSING_COMPONENT_CHAR)             \
            || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS       \
                && cmp_status->state == COMPOSING_COMPONENT_RULE)))        \
      {                                                                    \
        /* End of the components; the composed chars follow.  */           \
        charbuf[0] = -1;                                                   \
        charbuf[1] = -1;                                                   \
        charbuf += 2;                                                      \
        cmp_status->length += 2;                                           \
        cmp_status->state = COMPOSING_CHAR;                                \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        /* This must come first: it still needs the method and the        \
           length of the composition being abandoned.  */                  \
        MAYBE_FINISH_COMPOSITION ();                                       \
        if ((c1) == '0')                                                   \
          cmp_status->method = COMPOSITION_RELATIVE;                       \
        else if ((c1) == '2')                                              \
          cmp_status->method = COMPOSITION_WITH_RULE;                      \
        else if ((c1) == '3')                                              \
          cmp_status->method = COMPOSITION_WITH_ALTCHARS;                  \
        else                                                               \
          cmp_status->method = COMPOSITION_WITH_RULE_ALTCHARS;             \
        /* ESC 3 and ESC 4 begin with the components.  */                  \
        if ((c1) > '2')                                                    \
          cmp_status->state = COMPOSING_COMPONENT_CHAR;                    \
        else                                                               \
          cmp_status->state = COMPOSING_CHAR;                              \
        ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
        cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
        cmp_status->ncomps = 0;                                            \
        cmp_status->nchars = 0;                                            \
        coding->annotated = 1;                                             \
      }                                                                    \
  } while (0)
3511
3512
/* Handle composition end sequence ESC 1.

   The sequence is invalid when no composed character has been stored,
   or when it arrives in the wrong state: with COMPOSITION_WITH_RULE
   the state must not be COMPOSING_CHAR, with every other method it
   must be COMPOSING_CHAR.  In that case the composition is finished
   as invalid and control jumps to the label `invalid_code'.

   Otherwise the first element of the annotation header, found at
   charbuf[-cmp_status->length], is decreased by the room taken by the
   components (NCOMPS + 2 for COMPOSITION_WITH_ALTCHARS, NCOMPS * 3
   for COMPOSITION_WITH_RULE_ALTCHARS), NCHARS is stored in the third
   header element and added to `char_offset', and the state becomes
   COMPOSING_NO.

   The macro refers to `cmp_status', `charbuf' and `char_offset' of
   the expansion site.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int *cmp_header;                                                    \
                                                                        \
    if (cmp_status->nchars == 0                                         \
        || ((cmp_status->method == COMPOSITION_WITH_RULE)               \
            == (cmp_status->state == COMPOSING_CHAR)))                  \
      {                                                                 \
        MAYBE_FINISH_COMPOSITION ();                                    \
        goto invalid_code;                                              \
      }                                                                 \
    cmp_header = charbuf - cmp_status->length;                          \
    if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
      cmp_header[0] -= cmp_status->ncomps + 2;                          \
    else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
      cmp_header[0] -= cmp_status->ncomps * 3;                          \
    cmp_header[2] = cmp_status->nchars;                                 \
    char_offset += cmp_status->nchars;                                  \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
3532
/* Store the composition rule RULE in charbuf as the pair [ -2 RULE ],
   add 2 to the recorded length, and step the state of cmp_status back
   by one.  The macro refers to `charbuf' and `cmp_status' of the
   expansion site.  */

#define STORE_COMPOSITION_RULE(rule)    \
  do {                                  \
    charbuf[0] = -2;                    \
    charbuf[1] = (rule);                \
    charbuf += 2;                       \
    cmp_status->length += 2;            \
    --cmp_status->state;                \
  } while (0)
3542
/* Store C, a composed char or a component char, in charbuf and update
   cmp_status: the recorded length grows by one; NCHARS is incremented
   when the state is COMPOSING_CHAR and NCOMPS otherwise.  The state
   then advances by one when the method is COMPOSITION_WITH_RULE, or
   when it is COMPOSITION_WITH_RULE_ALTCHARS and the state is
   COMPOSING_COMPONENT_CHAR.  The macro refers to `charbuf' and
   `cmp_status' of the expansion site.  */

#define STORE_COMPOSITION_CHAR(c)                                       \
  do {                                                                  \
    *charbuf = (c);                                                     \
    charbuf++;                                                          \
    cmp_status->length += 1;                                            \
    if (cmp_status->state != COMPOSING_CHAR)                            \
      cmp_status->ncomps += 1;                                          \
    else                                                                \
      cmp_status->nchars += 1;                                          \
    if (cmp_status->method == COMPOSITION_WITH_RULE)                    \
      cmp_status->state++;                                              \
    else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS       \
             && cmp_status->state == COMPOSING_COMPONENT_CHAR)          \
      cmp_status->state++;                                              \
  } while (0)
3559
3560
3561 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3562
3563 static void
3564 decode_coding_iso_2022 (coding)
3565      struct coding_system *coding;
3566 {
3567   const unsigned char *src = coding->source + coding->consumed;
3568   const unsigned char *src_end = coding->source + coding->src_bytes;
3569   const unsigned char *src_base;
3570   int *charbuf = coding->charbuf + coding->charbuf_used;
3571   /* We may produce two annocations (charset and composition) in one
3572      loop and one more charset annocation at the end.  */
3573   int *charbuf_end
3574     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3575   int consumed_chars = 0, consumed_chars_base;
3576   int multibytep = coding->src_multibyte;
3577   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3578   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3579   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3580   int charset_id_2, charset_id_3;
3581   struct charset *charset;
3582   int c;
3583   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3584   Lisp_Object attrs, charset_list;
3585   int char_offset = coding->produced_char;
3586   int last_offset = char_offset;
3587   int last_id = charset_ascii;
3588   int eol_crlf =
3589     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3590   int byte_after_cr = -1;
3591   int i;
3592
3593   CODING_GET_INFO (coding, attrs, charset_list);
3594   setup_iso_safe_charsets (attrs);
3595   /* Charset list may have been changed.  */
3596   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3597   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3598
3599   if (cmp_status->state != COMPOSING_NO)
3600     {
3601       for (i = 0; i < cmp_status->length; i++)
3602         *charbuf++ = cmp_status->carryover[i];
3603       coding->annotated = 1;
3604     }
3605
3606   while (1)
3607     {
3608       int c1, c2, c3;
3609
3610       src_base = src;
3611       consumed_chars_base = consumed_chars;
3612
3613       if (charbuf >= charbuf_end)
3614         {
3615           if (byte_after_cr >= 0)
3616             src_base--;
3617           break;
3618         }
3619
3620       if (byte_after_cr >= 0)
3621         c1 = byte_after_cr, byte_after_cr = -1;
3622       else
3623         ONE_MORE_BYTE (c1);
3624       if (c1 < 0)
3625         goto invalid_code;
3626
3627       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3628         {
3629           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3630           char_offset++;
3631           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3632           continue;
3633         }
3634
3635       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3636         {
3637           if (c1 == ISO_CODE_ESC)
3638             {
3639               if (src + 1 >= src_end)
3640                 goto no_more_source;
3641               *charbuf++ = ISO_CODE_ESC;
3642               char_offset++;
3643               if (src[0] == '%' && src[1] == '@')
3644                 {
3645                   src += 2;
3646                   consumed_chars += 2;
3647                   char_offset += 2;
3648                   /* We are sure charbuf can contain two more chars. */
3649                   *charbuf++ = '%';
3650                   *charbuf++ = '@';
3651                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3652                 }
3653             }
3654           else
3655             {
3656               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3657               char_offset++;
3658             }
3659           continue;
3660         }
3661
3662       if ((cmp_status->state == COMPOSING_RULE
3663            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3664           && c1 != ISO_CODE_ESC)
3665         {
3666           int rule, nbytes;
3667
3668           DECODE_COMPOSITION_RULE (rule, nbytes);
3669           if (rule < 0)
3670             goto invalid_code;
3671           STORE_COMPOSITION_RULE (rule);
3672           continue;
3673         }
3674
3675       /* We produce at most one character.  */
3676       switch (iso_code_class [c1])
3677         {
3678         case ISO_0x20_or_0x7F:
3679           if (charset_id_0 < 0
3680               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3681             /* This is SPACE or DEL.  */
3682             charset = CHARSET_FROM_ID (charset_ascii);
3683           else
3684             charset = CHARSET_FROM_ID (charset_id_0);
3685           break;
3686
3687         case ISO_graphic_plane_0:
3688           if (charset_id_0 < 0)
3689             charset = CHARSET_FROM_ID (charset_ascii);
3690           else
3691             charset = CHARSET_FROM_ID (charset_id_0);
3692           break;
3693
3694         case ISO_0xA0_or_0xFF:
3695           if (charset_id_1 < 0
3696               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3697               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3698             goto invalid_code;
3699           /* This is a graphic character, we fall down ... */
3700
3701         case ISO_graphic_plane_1:
3702           if (charset_id_1 < 0)
3703             goto invalid_code;
3704           charset = CHARSET_FROM_ID (charset_id_1);
3705           break;
3706
3707         case ISO_control_0:
3708           if (eol_crlf && c1 == '\r')
3709             ONE_MORE_BYTE (byte_after_cr);
3710           MAYBE_FINISH_COMPOSITION ();
3711           charset = CHARSET_FROM_ID (charset_ascii);
3712           break;
3713
3714         case ISO_control_1:
3715           goto invalid_code;
3716
3717         case ISO_shift_out:
3718           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3719               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3720             goto invalid_code;
3721           CODING_ISO_INVOCATION (coding, 0) = 1;
3722           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3723           continue;
3724
3725         case ISO_shift_in:
3726           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3727             goto invalid_code;
3728           CODING_ISO_INVOCATION (coding, 0) = 0;
3729           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3730           continue;
3731
3732         case ISO_single_shift_2_7:
3733           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3734             goto invalid_code;
3735         case ISO_single_shift_2:
3736           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3737             goto invalid_code;
3738           /* SS2 is handled as an escape sequence of ESC 'N' */
3739           c1 = 'N';
3740           goto label_escape_sequence;
3741
3742         case ISO_single_shift_3:
3743           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3744             goto invalid_code;
3745           /* SS2 is handled as an escape sequence of ESC 'O' */
3746           c1 = 'O';
3747           goto label_escape_sequence;
3748
3749         case ISO_control_sequence_introducer:
3750           /* CSI is handled as an escape sequence of ESC '[' ...  */
3751           c1 = '[';
3752           goto label_escape_sequence;
3753
3754         case ISO_escape:
3755           ONE_MORE_BYTE (c1);
3756         label_escape_sequence:
3757           /* Escape sequences handled here are invocation,
3758              designation, direction specification, and character
3759              composition specification.  */
3760           switch (c1)
3761             {
3762             case '&':           /* revision of following character set */
3763               ONE_MORE_BYTE (c1);
3764               if (!(c1 >= '@' && c1 <= '~'))
3765                 goto invalid_code;
3766               ONE_MORE_BYTE (c1);
3767               if (c1 != ISO_CODE_ESC)
3768                 goto invalid_code;
3769               ONE_MORE_BYTE (c1);
3770               goto label_escape_sequence;
3771
3772             case '$':           /* designation of 2-byte character set */
3773               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3774                 goto invalid_code;
3775               {
3776                 int reg, chars96;
3777
3778                 ONE_MORE_BYTE (c1);
3779                 if (c1 >= '@' && c1 <= 'B')
3780                   {     /* designation of JISX0208.1978, GB2312.1980,
3781                            or JISX0208.1980 */
3782                     reg = 0, chars96 = 0;
3783                   }
3784                 else if (c1 >= 0x28 && c1 <= 0x2B)
3785                   { /* designation of DIMENSION2_CHARS94 character set */
3786                     reg = c1 - 0x28, chars96 = 0;
3787                     ONE_MORE_BYTE (c1);
3788                   }
3789                 else if (c1 >= 0x2C && c1 <= 0x2F)
3790                   { /* designation of DIMENSION2_CHARS96 character set */
3791                     reg = c1 - 0x2C, chars96 = 1;
3792                     ONE_MORE_BYTE (c1);
3793                   }
3794                 else
3795                   goto invalid_code;
3796                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3797                 /* We must update these variables now.  */
3798                 if (reg == 0)
3799                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3800                 else if (reg == 1)
3801                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3802                 if (chars96 < 0)
3803                   goto invalid_code;
3804               }
3805               continue;
3806
3807             case 'n':           /* invocation of locking-shift-2 */
3808               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3809                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3810                 goto invalid_code;
3811               CODING_ISO_INVOCATION (coding, 0) = 2;
3812               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3813               continue;
3814
3815             case 'o':           /* invocation of locking-shift-3 */
3816               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3817                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3818                 goto invalid_code;
3819               CODING_ISO_INVOCATION (coding, 0) = 3;
3820               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3821               continue;
3822
3823             case 'N':           /* invocation of single-shift-2 */
3824               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3825                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3826                 goto invalid_code;
3827               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3828               if (charset_id_2 < 0)
3829                 charset = CHARSET_FROM_ID (charset_ascii);
3830               else
3831                 charset = CHARSET_FROM_ID (charset_id_2);
3832               ONE_MORE_BYTE (c1);
3833               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3834                 goto invalid_code;
3835               break;
3836
3837             case 'O':           /* invocation of single-shift-3 */
3838               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3839                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3840                 goto invalid_code;
3841               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3842               if (charset_id_3 < 0)
3843                 charset = CHARSET_FROM_ID (charset_ascii);
3844               else
3845                 charset = CHARSET_FROM_ID (charset_id_3);
3846               ONE_MORE_BYTE (c1);
3847               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3848                 goto invalid_code;
3849               break;
3850
3851             case '0': case '2': case '3': case '4': /* start composition */
3852               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3853                 goto invalid_code;
3854               if (last_id != charset_ascii)
3855                 {
3856                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3857                   last_id = charset_ascii;
3858                   last_offset = char_offset;
3859                 }
3860               DECODE_COMPOSITION_START (c1);
3861               continue;
3862
3863             case '1':           /* end composition */
3864               if (cmp_status->state == COMPOSING_NO)
3865                 goto invalid_code;
3866               DECODE_COMPOSITION_END ();
3867               continue;
3868
3869             case '[':           /* specification of direction */
3870               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3871                 goto invalid_code;
3872               /* For the moment, nested direction is not supported.
3873                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3874                  left-to-right, and nozero means right-to-left.  */
3875               ONE_MORE_BYTE (c1);
3876               switch (c1)
3877                 {
3878                 case ']':       /* end of the current direction */
3879                   coding->mode &= ~CODING_MODE_DIRECTION;
3880
3881                 case '0':       /* end of the current direction */
3882                 case '1':       /* start of left-to-right direction */
3883                   ONE_MORE_BYTE (c1);
3884                   if (c1 == ']')
3885                     coding->mode &= ~CODING_MODE_DIRECTION;
3886                   else
3887                     goto invalid_code;
3888                   break;
3889
3890                 case '2':       /* start of right-to-left direction */
3891                   ONE_MORE_BYTE (c1);
3892                   if (c1 == ']')
3893                     coding->mode |= CODING_MODE_DIRECTION;
3894                   else
3895                     goto invalid_code;
3896                   break;
3897
3898                 default:
3899                   goto invalid_code;
3900                 }
3901               continue;
3902
3903             case '%':
3904               ONE_MORE_BYTE (c1);
3905               if (c1 == '/')
3906                 {
3907                   /* CTEXT extended segment:
3908                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3909                      We keep these bytes as is for the moment.
3910                      They may be decoded by post-read-conversion.  */
3911                   int dim, M, L;
3912                   int size;
3913
3914                   ONE_MORE_BYTE (dim);
3915                   if (dim < 0 || dim > 4)
3916                     goto invalid_code;
3917                   ONE_MORE_BYTE (M);
3918                   if (M < 128)
3919                     goto invalid_code;
3920                   ONE_MORE_BYTE (L);
3921                   if (L < 128)
3922                     goto invalid_code;
3923                   size = ((M - 128) * 128) + (L - 128);
3924                   if (charbuf + 6 > charbuf_end)
3925                     goto break_loop;
3926                   *charbuf++ = ISO_CODE_ESC;
3927                   *charbuf++ = '%';
3928                   *charbuf++ = '/';
3929                   *charbuf++ = dim;
3930                   *charbuf++ = BYTE8_TO_CHAR (M);
3931                   *charbuf++ = BYTE8_TO_CHAR (L);
3932                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3933                 }
3934               else if (c1 == 'G')
3935                 {
3936                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3937                      ESC % G --UTF-8-BYTES-- ESC % @
3938                      We keep these bytes as is for the moment.
3939                      They may be decoded by post-read-conversion.  */
3940                   if (charbuf + 3 > charbuf_end)
3941                     goto break_loop;
3942                   *charbuf++ = ISO_CODE_ESC;
3943                   *charbuf++ = '%';
3944                   *charbuf++ = 'G';
3945                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3946                 }
3947               else
3948                 goto invalid_code;
3949               continue;
3950               break;
3951
3952             default:
3953               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3954                 goto invalid_code;
3955               {
3956                 int reg, chars96;
3957
3958                 if (c1 >= 0x28 && c1 <= 0x2B)
3959                   { /* designation of DIMENSION1_CHARS94 character set */
3960                     reg = c1 - 0x28, chars96 = 0;
3961                     ONE_MORE_BYTE (c1);
3962                   }
3963                 else if (c1 >= 0x2C && c1 <= 0x2F)
3964                   { /* designation of DIMENSION1_CHARS96 character set */
3965                     reg = c1 - 0x2C, chars96 = 1;
3966                     ONE_MORE_BYTE (c1);
3967                   }
3968                 else
3969                   goto invalid_code;
3970                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3971                 /* We must update these variables now.  */
3972                 if (reg == 0)
3973                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3974                 else if (reg == 1)
3975                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3976                 if (chars96 < 0)
3977                   goto invalid_code;
3978               }
3979               continue;
3980             }
3981         }
3982
3983       if (cmp_status->state == COMPOSING_NO
3984           && charset->id != charset_ascii
3985           && last_id != charset->id)
3986         {
3987           if (last_id != charset_ascii)
3988             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3989           last_id = charset->id;
3990           last_offset = char_offset;
3991         }
3992
3993       /* Now we know CHARSET and 1st position code C1 of a character.
3994          Produce a decoded character while getting 2nd and 3rd
3995          position codes C2, C3 if necessary.  */
3996       if (CHARSET_DIMENSION (charset) > 1)
3997         {
3998           ONE_MORE_BYTE (c2);
3999           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
4000               || ((c1 & 0x80) != (c2 & 0x80)))
4001             /* C2 is not in a valid range.  */
4002             goto invalid_code;
4003           if (CHARSET_DIMENSION (charset) == 2)
4004             c1 = (c1 << 8) | c2;
4005           else
4006             {
4007               ONE_MORE_BYTE (c3);
4008               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
4009                   || ((c1 & 0x80) != (c3 & 0x80)))
4010                 /* C3 is not in a valid range.  */
4011                 goto invalid_code;
4012               c1 = (c1 << 16) | (c2 << 8) | c2;
4013             }
4014         }
4015       c1 &= 0x7F7F7F;
4016       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4017       if (c < 0)
4018         {
4019           MAYBE_FINISH_COMPOSITION ();
4020           for (; src_base < src; src_base++, char_offset++)
4021             {
4022               if (ASCII_BYTE_P (*src_base))
4023                 *charbuf++ = *src_base;
4024               else
4025                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4026             }
4027         }
4028       else if (cmp_status->state == COMPOSING_NO)
4029         {
4030           *charbuf++ = c;
4031           char_offset++;
4032         }
4033       else if ((cmp_status->state == COMPOSING_CHAR
4034                 ? cmp_status->nchars
4035                 : cmp_status->ncomps)
4036                >= MAX_COMPOSITION_COMPONENTS)
4037         {
4038           /* Too long composition.  */
4039           MAYBE_FINISH_COMPOSITION ();
4040           *charbuf++ = c;
4041           char_offset++;
4042         }
4043       else
4044         STORE_COMPOSITION_CHAR (c);
4045       continue;
4046
4047     invalid_code:
4048       MAYBE_FINISH_COMPOSITION ();
4049       src = src_base;
4050       consumed_chars = consumed_chars_base;
4051       ONE_MORE_BYTE (c);
4052       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4053       char_offset++;
4054       coding->errors++;
4055       continue;
4056
4057     break_loop:
4058       break;
4059     }
4060
4061  no_more_source:
4062   if (cmp_status->state != COMPOSING_NO)
4063     {
4064       if (coding->mode & CODING_MODE_LAST_BLOCK)
4065         MAYBE_FINISH_COMPOSITION ();
4066       else
4067         {
4068           charbuf -= cmp_status->length;
4069           for (i = 0; i < cmp_status->length; i++)
4070             cmp_status->carryover[i] = charbuf[i];
4071         }
4072     }
4073   else if (last_id != charset_ascii)
4074     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4075   coding->consumed_char += consumed_chars_base;
4076   coding->consumed = src_base - coding->source;
4077   coding->charbuf_used = charbuf - coding->charbuf;
4078 }
4079
4080
4081 /* ISO2022 encoding stuff.  */
4082
4083 /*
4084    It is not enough to say just "ISO2022" on encoding, we have to
4085    specify more details.  In Emacs, each coding system of ISO2022
4086    variant has the following specifications:
4087         1. Initial designation to G0 thru G3.
4088         2. Allows short-form designation?
4089         3. ASCII should be designated to G0 before control characters?
4090         4. ASCII should be designated to G0 at end of line?
4091         5. 7-bit environment or 8-bit environment?
4092         6. Use locking-shift?
4093         7. Use Single-shift?
4094    And the following two are only for Japanese:
4095         8. Use ASCII in place of JIS0201-1976-Roman?
4096         9. Use JISX0208-1983 in place of JISX0208-1978?
4097    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4098    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4099    details.
4100 */
4101
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advancing DST, and then record CHARSET as the
   current designation of REG in CODING.

   When CODING has CODING_ISO_FLAG_REVISION set and CHARSET has a
   non-negative revision number, the sequence is preceded by
   ESC '&' <'@' + revision>.

   For a multibyte 94-character set designated to register 0 whose
   <final-char> is '@', 'A', or 'B', the intermediate character is
   omitted (short form) unless CODING has CODING_ISO_FLAG_LONG_FORM
   set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    char *designation_inter_94 = "()*+";                                \
    char *designation_inter_96 = ",-./";                                \
    unsigned char designation_final = CHARSET_ISO_FINAL (charset);      \
    int designation_rev = -1;                                           \
    int designation_inter;                                              \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      designation_rev = CHARSET_ISO_REVISION (charset);                 \
    if (designation_rev >= 0)                                           \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + designation_rev);                          \
      }                                                                 \
                                                                        \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        /* Multibyte set: '$' comes before the intermediate.  */        \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          EMIT_ONE_ASCII_BYTE (designation_inter_96[reg]);              \
        else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM  \
                 || reg != 0                                            \
                 || designation_final < '@'                             \
                 || designation_final > 'B')                            \
          EMIT_ONE_ASCII_BYTE (designation_inter_94[reg]);              \
      }                                                                 \
    else                                                                \
      {                                                                 \
        designation_inter = (CHARSET_ISO_CHARS_96 (charset)             \
                             ? designation_inter_96[reg]                \
                             : designation_inter_94[reg]);              \
        EMIT_ONE_ASCII_BYTE (designation_inter);                        \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (designation_final);                            \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4149
4150
4151 /* The following two macros produce codes (control character or escape
4152    sequence) for ISO2022 single-shift functions (single-shift-2 and
4153    single-shift-3).  */
4154
/* Produce the single-shift-2 function: the one-byte control
   ISO_CODE_SS2, or ESC 'N' when CODING has CODING_ISO_FLAG_SEVEN_BITS
   set.  Then mark CODING as single-shifting; the character-emitting
   macros test and clear that mark.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4163
4164
/* Produce the single-shift-3 function: the one-byte control
   ISO_CODE_SS3, or ESC 'O' when CODING has CODING_ISO_FLAG_SEVEN_BITS
   set.  Then mark CODING as single-shifting; the character-emitting
   macros test and clear that mark.  */

#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4173
4174
4175 /* The following four macros produce codes (control character or
4176    escape sequence) for ISO2022 locking-shift functions (shift-in,
4177    shift-out, locking-shift-2, and locking-shift-3).  */
4178
/* Produce the shift-in control (ISO_CODE_SI) and record in CODING
   that graphic register 0 is invoked to graphic plane 0.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
  } while (0)
4184
4185
/* Produce the shift-out control (ISO_CODE_SO) and record in CODING
   that graphic register 1 is invoked to graphic plane 0.  */

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
  } while (0)
4191
4192
/* Produce the locking-shift-2 escape sequence (ESC 'n') and record in
   CODING that graphic register 2 is invoked to graphic plane 0.  */

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
  } while (0)
4198
4199
/* Produce the locking-shift-3 escape sequence (ESC 'o') and record in
   CODING that graphic register 3 is invoked to graphic plane 0.

   The final byte must be 'o': ESC 'n' is locking-shift-2, and the
   ISO2022 decoder in this file treats ESC 'o' as the invocation of
   locking-shift-3.  */

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
4205
4206
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has CODING_ISO_FLAG_USE_ROMAN
   set and CHARSET is ASCII, it is replaced by the JISX0201-Roman
   charset.  C1 may be any integer expression; it is parenthesized
   before masking so that an argument such as `a | b' is masked as a
   whole.

   The character is emitted with the 8th bit cleared when CHARSET is
   invoked to graphic plane 0, and with the 8th bit set when it is
   invoked to graphic plane 1.  While a single-shift is pending, the
   form is chosen by CODING_ISO_FLAG_SEVEN_BITS instead, and the
   pending single-shift is cleared.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4249
4250
/* Emit a DIMENSION2 character of CHARSET whose two position-codes
   are C1 and C2, first producing whatever designation and invocation
   codes are needed to make CHARSET available.

   CHARSET must be an lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS set and CHARSET is JISX0208, it is
   replaced by the JISX0208-1978 charset.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (! CODING_ISO_SINGLE_SHIFTING (coding))                          \
      {                                                                 \
        if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))               \
          {                                                             \
            /* Invoked to graphic plane 0: 8th bits cleared.  */        \
            EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);            \
            break;                                                      \
          }                                                             \
        if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))               \
          {                                                             \
            /* Invoked to graphic plane 1: 8th bits set.  */            \
            EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                  \
            break;                                                      \
          }                                                             \
        /* CHARSET is not invoked to any graphic plane yet.  Invoke     \
           it (designating it to a graphic register first if need       \
           be), then go around again to produce the character.  */      \
        dst = encode_invocation_designation (charset, coding, dst,      \
                                             &produced_chars);          \
        continue;                                                       \
      }                                                                 \
                                                                        \
    /* A single-shift is pending; it covers this character only.  */    \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                  \
    else                                                                \
      EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                        \
    CODING_ISO_SINGLE_SHIFTING (coding) = 0;                            \
    break;                                                              \
  } while (1)
4293
4294
/* Produce codes for character C of CHARSET.  The code point is
   obtained with ENCODE_CHAR.  A charset of dimension 1 goes to
   ENCODE_ISO_CHARACTER_DIMENSION1; any other charset is handled as
   two position-codes, `code >> 8' and `code & 0xFF', by
   ENCODE_ISO_CHARACTER_DIMENSION2.  */

#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset),(c));                                \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
4304
4305
4306 /* Produce designation and invocation codes at a place pointed by DST
4307    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4308    Return new DST.  */
4309
4310 unsigned char *
4311 encode_invocation_designation (charset, coding, dst, p_nchars)
4312      struct charset *charset;
4313      struct coding_system *coding;
4314      unsigned char *dst;
4315      int *p_nchars;
4316 {
4317   int multibytep = coding->dst_multibyte;
4318   int produced_chars = *p_nchars;
4319   int reg;                      /* graphic register number */
4320   int id = CHARSET_ID (charset);
4321
4322   /* At first, check designations.  */
4323   for (reg = 0; reg < 4; reg++)
4324     if (id == CODING_ISO_DESIGNATION (coding, reg))
4325       break;
4326
4327   if (reg >= 4)
4328     {
4329       /* CHARSET is not yet designated to any graphic registers.  */
4330       /* At first check the requested designation.  */
4331       reg = CODING_ISO_REQUEST (coding, id);
4332       if (reg < 0)
4333         /* Since CHARSET requests no special designation, designate it
4334            to graphic register 0.  */
4335         reg = 0;
4336
4337       ENCODE_DESIGNATION (charset, reg, coding);
4338     }
4339
4340   if (CODING_ISO_INVOCATION (coding, 0) != reg
4341       && CODING_ISO_INVOCATION (coding, 1) != reg)
4342     {
4343       /* Since the graphic register REG is not invoked to any graphic
4344          planes, invoke it to graphic plane 0.  */
4345       switch (reg)
4346         {
4347         case 0:                 /* graphic register 0 */
4348           ENCODE_SHIFT_IN;
4349           break;
4350
4351         case 1:                 /* graphic register 1 */
4352           ENCODE_SHIFT_OUT;
4353           break;
4354
4355         case 2:                 /* graphic register 2 */
4356           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4357             ENCODE_SINGLE_SHIFT_2;
4358           else
4359             ENCODE_LOCKING_SHIFT_2;
4360           break;
4361
4362         case 3:                 /* graphic register 3 */
4363           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4364             ENCODE_SINGLE_SHIFT_3;
4365           else
4366             ENCODE_LOCKING_SHIFT_3;
4367           break;
4368         }
4369     }
4370
4371   *p_nchars = produced_chars;
4372   return dst;
4373 }
4374
4375 /* The following three macros produce codes for indicating direction
4376    of text.  */
/* Produce the control sequence introducer: ESC '[' when CODING has
   CODING_ISO_FLAG_SEVEN_BITS set, otherwise the one-byte control
   ISO_CODE_CSI.

   CODING_ISO_FLAGS is a set of flag bits, so the seven-bit flag is
   tested with `&' as everywhere else in this file; comparing the
   whole flag word with `==' would pick the 8-bit form whenever any
   other flag is also set.  */

#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
4384
4385
/* Produce the code that starts right-to-left text: the control
   sequence introducer followed by '2' ']'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is invoked without an argument list; writing `(dst)' after it
   would leave a stray parenthesized expression after its
   `while (0)' and fail to compile.  */

#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
4391
4392
/* Produce the code that returns to left-to-right text: the control
   sequence introducer followed by '0' ']'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is invoked without an argument list; writing `(dst)' after it
   would leave a stray parenthesized expression after its
   `while (0)' and fail to compile.  */

#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4398
4399
/* Bring the graphic planes and registers of CODING back to their
   initial state: shift in if graphic plane 0 does not have register
   0 invoked, then re-designate every register whose current charset
   differs from its initial one.  Registers with no initial charset
   (a negative initial value) are left alone.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    struct charset *charset;                                            \
    int reg = 0;                                                        \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    while (reg < 4)                                                     \
      {                                                                 \
        int initial_id = CODING_ISO_INITIAL (coding, reg);              \
                                                                        \
        if (initial_id >= 0                                             \
            && initial_id != CODING_ISO_DESIGNATION (coding, reg))      \
          {                                                             \
            charset = CHARSET_FROM_ID (initial_id);                     \
            ENCODE_DESIGNATION (charset, reg, coding);                  \
          }                                                             \
        reg++;                                                          \
      }                                                                 \
  } while (0)
4418
4419
4420 /* Produce designation sequences of charsets in the line started from
4421    SRC to a place pointed by DST, and return updated DST.
4422
4423    If the current block ends before any end-of-line, we may fail to
4424    find all the necessary designations.  */
4425
4426 static unsigned char *
4427 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
4428      struct coding_system *coding;
4429      int *charbuf, *charbuf_end;
4430      unsigned char *dst;
4431 {
4432   struct charset *charset;
4433   /* Table of charsets to be designated to each graphic register.  */
4434   int r[4];
4435   int c, found = 0, reg;
4436   int produced_chars = 0;
4437   int multibytep = coding->dst_multibyte;
4438   Lisp_Object attrs;
4439   Lisp_Object charset_list;
4440
4441   attrs = CODING_ID_ATTRS (coding->id);
4442   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4443   if (EQ (charset_list, Qiso_2022))
4444     charset_list = Viso_2022_charset_list;
4445
4446   for (reg = 0; reg < 4; reg++)
4447     r[reg] = -1;
4448
4449   while (found < 4)
4450     {
4451       int id;
4452
4453       c = *charbuf++;
4454       if (c == '\n')
4455         break;
4456       charset = char_charset (c, charset_list, NULL);
4457       id = CHARSET_ID (charset);
4458       reg = CODING_ISO_REQUEST (coding, id);
4459       if (reg >= 0 && r[reg] < 0)
4460         {
4461           found++;
4462           r[reg] = id;
4463         }
4464     }
4465
4466   if (found)
4467     {
4468       for (reg = 0; reg < 4; reg++)
4469         if (r[reg] >= 0
4470             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4471           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4472     }
4473
4474   return dst;
4475 }
4476
4477 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
        Encode the CODING->charbuf_used elements of CODING->charbuf with
        the ISO-2022 based coding system CODING, storing the bytes after
        the first CODING->produced bytes of CODING->destination.  A
        negative element of charbuf starts an annotation; only the
        charset annotation has an effect here (it selects the charset
        preferred for the characters that follow).  Before returning,
        CODING->produced, CODING->produced_char and the beginning-of-line
        state are updated; the explicit return value is 0.  */
4478
4479 static int
4480 encode_coding_iso_2022 (coding)
4481      struct coding_system *coding;
4482 {
4483   int multibytep = coding->dst_multibyte;
4484   int *charbuf = coding->charbuf;
4485   int *charbuf_end = charbuf + coding->charbuf_used;
4486   unsigned char *dst = coding->destination + coding->produced;
4487   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4488   int safe_room = 16;
       /* Nonzero if designation sequences must be produced before the
          next character: the coding system designates at beginning of
          line and the saved state says we are at one.  */
4489   int bol_designation
4490     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4491        && CODING_ISO_BOL (coding));
4492   int produced_chars = 0;
4493   Lisp_Object attrs, eol_type, charset_list;
4494   int ascii_compatible;
4495   int c;
       /* Charset id requested by the latest charset annotation, or -1.  */
4496   int preferred_charset_id = -1;
4497
4498   CODING_GET_INFO (coding, attrs, charset_list);
       /* Use Qunix when eol conversion is inhibited or when the eol type
          of the coding system is still a vector.  */
4499   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4500   if (VECTORP (eol_type))
4501     eol_type = Qunix;
4502
4503   setup_iso_safe_charsets (attrs);
4504   /* Charset list may have been changed.  */
4505   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4506   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4507
4508   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4509
4510   while (charbuf < charbuf_end)
4511     {
4512       ASSURE_DESTINATION (safe_room);
4513
4514       if (bol_designation)
4515         {
4516           unsigned char *dst_prev = dst;
4517
4518           /* We have to produce designation sequences if any now.  */
4519           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4520           bol_designation = 0;
4521           /* We are sure that designation sequences are all ASCII bytes.  */
4522           produced_chars += dst - dst_prev;
4523         }
4524
4525       c = *charbuf++;
4526
4527       if (c < 0)
4528         {
4529           /* Handle an annotation.  CHARBUF now points just past the
                  negative element C: CHARBUF[0] selects the annotation
                  type and, for a charset annotation, CHARBUF[2] is the
                  charset id.  */
4530           switch (*charbuf)
4531             {
4532             case CODING_ANNOTATE_COMPOSITION_MASK:
4533               /* Not yet implemented.  */
4534               break;
4535             case CODING_ANNOTATE_CHARSET_MASK:
4536               preferred_charset_id = charbuf[2];
                   /* Honor the preference only for a charset that is a
                      member of this coding system's charset list.  */
4537               if (preferred_charset_id >= 0
4538                   && NILP (Fmemq (make_number (preferred_charset_id),
4539                                   charset_list)))
4540                 preferred_charset_id = -1;
4541               break;
4542             default:
4543               abort ();
4544             }
               /* One element (C itself) was consumed above, so skip the
                  remaining -C - 1 elements of the annotation.  */
4545           charbuf += -c - 1;
4546           continue;
4547         }
4548
4549       /* Now encode the character C.  */
4550       if (c < 0x20 || c == 0x7F)
4551         {
4552           if (c == '\n'
4553               || (c == '\r' && EQ (eol_type, Qmac)))
4554             {
                   /* End of line: as the ISO flags direct, run
                      ENCODE_RESET_PLANE_AND_REGISTER, restore each of the
                      four designations to its initial value, and arrange
                      for designation at the next beginning of line.  */
4555               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4556                 ENCODE_RESET_PLANE_AND_REGISTER ();
4557               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4558                 {
4559                   int i;
4560
4561                   for (i = 0; i < 4; i++)
4562                     CODING_ISO_DESIGNATION (coding, i)
4563                       = CODING_ISO_INITIAL (coding, i);
4564                 }
4565               bol_designation
4566                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4567             }
4568           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4569             ENCODE_RESET_PLANE_AND_REGISTER ();
4570           EMIT_ONE_ASCII_BYTE (c);
4571         }
4572       else if (ASCII_CHAR_P (c))
4573         {
               /* An ASCII-compatible coding system emits the byte as is;
                  otherwise the character goes through the normal ISO
                  encoding with the ASCII charset.  */
4574           if (ascii_compatible)
4575             EMIT_ONE_ASCII_BYTE (c);
4576           else
4577             {
4578               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4579               ENCODE_ISO_CHARACTER (charset, c);
4580             }
4581         }
4582       else if (CHAR_BYTE8_P (c))
4583         {
4584           c = CHAR_TO_BYTE8 (c);
4585           EMIT_ONE_BYTE (c);
4586         }
4587       else
4588         {
4589           struct charset *charset;
4590
               /* Try the annotated charset first; fall back to searching
                  the charset list if it does not contain C.  */
4591           if (preferred_charset_id >= 0)
4592             {
4593               charset = CHARSET_FROM_ID (preferred_charset_id);
4594               if (! CHAR_CHARSET_P (c, charset))
4595                 charset = char_charset (c, charset_list, NULL);
4596             }
4597           else
4598             charset = char_charset (c, charset_list, NULL);
4599           if (!charset)
4600             {
                   /* char_charset found no charset for C.  In safe
                      encoding mode, encode the substitution code with the
                      ASCII charset; otherwise encode the default
                      character of the coding system instead.  */
4601               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4602                 {
4603                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4604                   charset = CHARSET_FROM_ID (charset_ascii);
4605                 }
4606               else
4607                 {
4608                   c = coding->default_char;
4609                   charset = char_charset (c, charset_list, NULL);
4610                 }
4611             }
4612           ENCODE_ISO_CHARACTER (charset, c);
4613         }
4614     }
4615
       /* At the end of the last block, reset plane and register if the
          coding system resets them at end of line.  */
4616   if (coding->mode & CODING_MODE_LAST_BLOCK
4617       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4618     {
4619       ASSURE_DESTINATION (safe_room);
4620       ENCODE_RESET_PLANE_AND_REGISTER ();
4621     }
4622   record_conversion_result (coding, CODING_RESULT_SUCCESS);
       /* Save the beginning-of-line state for the next call.  */
4623   CODING_ISO_BOL (coding) = bol_designation;
4624   coding->produced_char += produced_chars;
4625   coding->produced = dst - coding->destination;
4626   return 0;
4627 }
4628
4629 \f
4630 /*** 8. SJIS and BIG5 handlers ***/
4631
4632 /* Although SJIS and BIG5 are not ISO coding systems, they are used
4633    quite widely.  So, for the moment, Emacs supports them in the bare
4634    C code.  But, in the future, they may be supported only by CCL.  */
4635
4636 /* SJIS is a coding system encoding three character sets: ASCII, right
4637    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4638    as is.  A character of charset katakana-jisx0201 is encoded by
4639    "position-code + 0x80".  A character of charset japanese-jisx0208
4640    is encoded in 2 bytes, but its two position-codes are divided and
4641    shifted so that they fit in the ranges below.
4642
4643    --- CODE RANGE of SJIS ---
4644    (character set)      (range)
4645    ASCII                0x00 .. 0x7F
4646    KATAKANA-JISX0201    0xA0 .. 0xDF
4647    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4648             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4649    -------------------------------
4650
4651 */
4652
4653 /* BIG5 is a coding system encoding two character sets: ASCII and
4654    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4655    character set and is encoded in two bytes.
4656
4657    --- CODE RANGE of BIG5 ---
4658    (character set)      (range)
4659    ASCII                0x00 .. 0x7F
4660    Big5 (1st byte)      0xA1 .. 0xFE
4661         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4662    --------------------------
4663
4664   */
4665
4666 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4667    Check if a text is encoded in SJIS.  CATEGORY_MASK_SJIS is always
4668    added to DETECT_INFO->checked.  If an invalid byte sequence is seen,
        or if the source of the last block runs out after part of a
        sequence was read, add CATEGORY_MASK_SJIS to DETECT_INFO->rejected
        and return 0.  Otherwise return 1, adding CATEGORY_MASK_SJIS to
        DETECT_INFO->found if at least one non-ASCII SJIS sequence was
        seen.  */
4669
4670 static int
4671 detect_coding_sjis (coding, detect_info)
4672      struct coding_system *coding;
4673      struct coding_detection_info *detect_info;
4674 {
4675   const unsigned char *src = coding->source, *src_base;
4676   const unsigned char *src_end = coding->source + coding->src_bytes;
4677   int multibytep = coding->src_multibyte;
4678   int consumed_chars = 0;
4679   int found = 0;
4680   int c;
4681   Lisp_Object attrs, charset_list;
4682   int max_first_byte_of_2_byte_code;
4683
4684   CODING_GET_INFO (coding, attrs, charset_list);
       /* With more than three charsets in the list, lead bytes up to 0xFC
          are accepted; otherwise only lead bytes up to 0xEF.  */
4685   max_first_byte_of_2_byte_code
4686     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4687
4688   detect_info->checked |= CATEGORY_MASK_SJIS;
4689   /* A coding system of this category is always ASCII compatible.  */
4690   src += coding->head_ascii;
4691
4692   while (1)
4693     {
           /* Remember where the current sequence starts.  */
4694       src_base = src;
4695       ONE_MORE_BYTE (c);
4696       if (c < 0x80)
4697         continue;
4698       if ((c >= 0x81 && c <= 0x9F)
4699           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4700         {
               /* Lead byte of a two-byte code: validate the trail byte.  */
4701           ONE_MORE_BYTE (c);
4702           if (c < 0x40 || c == 0x7F || c > 0xFC)
4703             break;
4704           found = CATEGORY_MASK_SJIS;
4705         }
4706       else if (c >= 0xA0 && c < 0xE0)
             /* A single byte in the range 0xA0..0xDF.  */
4707         found = CATEGORY_MASK_SJIS;
4708       else
4709         break;
4710     }
       /* The loop is left by `break' only on an invalid byte.  */
4711   detect_info->rejected |= CATEGORY_MASK_SJIS;
4712   return 0;
4713
       /* No visible `goto' targets this label; presumably ONE_MORE_BYTE
          (defined elsewhere) jumps here when the source is exhausted --
          confirm.  */
4714  no_more_source:
       /* SRC_BASE < SRC means SRC advanced since the start of the current
          sequence; in the last block that is treated as invalid.  */
4715   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4716     {
4717       detect_info->rejected |= CATEGORY_MASK_SJIS;
4718       return 0;
4719     }
4720   detect_info->found |= found;
4721   return 1;
4722 }
4723
4724 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4725    Check if a text is encoded in BIG5.  CATEGORY_MASK_BIG5 is always
4726    added to DETECT_INFO->checked.  Return 0 if an invalid byte is seen
        or if the source of the last block runs out after part of a
        sequence was read; in the cases marked below,
        CATEGORY_MASK_BIG5 is also added to DETECT_INFO->rejected.
        Otherwise return 1, adding CATEGORY_MASK_BIG5 to
        DETECT_INFO->found if at least one two-byte sequence was seen.  */
4727
4728 static int
4729 detect_coding_big5 (coding, detect_info)
4730      struct coding_system *coding;
4731      struct coding_detection_info *detect_info;
4732 {
4733   const unsigned char *src = coding->source, *src_base;
4734   const unsigned char *src_end = coding->source + coding->src_bytes;
4735   int multibytep = coding->src_multibyte;
4736   int consumed_chars = 0;
4737   int found = 0;
4738   int c;
4739
4740   detect_info->checked |= CATEGORY_MASK_BIG5;
4741   /* A coding system of this category is always ASCII compatible.  */
4742   src += coding->head_ascii;
4743
4744   while (1)
4745     {
           /* Remember where the current sequence starts.  */
4746       src_base = src;
4747       ONE_MORE_BYTE (c);
4748       if (c < 0x80)
4749         continue;
4750       if (c >= 0xA1)
4751         {
               /* Lead byte of a two-byte code: validate the trail byte.  */
4752           ONE_MORE_BYTE (c);
               /* NOTE(review): unlike detect_coding_sjis, an invalid trail
                  byte returns here without adding CATEGORY_MASK_BIG5 to
                  DETECT_INFO->rejected; confirm this is intended.  */
4753           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4754             return 0;
4755           found = CATEGORY_MASK_BIG5;
4756         }
4757       else
             /* A byte in 0x80..0xA0 cannot start a sequence.  */
4758         break;
4759     }
4760   detect_info->rejected |= CATEGORY_MASK_BIG5;
4761   return 0;
4762
       /* No visible `goto' targets this label; presumably ONE_MORE_BYTE
          (defined elsewhere) jumps here when the source is exhausted --
          confirm.  */
4763  no_more_source:
       /* SRC_BASE < SRC means SRC advanced since the start of the current
          sequence; in the last block that is treated as invalid.  */
4764   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4765     {
4766       detect_info->rejected |= CATEGORY_MASK_BIG5;
4767       return 0;
4768     }
4769   detect_info->found |= found;
4770   return 1;
4771 }
4772
4773 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4774    Decode SJIS text.  Bytes are read from CODING->source starting at
        offset CODING->consumed, and the decoded characters are appended
        to CODING->charbuf.  The charset list of CODING supplies, in this
        order, the roman, kana and kanji charsets and optionally a second
        kanji charset.  A byte sequence that cannot be decoded is stored
        one byte at a time and counted in CODING->errors.  */
4775
4776 static void
4777 decode_coding_sjis (coding)
4778      struct coding_system *coding;
4779 {
4780   const unsigned char *src = coding->source + coding->consumed;
4781   const unsigned char *src_end = coding->source + coding->src_bytes;
4782   const unsigned char *src_base;
4783   int *charbuf = coding->charbuf + coding->charbuf_used;
4784   /* We may produce one charset annotation in one loop and one more at
4785      the end.  */
4786   int *charbuf_end
4787     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4788   int consumed_chars = 0, consumed_chars_base;
4789   int multibytep = coding->src_multibyte;
4790   struct charset *charset_roman, *charset_kanji, *charset_kana;
4791   struct charset *charset_kanji2;
4792   Lisp_Object attrs, charset_list, val;
4793   int char_offset = coding->produced_char;
       /* LAST_ID is the charset id of the current run of characters and
          LAST_OFFSET the character offset at which that run started.  */
4794   int last_offset = char_offset;
4795   int last_id = charset_ascii;
4796   int eol_crlf =
4797     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The byte already read after a CR, or -1 if there is none.  */
4798   int byte_after_cr = -1;
4799
4800   CODING_GET_INFO (coding, attrs, charset_list);
4801
       /* Take the first three charsets of the list, and the fourth if
          there is one.  */
4802   val = charset_list;
4803   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4804   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4805   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4806   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4807
4808   while (1)
4809     {
4810       int c, c1;
4811       struct charset *charset;
4812
           /* Remember the start of this sequence so that it can be
              re-read (invalid_code) or left unconsumed.  */
4813       src_base = src;
4814       consumed_chars_base = consumed_chars;
4815
4816       if (charbuf >= charbuf_end)
4817         {
               /* The byte read ahead after a CR has not been decoded yet,
                  so do not report it as consumed.  */
4818           if (byte_after_cr >= 0)
4819             src_base--;
4820           break;
4821         }
4822
4823       if (byte_after_cr >= 0)
4824         c = byte_after_cr, byte_after_cr = -1;
4825       else
4826         ONE_MORE_BYTE (c);
4827       if (c < 0)
4828         goto invalid_code;
4829       if (c < 0x80)
4830         {
               /* With CRLF eol type, read the byte following a CR right
                  away; it is decoded in the next iteration.  */
4831           if (eol_crlf && c == '\r')
4832             ONE_MORE_BYTE (byte_after_cr);
4833           charset = charset_roman;
4834         }
4835       else if (c == 0x80 || c == 0xA0)
4836         goto invalid_code;
4837       else if (c >= 0xA1 && c <= 0xDF)
4838         {
4839           /* SJIS -> JISX0201-Kana */
4840           c &= 0x7F;
4841           charset = charset_kana;
4842         }
4843       else if (c <= 0xEF)
4844         {
4845           /* SJIS -> JISX0208 */
4846           ONE_MORE_BYTE (c1);
4847           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4848             goto invalid_code;
4849           c = (c << 8) | c1;
4850           SJIS_TO_JIS (c);
4851           charset = charset_kanji;
4852         }
4853       else if (c <= 0xFC && charset_kanji2)
4854         {
4855           /* SJIS -> JISX0213-2 */
4856           ONE_MORE_BYTE (c1);
4857           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4858             goto invalid_code;
4859           c = (c << 8) | c1;
4860           SJIS_TO_JIS2 (c);
4861           charset = charset_kanji2;
4862         }
4863       else
4864         goto invalid_code;
           /* On a change to a different non-ASCII charset, record the
              previous run (if it was not ASCII) and start a new one.  */
4865       if (charset->id != charset_ascii
4866           && last_id != charset->id)
4867         {
4868           if (last_id != charset_ascii)
4869             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4870           last_id = charset->id;
4871           last_offset = char_offset;
4872         }
4873       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4874       *charbuf++ = c;
4875       char_offset++;
4876       continue;
4877
4878     invalid_code:
           /* Rewind to the start of the sequence and store just its first
              byte: as BYTE8_TO_CHAR of the byte, or as -C when
              ONE_MORE_BYTE yields a negative value.  */
4879       src = src_base;
4880       consumed_chars = consumed_chars_base;
4881       ONE_MORE_BYTE (c);
4882       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4883       char_offset++;
4884       coding->errors++;
4885     }
4886
       /* Reached by the `break' above; presumably also the target of
          ONE_MORE_BYTE (defined elsewhere) when the source is exhausted
          -- confirm.  Only the bytes before SRC_BASE are reported as
          consumed.  */
4887  no_more_source:
4888   if (last_id != charset_ascii)
4889     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4890   coding->consumed_char += consumed_chars_base;
4891   coding->consumed = src_base - coding->source;
4892   coding->charbuf_used = charbuf - coding->charbuf;
4893 }
4894
     /* Decode BIG5 text.  Bytes are read from CODING->source starting at
        offset CODING->consumed, and the decoded characters are appended
        to CODING->charbuf.  The charset list of CODING supplies, in this
        order, the roman charset and the Big5 charset.  A byte sequence
        that cannot be decoded is stored one byte at a time and counted
        in CODING->errors.  */
4895 static void
4896 decode_coding_big5 (coding)
4897      struct coding_system *coding;
4898 {
4899   const unsigned char *src = coding->source + coding->consumed;
4900   const unsigned char *src_end = coding->source + coding->src_bytes;
4901   const unsigned char *src_base;
4902   int *charbuf = coding->charbuf + coding->charbuf_used;
4903   /* We may produce one charset annotation in one loop and one more at
4904      the end.  */
4905   int *charbuf_end
4906     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4907   int consumed_chars = 0, consumed_chars_base;
4908   int multibytep = coding->src_multibyte;
4909   struct charset *charset_roman, *charset_big5;
4910   Lisp_Object attrs, charset_list, val;
4911   int char_offset = coding->produced_char;
       /* LAST_ID is the charset id of the current run of characters and
          LAST_OFFSET the character offset at which that run started.  */
4912   int last_offset = char_offset;
4913   int last_id = charset_ascii;
4914   int eol_crlf =
4915     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The byte already read after a CR, or -1 if there is none.  */
4916   int byte_after_cr = -1;
4917
4918   CODING_GET_INFO (coding, attrs, charset_list);
4919   val = charset_list;
4920   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4921   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4922
4923   while (1)
4924     {
4925       int c, c1;
4926       struct charset *charset;
4927
           /* Remember the start of this sequence so that it can be
              re-read (invalid_code) or left unconsumed.  */
4928       src_base = src;
4929       consumed_chars_base = consumed_chars;
4930
4931       if (charbuf >= charbuf_end)
4932         {
               /* The byte read ahead after a CR has not been decoded yet,
                  so do not report it as consumed.  */
4933           if (byte_after_cr >= 0)
4934             src_base--;
4935           break;
4936         }
4937
4938       if (byte_after_cr >= 0)
4939         c = byte_after_cr, byte_after_cr = -1;
4940       else
4941         ONE_MORE_BYTE (c);
4942
4943       if (c < 0)
4944         goto invalid_code;
4945       if (c < 0x80)
4946         {
               /* With CRLF eol type, read the byte following a CR right
                  away; it is decoded in the next iteration.  */
4947           if (eol_crlf && c == '\r')
4948             ONE_MORE_BYTE (byte_after_cr);
4949           charset = charset_roman;
4950         }
4951       else
4952         {
4953           /* BIG5 -> Big5 */
               /* Lead byte must be 0xA1..0xFE; trail byte must be
                  0x40..0x7E or 0xA1..0xFE.  */
4954           if (c < 0xA1 || c > 0xFE)
4955             goto invalid_code;
4956           ONE_MORE_BYTE (c1);
4957           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4958             goto invalid_code;
4959           c = c << 8 | c1;
4960           charset = charset_big5;
4961         }
           /* On a change to a different non-ASCII charset, record the
              previous run (if it was not ASCII) and start a new one.  */
4962       if (charset->id != charset_ascii
4963           && last_id != charset->id)
4964         {
4965           if (last_id != charset_ascii)
4966             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4967           last_id = charset->id;
4968           last_offset = char_offset;
4969         }
4970       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4971       *charbuf++ = c;
4972       char_offset++;
4973       continue;
4974
4975     invalid_code:
           /* Rewind to the start of the sequence and store just its first
              byte: as BYTE8_TO_CHAR of the byte, or as -C when
              ONE_MORE_BYTE yields a negative value.  */
4976       src = src_base;
4977       consumed_chars = consumed_chars_base;
4978       ONE_MORE_BYTE (c);
4979       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4980       char_offset++;
4981       coding->errors++;
4982     }
4983
       /* Reached by the `break' above; presumably also the target of
          ONE_MORE_BYTE (defined elsewhere) when the source is exhausted
          -- confirm.  Only the bytes before SRC_BASE are reported as
          consumed.  */
4984  no_more_source:
4985   if (last_id != charset_ascii)
4986     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4987   coding->consumed_char += consumed_chars_base;
4988   coding->consumed = src_base - coding->source;
4989   coding->charbuf_used = charbuf - coding->charbuf;
4990 }
4991
4992 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4993    Encode the characters in CODING->charbuf as SJIS.  The charset list
4994    of CODING supplies, in this order, the roman, kana and kanji
4995    charsets and optionally a second kanji charset.  A kanji code is
4996    emitted as two bytes after JIS_TO_SJIS, a kana code as one byte with
4997    the 0x80 bit set, and a second-kanji code as two bytes after
4998    JIS_TO_SJIS2 when its row is one of those tested below; every other
        code is emitted as one byte masked with 0x7F.  The explicit return
        value is 0.  */
4999
5000 static int
5001 encode_coding_sjis (coding)
5002      struct coding_system *coding;
5003 {
5004   int multibytep = coding->dst_multibyte;
5005   int *charbuf = coding->charbuf;
5006   int *charbuf_end = charbuf + coding->charbuf_used;
5007   unsigned char *dst = coding->destination + coding->produced;
5008   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5009   int safe_room = 4;
5010   int produced_chars = 0;
5011   Lisp_Object attrs, charset_list, val;
5012   int ascii_compatible;
5013   struct charset *charset_roman, *charset_kanji, *charset_kana;
5014   struct charset *charset_kanji2;
5015   int c;
5016
5017   CODING_GET_INFO (coding, attrs, charset_list);
       /* Take the first three charsets of the list, and the fourth if
          there is one.  */
5018   val = charset_list;
5019   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5020   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5021   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5022   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
5023
5024   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5025
5026   while (charbuf < charbuf_end)
5027     {
5028       ASSURE_DESTINATION (safe_room);
5029       c = *charbuf++;
5030       /* Now encode the character C.  */
5031       if (ASCII_CHAR_P (c) && ascii_compatible)
5032         EMIT_ONE_ASCII_BYTE (c);
5033       else if (CHAR_BYTE8_P (c))
5034         {
5035           c = CHAR_TO_BYTE8 (c);
5036           EMIT_ONE_BYTE (c);
5037         }
5038       else
5039         {
5040           unsigned code;
5041           struct charset *charset = char_charset (c, charset_list, &code);
5042
5043           if (!charset)
5044             {
                   /* char_charset found no charset for C.  In safe
                      encoding mode, use the substitution code with the
                      ASCII charset; otherwise encode the default
                      character of the coding system instead.  */
5045               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5046                 {
5047                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5048                   charset = CHARSET_FROM_ID (charset_ascii);
5049                 }
5050               else
5051                 {
5052                   c = coding->default_char;
5053                   charset = char_charset (c, charset_list, &code);
5054                 }
5055             }
               /* NOTE(review): CHARSET is not checked again after the
                  lookup of the default character above; confirm that
                  lookup cannot fail.  */
5056           if (code == CHARSET_INVALID_CODE (charset))
5057             abort ();
5058           if (charset == charset_kanji)
5059             {
5060               int c1, c2;
5061               JIS_TO_SJIS (code);
5062               c1 = code >> 8, c2 = code & 0xFF;
5063               EMIT_TWO_BYTES (c1, c2);
5064             }
5065           else if (charset == charset_kana)
5066             EMIT_ONE_BYTE (code | 0x80);
5067           else if (charset_kanji2 && charset == charset_kanji2)
5068             {
5069               int c1, c2;
5070
                   /* Only codes whose high byte is 0x21, 0x23..0x25,
                      0x28, 0x2C..0x2F, or 0x6E and above are converted
                      with JIS_TO_SJIS2.  */
5071               c1 = code >> 8;
5072               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5073                   || c1 == 0x28
5074                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5075                 {
5076                   JIS_TO_SJIS2 (code);
5077                   c1 = code >> 8, c2 = code & 0xFF;
5078                   EMIT_TWO_BYTES (c1, c2);
5079                 }
5080               else
5081                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5082             }
5083           else
5084             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5085         }
5086     }
5087   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5088   coding->produced_char += produced_chars;
5089   coding->produced = dst - coding->destination;
5090   return 0;
5091 }
5092
     /* Encode the characters in CODING->charbuf as BIG5.  The charset
        list of CODING supplies, in this order, the roman charset and the
        Big5 charset.  A Big5 code is emitted as two bytes (high byte
        first); every other code is emitted as one byte masked with 0x7F.
        The explicit return value is 0.  */
5093 static int
5094 encode_coding_big5 (coding)
5095      struct coding_system *coding;
5096 {
5097   int multibytep = coding->dst_multibyte;
5098   int *charbuf = coding->charbuf;
5099   int *charbuf_end = charbuf + coding->charbuf_used;
5100   unsigned char *dst = coding->destination + coding->produced;
5101   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5102   int safe_room = 4;
5103   int produced_chars = 0;
5104   Lisp_Object attrs, charset_list, val;
5105   int ascii_compatible;
5106   struct charset *charset_roman, *charset_big5;
5107   int c;
5108
5109   CODING_GET_INFO (coding, attrs, charset_list);
5110   val = charset_list;
5111   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5112   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5113   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5114
5115   while (charbuf < charbuf_end)
5116     {
5117       ASSURE_DESTINATION (safe_room);
5118       c = *charbuf++;
5119       /* Now encode the character C.  */
5120       if (ASCII_CHAR_P (c) && ascii_compatible)
5121         EMIT_ONE_ASCII_BYTE (c);
5122       else if (CHAR_BYTE8_P (c))
5123         {
5124           c = CHAR_TO_BYTE8 (c);
5125           EMIT_ONE_BYTE (c);
5126         }
5127       else
5128         {
5129           unsigned code;
5130           struct charset *charset = char_charset (c, charset_list, &code);
5131
5132           if (! charset)
5133             {
                   /* char_charset found no charset for C.  In safe
                      encoding mode, use the substitution code with the
                      ASCII charset; otherwise encode the default
                      character of the coding system instead.  */
5134               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5135                 {
5136                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5137                   charset = CHARSET_FROM_ID (charset_ascii);
5138                 }
5139               else
5140                 {
5141                   c = coding->default_char;
5142                   charset = char_charset (c, charset_list, &code);
5143                 }
5144             }
               /* NOTE(review): CHARSET is not checked again after the
                  lookup of the default character above; confirm that
                  lookup cannot fail.  */
5145           if (code == CHARSET_INVALID_CODE (charset))
5146             abort ();
5147           if (charset == charset_big5)
5148             {
5149               int c1, c2;
5150
5151               c1 = code >> 8, c2 = code & 0xFF;
5152               EMIT_TWO_BYTES (c1, c2);
5153             }
5154           else
5155             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5156         }
5157     }
5158   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5159   coding->produced_char += produced_chars;
5160   coding->produced = dst - coding->destination;
5161   return 0;
5162 }
5163
5164 \f
5165 /*** 9. CCL handlers ***/
5166
5167 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5168    Check if a text is encoded in a coding system of which
5169    encoder/decoder are written in CCL program.  CATEGORY_MASK_CCL is
5170    always added to DETECT_INFO->checked.  If a byte is negative or has
        a zero entry in the valid-byte table of the CCL category, add
        CATEGORY_MASK_CCL to DETECT_INFO->rejected and return 0.
        Otherwise return 1, adding CATEGORY_MASK_CCL to
        DETECT_INFO->found if some byte had a table entry greater
        than 1.  */
5171
5172 static int
5173 detect_coding_ccl (coding, detect_info)
5174      struct coding_system *coding;
5175      struct coding_detection_info *detect_info;
5176 {
5177   const unsigned char *src = coding->source, *src_base;
5178   const unsigned char *src_end = coding->source + coding->src_bytes;
5179   int multibytep = coding->src_multibyte;
5180   int consumed_chars = 0;
5181   int found = 0;
5182   unsigned char *valids;
5183   int head_ascii = coding->head_ascii;
5184   Lisp_Object attrs;
5185
5186   detect_info->checked |= CATEGORY_MASK_CCL;
5187
       /* From here on CODING is the coding system registered for the CCL
          category, not the argument; the source pointers and HEAD_ASCII
          were taken from the argument above.  */
5188   coding = &coding_categories[coding_category_ccl];
5189   valids = CODING_CCL_VALIDS (coding);
5190   attrs = CODING_ID_ATTRS (coding->id);
       /* Skip the leading ASCII part only for an ASCII-compatible coding
          system.  */
5191   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5192     src += head_ascii;
5193
5194   while (1)
5195     {
5196       int c;
5197
5198       src_base = src;
5199       ONE_MORE_BYTE (c);
5200       if (c < 0 || ! valids[c])
5201         break;
5202       if ((valids[c] > 1))
5203         found = CATEGORY_MASK_CCL;
5204     }
5205   detect_info->rejected |= CATEGORY_MASK_CCL;
5206   return 0;
5207
       /* No visible `goto' targets this label; presumably ONE_MORE_BYTE
          (defined elsewhere) jumps here when the source is exhausted --
          confirm.  */
5208  no_more_source:
5209   detect_info->found |= found;
5210   return 1;
5211 }
5212
     /* Decode the source of CODING with the CCL decoder program of
        CODING.  Bytes are read from CODING->source starting at offset
        CODING->consumed, converted in chunks of at most 1024 elements,
        and the output of ccl_driver is appended to CODING->charbuf.  The
        final status of the CCL program is mapped to a conversion result
        and CODING->consumed, CODING->consumed_char and
        CODING->charbuf_used are updated.  */
5213 static void
5214 decode_coding_ccl (coding)
5215      struct coding_system *coding;
5216 {
5217   const unsigned char *src = coding->source + coding->consumed;
5218   const unsigned char *src_end = coding->source + coding->src_bytes;
5219   int *charbuf = coding->charbuf + coding->charbuf_used;
5220   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5221   int consumed_chars = 0;
5222   int multibytep = coding->src_multibyte;
5223   struct ccl_program ccl;
       /* One chunk of input for ccl_driver.  For a multibyte source,
          SOURCE_BYTEIDX[I] is the byte offset from SRC of the I-th
          element; it is not filled for a unibyte source.  */
5224   int source_charbuf[1024];
5225   int source_byteidx[1024];
5226   Lisp_Object attrs, charset_list;
5227
5228   CODING_GET_INFO (coding, attrs, charset_list);
5229   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
5230
5231   while (src < src_end)
5232     {
5233       const unsigned char *p = src;
5234       int *source, *source_end;
5235       int i = 0;
5236
5237       if (multibytep)
5238         while (i < 1024 && p < src_end)
5239           {
5240             source_byteidx[i] = p - src;
5241             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5242           }
5243       else
5244         while (i < 1024 && p < src_end)
5245           source_charbuf[i++] = *p++;
5246
5247       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5248         ccl.last_block = 1;
5249
5250       source = source_charbuf;
5251       source_end = source + i;
           /* Run the CCL program again only while it stopped because the
              destination was full.  */
5252       while (source < source_end)
5253         {
5254           ccl_driver (&ccl, source, charbuf,
5255                       source_end - source, charbuf_end - charbuf,
5256                       charset_list);
5257           source += ccl.consumed;
5258           charbuf += ccl.produced;
5259           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
5260             break;
5261         }
           /* Advance SRC by the bytes actually consumed.  When the chunk
              was only partly consumed, a multibyte source needs the
              recorded byte offset of the first unconsumed element; a
              unibyte source has one byte per element, and
              SOURCE_BYTEIDX is not initialized in that case, so the
              element count itself is the byte offset.  */
5262       if (source < source_end)
5263         src += (multibytep
                     ? source_byteidx[source - source_charbuf]
                     : source - source_charbuf);
5264       else
5265         src = p;
5266       consumed_chars += source - source_charbuf;
5267
           /* NOTE(review): the second test compares a CCL status with a
              CODING_RESULT_* value; confirm that this is intended.  */
5268       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
5269           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
5270         break;
5271     }
5272
5273   switch (ccl.status)
5274     {
5275     case CCL_STAT_SUSPEND_BY_SRC:
5276       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5277       break;
5278     case CCL_STAT_SUSPEND_BY_DST:
5279       break;
5280     case CCL_STAT_QUIT:
5281     case CCL_STAT_INVALID_CMD:
5282       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5283       break;
5284     default:
5285       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5286       break;
5287     }
5288   coding->consumed_char += consumed_chars;
5289   coding->consumed = src - coding->source;
5290   coding->charbuf_used = charbuf - coding->charbuf;
5291 }
5292
     /* Encode the characters in CODING->charbuf with the CCL encoder
        program of CODING.  The program is run repeatedly on the rest of
        charbuf, each run producing at most 1024 elements, whose low 8
        bits are stored as bytes after the first CODING->produced bytes
        of CODING->destination.  The final status of the CCL program is
        mapped to a conversion result.  The explicit return value
        is 0.  */
5293 static int
5294 encode_coding_ccl (coding)
5295      struct coding_system *coding;
5296 {
5297   struct ccl_program ccl;
5298   int multibytep = coding->dst_multibyte;
5299   int *charbuf = coding->charbuf;
5300   int *charbuf_end = charbuf + coding->charbuf_used;
5301   unsigned char *dst = coding->destination + coding->produced;
5302   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5303   int destination_charbuf[1024];
5304   int i, produced_chars = 0;
5305   Lisp_Object attrs, charset_list;
5306
5307   CODING_GET_INFO (coding, attrs, charset_list);
5308   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
5309
5310   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
5311   ccl.dst_multibyte = coding->dst_multibyte;
5312
5313   while (charbuf < charbuf_end)
5314     {
5315       ccl_driver (&ccl, charbuf, destination_charbuf,
5316                   charbuf_end - charbuf, 1024, charset_list);
5317       if (multibytep)
5318         {
               /* Reserve two bytes per produced element for a multibyte
                  destination.  PRODUCED_CHARS is not updated here;
                  presumably EMIT_ONE_BYTE (defined elsewhere) does it --
                  confirm.  */
5319           ASSURE_DESTINATION (ccl.produced * 2);
5320           for (i = 0; i < ccl.produced; i++)
5321             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5322         }
5323       else
5324         {
               /* Unibyte destination: store each byte directly.  */
5325           ASSURE_DESTINATION (ccl.produced);
5326           for (i = 0; i < ccl.produced; i++)
5327             *dst++ = destination_charbuf[i] & 0xFF;
5328           produced_chars += ccl.produced;
5329         }
5330       charbuf += ccl.consumed;
5331       if (ccl.status == CCL_STAT_QUIT
5332           || ccl.status == CCL_STAT_INVALID_CMD)
5333         break;
5334     }
5335
5336   switch (ccl.status)
5337     {
5338     case CCL_STAT_SUSPEND_BY_SRC:
5339       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5340       break;
5341     case CCL_STAT_SUSPEND_BY_DST:
5342       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5343       break;
5344     case CCL_STAT_QUIT:
5345     case CCL_STAT_INVALID_CMD:
5346       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5347       break;
5348     default:
5349       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5350       break;
5351     }
5352
5353   coding->produced_char += produced_chars;
5354   coding->produced = dst - coding->destination;
5355   return 0;
5356 }
5357
5358
5359 \f
5360 /*** 10, 11. no-conversion handlers ***/
5361
5362 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        No bytes are converted here: set CODING->chars_at_source to 1 and
        report the whole source as consumed.  The one exception is a
        source that ends with a CR while the eol type is dos and eol
        conversion is not inhibited: that CR is left unconsumed and the
        result is CODING_RESULT_INSUFFICIENT_SRC.  An empty source is
        reported as a success.  */
5363
5364 static void
5365 decode_coding_raw_text (coding)
5366      struct coding_system *coding;
5367 {
5368   int eol_crlf =
5369     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5370
5371   coding->chars_at_source = 1;
5372   coding->consumed_char = coding->src_chars;
5373   coding->consumed = coding->src_bytes;
       /* Look at the last byte only if there is one; with an empty source
          the index SRC_BYTES - 1 would be before the start of the
          source.  */
5374   if (eol_crlf
           && coding->src_bytes > 0
           && coding->source[coding->src_bytes - 1] == '\r')
5375     {
5376       coding->consumed_char--;
5377       coding->consumed--;
5378       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5379     }
5380   else
5381     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5382 }
5383
     /* Encode the characters in CODING->charbuf as raw text, storing the
        result after the first CODING->produced bytes of
        CODING->destination.  The four combinations of multibyte/unibyte
        source and destination are handled separately below.  The
        explicit return value is 0.  */
5384 static int
5385 encode_coding_raw_text (coding)
5386      struct coding_system *coding;
5387 {
5388   int multibytep = coding->dst_multibyte;
5389   int *charbuf = coding->charbuf;
5390   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5391   unsigned char *dst = coding->destination + coding->produced;
5392   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5393   int produced_chars = 0;
5394   int c;
5395
5396   if (multibytep)
5397     {
5398       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5399
5400       if (coding->src_multibyte)
             /* Multibyte source, multibyte destination: ASCII and raw
                8-bit characters are emitted as single bytes; any other
                character is expanded with CHAR_STRING_ADVANCE and each
                resulting byte is emitted with EMIT_ONE_BYTE.  */
5401         while (charbuf < charbuf_end)
5402           {
5403             ASSURE_DESTINATION (safe_room);
5404             c = *charbuf++;
5405             if (ASCII_CHAR_P (c))
5406               EMIT_ONE_ASCII_BYTE (c);
5407             else if (CHAR_BYTE8_P (c))
5408               {
5409                 c = CHAR_TO_BYTE8 (c);
5410                 EMIT_ONE_BYTE (c);
5411               }
5412             else
5413               {
5414                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5415
5416                 CHAR_STRING_ADVANCE (c, p1);
5417                 while (p0 < p1)
5418                   {
5419                     EMIT_ONE_BYTE (*p0);
5420                     p0++;
5421                   }
5422               }
5423           }
5424       else
             /* Unibyte source, multibyte destination: every element is
                emitted with EMIT_ONE_BYTE.  */
5425         while (charbuf < charbuf_end)
5426           {
5427             ASSURE_DESTINATION (safe_room);
5428             c = *charbuf++;
5429             EMIT_ONE_BYTE (c);
5430           }
5431     }
5432   else
5433     {
5434       if (coding->src_multibyte)
5435         {
5436           int safe_room = MAX_MULTIBYTE_LENGTH;
5437
               /* Multibyte source, unibyte destination: ASCII and raw
                  8-bit characters are stored as single bytes; any other
                  character is stored with CHAR_STRING_ADVANCE.  */
5438           while (charbuf < charbuf_end)
5439             {
5440               ASSURE_DESTINATION (safe_room);
5441               c = *charbuf++;
5442               if (ASCII_CHAR_P (c))
5443                 *dst++ = c;
5444               else if (CHAR_BYTE8_P (c))
5445                 *dst++ = CHAR_TO_BYTE8 (c);
5446               else
5447                 CHAR_STRING_ADVANCE (c, dst);
5448             }
5449         }
5450       else
5451         {
               /* Unibyte source, unibyte destination: ask for room for
                  all remaining elements at once, then copy one byte per
                  element.  */
5452           ASSURE_DESTINATION (charbuf_end - charbuf);
5453           while (charbuf < charbuf_end && dst < dst_end)
5454             *dst++ = *charbuf++;
5455         }
           /* For a unibyte destination, the count added to
              CODING->produced_char is the number of bytes stored by this
              call.  */
5456       produced_chars = dst - (coding->destination + coding->produced);
5457     }
5458   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5459   coding->produced_char += produced_chars;
5460   coding->produced = dst - coding->destination;
5461   return 0;
5462 }
5463
5464 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5465    Check if a text is encoded in a charset-based coding system.  If it
5466    is, return 1, else return 0.  */
5467
5468 static int
5469 detect_coding_charset (coding, detect_info)
5470      struct coding_system *coding;
5471      struct coding_detection_info *detect_info;
5472 {
5473   const unsigned char *src = coding->source, *src_base;
5474   const unsigned char *src_end = coding->source + coding->src_bytes;
5475   int multibytep = coding->src_multibyte;
5476   int consumed_chars = 0;
5477   Lisp_Object attrs, valids, name;
5478   int found = 0;
5479   int head_ascii = coding->head_ascii;
5480   int check_latin_extra = 0;
5481
5482   detect_info->checked |= CATEGORY_MASK_CHARSET;
5483
5484   coding = &coding_categories[coding_category_charset];
5485   attrs = CODING_ID_ATTRS (coding->id);
5486   valids = AREF (attrs, coding_attr_charset_valids);
5487   name = CODING_ID_NAME (coding->id);
5488   if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5489                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5490       || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5491                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5492     check_latin_extra = 1;
5493
5494   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5495     src += head_ascii;
5496
5497   while (1)
5498     {
5499       int c;
5500       Lisp_Object val;
5501       struct charset *charset;
5502       int dim, idx;
5503
5504       src_base = src;
5505       ONE_MORE_BYTE (c);
5506       if (c < 0)
5507         continue;
5508       val = AREF (valids, c);
5509       if (NILP (val))
5510         break;
5511       if (c >= 0x80)
5512         {
5513           if (c < 0xA0
5514               && check_latin_extra
5515               && (!VECTORP (Vlatin_extra_code_table)
5516                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5517             break;
5518           found = CATEGORY_MASK_CHARSET;
5519         }
5520       if (INTEGERP (val))
5521         {
5522           charset = CHARSET_FROM_ID (XFASTINT (val));
5523           dim = CHARSET_DIMENSION (charset);
5524           for (idx = 1; idx < dim; idx++)
5525             {
5526               if (src == src_end)
5527                 goto too_short;
5528               ONE_MORE_BYTE (c);
5529               if (c < charset->code_space[(dim - 1 - idx) * 2]
5530                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5531                 break;
5532             }
5533           if (idx < dim)
5534             break;
5535         }
5536       else
5537         {
5538           idx = 1;
5539           for (; CONSP (val); val = XCDR (val))
5540             {
5541               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5542               dim = CHARSET_DIMENSION (charset);
5543               while (idx < dim)
5544                 {
5545                   if (src == src_end)
5546                     goto too_short;
5547                   ONE_MORE_BYTE (c);
5548                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5549                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5550                     break;
5551                   idx++;
5552                 }
5553               if (idx == dim)
5554                 {
5555                   val = Qnil;
5556                   break;
5557                 }
5558             }
5559           if (CONSP (val))
5560             break;
5561         }
5562     }
5563  too_short:
5564   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5565   return 0;
5566
5567  no_more_source:
5568   detect_info->found |= found;
5569   return 1;
5570 }
5571
5572 static void
5573 decode_coding_charset (coding)
5574      struct coding_system *coding;
5575 {
5576   const unsigned char *src = coding->source + coding->consumed;
5577   const unsigned char *src_end = coding->source + coding->src_bytes;
5578   const unsigned char *src_base;
5579   int *charbuf = coding->charbuf + coding->charbuf_used;
5580   /* We may produce one charset annocation in one loop and one more at
5581      the end.  */
5582   int *charbuf_end
5583     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5584   int consumed_chars = 0, consumed_chars_base;
5585   int multibytep = coding->src_multibyte;
5586   Lisp_Object attrs, charset_list, valids;
5587   int char_offset = coding->produced_char;
5588   int last_offset = char_offset;
5589   int last_id = charset_ascii;
5590   int eol_crlf =
5591     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5592   int byte_after_cr = -1;
5593
5594   CODING_GET_INFO (coding, attrs, charset_list);
5595   valids = AREF (attrs, coding_attr_charset_valids);
5596
5597   while (1)
5598     {
5599       int c;
5600       Lisp_Object val;
5601       struct charset *charset;
5602       int dim;
5603       int len = 1;
5604       unsigned code;
5605
5606       src_base = src;
5607       consumed_chars_base = consumed_chars;
5608
5609       if (charbuf >= charbuf_end)
5610         {
5611           if (byte_after_cr >= 0)
5612             src_base--;
5613           break;
5614         }
5615
5616       if (byte_after_cr >= 0)
5617         {
5618           c = byte_after_cr;
5619           byte_after_cr = -1;
5620         }
5621       else
5622         {
5623           ONE_MORE_BYTE (c);
5624           if (eol_crlf && c == '\r')
5625             ONE_MORE_BYTE (byte_after_cr);
5626         }
5627       if (c < 0)
5628         goto invalid_code;
5629       code = c;
5630
5631       val = AREF (valids, c);
5632       if (! INTEGERP (val) && ! CONSP (val))
5633         goto invalid_code;
5634       if (INTEGERP (val))
5635         {
5636           charset = CHARSET_FROM_ID (XFASTINT (val));
5637           dim = CHARSET_DIMENSION (charset);
5638           while (len < dim)
5639             {
5640               ONE_MORE_BYTE (c);
5641               code = (code << 8) | c;
5642               len++;
5643             }
5644           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5645                               charset, code, c);
5646         }
5647       else
5648         {
5649           /* VAL is a list of charset IDs.  It is assured that the
5650              list is sorted by charset dimensions (smaller one
5651              comes first).  */
5652           while (CONSP (val))
5653             {
5654               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5655               dim = CHARSET_DIMENSION (charset);
5656               while (len < dim)
5657                 {
5658                   ONE_MORE_BYTE (c);
5659                   code = (code << 8) | c;
5660                   len++;
5661                 }
5662               CODING_DECODE_CHAR (coding, src, src_base,
5663                                   src_end, charset, code, c);
5664               if (c >= 0)
5665                 break;
5666               val = XCDR (val);
5667             }
5668         }
5669       if (c < 0)
5670         goto invalid_code;
5671       if (charset->id != charset_ascii
5672           && last_id != charset->id)
5673         {
5674           if (last_id != charset_ascii)
5675             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5676           last_id = charset->id;
5677           last_offset = char_offset;
5678         }
5679
5680       *charbuf++ = c;
5681       char_offset++;
5682       continue;
5683
5684     invalid_code:
5685       src = src_base;
5686       consumed_chars = consumed_chars_base;
5687       ONE_MORE_BYTE (c);
5688       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5689       char_offset++;
5690       coding->errors++;
5691     }
5692
5693  no_more_source:
5694   if (last_id != charset_ascii)
5695     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5696   coding->consumed_char += consumed_chars_base;
5697   coding->consumed = src_base - coding->source;
5698   coding->charbuf_used = charbuf - coding->charbuf;
5699 }
5700
5701 static int
5702 encode_coding_charset (coding)
5703      struct coding_system *coding;
5704 {
5705   int multibytep = coding->dst_multibyte;
5706   int *charbuf = coding->charbuf;
5707   int *charbuf_end = charbuf + coding->charbuf_used;
5708   unsigned char *dst = coding->destination + coding->produced;
5709   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5710   int safe_room = MAX_MULTIBYTE_LENGTH;
5711   int produced_chars = 0;
5712   Lisp_Object attrs, charset_list;
5713   int ascii_compatible;
5714   int c;
5715
5716   CODING_GET_INFO (coding, attrs, charset_list);
5717   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5718
5719   while (charbuf < charbuf_end)
5720     {
5721       struct charset *charset;
5722       unsigned code;
5723
5724       ASSURE_DESTINATION (safe_room);
5725       c = *charbuf++;
5726       if (ascii_compatible && ASCII_CHAR_P (c))
5727         EMIT_ONE_ASCII_BYTE (c);
5728       else if (CHAR_BYTE8_P (c))
5729         {
5730           c = CHAR_TO_BYTE8 (c);
5731           EMIT_ONE_BYTE (c);
5732         }
5733       else
5734         {
5735           charset = char_charset (c, charset_list, &code);
5736           if (charset)
5737             {
5738               if (CHARSET_DIMENSION (charset) == 1)
5739                 EMIT_ONE_BYTE (code);
5740               else if (CHARSET_DIMENSION (charset) == 2)
5741                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5742               else if (CHARSET_DIMENSION (charset) == 3)
5743                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5744               else
5745                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5746                                  (code >> 8) & 0xFF, code & 0xFF);
5747             }
5748           else
5749             {
5750               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5751                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5752               else
5753                 c = coding->default_char;
5754               EMIT_ONE_BYTE (c);
5755             }
5756         }
5757     }
5758
5759   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5760   coding->produced_char += produced_chars;
5761   coding->produced = dst - coding->destination;
5762   return 0;
5763 }
5764
5765 \f
5766 /*** 10. C library functions ***/
5767
5768 /* Setup coding context CODING from information about CODING_SYSTEM.
5769    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5770    CODING_SYSTEM is invalid, signal an error.  */
5771
5772 void
5773 setup_coding_system (coding_system, coding)
5774      Lisp_Object coding_system;
5775      struct coding_system *coding;
5776 {
5777   Lisp_Object attrs;
5778   Lisp_Object eol_type;
5779   Lisp_Object coding_type;
5780   Lisp_Object val;
5781
5782   if (NILP (coding_system))
5783     coding_system = Qundecided;
5784
5785   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5786
5787   attrs = CODING_ID_ATTRS (coding->id);
5788   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5789
5790   coding->mode = 0;
5791   coding->head_ascii = -1;
5792   if (VECTORP (eol_type))
5793     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5794                             | CODING_REQUIRE_DETECTION_MASK);
5795   else if (! EQ (eol_type, Qunix))
5796     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5797                             | CODING_REQUIRE_ENCODING_MASK);
5798   else
5799     coding->common_flags = 0;
5800   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5801     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5802   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5803     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5804   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5805     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5806
5807   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5808   coding->max_charset_id = SCHARS (val) - 1;
5809   coding->safe_charsets = SDATA (val);
5810   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5811   coding->carryover_bytes = 0;
5812
5813   coding_type = CODING_ATTR_TYPE (attrs);
5814   if (EQ (coding_type, Qundecided))
5815     {
5816       coding->detector = NULL;
5817       coding->decoder = decode_coding_raw_text;
5818       coding->encoder = encode_coding_raw_text;
5819       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5820     }
5821   else if (EQ (coding_type, Qiso_2022))
5822     {
5823       int i;
5824       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5825
5826       /* Invoke graphic register 0 to plane 0.  */
5827       CODING_ISO_INVOCATION (coding, 0) = 0;
5828       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5829       CODING_ISO_INVOCATION (coding, 1)
5830         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5831       /* Setup the initial status of designation.  */
5832       for (i = 0; i < 4; i++)
5833         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5834       /* Not single shifting initially.  */
5835       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5836       /* Beginning of buffer should also be regarded as bol. */
5837       CODING_ISO_BOL (coding) = 1;
5838       coding->detector = detect_coding_iso_2022;
5839       coding->decoder = decode_coding_iso_2022;
5840       coding->encoder = encode_coding_iso_2022;
5841       if (flags & CODING_ISO_FLAG_SAFE)
5842         coding->mode |= CODING_MODE_SAFE_ENCODING;
5843       coding->common_flags
5844         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5845             | CODING_REQUIRE_FLUSHING_MASK);
5846       if (flags & CODING_ISO_FLAG_COMPOSITION)
5847         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5848       if (flags & CODING_ISO_FLAG_DESIGNATION)
5849         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5850       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5851         {
5852           setup_iso_safe_charsets (attrs);
5853           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5854           coding->max_charset_id = SCHARS (val) - 1;
5855           coding->safe_charsets = SDATA (val);
5856         }
5857       CODING_ISO_FLAGS (coding) = flags;
5858       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5859       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5860       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5861       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5862     }
5863   else if (EQ (coding_type, Qcharset))
5864     {
5865       coding->detector = detect_coding_charset;
5866       coding->decoder = decode_coding_charset;
5867       coding->encoder = encode_coding_charset;
5868       coding->common_flags
5869         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5870     }
5871   else if (EQ (coding_type, Qutf_8))
5872     {
5873       val = AREF (attrs, coding_attr_utf_bom);
5874       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5875                                    : EQ (val, Qt) ? utf_with_bom
5876                                    : utf_without_bom);
5877       coding->detector = detect_coding_utf_8;
5878       coding->decoder = decode_coding_utf_8;
5879       coding->encoder = encode_coding_utf_8;
5880       coding->common_flags
5881         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5882       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5883         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5884     }
5885   else if (EQ (coding_type, Qutf_16))
5886     {
5887       val = AREF (attrs, coding_attr_utf_bom);
5888       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5889                                     : EQ (val, Qt) ? utf_with_bom
5890                                     : utf_without_bom);
5891       val = AREF (attrs, coding_attr_utf_16_endian);
5892       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5893                                        : utf_16_little_endian);
5894       CODING_UTF_16_SURROGATE (coding) = 0;
5895       coding->detector = detect_coding_utf_16;
5896       coding->decoder = decode_coding_utf_16;
5897       coding->encoder = encode_coding_utf_16;
5898       coding->common_flags
5899         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5900       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5901         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5902     }
5903   else if (EQ (coding_type, Qccl))
5904     {
5905       coding->detector = detect_coding_ccl;
5906       coding->decoder = decode_coding_ccl;
5907       coding->encoder = encode_coding_ccl;
5908       coding->common_flags
5909         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5910             | CODING_REQUIRE_FLUSHING_MASK);
5911     }
5912   else if (EQ (coding_type, Qemacs_mule))
5913     {
5914       coding->detector = detect_coding_emacs_mule;
5915       coding->decoder = decode_coding_emacs_mule;
5916       coding->encoder = encode_coding_emacs_mule;
5917       coding->common_flags
5918         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5919       coding->spec.emacs_mule.full_support = 1;
5920       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5921           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5922         {
5923           Lisp_Object tail, safe_charsets;
5924           int max_charset_id = 0;
5925
5926           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5927                tail = XCDR (tail))
5928             if (max_charset_id < XFASTINT (XCAR (tail)))
5929               max_charset_id = XFASTINT (XCAR (tail));
5930           safe_charsets = make_uninit_string (max_charset_id + 1);
5931           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5932           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5933                tail = XCDR (tail))
5934             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5935           coding->max_charset_id = max_charset_id;
5936           coding->safe_charsets = SDATA (safe_charsets);
5937           coding->spec.emacs_mule.full_support = 1;
5938         }
5939       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5940       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5941     }
5942   else if (EQ (coding_type, Qshift_jis))
5943     {
5944       coding->detector = detect_coding_sjis;
5945       coding->decoder = decode_coding_sjis;
5946       coding->encoder = encode_coding_sjis;
5947       coding->common_flags
5948         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5949     }
5950   else if (EQ (coding_type, Qbig5))
5951     {
5952       coding->detector = detect_coding_big5;
5953       coding->decoder = decode_coding_big5;
5954       coding->encoder = encode_coding_big5;
5955       coding->common_flags
5956         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5957     }
5958   else                          /* EQ (coding_type, Qraw_text) */
5959     {
5960       coding->detector = NULL;
5961       coding->decoder = decode_coding_raw_text;
5962       coding->encoder = encode_coding_raw_text;
5963       if (! EQ (eol_type, Qunix))
5964         {
5965           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5966           if (! VECTORP (eol_type))
5967             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5968         }
5969
5970     }
5971
5972   return;
5973 }
5974
5975 /* Return a list of charsets supported by CODING.  */
5976
5977 Lisp_Object
5978 coding_charset_list (coding)
5979      struct coding_system *coding;
5980 {
5981   Lisp_Object attrs, charset_list;
5982
5983   CODING_GET_INFO (coding, attrs, charset_list);
5984   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5985     {
5986       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5987
5988       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5989         charset_list = Viso_2022_charset_list;
5990     }
5991   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5992     {
5993       charset_list = Vemacs_mule_charset_list;
5994     }
5995   return charset_list;
5996 }
5997
5998
5999 /* Return a list of charsets supported by CODING-SYSTEM.  */
6000
6001 Lisp_Object
6002 coding_system_charset_list (coding_system)
6003      Lisp_Object coding_system;
6004 {
6005   int id;
6006   Lisp_Object attrs, charset_list;
6007
6008   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
6009   attrs = CODING_ID_ATTRS (id);
6010
6011   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
6012     {
6013       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
6014
6015       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
6016         charset_list = Viso_2022_charset_list;
6017       else
6018         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6019     }
6020   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
6021     {
6022       charset_list = Vemacs_mule_charset_list;
6023     }
6024   else
6025     {
6026       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6027     }
6028   return charset_list;
6029 }
6030
6031
6032 /* Return raw-text or one of its subsidiaries that has the same
6033    eol_type as CODING-SYSTEM.  */
6034
6035 Lisp_Object
6036 raw_text_coding_system (coding_system)
6037      Lisp_Object coding_system;
6038 {
6039   Lisp_Object spec, attrs;
6040   Lisp_Object eol_type, raw_text_eol_type;
6041
6042   if (NILP (coding_system))
6043     return Qraw_text;
6044   spec = CODING_SYSTEM_SPEC (coding_system);
6045   attrs = AREF (spec, 0);
6046
6047   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6048     return coding_system;
6049
6050   eol_type = AREF (spec, 2);
6051   if (VECTORP (eol_type))
6052     return Qraw_text;
6053   spec = CODING_SYSTEM_SPEC (Qraw_text);
6054   raw_text_eol_type = AREF (spec, 2);
6055   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6056           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6057           : AREF (raw_text_eol_type, 2));
6058 }
6059
6060
6061 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6062    does, return one of the subsidiary that has the same eol-spec as
6063    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
6064    inherit end-of-line format from the system's setting
6065    (system_eol_type).  */
6066
6067 Lisp_Object
6068 coding_inherit_eol_type (coding_system, parent)
6069      Lisp_Object coding_system, parent;
6070 {
6071   Lisp_Object spec, eol_type;
6072
6073   if (NILP (coding_system))
6074     coding_system = Qraw_text;
6075   spec = CODING_SYSTEM_SPEC (coding_system);
6076   eol_type = AREF (spec, 2);
6077   if (VECTORP (eol_type))
6078     {
6079       Lisp_Object parent_eol_type;
6080
6081       if (! NILP (parent))
6082         {
6083           Lisp_Object parent_spec;
6084
6085           parent_spec = CODING_SYSTEM_SPEC (parent);
6086           parent_eol_type = AREF (parent_spec, 2);
6087         }
6088       else
6089         parent_eol_type = system_eol_type;
6090       if (EQ (parent_eol_type, Qunix))
6091         coding_system = AREF (eol_type, 0);
6092       else if (EQ (parent_eol_type, Qdos))
6093         coding_system = AREF (eol_type, 1);
6094       else if (EQ (parent_eol_type, Qmac))
6095         coding_system = AREF (eol_type, 2);
6096     }
6097   return coding_system;
6098 }
6099
6100 /* Emacs has a mechanism to automatically detect a coding system if it
6101    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
6102    it's impossible to distinguish some coding systems accurately
6103    because they use the same range of codes.  So, at first, coding
6104    systems are categorized into the following categories:
6105
6106    o coding-category-emacs-mule
6107
6108         The category for a coding system which has the same code range
6109         as Emacs' internal format.  Assigned the coding-system (Lisp
6110         symbol) `emacs-mule' by default.
6111
6112    o coding-category-sjis
6113
6114         The category for a coding system which has the same code range
6115         as SJIS.  Assigned the coding-system (Lisp
6116         symbol) `japanese-shift-jis' by default.
6117
6118    o coding-category-iso-7
6119
6120         The category for a coding system which has the same code range
6121         as ISO2022 of 7-bit environment.  This doesn't use any locking
6122         shift and single shift functions.  This can encode/decode all
6123         charsets.  Assigned the coding-system (Lisp symbol)
6124         `iso-2022-7bit' by default.
6125
6126    o coding-category-iso-7-tight
6127
6128         Same as coding-category-iso-7 except that this can
6129         encode/decode only the specified charsets.
6130
6131    o coding-category-iso-8-1
6132
6133         The category for a coding system which has the same code range
6134         as ISO2022 of 8-bit environment and graphic plane 1 used only
6135         for DIMENSION1 charset.  This doesn't use any locking shift
6136         and single shift functions.  Assigned the coding-system (Lisp
6137         symbol) `iso-latin-1' by default.
6138
6139    o coding-category-iso-8-2
6140
6141         The category for a coding system which has the same code range
6142         as ISO2022 of 8-bit environment and graphic plane 1 used only
6143         for DIMENSION2 charset.  This doesn't use any locking shift
6144         and single shift functions.  Assigned the coding-system (Lisp
6145         symbol) `japanese-iso-8bit' by default.
6146
6147    o coding-category-iso-7-else
6148
6149         The category for a coding system which has the same code range
6150         as ISO2022 of 7-bit environment but uses locking shift or
6151         single shift functions.  Assigned the coding-system (Lisp
6152         symbol) `iso-2022-7bit-lock' by default.
6153
6154    o coding-category-iso-8-else
6155
6156         The category for a coding system which has the same code range
6157         as ISO2022 of 8-bit environment but uses locking shift or
6158         single shift functions.  Assigned the coding-system (Lisp
6159         symbol) `iso-2022-8bit-ss2' by default.
6160
6161    o coding-category-big5
6162
6163         The category for a coding system which has the same code range
6164         as BIG5.  Assigned the coding-system (Lisp symbol)
6165         `cn-big5' by default.
6166
6167    o coding-category-utf-8
6168
6169         The category for a coding system which has the same code range
6170         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6171         symbol) `utf-8' by default.
6172
6173    o coding-category-utf-16-be
6174
6175         The category for a coding system in which a text has an
6176         Unicode signature (cf. Unicode Standard) in the order of BIG
6177         endian at the head.  Assigned the coding-system (Lisp symbol)
6178         `utf-16-be' by default.
6179
6180    o coding-category-utf-16-le
6181
6182         The category for a coding system in which a text has an
6183         Unicode signature (cf. Unicode Standard) in the order of
6184         LITTLE endian at the head.  Assigned the coding-system (Lisp
6185         symbol) `utf-16-le' by default.
6186
6187    o coding-category-ccl
6188
6189         The category for a coding system of which encoder/decoder is
6190         written in CCL programs.  The default value is nil, i.e., no
6191         coding system is assigned.
6192
6193    o coding-category-binary
6194
6195         The category for a coding system not categorized in any of the
6196         above.  Assigned the coding-system (Lisp symbol)
6197         `no-conversion' by default.
6198
6199    Each of them is a Lisp symbol and the value is an actual
6200    `coding-system's (this is also a Lisp symbol) assigned by a user.
6201    What Emacs does actually is to detect a category of coding system.
6202    Then, it uses a `coding-system' assigned to it.  If Emacs can't
6203    decide only one possible category, it selects a category of the
6204    highest priority.  Priorities of categories are also specified by a
6205    user in a Lisp variable `coding-category-list'.
6206
6207 */
6208
6209 #define EOL_SEEN_NONE   0
6210 #define EOL_SEEN_LF     1
6211 #define EOL_SEEN_CR     2
6212 #define EOL_SEEN_CRLF   4
6213
6214 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6215    SOURCE is encoded.  If CATEGORY is one of
6216    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6217    two-byte, else they are encoded by one-byte.
6218
6219    Return one of EOL_SEEN_XXX.  */
6220
6221 #define MAX_EOL_CHECK_COUNT 3
6222
6223 static int
6224 detect_eol (source, src_bytes, category)
6225      const unsigned char *source;
6226      EMACS_INT src_bytes;
6227      enum coding_category category;
6228 {
6229   const unsigned char *src = source, *src_end = src + src_bytes;
6230   unsigned char c;
6231   int total  = 0;
6232   int eol_seen = EOL_SEEN_NONE;
6233
6234   if ((1 << category) & CATEGORY_MASK_UTF_16)
6235     {
6236       int msb, lsb;
6237
6238       msb = category == (coding_category_utf_16_le
6239                          | coding_category_utf_16_le_nosig);
6240       lsb = 1 - msb;
6241
6242       while (src + 1 < src_end)
6243         {
6244           c = src[lsb];
6245           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6246             {
6247               int this_eol;
6248
6249               if (c == '\n')
6250                 this_eol = EOL_SEEN_LF;
6251               else if (src + 3 >= src_end
6252                        || src[msb + 2] != 0
6253                        || src[lsb + 2] != '\n')
6254                 this_eol = EOL_SEEN_CR;
6255               else
6256                 {
6257                   this_eol = EOL_SEEN_CRLF;
6258                   src += 2;
6259                 }
6260
6261               if (eol_seen == EOL_SEEN_NONE)
6262                 /* This is the first end-of-line.  */
6263                 eol_seen = this_eol;
6264               else if (eol_seen != this_eol)
6265                 {
6266                   /* The found type is different from what found before.
6267                      Allow for stray ^M characters in DOS EOL files.  */
6268                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6269                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6270                     eol_seen = EOL_SEEN_CRLF;
6271                   else
6272                     {
6273                       eol_seen = EOL_SEEN_LF;
6274                       break;
6275                     }
6276                 }
6277               if (++total == MAX_EOL_CHECK_COUNT)
6278                 break;
6279             }
6280           src += 2;
6281         }
6282     }
6283   else
6284     {
6285       while (src < src_end)
6286         {
6287           c = *src++;
6288           if (c == '\n' || c == '\r')
6289             {
6290               int this_eol;
6291
6292               if (c == '\n')
6293                 this_eol = EOL_SEEN_LF;
6294               else if (src >= src_end || *src != '\n')
6295                 this_eol = EOL_SEEN_CR;
6296               else
6297                 this_eol = EOL_SEEN_CRLF, src++;
6298
6299               if (eol_seen == EOL_SEEN_NONE)
6300                 /* This is the first end-of-line.  */
6301                 eol_seen = this_eol;
6302               else if (eol_seen != this_eol)
6303                 {
6304                   /* The found type is different from what found before.
6305                      Allow for stray ^M characters in DOS EOL files.  */
6306                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6307                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6308                     eol_seen = EOL_SEEN_CRLF;
6309                   else
6310                     {
6311                       eol_seen = EOL_SEEN_LF;
6312                       break;
6313                     }
6314                 }
6315               if (++total == MAX_EOL_CHECK_COUNT)
6316                 break;
6317             }
6318         }
6319     }
6320   return eol_seen;
6321 }
6322
6323
6324 static Lisp_Object
6325 adjust_coding_eol_type (coding, eol_seen)
6326      struct coding_system *coding;
6327      int eol_seen;
6328 {
6329   Lisp_Object eol_type;
6330
6331   eol_type = CODING_ID_EOL_TYPE (coding->id);
6332   if (eol_seen & EOL_SEEN_LF)
6333     {
6334       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6335       eol_type = Qunix;
6336     }
6337   else if (eol_seen & EOL_SEEN_CRLF)
6338     {
6339       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6340       eol_type = Qdos;
6341     }
6342   else if (eol_seen & EOL_SEEN_CR)
6343     {
6344       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6345       eol_type = Qmac;
6346     }
6347   return eol_type;
6348 }
6349
6350 /* Detect how a text specified in CODING is encoded.  If a coding
6351    system is detected, update fields of CODING by the detected coding
6352    system.  */
/* Three cases are handled, keyed on the attributes of CODING->id:

   - type `undecided': scan the source bytes, then consult the
     detectors of the coding categories in the order given by
     coding_priorities;
   - category utf-8-auto: choose between the car and the cdr of the
     coding_attr_utf_bom attribute according to whether
     detect_coding_utf_8 reports CATEGORY_MASK_UTF_8_SIG;
   - category utf-16-auto: likewise, choosing by the
     CATEGORY_MASK_UTF_16_LE / CATEGORY_MASK_UTF_16_BE bits.

   On entry CODING->consumed, consumed_char, produced and
   produced_char are reset to 0 and CODING->head_ascii is recomputed.
   CODING->mode is restored to its value on entry before returning.  */
6353
6354 void
6355 detect_coding (coding)
6356      struct coding_system *coding;
6357 {
6358   const unsigned char *src, *src_end;
6359   int saved_mode = coding->mode;
6360
6361   coding->consumed = coding->consumed_char = 0;
6362   coding->produced = coding->produced_char = 0;
6363   coding_set_source (coding);
6364
6365   src_end = coding->source + coding->src_bytes;
6366   coding->head_ascii = 0;
6367
6368   /* If we have not yet decided the text encoding type, detect it
6369      now.  */
6370   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6371     {
6372       int c, i;
6373       struct coding_detection_info detect_info;
6374       int null_byte_found = 0, eight_bit_found = 0;
6375
6376       detect_info.checked = detect_info.found = detect_info.rejected = 0;
            /* Scan the source once.  head_ascii counts the 7-bit bytes
               scanned before the first 8-bit byte.  The scan stops early
               as soon as both a null byte and an 8-bit byte have been
               seen, or when the ISO-2022 detector has scanned the whole
               data.  */
6377       for (src = coding->source; src < src_end; src++)
6378         {
6379           c = *src;
6380           if (c & 0x80)
6381             {
6382               eight_bit_found = 1;
6383               if (null_byte_found)
6384                 break;
6385             }
6386           else if (c < 0x20)
6387             {
                    /* The ISO-2022 detector is tried at most once: only
                       while detect_info.checked is still zero.  */
6388               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6389                   && ! inhibit_iso_escape_detection
6390                   && ! detect_info.checked)
6391                 {
6392                   if (detect_coding_iso_2022 (coding, &detect_info))
6393                     {
6394                       /* We have scanned the whole data.  */
6395                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6396                         {
6397                           /* We didn't find an 8-bit code.  We may
6398                              have found a null-byte, but it's very
6399                              rare that a binary file confirm to
6400                              ISO-2022.  */
6401                           src = src_end;
6402                           coding->head_ascii = src - coding->source;
6403                         }
6404                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6405                       break;
6406                     }
6407                 }
6408               else if (! c && !inhibit_null_byte_detection)
6409                 {
6410                   null_byte_found = 1;
6411                   if (eight_bit_found)
6412                     break;
6413                 }
6414               if (! eight_bit_found)
6415                 coding->head_ascii++;
6416             }
6417           else if (! eight_bit_found)
6418             coding->head_ascii++;
6419         }
6420
            /* Run category detection only if the scan saw a null byte or
               an 8-bit byte, did not count every source byte in
               head_ascii, or the ISO-2022 detector reported a match.
               Otherwise CODING is left as it is.  */
6421       if (null_byte_found || eight_bit_found
6422           || coding->head_ascii < coding->src_bytes
6423           || detect_info.found)
6424         {
6425           enum coding_category category;
6426           struct coding_system *this;
6427
6428           if (coding->head_ascii == coding->src_bytes)
6429             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6430             for (i = 0; i < coding_category_raw_text; i++)
6431               {
6432                 category = coding_priorities[i];
6433                 this = coding_categories + category;
6434                 if (detect_info.found & (1 << category))
6435                   break;
6436               }
6437           else
6438             {
6439               if (null_byte_found)
6440                 {
                        /* Mark every category other than the UTF-16 ones
                           as already checked and rejected, so that only
                           the UTF-16 detectors can still run below.  */
6441                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6442                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6443                 }
                    /* Walk the categories in priority order and stop at the
                       first one that is found, calling its detector if it
                       has not been checked yet.  */
6444               for (i = 0; i < coding_category_raw_text; i++)
6445                 {
6446                   category = coding_priorities[i];
6447                   this = coding_categories + category;
6448                   if (this->id < 0)
6449                     {
6450                       /* No coding system of this category is defined.  */
6451                       detect_info.rejected |= (1 << category);
6452                     }
6453                   else if (category >= coding_category_raw_text)
6454                     continue;
6455                   else if (detect_info.checked & (1 << category))
6456                     {
6457                       if (detect_info.found & (1 << category))
6458                         break;
6459                     }
6460                   else if ((*(this->detector)) (coding, &detect_info)
6461                            && detect_info.found & (1 << category))
6462                     {
                            /* NOTE(review): CATEGORY is reassigned here, but
                               the setup below uses THIS, which still points
                               at the utf-16-auto entry, and CATEGORY is not
                               read again in this function -- confirm whether
                               that is intended.  */
6463                       if (category == coding_category_utf_16_auto)
6464                         {
6465                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6466                             category = coding_category_utf_16_le;
6467                           else
6468                             category = coding_category_utf_16_be;
6469                         }
6470                       break;
6471                     }
6472                 }
6473             }
6474
                /* I is below coding_category_raw_text exactly when one of
                   the loops above stopped on a found category.  Otherwise
                   fall back to no-conversion if a null byte was seen, to
                   raw-text if every category was rejected, or else (when
                   at least one category was rejected) to the
                   highest-priority category that was not rejected.  */
6475           if (i < coding_category_raw_text)
6476             setup_coding_system (CODING_ID_NAME (this->id), coding);
6477           else if (null_byte_found)
6478             setup_coding_system (Qno_conversion, coding);
6479           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6480                    == CATEGORY_MASK_ANY)
6481             setup_coding_system (Qraw_text, coding);
6482           else if (detect_info.rejected)
6483             for (i = 0; i < coding_category_raw_text; i++)
6484               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6485                 {
6486                   this = coding_categories + coding_priorities[i];
6487                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6488                   break;
6489                 }
6490         }
6491     }
6492   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6493            == coding_category_utf_8_auto)
6494     {
6495       Lisp_Object coding_systems;
6496       struct coding_detection_info detect_info;
6497
            /* The utf_bom attribute is used only if it is a cons: its car
               is selected when a UTF-8 signature is reported, its cdr
               otherwise.  */
6498       coding_systems
6499         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6500       detect_info.found = detect_info.rejected = 0;
6501       coding->head_ascii = 0;
6502       if (CONSP (coding_systems)
6503           && detect_coding_utf_8 (coding, &detect_info))
6504         {
6505           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6506             setup_coding_system (XCAR (coding_systems), coding);
6507           else
6508             setup_coding_system (XCDR (coding_systems), coding);
6509         }
6510     }
6511   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6512            == coding_category_utf_16_auto)
6513     {
6514       Lisp_Object coding_systems;
6515       struct coding_detection_info detect_info;
6516
            /* As above, but the car is selected for little endian and the
               cdr for big endian; if neither bit is reported, CODING is
               left unchanged.  */
6517       coding_systems
6518         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6519       detect_info.found = detect_info.rejected = 0;
6520       coding->head_ascii = 0;
6521       if (CONSP (coding_systems)
6522           && detect_coding_utf_16 (coding, &detect_info))
6523         {
6524           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6525             setup_coding_system (XCAR (coding_systems), coding);
6526           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6527             setup_coding_system (XCDR (coding_systems), coding);
6528         }
6529     }
6530   coding->mode = saved_mode;
6531 }
6532
6533
/* Convert the end-of-line sequences in the text just produced for
   CODING to '\n', in place.

   The text is the CODING->produced bytes that start at
   CODING->destination when CODING->dst_object is nil, and at byte
   position CODING->dst_pos_byte otherwise.  Nothing is done if the
   eol-type of CODING->id is Qunix or if inhibit_eol_conversion is
   nonzero.  If the eol-type is still a vector, the text is scanned
   to choose one and adjust_coding_eol_type updates CODING
   accordingly.  For Qdos, CODING->produced and CODING->produced_char
   are decreased by the number of '\r' bytes deleted.  */
6534 static void
6535 decode_eol (coding)
6536      struct coding_system *coding;
6537 {
6538   Lisp_Object eol_type;
6539   unsigned char *p, *pbeg, *pend;
6540
6541   eol_type = CODING_ID_EOL_TYPE (coding->id);
6542   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6543     return;
6544
6545   if (NILP (coding->dst_object))
6546     pbeg = coding->destination;
6547   else
6548     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6549   pend = pbeg + coding->produced;
6550
6551   if (VECTORP (eol_type))
6552     {
            /* The eol-type is not decided yet: collect, as a bit mask,
               every kind of line ending that occurs in the text.  */
6553       int eol_seen = EOL_SEEN_NONE;
6554
6555       for (p = pbeg; p < pend; p++)
6556         {
6557           if (*p == '\n')
6558             eol_seen |= EOL_SEEN_LF;
6559           else if (*p == '\r')
6560             {
6561               if (p + 1 < pend && *(p + 1) == '\n')
6562                 {
6563                   eol_seen |= EOL_SEEN_CRLF;
6564                   p++;
6565                 }
6566               else
6567                 eol_seen |= EOL_SEEN_CR;
6568             }
6569         }
6570       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6571       if ((eol_seen & EOL_SEEN_CRLF) != 0
6572           && (eol_seen & EOL_SEEN_CR) != 0
6573           && (eol_seen & EOL_SEEN_LF) == 0)
6574         eol_seen = EOL_SEEN_CRLF;
            /* Any other mixture of line-ending kinds is treated as LF.  */
6575       else if (eol_seen != EOL_SEEN_NONE
6576           && eol_seen != EOL_SEEN_LF
6577           && eol_seen != EOL_SEEN_CRLF
6578           && eol_seen != EOL_SEEN_CR)
6579         eol_seen = EOL_SEEN_LF;
6580       if (eol_seen != EOL_SEEN_NONE)
6581         eol_type = adjust_coding_eol_type (coding, eol_seen);
6582     }
6583
6584   if (EQ (eol_type, Qmac))
6585     {
            /* Every '\r' becomes '\n'; the length does not change.  */
6586       for (p = pbeg; p < pend; p++)
6587         if (*p == '\r')
6588           *p = '\n';
6589     }
6590   else if (EQ (eol_type, Qdos))
6591     {
            /* N counts the '\r' bytes removed.  */
6592       int n = 0;
6593
6594       if (NILP (coding->dst_object))
6595         {
6596           /* Start deleting '\r' from the tail to minimize the memory
6597              movement.  */
                /* The scan starts at PEND - 2, so the final byte is never
                   examined.  Every '\r' met is removed, whether or not a
                   '\n' follows it.  */
6598           for (p = pend - 2; p >= pbeg; p--)
6599             if (*p == '\r')
6600               {
6601                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6602                 n++;
6603               }
6604         }
6605       else
6606         {
                /* Here only a '\r' immediately followed by '\n' is deleted,
                   one character position at a time.  */
6607           int pos_byte = coding->dst_pos_byte;
6608           int pos = coding->dst_pos;
6609           int pos_end = pos + coding->produced_char - 1;
6610
6611           while (pos < pos_end)
6612             {
6613               p = BYTE_POS_ADDR (pos_byte);
6614               if (*p == '\r' && p[1] == '\n')
6615                 {
6616                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6617                   n++;
6618                   pos_end--;
6619                 }
                    /* NOTE(review): P was fetched before del_range_2 and is
                       dereferenced again below -- confirm that it is still
                       valid after the deletion.  */
6620               pos++;
6621               if (coding->dst_multibyte)
6622                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6623               else
6624                 pos_byte++;
6625             }
6626         }
6627       coding->produced -= n;
6628       coding->produced_char -= n;
6629     }
6630 }
6631
6632
6633 /* Return a translation table (or list of them) from coding system
6634    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6635    decoding (ENCODEP is zero). */
6636
6637 static Lisp_Object
6638 get_translation_table (attrs, encodep, max_lookup)
6639      Lisp_Object attrs;
6640      int encodep, *max_lookup;
6641 {
6642   Lisp_Object standard, translation_table;
6643   Lisp_Object val;
6644
6645   if (NILP (Venable_character_translation))
6646     {
6647       if (max_lookup)
6648         *max_lookup = 0;
6649       return Qnil;
6650     }
6651   if (encodep)
6652     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6653       standard = Vstandard_translation_table_for_encode;
6654   else
6655     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6656       standard = Vstandard_translation_table_for_decode;
6657   if (NILP (translation_table))
6658     translation_table = standard;
6659   else
6660     {
6661       if (SYMBOLP (translation_table))
6662         translation_table = Fget (translation_table, Qtranslation_table);
6663       else if (CONSP (translation_table))
6664         {
6665           translation_table = Fcopy_sequence (translation_table);
6666           for (val = translation_table; CONSP (val); val = XCDR (val))
6667             if (SYMBOLP (XCAR (val)))
6668               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6669         }
6670       if (CHAR_TABLE_P (standard))
6671         {
6672           if (CONSP (translation_table))
6673             translation_table = nconc2 (translation_table,
6674                                         Fcons (standard, Qnil));
6675           else
6676             translation_table = Fcons (translation_table,
6677                                        Fcons (standard, Qnil));
6678         }
6679     }
6680
6681   if (max_lookup)
6682     {
6683       *max_lookup = 1;
6684       if (CHAR_TABLE_P (translation_table)
6685           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6686         {
6687           val = XCHAR_TABLE (translation_table)->extras[1];
6688           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6689             *max_lookup = XFASTINT (val);
6690         }
6691       else if (CONSP (translation_table))
6692         {
6693           Lisp_Object tail, val;
6694
6695           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6696             if (CHAR_TABLE_P (XCAR (tail))
6697                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6698               {
6699                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6700                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6701                   *max_lookup = XFASTINT (val);
6702               }
6703         }
6704     }
6705   return translation_table;
6706 }
6707
/* Look up character C in TABLE and leave the outcome in C and TRANS.

   TABLE may be a char-table or a list.  List elements that are not
   char-tables are skipped; a TABLE of any other kind leaves C
   unchanged and TRANS set to Qnil.  When a char-table maps C to a
   character, C is replaced by that character and TRANS is reset to
   Qnil (for a list, the lookup then goes on in the following tables
   with the new C).  When the entry is non-nil but not a character, it
   is left in TRANS and, for a list, the search stops there.

   C and TRANS must be lvalues.  TABLE, C and TRANS are evaluated more
   than once.  */
6708 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
6709   do {                                                          \
6710     trans = Qnil;                                               \
6711     if (CHAR_TABLE_P (table))                                   \
6712       {                                                         \
6713         trans = CHAR_TABLE_REF (table, c);                      \
6714         if (CHARACTERP (trans))                                 \
6715           c = XFASTINT (trans), trans = Qnil;                   \
6716       }                                                         \
6717     else if (CONSP (table))                                     \
6718       {                                                         \
6719         Lisp_Object tail;                                       \
6720                                                                 \
6721         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
6722           if (CHAR_TABLE_P (XCAR (tail)))                       \
6723             {                                                   \
6724               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
6725               if (CHARACTERP (trans))                           \
6726                 c = XFASTINT (trans), trans = Qnil;             \
6727               else if (! NILP (trans))                          \
6728                 break;                                          \
6729             }                                                   \
6730       }                                                         \
6731   } while (0)
6732
6733
6734 /* Return a translation of character(s) at BUF according to TRANS.
6735    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6736    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6737    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6738    translation is found, and Qnil if not found..
6739    If BUF is too short to lookup characters in FROM, return Qt.  */
6740
6741 static Lisp_Object
6742 get_translation (trans, buf, buf_end)
6743      Lisp_Object trans;
6744      int *buf, *buf_end;
6745 {
6746
6747   if (INTEGERP (trans))
6748     return trans;
6749   for (; CONSP (trans); trans = XCDR (trans))
6750     {
6751       Lisp_Object val = XCAR (trans);
6752       Lisp_Object from = XCAR (val);
6753       int len = ASIZE (from);
6754       int i;
6755
6756       for (i = 0; i < len; i++)
6757         {
6758           if (buf + i == buf_end)
6759             return Qt;
6760           if (XINT (AREF (from, i)) != buf[i])
6761             break;
6762         }
6763       if (i == len)
6764         return val;
6765     }
6766   return Qnil;
6767 }
6768
6769
/* Write the characters decoded so far into the destination of CODING.

   If CODING->chars_at_source is zero, the characters are taken from
   CODING->charbuf (CODING->charbuf_used elements).  Each non-negative
   element is looked up in TRANSLATION_TABLE before being written;
   negative elements are annotation data and are skipped.  Otherwise
   the CODING->consumed bytes at CODING->source are copied, going
   through ONE_MORE_BYTE or EMIT_ONE_BYTE when CODING->src_multibyte
   and CODING->dst_multibyte differ.

   If a translation needs more characters than remain in charbuf
   (get_translation returned Qt) and LAST_BLOCK is zero, processing
   stops there and the remaining elements are reported as carryover.
   With LAST_BLOCK nonzero the character is written without that
   multi-character translation.

   The destination is grown with alloc_destination when needed.
   CODING->produced and CODING->produced_char are advanced, and
   insert_from_gap is called when the destination is a buffer and at
   least one character was produced.

   Return the number of elements left unprocessed at the end of
   charbuf; this is always 0 when the source is CODING->source.  */
6770 static int
6771 produce_chars (coding, translation_table, last_block)
6772      struct coding_system *coding;
6773      Lisp_Object translation_table;
6774      int last_block;
6775 {
6776   unsigned char *dst = coding->destination + coding->produced;
6777   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6778   EMACS_INT produced;
6779   EMACS_INT produced_chars = 0;
6780   int carryover = 0;
6781
6782   if (! coding->chars_at_source)
6783     {
6784       /* Source characters are in coding->charbuf.  */
6785       int *buf = coding->charbuf;
6786       int *buf_end = buf + coding->charbuf_used;
6787
            /* When source and destination are the same object, output may
               only extend up to the source bytes already consumed.  */
6788       if (EQ (coding->src_object, coding->dst_object))
6789         {
6790           coding_set_source (coding);
6791           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6792         }
6793
6794       while (buf < buf_end)
6795         {
6796           int c = *buf, i;
6797
6798           if (c >= 0)
6799             {
                    /* FROM_NCHARS is how many elements of charbuf this step
                       consumes, TO_NCHARS how many characters it writes.  */
6800               int from_nchars = 1, to_nchars = 1;
6801               Lisp_Object trans = Qnil;
6802
6803               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6804               if (! NILP (trans))
6805                 {
6806                   trans = get_translation (trans, buf, buf_end);
6807                   if (INTEGERP (trans))
6808                     c = XINT (trans);
6809                   else if (CONSP (trans))
6810                     {
6811                       from_nchars = ASIZE (XCAR (trans));
6812                       trans = XCDR (trans);
6813                       if (INTEGERP (trans))
6814                         c = XINT (trans);
6815                       else
6816                         {
6817                           to_nchars = ASIZE (trans);
6818                           c = XINT (AREF (trans, 0));
6819                         }
6820                     }
                        /* Not enough characters left to decide; leave the
                           rest of charbuf for the next call unless this is
                           the last block.  */
6821                   else if (EQ (trans, Qt) && ! last_block)
6822                     break;
6823                 }
6824
                    /* Make sure there is room for TO_NCHARS characters of
                       maximum length, then recompute DST_END because
                       alloc_destination may have moved the destination.  */
6825               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6826                 {
6827                   dst = alloc_destination (coding,
6828                                            buf_end - buf
6829                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6830                                            dst);
6831                   if (EQ (coding->src_object, coding->dst_object))
6832                     {
6833                       coding_set_source (coding);
6834                       dst_end = (((unsigned char *) coding->source)
6835                                  + coding->consumed);
6836                     }
6837                   else
6838                     dst_end = coding->destination + coding->dst_bytes;
6839                 }
6840
                    /* C already holds the first character to write; the
                       following ones come from the vector TRANS.  */
6841               for (i = 0; i < to_nchars; i++)
6842                 {
6843                   if (i > 0)
6844                     c = XINT (AREF (trans, i));
6845                   if (coding->dst_multibyte
6846                       || ! CHAR_BYTE8_P (c))
6847                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6848                   else
6849                     *dst++ = CHAR_TO_BYTE8 (c);
6850                 }
6851               produced_chars += to_nchars;
6852               buf += from_nchars;
6853             }
6854           else
6855             /* This is an annotation datum.  (-C) is the length.  */
6856             buf += -c;
6857         }
6858       carryover = buf_end - buf;
6859     }
6860   else
6861     {
6862       /* Source characters are at coding->source.  */
6863       const unsigned char *src = coding->source;
6864       const unsigned char *src_end = src + coding->consumed;
6865
6866       if (EQ (coding->dst_object, coding->src_object))
6867         dst_end = (unsigned char *) src;
6868       if (coding->src_multibyte != coding->dst_multibyte)
6869         {
6870           if (coding->src_multibyte)
6871             {
                    /* MULTIBYTEP, CONSUMED_CHARS, SRC_BASE and the label
                       no_more_source are not referenced directly in this
                       function; presumably ONE_MORE_BYTE uses them --
                       verify against the macro definition.  */
6872               int multibytep = 1;
6873               EMACS_INT consumed_chars = 0;
6874
6875               while (1)
6876                 {
6877                   const unsigned char *src_base = src;
6878                   int c;
6879
6880                   ONE_MORE_BYTE (c);
6881                   if (dst == dst_end)
6882                     {
6883                       if (EQ (coding->src_object, coding->dst_object))
6884                         dst_end = (unsigned char *) src;
6885                       if (dst == dst_end)
6886                         {
                                /* Remember SRC as an offset: it is rebuilt
                                   from coding->source after the
                                   reallocation.  */
6887                           EMACS_INT offset = src - coding->source;
6888
6889                           dst = alloc_destination (coding, src_end - src + 1,
6890                                                    dst);
6891                           dst_end = coding->destination + coding->dst_bytes;
6892                           coding_set_source (coding);
6893                           src = coding->source + offset;
6894                           src_end = coding->source + coding->src_bytes;
6895                           if (EQ (coding->src_object, coding->dst_object))
6896                             dst_end = (unsigned char *) src;
6897                         }
6898                     }
6899                   *dst++ = c;
6900                   produced_chars++;
6901                 }
6902             no_more_source:
6903               ;
6904             }
6905           else
6906             while (src < src_end)
6907               {
                      /* MULTIBYTEP is presumably read by EMIT_ONE_BYTE --
                         verify against the macro definition.  */
6908                 int multibytep = 1;
6909                 int c = *src++;
6910
                      /* Room for two bytes is required before emitting.  */
6911                 if (dst >= dst_end - 1)
6912                   {
6913                     if (EQ (coding->src_object, coding->dst_object))
6914                       dst_end = (unsigned char *) src;
6915                     if (dst >= dst_end - 1)
6916                       {
6917                         EMACS_INT offset = src - coding->source;
6918                         EMACS_INT more_bytes;
6919
6920                         if (EQ (coding->src_object, coding->dst_object))
6921                           more_bytes = ((src_end - src) / 2) + 2;
6922                         else
6923                           more_bytes = src_end - src + 2;
6924                         dst = alloc_destination (coding, more_bytes, dst);
6925                         dst_end = coding->destination + coding->dst_bytes;
6926                         coding_set_source (coding);
6927                         src = coding->source + offset;
6928                         src_end = coding->source + coding->src_bytes;
6929                         if (EQ (coding->src_object, coding->dst_object))
6930                           dst_end = (unsigned char *) src;
6931                       }
6932                   }
6933                 EMIT_ONE_BYTE (c);
6934               }
6935         }
6936       else
6937         {
                /* Same representation on both sides: a plain byte copy.
                   The destination is enlarged beforehand only when it is
                   a different object from the source.  */
6938           if (!EQ (coding->src_object, coding->dst_object))
6939             {
6940               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6941
6942               if (require > 0)
6943                 {
6944                   EMACS_INT offset = src - coding->source;
6945
6946                   dst = alloc_destination (coding, require, dst);
6947                   coding_set_source (coding);
6948                   src = coding->source + offset;
6949                   src_end = coding->source + coding->src_bytes;
6950                 }
6951             }
6952           produced_chars = coding->consumed_char;
6953           while (src < src_end)
6954             *dst++ = *src++;
6955         }
6956     }
6957
        /* PRODUCED is the number of bytes written by this call.  */
6958   produced = dst - (coding->destination + coding->produced);
6959   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6960     insert_from_gap (produced_chars, produced);
6961   coding->produced += produced;
6962   coding->produced_char += produced_chars;
6963   return carryover;
6964 }
6965
6966 /* Compose text in CODING->object according to the annotation data at
6967    CHARBUF.  CHARBUF is an array:
6968      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6969  */
6970
6971 static INLINE void
6972 produce_composition (coding, charbuf, pos)
6973      struct coding_system *coding;
6974      int *charbuf;
6975      EMACS_INT pos;
6976 {
6977   int len;
6978   EMACS_INT to;
6979   enum composition_method method;
6980   Lisp_Object components;
6981
6982   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6983   to = pos + charbuf[2];
6984   method = (enum composition_method) (charbuf[4]);
6985
6986   if (method == COMPOSITION_RELATIVE)
6987     components = Qnil;
6988   else
6989     {
6990       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6991       int i, j;
6992
6993       if (method == COMPOSITION_WITH_RULE)
6994         len = charbuf[2] * 3 - 2;
6995       charbuf += MAX_ANNOTATION_LENGTH;
6996       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6997       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6998         {
6999           if (charbuf[i] >= 0)
7000             args[j] = make_number (charbuf[i]);
7001           else
7002             {
7003               i++;
7004               args[j] = make_number (charbuf[i] % 0x100);
7005             }
7006         }
7007       components = (i == j ? Fstring (j, args) : Fvector (j, args));
7008     }
7009   compose_text (pos, to, components, Qnil, coding->dst_object);
7010 }
7011
7012
7013 /* Put `charset' property on text in CODING->object according to
7014    the annotation data at CHARBUF.  CHARBUF is an array:
7015      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
7016  */
7017
7018 static INLINE void
7019 produce_charset (coding, charbuf, pos)
7020      struct coding_system *coding;
7021      int *charbuf;
7022      EMACS_INT pos;
7023 {
7024   EMACS_INT from = pos - charbuf[2];
7025   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
7026
7027   Fput_text_property (make_number (from), make_number (pos),
7028                       Qcharset, CHARSET_NAME (charset),
7029                       coding->dst_object);
7030 }
7031
7032
/* Number of `int' elements first requested for CODING->charbuf by
   ALLOC_CONVERSION_WORK_AREA.  */
7033 #define CHARBUF_SIZE 0x4000
7034
/* Allocate CODING->charbuf with alloca and set CODING->charbuf_size
   to its size in `int' elements.

   The request starts at CHARBUF_SIZE elements and is halved for as
   long as alloca yields a null pointer and the size is still above
   1024.  If no buffer was obtained, CODING_RESULT_INSUFFICIENT_MEM is
   recorded and the macro executes `return coding->result;' in the
   CALLING function, which must therefore return a compatible type.
   Because alloca is used, the buffer is valid only until the calling
   function returns.

   NOTE(review): alloca usually does not signal failure by returning
   a null pointer, so the retry and failure paths may be unreachable
   in practice -- confirm for the supported platforms.  */
7035 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
7036   do {                                                                  \
7037     int size = CHARBUF_SIZE;                                            \
7038                                                                         \
7039     coding->charbuf = NULL;                                             \
7040     while (size > 1024)                                                 \
7041       {                                                                 \
7042         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
7043         if (coding->charbuf)                                            \
7044           break;                                                        \
7045         size >>= 1;                                                     \
7046       }                                                                 \
7047     if (! coding->charbuf)                                              \
7048       {                                                                 \
7049         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
7050         return coding->result;                                          \
7051       }                                                                 \
7052     coding->charbuf_size = size;                                        \
7053   } while (0)
7054
7055
7056 static void
7057 produce_annotation (coding, pos)
7058      struct coding_system *coding;
7059      EMACS_INT pos;
7060 {
7061   int *charbuf = coding->charbuf;
7062   int *charbuf_end = charbuf + coding->charbuf_used;
7063
7064   if (NILP (coding->dst_object))
7065     return;
7066
7067   while (charbuf < charbuf_end)
7068     {
7069       if (*charbuf >= 0)
7070         pos++, charbuf++;
7071       else
7072         {
7073           int len = -*charbuf;
7074
7075           if (len > 2)
7076             switch (charbuf[1])
7077               {
7078               case CODING_ANNOTATE_COMPOSITION_MASK:
7079                 produce_composition (coding, charbuf, pos);
7080                 break;
7081               case CODING_ANNOTATE_CHARSET_MASK:
7082                 produce_charset (coding, charbuf, pos);
7083                 break;
7084               }
7085           charbuf += len;
7086         }
7087     }
7088 }
7089
7090 /* Decode the data at CODING->src_object into CODING->dst_object.
7091    CODING->src_object is a buffer, a string, or nil.
7092    CODING->dst_object is a buffer.
7093
7094    If CODING->src_object is a buffer, it must be the current buffer.
7095    In this case, if CODING->src_pos is positive, it is a position of
7096    the source text in the buffer, otherwise, the source text is in the
7097    gap area of the buffer, and CODING->src_pos specifies the offset of
7098    the text from GPT (which must be the same as PT).  If this is the
7099    same buffer as CODING->dst_object, CODING->src_pos must be
7100    negative.
7101
7102    If CODING->src_object is a string, CODING->src_pos is an index to
7103    that string.
7104
7105    If CODING->src_object is nil, CODING->source must already point to
7106    the non-relocatable memory area.  In this case, CODING->src_pos is
7107    an offset from CODING->source.
7108
7109    The decoded data is inserted at the current point of the buffer
7110    CODING->dst_object.
7111 */
7112
7113 static int
7114 decode_coding (coding)
7115      struct coding_system *coding;
7116 {
7117   Lisp_Object attrs;
7118   Lisp_Object undo_list;
7119   Lisp_Object translation_table;
7120   int carryover;
7121   int i;
7122
7123   if (BUFFERP (coding->src_object)
7124       && coding->src_pos > 0
7125       && coding->src_pos < GPT
7126       && coding->src_pos + coding->src_chars > GPT)
7127     move_gap_both (coding->src_pos, coding->src_pos_byte);
7128
7129   undo_list = Qt;
7130   if (BUFFERP (coding->dst_object))
7131     {
7132       if (current_buffer != XBUFFER (coding->dst_object))
7133         set_buffer_internal (XBUFFER (coding->dst_object));
7134       if (GPT != PT)
7135         move_gap_both (PT, PT_BYTE);
7136       undo_list = current_buffer->undo_list;
7137       current_buffer->undo_list = Qt;
7138     }
7139
7140   coding->consumed = coding->consumed_char = 0;
7141   coding->produced = coding->produced_char = 0;
7142   coding->chars_at_source = 0;
7143   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7144   coding->errors = 0;
7145
7146   ALLOC_CONVERSION_WORK_AREA (coding);
7147
7148   attrs = CODING_ID_ATTRS (coding->id);
7149   translation_table = get_translation_table (attrs, 0, NULL);
7150
7151   carryover = 0;
7152   do
7153     {
7154       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7155
7156       coding_set_source (coding);
7157       coding->annotated = 0;
7158       coding->charbuf_used = carryover;
7159       (*(coding->decoder)) (coding);
7160       coding_set_destination (coding);
7161       carryover = produce_chars (coding, translation_table, 0);
7162       if (coding->annotated)
7163         produce_annotation (coding, pos);
7164       for (i = 0; i < carryover; i++)
7165         coding->charbuf[i]
7166           = coding->charbuf[coding->charbuf_used - carryover + i];
7167     }
7168   while (coding->consumed < coding->src_bytes
7169          && (coding->result == CODING_RESULT_SUCCESS
7170              || coding->result == CODING_RESULT_INVALID_SRC));
7171
7172   if (carryover > 0)
7173     {
7174       coding_set_destination (coding);
7175       coding->charbuf_used = carryover;
7176       produce_chars (coding, translation_table, 1);
7177     }
7178
7179   coding->carryover_bytes = 0;
7180   if (coding->consumed < coding->src_bytes)
7181     {
7182       int nbytes = coding->src_bytes - coding->consumed;
7183       const unsigned char *src;
7184
7185       coding_set_source (coding);
7186       coding_set_destination (coding);
7187       src = coding->source + coding->consumed;
7188
7189       if (coding->mode & CODING_MODE_LAST_BLOCK)
7190         {
7191           /* Flush out unprocessed data as binary chars.  We are sure
7192              that the number of data is less than the size of
7193              coding->charbuf.  */
7194           coding->charbuf_used = 0;
7195           coding->chars_at_source = 0;
7196
7197           while (nbytes-- > 0)
7198             {
7199               int c = *src++;
7200
7201               if (c & 0x80)
7202                 c = BYTE8_TO_CHAR (c);
7203               coding->charbuf[coding->charbuf_used++] = c;
7204             }
7205           produce_chars (coding, Qnil, 1);
7206         }
7207       else
7208         {
7209           /* Record unprocessed bytes in coding->carryover.  We are
7210              sure that the number of data is less than the size of
7211              coding->carryover.  */
7212           unsigned char *p = coding->carryover;
7213
7214           if (nbytes > sizeof coding->carryover)
7215             nbytes = sizeof coding->carryover;
7216           coding->carryover_bytes = nbytes;
7217           while (nbytes-- > 0)
7218             *p++ = *src++;
7219         }
7220       coding->consumed = coding->src_bytes;
7221     }
7222
7223   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7224       && !inhibit_eol_conversion)
7225     decode_eol (coding);
7226   if (BUFFERP (coding->dst_object))
7227     {
7228       current_buffer->undo_list = undo_list;
7229       record_insert (coding->dst_pos, coding->produced_char);
7230     }
7231   return coding->result;
7232 }
7233
7234
7235 /* Extract an annotation datum from a composition starting at POS and
7236    ending before LIMIT of CODING->src_object (buffer or string), store
7237    the data in BUF, set *STOP to a starting position of the next
7238    composition (if any) or to LIMIT, and return the address of the
7239    next element of BUF.
7240
7241    If such an annotation is not found, set *STOP to a starting
7242    position of a composition after POS (if any) or to LIMIT, and
7243    return BUF.  */
7244
7245 static INLINE int *
7246 handle_composition_annotation (pos, limit, coding, buf, stop)
7247      EMACS_INT pos, limit;
7248      struct coding_system *coding;
7249      int *buf;
7250      EMACS_INT *stop;
7251 {
7252   EMACS_INT start, end;
7253   Lisp_Object prop;
7254
7255   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7256       || end > limit)
7257     *stop = limit;
7258   else if (start > pos)
7259     *stop = start;
7260   else
7261     {
7262       if (start == pos)
7263         {
7264           /* We found a composition.  Store the corresponding
7265              annotation data in BUF.  */
7266           int *head = buf;
7267           enum composition_method method = COMPOSITION_METHOD (prop);
7268           int nchars = COMPOSITION_LENGTH (prop);
7269
7270           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7271           if (method != COMPOSITION_RELATIVE)
7272             {
7273               Lisp_Object components;
7274               int len, i, i_byte;
7275
7276               components = COMPOSITION_COMPONENTS (prop);
7277               if (VECTORP (components))
7278                 {
7279                   len = XVECTOR (components)->size;
7280                   for (i = 0; i < len; i++)
7281                     *buf++ = XINT (AREF (components, i));
7282                 }
7283               else if (STRINGP (components))
7284                 {
7285                   len = SCHARS (components);
7286                   i = i_byte = 0;
7287                   while (i < len)
7288                     {
7289                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7290                       buf++;
7291                     }
7292                 }
7293               else if (INTEGERP (components))
7294                 {
7295                   len = 1;
7296                   *buf++ = XINT (components);
7297                 }
7298               else if (CONSP (components))
7299                 {
7300                   for (len = 0; CONSP (components);
7301                        len++, components = XCDR (components))
7302                     *buf++ = XINT (XCAR (components));
7303                 }
7304               else
7305                 abort ();
7306               *head -= len;
7307             }
7308         }
7309
7310       if (find_composition (end, limit, &start, &end, &prop,
7311                             coding->src_object)
7312           && end <= limit)
7313         *stop = start;
7314       else
7315         *stop = limit;
7316     }
7317   return buf;
7318 }
7319
7320
7321 /* Extract an annotation datum from a text property `charset' at POS of
7322    CODING->src_object (buffer of string), store the data in BUF, set
7323    *STOP to the position where the value of `charset' property changes
7324    (limiting by LIMIT), and return the address of the next element of
7325    BUF.
7326
7327    If the property value is nil, set *STOP to the position where the
7328    property value is non-nil (limiting by LIMIT), and return BUF.  */
7329
7330 static INLINE int *
7331 handle_charset_annotation (pos, limit, coding, buf, stop)
7332      EMACS_INT pos, limit;
7333      struct coding_system *coding;
7334      int *buf;
7335      EMACS_INT *stop;
7336 {
7337   Lisp_Object val, next;
7338   int id;
7339
7340   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7341   if (! NILP (val) && CHARSETP (val))
7342     id = XINT (CHARSET_SYMBOL_ID (val));
7343   else
7344     id = -1;
7345   ADD_CHARSET_DATA (buf, 0, id);
7346   next = Fnext_single_property_change (make_number (pos), Qcharset,
7347                                        coding->src_object,
7348                                        make_number (limit));
7349   *stop = XINT (next);
7350   return buf;
7351 }
7352
7353
7354 static void
7355 consume_chars (coding, translation_table, max_lookup)
7356      struct coding_system *coding;
7357      Lisp_Object translation_table;
7358      int max_lookup;
7359 {
7360   int *buf = coding->charbuf;
7361   int *buf_end = coding->charbuf + coding->charbuf_size;
7362   const unsigned char *src = coding->source + coding->consumed;
7363   const unsigned char *src_end = coding->source + coding->src_bytes;
7364   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7365   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7366   int multibytep = coding->src_multibyte;
7367   Lisp_Object eol_type;
7368   int c;
7369   EMACS_INT stop, stop_composition, stop_charset;
7370   int *lookup_buf = NULL;
7371
7372   if (! NILP (translation_table))
7373     lookup_buf = alloca (sizeof (int) * max_lookup);
7374
7375   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7376   if (VECTORP (eol_type))
7377     eol_type = Qunix;
7378
7379   /* Note: composition handling is not yet implemented.  */
7380   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7381
7382   if (NILP (coding->src_object))
7383     stop = stop_composition = stop_charset = end_pos;
7384   else
7385     {
7386       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7387         stop = stop_composition = pos;
7388       else
7389         stop = stop_composition = end_pos;
7390       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7391         stop = stop_charset = pos;
7392       else
7393         stop_charset = end_pos;
7394     }
7395
7396   /* Compensate for CRLF and conversion.  */
7397   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7398   while (buf < buf_end)
7399     {
7400       Lisp_Object trans;
7401
7402       if (pos == stop)
7403         {
7404           if (pos == end_pos)
7405             break;
7406           if (pos == stop_composition)
7407             buf = handle_composition_annotation (pos, end_pos, coding,
7408                                                  buf, &stop_composition);
7409           if (pos == stop_charset)
7410             buf = handle_charset_annotation (pos, end_pos, coding,
7411                                              buf, &stop_charset);
7412           stop = (stop_composition < stop_charset
7413                   ? stop_composition : stop_charset);
7414         }
7415
7416       if (! multibytep)
7417         {
7418           EMACS_INT bytes;
7419
7420           if (coding->encoder == encode_coding_raw_text
7421               || coding->encoder == encode_coding_ccl)
7422             c = *src++, pos++;
7423           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7424             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7425           else
7426             c = BYTE8_TO_CHAR (*src), src++, pos++;
7427         }
7428       else
7429         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7430       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7431         c = '\n';
7432       if (! EQ (eol_type, Qunix))
7433         {
7434           if (c == '\n')
7435             {
7436               if (EQ (eol_type, Qdos))
7437                 *buf++ = '\r';
7438               else
7439                 c = '\r';
7440             }
7441         }
7442
7443       trans = Qnil;
7444       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7445       if (NILP (trans))
7446         *buf++ = c;
7447       else
7448         {
7449           int from_nchars = 1, to_nchars = 1;
7450           int *lookup_buf_end;
7451           const unsigned char *p = src;
7452           int i;
7453
7454           lookup_buf[0] = c;
7455           for (i = 1; i < max_lookup && p < src_end; i++)
7456             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7457           lookup_buf_end = lookup_buf + i;
7458           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7459           if (INTEGERP (trans))
7460             c = XINT (trans);
7461           else if (CONSP (trans))
7462             {
7463               from_nchars = ASIZE (XCAR (trans));
7464               trans = XCDR (trans);
7465               if (INTEGERP (trans))
7466                 c = XINT (trans);
7467               else
7468                 {
7469                   to_nchars = ASIZE (trans);
7470                   if (buf + to_nchars > buf_end)
7471                     break;
7472                   c = XINT (AREF (trans, 0));
7473                 }
7474             }
7475           else
7476             break;
7477           *buf++ = c;
7478           for (i = 1; i < to_nchars; i++)
7479             *buf++ = XINT (AREF (trans, i));
7480           for (i = 1; i < from_nchars; i++, pos++)
7481             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7482         }
7483     }
7484
7485   coding->consumed = src - coding->source;
7486   coding->consumed_char = pos - coding->src_pos;
7487   coding->charbuf_used = buf - coding->charbuf;
7488   coding->chars_at_source = 0;
7489 }
7490
7491
7492 /* Encode the text at CODING->src_object into CODING->dst_object.
7493    CODING->src_object is a buffer or a string.
7494    CODING->dst_object is a buffer or nil.
7495
7496    If CODING->src_object is a buffer, it must be the current buffer.
7497    In this case, if CODING->src_pos is positive, it is a position of
7498    the source text in the buffer, otherwise. the source text is in the
7499    gap area of the buffer, and coding->src_pos specifies the offset of
7500    the text from GPT (which must be the same as PT).  If this is the
7501    same buffer as CODING->dst_object, CODING->src_pos must be
7502    negative and CODING should not have `pre-write-conversion'.
7503
7504    If CODING->src_object is a string, CODING should not have
7505    `pre-write-conversion'.
7506
7507    If CODING->dst_object is a buffer, the encoded data is inserted at
7508    the current point of that buffer.
7509
7510    If CODING->dst_object is nil, the encoded data is placed at the
7511    memory area specified by CODING->destination.  */
7512
7513 static int
7514 encode_coding (coding)
7515      struct coding_system *coding;
7516 {
7517   Lisp_Object attrs;
7518   Lisp_Object translation_table;
7519   int max_lookup;
7520
7521   attrs = CODING_ID_ATTRS (coding->id);
7522   if (coding->encoder == encode_coding_raw_text)
7523     translation_table = Qnil, max_lookup = 0;
7524   else
7525     translation_table = get_translation_table (attrs, 1, &max_lookup);
7526
7527   if (BUFFERP (coding->dst_object))
7528     {
7529       set_buffer_internal (XBUFFER (coding->dst_object));
7530       coding->dst_multibyte
7531         = ! NILP (current_buffer->enable_multibyte_characters);
7532     }
7533
7534   coding->consumed = coding->consumed_char = 0;
7535   coding->produced = coding->produced_char = 0;
7536   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7537   coding->errors = 0;
7538
7539   ALLOC_CONVERSION_WORK_AREA (coding);
7540
7541   do {
7542     coding_set_source (coding);
7543     consume_chars (coding, translation_table, max_lookup);
7544     coding_set_destination (coding);
7545     (*(coding->encoder)) (coding);
7546   } while (coding->consumed_char < coding->src_chars);
7547
7548   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7549     insert_from_gap (coding->produced_char, coding->produced);
7550
7551   return (coding->result);
7552 }
7553
7554
7555 /* Name (or base name) of work buffer for code conversion.  */
7556 static Lisp_Object Vcode_conversion_workbuf_name;
7557
7558 /* A working buffer used by the top level conversion.  Once it is
7559    created, it is never destroyed.  It has the name
7560    Vcode_conversion_workbuf_name.  The other working buffers are
7561    destroyed after the use is finished, and their names are modified
7562    versions of Vcode_conversion_workbuf_name.  */
7563 static Lisp_Object Vcode_conversion_reused_workbuf;
7564
7565 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
7566 static int reused_workbuf_in_use;
7567
7568
7569 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7570    multibyteness of returning buffer.  */
7571
7572 static Lisp_Object
7573 make_conversion_work_buffer (multibyte)
7574      int multibyte;
7575 {
7576   Lisp_Object name, workbuf;
7577   struct buffer *current;
7578
7579   if (reused_workbuf_in_use++)
7580     {
7581       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7582       workbuf = Fget_buffer_create (name);
7583     }
7584   else
7585     {
7586       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7587         Vcode_conversion_reused_workbuf
7588           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7589       workbuf = Vcode_conversion_reused_workbuf;
7590     }
7591   current = current_buffer;
7592   set_buffer_internal (XBUFFER (workbuf));
7593   /* We can't allow modification hooks to run in the work buffer.  For
7594      instance, directory_files_internal assumes that file decoding
7595      doesn't compile new regexps.  */
7596   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7597   Ferase_buffer ();
7598   current_buffer->undo_list = Qt;
7599   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7600   set_buffer_internal (current);
7601   return workbuf;
7602 }
7603
7604
7605 static Lisp_Object
7606 code_conversion_restore (arg)
7607      Lisp_Object arg;
7608 {
7609   Lisp_Object current, workbuf;
7610   struct gcpro gcpro1;
7611
7612   GCPRO1 (arg);
7613   current = XCAR (arg);
7614   workbuf = XCDR (arg);
7615   if (! NILP (workbuf))
7616     {
7617       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7618         reused_workbuf_in_use = 0;
7619       else if (! NILP (Fbuffer_live_p (workbuf)))
7620         Fkill_buffer (workbuf);
7621     }
7622   set_buffer_internal (XBUFFER (current));
7623   UNGCPRO;
7624   return Qnil;
7625 }
7626
7627 Lisp_Object
7628 code_conversion_save (with_work_buf, multibyte)
7629      int with_work_buf, multibyte;
7630 {
7631   Lisp_Object workbuf = Qnil;
7632
7633   if (with_work_buf)
7634     workbuf = make_conversion_work_buffer (multibyte);
7635   record_unwind_protect (code_conversion_restore,
7636                          Fcons (Fcurrent_buffer (), workbuf));
7637   return workbuf;
7638 }
7639
7640 int
7641 decode_coding_gap (coding, chars, bytes)
7642      struct coding_system *coding;
7643      EMACS_INT chars, bytes;
7644 {
7645   int count = specpdl_ptr - specpdl;
7646   Lisp_Object attrs;
7647
7648   code_conversion_save (0, 0);
7649
7650   coding->src_object = Fcurrent_buffer ();
7651   coding->src_chars = chars;
7652   coding->src_bytes = bytes;
7653   coding->src_pos = -chars;
7654   coding->src_pos_byte = -bytes;
7655   coding->src_multibyte = chars < bytes;
7656   coding->dst_object = coding->src_object;
7657   coding->dst_pos = PT;
7658   coding->dst_pos_byte = PT_BYTE;
7659   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7660
7661   if (CODING_REQUIRE_DETECTION (coding))
7662     detect_coding (coding);
7663
7664   coding->mode |= CODING_MODE_LAST_BLOCK;
7665   current_buffer->text->inhibit_shrinking = 1;
7666   decode_coding (coding);
7667   current_buffer->text->inhibit_shrinking = 0;
7668
7669   attrs = CODING_ID_ATTRS (coding->id);
7670   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7671     {
7672       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7673       Lisp_Object val;
7674
7675       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7676       val = call1 (CODING_ATTR_POST_READ (attrs),
7677                    make_number (coding->produced_char));
7678       CHECK_NATNUM (val);
7679       coding->produced_char += Z - prev_Z;
7680       coding->produced += Z_BYTE - prev_Z_BYTE;
7681     }
7682
7683   unbind_to (count, Qnil);
7684   return coding->result;
7685 }
7686
7687 int
7688 encode_coding_gap (coding, chars, bytes)
7689      struct coding_system *coding;
7690      EMACS_INT chars, bytes;
7691 {
7692   int count = specpdl_ptr - specpdl;
7693
7694   code_conversion_save (0, 0);
7695
7696   coding->src_object = Fcurrent_buffer ();
7697   coding->src_chars = chars;
7698   coding->src_bytes = bytes;
7699   coding->src_pos = -chars;
7700   coding->src_pos_byte = -bytes;
7701   coding->src_multibyte = chars < bytes;
7702   coding->dst_object = coding->src_object;
7703   coding->dst_pos = PT;
7704   coding->dst_pos_byte = PT_BYTE;
7705
7706   encode_coding (coding);
7707
7708   unbind_to (count, Qnil);
7709   return coding->result;
7710 }
7711
7712
7713 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7714    SRC_OBJECT into DST_OBJECT by coding context CODING.
7715
7716    SRC_OBJECT is a buffer, a string, or Qnil.
7717
7718    If it is a buffer, the text is at point of the buffer.  FROM and TO
7719    are positions in the buffer.
7720
7721    If it is a string, the text is at the beginning of the string.
7722    FROM and TO are indices to the string.
7723
7724    If it is nil, the text is at coding->source.  FROM and TO are
7725    indices to coding->source.
7726
7727    DST_OBJECT is a buffer, Qt, or Qnil.
7728
7729    If it is a buffer, the decoded text is inserted at point of the
7730    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7731    is deleted.
7732
7733    If it is Qt, a string is made from the decoded text, and
7734    set in CODING->dst_object.
7735
7736    If it is Qnil, the decoded text is stored at CODING->destination.
7737    The caller must allocate CODING->dst_bytes bytes at
7738    CODING->destination by xmalloc.  If the decoded text is longer than
7739    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7740  */
7741
7742 void
7743 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7744                       dst_object)
7745      struct coding_system *coding;
7746      Lisp_Object src_object;
7747      EMACS_INT from, from_byte, to, to_byte;
7748      Lisp_Object dst_object;
7749 {
7750   int count = specpdl_ptr - specpdl;
7751   unsigned char *destination;
7752   EMACS_INT dst_bytes;
7753   EMACS_INT chars = to - from;
7754   EMACS_INT bytes = to_byte - from_byte;
7755   Lisp_Object attrs;
7756   int saved_pt = -1, saved_pt_byte;
7757   int need_marker_adjustment = 0;
7758   Lisp_Object old_deactivate_mark;
7759
7760   old_deactivate_mark = Vdeactivate_mark;
7761
7762   if (NILP (dst_object))
7763     {
7764       destination = coding->destination;
7765       dst_bytes = coding->dst_bytes;
7766     }
7767
7768   coding->src_object = src_object;
7769   coding->src_chars = chars;
7770   coding->src_bytes = bytes;
7771   coding->src_multibyte = chars < bytes;
7772
7773   if (STRINGP (src_object))
7774     {
7775       coding->src_pos = from;
7776       coding->src_pos_byte = from_byte;
7777     }
7778   else if (BUFFERP (src_object))
7779     {
7780       set_buffer_internal (XBUFFER (src_object));
7781       if (from != GPT)
7782         move_gap_both (from, from_byte);
7783       if (EQ (src_object, dst_object))
7784         {
7785           struct Lisp_Marker *tail;
7786
7787           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7788             {
7789               tail->need_adjustment
7790                 = tail->charpos == (tail->insertion_type ? from : to);
7791               need_marker_adjustment |= tail->need_adjustment;
7792             }
7793           saved_pt = PT, saved_pt_byte = PT_BYTE;
7794           TEMP_SET_PT_BOTH (from, from_byte);
7795           current_buffer->text->inhibit_shrinking = 1;
7796           del_range_both (from, from_byte, to, to_byte, 1);
7797           coding->src_pos = -chars;
7798           coding->src_pos_byte = -bytes;
7799         }
7800       else
7801         {
7802           coding->src_pos = from;
7803           coding->src_pos_byte = from_byte;
7804         }
7805     }
7806
7807   if (CODING_REQUIRE_DETECTION (coding))
7808     detect_coding (coding);
7809   attrs = CODING_ID_ATTRS (coding->id);
7810
7811   if (EQ (dst_object, Qt)
7812       || (! NILP (CODING_ATTR_POST_READ (attrs))
7813           && NILP (dst_object)))
7814     {
7815       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7816       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7817       coding->dst_pos = BEG;
7818       coding->dst_pos_byte = BEG_BYTE;
7819     }
7820   else if (BUFFERP (dst_object))
7821     {
7822       code_conversion_save (0, 0);
7823       coding->dst_object = dst_object;
7824       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7825       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7826       coding->dst_multibyte
7827         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7828     }
7829   else
7830     {
7831       code_conversion_save (0, 0);
7832       coding->dst_object = Qnil;
7833       /* Most callers presume this will return a multibyte result, and they
7834          won't use `binary' or `raw-text' anyway, so let's not worry about
7835          CODING_FOR_UNIBYTE.  */
7836       coding->dst_multibyte = 1;
7837     }
7838
7839   decode_coding (coding);
7840
7841   if (BUFFERP (coding->dst_object))
7842     set_buffer_internal (XBUFFER (coding->dst_object));
7843
7844   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7845     {
7846       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7847       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7848       Lisp_Object val;
7849
7850       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7851       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7852               old_deactivate_mark);
7853       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7854                         make_number (coding->produced_char));
7855       UNGCPRO;
7856       CHECK_NATNUM (val);
7857       coding->produced_char += Z - prev_Z;
7858       coding->produced += Z_BYTE - prev_Z_BYTE;
7859     }
7860
7861   if (EQ (dst_object, Qt))
7862     {
7863       coding->dst_object = Fbuffer_string ();
7864     }
7865   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7866     {
7867       set_buffer_internal (XBUFFER (coding->dst_object));
7868       if (dst_bytes < coding->produced)
7869         {
7870           destination = xrealloc (destination, coding->produced);
7871           if (! destination)
7872             {
7873               record_conversion_result (coding,
7874                                         CODING_RESULT_INSUFFICIENT_DST);
7875               unbind_to (count, Qnil);
7876               return;
7877             }
7878           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7879             move_gap_both (BEGV, BEGV_BYTE);
7880           bcopy (BEGV_ADDR, destination, coding->produced);
7881           coding->destination = destination;
7882         }
7883     }
7884
7885   if (saved_pt >= 0)
7886     {
7887       /* This is the case of:
7888          (BUFFERP (src_object) && EQ (src_object, dst_object))
7889          As we have moved PT while replacing the original buffer
7890          contents, we must recover it now.  */
7891       set_buffer_internal (XBUFFER (src_object));
7892       current_buffer->text->inhibit_shrinking = 0;
7893       if (saved_pt < from)
7894         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7895       else if (saved_pt < from + chars)
7896         TEMP_SET_PT_BOTH (from, from_byte);
7897       else if (! NILP (current_buffer->enable_multibyte_characters))
7898         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7899                           saved_pt_byte + (coding->produced - bytes));
7900       else
7901         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7902                           saved_pt_byte + (coding->produced - bytes));
7903
7904       if (need_marker_adjustment)
7905         {
7906           struct Lisp_Marker *tail;
7907
7908           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7909             if (tail->need_adjustment)
7910               {
7911                 tail->need_adjustment = 0;
7912                 if (tail->insertion_type)
7913                   {
7914                     tail->bytepos = from_byte;
7915                     tail->charpos = from;
7916                   }
7917                 else
7918                   {
7919                     tail->bytepos = from_byte + coding->produced;
7920                     tail->charpos
7921                       = (NILP (current_buffer->enable_multibyte_characters)
7922                          ? tail->bytepos : from + coding->produced_char);
7923                   }
7924               }
7925         }
7926     }
7927
7928   Vdeactivate_mark = old_deactivate_mark;
7929   unbind_to (count, coding->dst_object);
7930 }
7931
7932
7933 void
7934 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7935                       dst_object)
7936      struct coding_system *coding;
7937      Lisp_Object src_object;
7938      EMACS_INT from, from_byte, to, to_byte;
7939      Lisp_Object dst_object;
7940 {
7941   int count = specpdl_ptr - specpdl;
7942   EMACS_INT chars = to - from;
7943   EMACS_INT bytes = to_byte - from_byte;
7944   Lisp_Object attrs;
7945   int saved_pt = -1, saved_pt_byte;
7946   int need_marker_adjustment = 0;
7947   int kill_src_buffer = 0;
7948   Lisp_Object old_deactivate_mark;
7949
7950   old_deactivate_mark = Vdeactivate_mark;
7951
7952   coding->src_object = src_object;
7953   coding->src_chars = chars;
7954   coding->src_bytes = bytes;
7955   coding->src_multibyte = chars < bytes;
7956
7957   attrs = CODING_ID_ATTRS (coding->id);
7958
7959   if (EQ (src_object, dst_object))
7960     {
7961       struct Lisp_Marker *tail;
7962
7963       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7964         {
7965           tail->need_adjustment
7966             = tail->charpos == (tail->insertion_type ? from : to);
7967           need_marker_adjustment |= tail->need_adjustment;
7968         }
7969     }
7970
7971   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7972     {
7973       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7974       set_buffer_internal (XBUFFER (coding->src_object));
7975       if (STRINGP (src_object))
7976         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7977       else if (BUFFERP (src_object))
7978         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7979       else
7980         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7981
7982       if (EQ (src_object, dst_object))
7983         {
7984           set_buffer_internal (XBUFFER (src_object));
7985           saved_pt = PT, saved_pt_byte = PT_BYTE;
7986           del_range_both (from, from_byte, to, to_byte, 1);
7987           set_buffer_internal (XBUFFER (coding->src_object));
7988         }
7989
7990       {
7991         Lisp_Object args[3];
7992         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7993
7994         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7995                 old_deactivate_mark);
7996         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7997         args[1] = make_number (BEG);
7998         args[2] = make_number (Z);
7999         safe_call (3, args);
8000         UNGCPRO;
8001       }
8002       if (XBUFFER (coding->src_object) != current_buffer)
8003         kill_src_buffer = 1;
8004       coding->src_object = Fcurrent_buffer ();
8005       if (BEG != GPT)
8006         move_gap_both (BEG, BEG_BYTE);
8007       coding->src_chars = Z - BEG;
8008       coding->src_bytes = Z_BYTE - BEG_BYTE;
8009       coding->src_pos = BEG;
8010       coding->src_pos_byte = BEG_BYTE;
8011       coding->src_multibyte = Z < Z_BYTE;
8012     }
8013   else if (STRINGP (src_object))
8014     {
8015       code_conversion_save (0, 0);
8016       coding->src_pos = from;
8017       coding->src_pos_byte = from_byte;
8018     }
8019   else if (BUFFERP (src_object))
8020     {
8021       code_conversion_save (0, 0);
8022       set_buffer_internal (XBUFFER (src_object));
8023       if (EQ (src_object, dst_object))
8024         {
8025           saved_pt = PT, saved_pt_byte = PT_BYTE;
8026           coding->src_object = del_range_1 (from, to, 1, 1);
8027           coding->src_pos = 0;
8028           coding->src_pos_byte = 0;
8029         }
8030       else
8031         {
8032           if (from < GPT && to >= GPT)
8033             move_gap_both (from, from_byte);
8034           coding->src_pos = from;
8035           coding->src_pos_byte = from_byte;
8036         }
8037     }
8038   else
8039     code_conversion_save (0, 0);
8040
8041   if (BUFFERP (dst_object))
8042     {
8043       coding->dst_object = dst_object;
8044       if (EQ (src_object, dst_object))
8045         {
8046           coding->dst_pos = from;
8047           coding->dst_pos_byte = from_byte;
8048         }
8049       else
8050         {
8051           struct buffer *current = current_buffer;
8052
8053           set_buffer_temp (XBUFFER (dst_object));
8054           coding->dst_pos = PT;
8055           coding->dst_pos_byte = PT_BYTE;
8056           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
8057           set_buffer_temp (current);
8058         }
8059       coding->dst_multibyte
8060         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
8061     }
8062   else if (EQ (dst_object, Qt))
8063     {
8064       coding->dst_object = Qnil;
8065       coding->dst_bytes = coding->src_chars;
8066       if (coding->dst_bytes == 0)
8067         coding->dst_bytes = 1;
8068       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
8069       coding->dst_multibyte = 0;
8070     }
8071   else
8072     {
8073       coding->dst_object = Qnil;
8074       coding->dst_multibyte = 0;
8075     }
8076
8077   encode_coding (coding);
8078
8079   if (EQ (dst_object, Qt))
8080     {
8081       if (BUFFERP (coding->dst_object))
8082         coding->dst_object = Fbuffer_string ();
8083       else
8084         {
8085           coding->dst_object
8086             = make_unibyte_string ((char *) coding->destination,
8087                                    coding->produced);
8088           xfree (coding->destination);
8089         }
8090     }
8091
8092   if (saved_pt >= 0)
8093     {
8094       /* This is the case of:
8095          (BUFFERP (src_object) && EQ (src_object, dst_object))
8096          As we have moved PT while replacing the original buffer
8097          contents, we must recover it now.  */
8098       set_buffer_internal (XBUFFER (src_object));
8099       if (saved_pt < from)
8100         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8101       else if (saved_pt < from + chars)
8102         TEMP_SET_PT_BOTH (from, from_byte);
8103       else if (! NILP (current_buffer->enable_multibyte_characters))
8104         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8105                           saved_pt_byte + (coding->produced - bytes));
8106       else
8107         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8108                           saved_pt_byte + (coding->produced - bytes));
8109
8110       if (need_marker_adjustment)
8111         {
8112           struct Lisp_Marker *tail;
8113
8114           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8115             if (tail->need_adjustment)
8116               {
8117                 tail->need_adjustment = 0;
8118                 if (tail->insertion_type)
8119                   {
8120                     tail->bytepos = from_byte;
8121                     tail->charpos = from;
8122                   }
8123                 else
8124                   {
8125                     tail->bytepos = from_byte + coding->produced;
8126                     tail->charpos
8127                       = (NILP (current_buffer->enable_multibyte_characters)
8128                          ? tail->bytepos : from + coding->produced_char);
8129                   }
8130               }
8131         }
8132     }
8133
8134   if (kill_src_buffer)
8135     Fkill_buffer (coding->src_object);
8136
8137   Vdeactivate_mark = old_deactivate_mark;
8138   unbind_to (count, Qnil);
8139 }
8140
8141
8142 Lisp_Object
8143 preferred_coding_system ()
8144 {
8145   int id = coding_categories[coding_priorities[0]].id;
8146
8147   return CODING_ID_NAME (id);
8148 }
8149
8150 \f
8151 #ifdef emacs
8152 /*** 11. Emacs Lisp library functions ***/
8153
8154 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8155        doc: /* Return t if OBJECT is nil or a coding-system.
8156 See the documentation of `define-coding-system' for information
8157 about coding-system objects.  */)
8158      (object)
8159      Lisp_Object object;
8160 {
8161   if (NILP (object)
8162       || CODING_SYSTEM_ID (object) >= 0)
8163     return Qt;
8164   if (! SYMBOLP (object)
8165       || NILP (Fget (object, Qcoding_system_define_form)))
8166     return Qnil;
8167   return Qt;
8168 }
8169
8170 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8171        Sread_non_nil_coding_system, 1, 1, 0,
8172        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8173      (prompt)
8174      Lisp_Object prompt;
8175 {
8176   Lisp_Object val;
8177   do
8178     {
8179       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8180                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8181     }
8182   while (SCHARS (val) == 0);
8183   return (Fintern (val, Qnil));
8184 }
8185
8186 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8187        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8188 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8189 Ignores case when completing coding systems (all Emacs coding systems
8190 are lower-case).  */)
8191      (prompt, default_coding_system)
8192      Lisp_Object prompt, default_coding_system;
8193 {
8194   Lisp_Object val;
8195   int count = SPECPDL_INDEX ();
8196
8197   if (SYMBOLP (default_coding_system))
8198     default_coding_system = SYMBOL_NAME (default_coding_system);
8199   specbind (Qcompletion_ignore_case, Qt);
8200   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8201                           Qt, Qnil, Qcoding_system_history,
8202                           default_coding_system, Qnil);
8203   unbind_to (count, Qnil);
8204   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8205 }
8206
8207 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8208        1, 1, 0,
8209        doc: /* Check validity of CODING-SYSTEM.
8210 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8211 It is valid if it is nil or a symbol defined as a coding system by the
8212 function `define-coding-system'.  */)
8213   (coding_system)
8214      Lisp_Object coding_system;
8215 {
8216   Lisp_Object define_form;
8217
8218   define_form = Fget (coding_system, Qcoding_system_define_form);
8219   if (! NILP (define_form))
8220     {
8221       Fput (coding_system, Qcoding_system_define_form, Qnil);
8222       safe_eval (define_form);
8223     }
8224   if (!NILP (Fcoding_system_p (coding_system)))
8225     return coding_system;
8226   xsignal1 (Qcoding_system_error, coding_system);
8227 }
8228
8229 \f
8230 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8231    HIGHEST is nonzero, return the coding system of the highest
8232    priority among the detected coding systems.  Otherwise return a
8233    list of detected coding systems sorted by their priorities.  If
8234    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8235    multibyte form but contain only ASCII and eight-bit chars.
8236    Otherwise, the bytes are raw bytes.
8237
8238    CODING-SYSTEM controls the detection as below:
8239
8240    If it is nil, detect both text-format and eol-format.  If the
8241    text-format part of CODING-SYSTEM is already specified
8242    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8243    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8244    detect only text-format.  */
8245
8246 Lisp_Object
8247 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
8248                       coding_system)
8249      const unsigned char *src;
8250      EMACS_INT src_chars, src_bytes;
8251      int highest;
8252      int multibytep;
8253      Lisp_Object coding_system;
8254 {
8255   const unsigned char *src_end = src + src_bytes;
8256   Lisp_Object attrs, eol_type;
8257   Lisp_Object val = Qnil;
8258   struct coding_system coding;
8259   int id;
8260   struct coding_detection_info detect_info;
8261   enum coding_category base_category;
8262   int null_byte_found = 0, eight_bit_found = 0;
8263
       /* A nil CODING_SYSTEM is handled as `undecided'.  */
8264   if (NILP (coding_system))
8265     coding_system = Qundecided;
8266   setup_coding_system (coding_system, &coding);
8267   attrs = CODING_ID_ATTRS (coding.id);
8268   eol_type = CODING_ID_EOL_TYPE (coding.id);
8269   coding_system = CODING_ATTR_BASE_NAME (attrs);
8270
       /* Describe the source bytes in CODING, which is what the detector
          functions below receive.  */
8271   coding.source = src;
8272   coding.src_chars = src_chars;
8273   coding.src_bytes = src_bytes;
8274   coding.src_multibyte = multibytep;
8275   coding.consumed = 0;
8276   coding.mode |= CODING_MODE_LAST_BLOCK;
8277   coding.head_ascii = 0;
8278
       /* No category has been checked, found or rejected yet.  */
8279   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8280
8281   /* At first, detect text-format if necessary.  */
8282   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8283   if (base_category == coding_category_undecided)
8284     {
8285       enum coding_category category;
8286       struct coding_system *this;
8287       int c, i;
8288
8289       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
8290       for (; src < src_end; src++)
8291         {
8292           c = *src;
8293           if (c & 0x80)
8294             {
                   /* The scan stops as soon as both an 8-bit byte and a
                      null byte have been seen.  */
8295               eight_bit_found = 1;
8296               if (null_byte_found)
8297                 break;
8298             }
8299           else if (c < 0x20)
8300             {
8301               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8302                   && ! inhibit_iso_escape_detection
8303                   && ! detect_info.checked)
8304                 {
8305                   if (detect_coding_iso_2022 (&coding, &detect_info))
8306                     {
8307                       /* We have scanned the whole data.  */
8308                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8309                         {
8310                           /* We didn't find an 8-bit code.  We may
8311                              have found a null-byte, but it's very
8312                              rare that a binary file conform to
8313                              ISO-2022.  */
8314                           src = src_end;
8315                           coding.head_ascii = src - coding.source;
8316                         }
8317                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8318                       break;
8319                     }
8320                 }
8321               else if (! c && !inhibit_null_byte_detection)
8322                 {
8323                   null_byte_found = 1;
8324                   if (eight_bit_found)
8325                     break;
8326                 }
                   /* head_ascii only grows while no 8-bit byte has been
                      seen, so it counts the leading 7-bit bytes.  */
8327               if (! eight_bit_found)
8328                 coding.head_ascii++;
8329             }
8330           else if (! eight_bit_found)
8331             coding.head_ascii++;
8332         }
8333
           /* Consult the categories only when the scan saw something
              other than plain 7-bit bytes, or when the ISO-2022
              detector above already marked a category as found.  */
8334       if (null_byte_found || eight_bit_found
8335           || coding.head_ascii < coding.src_bytes
8336           || detect_info.found)
8337         {
8338           if (coding.head_ascii == coding.src_bytes)
8339             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
8340             for (i = 0; i < coding_category_raw_text; i++)
8341               {
8342                 category = coding_priorities[i];
8343                 this = coding_categories + category;
8344                 if (detect_info.found & (1 << category))
8345                   break;
8346               }
8347           else
8348             {
8349               if (null_byte_found)
8350                 {
                       /* Every category outside CATEGORY_MASK_UTF_16 is
                          marked as already checked and rejected.  */
8351                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8352                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8353                 }
                   /* Walk the categories in priority order.  */
8354               for (i = 0; i < coding_category_raw_text; i++)
8355                 {
8356                   category = coding_priorities[i];
8357                   this = coding_categories + category;
8358
8359                   if (this->id < 0)
8360                     {
8361                       /* No coding system of this category is defined.  */
8362                       detect_info.rejected |= (1 << category);
8363                     }
8364                   else if (category >= coding_category_raw_text)
8365                     continue;
8366                   else if (detect_info.checked & (1 << category))
8367                     {
                           /* Already checked by an earlier detector call;
                              with HIGHEST, a found category ends the
                              search.  */
8368                       if (highest
8369                           && (detect_info.found & (1 << category)))
8370                         break;
8371                     }
                       /* Not checked yet: run this category's detector.  */
8372                   else if ((*(this->detector)) (&coding, &detect_info)
8373                            && highest
8374                            && (detect_info.found & (1 << category)))
8375                     {
8376                       if (category == coding_category_utf_16_auto)
8377                         {
                               /* Replace the auto category by the
                                  concrete little- or big-endian one.  */
8378                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8379                             category = coding_category_utf_16_le;
8380                           else
8381                             category = coding_category_utf_16_be;
8382                         }
8383                       break;
8384                     }
8385                 }
8386             }
8387         }
8388
           /* Build VAL as a list of coding system IDs; the eol step at
              the end of the function replaces each ID by a symbol.  */
8389       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8390           || null_byte_found)
8391         {
               /* Everything was rejected, or a null byte was seen: the
                  result is `no-conversion'.  */
8392           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8393           id = CODING_SYSTEM_ID (Qno_conversion);
8394           val = Fcons (make_number (id), Qnil);
8395         }
8396       else if (! detect_info.rejected && ! detect_info.found)
8397         {
               /* Nothing was rejected and nothing was found: the result
                  is the coding system of the undecided category.  */
8398           detect_info.found = CATEGORY_MASK_ANY;
8399           id = coding_categories[coding_category_undecided].id;
8400           val = Fcons (make_number (id), Qnil);
8401         }
8402       else if (highest)
8403         {
8404           if (detect_info.found)
8405             {
                   /* CATEGORY and THIS are where the loops above
                      stopped.  */
8406               detect_info.found = 1 << category;
8407               val = Fcons (make_number (this->id), Qnil);
8408             }
8409           else
                 /* Nothing was positively found: take the first category
                    in priority order that was not rejected.  */
8410             for (i = 0; i < coding_category_raw_text; i++)
8411               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8412                 {
8413                   detect_info.found = 1 << coding_priorities[i];
8414                   id = coding_categories[coding_priorities[i]].id;
8415                   val = Fcons (make_number (id), Qnil);
8416                   break;
8417                 }
8418         }
8419       else
8420         {
               /* Return every candidate.  Both loops run from lowest to
                  highest priority and push onto the front of VAL, so the
                  final list holds the found categories first, then the
                  ones neither found nor rejected, each group in priority
                  order.  */
8421           int mask = detect_info.rejected | detect_info.found;
8422           int found = 0;
8423
8424           for (i = coding_category_raw_text - 1; i >= 0; i--)
8425             {
8426               category = coding_priorities[i];
8427               if (! (mask & (1 << category)))
8428                 {
8429                   found |= 1 << category;
8430                   id = coding_categories[category].id;
8431                   if (id >= 0)
8432                     val = Fcons (make_number (id), val);
8433                 }
8434             }
8435           for (i = coding_category_raw_text - 1; i >= 0; i--)
8436             {
8437               category = coding_priorities[i];
8438               if (detect_info.found & (1 << category))
8439                 {
8440                   id = coding_categories[category].id;
8441                   val = Fcons (make_number (id), val);
8442                 }
8443             }
8444           detect_info.found |= found;
8445         }
8446     }
8447   else if (base_category == coding_category_utf_8_auto)
8448     {
           /* Only the choice between the signature and no-signature
              variants of UTF-8 is made here.  */
8449       if (detect_coding_utf_8 (&coding, &detect_info))
8450         {
8451           struct coding_system *this;
8452
8453           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8454             this = coding_categories + coding_category_utf_8_sig;
8455           else
8456             this = coding_categories + coding_category_utf_8_nosig;
8457           val = Fcons (make_number (this->id), Qnil);
8458         }
8459     }
8460   else if (base_category == coding_category_utf_16_auto)
8461     {
           /* Only the UTF-16 variant is chosen here, from the found and
              rejected masks left by detect_coding_utf_16.  */
8462       if (detect_coding_utf_16 (&coding, &detect_info))
8463         {
8464           struct coding_system *this;
8465
8466           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8467             this = coding_categories + coding_category_utf_16_le;
8468           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8469             this = coding_categories + coding_category_utf_16_be;
8470           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8471             this = coding_categories + coding_category_utf_16_be_nosig;
8472           else
8473             this = coding_categories + coding_category_utf_16_le_nosig;
8474           val = Fcons (make_number (this->id), Qnil);
8475         }
8476     }
8477   else
8478     {
           /* The text-format is already fixed by CODING-SYSTEM; only the
              eol-format may still need detecting.  */
8479       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8480       val = Fcons (make_number (coding.id), Qnil);
8481     }
8482
8483   /* Then, detect eol-format if necessary.  */
8484   {
         /* One eol result per group of categories.  A value that equals
            none of EOL_SEEN_LF, EOL_SEEN_CRLF and EOL_SEEN_CR makes the
            loop below keep the plain coding system name.  */
8485     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8486     Lisp_Object tail;
8487
8488     if (VECTORP (eol_type))
8489       {
8490         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8491           {
                 /* When a null byte was seen, LF is assumed without
                    calling detect_eol.  */
8492             if (null_byte_found)
8493               normal_eol = EOL_SEEN_LF;
8494             else
8495               normal_eol = detect_eol (coding.source, src_bytes,
8496                                        coding_category_raw_text);
8497           }
8498         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8499                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8500           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8501                                       coding_category_utf_16_be);
8502         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8503                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8504           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8505                                       coding_category_utf_16_le);
8506       }
8507     else
8508       {
             /* The eol-format was given by CODING-SYSTEM; use it for
                every group.  */
8509         if (EQ (eol_type, Qunix))
8510           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8511         else if (EQ (eol_type, Qdos))
8512           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8513         else
8514           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8515       }
8516
         /* Replace each ID in VAL by a symbol: the element of that coding
            system's eol_type vector selected by the eol result (index 0
            for LF, 1 for CRLF, 2 for CR), or else the name for the ID.  */
8517     for (tail = val; CONSP (tail); tail = XCDR (tail))
8518       {
8519         enum coding_category category;
8520         int this_eol;
8521
8522         id = XINT (XCAR (tail));
8523         attrs = CODING_ID_ATTRS (id);
8524         category = XINT (CODING_ATTR_CATEGORY (attrs));
8525         eol_type = CODING_ID_EOL_TYPE (id);
8526         if (VECTORP (eol_type))
8527           {
8528             if (category == coding_category_utf_16_be
8529                 || category == coding_category_utf_16_be_nosig)
8530               this_eol = utf_16_be_eol;
8531             else if (category == coding_category_utf_16_le
8532                      || category == coding_category_utf_16_le_nosig)
8533               this_eol = utf_16_le_eol;
8534             else
8535               this_eol = normal_eol;
8536
8537             if (this_eol == EOL_SEEN_LF)
8538               XSETCAR (tail, AREF (eol_type, 0));
8539             else if (this_eol == EOL_SEEN_CRLF)
8540               XSETCAR (tail, AREF (eol_type, 1));
8541             else if (this_eol == EOL_SEEN_CR)
8542               XSETCAR (tail, AREF (eol_type, 2));
8543             else
8544               XSETCAR (tail, CODING_ID_NAME (id));
8545           }
8546         else
8547           XSETCAR (tail, CODING_ID_NAME (id));
8548       }
8549   }
8550
       /* With HIGHEST, return only the first element of VAL, or nil when
          VAL is empty.  */
8551   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8552 }
8553
8554
8555 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8556        2, 3, 0,
8557        doc: /* Detect coding system of the text in the region between START and END.
8558 Return a list of possible coding systems ordered by priority.
8559 The coding systems to try and their priorities follows what
8560 the function `coding-system-priority-list' (which see) returns.
8561
8562 If only ASCII characters are found (except for such ISO-2022 control
8563 characters as ESC), it returns a list of single element `undecided'
8564 or its subsidiary coding system according to a detected end-of-line
8565 format.
8566
8567 If optional argument HIGHEST is non-nil, return the coding system of
8568 highest priority.  */)
8569      (start, end, highest)
8570      Lisp_Object start, end, highest;
8571 {
8572   int from, to;
8573   int from_byte, to_byte;
8574
8575   CHECK_NUMBER_COERCE_MARKER (start);
8576   CHECK_NUMBER_COERCE_MARKER (end);
8577
8578   validate_region (&start, &end);
8579   from = XINT (start), to = XINT (end);
8580   from_byte = CHAR_TO_BYTE (from);
8581   to_byte = CHAR_TO_BYTE (to);
8582
8583   if (from < GPT && to >= GPT)
8584     move_gap_both (to, to_byte);
8585
8586   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8587                                to - from, to_byte - from_byte,
8588                                !NILP (highest),
8589                                !NILP (current_buffer
8590                                       ->enable_multibyte_characters),
8591                                Qnil);
8592 }
8593
8594 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8595        1, 2, 0,
8596        doc: /* Detect coding system of the text in STRING.
8597 Return a list of possible coding systems ordered by priority.
8598 The coding systems to try and their priorities follows what
8599 the function `coding-system-priority-list' (which see) returns.
8600
8601 If only ASCII characters are found (except for such ISO-2022 control
8602 characters as ESC), it returns a list of single element `undecided'
8603 or its subsidiary coding system according to a detected end-of-line
8604 format.
8605
8606 If optional argument HIGHEST is non-nil, return the coding system of
8607 highest priority.  */)
8608      (string, highest)
8609      Lisp_Object string, highest;
8610 {
8611   CHECK_STRING (string);
8612
8613   return detect_coding_system (SDATA (string),
8614                                SCHARS (string), SBYTES (string),
8615                                !NILP (highest), STRING_MULTIBYTE (string),
8616                                Qnil);
8617 }
8618
8619
8620 static INLINE int
8621 char_encodable_p (c, attrs)
8622      int c;
8623      Lisp_Object attrs;
8624 {
8625   Lisp_Object tail;
8626   struct charset *charset;
8627   Lisp_Object translation_table;
8628
8629   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8630   if (! NILP (translation_table))
8631     c = translate_char (translation_table, c);
8632   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8633        CONSP (tail); tail = XCDR (tail))
8634     {
8635       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8636       if (CHAR_CHARSET_P (c, charset))
8637         break;
8638     }
8639   return (! NILP (tail));
8640 }
8641
8642
8643 /* Return a list of coding systems that safely encode the text between
8644    START and END.  If EXCLUDE is non-nil, it is a list of coding
8645    systems not to check.  The returned list doesn't contain any such
8646    coding systems.  In any case, if the text contains only ASCII or is
8647    unibyte, return t.  */
8648
8649 DEFUN ("find-coding-systems-region-internal",
8650        Ffind_coding_systems_region_internal,
8651        Sfind_coding_systems_region_internal, 2, 3, 0,
8652        doc: /* Internal use only.  */)
8653      (start, end, exclude)
8654      Lisp_Object start, end, exclude;
8655 {
8656   Lisp_Object coding_attrs_list, safe_codings;
8657   EMACS_INT start_byte, end_byte;
8658   const unsigned char *p, *pbeg, *pend;
8659   int c;
8660   Lisp_Object tail, elt, work_table;
8661
8662   if (STRINGP (start))
8663     {
8664       if (!STRING_MULTIBYTE (start)
8665           || SCHARS (start) == SBYTES (start))
8666         return Qt;
8667       start_byte = 0;
8668       end_byte = SBYTES (start);
8669     }
8670   else
8671     {
8672       CHECK_NUMBER_COERCE_MARKER (start);
8673       CHECK_NUMBER_COERCE_MARKER (end);
8674       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8675         args_out_of_range (start, end);
8676       if (NILP (current_buffer->enable_multibyte_characters))
8677         return Qt;
8678       start_byte = CHAR_TO_BYTE (XINT (start));
8679       end_byte = CHAR_TO_BYTE (XINT (end));
8680       if (XINT (end) - XINT (start) == end_byte - start_byte)
8681         return Qt;
8682
8683       if (XINT (start) < GPT && XINT (end) > GPT)
8684         {
8685           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8686             move_gap_both (XINT (start), start_byte);
8687           else
8688             move_gap_both (XINT (end), end_byte);
8689         }
8690     }
8691
8692   coding_attrs_list = Qnil;
8693   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8694     if (NILP (exclude)
8695         || NILP (Fmemq (XCAR (tail), exclude)))
8696       {
8697         Lisp_Object attrs;
8698
8699         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8700         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8701             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8702           {
8703             ASET (attrs, coding_attr_trans_tbl,
8704                   get_translation_table (attrs, 1, NULL));
8705             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8706           }
8707       }
8708
8709   if (STRINGP (start))
8710     p = pbeg = SDATA (start);
8711   else
8712     p = pbeg = BYTE_POS_ADDR (start_byte);
8713   pend = p + (end_byte - start_byte);
8714
8715   while (p < pend && ASCII_BYTE_P (*p)) p++;
8716   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8717
8718   work_table = Fmake_char_table (Qnil, Qnil);
8719   while (p < pend)
8720     {
8721       if (ASCII_BYTE_P (*p))
8722         p++;
8723       else
8724         {
8725           c = STRING_CHAR_ADVANCE (p);
8726           if (!NILP (char_table_ref (work_table, c)))
8727             /* This character was already checked.  Ignore it.  */
8728             continue;
8729
8730           charset_map_loaded = 0;
8731           for (tail = coding_attrs_list; CONSP (tail);)
8732             {
8733               elt = XCAR (tail);
8734               if (NILP (elt))
8735                 tail = XCDR (tail);
8736               else if (char_encodable_p (c, elt))
8737                 tail = XCDR (tail);
8738               else if (CONSP (XCDR (tail)))
8739                 {
8740                   XSETCAR (tail, XCAR (XCDR (tail)));
8741                   XSETCDR (tail, XCDR (XCDR (tail)));
8742                 }
8743               else
8744                 {
8745                   XSETCAR (tail, Qnil);
8746                   tail = XCDR (tail);
8747                 }
8748             }
8749           if (charset_map_loaded)
8750             {
8751               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8752
8753               if (STRINGP (start))
8754                 pbeg = SDATA (start);
8755               else
8756                 pbeg = BYTE_POS_ADDR (start_byte);
8757               p = pbeg + p_offset;
8758               pend = pbeg + pend_offset;
8759             }
8760           char_table_set (work_table, c, Qt);
8761         }
8762     }
8763
8764   safe_codings = list2 (Qraw_text, Qno_conversion);
8765   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8766     if (! NILP (XCAR (tail)))
8767       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8768
8769   return safe_codings;
8770 }
8771
8772
8773 DEFUN ("unencodable-char-position", Funencodable_char_position,
8774        Sunencodable_char_position, 3, 5, 0,
8775        doc: /*
8776 Return position of first un-encodable character in a region.
8777 START and END specify the region and CODING-SYSTEM specifies the
8778 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8779
8780 If optional 4th argument COUNT is non-nil, it specifies at most how
8781 many un-encodable characters to search.  In this case, the value is a
8782 list of positions.
8783
8784 If optional 5th argument STRING is non-nil, it is a string to search
8785 for un-encodable characters.  In that case, START and END are indexes
8786 to the string.  */)
8787      (start, end, coding_system, count, string)
8788      Lisp_Object start, end, coding_system, count, string;
8789 {
8790   int n;
8791   struct coding_system coding;
8792   Lisp_Object attrs, charset_list, translation_table;
8793   Lisp_Object positions;
8794   int from, to;
8795   const unsigned char *p, *stop, *pend;
8796   int ascii_compatible;
8797
8798   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8799   attrs = CODING_ID_ATTRS (coding.id);
8800   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8801     return Qnil;
8802   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8803   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8804   translation_table = get_translation_table (attrs, 1, NULL);
8805
8806   if (NILP (string))
8807     {
8808       validate_region (&start, &end);
8809       from = XINT (start);
8810       to = XINT (end);
8811       if (NILP (current_buffer->enable_multibyte_characters)
8812           || (ascii_compatible
8813               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8814         return Qnil;
8815       p = CHAR_POS_ADDR (from);
8816       pend = CHAR_POS_ADDR (to);
8817       if (from < GPT && to >= GPT)
8818         stop = GPT_ADDR;
8819       else
8820         stop = pend;
8821     }
8822   else
8823     {
8824       CHECK_STRING (string);
8825       CHECK_NATNUM (start);
8826       CHECK_NATNUM (end);
8827       from = XINT (start);
8828       to = XINT (end);
8829       if (from > to
8830           || to > SCHARS (string))
8831         args_out_of_range_3 (string, start, end);
8832       if (! STRING_MULTIBYTE (string))
8833         return Qnil;
8834       p = SDATA (string) + string_char_to_byte (string, from);
8835       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8836       if (ascii_compatible && (to - from) == (pend - p))
8837         return Qnil;
8838     }
8839
8840   if (NILP (count))
8841     n = 1;
8842   else
8843     {
8844       CHECK_NATNUM (count);
8845       n = XINT (count);
8846     }
8847
8848   positions = Qnil;
8849   while (1)
8850     {
8851       int c;
8852
8853       if (ascii_compatible)
8854         while (p < stop && ASCII_BYTE_P (*p))
8855           p++, from++;
8856       if (p >= stop)
8857         {
8858           if (p >= pend)
8859             break;
8860           stop = pend;
8861           p = GAP_END_ADDR;
8862         }
8863
8864       c = STRING_CHAR_ADVANCE (p);
8865       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8866           && ! char_charset (translate_char (translation_table, c),
8867                              charset_list, NULL))
8868         {
8869           positions = Fcons (make_number (from), positions);
8870           n--;
8871           if (n == 0)
8872             break;
8873         }
8874
8875       from++;
8876     }
8877
8878   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8879 }
8880
8881
8882 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8883        Scheck_coding_systems_region, 3, 3, 0,
8884        doc: /* Check if the region is encodable by coding systems.
8885
8886 START and END are buffer positions specifying the region.
8887 CODING-SYSTEM-LIST is a list of coding systems to check.
8888
8889 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8890 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8891 whole region, POS0, POS1, ... are buffer positions where non-encodable
8892 characters are found.
8893
8894 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8895 value is nil.
8896
8897 START may be a string.  In that case, check if the string is
8898 encodable, and the value contains indices to the string instead of
8899 buffer positions.  END is ignored.
8900
8901 If the current buffer (or START if it is a string) is unibyte, the value
8902 is nil.  */)
8903      (start, end, coding_system_list)
8904      Lisp_Object start, end, coding_system_list;
8905 {
8906   Lisp_Object list;
8907   EMACS_INT start_byte, end_byte;
8908   int pos;
8909   const unsigned char *p, *pbeg, *pend;
8910   int c;
8911   Lisp_Object tail, elt, attrs;
8912
8913   if (STRINGP (start))
8914     {
8915       if (!STRING_MULTIBYTE (start)
8916           || SCHARS (start) == SBYTES (start))
8917         return Qnil;
8918       start_byte = 0;
8919       end_byte = SBYTES (start);
8920       pos = 0;
8921     }
8922   else
8923     {
8924       CHECK_NUMBER_COERCE_MARKER (start);
8925       CHECK_NUMBER_COERCE_MARKER (end);
8926       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8927         args_out_of_range (start, end);
8928       if (NILP (current_buffer->enable_multibyte_characters))
8929         return Qnil;
8930       start_byte = CHAR_TO_BYTE (XINT (start));
8931       end_byte = CHAR_TO_BYTE (XINT (end));
8932       if (XINT (end) - XINT (start) == end_byte - start_byte)
8933         return Qnil;
8934
8935       if (XINT (start) < GPT && XINT (end) > GPT)
8936         {
8937           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8938             move_gap_both (XINT (start), start_byte);
8939           else
8940             move_gap_both (XINT (end), end_byte);
8941         }
8942       pos = XINT (start);
8943     }
8944
8945   list = Qnil;
8946   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8947     {
8948       elt = XCAR (tail);
8949       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8950       ASET (attrs, coding_attr_trans_tbl,
8951             get_translation_table (attrs, 1, NULL));
8952       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8953     }
8954
8955   if (STRINGP (start))
8956     p = pbeg = SDATA (start);
8957   else
8958     p = pbeg = BYTE_POS_ADDR (start_byte);
8959   pend = p + (end_byte - start_byte);
8960
8961   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8962   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8963
8964   while (p < pend)
8965     {
8966       if (ASCII_BYTE_P (*p))
8967         p++;
8968       else
8969         {
8970           c = STRING_CHAR_ADVANCE (p);
8971
8972           charset_map_loaded = 0;
8973           for (tail = list; CONSP (tail); tail = XCDR (tail))
8974             {
8975               elt = XCDR (XCAR (tail));
8976               if (! char_encodable_p (c, XCAR (elt)))
8977                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8978             }
8979           if (charset_map_loaded)
8980             {
8981               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8982
8983               if (STRINGP (start))
8984                 pbeg = SDATA (start);
8985               else
8986                 pbeg = BYTE_POS_ADDR (start_byte);
8987               p = pbeg + p_offset;
8988               pend = pbeg + pend_offset;
8989             }
8990         }
8991       pos++;
8992     }
8993
8994   tail = list;
8995   list = Qnil;
8996   for (; CONSP (tail); tail = XCDR (tail))
8997     {
8998       elt = XCAR (tail);
8999       if (CONSP (XCDR (XCDR (elt))))
9000         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
9001                       list);
9002     }
9003
9004   return list;
9005 }
9006
9007
9008 Lisp_Object
9009 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
9010      Lisp_Object start, end, coding_system, dst_object;
9011      int encodep, norecord;
9012 {
9013   struct coding_system coding;
9014   EMACS_INT from, from_byte, to, to_byte;
9015   Lisp_Object src_object;
9016
9017   CHECK_NUMBER_COERCE_MARKER (start);
9018   CHECK_NUMBER_COERCE_MARKER (end);
9019   if (NILP (coding_system))
9020     coding_system = Qno_conversion;
9021   else
9022     CHECK_CODING_SYSTEM (coding_system);
9023   src_object = Fcurrent_buffer ();
9024   if (NILP (dst_object))
9025     dst_object = src_object;
9026   else if (! EQ (dst_object, Qt))
9027     CHECK_BUFFER (dst_object);
9028
9029   validate_region (&start, &end);
9030   from = XFASTINT (start);
9031   from_byte = CHAR_TO_BYTE (from);
9032   to = XFASTINT (end);
9033   to_byte = CHAR_TO_BYTE (to);
9034
9035   setup_coding_system (coding_system, &coding);
9036   coding.mode |= CODING_MODE_LAST_BLOCK;
9037
9038   if (encodep)
9039     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9040                           dst_object);
9041   else
9042     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9043                           dst_object);
9044   if (! norecord)
9045     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9046
9047   return (BUFFERP (dst_object)
9048           ? make_number (coding.produced_char)
9049           : coding.dst_object);
9050 }
9051
9052
9053 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
9054        3, 4, "r\nzCoding system: ",
9055        doc: /* Decode the current region from the specified coding system.
9056 When called from a program, takes four arguments:
9057         START, END, CODING-SYSTEM, and DESTINATION.
9058 START and END are buffer positions.
9059
9060 Optional 4th arguments DESTINATION specifies where the decoded text goes.
9061 If nil, the region between START and END is replaced by the decoded text.
9062 If buffer, the decoded text is inserted in that buffer after point (point
9063 does not move).
9064 In those cases, the length of the decoded text is returned.
9065 If DESTINATION is t, the decoded text is returned.
9066
9067 This function sets `last-coding-system-used' to the precise coding system
9068 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9069 not fully specified.)  */)
9070      (start, end, coding_system, destination)
9071      Lisp_Object start, end, coding_system, destination;
9072 {
9073   return code_convert_region (start, end, coding_system, destination, 0, 0);
9074 }
9075
9076 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
9077        3, 4, "r\nzCoding system: ",
9078        doc: /* Encode the current region by specified coding system.
9079 When called from a program, takes four arguments:
9080         START, END, CODING-SYSTEM and DESTINATION.
9081 START and END are buffer positions.
9082
9083 Optional 4th arguments DESTINATION specifies where the encoded text goes.
9084 If nil, the region between START and END is replace by the encoded text.
9085 If buffer, the encoded text is inserted in that buffer after point (point
9086 does not move).
9087 In those cases, the length of the encoded text is returned.
9088 If DESTINATION is t, the encoded text is returned.
9089
9090 This function sets `last-coding-system-used' to the precise coding system
9091 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9092 not fully specified.)  */)
9093   (start, end, coding_system, destination)
9094      Lisp_Object start, end, coding_system, destination;
9095 {
9096   return code_convert_region (start, end, coding_system, destination, 1, 0);
9097 }
9098
9099 Lisp_Object
9100 code_convert_string (string, coding_system, dst_object,
9101                      encodep, nocopy, norecord)
9102      Lisp_Object string, coding_system, dst_object;
9103      int encodep, nocopy, norecord;
9104 {
9105   struct coding_system coding;
9106   EMACS_INT chars, bytes;
9107
9108   CHECK_STRING (string);
9109   if (NILP (coding_system))
9110     {
9111       if (! norecord)
9112         Vlast_coding_system_used = Qno_conversion;
9113       if (NILP (dst_object))
9114         return (nocopy ? Fcopy_sequence (string) : string);
9115     }
9116
9117   if (NILP (coding_system))
9118     coding_system = Qno_conversion;
9119   else
9120     CHECK_CODING_SYSTEM (coding_system);
9121   if (NILP (dst_object))
9122     dst_object = Qt;
9123   else if (! EQ (dst_object, Qt))
9124     CHECK_BUFFER (dst_object);
9125
9126   setup_coding_system (coding_system, &coding);
9127   coding.mode |= CODING_MODE_LAST_BLOCK;
9128   chars = SCHARS (string);
9129   bytes = SBYTES (string);
9130   if (encodep)
9131     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9132   else
9133     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9134   if (! norecord)
9135     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9136
9137   return (BUFFERP (dst_object)
9138           ? make_number (coding.produced_char)
9139           : coding.dst_object);
9140 }
9141
9142
9143 /* Encode or decode STRING according to CODING_SYSTEM.
9144    Do not set Vlast_coding_system_used.
9145
9146    This function is called only from macros DECODE_FILE and
9147    ENCODE_FILE, thus we ignore character composition.  */
9148
9149 Lisp_Object
9150 code_convert_string_norecord (string, coding_system, encodep)
9151      Lisp_Object string, coding_system;
9152      int encodep;
9153 {
9154   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9155 }
9156
9157
9158 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9159        2, 4, 0,
9160        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9161
9162 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9163 if the decoding operation is trivial.
9164
9165 Optional fourth arg BUFFER non-nil means that the decoded text is
9166 inserted in that buffer after point (point does not move).  In this
9167 case, the return value is the length of the decoded text.
9168
9169 This function sets `last-coding-system-used' to the precise coding system
9170 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9171 not fully specified.)  */)
9172   (string, coding_system, nocopy, buffer)
9173      Lisp_Object string, coding_system, nocopy, buffer;
9174 {
9175   return code_convert_string (string, coding_system, buffer,
9176                               0, ! NILP (nocopy), 0);
9177 }
9178
9179 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9180        2, 4, 0,
9181        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9182
9183 Optional third arg NOCOPY non-nil means it is OK to return STRING
9184 itself if the encoding operation is trivial.
9185
9186 Optional fourth arg BUFFER non-nil means that the encoded text is
9187 inserted in that buffer after point (point does not move).  In this
9188 case, the return value is the length of the encoded text.
9189
9190 This function sets `last-coding-system-used' to the precise coding system
9191 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9192 not fully specified.)  */)
9193      (string, coding_system, nocopy, buffer)
9194      Lisp_Object string, coding_system, nocopy, buffer;
9195 {
9196   return code_convert_string (string, coding_system, buffer,
9197                               1, ! NILP (nocopy), 1);
9198 }
9199
9200 \f
9201 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9202        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9203 Return the corresponding character.  */)
9204      (code)
9205      Lisp_Object code;
9206 {
9207   Lisp_Object spec, attrs, val;
9208   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9209   int c;
9210
9211   CHECK_NATNUM (code);
9212   c = XFASTINT (code);
9213   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9214   attrs = AREF (spec, 0);
9215
9216   if (ASCII_BYTE_P (c)
9217       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9218     return code;
9219
9220   val = CODING_ATTR_CHARSET_LIST (attrs);
9221   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9222   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9223   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9224
9225   if (c <= 0x7F)
9226     charset = charset_roman;
9227   else if (c >= 0xA0 && c < 0xDF)
9228     {
9229       charset = charset_kana;
9230       c -= 0x80;
9231     }
9232   else
9233     {
9234       int s1 = c >> 8, s2 = c & 0xFF;
9235
9236       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9237           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9238         error ("Invalid code: %d", code);
9239       SJIS_TO_JIS (c);
9240       charset = charset_kanji;
9241     }
9242   c = DECODE_CHAR (charset, c);
9243   if (c < 0)
9244     error ("Invalid code: %d", code);
9245   return make_number (c);
9246 }
9247
9248
9249 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9250        doc: /* Encode a Japanese character CH to shift_jis encoding.
9251 Return the corresponding code in SJIS.  */)
9252      (ch)
9253     Lisp_Object ch;
9254 {
9255   Lisp_Object spec, attrs, charset_list;
9256   int c;
9257   struct charset *charset;
9258   unsigned code;
9259
9260   CHECK_CHARACTER (ch);
9261   c = XFASTINT (ch);
9262   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9263   attrs = AREF (spec, 0);
9264
9265   if (ASCII_CHAR_P (c)
9266       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9267     return ch;
9268
9269   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9270   charset = char_charset (c, charset_list, &code);
9271   if (code == CHARSET_INVALID_CODE (charset))
9272     error ("Can't encode by shift_jis encoding: %d", c);
9273   JIS_TO_SJIS (code);
9274
9275   return make_number (code);
9276 }
9277
9278 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9279        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9280 Return the corresponding character.  */)
9281      (code)
9282      Lisp_Object code;
9283 {
9284   Lisp_Object spec, attrs, val;
9285   struct charset *charset_roman, *charset_big5, *charset;
9286   int c;
9287
9288   CHECK_NATNUM (code);
9289   c = XFASTINT (code);
9290   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9291   attrs = AREF (spec, 0);
9292
9293   if (ASCII_BYTE_P (c)
9294       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9295     return code;
9296
9297   val = CODING_ATTR_CHARSET_LIST (attrs);
9298   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9299   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9300
9301   if (c <= 0x7F)
9302     charset = charset_roman;
9303   else
9304     {
9305       int b1 = c >> 8, b2 = c & 0x7F;
9306       if (b1 < 0xA1 || b1 > 0xFE
9307           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9308         error ("Invalid code: %d", code);
9309       charset = charset_big5;
9310     }
9311   c = DECODE_CHAR (charset, (unsigned )c);
9312   if (c < 0)
9313     error ("Invalid code: %d", code);
9314   return make_number (c);
9315 }
9316
9317 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9318        doc: /* Encode the Big5 character CH to BIG5 coding system.
9319 Return the corresponding character code in Big5.  */)
9320      (ch)
9321      Lisp_Object ch;
9322 {
9323   Lisp_Object spec, attrs, charset_list;
9324   struct charset *charset;
9325   int c;
9326   unsigned code;
9327
9328   CHECK_CHARACTER (ch);
9329   c = XFASTINT (ch);
9330   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9331   attrs = AREF (spec, 0);
9332   if (ASCII_CHAR_P (c)
9333       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9334     return ch;
9335
9336   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9337   charset = char_charset (c, charset_list, &code);
9338   if (code == CHARSET_INVALID_CODE (charset))
9339     error ("Can't encode by Big5 encoding: %d", c);
9340
9341   return make_number (code);
9342 }
9343
9344 \f
9345 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9346        Sset_terminal_coding_system_internal, 1, 2, 0,
9347        doc: /* Internal use only.  */)
9348      (coding_system, terminal)
9349      Lisp_Object coding_system;
9350      Lisp_Object terminal;
9351 {
9352   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9353   CHECK_SYMBOL (coding_system);
9354   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9355   /* We had better not send unsafe characters to terminal.  */
9356   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9357   /* Characer composition should be disabled.  */
9358   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9359   terminal_coding->src_multibyte = 1;
9360   terminal_coding->dst_multibyte = 0;
9361   return Qnil;
9362 }
9363
9364 DEFUN ("set-safe-terminal-coding-system-internal",
9365        Fset_safe_terminal_coding_system_internal,
9366        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9367        doc: /* Internal use only.  */)
9368      (coding_system)
9369      Lisp_Object coding_system;
9370 {
9371   CHECK_SYMBOL (coding_system);
9372   setup_coding_system (Fcheck_coding_system (coding_system),
9373                        &safe_terminal_coding);
9374   /* Characer composition should be disabled.  */
9375   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9376   safe_terminal_coding.src_multibyte = 1;
9377   safe_terminal_coding.dst_multibyte = 0;
9378   return Qnil;
9379 }
9380
9381 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9382        Sterminal_coding_system, 0, 1, 0,
9383        doc: /* Return coding system specified for terminal output on the given terminal.
9384 TERMINAL may be a terminal object, a frame, or nil for the selected
9385 frame's terminal device.  */)
9386      (terminal)
9387      Lisp_Object terminal;
9388 {
9389   struct coding_system *terminal_coding
9390     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9391   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9392
9393   /* For backward compatibility, return nil if it is `undecided'. */
9394   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9395 }
9396
9397 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9398        Sset_keyboard_coding_system_internal, 1, 2, 0,
9399        doc: /* Internal use only.  */)
9400      (coding_system, terminal)
9401      Lisp_Object coding_system;
9402      Lisp_Object terminal;
9403 {
9404   struct terminal *t = get_terminal (terminal, 1);
9405   CHECK_SYMBOL (coding_system);
9406   if (NILP (coding_system))
9407     coding_system = Qno_conversion;
9408   else
9409     Fcheck_coding_system (coding_system);
9410   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9411   /* Characer composition should be disabled.  */
9412   TERMINAL_KEYBOARD_CODING (t)->common_flags
9413     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9414   return Qnil;
9415 }
9416
9417 DEFUN ("keyboard-coding-system",
9418        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9419        doc: /* Return coding system specified for decoding keyboard input.  */)
9420      (terminal)
9421      Lisp_Object terminal;
9422 {
9423   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9424                          (get_terminal (terminal, 1))->id);
9425 }
9426
9427 \f
9428 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9429        Sfind_operation_coding_system,  1, MANY, 0,
9430        doc: /* Choose a coding system for an operation based on the target name.
9431 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9432 DECODING-SYSTEM is the coding system to use for decoding
9433 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9434 for encoding (in case OPERATION does encoding).
9435
9436 The first argument OPERATION specifies an I/O primitive:
9437   For file I/O, `insert-file-contents' or `write-region'.
9438   For process I/O, `call-process', `call-process-region', or `start-process'.
9439   For network I/O, `open-network-stream'.
9440
9441 The remaining arguments should be the same arguments that were passed
9442 to the primitive.  Depending on which primitive, one of those arguments
9443 is selected as the TARGET.  For example, if OPERATION does file I/O,
9444 whichever argument specifies the file name is TARGET.
9445
9446 TARGET has a meaning which depends on OPERATION:
9447   For file I/O, TARGET is a file name (except for the special case below).
9448   For process I/O, TARGET is a process name.
9449   For network I/O, TARGET is a service name or a port number.
9450
9451 This function looks up what is specified for TARGET in
9452 `file-coding-system-alist', `process-coding-system-alist',
9453 or `network-coding-system-alist' depending on OPERATION.
9454 They may specify a coding system, a cons of coding systems,
9455 or a function symbol to call.
9456 In the last case, we call the function with one argument,
9457 which is a list of all the arguments given to this function.
9458 If the function can't decide a coding system, it can return
9459 `undecided' so that the normal code-detection is performed.
9460
9461 If OPERATION is `insert-file-contents', the argument corresponding to
9462 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9463 file name to look up, and BUFFER is a buffer that contains the file's
9464 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9465 function to call for FILENAME, that function should examine the
9466 contents of BUFFER instead of reading the file.
9467
9468 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9469      (nargs, args)
9470      int nargs;
9471      Lisp_Object *args;
9472 {
9473   Lisp_Object operation, target_idx, target, val;
9474   register Lisp_Object chain;
9475
9476   if (nargs < 2)
9477     error ("Too few arguments");
9478   operation = args[0];
9479   if (!SYMBOLP (operation)
9480       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9481     error ("Invalid first argument");
9482   if (nargs < 1 + XINT (target_idx))
9483     error ("Too few arguments for operation: %s",
9484            SDATA (SYMBOL_NAME (operation)));
9485   target = args[XINT (target_idx) + 1];
9486   if (!(STRINGP (target)
9487         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9488             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9489         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9490     error ("Invalid %dth argument", XINT (target_idx) + 1);
9491   if (CONSP (target))
9492     target = XCAR (target);
9493
9494   chain = ((EQ (operation, Qinsert_file_contents)
9495             || EQ (operation, Qwrite_region))
9496            ? Vfile_coding_system_alist
9497            : (EQ (operation, Qopen_network_stream)
9498               ? Vnetwork_coding_system_alist
9499               : Vprocess_coding_system_alist));
9500   if (NILP (chain))
9501     return Qnil;
9502
9503   for (; CONSP (chain); chain = XCDR (chain))
9504     {
9505       Lisp_Object elt;
9506
9507       elt = XCAR (chain);
9508       if (CONSP (elt)
9509           && ((STRINGP (target)
9510                && STRINGP (XCAR (elt))
9511                && fast_string_match (XCAR (elt), target) >= 0)
9512               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9513         {
9514           val = XCDR (elt);
9515           /* Here, if VAL is both a valid coding system and a valid
9516              function symbol, we return VAL as a coding system.  */
9517           if (CONSP (val))
9518             return val;
9519           if (! SYMBOLP (val))
9520             return Qnil;
9521           if (! NILP (Fcoding_system_p (val)))
9522             return Fcons (val, val);
9523           if (! NILP (Ffboundp (val)))
9524             {
9525               /* We use call1 rather than safe_call1
9526                  so as to get bug reports about functions called here
9527                  which don't handle the current interface.  */
9528               val = call1 (val, Flist (nargs, args));
9529               if (CONSP (val))
9530                 return val;
9531               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9532                 return Fcons (val, val);
9533             }
9534           return Qnil;
9535         }
9536     }
9537   return Qnil;
9538 }
9539
9540 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9541        Sset_coding_system_priority, 0, MANY, 0,
9542        doc: /* Assign higher priority to the coding systems given as arguments.
9543 If multiple coding systems belong to the same category,
9544 all but the first one are ignored.
9545
9546 usage: (set-coding-system-priority &rest coding-systems)  */)
9547      (nargs, args)
9548      int nargs;
9549      Lisp_Object *args;
9550 {
9551   int i, j;
9552   int changed[coding_category_max];
9553   enum coding_category priorities[coding_category_max];
9554
9555   bzero (changed, sizeof changed);
9556
9557   for (i = j = 0; i < nargs; i++)
9558     {
9559       enum coding_category category;
9560       Lisp_Object spec, attrs;
9561
9562       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9563       attrs = AREF (spec, 0);
9564       category = XINT (CODING_ATTR_CATEGORY (attrs));
9565       if (changed[category])
9566         /* Ignore this coding system because a coding system of the
9567            same category already had a higher priority.  */
9568         continue;
9569       changed[category] = 1;
9570       priorities[j++] = category;
9571       if (coding_categories[category].id >= 0
9572           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9573         setup_coding_system (args[i], &coding_categories[category]);
9574       Fset (AREF (Vcoding_category_table, category), args[i]);
9575     }
9576
9577   /* Now we have decided top J priorities.  Reflect the order of the
9578      original priorities to the remaining priorities.  */
9579
9580   for (i = j, j = 0; i < coding_category_max; i++, j++)
9581     {
9582       while (j < coding_category_max
9583              && changed[coding_priorities[j]])
9584         j++;
9585       if (j == coding_category_max)
9586         abort ();
9587       priorities[i] = coding_priorities[j];
9588     }
9589
9590   bcopy (priorities, coding_priorities, sizeof priorities);
9591
9592   /* Update `coding-category-list'.  */
9593   Vcoding_category_list = Qnil;
9594   for (i = coding_category_max - 1; i >= 0; i--)
9595     Vcoding_category_list
9596       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9597                Vcoding_category_list);
9598
9599   return Qnil;
9600 }
9601
9602 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9603        Scoding_system_priority_list, 0, 1, 0,
9604        doc: /* Return a list of coding systems ordered by their priorities.
9605 The list contains a subset of coding systems; i.e. coding systems
9606 assigned to each coding category (see `coding-category-list').
9607
9608 HIGHESTP non-nil means just return the highest priority one.  */)
9609      (highestp)
9610      Lisp_Object highestp;
9611 {
9612   int i;
9613   Lisp_Object val;
9614
9615   for (i = 0, val = Qnil; i < coding_category_max; i++)
9616     {
9617       enum coding_category category = coding_priorities[i];
9618       int id = coding_categories[category].id;
9619       Lisp_Object attrs;
9620
9621       if (id < 0)
9622         continue;
9623       attrs = CODING_ID_ATTRS (id);
9624       if (! NILP (highestp))
9625         return CODING_ATTR_BASE_NAME (attrs);
9626       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9627     }
9628   return Fnreverse (val);
9629 }
9630
9631 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9632
9633 static Lisp_Object
9634 make_subsidiaries (base)
9635      Lisp_Object base;
9636 {
9637   Lisp_Object subsidiaries;
9638   int base_name_len = SBYTES (SYMBOL_NAME (base));
9639   char *buf = (char *) alloca (base_name_len + 6);
9640   int i;
9641
9642   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9643   subsidiaries = Fmake_vector (make_number (3), Qnil);
9644   for (i = 0; i < 3; i++)
9645     {
9646       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9647       ASET (subsidiaries, i, intern (buf));
9648     }
9649   return subsidiaries;
9650 }
9651
9652
9653 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9654        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9655        doc: /* For internal use only.
9656 usage: (define-coding-system-internal ...)  */)
9657      (nargs, args)
9658      int nargs;
9659      Lisp_Object *args;
9660 {
       /* Define a new coding system from the NARGS arguments in ARGS,
          which are indexed by the coding_arg_* constants.  Build the
          attribute vector ATTRS, register the coding system (and, when
          the eol-type argument is nil, its three subsidiaries) in
          Vcoding_system_hash_table, Vcoding_system_list and
          Vcoding_system_alist, and return nil.  Signal
          Qwrong_number_of_arguments if NARGS is smaller than the
          argument count required by the coding type.  */
9661   Lisp_Object name;
9662   Lisp_Object spec_vec;         /* [ ATTRS ALIASES EOL_TYPE ] */
9663   Lisp_Object attrs;            /* Vector of attributes.  */
9664   Lisp_Object eol_type;
9665   Lisp_Object aliases;
9666   Lisp_Object coding_type, charset_list, safe_charsets;
9667   enum coding_category category;
9668   Lisp_Object tail, val;
9669   int max_charset_id = 0;
9670   int i;
9671
9672   if (nargs < coding_arg_max)
9673     goto short_args;
9674
9675   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9676
9677   name = args[coding_arg_name];
9678   CHECK_SYMBOL (name);
9679   CODING_ATTR_BASE_NAME (attrs) = name;
9680
       /* The mnemonic is either a string or a character.  */
9681   val = args[coding_arg_mnemonic];
9682   if (! STRINGP (val))
9683     CHECK_CHARACTER (val);
9684   CODING_ATTR_MNEMONIC (attrs) = val;
9685
9686   coding_type = args[coding_arg_coding_type];
9687   CHECK_SYMBOL (coding_type);
9688   CODING_ATTR_TYPE (attrs) = coding_type;
9689
       /* Turn CHARSET_LIST into a list of charset IDs and record the
          largest ID in MAX_CHARSET_ID.  */
9690   charset_list = args[coding_arg_charset_list];
9691   if (SYMBOLP (charset_list))
9692     {
           /* The symbols iso-2022 and emacs-mule stand for
              Viso_2022_charset_list and Vemacs_mule_charset_list; each
              is accepted only for the coding type of the same name.
              The elements of the resulting list are read as integers
              below.  */
9693       if (EQ (charset_list, Qiso_2022))
9694         {
9695           if (! EQ (coding_type, Qiso_2022))
9696             error ("Invalid charset-list");
9697           charset_list = Viso_2022_charset_list;
9698         }
9699       else if (EQ (charset_list, Qemacs_mule))
9700         {
9701           if (! EQ (coding_type, Qemacs_mule))
9702             error ("Invalid charset-list");
9703           charset_list = Vemacs_mule_charset_list;
9704         }
9705       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9706         if (max_charset_id < XFASTINT (XCAR (tail)))
9707           max_charset_id = XFASTINT (XCAR (tail));
9708     }
9709   else
9710     {
           /* Replace each charset in a copy of the list by its ID.  For
              the types iso-2022 and emacs-mule, a charset whose ISO
              final / emacs-mule ID is negative is rejected.  */
9711       charset_list = Fcopy_sequence (charset_list);
9712       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9713         {
9714           struct charset *charset;
9715
9716           val = XCAR (tail);
9717           CHECK_CHARSET_GET_CHARSET (val, charset);
9718           if (EQ (coding_type, Qiso_2022)
9719               ? CHARSET_ISO_FINAL (charset) < 0
9720               : EQ (coding_type, Qemacs_mule)
9721               ? CHARSET_EMACS_MULE_ID (charset) < 0
9722               : 0)
9723             error ("Can't handle charset `%s'",
9724                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9725
9726           XSETCAR (tail, make_number (charset->id));
9727           if (max_charset_id < charset->id)
9728             max_charset_id = charset->id;
9729         }
9730     }
9731   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9732
       /* SAFE_CHARSETS is a string indexed by charset ID: byte 0 marks
          an ID that is in CHARSET_LIST, byte 255 any other ID up to
          MAX_CHARSET_ID.  */
9733   safe_charsets = make_uninit_string (max_charset_id + 1);
9734   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9735   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9736     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9737   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9738
9739   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9740
       /* A translation table is a char-table, a cons, or a symbol.  */
9741   val = args[coding_arg_decode_translation_table];
9742   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9743     CHECK_SYMBOL (val);
9744   CODING_ATTR_DECODE_TBL (attrs) = val;
9745
9746   val = args[coding_arg_encode_translation_table];
9747   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9748     CHECK_SYMBOL (val);
9749   CODING_ATTR_ENCODE_TBL (attrs) = val;
9750
9751   val = args[coding_arg_post_read_conversion];
9752   CHECK_SYMBOL (val);
9753   CODING_ATTR_POST_READ (attrs) = val;
9754
9755   val = args[coding_arg_pre_write_conversion];
9756   CHECK_SYMBOL (val);
9757   CODING_ATTR_PRE_WRITE (attrs) = val;
9758
       /* A nil default character means the space character.  */
9759   val = args[coding_arg_default_char];
9760   if (NILP (val))
9761     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9762   else
9763     {
9764       CHECK_CHARACTER (val);
9765       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9766     }
9767
       /* Normalize the for-unibyte flag to nil or t.  */
9768   val = args[coding_arg_for_unibyte];
9769   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9770
9771   val = args[coding_arg_plist];
9772   CHECK_LIST (val);
9773   CODING_ATTR_PLIST (attrs) = val;
9774
       /* The remaining attributes and CATEGORY depend on the coding
          type.  */
9775   if (EQ (coding_type, Qcharset))
9776     {
9777       /* Generate a lisp vector of 256 elements.  Each element is nil,
9778          integer, or a list of charset IDs.
9779
9780          If Nth element is nil, the byte code N is invalid in this
9781          coding system.
9782
9783          If Nth element is a number NUM, N is the first byte of a
9784          charset whose ID is NUM.
9785
9786          If Nth element is a list of charset IDs, N is the first byte
9787          of one of them.  The list is sorted by dimensions of the
9788          charsets.  A charset of smaller dimension comes first. */
9789       val = Fmake_vector (make_number (256), Qnil);
9790
9791       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9792         {
9793           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9794           int dim = CHARSET_DIMENSION (charset);
9795           int idx = (dim - 1) * 4;
9796
9797           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9798             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9799
               /* Visit every byte value between code_space[IDX] and
                  code_space[IDX + 1], inclusive.  */
9800           for (i = charset->code_space[idx];
9801                i <= charset->code_space[idx + 1]; i++)
9802             {
9803               Lisp_Object tmp, tmp2;
9804               int dim2;
9805
9806               tmp = AREF (val, i);
9807               if (NILP (tmp))
9808                 tmp = XCAR (tail);
9809               else if (NUMBERP (tmp))
9810                 {
                       /* One charset is recorded already: make a
                          two-element list, smaller dimension first.  */
9811                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9812                   if (dim < dim2)
9813                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9814                   else
9815                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9816                 }
9817               else
9818                 {
                       /* Find the first listed charset whose dimension
                          is larger than DIM.  */
9819                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9820                     {
9821                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9822                       if (dim < dim2)
9823                         break;
9824                     }
9825                   if (NILP (tmp2))
9826                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9827                   else
9828                     {
                           /* Insert before TMP2 in place: move TMP2's
                              car into a new cell right after it, then
                              store the new ID in TMP2 itself.  */
9829                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9830                       XSETCAR (tmp2, XCAR (tail));
9831                     }
9832                 }
9833               ASET (val, i, tmp);
9834             }
9835         }
9836       ASET (attrs, coding_attr_charset_valids, val);
9837       category = coding_category_charset;
9838     }
9839   else if (EQ (coding_type, Qccl))
9840     {
9841       Lisp_Object valids;
9842
9843       if (nargs < coding_arg_ccl_max)
9844         goto short_args;
9845
           /* A decoder or encoder given as a vector is stored as a
              copy.  */
9846       val = args[coding_arg_ccl_decoder];
9847       CHECK_CCL_PROGRAM (val);
9848       if (VECTORP (val))
9849         val = Fcopy_sequence (val);
9850       ASET (attrs, coding_attr_ccl_decoder, val);
9851
9852       val = args[coding_arg_ccl_encoder];
9853       CHECK_CCL_PROGRAM (val);
9854       if (VECTORP (val))
9855         val = Fcopy_sequence (val);
9856       ASET (attrs, coding_attr_ccl_encoder, val);
9857
           /* VALIDS is a string of 256 bytes.  Byte N is 1 if N is
              listed in the valids argument, either as an integer or
              within a (FROM . TO) range, and 0 otherwise.  Both forms
              must lie within 0..255.  */
9858       val = args[coding_arg_ccl_valids];
9859       valids = Fmake_string (make_number (256), make_number (0));
9860       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9861         {
9862           int from, to;
9863
9864           val = Fcar (tail);
9865           if (INTEGERP (val))
9866             {
9867               from = to = XINT (val);
9868               if (from < 0 || from > 255)
9869                 args_out_of_range_3 (val, make_number (0), make_number (255));
9870             }
9871           else
9872             {
9873               CHECK_CONS (val);
9874               CHECK_NATNUM_CAR (val);
9875               CHECK_NATNUM_CDR (val);
9876               from = XINT (XCAR (val));
9877               if (from > 255)
9878                 args_out_of_range_3 (XCAR (val),
9879                                      make_number (0), make_number (255));
9880               to = XINT (XCDR (val));
9881               if (to < from || to > 255)
9882                 args_out_of_range_3 (XCDR (val),
9883                                      XCAR (val), make_number (255));
9884             }
9885           for (i = from; i <= to; i++)
9886             SSET (valids, i, 1);
9887         }
9888       ASET (attrs, coding_attr_ccl_valids, valids);
9889
9890       category = coding_category_ccl;
9891     }
9892   else if (EQ (coding_type, Qutf_16))
9893     {
9894       Lisp_Object bom, endian;
9895
           /* The ascii-compatible-p argument is overridden with nil for
              this type.  */
9896       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9897
9898       if (nargs < coding_arg_utf16_max)
9899         goto short_args;
9900
           /* BOM is nil, t, or a cons of two coding systems.  */
9901       bom = args[coding_arg_utf16_bom];
9902       if (! NILP (bom) && ! EQ (bom, Qt))
9903         {
9904           CHECK_CONS (bom);
9905           val = XCAR (bom);
9906           CHECK_CODING_SYSTEM (val);
9907           val = XCDR (bom);
9908           CHECK_CODING_SYSTEM (val);
9909         }
9910       ASET (attrs, coding_attr_utf_bom, bom);
9911
           /* ENDIAN is the symbol big or little; nil means big.  */
9912       endian = args[coding_arg_utf16_endian];
9913       CHECK_SYMBOL (endian);
9914       if (NILP (endian))
9915         endian = Qbig;
9916       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9917         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9918       ASET (attrs, coding_attr_utf_16_endian, endian);
9919
           /* A cons BOM selects the auto category; otherwise the
              category follows from BOM (nil or t) and ENDIAN.  */
9920       category = (CONSP (bom)
9921                   ? coding_category_utf_16_auto
9922                   : NILP (bom)
9923                   ? (EQ (endian, Qbig)
9924                      ? coding_category_utf_16_be_nosig
9925                      : coding_category_utf_16_le_nosig)
9926                   : (EQ (endian, Qbig)
9927                      ? coding_category_utf_16_be
9928                      : coding_category_utf_16_le));
9929     }
9930   else if (EQ (coding_type, Qiso_2022))
9931     {
9932       Lisp_Object initial, reg_usage, request, flags;
           /* This `i' shadows the function-level `i'.  It is the loop
              index for INITIAL first and holds the integer value of
              FLAGS afterwards.  */
9933       int i;
9934
9935       if (nargs < coding_arg_iso2022_max)
9936         goto short_args;
9937
           /* In a copy of the initial-designation vector, replace each
              of the first four elements by a charset ID, or by -1 where
              the element is nil.  */
9938       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9939       CHECK_VECTOR (initial);
9940       for (i = 0; i < 4; i++)
9941         {
9942           val = Faref (initial, make_number (i));
9943           if (! NILP (val))
9944             {
9945               struct charset *charset;
9946
9947               CHECK_CHARSET_GET_CHARSET (val, charset);
9948               ASET (initial, i, make_number (CHARSET_ID (charset)));
9949               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9950                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9951             }
9952           else
9953             ASET (initial, i, make_number (-1));
9954         }
9955
9956       reg_usage = args[coding_arg_iso2022_reg_usage];
9957       CHECK_CONS (reg_usage);
9958       CHECK_NUMBER_CAR (reg_usage);
9959       CHECK_NUMBER_CDR (reg_usage);
9960
           /* Each element of REQUEST is (CHARSET . REG) with REG a
              natural number below 4; CHARSET is replaced by its ID.
              NOTE(review): XSETCAR writes into the element VAL itself;
              if Fcopy_sequence copies only the list spine, the
              caller's cons cells are modified too -- confirm.  */
9961       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9962       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9963         {
9964           int id;
9965           Lisp_Object tmp;
9966
9967           val = Fcar (tail);
9968           CHECK_CONS (val);
9969           tmp = XCAR (val);
9970           CHECK_CHARSET_GET_ID (tmp, id);
9971           CHECK_NATNUM_CDR (val);
9972           if (XINT (XCDR (val)) >= 4)
9973             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9974           XSETCAR (val, make_number (id));
9975         }
9976
           /* The stored FLAGS additionally get
              CODING_ISO_FLAG_FULL_SUPPORT when the charset-list
              argument was the symbol iso-2022.  The local `i' keeps the
              flags as given.  */
9977       flags = args[coding_arg_iso2022_flags];
9978       CHECK_NATNUM (flags);
9979       i = XINT (flags);
9980       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9981         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9982
9983       ASET (attrs, coding_attr_iso_initial, initial);
9984       ASET (attrs, coding_attr_iso_usage, reg_usage);
9985       ASET (attrs, coding_attr_iso_request, request);
9986       ASET (attrs, coding_attr_iso_flags, flags);
9987       setup_iso_safe_charsets (attrs);
9988
           /* Choose one of the six ISO categories from the flags, from
              whether the charset-list argument was the symbol iso-2022,
              and (8-bit case) from the charset in INITIAL[1].  */
9989       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9990         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9991                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9992                     ? coding_category_iso_7_else
9993                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9994                     ? coding_category_iso_7
9995                     : coding_category_iso_7_tight);
9996       else
9997         {
9998           int id = XINT (AREF (initial, 1));
9999
10000           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
10001                        || EQ (args[coding_arg_charset_list], Qiso_2022)
10002                        || id < 0)
10003                       ? coding_category_iso_8_else
10004                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
10005                       ? coding_category_iso_8_1
10006                       : coding_category_iso_8_2);
10007         }
            /* Only the categories iso-8-1 and iso-8-2 keep the
               ASCII-compatibility value computed so far.  */
10008       if (category != coding_category_iso_8_1
10009           && category != coding_category_iso_8_2)
10010         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
10011     }
10012   else if (EQ (coding_type, Qemacs_mule))
10013     {
            /* Mark the coding system as "full" when the charset-list
               argument was the symbol emacs-mule.  */
10014       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
10015         ASET (attrs, coding_attr_emacs_mule_full, Qt);
10016       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10017       category = coding_category_emacs_mule;
10018     }
10019   else if (EQ (coding_type, Qshift_jis))
10020     {
10021
10022       struct charset *charset;
10023
            /* Three or four charsets are required: the first two of
               dimension 1, the third and the optional fourth of
               dimension 2.  */
10024       if (XINT (Flength (charset_list)) != 3
10025           && XINT (Flength (charset_list)) != 4)
10026         error ("There should be three or four charsets");
10027
10028       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10029       if (CHARSET_DIMENSION (charset) != 1)
10030         error ("Dimension of charset %s is not one",
10031                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10032       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10033         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10034
            /* From here on the local CHARSET_LIST is only a cursor; the
               complete list was stored in ATTRS above.  */
10035       charset_list = XCDR (charset_list);
10036       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10037       if (CHARSET_DIMENSION (charset) != 1)
10038         error ("Dimension of charset %s is not one",
10039                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10040
10041       charset_list = XCDR (charset_list);
10042       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10043       if (CHARSET_DIMENSION (charset) != 2)
10044         error ("Dimension of charset %s is not two",
10045                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10046
10047       charset_list = XCDR (charset_list);
10048       if (! NILP (charset_list))
10049         {
10050           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10051           if (CHARSET_DIMENSION (charset) != 2)
10052             error ("Dimension of charset %s is not two",
10053                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10054         }
10055
10056       category = coding_category_sjis;
10057       Vsjis_coding_system = name;
10058     }
10059   else if (EQ (coding_type, Qbig5))
10060     {
10061       struct charset *charset;
10062
            /* Exactly two charsets are required: the first of
               dimension 1, the second of dimension 2.  */
10063       if (XINT (Flength (charset_list)) != 2)
10064         error ("There should be just two charsets");
10065
10066       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10067       if (CHARSET_DIMENSION (charset) != 1)
10068         error ("Dimension of charset %s is not one",
10069                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10070       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10071         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10072
10073       charset_list = XCDR (charset_list);
10074       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10075       if (CHARSET_DIMENSION (charset) != 2)
10076         error ("Dimension of charset %s is not two",
10077                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10078
10079       category = coding_category_big5;
10080       Vbig5_coding_system = name;
10081     }
10082   else if (EQ (coding_type, Qraw_text))
10083     {
10084       category = coding_category_raw_text;
10085       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10086     }
10087   else if (EQ (coding_type, Qutf_8))
10088     {
10089       Lisp_Object bom;
10090
10091       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10092
10093       if (nargs < coding_arg_utf8_max)
10094         goto short_args;
10095
            /* BOM is nil, t, or a cons of two coding systems.  */
10096       bom = args[coding_arg_utf8_bom];
10097       if (! NILP (bom) && ! EQ (bom, Qt))
10098         {
10099           CHECK_CONS (bom);
10100           val = XCAR (bom);
10101           CHECK_CODING_SYSTEM (val);
10102           val = XCDR (bom);
10103           CHECK_CODING_SYSTEM (val);
10104         }
10105       ASET (attrs, coding_attr_utf_bom, bom);
10106
10107       category = (CONSP (bom) ? coding_category_utf_8_auto
10108                   : NILP (bom) ? coding_category_utf_8_nosig
10109                   : coding_category_utf_8_sig);
10110     }
10111   else if (EQ (coding_type, Qundecided))
10112     category = coding_category_undecided;
10113   else
10114     error ("Invalid coding system type: %s",
10115            SDATA (SYMBOL_NAME (coding_type)));
10116
        /* Record the category, then push the properties QCcategory and
           QCascii_compatible_p onto the front of the property list.  */
10117   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10118   CODING_ATTR_PLIST (attrs)
10119     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10120                                 CODING_ATTR_PLIST (attrs)));
10121   CODING_ATTR_PLIST (attrs)
10122     = Fcons (QCascii_compatible_p,
10123              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10124                     CODING_ATTR_PLIST (attrs)));
10125
        /* The eol-type argument must be nil or one of the symbols
           Qunix, Qdos and Qmac.  */
10126   eol_type = args[coding_arg_eol_type];
10127   if (! NILP (eol_type)
10128       && ! EQ (eol_type, Qunix)
10129       && ! EQ (eol_type, Qdos)
10130       && ! EQ (eol_type, Qmac))
10131     error ("Invalid eol-type");
10132
10133   aliases = Fcons (name, Qnil);
10134
10135   if (NILP (eol_type))
10136     {
            /* No eol-type was given.  Register three subsidiary coding
               systems that share ATTRS and have the eol-types Qunix,
               Qdos and Qmac; the vector of their names becomes the
               eol-type of NAME itself.  */
10137       eol_type = make_subsidiaries (name);
10138       for (i = 0; i < 3; i++)
10139         {
10140           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10141
10142           this_name = AREF (eol_type, i);
10143           this_aliases = Fcons (this_name, Qnil);
10144           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
                /* All three slots start as ATTRS; slots 1 and 2 are
                   overwritten just below.  */
10145           this_spec = Fmake_vector (make_number (3), attrs);
10146           ASET (this_spec, 1, this_aliases);
10147           ASET (this_spec, 2, this_eol_type);
10148           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10149           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10150           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10151           if (NILP (val))
10152             Vcoding_system_alist
10153               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10154                        Vcoding_system_alist);
10155         }
10156     }
10157
        /* Build the spec [ ATTRS ALIASES EOL_TYPE ] of NAME and register
           it.  The name string is added to Vcoding_system_alist only if
           it is not there yet.  */
10158   spec_vec = Fmake_vector (make_number (3), attrs);
10159   ASET (spec_vec, 1, aliases);
10160   ASET (spec_vec, 2, eol_type);
10161
10162   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10163   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10164   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10165   if (NILP (val))
10166     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10167                                   Vcoding_system_alist);
10168
        /* Set up coding_categories[CATEGORY] for NAME when that entry
           has a negative id, or when its id already names this same
           coding system.  */
10169   {
10170     int id = coding_categories[category].id;
10171
10172     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10173       setup_coding_system (name, &coding_categories[category]);
10174   }
10175
10176   return Qnil;
10177
        /* Reached when NARGS is smaller than the argument count needed
           by the coding type.  */
10178  short_args:
10179   return Fsignal (Qwrong_number_of_arguments,
10180                   Fcons (intern ("define-coding-system-internal"),
10181                          make_number (nargs)));
10182 }
10183
10184
10185 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10186        3, 3, 0,
10187        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10188   (coding_system, prop, val)
10189      Lisp_Object coding_system, prop, val;
10190 {
10191   Lisp_Object spec, attrs;
10192
10193   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10194   attrs = AREF (spec, 0);
10195   if (EQ (prop, QCmnemonic))
10196     {
10197       if (! STRINGP (val))
10198         CHECK_CHARACTER (val);
10199       CODING_ATTR_MNEMONIC (attrs) = val;
10200     }
10201   else if (EQ (prop, QCdefault_char))
10202     {
10203       if (NILP (val))
10204         val = make_number (' ');
10205       else
10206         CHECK_CHARACTER (val);
10207       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10208     }
10209   else if (EQ (prop, QCdecode_translation_table))
10210     {
10211       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10212         CHECK_SYMBOL (val);
10213       CODING_ATTR_DECODE_TBL (attrs) = val;
10214     }
10215   else if (EQ (prop, QCencode_translation_table))
10216     {
10217       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10218         CHECK_SYMBOL (val);
10219       CODING_ATTR_ENCODE_TBL (attrs) = val;
10220     }
10221   else if (EQ (prop, QCpost_read_conversion))
10222     {
10223       CHECK_SYMBOL (val);
10224       CODING_ATTR_POST_READ (attrs) = val;
10225     }
10226   else if (EQ (prop, QCpre_write_conversion))
10227     {
10228       CHECK_SYMBOL (val);
10229       CODING_ATTR_PRE_WRITE (attrs) = val;
10230     }
10231   else if (EQ (prop, QCascii_compatible_p))
10232     {
10233       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10234     }
10235
10236   CODING_ATTR_PLIST (attrs)
10237     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10238   return val;
10239 }
10240
10241
10242 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10243        Sdefine_coding_system_alias, 2, 2, 0,
10244        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10245      (alias, coding_system)
10246      Lisp_Object alias, coding_system;
10247 {
10248   Lisp_Object spec, aliases, eol_type, val;
10249
10250   CHECK_SYMBOL (alias);
10251   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10252   aliases = AREF (spec, 1);
10253   /* ALIASES should be a list of length more than zero, and the first
10254      element is a base coding system.  Append ALIAS at the tail of the
10255      list.  */
10256   while (!NILP (XCDR (aliases)))
10257     aliases = XCDR (aliases);
10258   XSETCDR (aliases, Fcons (alias, Qnil));
10259
10260   eol_type = AREF (spec, 2);
10261   if (VECTORP (eol_type))
10262     {
10263       Lisp_Object subsidiaries;
10264       int i;
10265
10266       subsidiaries = make_subsidiaries (alias);
10267       for (i = 0; i < 3; i++)
10268         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10269                                      AREF (eol_type, i));
10270     }
10271
10272   Fputhash (alias, spec, Vcoding_system_hash_table);
10273   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10274   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10275   if (NILP (val))
10276     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10277                                   Vcoding_system_alist);
10278
10279   return Qnil;
10280 }
10281
10282 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10283        1, 1, 0,
10284        doc: /* Return the base of CODING-SYSTEM.
10285 Any alias or subsidiary coding system is not a base coding system.  */)
10286   (coding_system)
10287      Lisp_Object coding_system;
10288 {
10289   Lisp_Object spec, attrs;
10290
10291   if (NILP (coding_system))
10292     return (Qno_conversion);
10293   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10294   attrs = AREF (spec, 0);
10295   return CODING_ATTR_BASE_NAME (attrs);
10296 }
10297
10298 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10299        1, 1, 0,
10300        doc: "Return the property list of CODING-SYSTEM.")
10301      (coding_system)
10302      Lisp_Object coding_system;
10303 {
10304   Lisp_Object spec, attrs;
10305
10306   if (NILP (coding_system))
10307     coding_system = Qno_conversion;
10308   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10309   attrs = AREF (spec, 0);
10310   return CODING_ATTR_PLIST (attrs);
10311 }
10312
10313
10314 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10315        1, 1, 0,
10316        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10317      (coding_system)
10318      Lisp_Object coding_system;
10319 {
10320   Lisp_Object spec;
10321
10322   if (NILP (coding_system))
10323     coding_system = Qno_conversion;
10324   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10325   return AREF (spec, 1);
10326 }
10327
10328 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10329        Scoding_system_eol_type, 1, 1, 0,
10330        doc: /* Return eol-type of CODING-SYSTEM.
10331 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10332
10333 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10334 and CR respectively.
10335
10336 A vector value indicates that a format of end-of-line should be
10337 detected automatically.  Nth element of the vector is the subsidiary
10338 coding system whose eol-type is N.  */)
10339      (coding_system)
10340      Lisp_Object coding_system;
10341 {
10342   Lisp_Object spec, eol_type;
10343   int n;
10344
10345   if (NILP (coding_system))
10346     coding_system = Qno_conversion;
10347   if (! CODING_SYSTEM_P (coding_system))
10348     return Qnil;
10349   spec = CODING_SYSTEM_SPEC (coding_system);
10350   eol_type = AREF (spec, 2);
10351   if (VECTORP (eol_type))
10352     return Fcopy_sequence (eol_type);
10353   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10354   return make_number (n);
10355 }
10356
10357 #endif /* emacs */
10358
10359 \f
10360 /*** 12. Postamble ***/
10361
10362 void
10363 init_coding_once ()
10364 {
10365   int i;
10366
10367   for (i = 0; i < coding_category_max; i++)
10368     {
10369       coding_categories[i].id = -1;
10370       coding_priorities[i] = i;
10371     }
10372
10373   /* ISO2022 specific initialize routine.  */
10374   for (i = 0; i < 0x20; i++)
10375     iso_code_class[i] = ISO_control_0;
10376   for (i = 0x21; i < 0x7F; i++)
10377     iso_code_class[i] = ISO_graphic_plane_0;
10378   for (i = 0x80; i < 0xA0; i++)
10379     iso_code_class[i] = ISO_control_1;
10380   for (i = 0xA1; i < 0xFF; i++)
10381     iso_code_class[i] = ISO_graphic_plane_1;
10382   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10383   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10384   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10385   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10386   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10387   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10388   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10389   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10390   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10391
10392   for (i = 0; i < 256; i++)
10393     {
10394       emacs_mule_bytes[i] = 1;
10395     }
10396   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10397   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10398   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10399   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10400 }
10401
10402 #ifdef emacs
10403
10404 void
10405 syms_of_coding ()
10406 {
10407   staticpro (&Vcoding_system_hash_table);
10408   {
10409     Lisp_Object args[2];
10410     args[0] = QCtest;
10411     args[1] = Qeq;
10412     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10413   }
10414
10415   staticpro (&Vsjis_coding_system);
10416   Vsjis_coding_system = Qnil;
10417
10418   staticpro (&Vbig5_coding_system);
10419   Vbig5_coding_system = Qnil;
10420
10421   staticpro (&Vcode_conversion_reused_workbuf);
10422   Vcode_conversion_reused_workbuf = Qnil;
10423
10424   staticpro (&Vcode_conversion_workbuf_name);
10425   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10426
10427   reused_workbuf_in_use = 0;
10428
10429   DEFSYM (Qcharset, "charset");
10430   DEFSYM (Qtarget_idx, "target-idx");
10431   DEFSYM (Qcoding_system_history, "coding-system-history");
10432   Fset (Qcoding_system_history, Qnil);
10433
10434   /* Target FILENAME is the first argument.  */
10435   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10436   /* Target FILENAME is the third argument.  */
10437   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10438
10439   DEFSYM (Qcall_process, "call-process");
10440   /* Target PROGRAM is the first argument.  */
10441   Fput (Qcall_process, Qtarget_idx, make_number (0));
10442
10443   DEFSYM (Qcall_process_region, "call-process-region");
10444   /* Target PROGRAM is the third argument.  */
10445   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10446
10447   DEFSYM (Qstart_process, "start-process");
10448   /* Target PROGRAM is the third argument.  */
10449   Fput (Qstart_process, Qtarget_idx, make_number (2));
10450
10451   DEFSYM (Qopen_network_stream, "open-network-stream");
10452   /* Target SERVICE is the fourth argument.  */
10453   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10454
10455   DEFSYM (Qcoding_system, "coding-system");
10456   DEFSYM (Qcoding_aliases, "coding-aliases");
10457
10458   DEFSYM (Qeol_type, "eol-type");
10459   DEFSYM (Qunix, "unix");
10460   DEFSYM (Qdos, "dos");
10461
10462   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10463   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10464   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10465   DEFSYM (Qdefault_char, "default-char");
10466   DEFSYM (Qundecided, "undecided");
10467   DEFSYM (Qno_conversion, "no-conversion");
10468   DEFSYM (Qraw_text, "raw-text");
10469
10470   DEFSYM (Qiso_2022, "iso-2022");
10471
10472   DEFSYM (Qutf_8, "utf-8");
10473   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10474
10475   DEFSYM (Qutf_16, "utf-16");
10476   DEFSYM (Qbig, "big");
10477   DEFSYM (Qlittle, "little");
10478
10479   DEFSYM (Qshift_jis, "shift-jis");
10480   DEFSYM (Qbig5, "big5");
10481
10482   DEFSYM (Qcoding_system_p, "coding-system-p");
10483
10484   DEFSYM (Qcoding_system_error, "coding-system-error");
10485   Fput (Qcoding_system_error, Qerror_conditions,
10486         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10487   Fput (Qcoding_system_error, Qerror_message,
10488         make_pure_c_string ("Invalid coding system"));
10489
10490   /* Intern this now in case it isn't already done.
10491      Setting this variable twice is harmless.
10492      But don't staticpro it here--that is done in alloc.c.  */
10493   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10494
10495   DEFSYM (Qtranslation_table, "translation-table");
10496   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10497   DEFSYM (Qtranslation_table_id, "translation-table-id");
10498   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10499   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10500
10501   DEFSYM (Qvalid_codes, "valid-codes");
10502
10503   DEFSYM (Qemacs_mule, "emacs-mule");
10504
10505   DEFSYM (QCcategory, ":category");
10506   DEFSYM (QCmnemonic, ":mnemonic");
10507   DEFSYM (QCdefault_char, ":default-char");
10508   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10509   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10510   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10511   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10512   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10513
10514   Vcoding_category_table
10515     = Fmake_vector (make_number (coding_category_max), Qnil);
10516   staticpro (&Vcoding_category_table);
10517   /* Followings are target of code detection.  */
10518   ASET (Vcoding_category_table, coding_category_iso_7,
10519         intern_c_string ("coding-category-iso-7"));
10520   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10521         intern_c_string ("coding-category-iso-7-tight"));
10522   ASET (Vcoding_category_table, coding_category_iso_8_1,
10523         intern_c_string ("coding-category-iso-8-1"));
10524   ASET (Vcoding_category_table, coding_category_iso_8_2,
10525         intern_c_string ("coding-category-iso-8-2"));
10526   ASET (Vcoding_category_table, coding_category_iso_7_else,
10527         intern_c_string ("coding-category-iso-7-else"));
10528   ASET (Vcoding_category_table, coding_category_iso_8_else,
10529         intern_c_string ("coding-category-iso-8-else"));
10530   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10531         intern_c_string ("coding-category-utf-8-auto"));
10532   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10533         intern_c_string ("coding-category-utf-8"));
10534   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10535         intern_c_string ("coding-category-utf-8-sig"));
10536   ASET (Vcoding_category_table, coding_category_utf_16_be,
10537         intern_c_string ("coding-category-utf-16-be"));
10538   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10539         intern_c_string ("coding-category-utf-16-auto"));
10540   ASET (Vcoding_category_table, coding_category_utf_16_le,
10541         intern_c_string ("coding-category-utf-16-le"));
10542   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10543         intern_c_string ("coding-category-utf-16-be-nosig"));
10544   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10545         intern_c_string ("coding-category-utf-16-le-nosig"));
10546   ASET (Vcoding_category_table, coding_category_charset,
10547         intern_c_string ("coding-category-charset"));
10548   ASET (Vcoding_category_table, coding_category_sjis,
10549         intern_c_string ("coding-category-sjis"));
10550   ASET (Vcoding_category_table, coding_category_big5,
10551         intern_c_string ("coding-category-big5"));
10552   ASET (Vcoding_category_table, coding_category_ccl,
10553         intern_c_string ("coding-category-ccl"));
10554   ASET (Vcoding_category_table, coding_category_emacs_mule,
10555         intern_c_string ("coding-category-emacs-mule"));
10556   /* Followings are NOT target of code detection.  */
10557   ASET (Vcoding_category_table, coding_category_raw_text,
10558         intern_c_string ("coding-category-raw-text"));
10559   ASET (Vcoding_category_table, coding_category_undecided,
10560         intern_c_string ("coding-category-undecided"));
10561
10562   DEFSYM (Qinsufficient_source, "insufficient-source");
10563   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10564   DEFSYM (Qinvalid_source, "invalid-source");
10565   DEFSYM (Qinterrupted, "interrupted");
10566   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10567   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10568
10569   defsubr (&Scoding_system_p);
10570   defsubr (&Sread_coding_system);
10571   defsubr (&Sread_non_nil_coding_system);
10572   defsubr (&Scheck_coding_system);
10573   defsubr (&Sdetect_coding_region);
10574   defsubr (&Sdetect_coding_string);
10575   defsubr (&Sfind_coding_systems_region_internal);
10576   defsubr (&Sunencodable_char_position);
10577   defsubr (&Scheck_coding_systems_region);
10578   defsubr (&Sdecode_coding_region);
10579   defsubr (&Sencode_coding_region);
10580   defsubr (&Sdecode_coding_string);
10581   defsubr (&Sencode_coding_string);
10582   defsubr (&Sdecode_sjis_char);
10583   defsubr (&Sencode_sjis_char);
10584   defsubr (&Sdecode_big5_char);
10585   defsubr (&Sencode_big5_char);
10586   defsubr (&Sset_terminal_coding_system_internal);
10587   defsubr (&Sset_safe_terminal_coding_system_internal);
10588   defsubr (&Sterminal_coding_system);
10589   defsubr (&Sset_keyboard_coding_system_internal);
10590   defsubr (&Skeyboard_coding_system);
10591   defsubr (&Sfind_operation_coding_system);
10592   defsubr (&Sset_coding_system_priority);
10593   defsubr (&Sdefine_coding_system_internal);
10594   defsubr (&Sdefine_coding_system_alias);
10595   defsubr (&Scoding_system_put);
10596   defsubr (&Scoding_system_base);
10597   defsubr (&Scoding_system_plist);
10598   defsubr (&Scoding_system_aliases);
10599   defsubr (&Scoding_system_eol_type);
10600   defsubr (&Scoding_system_priority_list);
10601
10602   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10603                doc: /* List of coding systems.
10604
10605 Do not alter the value of this variable manually.  This variable should be
10606 updated by the functions `define-coding-system' and
10607 `define-coding-system-alias'.  */);
10608   Vcoding_system_list = Qnil;
10609
10610   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10611                doc: /* Alist of coding system names.
10612 Each element is one element list of coding system name.
10613 This variable is given to `completing-read' as COLLECTION argument.
10614
10615 Do not alter the value of this variable manually.  This variable should be
10616 updated by the functions `make-coding-system' and
10617 `define-coding-system-alias'.  */);
10618   Vcoding_system_alist = Qnil;
10619
10620   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10621                doc: /* List of coding-categories (symbols) ordered by priority.
10622
10623 On detecting a coding system, Emacs tries code detection algorithms
10624 associated with each coding-category one by one in this order.  When
10625 one algorithm agrees with a byte sequence of source text, the coding
10626 system bound to the corresponding coding-category is selected.
10627
10628 Don't modify this variable directly, but use `set-coding-priority'.  */);
10629   {
10630     int i;
10631
10632     Vcoding_category_list = Qnil;
10633     for (i = coding_category_max - 1; i >= 0; i--)
10634       Vcoding_category_list
10635         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10636                  Vcoding_category_list);
10637   }
10638
10639   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10640                doc: /* Specify the coding system for read operations.
10641 It is useful to bind this variable with `let', but do not set it globally.
10642 If the value is a coding system, it is used for decoding on read operation.
10643 If not, an appropriate element is used from one of the coding system alists.
10644 There are three such tables: `file-coding-system-alist',
10645 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10646   Vcoding_system_for_read = Qnil;
10647
10648   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10649                doc: /* Specify the coding system for write operations.
10650 Programs bind this variable with `let', but you should not set it globally.
10651 If the value is a coding system, it is used for encoding of output,
10652 when writing it to a file and when sending it to a file or subprocess.
10653
10654 If this does not specify a coding system, an appropriate element
10655 is used from one of the coding system alists.
10656 There are three such tables: `file-coding-system-alist',
10657 `process-coding-system-alist', and `network-coding-system-alist'.
10658 For output to files, if the above procedure does not specify a coding system,
10659 the value of `buffer-file-coding-system' is used.  */);
10660   Vcoding_system_for_write = Qnil;
10661
10662   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10663                doc: /*
10664 Coding system used in the latest file or process I/O.  */);
10665   Vlast_coding_system_used = Qnil;
10666
10667   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10668                doc: /*
10669 Error status of the last code conversion.
10670
10671 When an error was detected in the last code conversion, this variable
10672 is set to one of the following symbols.
10673   `insufficient-source'
10674   `inconsistent-eol'
10675   `invalid-source'
10676   `interrupted'
10677   `insufficient-memory'
10678 When no error was detected, the value doesn't change.  So, to check
10679 the error status of a code conversion by this variable, you must
10680 explicitly set this variable to nil before performing code
10681 conversion.  */);
10682   Vlast_code_conversion_error = Qnil;
10683
10684   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10685                doc: /*
10686 *Non-nil means always inhibit code conversion of end-of-line format.
10687 See info node `Coding Systems' and info node `Text and Binary' concerning
10688 such conversion.  */);
10689   inhibit_eol_conversion = 0;
10690
10691   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10692                doc: /*
10693 Non-nil means process buffer inherits coding system of process output.
10694 Bind it to t if the process output is to be treated as if it were a file
10695 read from some filesystem.  */);
10696   inherit_process_coding_system = 0;
10697
10698   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10699                doc: /*
10700 Alist to decide a coding system to use for a file I/O operation.
10701 The format is ((PATTERN . VAL) ...),
10702 where PATTERN is a regular expression matching a file name,
10703 VAL is a coding system, a cons of coding systems, or a function symbol.
10704 If VAL is a coding system, it is used for both decoding and encoding
10705 the file contents.
10706 If VAL is a cons of coding systems, the car part is used for decoding,
10707 and the cdr part is used for encoding.
10708 If VAL is a function symbol, the function must return a coding system
10709 or a cons of coding systems which are used as above.  The function is
10710 called with an argument that is a list of the arguments with which
10711 `find-operation-coding-system' was called.  If the function can't decide
10712 a coding system, it can return `undecided' so that the normal
10713 code-detection is performed.
10714
10715 See also the function `find-operation-coding-system'
10716 and the variable `auto-coding-alist'.  */);
10717   Vfile_coding_system_alist = Qnil;
10718
10719   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10720                doc: /*
10721 Alist to decide a coding system to use for a process I/O operation.
10722 The format is ((PATTERN . VAL) ...),
10723 where PATTERN is a regular expression matching a program name,
10724 VAL is a coding system, a cons of coding systems, or a function symbol.
10725 If VAL is a coding system, it is used for both decoding what received
10726 from the program and encoding what sent to the program.
10727 If VAL is a cons of coding systems, the car part is used for decoding,
10728 and the cdr part is used for encoding.
10729 If VAL is a function symbol, the function must return a coding system
10730 or a cons of coding systems which are used as above.
10731
10732 See also the function `find-operation-coding-system'.  */);
10733   Vprocess_coding_system_alist = Qnil;
10734
10735   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10736                doc: /*
10737 Alist to decide a coding system to use for a network I/O operation.
10738 The format is ((PATTERN . VAL) ...),
10739 where PATTERN is a regular expression matching a network service name
10740 or is a port number to connect to,
10741 VAL is a coding system, a cons of coding systems, or a function symbol.
10742 If VAL is a coding system, it is used for both decoding what received
10743 from the network stream and encoding what sent to the network stream.
10744 If VAL is a cons of coding systems, the car part is used for decoding,
10745 and the cdr part is used for encoding.
10746 If VAL is a function symbol, the function must return a coding system
10747 or a cons of coding systems which are used as above.
10748
10749 See also the function `find-operation-coding-system'.  */);
10750   Vnetwork_coding_system_alist = Qnil;
10751
10752   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10753                doc: /* Coding system to use with system messages.
10754 Also used for decoding keyboard input on X Window system.  */);
10755   Vlocale_coding_system = Qnil;
10756
10757   /* The eol mnemonics are reset in startup.el system-dependently.  */
10758   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10759                doc: /*
10760 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10761   eol_mnemonic_unix = make_pure_c_string (":");
10762
10763   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10764                doc: /*
10765 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10766   eol_mnemonic_dos = make_pure_c_string ("\\");
10767
10768   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10769                doc: /*
10770 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10771   eol_mnemonic_mac = make_pure_c_string ("/");
10772
10773   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10774                doc: /*
10775 *String displayed in mode line when end-of-line format is not yet determined.  */);
10776   eol_mnemonic_undecided = make_pure_c_string (":");
10777
10778   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10779                doc: /*
10780 *Non-nil enables character translation while encoding and decoding.  */);
10781   Venable_character_translation = Qt;
10782
10783   DEFVAR_LISP ("standard-translation-table-for-decode",
10784                &Vstandard_translation_table_for_decode,
10785                doc: /* Table for translating characters while decoding.  */);
10786   Vstandard_translation_table_for_decode = Qnil;
10787
10788   DEFVAR_LISP ("standard-translation-table-for-encode",
10789                &Vstandard_translation_table_for_encode,
10790                doc: /* Table for translating characters while encoding.  */);
10791   Vstandard_translation_table_for_encode = Qnil;
10792
10793   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10794                doc: /* Alist of charsets vs revision numbers.
10795 While encoding, if a charset (car part of an element) is found,
10796 designate it with the escape sequence identifying revision (cdr part
10797 of the element).  */);
10798   Vcharset_revision_table = Qnil;
10799
10800   DEFVAR_LISP ("default-process-coding-system",
10801                &Vdefault_process_coding_system,
10802                doc: /* Cons of coding systems used for process I/O by default.
10803 The car part is used for decoding a process output,
10804 the cdr part is used for encoding a text to be sent to a process.  */);
10805   Vdefault_process_coding_system = Qnil;
10806
10807   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10808                doc: /*
10809 Table of extra Latin codes in the range 128..159 (inclusive).
10810 This is a vector of length 256.
10811 If Nth element is non-nil, the existence of code N in a file
10812 \(or output of subprocess) doesn't prevent it to be detected as
10813 a coding system of ISO 2022 variant which has a flag
10814 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10815 or reading output of a subprocess.
10816 Only 128th through 159th elements have a meaning.  */);
10817   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10818
10819   DEFVAR_LISP ("select-safe-coding-system-function",
10820                &Vselect_safe_coding_system_function,
10821                doc: /*
10822 Function to call to select safe coding system for encoding a text.
10823
10824 If set, this function is called to force a user to select a proper
10825 coding system which can encode the text in the case that a default
10826 coding system used in each operation can't encode the text.  The
10827 function should take care that the buffer is not modified while
10828 the coding system is being selected.
10829
10830 The default value is `select-safe-coding-system' (which see).  */);
10831   Vselect_safe_coding_system_function = Qnil;
10832
10833   DEFVAR_BOOL ("coding-system-require-warning",
10834                &coding_system_require_warning,
10835                doc: /* Internal use only.
10836 If non-nil, on writing a file, `select-safe-coding-system-function' is
10837 called even if `coding-system-for-write' is non-nil.  The command
10838 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10839   coding_system_require_warning = 0;
10840
10841
10842   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10843                &inhibit_iso_escape_detection,
10844                doc: /*
10845 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10846
10847 When Emacs reads text, it tries to detect how the text is encoded.
10848 This code detection is sensitive to escape sequences.  If Emacs sees
10849 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10850 of the ISO2022 encodings, and decodes text by the corresponding coding
10851 system (e.g. `iso-2022-7bit').
10852
10853 However, there may be a case that you want to read escape sequences in
10854 a file as is.  In such a case, you can set this variable to non-nil.
10855 Then the code detection will ignore any escape sequences, and no text is
10856 detected as encoded in some ISO-2022 encoding.  The result is that all
10857 escape sequences become visible in a buffer.
10858
10859 The default value is nil, and it is strongly recommended not to change
10860 it.  That is because many Emacs Lisp source files that contain
10861 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10862 in Emacs's distribution, and they won't be decoded correctly on
10863 reading if you suppress escape sequence detection.
10864
10865 The other way to read escape sequences in a file without decoding is
10866 to explicitly specify some coding system that doesn't use ISO-2022
10867 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10868   inhibit_iso_escape_detection = 0;
10869
10870   DEFVAR_BOOL ("inhibit-null-byte-detection",
10871                &inhibit_null_byte_detection,
10872                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10873 By default, Emacs treats it as binary data, and does not attempt to
10874 decode it.  The effect is as if you specified `no-conversion' for
10875 reading that text.
10876
10877 Set this to non-nil when a regular text happens to include null bytes.
10878 Examples are Index nodes of Info files and null-byte delimited output
10879 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10880 decode text as usual.  */);
10881   inhibit_null_byte_detection = 0;
10882
10883   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10884                doc: /* Char table for translating self-inserting characters.
10885 This is applied to the result of input methods, not their input.
10886 See also `keyboard-translate-table'.
10887
10888 Use of this variable for character code unification was rendered
10889 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10890 internal character representation.  */);
10891     Vtranslation_table_for_input = Qnil;
10892
10893   {
10894     Lisp_Object args[coding_arg_max];
10895     Lisp_Object plist[16];
10896     int i;
10897
10898     for (i = 0; i < coding_arg_max; i++)
10899       args[i] = Qnil;
10900
10901     plist[0] = intern_c_string (":name");
10902     plist[1] = args[coding_arg_name] = Qno_conversion;
10903     plist[2] = intern_c_string (":mnemonic");
10904     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10905     plist[4] = intern_c_string (":coding-type");
10906     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10907     plist[6] = intern_c_string (":ascii-compatible-p");
10908     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10909     plist[8] = intern_c_string (":default-char");
10910     plist[9] = args[coding_arg_default_char] = make_number (0);
10911     plist[10] = intern_c_string (":for-unibyte");
10912     plist[11] = args[coding_arg_for_unibyte] = Qt;
10913     plist[12] = intern_c_string (":docstring");
10914     plist[13] = make_pure_c_string ("Do no conversion.\n\
10915 \n\
10916 When you visit a file with this coding, the file is read into a\n\
10917 unibyte buffer as is, thus each byte of a file is treated as a\n\
10918 character.");
10919     plist[14] = intern_c_string (":eol-type");
10920     plist[15] = args[coding_arg_eol_type] = Qunix;
10921     args[coding_arg_plist] = Flist (16, plist);
10922     Fdefine_coding_system_internal (coding_arg_max, args);
10923
10924     plist[1] = args[coding_arg_name] = Qundecided;
10925     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10926     plist[5] = args[coding_arg_coding_type] = Qundecided;
10927     /* This is already set.
10928        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10929     plist[8] = intern_c_string (":charset-list");
10930     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10931     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10932     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10933     plist[15] = args[coding_arg_eol_type] = Qnil;
10934     args[coding_arg_plist] = Flist (16, plist);
10935     Fdefine_coding_system_internal (coding_arg_max, args);
10936   }
10937
10938   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10939
10940   {
10941     int i;
10942
10943     for (i = 0; i < coding_category_max; i++)
10944       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10945   }
10946 #if defined (MSDOS) || defined (WINDOWSNT)
10947   system_eol_type = Qdos;
10948 #else
10949   system_eol_type = Qunix;
10950 #endif
10951   staticpro (&system_eol_type);
10952 }
10953
10954 char *
10955 emacs_strerror (error_number)
10956      int error_number;
10957 {
10958   char *str;
10959
10960   synchronize_system_messages_locale ();
10961   str = strerror (error_number);
10962
10963   if (! NILP (Vlocale_coding_system))
10964     {
10965       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10966                                                       Vlocale_coding_system,
10967                                                       0);
10968       str = (char *) SDATA (dec);
10969     }
10970
10971   return str;
10972 }
10973
10974 #endif /* emacs */
10975
10976 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10977    (do not change this comment) */