src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009, 2010
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  At
  60   the C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from a coding system symbol to its attributes vector is done by
  63   looking up Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we setup a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (struct coding_system *coding,
 158                    struct coding_detection_info *detect_info)
 159 {
 160   const unsigned char *src = coding->source;
 161   const unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the source is exhausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (__C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source was exhausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (struct coding_system *coding)
 206 {
 207   const unsigned char *src = coding->source + coding->consumed;
 208   const unsigned char *src_end = coding->source + coding->src_bytes;
 209   /* SRC_BASE remembers the start position in source in each loop.
 210      The loop will be exited when there's not enough source text, or
 211      when there's no room in CHARBUF for a decoded character.  */
 212   const unsigned char *src_base;
 213   /* A buffer to produce decoded characters.  */
 214   int *charbuf = coding->charbuf + coding->charbuf_used;
 215   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 216   int multibytep = coding->src_multibyte;
 217
 218   while (1)
 219     {
 220       src_base = src;
 221       if (charbuf >= charbuf_end)
 222         /* No more room to produce a decoded character.  */
 223         break;
 224       ONE_MORE_BYTE (c);
 225       /* Decode it. */
 226     }
 227
 228  no_more_source:
 229   if (src_base < src_end
 230       && coding->mode & CODING_MODE_LAST_BLOCK)
 231     /* If the source ends with partial bytes of a character, treat
 232        them as eight-bit raw data.  */
 233     while (src_base < src_end && charbuf < charbuf_end)
 234       *charbuf++ = *src_base++;
 235   /* Remember how many bytes and characters we consumed.  If the
 236      source is multibyte, the bytes and chars are not identical.  */
 237   coding->consumed = coding->consumed_char = src_base - coding->source;
 238   /* Remember how many characters we produced.  */
 239   coding->charbuf_used = charbuf - coding->charbuf;
 240 }
 241 #endif
 242
 243 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 244
 245   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 246   internal multibyte format by CODING.  The resulting byte sequence
 247   goes to a place pointed to by DESTINATION, the length of which
 248   should not exceed DST_BYTES.
 249
 250   These functions set the information of original and encoded texts in
 251   the members produced, produced_char, consumed, and consumed_char of
 252   the structure *CODING.  They also set the member result to one of
 253   CODING_RESULT_XXX indicating how the encoding finished.
 254
 255   DST_BYTES zero means that the source area and destination area
 256   overlap, which means that we can produce encoded text until it
 257   reaches the head of the not-yet-encoded source text.
 258
 259   Below is a template of these functions.  */
 260 #if 0
 261 static void
 262 encode_coding_XXX (struct coding_system *coding)
 263 {
 264   int multibytep = coding->dst_multibyte;
 265   int *charbuf = coding->charbuf;
 266   int *charbuf_end = charbuf + coding->charbuf_used;
 267   unsigned char *dst = coding->destination + coding->produced;
 268   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 269   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 270   int produced_chars = 0;
 271
 272   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 273     {
 274       int c = *charbuf;
 275       /* Encode C into DST; advance DST and PRODUCED_CHARS.  */
 276     }
 277  label_no_more_destination:
 278   /* How many chars and bytes we produced.  */
 279   coding->produced_char += produced_chars;
 280   coding->produced = dst - coding->destination;
 281 }
 282 #endif
 283
 284 \f
 285 /*** 1. Preamble ***/
 286
 287 #include <config.h>
 288 #include <stdio.h>
 289 #include <setjmp.h>
 290
 291 #include "lisp.h"
 292 #include "buffer.h"
 293 #include "character.h"
 294 #include "charset.h"
 295 #include "ccl.h"
 296 #include "composite.h"
 297 #include "coding.h"
 298 #include "window.h"
 299 #include "frame.h"
 300 #include "termhooks.h"
 301
 302 Lisp_Object Vcoding_system_hash_table;
 303
 304 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 305 Lisp_Object Qunix, Qdos;
 306 extern Lisp_Object Qmac;        /* frame.c */
 307 Lisp_Object Qbuffer_file_coding_system;
 308 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 309 Lisp_Object Qdefault_char;
 310 Lisp_Object Qno_conversion, Qundecided;
 311 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 312 Lisp_Object Qbig, Qlittle;
 313 Lisp_Object Qcoding_system_history;
 314 Lisp_Object Qvalid_codes;
 315 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 316 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 317 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 318 Lisp_Object QCascii_compatible_p;
 319
 320 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 321 Lisp_Object Qcall_process, Qcall_process_region;
 322 Lisp_Object Qstart_process, Qopen_network_stream;
 323 Lisp_Object Qtarget_idx;
 324
 325 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 326 Lisp_Object Qinterrupted, Qinsufficient_memory;
 327
 328 extern Lisp_Object Qcompletion_ignore_case;
 329
 330 /* If a symbol has this property, evaluate the value to define the
 331    symbol as a coding system.  */
 332 static Lisp_Object Qcoding_system_define_form;
 333
 334 int coding_system_require_warning;
 335
 336 Lisp_Object Vselect_safe_coding_system_function;
 337
 338 /* Mnemonic string for each format of end-of-line.  */
 339 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 340 /* Mnemonic string to indicate format of end-of-line is not yet
 341    decided.  */
 342 Lisp_Object eol_mnemonic_undecided;
 343
 344 /* Format of end-of-line decided by system.  This is Qunix on
 345    Unix and Mac, Qdos on DOS/Windows.
 346    This has an effect only for external encoding (i.e. for output to
 347    file and process), not for in-buffer or Lisp string encoding.  */
 348 static Lisp_Object system_eol_type;
 349
 350 #ifdef emacs
 351
 352 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 353
 354 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 355
 356 /* Coding system emacs-mule and raw-text are for converting only
 357    end-of-line format.  */
 358 Lisp_Object Qemacs_mule, Qraw_text;
 359 Lisp_Object Qutf_8_emacs;
 360
 361 /* Coding-systems are handed between Emacs Lisp programs and C internal
 362    routines by the following three variables.  */
 363 /* Coding-system for reading files and receiving data from process.  */
 364 Lisp_Object Vcoding_system_for_read;
 365 /* Coding-system for writing files and sending data to process.  */
 366 Lisp_Object Vcoding_system_for_write;
 367 /* Coding-system actually used in the latest I/O.  */
 368 Lisp_Object Vlast_coding_system_used;
 369 /* Set to non-nil when an error is detected while code conversion.  */
 370 Lisp_Object Vlast_code_conversion_error;
 371 /* A vector of length 256 which contains information about special
 372    Latin codes (especially for dealing with Microsoft codes).  */
 373 Lisp_Object Vlatin_extra_code_table;
 374
 375 /* Flag to inhibit code conversion of end-of-line format.  */
 376 int inhibit_eol_conversion;
 377
 378 /* Flag to inhibit ISO2022 escape sequence detection.  */
 379 int inhibit_iso_escape_detection;
 380
 381 /* Flag to inhibit detection of binary files through null bytes.  */
 382 int inhibit_null_byte_detection;
 383
 384 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 385 int inherit_process_coding_system;
 386
 387 /* Coding system to be used to encode text for terminal display when
 388    terminal coding system is nil.  */
 389 struct coding_system safe_terminal_coding;
 390
 391 Lisp_Object Vfile_coding_system_alist;
 392 Lisp_Object Vprocess_coding_system_alist;
 393 Lisp_Object Vnetwork_coding_system_alist;
 394
 395 Lisp_Object Vlocale_coding_system;
 396
 397 #endif /* emacs */
 398
 399 /* Flag to tell if we look up translation table on character code
 400    conversion.  */
 401 Lisp_Object Venable_character_translation;
 402 /* Standard translation table to look up on decoding (reading).  */
 403 Lisp_Object Vstandard_translation_table_for_decode;
 404 /* Standard translation table to look up on encoding (writing).  */
 405 Lisp_Object Vstandard_translation_table_for_encode;
 406
 407 Lisp_Object Qtranslation_table;
 408 Lisp_Object Qtranslation_table_id;
 409 Lisp_Object Qtranslation_table_for_decode;
 410 Lisp_Object Qtranslation_table_for_encode;
 411
 412 /* Alist of charsets vs revision number.  */
 413 static Lisp_Object Vcharset_revision_table;
 414
 415 /* Default coding systems used for process I/O.  */
 416 Lisp_Object Vdefault_process_coding_system;
 417
 418 /* Char table for translating Quail and self-inserting input.  */
 419 Lisp_Object Vtranslation_table_for_input;
 420
 421 /* Two special coding systems.  */
 422 Lisp_Object Vsjis_coding_system;
 423 Lisp_Object Vbig5_coding_system;
 424
 425 /* ISO2022 section */
 426
 427 #define CODING_ISO_INITIAL(coding, reg)                 \
 428   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 429                      coding_attr_iso_initial),          \
 430                reg)))
 431
 432 /* CODING->safe_charsets[CHARSET_ID]; -1 if 255 or ID too large.  */
 433 #define CODING_ISO_REQUEST(coding, charset_id)          \
 434   (((charset_id) <= (coding)->max_charset_id            \
 435     ? ((coding)->safe_charsets[charset_id] != 255       \
 436        ? (coding)->safe_charsets[charset_id]            \
 437        : -1)                                            \
 438     : -1))
 439
 440
 441 #define CODING_ISO_FLAGS(coding)        \
 442   ((coding)->spec.iso_2022.flags)
 443 #define CODING_ISO_DESIGNATION(coding, reg)     \
 444   ((coding)->spec.iso_2022.current_designation[reg])
 445 #define CODING_ISO_INVOCATION(coding, plane)    \
 446   ((coding)->spec.iso_2022.current_invocation[plane])
 447 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 448   ((coding)->spec.iso_2022.single_shifting)
 449 #define CODING_ISO_BOL(coding)  \
 450   ((coding)->spec.iso_2022.bol)
 451 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 452   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 453 #define CODING_ISO_CMP_STATUS(coding)   \
 454   (&(coding)->spec.iso_2022.cmp_status)
 455 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 456   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 457 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 458   ((coding)->spec.iso_2022.embedded_utf_8)
 459
 460 /* Control characters of ISO2022.  */
 461                         /* code */      /* function */
 462 #define ISO_CODE_LF     0x0A            /* line-feed */
 463 #define ISO_CODE_CR     0x0D            /* carriage-return */
 464 #define ISO_CODE_SO     0x0E            /* shift-out */
 465 #define ISO_CODE_SI     0x0F            /* shift-in */
 466 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 467 #define ISO_CODE_ESC    0x1B            /* escape */
 468 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 469 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 470 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 471
 472 /* Every 1-byte code of ISO2022 is classified into one of the
 473    following classes.  */
 474 enum iso_code_class_type
 475   {
 476     ISO_control_0,              /* Control codes in the range
 477                                    0x00..0x1F and 0x7F, except for the
 478                                    following 4 codes.  */
 479     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 480     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 481     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 482     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 483     ISO_control_1,              /* Control codes in the range
 484                                    0x80..0x9F, except for the
 485                                    following 3 codes.  */
 486     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 487     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 488     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 489     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 490     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 491     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 492     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 493   };
 494
 495 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
 496     `iso-flags' attribute of an iso2022 coding system.  */
 497
 498 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 499    instead of the correct short-form sequence (e.g. ESC $ A).  */
 500 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 501
 502 /* If set, reset graphic planes and registers at end-of-line to the
 503    initial state.  */
 504 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 505
 506 /* If set, reset graphic planes and registers before any control
 507    characters to the initial state.  */
 508 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 509
 510 /* If set, encode by 7-bit environment.  */
 511 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 512
 513 /* If set, use locking-shift function.  */
 514 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 515
 516 /* If set, use single-shift function.  Overwrite
 517    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 518 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 519
 520 /* If set, use designation escape sequence.  */
 521 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 522
 523 /* If set, produce revision number sequence.  */
 524 #define CODING_ISO_FLAG_REVISION        0x0080
 525
 526 /* If set, produce ISO6429's direction specifying sequence.  */
 527 #define CODING_ISO_FLAG_DIRECTION       0x0100
 528
 529 /* If set, assume designation states are reset at beginning of line on
 530    output.  */
 531 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 532
 533 /* If set, designation sequence should be placed at beginning of line
 534    on output.  */
 535 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 536
 537 /* If set, do not encode unsafe characters on output.  */
 538 #define CODING_ISO_FLAG_SAFE            0x0800
 539
 540 /* If set, extra latin codes (128..159) are accepted as a valid code
 541    on input.  */
 542 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 543
 544 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 545
 546 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 547
 548 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 549
 550 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 551
 552 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 553
 554 /* A character to be produced on output if encoding of the original
 555    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 556 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 557
 558 /* UTF-8 section */
 559 #define CODING_UTF_8_BOM(coding)        \
 560   ((coding)->spec.utf_8_bom)
 561
 562 /* UTF-16 section */
 563 #define CODING_UTF_16_BOM(coding)       \
 564   ((coding)->spec.utf_16.bom)
 565
 566 #define CODING_UTF_16_ENDIAN(coding)    \
 567   ((coding)->spec.utf_16.endian)
 568
 569 #define CODING_UTF_16_SURROGATE(coding) \
 570   ((coding)->spec.utf_16.surrogate)
 571
 572
 573 /* CCL section */
 574 #define CODING_CCL_DECODER(coding)      \
 575   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 576 #define CODING_CCL_ENCODER(coding)      \
 577   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 578 #define CODING_CCL_VALIDS(coding)                                          \
 579   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 580
 581 /* Index for each coding category in `coding_categories' */
 582
 583 enum coding_category
 584   {
 585     coding_category_iso_7,
 586     coding_category_iso_7_tight,
 587     coding_category_iso_8_1,
 588     coding_category_iso_8_2,
 589     coding_category_iso_7_else,
 590     coding_category_iso_8_else,
 591     coding_category_utf_8_auto,
 592     coding_category_utf_8_nosig,
 593     coding_category_utf_8_sig,
 594     coding_category_utf_16_auto,
 595     coding_category_utf_16_be,
 596     coding_category_utf_16_le,
 597     coding_category_utf_16_be_nosig,
 598     coding_category_utf_16_le_nosig,
 599     coding_category_charset,
 600     coding_category_sjis,
 601     coding_category_big5,
 602     coding_category_ccl,
 603     coding_category_emacs_mule,
 604     /* All above are targets of code detection.  */
 605     coding_category_raw_text,
 606     coding_category_undecided,
 607     coding_category_max
 608   };
 609
 610 /* Definitions of flag bits used in detect_coding_XXXX.  */
 611 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 612 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 613 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 614 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 615 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 616 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 617 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 618 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 619 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 620 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 621 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 622 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 623 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 624 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 625 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 626 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 627 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 628 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 629 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 630 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 631
 632 /* This value is returned if detect_coding_mask () finds nothing other
 633    than ASCII characters.  */
 634 #define CATEGORY_MASK_ANY               \
 635   (CATEGORY_MASK_ISO_7                  \
 636    | CATEGORY_MASK_ISO_7_TIGHT          \
 637    | CATEGORY_MASK_ISO_8_1              \
 638    | CATEGORY_MASK_ISO_8_2              \
 639    | CATEGORY_MASK_ISO_7_ELSE           \
 640    | CATEGORY_MASK_ISO_8_ELSE           \
 641    | CATEGORY_MASK_UTF_8_AUTO           \
 642    | CATEGORY_MASK_UTF_8_NOSIG          \
 643    | CATEGORY_MASK_UTF_8_SIG            \
 644    | CATEGORY_MASK_UTF_16_AUTO          \
 645    | CATEGORY_MASK_UTF_16_BE            \
 646    | CATEGORY_MASK_UTF_16_LE            \
 647    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 648    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 649    | CATEGORY_MASK_CHARSET              \
 650    | CATEGORY_MASK_SJIS                 \
 651    | CATEGORY_MASK_BIG5                 \
 652    | CATEGORY_MASK_CCL                  \
 653    | CATEGORY_MASK_EMACS_MULE)
 654
 655
 656 #define CATEGORY_MASK_ISO_7BIT \
 657   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 658
 659 #define CATEGORY_MASK_ISO_8BIT \
 660   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 661
 662 #define CATEGORY_MASK_ISO_ELSE \
 663   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 664
 665 #define CATEGORY_MASK_ISO_ESCAPE        \
 666   (CATEGORY_MASK_ISO_7                  \
 667    | CATEGORY_MASK_ISO_7_TIGHT          \
 668    | CATEGORY_MASK_ISO_7_ELSE           \
 669    | CATEGORY_MASK_ISO_8_ELSE)
 670
 671 #define CATEGORY_MASK_ISO       \
 672   (  CATEGORY_MASK_ISO_7BIT     \
 673      | CATEGORY_MASK_ISO_8BIT   \
 674      | CATEGORY_MASK_ISO_ELSE)
 675
 676 #define CATEGORY_MASK_UTF_16            \
 677   (CATEGORY_MASK_UTF_16_AUTO            \
 678    | CATEGORY_MASK_UTF_16_BE            \
 679    | CATEGORY_MASK_UTF_16_LE            \
 680    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 681    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 682
 683 #define CATEGORY_MASK_UTF_8     \
 684   (CATEGORY_MASK_UTF_8_AUTO     \
 685    | CATEGORY_MASK_UTF_8_NOSIG  \
 686    | CATEGORY_MASK_UTF_8_SIG)
 687
 688 /* List of symbols `coding-category-xxx' ordered by priority.  This
 689    variable is exposed to Emacs Lisp.  */
 690 static Lisp_Object Vcoding_category_list;
 691
 692 /* Table of coding categories (Lisp symbols).  This variable is for
 693    internal use only.  */
 694 static Lisp_Object Vcoding_category_table;
 695
 696 /* Table of coding-categories ordered by priority.  */
 697 static enum coding_category coding_priorities[coding_category_max];
 698
 699 /* Nth element is a coding context for the coding system bound to the
 700    Nth coding category.  */
 701 static struct coding_system coding_categories[coding_category_max];
 702
 703 /*** Commonly used macros and functions ***/
 704
 705 #ifndef min
 706 #define min(a, b) ((a) < (b) ? (a) : (b))
 707 #endif
 708 #ifndef max
 709 #define max(a, b) ((a) > (b) ? (a) : (b))
 710 #endif
 711
 712 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 713   do {                                                  \
 714     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 715     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 716   } while (0)
 717
 718
 719 /* Safely get one byte from the source text pointed to by SRC, which
 720    ends at SRC_END, and set C to that byte.  If the source is
 721    exhausted, jump to `no_more_source'.  If multibytep is nonzero and
 722    a multibyte character whose lead byte is not 0xC0 or 0xC1 is found
 723    at SRC, set C to the negative of its character code.  The caller
 724    must declare and set these variables in advance:
 725         src, src_end, src_base, multibytep, coding, consumed_chars */
 726
 727 #define ONE_MORE_BYTE(c)                                \
 728   do {                                                  \
 729     if (src == src_end)                                 \
 730       {                                                 \
 731         if (src_base < src)                             \
 732           record_conversion_result                      \
 733             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 734         goto no_more_source;                            \
 735       }                                                 \
 736     c = *src++;                                         \
 737     if (multibytep && (c & 0x80))                       \
 738       {                                                 \
 739         if ((c & 0xFE) == 0xC0)                         \
 740           c = ((c & 1) << 6) | *src++;                  \
 741         else                                            \
 742           {                                             \
 743             src--;                                      \
 744             c = - string_char (src, &src, NULL);        \
 745             record_conversion_result                    \
 746               (coding, CODING_RESULT_INVALID_SRC);      \
 747           }                                             \
 748       }                                                 \
 749     consumed_chars++;                                   \
 750   } while (0)
 751
 752 /* Safely get two bytes from the source text pointed to by SRC, which
 753    ends at SRC_END, and set C1 and C2 to those bytes while skipping
 754    any leading multibyte characters.  If there are not enough bytes in
 755    the source, jump to `no_more_source'.  If multibytep is nonzero and
 756    a multibyte character (lead byte other than 0xC0/0xC1) is found for
 757    C2, set C2 to -1.  The caller should declare and set these
 758    variables appropriately in advance:
 759         src, src_end, multibytep
 760    It is intended that this macro is used in detect_coding_utf_16.  */
 761
 762 #define TWO_MORE_BYTES(c1, c2)                          \
 763   do {                                                  \
 764     do {                                                \
 765       if (src == src_end)                               \
 766         goto no_more_source;                            \
 767       c1 = *src++;                                      \
 768       if (multibytep && (c1 & 0x80))                    \
 769         {                                               \
 770           if ((c1 & 0xFE) == 0xC0)                      \
 771             c1 = ((c1 & 1) << 6) | *src++;              \
 772           else                                          \
 773             {                                           \
 774               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 775               c1 = -1;                                  \
 776             }                                           \
 777         }                                               \
 778     } while (c1 < 0);                                   \
 779     if (src == src_end)                                 \
 780       goto no_more_source;                              \
 781     c2 = *src++;                                        \
 782     if (multibytep && (c2 & 0x80))                      \
 783       {                                                 \
 784         if ((c2 & 0xFE) == 0xC0)                        \
 785           c2 = ((c2 & 1) << 6) | *src++;                \
 786         else                                            \
 787           c2 = -1;                                      \
 788       }                                                 \
 789   } while (0)
 790
 791
 792 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 793   do {                                                  \
 794     c = *src++;                                         \
 795     if (multibytep && (c & 0x80))                       \
 796       {                                                 \
 797         if ((c & 0xFE) == 0xC0)                         \
 798           c = ((c & 1) << 6) | *src++;                  \
 799         else                                            \
 800           {                                             \
 801             src--;                                      \
 802             c = - string_char (src, &src, NULL);        \
 803             record_conversion_result                    \
 804               (coding, CODING_RESULT_INVALID_SRC);      \
 805           }                                             \
 806       }                                                 \
 807     consumed_chars++;                                   \
 808   } while (0)
 809
 810
 811 /* Store a byte C in the place pointed by DST and increment DST to the
 812    next free point, and increment PRODUCED_CHARS.  The caller should
 813    assure that C is 0..127, and declare and set the variable `dst'
 814    appropriately in advance.
 815 */
 816
 817
 818 #define EMIT_ONE_ASCII_BYTE(c)  \
 819   do {                          \
 820     produced_chars++;           \
 821     *dst++ = (c);               \
 822   } while (0)
 823
 824
 825 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 826
 827 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 828   do {                                  \
 829     produced_chars += 2;                \
 830     *dst++ = (c1), *dst++ = (c2);       \
 831   } while (0)
 832
 833
 834 /* Store a byte C in the place pointed by DST and increment DST to the
 835    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 836    nonzero, store in an appropriate multibyte from.  The caller should
 837    declare and set the variables `dst' and `multibytep' appropriately
 838    in advance.  */
 839
 840 #define EMIT_ONE_BYTE(c)                \
 841   do {                                  \
 842     produced_chars++;                   \
 843     if (multibytep)                     \
 844       {                                 \
 845         int ch = (c);                   \
 846         if (ch >= 0x80)                 \
 847           ch = BYTE8_TO_CHAR (ch);      \
 848         CHAR_STRING_ADVANCE (ch, dst);  \
 849       }                                 \
 850     else                                \
 851       *dst++ = (c);                     \
 852   } while (0)
 853
 854
 855 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 856
 857 #define EMIT_TWO_BYTES(c1, c2)          \
 858   do {                                  \
 859     produced_chars += 2;                \
 860     if (multibytep)                     \
 861       {                                 \
 862         int ch;                         \
 863                                         \
 864         ch = (c1);                      \
 865         if (ch >= 0x80)                 \
 866           ch = BYTE8_TO_CHAR (ch);      \
 867         CHAR_STRING_ADVANCE (ch, dst);  \
 868         ch = (c2);                      \
 869         if (ch >= 0x80)                 \
 870           ch = BYTE8_TO_CHAR (ch);      \
 871         CHAR_STRING_ADVANCE (ch, dst);  \
 872       }                                 \
 873     else                                \
 874       {                                 \
 875         *dst++ = (c1);                  \
 876         *dst++ = (c2);                  \
 877       }                                 \
 878   } while (0)
 879
 880
 881 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 882   do {                                  \
 883     EMIT_ONE_BYTE (c1);                 \
 884     EMIT_TWO_BYTES (c2, c3);            \
 885   } while (0)
 886
 887
 888 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 889   do {                                          \
 890     EMIT_TWO_BYTES (c1, c2);                    \
 891     EMIT_TWO_BYTES (c3, c4);                    \
 892   } while (0)
 893
 894
/* Prototypes for static functions.  They are declared up front so
   that the definitions below may appear in any order.  */

/* Conversion result bookkeeping.  */
static void record_conversion_result (struct coding_system *coding,
                                      enum coding_result_code result);

/* UTF-8 handlers.  */
static int detect_coding_utf_8 (struct coding_system *,
                                struct coding_detection_info *info);
static void decode_coding_utf_8 (struct coding_system *);
static int encode_coding_utf_8 (struct coding_system *);

/* UTF-16 handlers.  */
static int detect_coding_utf_16 (struct coding_system *,
                                 struct coding_detection_info *info);
static void decode_coding_utf_16 (struct coding_system *);
static int encode_coding_utf_16 (struct coding_system *);

/* ISO2022 handlers.  */
static int detect_coding_iso_2022 (struct coding_system *,
                                   struct coding_detection_info *info);
static void decode_coding_iso_2022 (struct coding_system *);
static int encode_coding_iso_2022 (struct coding_system *);

/* emacs-mule (old Emacs' internal format) handlers.  */
static int detect_coding_emacs_mule (struct coding_system *,
                                     struct coding_detection_info *info);
static void decode_coding_emacs_mule (struct coding_system *);
static int encode_coding_emacs_mule (struct coding_system *);

/* Shift-JIS handlers.  */
static int detect_coding_sjis (struct coding_system *,
                               struct coding_detection_info *info);
static void decode_coding_sjis (struct coding_system *);
static int encode_coding_sjis (struct coding_system *);

/* BIG5 handlers.  */
static int detect_coding_big5 (struct coding_system *,
                               struct coding_detection_info *info);
static void decode_coding_big5 (struct coding_system *);
static int encode_coding_big5 (struct coding_system *);

/* CCL handlers.  */
static int detect_coding_ccl (struct coding_system *,
                              struct coding_detection_info *info);
static void decode_coding_ccl (struct coding_system *);
static int encode_coding_ccl (struct coding_system *);

/* Raw text has a decoder and an encoder but no detector here.  */
static void decode_coding_raw_text (struct coding_system *);
static int encode_coding_raw_text (struct coding_system *);

/* Source and destination pointer maintenance and destination
   (re)allocation.  */
static void coding_set_source (struct coding_system *);
static void coding_set_destination (struct coding_system *);
static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
static void coding_alloc_by_making_gap (struct coding_system *,
                                        EMACS_INT, EMACS_INT);
static unsigned char *alloc_destination (struct coding_system *,
                                         EMACS_INT, unsigned char *);

/* Other internal helpers.  */
static void setup_iso_safe_charsets (Lisp_Object);
static unsigned char *encode_designation_at_bol (struct coding_system *,
                                                 int *, int *,
                                                 unsigned char *);
static int detect_eol (const unsigned char *,
                       EMACS_INT, enum coding_category);
static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
static void decode_eol (struct coding_system *);
static Lisp_Object get_translation_table (Lisp_Object, int, int *);
static Lisp_Object get_translation (Lisp_Object, int *, int *);
static int produce_chars (struct coding_system *, Lisp_Object, int);
static INLINE void produce_charset (struct coding_system *, int *,
                                    EMACS_INT);
static void produce_annotation (struct coding_system *, EMACS_INT);
static int decode_coding (struct coding_system *);
static INLINE int *handle_composition_annotation (EMACS_INT, EMACS_INT,
                                                  struct coding_system *,
                                                  int *, EMACS_INT *);
static INLINE int *handle_charset_annotation (EMACS_INT, EMACS_INT,
                                              struct coding_system *,
                                              int *, EMACS_INT *);
static void consume_chars (struct coding_system *, Lisp_Object, int);
static int encode_coding (struct coding_system *);
static Lisp_Object make_conversion_work_buffer (int);
static Lisp_Object code_conversion_restore (Lisp_Object);
static INLINE int char_encodable_p (int, Lisp_Object);
static Lisp_Object make_subsidiaries (Lisp_Object);

 970
 971 static void
 972 record_conversion_result (struct coding_system *coding,
 973                           enum coding_result_code result)
 974 {
 975   coding->result = result;
 976   switch (result)
 977     {
 978     case CODING_RESULT_INSUFFICIENT_SRC:
 979       Vlast_code_conversion_error = Qinsufficient_source;
 980       break;
 981     case CODING_RESULT_INCONSISTENT_EOL:
 982       Vlast_code_conversion_error = Qinconsistent_eol;
 983       break;
 984     case CODING_RESULT_INVALID_SRC:
 985       Vlast_code_conversion_error = Qinvalid_source;
 986       break;
 987     case CODING_RESULT_INTERRUPT:
 988       Vlast_code_conversion_error = Qinterrupted;
 989       break;
 990     case CODING_RESULT_INSUFFICIENT_MEM:
 991       Vlast_code_conversion_error = Qinsufficient_memory;
 992       break;
 993     case CODING_RESULT_INSUFFICIENT_DST:
 994       /* Don't record this error in Vlast_code_conversion_error
 995          because it happens just temporarily and is resolved when the
 996          whole conversion is finished.  */
 997       break;
 998     case CODING_RESULT_SUCCESS:
 999       break;
1000     default:
1001       Vlast_code_conversion_error = intern ("Unknown error");
1002     }
1003 }
1004
/* Decode CODE of CHARSET into C with DECODE_CHAR, keeping pointers
   into the source text valid while doing so.  decode_char may load a
   charset map, and loading a charset map allocates large structures,
   which could relocate buffer text.  The flag `charset_map_loaded'
   is cleared before the call; if it is set afterwards, CODING->source
   is recomputed with coding_set_source and SRC, SRC_BASE and SRC_END
   are all shifted by the distance CODING->source moved.  */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    c = DECODE_CHAR (charset, code);                                         \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *old_source = (coding)->source;                  \
        EMACS_INT shift;                                                     \
                                                                             \
        coding_set_source (coding);                                          \
        shift = (coding)->source - old_source;                               \
        src_end += shift;                                                    \
        src_base += shift;                                                   \
        src += shift;                                                        \
      }                                                                      \
  } while (0)
1025
1026
/* If there are not at least BYTES bytes of room left at dst (i.e.
   dst + BYTES reaches or passes dst_end), allocate more memory for
   coding->destination and update dst and dst_end.  The amount
   requested from alloc_destination is BYTES plus the number of
   elements still pending between charbuf and charbuf_end.  We don't
   have to take care of coding->source which will be relocated.  It
   is handled by calling coding_set_source in encode_coding.

   The caller must have `coding', `dst', `dst_end', `charbuf' and
   `charbuf_end' declared and set up.  */

#define ASSURE_DESTINATION(bytes)                               \
  do {                                                          \
    if (dst + (bytes) >= dst_end)                               \
      {                                                         \
        /* EMACS_INT, not int: this is a pointer difference     \
           and alloc_destination takes an EMACS_INT.  */        \
        EMACS_INT more_bytes = charbuf_end - charbuf + (bytes); \
                                                                \
        dst = alloc_destination (coding, more_bytes, dst);      \
        dst_end = coding->destination + coding->dst_bytes;      \
      }                                                         \
  } while (0)
1042
1043
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   The form is chosen by comparing C with MAX_1_BYTE_CHAR ..
   MAX_5_BYTE_CHAR; anything above MAX_5_BYTE_CHAR is written with
   BYTE8_STRING.  C is evaluated several times, so it must be free of
   side effects; every use is parenthesized so that an argument built
   with low-precedence operators (e.g. `a | b') is encoded
   correctly.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1073
1074
/* Yield the character code of the character whose multibyte form
   starts at P, and advance P past that form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length (1 to 5 bytes) is chosen from the bits 0x80, 0x20, 0x10
   and 0x08 of the leading byte.  A two-byte form whose leading byte
   is below 0xC2 additionally gets 0x3FFF80 or'ed into the result.
   No validation of the trailing bytes is done.  P is evaluated
   several times and is modified.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  ((((p)[0] & 0x80) == 0)                                       \
   ? *(p)++                                                     \
   : (((p)[0] & 0x20) == 0)                                     \
   ? ((p) += 2,                                                 \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)        \
       | (((p)[-2] & 0x1F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : (((p)[0] & 0x10) == 0)                                     \
   ? ((p) += 3,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x0F) << 12)))                             \
   : (((p)[0] & 0x08) == 0)                                     \
   ? ((p) += 4,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-4] & 0xF) << 18)))                              \
   : ((p) += 5,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-4] & 0x3F) << 18))))
1103
1104
1105 static void
1106 coding_set_source (struct coding_system *coding)
1107 {
1108   if (BUFFERP (coding->src_object))
1109     {
1110       struct buffer *buf = XBUFFER (coding->src_object);
1111
1112       if (coding->src_pos < 0)
1113         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1114       else
1115         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1116     }
1117   else if (STRINGP (coding->src_object))
1118     {
1119       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1120     }
1121   else
1122     /* Otherwise, the source is C string and is never relocated
1123        automatically.  Thus we don't have to update anything.  */
1124     ;
1125 }
1126
1127 static void
1128 coding_set_destination (struct coding_system *coding)
1129 {
1130   if (BUFFERP (coding->dst_object))
1131     {
1132       if (coding->src_pos < 0)
1133         {
1134           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1135           coding->dst_bytes = (GAP_END_ADDR
1136                                - (coding->src_bytes - coding->consumed)
1137                                - coding->destination);
1138         }
1139       else
1140         {
1141           /* We are sure that coding->dst_pos_byte is before the gap
1142              of the buffer. */
1143           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1144                                  + coding->dst_pos_byte - BEG_BYTE);
1145           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1146                                - coding->destination);
1147         }
1148     }
1149   else
1150     /* Otherwise, the destination is C string and is never relocated
1151        automatically.  Thus we don't have to update anything.  */
1152     ;
1153 }
1154
1155
1156 static void
1157 coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
1158 {
1159   coding->destination = (unsigned char *) xrealloc (coding->destination,
1160                                                     coding->dst_bytes + bytes);
1161   coding->dst_bytes += bytes;
1162 }
1163
1164 static void
1165 coding_alloc_by_making_gap (struct coding_system *coding,
1166                             EMACS_INT gap_head_used, EMACS_INT bytes)
1167 {
1168   if (EQ (coding->src_object, coding->dst_object))
1169     {
1170       /* The gap may contain the produced data at the head and not-yet
1171          consumed data at the tail.  To preserve those data, we at
1172          first make the gap size to zero, then increase the gap
1173          size.  */
1174       EMACS_INT add = GAP_SIZE;
1175
1176       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1177       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1178       make_gap (bytes);
1179       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1180       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1181     }
1182   else
1183     {
1184       Lisp_Object this_buffer;
1185
1186       this_buffer = Fcurrent_buffer ();
1187       set_buffer_internal (XBUFFER (coding->dst_object));
1188       make_gap (bytes);
1189       set_buffer_internal (XBUFFER (this_buffer));
1190     }
1191 }
1192
1193
1194 static unsigned char *
1195 alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1196                    unsigned char *dst)
1197 {
1198   EMACS_INT offset = dst - coding->destination;
1199
1200   if (BUFFERP (coding->dst_object))
1201     {
1202       struct buffer *buf = XBUFFER (coding->dst_object);
1203
1204       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1205     }
1206   else
1207     coding_alloc_by_realloc (coding, nbytes);
1208   coding_set_destination (coding);
1209   dst = coding->destination + offset;
1210   return dst;
1211 }
1212
1213 /** Macros for annotations.  */
1214
1215 /* An annotation data is stored in the array coding->charbuf in this
1216    format:
1217      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1218    LENGTH is the number of elements in the annotation.
1219    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1220    NCHARS is the number of characters in the text annotated.
1221
1222    The format of the following elements depend on ANNOTATION_MASK.
1223
   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
1226      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1227
1228    NBYTES is the number of bytes specified in the header part of
1229    old-style emacs-mule encoding, or 0 for the other kind of
1230    composition.
1231
1232    METHOD is one of enum composition_method.
1233
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.
1236
1237    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1238    follows.
1239
1240    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1241    recover from an invalid annotation, and should be skipped by
1242    produce_annotation.  */
1243
/* Maximum length of the header of annotation data.  */
#define MAX_ANNOTATION_LENGTH 5

/* Store the three common header elements of an annotation, -LEN,
   MASK and NCHARS, at BUF, advancing BUF past them, and set
   coding->annotated to 1.  The caller must have `coding' in scope.

   The expansion deliberately does not end in a semicolon, so that
   `ADD_ANNOTATION_DATA (...);' is exactly one statement and can be
   used as the body of an unbraced `if' that has an `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1254
/* Store a composition annotation at BUF and advance BUF past it:
   the common header written by ADD_ANNOTATION_DATA (with length 5,
   CODING_ANNOTATE_COMPOSITION_MASK and NCHARS), followed by NBYTES
   and METHOD.  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes), *(buf)++ = (method);                               \
  } while (0)
1261
1262
/* Store a charset annotation at BUF and advance BUF past it: the
   common header written by ADD_ANNOTATION_DATA (with length 4,
   CODING_ANNOTATE_CHARSET_MASK and NCHARS), followed by the charset
   ID.  */
#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1268
1269 \f
1270 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1271
1272
1273
1274 \f
1275 /*** 3. UTF-8 ***/
1276
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   detect_coding_utf_8 below checks if a text is encoded in UTF-8.
   If it is, it returns 1, else it returns 0.  */

/* Classification of a byte C by its high bits.  C is used exactly
   once in each macro.  */

/* Nonzero if C is below 0x80 (this includes any negative C).  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* Nonzero if C has the form 10xxxxxx, i.e. is a trailing octet.  */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
/* Nonzero if C has the form 110xxxxx.  */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* Nonzero if C has the form 1110xxxx.  */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* Nonzero if C has the form 11110xxx.  */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* Nonzero if C has the form 111110xx.  */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)

/* The byte order mark character code, and the three octets that a
   3-octet UTF-8 sequence needs to represent it.  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1292
1293 static int
1294 detect_coding_utf_8 (struct coding_system *coding,
1295                      struct coding_detection_info *detect_info)
1296 {
1297   const unsigned char *src = coding->source, *src_base;
1298   const unsigned char *src_end = coding->source + coding->src_bytes;
1299   int multibytep = coding->src_multibyte;
1300   int consumed_chars = 0;
1301   int bom_found = 0;
1302   int found = 0;
1303
1304   detect_info->checked |= CATEGORY_MASK_UTF_8;
1305   /* A coding system of this category is always ASCII compatible.  */
1306   src += coding->head_ascii;
1307
1308   while (1)
1309     {
1310       int c, c1, c2, c3, c4;
1311
1312       src_base = src;
1313       ONE_MORE_BYTE (c);
1314       if (c < 0 || UTF_8_1_OCTET_P (c))
1315         continue;
1316       ONE_MORE_BYTE (c1);
1317       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1318         break;
1319       if (UTF_8_2_OCTET_LEADING_P (c))
1320         {
1321           found = 1;
1322           continue;
1323         }
1324       ONE_MORE_BYTE (c2);
1325       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1326         break;
1327       if (UTF_8_3_OCTET_LEADING_P (c))
1328         {
1329           found = 1;
1330           if (src_base == coding->source
1331               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1332             bom_found = 1;
1333           continue;
1334         }
1335       ONE_MORE_BYTE (c3);
1336       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1337         break;
1338       if (UTF_8_4_OCTET_LEADING_P (c))
1339         {
1340           found = 1;
1341           continue;
1342         }
1343       ONE_MORE_BYTE (c4);
1344       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1345         break;
1346       if (UTF_8_5_OCTET_LEADING_P (c))
1347         {
1348           found = 1;
1349           continue;
1350         }
1351       break;
1352     }
1353   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1354   return 0;
1355
1356  no_more_source:
1357   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1358     {
1359       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1360       return 0;
1361     }
1362   if (bom_found)
1363     {
1364       /* The first character 0xFFFE doesn't necessarily mean a BOM.  */
1365       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1366     }
1367   else
1368     {
1369       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1370       if (found)
1371         detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1372     }
1373   return 1;
1374 }
1375
1376
/* Decode the UTF-8 source text of CODING, starting CODING->consumed
   bytes into CODING->source, and append the resulting character
   codes to CODING->charbuf.

   Decoding stops when the charbuf is full or when the source is
   exhausted (ONE_MORE_BYTE then jumps to `no_more_source').  On exit
   CODING->consumed, CODING->consumed_char and CODING->charbuf_used
   describe the progress up to the last completely decoded sequence.

   An octet that cannot start a valid sequence here -- bad trailing
   octet, overlong form, surrogate, value above MAX_CHAR, unknown
   leading octet -- is emitted alone, as itself if it is ASCII and
   through BYTE8_TO_CHAR otherwise, and CODING->errors is
   incremented.  */

static void
decode_coding_utf_8 (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  /* Start of the sequence being decoded; used to rewind SRC.  */
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  /* consumed_chars_base is the value consumed_chars had at
     src_base.  */
  int consumed_chars = 0, consumed_chars_base = 0;
  int multibytep = coding->src_multibyte;
  enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
  /* NOTE(review): attr and charset_list are set by CODING_GET_INFO
     below but not otherwise referenced in this function.  */
  Lisp_Object attr, charset_list;
  int eol_crlf =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR when eol_crlf is set, or -1 if
     there is none pending.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attr, charset_list);

  /* Unless the coding system is known to have no BOM, look at the
     first three octets: skip them if they are exactly the BOM,
     otherwise move SRC back to where it was.  */
  if (bom != utf_without_bom)
    {
      int c1, c2, c3;

      src_base = src;
      ONE_MORE_BYTE (c1);
      /* NOTE(review): in the rewinding branches below SRC is
         restored but consumed_chars is not, so the octets that were
         peeked at look like they are counted again by the main loop
         -- confirm whether CODING->consumed_char is meant to include
         them.  */
      if (! UTF_8_3_OCTET_LEADING_P (c1))
        src = src_base;
      else
        {
          ONE_MORE_BYTE (c2);
          if (! UTF_8_EXTRA_OCTET_P (c2))
            src = src_base;
          else
            {
              ONE_MORE_BYTE (c3);
              if (! UTF_8_EXTRA_OCTET_P (c3))
                src = src_base;
              else
                {
                  if ((c1 != UTF_8_BOM_1)
                      || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
                    src = src_base;
                  else
                    /* NOTE(review): redundant with the unconditional
                       assignment right after this block.  */
                    CODING_UTF_8_BOM (coding) = utf_without_bom;
                }
            }
        }
    }
  /* The BOM check is done at most once per coding system: whatever
     was found, later calls skip the block above.  */
  CODING_UTF_8_BOM (coding) = utf_without_bom;

  while (1)
    {
      int c, c1, c2, c3, c4, c5;

      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* No room left.  If a byte was read ahead after a CR, step
             back over it so that it is not reported as consumed.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      /* Use the byte read ahead after a CR, if any, before reading a
         new one.  */
      if (byte_after_cr >= 0)
        c1 = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c1);
      if (c1 < 0)
        {
          /* ONE_MORE_BYTE yields the negated character code for a
             multibyte character found in the source.  */
          c = - c1;
        }
      else if (UTF_8_1_OCTET_P (c1))
        {
          /* With DOS end-of-line, read the byte following a CR right
             away; it is processed by the next iteration.  The CR
             itself is still stored below.  */
          if (eol_crlf && c1 == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          c = c1;
        }
      else
        {
          ONE_MORE_BYTE (c2);
          if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
            goto invalid_code;
          if (UTF_8_2_OCTET_LEADING_P (c1))
            {
              c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
              /* Reject overlong sequences here and below.  Encoders
                 producing them are incorrect, they can be misleading,
                 and they mess up read/write invariance.  */
              if (c < 128)
                goto invalid_code;
            }
          else
            {
              ONE_MORE_BYTE (c3);
              if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
                goto invalid_code;
              if (UTF_8_3_OCTET_LEADING_P (c1))
                {
                  c = (((c1 & 0xF) << 12)
                       | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
                  if (c < 0x800
                      || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
                    goto invalid_code;
                }
              else
                {
                  ONE_MORE_BYTE (c4);
                  if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
                    goto invalid_code;
                  if (UTF_8_4_OCTET_LEADING_P (c1))
                    {
                    c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
                         | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
                    if (c < 0x10000)
                      goto invalid_code;
                    }
                  else
                    {
                      ONE_MORE_BYTE (c5);
                      if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
                        goto invalid_code;
                      if (UTF_8_5_OCTET_LEADING_P (c1))
                        {
                          c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
                               | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
                               | (c5 & 0x3F));
                          /* Too large for a character, or an overlong
                             form of a value that fits in 4 octets.  */
                          if ((c > MAX_CHAR) || (c < 0x200000))
                            goto invalid_code;
                        }
                      else
                        goto invalid_code;
                    }
                }
            }
        }

      *charbuf++ = c;
      continue;

    invalid_code:
      /* Go back to the start of the offending sequence and emit just
         its first octet; the octets after it are examined again by
         the following iterations.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
      coding->errors++;
    }

 no_more_source:
  /* Report progress only up to the start of the sequence that could
     not be completed (or up to the point where the charbuf filled
     up).  */
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
1529
1530
1531 static int
1532 encode_coding_utf_8 (struct coding_system *coding)
1533 {
1534   int multibytep = coding->dst_multibyte;
1535   int *charbuf = coding->charbuf;
1536   int *charbuf_end = charbuf + coding->charbuf_used;
1537   unsigned char *dst = coding->destination + coding->produced;
1538   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1539   int produced_chars = 0;
1540   int c;
1541
1542   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1543     {
1544       ASSURE_DESTINATION (3);
1545       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1546       CODING_UTF_8_BOM (coding) = utf_without_bom;
1547     }
1548
1549   if (multibytep)
1550     {
1551       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1552
1553       while (charbuf < charbuf_end)
1554         {
1555           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1556
1557           ASSURE_DESTINATION (safe_room);
1558           c = *charbuf++;
1559           if (CHAR_BYTE8_P (c))
1560             {
1561               c = CHAR_TO_BYTE8 (c);
1562               EMIT_ONE_BYTE (c);
1563             }
1564           else
1565             {
1566               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1567               for (p = str; p < pend; p++)
1568                 EMIT_ONE_BYTE (*p);
1569             }
1570         }
1571     }
1572   else
1573     {
1574       int safe_room = MAX_MULTIBYTE_LENGTH;
1575
1576       while (charbuf < charbuf_end)
1577         {
1578           ASSURE_DESTINATION (safe_room);
1579           c = *charbuf++;
1580           if (CHAR_BYTE8_P (c))
1581             *dst++ = CHAR_TO_BYTE8 (c);
1582           else
1583             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1584           produced_chars++;
1585         }
1586     }
1587   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1588   coding->produced_char += produced_chars;
1589   coding->produced = dst - coding->destination;
1590   return 0;
1591 }
1592
1593
1594 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1595    Check if a text is encoded in one of UTF-16 based coding systems.
1596    If it is, return 1, else return 0.  */
1597
/* Nonzero if bits 10..15 of VAL match those of a high (leading)
   surrogate, i.e. the low 16 bits of VAL are in 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) (((val) & 0xFC00) == 0xD800)

/* Nonzero if bits 10..15 of VAL match those of a low (trailing)
   surrogate, i.e. the low 16 bits of VAL are in 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) (((val) & 0xFC00) == 0xDC00)

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or satisfies
   UTF_16_LOW_SURROGATE_P.  VAL may be evaluated more than once.  */
#define UTF_16_INVALID_P(val)                                   \
  ((val) == 0xFFFE || (val) == 0xFFFF || UTF_16_LOW_SURROGATE_P (val))
1608
1609
1610 static int
1611 detect_coding_utf_16 (struct coding_system *coding,
1612                       struct coding_detection_info *detect_info)
1613 {
1614   const unsigned char *src = coding->source, *src_base = src;
1615   const unsigned char *src_end = coding->source + coding->src_bytes;
1616   int multibytep = coding->src_multibyte;
1617   int consumed_chars = 0;
1618   int c1, c2;
1619
1620   detect_info->checked |= CATEGORY_MASK_UTF_16;
       /* In the last block, an odd value of src_chars rejects every
          UTF-16 category at once.  */
1621   if (coding->mode & CODING_MODE_LAST_BLOCK
1622       && (coding->src_chars & 1))
1623     {
1624       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1625       return 0;
1626     }
1627
       /* Look at the first two bytes for a byte order mark.
          NOTE(review): TWO_MORE_BYTES is defined elsewhere; it presumably
          jumps to `no_more_source' when the source is exhausted.  */
1628   TWO_MORE_BYTES (c1, c2);
1629   if ((c1 == 0xFF) && (c2 == 0xFE))
1630     {
           /* FF FE: report little endian (and the auto category) as
              found, and reject the other signature/no-signature variants.  */
1631       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1632                              | CATEGORY_MASK_UTF_16_AUTO);
1633       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1634                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1635                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1636     }
1637   else if ((c1 == 0xFE) && (c2 == 0xFF))
1638     {
           /* FE FF: the mirror image of the case above, for big endian.  */
1639       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1640                              | CATEGORY_MASK_UTF_16_AUTO);
1641       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1642                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1643                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1644     }
1645   else if (c2 < 0)
1646     {
           /* NOTE(review): a negative C2 presumably means the second
              position held something other than a raw byte; confirm
              against the definition of TWO_MORE_BYTES.  */
1647       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1648       return 0;
1649     }
1650   else
1651     {
1652       /* We check the dispersion of Eth and Oth bytes where E is even and
1653          O is odd.  If both are high, we assume binary data.  */
           /* e[B] / o[B] become 1 once byte value B has been seen at an
              even / odd offset; e_num / o_num count the distinct values.  */
1654       unsigned char e[256], o[256];
1655       unsigned e_num = 1, o_num = 1;
1656
1657       memset (e, 0, 256);
1658       memset (o, 0, 256);
1659       e[c1] = 1;
1660       o[c2] = 1;
1661
           /* No signature was seen, so the categories that require one
              are rejected; only the NOSIG variants remain candidates.  */
1662       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1663                                 |CATEGORY_MASK_UTF_16_BE
1664                                 | CATEGORY_MASK_UTF_16_LE);
1665
           /* Keep sampling byte pairs until every category in
              CATEGORY_MASK_UTF_16 has been rejected, a negative C2 shows
              up, or the source runs out.  */
1666       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1667              != CATEGORY_MASK_UTF_16)
1668         {
1669           TWO_MORE_BYTES (c1, c2);
1670           if (c2 < 0)
1671             break;
1672           if (! e[c1])
1673             {
1674               e[c1] = 1;
1675               e_num++;
                   /* 128 or more distinct values at even offsets:
                      reject big endian without signature.  */
1676               if (e_num >= 128)
1677                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1678             }
1679           if (! o[c2])
1680             {
1681               o[c2] = 1;
1682               o_num++;
                   /* Likewise for odd offsets and little endian.  */
1683               if (o_num >= 128)
1684                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1685             }
1686         }
1687       return 0;
1688     }
1689
       /* Reached by falling through after a signature was recognized
          (and, presumably, from TWO_MORE_BYTES on end of source).  */
1690  no_more_source:
1691   return 1;
1692 }
1693
1694 static void
1695 decode_coding_utf_16 (struct coding_system *coding)
1696 {
       /* Decode UTF-16 bytes from CODING->source, starting at offset
          CODING->consumed, and append the resulting character codes to
          CODING->charbuf.  On exit, CODING->consumed, CODING->consumed_char
          and CODING->charbuf_used are updated.  A high surrogate waiting
          for its partner is kept in CODING_UTF_16_SURROGATE (coding), so
          it survives from one call to the next.

          NOTE(review): ONE_MORE_BYTE is defined elsewhere; it presumably
          jumps to `no_more_source' when SRC reaches SRC_END and may yield
          a negative value for a non-byte character in multibyte source.  */
1697   const unsigned char *src = coding->source + coding->consumed;
1698   const unsigned char *src_end = coding->source + coding->src_bytes;
1699   const unsigned char *src_base;
1700   int *charbuf = coding->charbuf + coding->charbuf_used;
1701   /* We may produce at most 3 chars in one loop, so stop filling
          while there is still room for all three.  */
1702   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1703   int consumed_chars = 0, consumed_chars_base = 0;
1704   int multibytep = coding->src_multibyte;
1705   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1706   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1707   int surrogate = CODING_UTF_16_SURROGATE (coding);
1708   Lisp_Object attr, charset_list;
1709   int eol_crlf =
1710     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The two bytes read ahead after a CR (see below), or -1 when
          nothing is pending.  */
1711   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1712
1713   CODING_GET_INFO (coding, attr, charset_list);
1714
1715   if (bom == utf_with_bom)
1716     {
1717       int c, c1, c2;
1718
           /* Read the first two bytes as a big-endian pair and compare
              them with the signature expected for ENDIAN.  */
1719       src_base = src;
1720       ONE_MORE_BYTE (c1);
1721       ONE_MORE_BYTE (c2);
1722       c = (c1 << 8) | c2;
1723
1724       if (endian == utf_16_big_endian
1725           ? c != 0xFEFF : c != 0xFFFE)
1726         {
1727           /* The first two bytes are not BOM.  Treat them as bytes
1728              for a normal character.  */
1729           src = src_base;
1730           coding->errors++;
1731         }
           /* Whether or not a BOM was found, do not look for one again.  */
1732       CODING_UTF_16_BOM (coding) = utf_without_bom;
1733     }
1734   else if (bom == utf_detect_bom)
1735     {
1736       /* We have already tried to detect BOM and failed in
1737          detect_coding.  */
1738       CODING_UTF_16_BOM (coding) = utf_without_bom;
1739     }
1740
1741   while (1)
1742     {
1743       int c, c1, c2;
1744
           /* Checkpoint: what has been read before this point is what
              gets reported as consumed at `no_more_source'.  */
1745       src_base = src;
1746       consumed_chars_base = consumed_chars;
1747
1748       if (charbuf >= charbuf_end)
1749         {
               /* The two look-ahead bytes were already taken from SRC but
                  not yet decoded; report them as unconsumed.  */
1750           if (byte_after_cr1 >= 0)
1751             src_base -= 2;
1752           break;
1753         }
1754
           /* Fetch the first byte, preferring a pending look-ahead byte.  */
1755       if (byte_after_cr1 >= 0)
1756         c1 = byte_after_cr1, byte_after_cr1 = -1;
1757       else
1758         ONE_MORE_BYTE (c1);
1759       if (c1 < 0)
1760         {
               /* Not a raw byte: store the negated value as is.  */
1761           *charbuf++ = -c1;
1762           continue;
1763         }
           /* Likewise for the second byte.  */
1764       if (byte_after_cr2 >= 0)
1765         c2 = byte_after_cr2, byte_after_cr2 = -1;
1766       else
1767         ONE_MORE_BYTE (c2);
1768       if (c2 < 0)
1769         {
               /* C1 has no partner byte: emit it alone (as an eight-bit
                  character if it is not ASCII), followed by the negated C2.  */
1770           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1771           *charbuf++ = -c2;
1772           continue;
1773         }
           /* Combine the two bytes into a 16-bit unit in the proper order.  */
1774       c = (endian == utf_16_big_endian
1775            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1776
1777       if (surrogate)
1778         {
               /* A high surrogate is pending and C should complete it.  */
1779           if (! UTF_16_LOW_SURROGATE_P (c))
1780             {
                   /* It does not: emit the two bytes of the pending high
                      surrogate in their original order and count an error.  */
1781               if (endian == utf_16_big_endian)
1782                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1783               else
1784                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1785               *charbuf++ = c1;
1786               *charbuf++ = c2;
1787               coding->errors++;
                   /* C either starts a new pair or is an ordinary unit.  */
1788               if (UTF_16_HIGH_SURROGATE_P (c))
1789                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1790               else
1791                 *charbuf++ = c;
1792             }
1793           else
1794             {
                   /* Valid pair: build the code point at or above 0x10000
                      and clear the pending state.  */
1795               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1796               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1797               *charbuf++ = 0x10000 + c;
1798             }
1799         }
1800       else
1801         {
               /* No surrogate pending: either remember a new high
                  surrogate or emit C directly.  */
1802           if (UTF_16_HIGH_SURROGATE_P (c))
1803             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1804           else
1805             {
1806               if (eol_crlf && c == '\r')
1807                 {
                       /* Read the next two bytes ahead before storing the
                          CR.  NOTE(review): if ONE_MORE_BYTE leaves via
                          `no_more_source' here, the CR is neither stored
                          nor counted as consumed, so it is presumably
                          retried together with what follows.  */
1808                   ONE_MORE_BYTE (byte_after_cr1);
1809                   ONE_MORE_BYTE (byte_after_cr2);
1810                 }
1811               *charbuf++ = c;
1812             }
1813         }
1814     }
1815
1816  no_more_source:
       /* Report progress up to the last checkpoint only.  */
1817   coding->consumed_char += consumed_chars_base;
1818   coding->consumed = src_base - coding->source;
1819   coding->charbuf_used = charbuf - coding->charbuf;
1820 }
1821
1822 static int
1823 encode_coding_utf_16 (struct coding_system *coding)
1824 {
1825   int multibytep = coding->dst_multibyte;
1826   int *charbuf = coding->charbuf;
1827   int *charbuf_end = charbuf + coding->charbuf_used;
1828   unsigned char *dst = coding->destination + coding->produced;
1829   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1830   int safe_room = 8;
1831   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1832   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1833   int produced_chars = 0;
1834   Lisp_Object attrs, charset_list;
1835   int c;
1836
1837   CODING_GET_INFO (coding, attrs, charset_list);
1838
1839   if (bom != utf_without_bom)
1840     {
1841       ASSURE_DESTINATION (safe_room);
1842       if (big_endian)
1843         EMIT_TWO_BYTES (0xFE, 0xFF);
1844       else
1845         EMIT_TWO_BYTES (0xFF, 0xFE);
1846       CODING_UTF_16_BOM (coding) = utf_without_bom;
1847     }
1848
1849   while (charbuf < charbuf_end)
1850     {
1851       ASSURE_DESTINATION (safe_room);
1852       c = *charbuf++;
1853       if (c > MAX_UNICODE_CHAR)
1854         c = coding->default_char;
1855
1856       if (c < 0x10000)
1857         {
1858           if (big_endian)
1859             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1860           else
1861             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1862         }
1863       else
1864         {
1865           int c1, c2;
1866
1867           c -= 0x10000;
1868           c1 = (c >> 10) + 0xD800;
1869           c2 = (c & 0x3FF) + 0xDC00;
1870           if (big_endian)
1871             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1872           else
1873             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1874         }
1875     }
1876   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1877   coding->produced = dst - coding->destination;
1878   coding->produced_char += produced_chars;
1879   return 0;
1880 }
1881
1882 \f
1883 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1884
1885 /* Emacs' internal format for representation of multiple character
1886    sets is a kind of multi-byte encoding, i.e. characters are
1887    represented by variable-length sequences of one-byte codes.
1888
1889    ASCII characters and control characters (e.g. `tab', `newline') are
1890    represented by one-byte sequences which are their ASCII codes, in
1891    the range 0x00 through 0x7F.
1892
1893    8-bit characters of the range 0x80..0x9F are represented by
1894    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1895    code + 0x20).
1896
1897    8-bit characters of the range 0xA0..0xFF are represented by
1898    one-byte sequences which are their 8-bit code.
1899
1900    The other characters are represented by a sequence of `base
1901    leading-code', optional `extended leading-code', and one or two
1902    `position-code's.  The length of the sequence is determined by the
1903    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1904    whereas extended leading-code and position-code take the range 0xA0
1905    through 0xFF.  See `charset.h' for more details about leading-code
1906    and position-code.
1907
1908    --- CODE RANGE of Emacs' internal format ---
1909    character set        range
1910    -------------        -----
1911    ascii                0x00..0x7F
1912    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1913    eight-bit-graphic    0xA0..0xFF
1914    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1915    ---------------------------------------------
1916
1917    As this is the internal character representation, the format is
1918    usually not used externally (i.e. in a file or in a data sent to a
1919    process).  But, it is possible to have a text externally in this
1920    format (i.e. by encoding by the coding system `emacs-mule').
1921
1922    In that case, a sequence of one-byte codes has a slightly different
1923    form.
1924
1925    At first, all characters in eight-bit-control are represented by
1926    one-byte sequences which are their 8-bit code.
1927
1928    Next, character composition data are represented by the byte
1929    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1930    where,
1931         METHOD is 0xF2 plus one of composition method (enum
1932         composition_method),
1933
1934         BYTES is 0xA0 plus a byte length of this composition data,
1935
1936         CHARS is 0xA0 plus a number of characters composed by this
1937         data,
1938
1939         COMPONENTs are characters of multibyte form or composition
1940         rules encoded by two-byte of ASCII codes.
1941
1942    In addition, for backward compatibility, the following formats are
1943    also recognized as composition data on decoding.
1944
1945    0x80 MSEQ ...
1946    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1947
1948    Here,
1949         MSEQ is a multibyte form but in these special format:
1950           ASCII: 0xA0 ASCII_CODE+0x80,
1951           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1952         RULE is a one byte code of the range 0xA0..0xF0 that
1953         represents a composition rule.
1954   */
1955
1956 char emacs_mule_bytes[256];   /* Indexed by a byte value.  The code
                                      below reads the entry for a leading
                                      byte as the total byte length (1..4)
                                      of the sequence that byte starts.  */
1957
1958
1959 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1960    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1961    else return 0.  */
1962
1963 static int
1964 detect_coding_emacs_mule (struct coding_system *coding,
1965                           struct coding_detection_info *detect_info)
1966 {
1967   const unsigned char *src = coding->source, *src_base;
1968   const unsigned char *src_end = coding->source + coding->src_bytes;
1969   int multibytep = coding->src_multibyte;
1970   int consumed_chars = 0;
1971   int c;
       /* Becomes CATEGORY_MASK_EMACS_MULE once at least one well-formed
          non-ASCII sequence has been seen.  */
1972   int found = 0;
1973
1974   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1975   /* A coding system of this category is always ASCII compatible.  */
1976   src += coding->head_ascii;
1977
       /* Leaving this loop with `break' rejects the category.  The only
          other exit is the `no_more_source' label.  NOTE(review):
          ONE_MORE_BYTE is defined elsewhere and presumably jumps there
          when the source is exhausted.  */
1978   while (1)
1979     {
           /* Start of the sequence under examination; compared with SRC
              at `no_more_source' to spot a truncated sequence.  */
1980       src_base = src;
1981       ONE_MORE_BYTE (c);
1982       if (c < 0)
1983         continue;
1984       if (c == 0x80)
1985         {
1986           /* Perhaps the start of composite character.  We simply skip
1987              it because analyzing it is too heavy for detecting.  But,
1988              at least, we check that the composite character
1989              constitutes of more than 4 bytes.  */
               /* This declaration shadows the function-level SRC_BASE for
                  the rest of this block only.  */
1990           const unsigned char *src_base;
1991
1992         repeat:
1993           src_base = src;
               /* Skip the run of bytes >= 0xA0 that follows the 0x80.  */
1994           do
1995             {
1996               ONE_MORE_BYTE (c);
1997             }
1998           while (c >= 0xA0);
1999
               /* Too short to be a composition: reject.  */
2000           if (src - src_base <= 4)
2001             break;
2002           found = CATEGORY_MASK_EMACS_MULE;
               /* Another composition follows immediately.  */
2003           if (c == 0x80)
2004             goto repeat;
2005         }
2006
2007       if (c < 0x80)
2008         {
               /* ESC, SI and SO reject this category.  */
2009           if (c < 0x20
2010               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2011             break;
2012         }
2013       else
2014         {
               /* NOTE(review): SRC_BASE here is the function-level variable
                  set at the top of this iteration.  When control arrives
                  after a skipped composition, it still points at the 0x80
                  byte rather than at the byte now held in C -- confirm
                  that this is intended.  */
2015           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2016
               /* Every following byte of the sequence must be >= 0xA0.  */
2017           while (more_bytes > 0)
2018             {
2019               ONE_MORE_BYTE (c);
2020               if (c < 0xA0)
2021                 {
2022                   src--;        /* Unread the last byte.  */
2023                   break;
2024                 }
2025               more_bytes--;
2026             }
               /* The sequence ended early: reject.  */
2027           if (more_bytes != 0)
2028             break;
2029           found = CATEGORY_MASK_EMACS_MULE;
2030         }
2031     }
2032   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2033   return 0;
2034
2035  no_more_source:
       /* Bytes of an unfinished sequence remain and no further block
          will follow: reject.  */
2036   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2037     {
2038       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2039       return 0;
2040     }
2041   detect_info->found |= found;
2042   return 1;
2043 }
2044
2045
2046 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2047    character.  If CMP_STATUS indicates that we must expect MSEQ or
2048    RULE described above, decode it and return the negative value of
2049    the decoded character or rule.  If an invalid byte is found, return
2050    -1.  If SRC is too short, return -2.  */
2051
2052 int
2053 emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2054                  int *nbytes, int *nchars, int *id,
2055                  struct composition_status *cmp_status)
2056 {
       /* On success, *NBYTES receives the number of bytes read from SRC
          and *NCHARS the value accumulated in consumed_chars.  If ID is
          non-NULL, *ID receives the id of the charset of the decoded
          character, except when a rule is returned.

          NOTE(review): ONE_MORE_BYTE is defined elsewhere; it presumably
          jumps to `no_more_source' when SRC reaches SRC_END and may yield
          a negative value for a non-byte character in multibyte source.  */
2057   const unsigned char *src_end = coding->source + coding->src_bytes;
2058   const unsigned char *src_base = src;
2059   int multibytep = coding->src_multibyte;
2060   struct charset *charset;
2061   unsigned code;
2062   int c;
2063   int consumed_chars = 0;
       /* Nonzero once an old-form MSEQ prefix has been stripped; the
          result is then returned negated.  */
2064   int mseq_found = 0;
2065
2066   ONE_MORE_BYTE (c);
2067   if (c < 0)
2068     {
           /* Not a raw byte: return the negated value, using the charset
              in slot 0 of emacs_mule_charset for *ID.  */
2069       c = -c;
2070       charset = emacs_mule_charset[0];
2071     }
2072   else
2073     {
2074       if (c >= 0xA0)
2075         {
               /* A first byte >= 0xA0 is accepted only inside an old-form
                  composition.  */
2076           if (cmp_status->state != COMPOSING_NO
2077               && cmp_status->old_form)
2078             {
2079               if (cmp_status->state == COMPOSING_CHAR)
2080                 {
                       /* Expecting MSEQ: undo its special encoding to get
                          the ordinary first byte.  */
2081                   if (c == 0xA0)
2082                     {
                           /* 0xA0 is followed by ASCII_CODE + 0x80.  */
2083                       ONE_MORE_BYTE (c);
2084                       c -= 0x80;
2085                       if (c < 0)
2086                         goto invalid_code;
2087                     }
2088                   else
                         /* Otherwise C is LEADING_CODE + 0x20.  */
2089                     c -= 0x20;
2090                   mseq_found = 1;
2091                 }
2092               else
2093                 {
                       /* Any other composing state: hand the byte back
                          negated, without decoding a character.  */
2094                   *nbytes = src - src_base;
2095                   *nchars = consumed_chars;
2096                   return -c;
2097                 }
2098             }
2099           else
2100             goto invalid_code;
2101         }
2102
           /* Dispatch on the sequence length recorded for the leading
              byte.  Every byte after the first must be >= 0xA0 (case 4
              only requires its charset byte to be non-negative); its low
              7 bits go into CODE.  */
2103       switch (emacs_mule_bytes[c])
2104         {
2105         case 2:
               /* The leading byte selects the charset; one code byte.  */
2106           if (! (charset = emacs_mule_charset[c]))
2107             goto invalid_code;
2108           ONE_MORE_BYTE (c);
2109           if (c < 0xA0)
2110             goto invalid_code;
2111           code = c & 0x7F;
2112           break;
2113
2114         case 3:
2115           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2116               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2117             {
                   /* The second byte selects the charset; one code byte.  */
2118               ONE_MORE_BYTE (c);
2119               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
2120                 goto invalid_code;
2121               ONE_MORE_BYTE (c);
2122               if (c < 0xA0)
2123                 goto invalid_code;
2124               code = c & 0x7F;
2125             }
2126           else
2127             {
                   /* The leading byte selects the charset; two code bytes.  */
2128               if (! (charset = emacs_mule_charset[c]))
2129                 goto invalid_code;
2130               ONE_MORE_BYTE (c);
2131               if (c < 0xA0)
2132                 goto invalid_code;
2133               code = (c & 0x7F) << 8;
2134               ONE_MORE_BYTE (c);
2135               if (c < 0xA0)
2136                 goto invalid_code;
2137               code |= c & 0x7F;
2138             }
2139           break;
2140
2141         case 4:
               /* The second byte selects the charset; two code bytes.  */
2142           ONE_MORE_BYTE (c);
2143           if (c < 0 || ! (charset = emacs_mule_charset[c]))
2144             goto invalid_code;
2145           ONE_MORE_BYTE (c);
2146           if (c < 0xA0)
2147             goto invalid_code;
2148           code = (c & 0x7F) << 8;
2149           ONE_MORE_BYTE (c);
2150           if (c < 0xA0)
2151             goto invalid_code;
2152           code |= c & 0x7F;
2153           break;
2154
2155         case 1:
               /* A single byte: ASCII or eight-bit.  */
2156           code = c;
2157           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2158                                      ? charset_ascii : charset_eight_bit);
2159           break;
2160
2161         default:
               /* Any other table entry is treated as a fatal internal
                  error.  */
2162           abort ();
2163         }
           /* Map (CHARSET, CODE) to a character; a negative result means
              the code is not valid.  */
2164       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, code, c);
2165       if (c < 0)
2166         goto invalid_code;
2167     }
2168   *nbytes = src - src_base;
2169   *nchars = consumed_chars;
2170   if (id)
2171     *id = charset->id;
2172   return (mseq_found ? -c : c);
2173
2174  no_more_source:
2175   return -2;
2176
2177  invalid_code:
2178   return -1;
2179 }
2180
2181
2182 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2183
2184 /* Handle these composition sequence ('|': the end of header elements,
2185    BYTES and CHARS >= 0xA0):
2186
2187    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2188    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2189    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2190
2191    and these old form:
2192
2193    (4) relative composition: 0x80 | MSEQ ... MSEQ
2194    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2195
2196    When the starter 0x80 and the following header elements are found,
2197    this annotation header is produced.
2198
2199         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2200
2201    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2202    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2203
2204    Then, upon reading the following elements, these codes are produced
2205    until the composition end is found:
2206
2207    (1) CHAR ... CHAR
2208    (2) ALT ... ALT CHAR ... CHAR
2209    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2210    (4) CHAR ... CHAR
2211    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2212
2213    When the composition end is found, LENGTH and NCHARS in the
2214    annotation header is updated as below:
2215
2216    (1) LENGTH: unchanged, NCHARS: unchanged
2217    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2218    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2219    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2220    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2221
2222    If an error is found while composing, the annotation header is
2223    changed to the original composition header (plus filler -1s) as
2224    below:
2225
2226    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2227    (5)          [ 0x80 0xFF -1 -1 -1 ]
2228
2229    and the sequence [ -2 DECODED-RULE ] is changed to the original
2230    byte sequence as below:
2231         o the original byte sequence is B: [ B -1 ]
2232         o the original byte sequence is B1 B2: [ B1 B2 ]
2233
2234    Most of the routines are implemented by macros because many
2235    variables and labels in the caller decode_coding_emacs_mule must be
2236    accessible, and they are usually called just once (thus doesn't
2237    increase the size of compiled object).  */
2238
/* Decode the Emacs 20 style composition rule held in C and store the
   result in RULE.  C must be an lvalue; it is reduced by 0xA0 and must
   then lie in 0..80, otherwise control jumps to `invalid_code'.  The
   reduced value is split into the reference points C / 9 and C % 9, a
   value of 4 in either one is replaced by 10, and the pair is combined
   with COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    c -= 0xA0;                                          \
    if (! (c >= 0 && c < 81))                           \
      goto invalid_code;                                \
    gref = (c / 9 == 4) ? 10 : c / 9;                   \
    nref = (c % 9 == 4) ? 10 : c % 9;                   \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2255
2256
/* Decode an Emacs 21 style composition rule and store it in RULE.
   The rule occupies two bytes: C, which the caller has already read,
   and the next byte, which is fetched into C with ONE_MORE_BYTE.  Each
   byte minus 0x20 must lie in 0..80, otherwise control jumps to
   `invalid_code'.  The two reduced values are combined with
   COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
  do {                                                  \
    int gref = c - 0x20;                                \
    int nref;                                           \
                                                        \
    if (! (gref >= 0 && gref < 81))                     \
      goto invalid_code;                                \
    ONE_MORE_BYTE (c);                                  \
    nref = c - 0x20;                                    \
    if (! (nref >= 0 && nref < 81))                     \
      goto invalid_code;                                \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2274
2275
/* Start of Emacs 21 style format.  On entry C holds the METHOD byte
   (0xF2 plus a composition method).  The next two bytes are read with
   ONE_MORE_BYTE: BYTES (0xA0 plus the byte length of this composition
   data) and CHARS (0xA0 plus the number of characters composed).

   Control jumps to `invalid_code' when the byte length is below 3,
   when it is not exactly 4 for a relative composition, or when the
   character count is not in 1..MAX_COMPOSITION_COMPONENTS-1.
   Otherwise CMP_STATUS is set up for the new composition and the
   annotation header is appended to CHARBUF via ADD_COMPOSITION_DATA.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    if (method == COMPOSITION_RELATIVE && nbytes != 4)                  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (! (nchars > 0 && nchars < MAX_COMPOSITION_COMPONENTS))          \
      goto invalid_code;                                                \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    cmp_status->state = (method == COMPOSITION_RELATIVE                 \
                         ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);  \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
2308
2309
/* Start of Emacs 20 style format for relative composition.  Marks
   CMP_STATUS as an old-form relative composition that expects a
   character next, with no characters or components counted yet, and
   appends an annotation header with zero NCHARS and NBYTES to
   CHARBUF.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_RELATIVE;                  \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2321
2322
/* Start of Emacs 20 style format for rule-base composition.  Marks
   CMP_STATUS as an old-form composition with rules that expects a
   character next, with no characters or components counted yet, and
   appends an annotation header with zero NCHARS and NBYTES to
   CHARBUF.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_WITH_RULE;                 \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2334
2335
/* Read the byte that follows a composition starter into C and begin
   the matching kind of composition:

     0xF2 + COMPOSITION_RELATIVE .. 0xF2 + COMPOSITION_WITH_RULE_ALTCHARS
                   Emacs 21 style header
     0xA0..0xBF    Emacs 20 style relative composition; SRC is moved
                   back so that the byte is read again as a component
     0xFF          Emacs 20 style rule-base composition

   Any other value, including a negative one, jumps to
   `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_START()           \
  do {                                                  \
    const unsigned char *current_src = src;             \
                                                        \
    ONE_MORE_BYTE (c);                                  \
    if (c < 0)                                          \
      goto invalid_code;                                \
    if (c - 0xF2 >= COMPOSITION_RELATIVE                \
        && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
      {                                                 \
        DECODE_EMACS_MULE_21_COMPOSITION ();            \
      }                                                 \
    else if (c >= 0xA0 && c < 0xC0)                     \
      {                                                 \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
        /* Re-read C as a composition component.  */    \
        src = current_src;                              \
      }                                                 \
    else if (c == 0xFF)                                 \
      {                                                 \
        DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();   \
      }                                                 \
    else                                                \
      goto invalid_code;                                \
  } while (0)
2359
     /* Close the composition being decoded after it has been read
        completely.  Expects `cmp_status' and `charbuf' in scope.

        IDX addresses the element cmp_status->length slots before
        CHARBUF.  For an old-form composition the number of characters
        read is stored at IDX + 2.  Otherwise, for methods greater than
        COMPOSITION_RELATIVE, the value at IDX + 2 minus the length is
        stored at IDX.  In every case the state becomes COMPOSING_NO.  */
2360 #define EMACS_MULE_COMPOSITION_END()                            \
2361   do {                                                          \
2362     int idx = - cmp_status->length;                             \
2363                                                                 \
2364     if (cmp_status->old_form)                                   \
2365       charbuf[idx + 2] = cmp_status->nchars;                    \
2366     else if (cmp_status->method > COMPOSITION_RELATIVE)         \
2367       charbuf[idx] = charbuf[idx + 2] - cmp_status->length;     \
2368     cmp_status->state = COMPOSING_NO;                           \
2369   } while (0)
2370
2371
     /* Finish a composition that is still open although decoding of it
        cannot continue.  It is reached through
        EMACS_MULE_MAYBE_FINISH_COMPOSITION, i.e. only while
        CMP_STATUS->state is not COMPOSING_NO.

        CHARBUF points just past the last element stored for the
        composition; the composition data starts CMP_STATUS->length
        elements before it.

        An old-form composition that already has at least one character
        is kept: its character count is recorded at offset 2 of the
        data.  Any other composition is discarded by overwriting the
        beginning of its data with eight-bit byte characters.

        The state is reset to COMPOSING_NO.  The return value is the
        amount the caller adds to its character offset.  */
2372 static int
2373 emacs_mule_finish_composition (int *charbuf,
2374                                struct composition_status *cmp_status)
2375 {
2376   int idx = - cmp_status->length;
2377   int new_chars;
2378
2379   if (cmp_status->old_form && cmp_status->nchars > 0)
2380     {
2381       charbuf[idx + 2] = cmp_status->nchars;
2382       new_chars = 0;
2383       if (cmp_status->method == COMPOSITION_WITH_RULE
2384           && cmp_status->state == COMPOSING_CHAR)
2385         {
2386           /* The last rule was invalid.  */
               /* The decoder stores a rule as the pair (-2, RULE) and
                  then returns to COMPOSING_CHAR, so the last two slots
                  hold a rule that no character followed.  Replace that
                  pair with one eight-bit byte character.
                  NOTE(review): adding 0xA0 presumably rebuilds the
                  source byte of the rule -- confirm against
                  DECODE_EMACS_MULE_COMPOSITION_RULE_20.  */
2387           int rule = charbuf[-1] + 0xA0;
2388
2389           charbuf[-2] = BYTE8_TO_CHAR (rule);
2390           charbuf[-1] = -1;
2391           new_chars = 1;
2392         }
2393     }
2394   else
2395     {
           /* Overwrite the data from its first slot, starting with
              the 0x80 byte that introduced the composition.  */
2396       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2397
2398       if (cmp_status->method == COMPOSITION_WITH_RULE)
2399         {
2400           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2401           charbuf[idx++] = -3;
2402           charbuf[idx++] = 0;
2403           new_chars = 1;
2404         }
2405       else
2406         {
               /* Both slots must be read before the stores below
                  overwrite them.  */
2407           int nchars = charbuf[idx + 1] + 0xA0;
2408           int nbytes = charbuf[idx + 2] + 0xA0;
2409
2410           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2411           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2412           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2413           charbuf[idx++] = -1;
2414           new_chars = 4;
2415         }
2416     }
2417   cmp_status->state = COMPOSING_NO;
2418   return new_chars;
2419 }
2420
     /* If a composition is in progress (state is not COMPOSING_NO),
        finish it with emacs_mule_finish_composition and add the value
        it returns to `char_offset'.  Expects `cmp_status', `charbuf'
        and `char_offset' in scope.  */
2421 #define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
2422   do {                                                                    \
2423     if (cmp_status->state != COMPOSING_NO)                                \
2424       char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
2425   } while (0)
2426
2427
     /* Decode the emacs-mule encoded source bytes of CODING into
        CODING->charbuf.

        Decoding starts at CODING->source + CODING->consumed and stores
        after the CODING->charbuf_used elements already in the buffer.
        A composition left unfinished by a previous call is first
        restored from the carryover buffer of the composition status; one
        that is unfinished when this call stops is finished if
        CODING_MODE_LAST_BLOCK is set and saved in the carryover buffer
        otherwise.

        A byte sequence that cannot be decoded is passed through one
        byte at a time, as an ASCII character or as an eight-bit byte
        character, and counted in CODING->errors.

        On return CODING->consumed_char, CODING->consumed and
        CODING->charbuf_used have been updated.  */
2428 static void
2429 decode_coding_emacs_mule (struct coding_system *coding)
2430 {
2431   const unsigned char *src = coding->source + coding->consumed;
2432   const unsigned char *src_end = coding->source + coding->src_bytes;
2433   const unsigned char *src_base;
2434   int *charbuf = coding->charbuf + coding->charbuf_used;
2435   /* We may produce two annotations (charset and composition) in one
2436      loop and one more charset annotation at the end.  */
2437   int *charbuf_end
2438     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2439   int consumed_chars = 0, consumed_chars_base;
2440   int multibytep = coding->src_multibyte;
2441   Lisp_Object attrs, charset_list;
2442   int char_offset = coding->produced_char;
2443   int last_offset = char_offset;
2444   int last_id = charset_ascii;
2445   int eol_crlf =
2446     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2447   int byte_after_cr = -1;
2448   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2449
2450   CODING_GET_INFO (coding, attrs, charset_list);
2451
       /* Put back the elements of a composition that a previous call had
          to leave unfinished (they are saved at the end of this
          function).  */
2452   if (cmp_status->state != COMPOSING_NO)
2453     {
2454       int i;
2455
2456       for (i = 0; i < cmp_status->length; i++)
2457         *charbuf++ = cmp_status->carryover[i];
2458       coding->annotated = 1;
2459     }
2460
2461   while (1)
2462     {
2463       int c, id;
2464
           /* Remember where this iteration starts; the invalid_code and
              retry paths rewind to this point.  */
2465       src_base = src;
2466       consumed_chars_base = consumed_chars;
2467
           /* Stop when charbuf is full.  A byte already read ahead after
              a CR is given back by moving src_base one byte back.  */
2468       if (charbuf >= charbuf_end)
2469         {
2470           if (byte_after_cr >= 0)
2471             src_base--;
2472           break;
2473         }
2474
2475       if (byte_after_cr >= 0)
2476         c = byte_after_cr, byte_after_cr = -1;
2477       else
2478         ONE_MORE_BYTE (c);
2479
           /* A negative C is stored negated as a character; 0x80 starts
              a new composition.  Either way a composition in progress
              is finished first.  */
2480       if (c < 0 || c == 0x80)
2481         {
2482           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2483           if (c < 0)
2484             {
2485               *charbuf++ = -c;
2486               char_offset++;
2487             }
2488           else
2489             DECODE_EMACS_MULE_COMPOSITION_START ();
2490           continue;
2491         }
2492
2493       if (c < 0x80)
2494         {
               /* With DOS-style EOL conversion, read the byte after a CR
                  now; it is processed at the next iteration.  */
2495           if (eol_crlf && c == '\r')
2496             ONE_MORE_BYTE (byte_after_cr);
2497           id = charset_ascii;
               /* An ASCII character finishes an old-form composition;
                  in the new form it is counted against the remaining
                  components while in a component state.  */
2498           if (cmp_status->state != COMPOSING_NO)
2499             {
2500               if (cmp_status->old_form)
2501                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2502               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2503                 cmp_status->ncomps--;
2504             }
2505         }
2506       else
2507         {
2508           int nchars, nbytes;
2509           /* emacs_mule_char can load a charset map from a file, which
2510              allocates a large structure and might cause buffer text
2511              to be relocated as result.  Thus, we need to remember the
2512              original pointer to buffer text, and fixup all related
2513              pointers after the call.  */
2514           const unsigned char *orig = coding->source;
2515           EMACS_INT offset;
2516
2517           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2518                                cmp_status);
2519           offset = coding->source - orig;
2520           if (offset)
2521             {
2522               src += offset;
2523               src_base += offset;
2524               src_end += offset;
2525             }
               /* -1 is treated as an invalid sequence and -2 leaves the
                  loop.  Other negative values fall through; see the
                  comment below.  */
2526           if (c < 0)
2527             {
2528               if (c == -1)
2529                 goto invalid_code;
2530               if (c == -2)
2531                 break;
2532             }
2533           src = src_base + nbytes;
2534           consumed_chars = consumed_chars_base + nchars;
2535           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2536             cmp_status->ncomps -= nchars;
2537         }
2538
2539       /* Now if C >= 0, we found a normally encoded character, if C <
2540          0, we found an old-style composition component character or
2541          rule.  */
2542
2543       if (cmp_status->state == COMPOSING_NO)
2544         {
               /* When the charset changes, annotate the run of
                  characters of the previous non-ASCII charset.  */
2545           if (last_id != id)
2546             {
2547               if (last_id != charset_ascii)
2548                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2549                                   last_id);
2550               last_id = id;
2551               last_offset = char_offset;
2552             }
2553           *charbuf++ = c;
2554           char_offset++;
2555         }
2556       else if (cmp_status->state == COMPOSING_CHAR)
2557         {
2558           if (cmp_status->old_form)
2559             {
                   /* Old form: a normal character finishes the
                      composition; a component is stored and counted
                      up to MAX_COMPOSITION_COMPONENTS.  */
2560               if (c >= 0)
2561                 {
2562                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2563                   *charbuf++ = c;
2564                   char_offset++;
2565                 }
2566               else
2567                 {
2568                   *charbuf++ = -c;
2569                   cmp_status->nchars++;
2570                   cmp_status->length++;
2571                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2572                     EMACS_MULE_COMPOSITION_END ();
2573                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2574                     cmp_status->state = COMPOSING_RULE;
2575                 }
2576             }
2577           else
2578             {
                   /* New form: nchars counts down the characters still
                      expected; the composition ends when it reaches
                      zero.  */
2579               *charbuf++ = c;
2580               cmp_status->length++;
2581               cmp_status->nchars--;
2582               if (cmp_status->nchars == 0)
2583                 EMACS_MULE_COMPOSITION_END ();
2584             }
2585         }
2586       else if (cmp_status->state == COMPOSING_RULE)
2587         {
2588           int rule;
2589
               /* A normal character where a rule is expected ends the
                  composition; otherwise the rule is stored as the pair
                  (-2, RULE) and a character is expected next.  */
2590           if (c >= 0)
2591             {
2592               EMACS_MULE_COMPOSITION_END ();
2593               *charbuf++ = c;
2594               char_offset++;
2595             }
2596           else
2597             {
2598               c = -c;
2599               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2600               if (rule < 0)
2601                 goto invalid_code;
2602               *charbuf++ = -2;
2603               *charbuf++ = rule;
2604               cmp_status->length += 2;
2605               cmp_status->state = COMPOSING_CHAR;
2606             }
2607         }
2608       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2609         {
2610           *charbuf++ = c;
2611           cmp_status->length++;
               /* ncomps was already decremented above.  Zero means the
                  components are complete; a negative count abandons the
                  composition.  */
2612           if (cmp_status->ncomps == 0)
2613             cmp_status->state = COMPOSING_CHAR;
2614           else if (cmp_status->ncomps > 0)
2615             {
2616               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2617                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2618             }
2619           else
2620             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2621         }
2622       else                      /* COMPOSING_COMPONENT_RULE */
2623         {
2624           int rule;
2625
2626           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2627           if (rule < 0)
2628             goto invalid_code;
2629           *charbuf++ = -2;
2630           *charbuf++ = rule;
2631           cmp_status->length += 2;
2632           cmp_status->ncomps--;
2633           if (cmp_status->ncomps > 0)
2634             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2635           else
2636             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2637         }
2638       continue;
2639
2640     retry:
2641       src = src_base;
2642       consumed_chars = consumed_chars_base;
2643       continue;
2644
2645     invalid_code:
           /* Give up any composition in progress, go back to the start
              of the offending sequence and pass its first byte through
              unchanged.  */
2646       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2647       src = src_base;
2648       consumed_chars = consumed_chars_base;
2649       ONE_MORE_BYTE (c);
2650       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2651       char_offset++;
2652       coding->errors++;
2653     }
2654
       /* Reached when the loop above is left with `break'.
          NOTE(review): no goto to this label is visible in this
          function; presumably ONE_MORE_BYTE jumps here when the source
          is exhausted -- confirm against its definition.  */
2655  no_more_source:
2656   if (cmp_status->state != COMPOSING_NO)
2657     {
2658       if (coding->mode & CODING_MODE_LAST_BLOCK)
2659         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2660       else
2661         {
2662           int i;
2663
               /* More input will follow: take the unfinished composition
                  out of charbuf and keep it in the carryover buffer for
                  the next call.  */
2664           charbuf -= cmp_status->length;
2665           for (i = 0; i < cmp_status->length; i++)
2666             cmp_status->carryover[i] = charbuf[i];
2667         }
2668     }
2669   if (last_id != charset_ascii)
2670     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2671   coding->consumed_char += consumed_chars_base;
2672   coding->consumed = src_base - coding->source;
2673   coding->charbuf_used = charbuf - coding->charbuf;
2674 }
2675
2676
     /* Store in CODES[0] and CODES[1] the leading codes for the
        emacs-mule charset ID.

        If ID is less than 0xA0, ID itself is the only leading code and
        CODES[1] is set to 0.  Otherwise CODES[0] is 0x9A, 0x9B, 0x9C or
        0x9D for ID below 0xE0, below 0xF0, below 0xF5, or at least 0xF5
        respectively, and CODES[1] is ID.

        ID and CODES are evaluated more than once, so they must not have
        side effects.  The definition deliberately does not end in a
        semicolon: the expansion is a single statement and can be used
        as the body of an `if' that has an `else'.  */
2677 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2678   do {                                          \
2679     if ((id) < 0xA0)                            \
2680       (codes)[0] = (id), (codes)[1] = 0;        \
2681     else if ((id) < 0xE0)                       \
2682       (codes)[0] = 0x9A, (codes)[1] = (id);     \
2683     else if ((id) < 0xF0)                       \
2684       (codes)[0] = 0x9B, (codes)[1] = (id);     \
2685     else if ((id) < 0xF5)                       \
2686       (codes)[0] = 0x9C, (codes)[1] = (id);     \
2687     else                                        \
2688       (codes)[0] = 0x9D, (codes)[1] = (id);     \
2689   } while (0)
2690
2691
     /* Encode the elements of CODING->charbuf in emacs-mule format and
        store the bytes at CODING->destination + CODING->produced.

        The charset list of the coding system is forced to
        Vemacs_mule_charset_list.  Negative elements of charbuf start
        annotations; a charset annotation selects a preferred charset for
        the characters that follow, provided it is in the charset list.
        A character that belongs to no charset in the list is replaced
        by CODING->default_char.

        At the end CODING_RESULT_SUCCESS is recorded, CODING->produced_char
        and CODING->produced are updated, and 0 is returned.
        NOTE(review): ASSURE_DESTINATION is defined elsewhere; presumably
        it deals with a destination that has no room left -- confirm
        whether it can leave this function early.  */
2692 static int
2693 encode_coding_emacs_mule (struct coding_system *coding)
2694 {
2695   int multibytep = coding->dst_multibyte;
2696   int *charbuf = coding->charbuf;
2697   int *charbuf_end = charbuf + coding->charbuf_used;
2698   unsigned char *dst = coding->destination + coding->produced;
2699   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2700   int safe_room = 8;
2701   int produced_chars = 0;
2702   Lisp_Object attrs, charset_list;
2703   int c;
2704   int preferred_charset_id = -1;
2705
2706   CODING_GET_INFO (coding, attrs, charset_list);
2707   if (! EQ (charset_list, Vemacs_mule_charset_list))
2708     {
2709       CODING_ATTR_CHARSET_LIST (attrs)
2710         = charset_list = Vemacs_mule_charset_list;
2711     }
2712
2713   while (charbuf < charbuf_end)
2714     {
2715       ASSURE_DESTINATION (safe_room);
2716       c = *charbuf++;
2717
2718       if (c < 0)
2719         {
2720           /* Handle an annotation.  */
               /* -C is the number of elements of the annotation,
                  counting the element just read; the next element
                  tells its kind.  */
2721           switch (*charbuf)
2722             {
2723             case CODING_ANNOTATE_COMPOSITION_MASK:
2724               /* Not yet implemented.  */
2725               break;
2726             case CODING_ANNOTATE_CHARSET_MASK:
                   /* Ignore a preferred charset that is not in the
                      charset list.  */
2727               preferred_charset_id = charbuf[3];
2728               if (preferred_charset_id >= 0
2729                   && NILP (Fmemq (make_number (preferred_charset_id),
2730                                   charset_list)))
2731                 preferred_charset_id = -1;
2732               break;
2733             default:
2734               abort ();
2735             }
               /* Skip the rest of the annotation.  */
2736           charbuf += -c - 1;
2737           continue;
2738         }
2739
2740       if (ASCII_CHAR_P (c))
2741         EMIT_ONE_ASCII_BYTE (c);
2742       else if (CHAR_BYTE8_P (c))
2743         {
2744           c = CHAR_TO_BYTE8 (c);
2745           EMIT_ONE_BYTE (c);
2746         }
2747       else
2748         {
2749           struct charset *charset;
2750           unsigned code;
2751           int dimension;
2752           int emacs_mule_id;
2753           unsigned char leading_codes[2];
2754
               /* Try the preferred charset first and fall back to
                  searching the charset list.  */
2755           if (preferred_charset_id >= 0)
2756             {
2757               charset = CHARSET_FROM_ID (preferred_charset_id);
2758               if (CHAR_CHARSET_P (c, charset))
2759                 code = ENCODE_CHAR (charset, c);
2760               else
2761                 charset = char_charset (c, charset_list, &code);
2762             }
2763           else
2764             charset = char_charset (c, charset_list, &code);
2765           if (! charset)
2766             {
                   /* C cannot be encoded; use the default character
                      instead.  */
2767               c = coding->default_char;
2768               if (ASCII_CHAR_P (c))
2769                 {
2770                   EMIT_ONE_ASCII_BYTE (c);
2771                   continue;
2772                 }
2773               charset = char_charset (c, charset_list, &code);
2774             }
2775           dimension = CHARSET_DIMENSION (charset);
2776           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
               /* Emit one or two leading codes (the second one is
                  absent when it is zero), then the code point with the
                  high bit of each of its bytes set.  */
2777           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2778           EMIT_ONE_BYTE (leading_codes[0]);
2779           if (leading_codes[1])
2780             EMIT_ONE_BYTE (leading_codes[1]);
2781           if (dimension == 1)
2782             EMIT_ONE_BYTE (code | 0x80);
2783           else
2784             {
2785               code |= 0x8080;
2786               EMIT_ONE_BYTE (code >> 8);
2787               EMIT_ONE_BYTE (code & 0xFF);
2788             }
2789         }
2790     }
2791   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2792   coding->produced_char += produced_chars;
2793   coding->produced = dst - coding->destination;
2794   return 0;
2795 }
2796
2797 \f
2798 /*** 7. ISO2022 handlers ***/
2799
2800 /* The following note describes the coding system ISO2022 briefly.
2801    Since the intention of this note is to help understand the
2802    functions in this file, some parts are NOT ACCURATE or are OVERLY
2803    SIMPLIFIED.  For thorough understanding, please refer to the
2804    original document of ISO2022.  This is equivalent to the standard
2805    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2806
2807    ISO2022 provides many mechanisms to encode several character sets
2808    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2809    is encoded using bytes less than 128.  This may make the encoded
2810    text a little bit longer, but the text passes more easily through
2811    several types of gateway, some of which strip off the MSB (Most
2812    Significant Bit).
2813
2814    There are two kinds of character sets: control character sets and
2815    graphic character sets.  The former contain control characters such
2816    as `newline' and `escape' to provide control functions (control
2817    functions are also provided by escape sequences).  The latter
2818    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2819    two control character sets and many graphic character sets.
2820
2821    Graphic character sets are classified into one of the following
2822    four classes, according to the number of bytes (DIMENSION) and
2823    number of characters in one dimension (CHARS) of the set:
2824    - DIMENSION1_CHARS94
2825    - DIMENSION1_CHARS96
2826    - DIMENSION2_CHARS94
2827    - DIMENSION2_CHARS96
2828
2829    In addition, each character set is assigned an identification tag,
2830    unique for each set, called the "final character" (denoted as <F>
2831    hereafter).  The <F> of each character set is decided by ECMA(*)
2832    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2833    (0x30..0x3F are for private use only).
2834
2835    Note (*): ECMA = European Computer Manufacturers Association
2836
2837    Here are examples of graphic character sets [NAME(<F>)]:
2838         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2839         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2840         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2841         o DIMENSION2_CHARS96 -- none for the moment
2842
2843    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2844         C0 [0x00..0x1F] -- control character plane 0
2845         GL [0x20..0x7F] -- graphic character plane 0
2846         C1 [0x80..0x9F] -- control character plane 1
2847         GR [0xA0..0xFF] -- graphic character plane 1
2848
2849    A control character set is directly designated and invoked to C0 or
2850    C1 by an escape sequence.  The most common case is that:
2851    - ISO646's  control character set is designated/invoked to C0, and
2852    - ISO6429's control character set is designated/invoked to C1,
2853    and usually these designations/invocations are omitted in encoded
2854    text.  In a 7-bit environment, only C0 can be used, and a control
2855    character for C1 is encoded by an appropriate escape sequence to
2856    fit into the environment.  All control characters for C1 are
2857    defined to have corresponding escape sequences.
2858
2859    A graphic character set is at first designated to one of four
2860    graphic registers (G0 through G3), then these graphic registers are
2861    invoked to GL or GR.  These designations and invocations can be
2862    done independently.  The most common case is that G0 is invoked to
2863    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2864    these invocations and designations are omitted in encoded text.
2865    In a 7-bit environment, only GL can be used.
2866
2867    When a graphic character set of CHARS94 is invoked to GL, codes
2868    0x20 and 0x7F of the GL area work as control characters SPACE and
2869    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2870    be used.
2871
2872    There are two ways of invocation: locking-shift and single-shift.
2873    With locking-shift, the invocation lasts until the next different
2874    invocation, whereas with single-shift, the invocation affects the
2875    following character only and doesn't affect the locking-shift
2876    state.  Invocations are done by the following control characters or
2877    escape sequences:
2878
2879    ----------------------------------------------------------------------
2880    abbrev  function                  cntrl escape seq   description
2881    ----------------------------------------------------------------------
2882    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2883    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2884    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2885    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2886    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2887    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2888    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2889    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2890    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2891    ----------------------------------------------------------------------
2892    (*) These are not used by any known coding system.
2893
2894    Control characters for these functions are defined by macros
2895    ISO_CODE_XXX in `coding.h'.
2896
2897    Designations are done by the following escape sequences:
2898    ----------------------------------------------------------------------
2899    escape sequence      description
2900    ----------------------------------------------------------------------
2901    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2902    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2903    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2904    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2905    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2906    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2907    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2908    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2909    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2910    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2911    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2912    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2913    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2914    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2915    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2916    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2917    ----------------------------------------------------------------------
2918
2919    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2920    of dimension 1, chars 94, and final character <F>, etc...
2921
2922    Note (*): Although these designations are not allowed in ISO2022,
2923    Emacs accepts them on decoding, and produces them on encoding
2924    CHARS96 character sets in a coding system which is characterized as
2925    7-bit environment, non-locking-shift, and non-single-shift.
2926
2927    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2928    '(' must be omitted.  We refer to this as "short-form" hereafter.
2929
2930    Now you may notice that there are a lot of ways of encoding the
2931    same multilingual text in ISO2022.  Actually, there exist many
2932    coding systems such as Compound Text (used in X11's inter-client
2933    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2934    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2935    localized platforms), and all of these are variants of ISO2022.
2936
2937    In addition to the above, Emacs handles two more kinds of escape
2938    sequences: ISO6429's direction specification and Emacs' private
2939    sequence for specifying character composition.
2940
2941    ISO6429's direction specification takes the following form:
2942         o CSI ']'      -- end of the current direction
2943         o CSI '0' ']'  -- end of the current direction
2944         o CSI '1' ']'  -- start of left-to-right text
2945         o CSI '2' ']'  -- start of right-to-left text
2946    The control character CSI (0x9B: control sequence introducer) is
2947    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2948
2949    Character composition specification takes the following form:
2950         o ESC '0' -- start relative composition
2951         o ESC '1' -- end composition
2952         o ESC '2' -- start rule-base composition (*)
2953         o ESC '3' -- start relative composition with alternate chars  (**)
2954         o ESC '4' -- start rule-base composition with alternate chars  (**)
2955   Since these are not standard escape sequences of any ISO standard,
2956   the use of them with these meanings is restricted to Emacs only.
2957
2958   (*) This form is used only in Emacs 20.7 and older versions,
2959   but newer versions can safely decode it.
2960   (**) This form is used only in Emacs 21.1 and newer versions,
2961   and older versions can't decode it.
2962
2963   Here's a list of example usages of these composition escape
2964   sequences (categorized by `enum composition_method').
2965
2966   COMPOSITION_RELATIVE:
2967         ESC 0 CHAR [ CHAR ] ESC 1
2968   COMPOSITION_WITH_RULE:
2969         ESC 2 CHAR [ RULE CHAR ] ESC 1
2970   COMPOSITION_WITH_ALTCHARS:
2971         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2972   COMPOSITION_WITH_RULE_ALTCHARS:
2973         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2974
     /* Table of 256 ISO code classes.
        NOTE(review): presumably indexed by byte value; it is not filled
        in within this part of the file -- confirm where it is set up.  */
2975 enum iso_code_class_type iso_code_class[256];
2976
     /* Nonzero if charset ID does not exceed CODING->max_charset_id and
        its entry in CODING->safe_charsets is not 255.  The table that
        setup_iso_safe_charsets builds is filled with 255 and only the
        entries of usable charsets are overwritten with a register
        number, so 255 marks a charset that is not safe.  ID is not
        checked against a lower bound; callers must pass a non-negative
        value.  */
2977 #define SAFE_CHARSET_P(coding, id)      \
2978   ((id) <= (coding)->max_charset_id     \
2979    && (coding)->safe_charsets[id] != 255)
2980
2981
     /* Nonzero if CODING_ISO_INITIAL of the coding system stored for
        CATEGORY in coding_categories, with second argument 1, is
        non-negative.
        NOTE(review): presumably this means a charset is initially
        designated to graphic register G1, so that shift-out is
        meaningful -- confirm against the definition of
        CODING_ISO_INITIAL.  */
2982 #define SHIFT_OUT_OK(category)  \
2983   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2984
     /* Make sure the coding system attribute vector ATTRS has a
        safe-charsets string in its coding_attr_safe_charsets slot.

        If the ISO flags include CODING_ISO_FLAG_FULL_SUPPORT and the
        charset list is not Viso_2022_charset_list, the charset list is
        replaced by it and the slot is cleared.  If the slot already
        holds a string, nothing else is done.

        Otherwise a string with one byte per charset id, from 0 up to
        the largest id in the charset list, is created and filled with
        255.  For each charset in the list the byte at its id is set to
        the register given for it in the coding_attr_iso_request alist
        or, if there is none, to the default register for 96-character
        or 94-character sets taken from coding_attr_iso_usage, provided
        that default is less than 4.  */
2985 static void
2986 setup_iso_safe_charsets (Lisp_Object attrs)
2987 {
2988   Lisp_Object charset_list, safe_charsets;
2989   Lisp_Object request;
2990   Lisp_Object reg_usage;
2991   Lisp_Object tail;
2992   int reg94, reg96;
2993   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2994   int max_charset_id;
2995
2996   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2997   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2998       && ! EQ (charset_list, Viso_2022_charset_list))
2999     {
3000       CODING_ATTR_CHARSET_LIST (attrs)
3001         = charset_list = Viso_2022_charset_list;
           /* The charset list changed, so a string computed earlier
              must be rebuilt.  */
3002       ASET (attrs, coding_attr_safe_charsets, Qnil);
3003     }
3004
       /* Already computed.  */
3005   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3006     return;
3007
       /* The string must be long enough to be indexed by the largest
          charset id in the list.  */
3008   max_charset_id = 0;
3009   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3010     {
3011       int id = XINT (XCAR (tail));
3012       if (max_charset_id < id)
3013         max_charset_id = id;
3014     }
3015
3016   safe_charsets = make_uninit_string (max_charset_id + 1);
3017   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
3018   request = AREF (attrs, coding_attr_iso_request);
3019   reg_usage = AREF (attrs, coding_attr_iso_usage);
3020   reg94 = XINT (XCAR (reg_usage));
3021   reg96 = XINT (XCDR (reg_usage));
3022
3023   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3024     {
3025       Lisp_Object id;
3026       Lisp_Object reg;
3027       struct charset *charset;
3028
3029       id = XCAR (tail);
3030       charset = CHARSET_FROM_ID (XINT (id));
           /* An explicit request for this charset takes precedence over
              the default registers.  */
3031       reg = Fcdr (Fassq (id, request));
3032       if (! NILP (reg))
3033         SSET (safe_charsets, XINT (id), XINT (reg));
3034       else if (charset->iso_chars_96)
3035         {
3036           if (reg96 < 4)
3037             SSET (safe_charsets, XINT (id), reg96);
3038         }
3039       else
3040         {
3041           if (reg94 < 4)
3042             SSET (safe_charsets, XINT (id), reg94);
3043         }
3044     }
3045   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3046 }
3047
3048
3049 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3050    Check if a text is encoded in one of ISO-2022 based coding systems.
3051    If it is, return 1, else return 0.  */
3052
3053 static int
3054 detect_coding_iso_2022 (struct coding_system *coding,
3055                         struct coding_detection_info *detect_info)
3056 {
3057   const unsigned char *src = coding->source, *src_base = src;
3058   const unsigned char *src_end = coding->source + coding->src_bytes;
3059   int multibytep = coding->src_multibyte;
3060   int single_shifting = 0;
3061   int id;
3062   int c, c1;
3063   int consumed_chars = 0;
3064   int i;
3065   int rejected = 0;
3066   int found = 0;
3067   int composition_count = -1;
3068
3069   detect_info->checked |= CATEGORY_MASK_ISO;
3070
3071   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3072     {
3073       struct coding_system *this = &(coding_categories[i]);
3074       Lisp_Object attrs, val;
3075
3076       if (this->id < 0)
3077         continue;
3078       attrs = CODING_ID_ATTRS (this->id);
3079       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3080           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3081         setup_iso_safe_charsets (attrs);
3082       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3083       this->max_charset_id = SCHARS (val) - 1;
3084       this->safe_charsets = SDATA (val);
3085     }
3086
3087   /* A coding system of this category is always ASCII compatible.  */
3088   src += coding->head_ascii;
3089
3090   while (rejected != CATEGORY_MASK_ISO)
3091     {
3092       src_base = src;
3093       ONE_MORE_BYTE (c);
3094       switch (c)
3095         {
3096         case ISO_CODE_ESC:
3097           if (inhibit_iso_escape_detection)
3098             break;
3099           single_shifting = 0;
3100           ONE_MORE_BYTE (c);
3101           if (c >= '(' && c <= '/')
3102             {
3103               /* Designation sequence for a charset of dimension 1.  */
3104               ONE_MORE_BYTE (c1);
3105               if (c1 < ' ' || c1 >= 0x80
3106                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3107                 /* Invalid designation sequence.  Just ignore.  */
3108                 break;
3109             }
3110           else if (c == '$')
3111             {
3112               /* Designation sequence for a charset of dimension 2.  */
3113               ONE_MORE_BYTE (c);
3114               if (c >= '@' && c <= 'B')
3115                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3116                 id = iso_charset_table[1][0][c];
3117               else if (c >= '(' && c <= '/')
3118                 {
3119                   ONE_MORE_BYTE (c1);
3120                   if (c1 < ' ' || c1 >= 0x80
3121                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3122                     /* Invalid designation sequence.  Just ignore.  */
3123                     break;
3124                 }
3125               else
3126                 /* Invalid designation sequence.  Just ignore it.  */
3127                 break;
3128             }
3129           else if (c == 'N' || c == 'O')
3130             {
3131               /* ESC <Fe> for SS2 or SS3.  */
3132               single_shifting = 1;
3133               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3134               break;
3135             }
3136           else if (c == '1')
3137             {
3138               /* End of composition.  */
3139               if (composition_count < 0
3140                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3141                 /* Invalid */
3142                 break;
3143               composition_count = -1;
3144               found |= CATEGORY_MASK_ISO;
3145             }
3146           else if (c >= '0' && c <= '4')
3147             {
3148               /* ESC <Fp> for start/end composition.  */
3149               composition_count = 0;
3150               break;
3151             }
3152           else
3153             {
3154               /* Invalid escape sequence.  Just ignore it.  */
3155               break;
3156             }
3157
3158           /* We found a valid designation sequence for CHARSET.  */
3159           rejected |= CATEGORY_MASK_ISO_8BIT;
3160           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3161                               id))
3162             found |= CATEGORY_MASK_ISO_7;
3163           else
3164             rejected |= CATEGORY_MASK_ISO_7;
3165           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3166                               id))
3167             found |= CATEGORY_MASK_ISO_7_TIGHT;
3168           else
3169             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3170           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3171                               id))
3172             found |= CATEGORY_MASK_ISO_7_ELSE;
3173           else
3174             rejected |= CATEGORY_MASK_ISO_7_ELSE;
3175           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3176                               id))
3177             found |= CATEGORY_MASK_ISO_8_ELSE;
3178           else
3179             rejected |= CATEGORY_MASK_ISO_8_ELSE;
3180           break;
3181
3182         case ISO_CODE_SO:
3183         case ISO_CODE_SI:
3184           /* Locking shift out/in.  */
3185           if (inhibit_iso_escape_detection)
3186             break;
3187           single_shifting = 0;
3188           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3189           break;
3190
3191         case ISO_CODE_CSI:
3192           /* Control sequence introducer.  */
3193           single_shifting = 0;
3194           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3195           found |= CATEGORY_MASK_ISO_8_ELSE;
3196           goto check_extra_latin;
3197
3198         case ISO_CODE_SS2:
3199         case ISO_CODE_SS3:
3200           /* Single shift.   */
3201           if (inhibit_iso_escape_detection)
3202             break;
3203           single_shifting = 0;
3204           rejected |= CATEGORY_MASK_ISO_7BIT;
3205           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3206               & CODING_ISO_FLAG_SINGLE_SHIFT)
3207             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
3208           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3209               & CODING_ISO_FLAG_SINGLE_SHIFT)
3210             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3211           if (single_shifting)
3212             break;
3213           goto check_extra_latin;
3214
3215         default:
3216           if (c < 0)
3217             continue;
3218           if (c < 0x80)
3219             {
3220               if (composition_count >= 0)
3221                 composition_count++;
3222               single_shifting = 0;
3223               break;
3224             }
3225           if (c >= 0xA0)
3226             {
3227               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3228               found |= CATEGORY_MASK_ISO_8_1;
3229               /* Check the length of succeeding codes of the range
3230                  0xA0..0FF.  If the byte length is even, we include
3231                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3232                  only when we are not single shifting.  */
3233               if (! single_shifting
3234                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3235                 {
3236                   int i = 1;
3237                   while (src < src_end)
3238                     {
3239                       src_base = src;
3240                       ONE_MORE_BYTE (c);
3241                       if (c < 0xA0)
3242                         {
3243                           src = src_base;
3244                           break;
3245                         }
3246                       i++;
3247                     }
3248
3249                   if (i & 1 && src < src_end)
3250                     {
3251                       rejected |= CATEGORY_MASK_ISO_8_2;
3252                       if (composition_count >= 0)
3253                         composition_count += i;
3254                     }
3255                   else
3256                     {
3257                       found |= CATEGORY_MASK_ISO_8_2;
3258                       if (composition_count >= 0)
3259                         composition_count += i / 2;
3260                     }
3261                 }
3262               break;
3263             }
3264         check_extra_latin:
3265           single_shifting = 0;
3266           if (! VECTORP (Vlatin_extra_code_table)
3267               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3268             {
3269               rejected = CATEGORY_MASK_ISO;
3270               break;
3271             }
3272           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3273               & CODING_ISO_FLAG_LATIN_EXTRA)
3274             found |= CATEGORY_MASK_ISO_8_1;
3275           else
3276             rejected |= CATEGORY_MASK_ISO_8_1;
3277           rejected |= CATEGORY_MASK_ISO_8_2;
3278         }
3279     }
3280   detect_info->rejected |= CATEGORY_MASK_ISO;
3281   return 0;
3282
3283  no_more_source:
3284   detect_info->rejected |= rejected;
3285   detect_info->found |= (found & ~rejected);
3286   return 1;
3287 }
3288
3289
3290 /* Record in `coding' that the charset selected by DIM, CHARS_96 and
3291    the final byte FINAL is designated to graphic register REG.
3292    CHARS_96 is set to -1 when the escape sequence should be kept.  */
3293 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3294   do {                                                                  \
3295     int id, prev;                                                       \
3296                                                                         \
3297     if (final >= '0' && final < 128                                     \
3298         && ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0)       \
3299         && SAFE_CHARSET_P (coding, id))                                 \
3300       {                                                                 \
3301         prev = CODING_ISO_DESIGNATION (coding, reg);                    \
3302         /* Apply the USE_ROMAN / USE_OLDJIS charset substitutions.  */  \
3303         if (id == charset_jisx0201_roman)                               \
3304           id = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
3305                 ? charset_ascii : id);                                  \
3306         else if (id == charset_jisx0208_1978)                           \
3307           id = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
3308                 ? charset_jisx0208 : id);                               \
3309         CODING_ISO_DESIGNATION (coding, reg) = id;                      \
3310         /* An ASCII designation to a register whose previous            \
3311            designation was invalid is kept as well.  */                 \
3312         if (prev == -2 && id == charset_ascii)                          \
3313           chars_96 = -1;                                                \
3314       }                                                                 \
3315     else                                                                \
3316       {                                                                 \
3317         /* Invalid or unsafe designation: remember that in REG.  */     \
3318         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
3319         chars_96 = -1;                                                  \
3320       }                                                                 \
3321   } while (0)
3322
3323
3324 /* Handle these composition sequences (ALT: alternate char):
3325
3326    (1) relative composition: ESC 0 CHAR ... ESC 1
3327    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3328    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3329    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3330
3331    When the start sequence (ESC 0/2/3/4) is found, this annotation
3332    header is produced.
3333
3334         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3335
3336    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3337    produced until the end sequence (ESC 1) is found:
3338
3339    (1) CHAR ... CHAR
3340    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3341    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3342    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3343
3344    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3345    annotation header are updated as below:
3346
3347    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3348    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3349    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3350    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3351
3352    If an error is found while composing, the annotation header is
3353    changed to:
3354
3355         [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3356
3357    and the sequence [ -2 DECODED-RULE ] is changed to the original
3358    byte sequence as below:
3359         o the original byte sequence is B: [ B -1 ]
3360         o the original byte sequence is B1 B2: [ B1 B2 ]
3361    and the sequence [ -1 -1 ] is changed to the original byte
3362    sequence:
3363         [ ESC '0' ]
3364 */
3365
3366 /* Decode the composition rule that starts at byte C1 (a variable of
3367    the expansion site), reading one more source byte when it is in
3368    the new two-byte format.  Set RULE to the encoded rule, or to a
3369    negative value if it is invalid, and NBYTES to its byte length.  */
3370
3371 #define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
3372   do {                                                                  \
3373     rule = c1 - 32;             /* rule bytes are offset by 32 */       \
3374     if (rule < 0)                                                       \
3375       break;                    /* invalid; NBYTES is left unset */     \
3376     if (rule < 81)              /* old format (before ver.21) */        \
3377       {                                                                 \
3378         int gref = (rule) / 9;  /* old rule value is GREF * 9 + NREF */ \
3379         int nref = (rule) % 9;                                          \
3380         if (gref == 4) gref = 10;                                       \
3381         if (nref == 4) nref = 10;                                       \
3382         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
3383         nbytes = 1;                                                     \
3384       }                                                                 \
3385     else                        /* new format (after ver.21) */         \
3386       {                                                                 \
3387         int c;                                                          \
3388                                                                         \
3389         ONE_MORE_BYTE (c);      /* second byte of the rule */           \
3390         rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32);             \
3391         if (rule >= 0)                                                  \
3392           rule += 0x100;   /* to distinguish it from the old format */  \
3393         nbytes = 2;                                                     \
3394       }                                                                 \
3395   } while (0)
3396
     /* Overwrite the pair at charbuf[idx] and charbuf[idx + 1] with the
        byte(s) that the decoded composition rule RULE came from:
        [ B -1 ] for the old one-byte format (RULE < 0x100), [ B1 B2 ] for
        the new format.  The number of bytes restored is added to
        new_chars.  `charbuf', `idx' and `new_chars' are variables of the
        expansion site.  RULE is evaluated more than once.  */
3397 #define ENCODE_COMPOSITION_RULE(rule)                                 \
3398   do {                                                                \
3399     int gref = ((rule) % 0x100) / 12, nref = ((rule) % 0x100) % 12;   \
3400                                                                       \
3401     if ((rule) < 0x100)         /* old format */                      \
3402       {                                                               \
3403         if (gref == 10) gref = 4;                                     \
3404         if (nref == 10) nref = 4;                                     \
3405         charbuf[idx] = 32 + gref * 9 + nref;                          \
3406         charbuf[idx + 1] = -1;                                        \
3407         new_chars++;                                                  \
3408       }                                                               \
3409     else                                /* new format */              \
3410       {                                                               \
3411         charbuf[idx] = 32 + 81 + gref;                                \
3412         charbuf[idx + 1] = 32 + nref;                                 \
3413         new_chars += 2;                                               \
3414       }                                                               \
3415   } while (0)
3416
3417 /* Abandon the composition in progress as invalid: rewrite its
3418    annotation data, the cmp_status->length elements ending at CHARBUF,
3419    into source bytes.  Return the number of characters that yields.  */
3420
3421 static int finish_composition (int *, struct composition_status *);
3422
3423 static int
3424 finish_composition (int *charbuf, struct composition_status *cmp_status)
3425 {
3426   int idx = - cmp_status->length;
3427   int new_chars = cmp_status->nchars;
3428   int starter = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3429                  : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3430                  : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3431                  /* otherwise COMPOSITION_WITH_RULE_ALTCHARS */
3432                  : '4');
3433   /* Overwrite the annotation header with the original ESC sequence.  */
3434   charbuf[idx] = ISO_CODE_ESC;
3435   charbuf[idx + 1] = starter;
3436   charbuf[idx + 2] = -2;
3437   charbuf[idx + 3] = 0;
3438   charbuf[idx + 4] = -1;
3439   idx += 5;
3440   while (cmp_status->method >= COMPOSITION_WITH_RULE && idx < 0)
3441     if (charbuf[idx] == -2)
3442       {
3443         ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3444         idx += 2;
3445       }
3446     else if (charbuf[idx] == -1)
3447       {
3448         charbuf[idx] = ISO_CODE_ESC;
3449         charbuf[idx + 1] = '0';
3450         new_chars += 2;
3451         idx += 2;
3452       }
3453     else
3454       idx++;
3455   cmp_status->state = COMPOSING_NO;
3456   return new_chars;
3457 }
3458
3459 /* If characters are under composition, finish the composition as
        invalid with finish_composition and add the value it returns to
        `char_offset'.  `cmp_status', `charbuf' and `char_offset' are
        variables of the expansion site.  */
3460 #define MAYBE_FINISH_COMPOSITION()                              \
3461   do {                                                          \
3462     if (cmp_status->state != COMPOSING_NO)                      \
3463       char_offset += finish_composition (charbuf, cmp_status);  \
3464   } while (0)
3465
3466 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4
3467    (C1 is the byte that follows ESC).
3468    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3469    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3470    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3471    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3472
3473    An ESC 0 seen while reading the first part of an ESC 3 / ESC 4
3474    composition only stores [ -1 -1 ].  Otherwise produce this now:
3475    [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3476 */
3477
3478 #define DECODE_COMPOSITION_START(c1)                                       \
3479   do {                                                                     \
3480     if (c1 == '0'                                                          \
3481         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
3482              && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
3483             || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
3484                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3485       {   /* ESC 0 between the two parts: store the [ -1 -1 ] mark.  */    \
3486         *charbuf++ = -1;                                                   \
3487         *charbuf++= -1;                                                    \
3488         cmp_status->state = COMPOSING_CHAR;                                \
3489         cmp_status->length += 2;                                           \
3490       }                                                                    \
3491     else                                                                   \
3492       {   /* A new composition: give up any unfinished one first.  */      \
3493         MAYBE_FINISH_COMPOSITION ();                                       \
3494         cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE             \
3495                               : c1 == '2' ? COMPOSITION_WITH_RULE          \
3496                               : c1 == '3' ? COMPOSITION_WITH_ALTCHARS      \
3497                               : COMPOSITION_WITH_RULE_ALTCHARS);           \
3498         cmp_status->state   /* ESC 3 and ESC 4 begin with components */    \
3499           = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);       \
3500         ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
3501         cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
3502         cmp_status->nchars = cmp_status->ncomps = 0;                       \
3503         coding->annotated = 1;                                             \
3504       }                                                                    \
3505   } while (0)
3506
3507
3508 /* Handle composition end sequence ESC 1.
        The end is rejected -- the composition is finished as invalid and
        control jumps to the `invalid_code' label of the expansion site --
        if no CHAR has been stored, or if the state is COMPOSING_CHAR under
        COMPOSITION_WITH_RULE, or is not COMPOSING_CHAR under any other
        method.  Otherwise LENGTH and NCHARS are patched in the annotation
        header, which starts cmp_status->length elements before `charbuf',
        NCHARS is added to `char_offset', and composing stops.  */
3509
3510 #define DECODE_COMPOSITION_END()                                        \
3511   do {                                                                  \
3512     if (cmp_status->nchars == 0                                         \
3513         || ((cmp_status->state == COMPOSING_CHAR)                       \
3514             == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
3515       {                                                                 \
3516         MAYBE_FINISH_COMPOSITION ();                                    \
3517         goto invalid_code;                                              \
3518       }                                                                 \
3519     if (cmp_status->method == COMPOSITION_WITH_ALTCHARS) /* -LENGTH */  \
3520       charbuf[- cmp_status->length] -= cmp_status->ncomps + 2;          \
3521     else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
3522       charbuf[- cmp_status->length] -= cmp_status->ncomps * 3;          \
3523     charbuf[- cmp_status->length + 2] = cmp_status->nchars; /* NCHARS */ \
3524     char_offset += cmp_status->nchars;                                  \
3525     cmp_status->state = COMPOSING_NO;                                   \
3526   } while (0)
3527
3528 /* Store the pair [ -2 RULE ] in charbuf, add 2 to cmp_status->length
3529    and decrement cmp_status->state.  RULE is evaluated once.  */
3530 #define STORE_COMPOSITION_RULE(rule)    \
3531   do {                                  \
3532     *charbuf++ = -2;                    \
3533     *charbuf++ = (rule);                \
3534     cmp_status->length += 2;            \
3535     cmp_status->state--;                \
3536   } while (0)
3537
3538 /* Append C, a composed char or a component char, to charbuf and
3539    update the counters and the state of cmp_status to match.  */
3540
3541 #define STORE_COMPOSITION_CHAR(c)                                       \
3542   do {                                                                  \
3543     *charbuf++ = (c);                                                   \
3544     cmp_status->length += 1;                                            \
3545     if (cmp_status->state != COMPOSING_CHAR)                            \
3546       cmp_status->ncomps += 1;                                          \
3547     else                                                                \
3548       cmp_status->nchars += 1;                                          \
3549     if (cmp_status->method == COMPOSITION_WITH_RULE                     \
3550         || (cmp_status->state == COMPOSING_COMPONENT_CHAR               \
3551             && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))   \
3552       cmp_status->state++;                                              \
3553   } while (0)
3554
3555
3556 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3557
3558 static void
3559 decode_coding_iso_2022 (struct coding_system *coding)
3560 {
3561   const unsigned char *src = coding->source + coding->consumed;
3562   const unsigned char *src_end = coding->source + coding->src_bytes;
3563   const unsigned char *src_base;
3564   int *charbuf = coding->charbuf + coding->charbuf_used;
3565   /* We may produce two annocations (charset and composition) in one
3566      loop and one more charset annocation at the end.  */
3567   int *charbuf_end
3568     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3569   int consumed_chars = 0, consumed_chars_base;
3570   int multibytep = coding->src_multibyte;
3571   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3572   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3573   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3574   int charset_id_2, charset_id_3;
3575   struct charset *charset;
3576   int c;
3577   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3578   Lisp_Object attrs, charset_list;
3579   int char_offset = coding->produced_char;
3580   int last_offset = char_offset;
3581   int last_id = charset_ascii;
3582   int eol_crlf =
3583     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3584   int byte_after_cr = -1;
3585   int i;
3586
3587   CODING_GET_INFO (coding, attrs, charset_list);
3588   setup_iso_safe_charsets (attrs);
3589   /* Charset list may have been changed.  */
3590   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3591   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3592
3593   if (cmp_status->state != COMPOSING_NO)
3594     {
3595       for (i = 0; i < cmp_status->length; i++)
3596         *charbuf++ = cmp_status->carryover[i];
3597       coding->annotated = 1;
3598     }
3599
3600   while (1)
3601     {
3602       int c1, c2, c3;
3603
3604       src_base = src;
3605       consumed_chars_base = consumed_chars;
3606
3607       if (charbuf >= charbuf_end)
3608         {
3609           if (byte_after_cr >= 0)
3610             src_base--;
3611           break;
3612         }
3613
3614       if (byte_after_cr >= 0)
3615         c1 = byte_after_cr, byte_after_cr = -1;
3616       else
3617         ONE_MORE_BYTE (c1);
3618       if (c1 < 0)
3619         goto invalid_code;
3620
3621       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3622         {
3623           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3624           char_offset++;
3625           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3626           continue;
3627         }
3628
3629       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3630         {
3631           if (c1 == ISO_CODE_ESC)
3632             {
3633               if (src + 1 >= src_end)
3634                 goto no_more_source;
3635               *charbuf++ = ISO_CODE_ESC;
3636               char_offset++;
3637               if (src[0] == '%' && src[1] == '@')
3638                 {
3639                   src += 2;
3640                   consumed_chars += 2;
3641                   char_offset += 2;
3642                   /* We are sure charbuf can contain two more chars. */
3643                   *charbuf++ = '%';
3644                   *charbuf++ = '@';
3645                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3646                 }
3647             }
3648           else
3649             {
3650               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3651               char_offset++;
3652             }
3653           continue;
3654         }
3655
3656       if ((cmp_status->state == COMPOSING_RULE
3657            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3658           && c1 != ISO_CODE_ESC)
3659         {
3660           int rule, nbytes;
3661
3662           DECODE_COMPOSITION_RULE (rule, nbytes);
3663           if (rule < 0)
3664             goto invalid_code;
3665           STORE_COMPOSITION_RULE (rule);
3666           continue;
3667         }
3668
3669       /* We produce at most one character.  */
3670       switch (iso_code_class [c1])
3671         {
3672         case ISO_0x20_or_0x7F:
3673           if (charset_id_0 < 0
3674               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3675             /* This is SPACE or DEL.  */
3676             charset = CHARSET_FROM_ID (charset_ascii);
3677           else
3678             charset = CHARSET_FROM_ID (charset_id_0);
3679           break;
3680
3681         case ISO_graphic_plane_0:
3682           if (charset_id_0 < 0)
3683             charset = CHARSET_FROM_ID (charset_ascii);
3684           else
3685             charset = CHARSET_FROM_ID (charset_id_0);
3686           break;
3687
3688         case ISO_0xA0_or_0xFF:
3689           if (charset_id_1 < 0
3690               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3691               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3692             goto invalid_code;
3693           /* This is a graphic character, we fall down ... */
3694
3695         case ISO_graphic_plane_1:
3696           if (charset_id_1 < 0)
3697             goto invalid_code;
3698           charset = CHARSET_FROM_ID (charset_id_1);
3699           break;
3700
3701         case ISO_control_0:
3702           if (eol_crlf && c1 == '\r')
3703             ONE_MORE_BYTE (byte_after_cr);
3704           MAYBE_FINISH_COMPOSITION ();
3705           charset = CHARSET_FROM_ID (charset_ascii);
3706           break;
3707
3708         case ISO_control_1:
3709           goto invalid_code;
3710
3711         case ISO_shift_out:
3712           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3713               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3714             goto invalid_code;
3715           CODING_ISO_INVOCATION (coding, 0) = 1;
3716           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3717           continue;
3718
3719         case ISO_shift_in:
3720           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3721             goto invalid_code;
3722           CODING_ISO_INVOCATION (coding, 0) = 0;
3723           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3724           continue;
3725
3726         case ISO_single_shift_2_7:
3727           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3728             goto invalid_code;
3729         case ISO_single_shift_2:
3730           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3731             goto invalid_code;
3732           /* SS2 is handled as an escape sequence of ESC 'N' */
3733           c1 = 'N';
3734           goto label_escape_sequence;
3735
3736         case ISO_single_shift_3:
3737           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3738             goto invalid_code;
3739           /* SS2 is handled as an escape sequence of ESC 'O' */
3740           c1 = 'O';
3741           goto label_escape_sequence;
3742
3743         case ISO_control_sequence_introducer:
3744           /* CSI is handled as an escape sequence of ESC '[' ...  */
3745           c1 = '[';
3746           goto label_escape_sequence;
3747
3748         case ISO_escape:
3749           ONE_MORE_BYTE (c1);
3750         label_escape_sequence:
3751           /* Escape sequences handled here are invocation,
3752              designation, direction specification, and character
3753              composition specification.  */
3754           switch (c1)
3755             {
3756             case '&':           /* revision of following character set */
3757               ONE_MORE_BYTE (c1);
3758               if (!(c1 >= '@' && c1 <= '~'))
3759                 goto invalid_code;
3760               ONE_MORE_BYTE (c1);
3761               if (c1 != ISO_CODE_ESC)
3762                 goto invalid_code;
3763               ONE_MORE_BYTE (c1);
3764               goto label_escape_sequence;
3765
3766             case '$':           /* designation of 2-byte character set */
3767               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3768                 goto invalid_code;
3769               {
3770                 int reg, chars96;
3771
3772                 ONE_MORE_BYTE (c1);
3773                 if (c1 >= '@' && c1 <= 'B')
3774                   {     /* designation of JISX0208.1978, GB2312.1980,
3775                            or JISX0208.1980 */
3776                     reg = 0, chars96 = 0;
3777                   }
3778                 else if (c1 >= 0x28 && c1 <= 0x2B)
3779                   { /* designation of DIMENSION2_CHARS94 character set */
3780                     reg = c1 - 0x28, chars96 = 0;
3781                     ONE_MORE_BYTE (c1);
3782                   }
3783                 else if (c1 >= 0x2C && c1 <= 0x2F)
3784                   { /* designation of DIMENSION2_CHARS96 character set */
3785                     reg = c1 - 0x2C, chars96 = 1;
3786                     ONE_MORE_BYTE (c1);
3787                   }
3788                 else
3789                   goto invalid_code;
3790                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3791                 /* We must update these variables now.  */
3792                 if (reg == 0)
3793                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3794                 else if (reg == 1)
3795                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3796                 if (chars96 < 0)
3797                   goto invalid_code;
3798               }
3799               continue;
3800
3801             case 'n':           /* invocation of locking-shift-2 */
3802               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3803                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3804                 goto invalid_code;
3805               CODING_ISO_INVOCATION (coding, 0) = 2;
3806               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3807               continue;
3808
3809             case 'o':           /* invocation of locking-shift-3 */
3810               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3811                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3812                 goto invalid_code;
3813               CODING_ISO_INVOCATION (coding, 0) = 3;
3814               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3815               continue;
3816
3817             case 'N':           /* invocation of single-shift-2 */
3818               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3819                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3820                 goto invalid_code;
3821               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3822               if (charset_id_2 < 0)
3823                 charset = CHARSET_FROM_ID (charset_ascii);
3824               else
3825                 charset = CHARSET_FROM_ID (charset_id_2);
3826               ONE_MORE_BYTE (c1);
3827               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3828                 goto invalid_code;
3829               break;
3830
3831             case 'O':           /* invocation of single-shift-3 */
3832               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3833                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3834                 goto invalid_code;
3835               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3836               if (charset_id_3 < 0)
3837                 charset = CHARSET_FROM_ID (charset_ascii);
3838               else
3839                 charset = CHARSET_FROM_ID (charset_id_3);
3840               ONE_MORE_BYTE (c1);
3841               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3842                 goto invalid_code;
3843               break;
3844
3845             case '0': case '2': case '3': case '4': /* start composition */
3846               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3847                 goto invalid_code;
3848               if (last_id != charset_ascii)
3849                 {
3850                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3851                   last_id = charset_ascii;
3852                   last_offset = char_offset;
3853                 }
3854               DECODE_COMPOSITION_START (c1);
3855               continue;
3856
3857             case '1':           /* end composition */
3858               if (cmp_status->state == COMPOSING_NO)
3859                 goto invalid_code;
3860               DECODE_COMPOSITION_END ();
3861               continue;
3862
3863             case '[':           /* specification of direction */
3864               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3865                 goto invalid_code;
              /* For the moment, nested direction is not supported.
                 So, `coding->mode & CODING_MODE_DIRECTION' zero means
                 left-to-right, and nonzero means right-to-left.  */
3869               ONE_MORE_BYTE (c1);
3870               switch (c1)
3871                 {
3872                 case ']':       /* end of the current direction */
3873                   coding->mode &= ~CODING_MODE_DIRECTION;
3874
3875                 case '0':       /* end of the current direction */
3876                 case '1':       /* start of left-to-right direction */
3877                   ONE_MORE_BYTE (c1);
3878                   if (c1 == ']')
3879                     coding->mode &= ~CODING_MODE_DIRECTION;
3880                   else
3881                     goto invalid_code;
3882                   break;
3883
3884                 case '2':       /* start of right-to-left direction */
3885                   ONE_MORE_BYTE (c1);
3886                   if (c1 == ']')
3887                     coding->mode |= CODING_MODE_DIRECTION;
3888                   else
3889                     goto invalid_code;
3890                   break;
3891
3892                 default:
3893                   goto invalid_code;
3894                 }
3895               continue;
3896
3897             case '%':
3898               ONE_MORE_BYTE (c1);
3899               if (c1 == '/')
3900                 {
3901                   /* CTEXT extended segment:
3902                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3903                      We keep these bytes as is for the moment.
3904                      They may be decoded by post-read-conversion.  */
3905                   int dim, M, L;
3906                   int size;
3907
3908                   ONE_MORE_BYTE (dim);
3909                   if (dim < 0 || dim > 4)
3910                     goto invalid_code;
3911                   ONE_MORE_BYTE (M);
3912                   if (M < 128)
3913                     goto invalid_code;
3914                   ONE_MORE_BYTE (L);
3915                   if (L < 128)
3916                     goto invalid_code;
3917                   size = ((M - 128) * 128) + (L - 128);
3918                   if (charbuf + 6 > charbuf_end)
3919                     goto break_loop;
3920                   *charbuf++ = ISO_CODE_ESC;
3921                   *charbuf++ = '%';
3922                   *charbuf++ = '/';
3923                   *charbuf++ = dim;
3924                   *charbuf++ = BYTE8_TO_CHAR (M);
3925                   *charbuf++ = BYTE8_TO_CHAR (L);
3926                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3927                 }
3928               else if (c1 == 'G')
3929                 {
3930                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3931                      ESC % G --UTF-8-BYTES-- ESC % @
3932                      We keep these bytes as is for the moment.
3933                      They may be decoded by post-read-conversion.  */
3934                   if (charbuf + 3 > charbuf_end)
3935                     goto break_loop;
3936                   *charbuf++ = ISO_CODE_ESC;
3937                   *charbuf++ = '%';
3938                   *charbuf++ = 'G';
3939                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3940                 }
3941               else
3942                 goto invalid_code;
3943               continue;
3944               break;
3945
3946             default:
3947               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3948                 goto invalid_code;
3949               {
3950                 int reg, chars96;
3951
3952                 if (c1 >= 0x28 && c1 <= 0x2B)
3953                   { /* designation of DIMENSION1_CHARS94 character set */
3954                     reg = c1 - 0x28, chars96 = 0;
3955                     ONE_MORE_BYTE (c1);
3956                   }
3957                 else if (c1 >= 0x2C && c1 <= 0x2F)
3958                   { /* designation of DIMENSION1_CHARS96 character set */
3959                     reg = c1 - 0x2C, chars96 = 1;
3960                     ONE_MORE_BYTE (c1);
3961                   }
3962                 else
3963                   goto invalid_code;
3964                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3965                 /* We must update these variables now.  */
3966                 if (reg == 0)
3967                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3968                 else if (reg == 1)
3969                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3970                 if (chars96 < 0)
3971                   goto invalid_code;
3972               }
3973               continue;
3974             }
3975         }
3976
3977       if (cmp_status->state == COMPOSING_NO
3978           && charset->id != charset_ascii
3979           && last_id != charset->id)
3980         {
3981           if (last_id != charset_ascii)
3982             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3983           last_id = charset->id;
3984           last_offset = char_offset;
3985         }
3986
3987       /* Now we know CHARSET and 1st position code C1 of a character.
3988          Produce a decoded character while getting 2nd and 3rd
3989          position codes C2, C3 if necessary.  */
3990       if (CHARSET_DIMENSION (charset) > 1)
3991         {
3992           ONE_MORE_BYTE (c2);
3993           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3994               || ((c1 & 0x80) != (c2 & 0x80)))
3995             /* C2 is not in a valid range.  */
3996             goto invalid_code;
3997           if (CHARSET_DIMENSION (charset) == 2)
3998             c1 = (c1 << 8) | c2;
3999           else
4000             {
4001               ONE_MORE_BYTE (c3);
4002               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
4003                   || ((c1 & 0x80) != (c3 & 0x80)))
4004                 /* C3 is not in a valid range.  */
4005                 goto invalid_code;
4006               c1 = (c1 << 16) | (c2 << 8) | c2;
4007             }
4008         }
4009       c1 &= 0x7F7F7F;
4010       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4011       if (c < 0)
4012         {
4013           MAYBE_FINISH_COMPOSITION ();
4014           for (; src_base < src; src_base++, char_offset++)
4015             {
4016               if (ASCII_BYTE_P (*src_base))
4017                 *charbuf++ = *src_base;
4018               else
4019                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4020             }
4021         }
4022       else if (cmp_status->state == COMPOSING_NO)
4023         {
4024           *charbuf++ = c;
4025           char_offset++;
4026         }
4027       else if ((cmp_status->state == COMPOSING_CHAR
4028                 ? cmp_status->nchars
4029                 : cmp_status->ncomps)
4030                >= MAX_COMPOSITION_COMPONENTS)
4031         {
4032           /* Too long composition.  */
4033           MAYBE_FINISH_COMPOSITION ();
4034           *charbuf++ = c;
4035           char_offset++;
4036         }
4037       else
4038         STORE_COMPOSITION_CHAR (c);
4039       continue;
4040
4041     invalid_code:
4042       MAYBE_FINISH_COMPOSITION ();
4043       src = src_base;
4044       consumed_chars = consumed_chars_base;
4045       ONE_MORE_BYTE (c);
4046       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4047       char_offset++;
4048       coding->errors++;
4049       continue;
4050
4051     break_loop:
4052       break;
4053     }
4054
4055  no_more_source:
4056   if (cmp_status->state != COMPOSING_NO)
4057     {
4058       if (coding->mode & CODING_MODE_LAST_BLOCK)
4059         MAYBE_FINISH_COMPOSITION ();
4060       else
4061         {
4062           charbuf -= cmp_status->length;
4063           for (i = 0; i < cmp_status->length; i++)
4064             cmp_status->carryover[i] = charbuf[i];
4065         }
4066     }
4067   else if (last_id != charset_ascii)
4068     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4069   coding->consumed_char += consumed_chars_base;
4070   coding->consumed = src_base - coding->source;
4071   coding->charbuf_used = charbuf - coding->charbuf;
4072 }
4073
4074
4075 /* ISO2022 encoding stuff.  */
4076
4077 /*
4078    It is not enough to say just "ISO2022" on encoding, we have to
4079    specify more details.  In Emacs, each coding system of ISO2022
4080    variant has the following specifications:
4081         1. Initial designation to G0 thru G3.
4082         2. Allows short-form designation?
4083         3. ASCII should be designated to G0 before control characters?
4084         4. ASCII should be designated to G0 at end of line?
4085         5. 7-bit environment or 8-bit environment?
4086         6. Use locking-shift?
4087         7. Use Single-shift?
4088    And the following two are only for Japanese:
4089         8. Use ASCII in place of JIS0201-1976-Roman?
4090         9. Use JISX0208-1983 in place of JISX0208-1978?
4091    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4092    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4093    details.
4094 */
4095
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advancing DST, and record the new designation in
   CODING.

   The sequence is: an optional revision prefix (ESC & <'@' + revision>,
   only when CODING has CODING_ISO_FLAG_REVISION and CHARSET carries a
   non-negative revision), ESC, the intermediate byte(s), and finally
   the <final-char> of CHARSET.  For a 94-char set of dimension other
   than 1 that goes to register 0 and whose <final-char> is '@', 'A',
   or 'B', the short form (no register-selecting intermediate byte) is
   produced unless CODING has CODING_ISO_FLAG_LONG_FORM.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    /* Register-selecting intermediate bytes, indexed by REG.  */       \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    int revision = -1;                                                  \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      revision = CHARSET_ISO_REVISION (charset);                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
                                                                        \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      EMIT_ONE_ASCII_BYTE ('$');                                        \
    if (CHARSET_ISO_CHARS_96 (charset))                                 \
      EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);                  \
    else if (CHARSET_DIMENSION (charset) == 1                           \
             || (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM) \
             || reg != 0                                                \
             || final_char < '@' || final_char > 'B')                   \
      EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);                  \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4143
4144
4145 /* The following two macros produce codes (control character or escape
4146    sequence) for ISO2022 single-shift functions (single-shift-2 and
4147    single-shift-3).  */
4148
/* Emit single-shift-2: ESC N if the coding system has
   CODING_ISO_FLAG_SEVEN_BITS set, otherwise the byte ISO_CODE_SS2.
   Then flag the coding system as single-shifting.  Refers to a
   variable named `coding' at the point of use.  */
#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4157
4158
/* Emit single-shift-3: ESC O if the coding system has
   CODING_ISO_FLAG_SEVEN_BITS set, otherwise the byte ISO_CODE_SS3.
   Then flag the coding system as single-shifting.  Refers to a
   variable named `coding' at the point of use.  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4167
4168
4169 /* The following four macros produce codes (control character or
4170    escape sequence) for ISO2022 locking-shift functions (shift-in,
4171    shift-out, locking-shift-2, and locking-shift-3).  */
4172
/* Emit ISO_CODE_SI and record graphic register 0 as the register
   invoked to graphic plane 0.  Refers to a variable named `coding'
   at the point of use.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)
4178
4179
/* Emit ISO_CODE_SO and record graphic register 1 as the register
   invoked to graphic plane 0.  Refers to a variable named `coding'
   at the point of use.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)
4185
4186
/* Emit locking-shift-2 (ESC n) and record graphic register 2 as the
   register invoked to graphic plane 0.  Refers to a variable named
   `coding' at the point of use.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)
4192
4193
/* Emit locking-shift-3 (ESC o) and record graphic register 3 as the
   register invoked to graphic plane 0.  The final byte must be 'o':
   ESC n is locking-shift-2, so emitting it here would leave the
   recorded invocation out of step with the bytes actually written.
   Refers to a variable named `coding' at the point of use.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
4199
4200
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when the coding system has
   CODING_ISO_FLAG_USE_ROMAN and CHARSET is ASCII, it is replaced by
   JISX0201-Roman.  C1 may be any integer expression; it is
   parenthesized at every use, as in ENCODE_ISO_CHARACTER_DIMENSION2.
   Refers to variables named `coding', `dst' and `produced_chars' at
   the point of use.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single-shift is pending: emit and clear the flag.  */      \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4243
4244
/* Emit a two-byte character of CHARSET whose position-codes are C1
   and C2, preceded by whatever designation and invocation codes are
   needed to make CHARSET usable.

   The body loops: each pass either emits the two bytes and leaves, or
   calls encode_invocation_designation and tries again.  CHARSET must
   be an lvalue, since it is replaced by JISX0208-1978 when the coding
   system has CODING_ISO_FLAG_USE_OLDJIS and CHARSET is JISX0208.
   Refers to variables named `coding', `dst' and `produced_chars' at
   the point of use.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    /* Use JISX0208-1978 in place of JISX0208 if so flagged.  */        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single-shift is pending: emit and clear the flag.  */      \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* CHARSET is not invoked yet; emit the codes and try again.  */    \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
4287
4288
/* Produce codes for character C of CHARSET.  C is passed through
   ENCODE_CHAR to obtain CODE.  A charset of dimension 1 is emitted
   via ENCODE_ISO_CHARACTER_DIMENSION1 with CODE as the position-code;
   any other dimension goes through ENCODE_ISO_CHARACTER_DIMENSION2
   with (CODE >> 8) and (CODE & 0xFF) as the two position-codes.
   CHARSET must be an lvalue, because those macros may assign to it.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset), (c));                               \
                                                                           \
    if (CHARSET_DIMENSION (charset) == 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
  } while (0)
4298
4299
4300 /* Produce designation and invocation codes at a place pointed by DST
4301    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4302    Return new DST.  */
4303
4304 unsigned char *
4305 encode_invocation_designation (struct charset *charset,
4306                                struct coding_system *coding,
4307                                unsigned char *dst, int *p_nchars)
4308 {
4309   int multibytep = coding->dst_multibyte;
4310   int produced_chars = *p_nchars;
4311   int reg;                      /* graphic register number */
4312   int id = CHARSET_ID (charset);
4313
4314   /* At first, check designations.  */
4315   for (reg = 0; reg < 4; reg++)
4316     if (id == CODING_ISO_DESIGNATION (coding, reg))
4317       break;
4318
4319   if (reg >= 4)
4320     {
4321       /* CHARSET is not yet designated to any graphic registers.  */
4322       /* At first check the requested designation.  */
4323       reg = CODING_ISO_REQUEST (coding, id);
4324       if (reg < 0)
4325         /* Since CHARSET requests no special designation, designate it
4326            to graphic register 0.  */
4327         reg = 0;
4328
4329       ENCODE_DESIGNATION (charset, reg, coding);
4330     }
4331
4332   if (CODING_ISO_INVOCATION (coding, 0) != reg
4333       && CODING_ISO_INVOCATION (coding, 1) != reg)
4334     {
4335       /* Since the graphic register REG is not invoked to any graphic
4336          planes, invoke it to graphic plane 0.  */
4337       switch (reg)
4338         {
4339         case 0:                 /* graphic register 0 */
4340           ENCODE_SHIFT_IN;
4341           break;
4342
4343         case 1:                 /* graphic register 1 */
4344           ENCODE_SHIFT_OUT;
4345           break;
4346
4347         case 2:                 /* graphic register 2 */
4348           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4349             ENCODE_SINGLE_SHIFT_2;
4350           else
4351             ENCODE_LOCKING_SHIFT_2;
4352           break;
4353
4354         case 3:                 /* graphic register 3 */
4355           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4356             ENCODE_SINGLE_SHIFT_3;
4357           else
4358             ENCODE_LOCKING_SHIFT_3;
4359           break;
4360         }
4361     }
4362
4363   *p_nchars = produced_chars;
4364   return dst;
4365 }
4366
/* The following three macros produce codes for indicating direction
   of text.  */

/* Emit the control sequence introducer: ESC [ if the coding system
   has CODING_ISO_FLAG_SEVEN_BITS set, otherwise the single byte
   ISO_CODE_CSI.  The flag is tested as a bit, so other flags set in
   the same word do not affect the choice.  This is an object-like
   macro: invoke it without an argument list.  Refers to a variable
   named `coding' at the point of use.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
4376
4377
/* Emit the sequence that starts right-to-left text: the control
   sequence introducer followed by '2' ']'.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is written without an argument list.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
4383
4384
/* Emit the sequence that starts left-to-right text: the control
   sequence introducer followed by '0' ']'.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is written without an argument list.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4390
4391
/* Emit the codes that bring the graphic planes and registers back to
   their initial state: a shift-in if graphic plane 0 does not hold
   register 0, then, for each of the four registers that has an
   initial charset different from its current designation, a
   designation of that initial charset.  Refers to a variable named
   `coding' at the point of use.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    struct charset *charset;                                            \
    int reg = 0;                                                        \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    while (reg < 4)                                                     \
      {                                                                 \
        int initial_id = CODING_ISO_INITIAL (coding, reg);              \
                                                                        \
        if (initial_id >= 0                                             \
            && initial_id != CODING_ISO_DESIGNATION (coding, reg))      \
          {                                                             \
            charset = CHARSET_FROM_ID (initial_id);                     \
            ENCODE_DESIGNATION (charset, reg, coding);                  \
          }                                                             \
        reg++;                                                          \
      }                                                                 \
  } while (0)
4410
4411
4412 /* Produce designation sequences of charsets in the line started from
4413    SRC to a place pointed by DST, and return updated DST.
4414
4415    If the current block ends before any end-of-line, we may fail to
4416    find all the necessary designations.  */
4417
4418 static unsigned char *
4419 encode_designation_at_bol (struct coding_system *coding, int *charbuf,
4420                            int *charbuf_end, unsigned char *dst)
4421 {
4422   struct charset *charset;
4423   /* Table of charsets to be designated to each graphic register.  */
4424   int r[4];
4425   int c, found = 0, reg;
4426   int produced_chars = 0;
4427   int multibytep = coding->dst_multibyte;
4428   Lisp_Object attrs;
4429   Lisp_Object charset_list;
4430
4431   attrs = CODING_ID_ATTRS (coding->id);
4432   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4433   if (EQ (charset_list, Qiso_2022))
4434     charset_list = Viso_2022_charset_list;
4435
4436   for (reg = 0; reg < 4; reg++)
4437     r[reg] = -1;
4438
4439   while (found < 4)
4440     {
4441       int id;
4442
4443       c = *charbuf++;
4444       if (c == '\n')
4445         break;
4446       charset = char_charset (c, charset_list, NULL);
4447       id = CHARSET_ID (charset);
4448       reg = CODING_ISO_REQUEST (coding, id);
4449       if (reg >= 0 && r[reg] < 0)
4450         {
4451           found++;
4452           r[reg] = id;
4453         }
4454     }
4455
4456   if (found)
4457     {
4458       for (reg = 0; reg < 4; reg++)
4459         if (r[reg] >= 0
4460             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4461           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4462     }
4463
4464   return dst;
4465 }
4466
4467 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
        Encode the characters of CODING->charbuf as ISO 2022 bytes stored
        from CODING->destination + CODING->produced onward, emitting
        designation and reset sequences as the CODING_ISO_FLAG_* flags
        request.  On the paths visible here the result is recorded as
        CODING_RESULT_SUCCESS and 0 is returned.  ASSURE_DESTINATION and
        the EMIT_* / ENCODE_* macros are defined elsewhere; presumably
        they use the locals MULTIBYTEP, DST, DST_END and PRODUCED_CHARS,
        and ASSURE_DESTINATION may leave the function early -- confirm
        against their definitions.  */
4468
4469 static int
4470 encode_coding_iso_2022 (struct coding_system *coding)
4471 {
4472   int multibytep = coding->dst_multibyte;
4473   int *charbuf = coding->charbuf;
4474   int *charbuf_end = charbuf + coding->charbuf_used;
4475   unsigned char *dst = coding->destination + coding->produced;
4476   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Room requested from ASSURE_DESTINATION before each character.  */
4477   int safe_room = 16;
       /* Nonzero while designation sequences still have to be produced
          before the next character (see the top of the loop).  */
4478   int bol_designation
4479     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4480        && CODING_ISO_BOL (coding));
4481   int produced_chars = 0;
4482   Lisp_Object attrs, eol_type, charset_list;
4483   int ascii_compatible;
4484   int c;
       /* Charset id taken from the latest charset annotation, or -1 if
          there is none or it is not in CHARSET_LIST.  */
4485   int preferred_charset_id = -1;
4486
4487   CODING_GET_INFO (coding, attrs, charset_list);
       /* Use Qunix when EOL conversion is inhibited or when the EOL type
          is a vector.  */
4488   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4489   if (VECTORP (eol_type))
4490     eol_type = Qunix;
4491
4492   setup_iso_safe_charsets (attrs);
4493   /* Charset list may have been changed.  */
4494   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4495   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4496
4497   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4498
4499   while (charbuf < charbuf_end)
4500     {
4501       ASSURE_DESTINATION (safe_room);
4502
4503       if (bol_designation)
4504         {
4505           unsigned char *dst_prev = dst;
4506
4507           /* We have to produce designation sequences if any now.  */
4508           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4509           bol_designation = 0;
4510           /* We are sure that designation sequences are all ASCII bytes.  */
4511           produced_chars += dst - dst_prev;
4512         }
4513
4514       c = *charbuf++;
4515
4516       if (c < 0)
4517         {
4518           /* Handle an annotation.  C is negative here; together with
                  the element just consumed, -C elements are skipped by
                  the `charbuf += -c - 1' below.  */
4519           switch (*charbuf)
4520             {
4521             case CODING_ANNOTATE_COMPOSITION_MASK:
4522               /* Not yet implemented.  */
4523               break;
4524             case CODING_ANNOTATE_CHARSET_MASK:
                   /* Remember the annotated charset, but only if it is a
                      member of CHARSET_LIST.  */
4525               preferred_charset_id = charbuf[2];
4526               if (preferred_charset_id >= 0
4527                   && NILP (Fmemq (make_number (preferred_charset_id),
4528                                   charset_list)))
4529                 preferred_charset_id = -1;
4530               break;
4531             default:
4532               abort ();
4533             }
4534           charbuf += -c - 1;
4535           continue;
4536         }
4537
4538       /* Now encode the character C.  */
4539       if (c < 0x20 || c == 0x7F)
4540         {
               /* Control character.  At a line end (LF, or CR when the
                  EOL type is Qmac) apply the RESET_AT_EOL and INIT_AT_BOL
                  flags and arrange for designation at the next line if
                  DESIGNATE_AT_BOL is set.  */
4541           if (c == '\n'
4542               || (c == '\r' && EQ (eol_type, Qmac)))
4543             {
4544               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4545                 ENCODE_RESET_PLANE_AND_REGISTER ();
4546               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4547                 {
4548                   int i;
4549
4550                   for (i = 0; i < 4; i++)
4551                     CODING_ISO_DESIGNATION (coding, i)
4552                       = CODING_ISO_INITIAL (coding, i);
4553                 }
4554               bol_designation
4555                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4556             }
4557           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4558             ENCODE_RESET_PLANE_AND_REGISTER ();
4559           EMIT_ONE_ASCII_BYTE (c);
4560         }
4561       else if (ASCII_CHAR_P (c))
4562         {
               /* Printable ASCII: emitted as is when the coding system is
                  ASCII compatible, else encoded through the ASCII
                  charset.  */
4563           if (ascii_compatible)
4564             EMIT_ONE_ASCII_BYTE (c);
4565           else
4566             {
4567               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4568               ENCODE_ISO_CHARACTER (charset, c);
4569             }
4570         }
4571       else if (CHAR_BYTE8_P (c))
4572         {
               /* A raw 8-bit byte character is emitted as that byte.  */
4573           c = CHAR_TO_BYTE8 (c);
4574           EMIT_ONE_BYTE (c);
4575         }
4576       else
4577         {
4578           struct charset *charset;
4579
               /* Try the annotated charset first; fall back to searching
                  CHARSET_LIST when it does not contain C.  */
4580           if (preferred_charset_id >= 0)
4581             {
4582               charset = CHARSET_FROM_ID (preferred_charset_id);
4583               if (! CHAR_CHARSET_P (c, charset))
4584                 charset = char_charset (c, charset_list, NULL);
4585             }
4586           else
4587             charset = char_charset (c, charset_list, NULL);
4588           if (!charset)
4589             {
                   /* No charset in the list contains C: substitute
                      CODING_INHIBIT_CHARACTER_SUBSTITUTION (as ASCII) in
                      safe-encoding mode, else CODING->default_char.  */
4590               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4591                 {
4592                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4593                   charset = CHARSET_FROM_ID (charset_ascii);
4594                 }
4595               else
4596                 {
4597                   c = coding->default_char;
4598                   charset = char_charset (c, charset_list, NULL);
4599                 }
4600             }
4601           ENCODE_ISO_CHARACTER (charset, c);
4602         }
4603     }
4604
       /* On the last block, apply the RESET_AT_EOL flag once more after
          the final character.  */
4605   if (coding->mode & CODING_MODE_LAST_BLOCK
4606       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4607     {
4608       ASSURE_DESTINATION (safe_room);
4609       ENCODE_RESET_PLANE_AND_REGISTER ();
4610     }
4611   record_conversion_result (coding, CODING_RESULT_SUCCESS);
       /* Save the pending-designation state; it is read back through
          CODING_ISO_BOL at the top of this function.  */
4612   CODING_ISO_BOL (coding) = bol_designation;
4613   coding->produced_char += produced_chars;
4614   coding->produced = dst - coding->destination;
4615   return 0;
4616 }
4617
4618 \f
4619 /*** 8. SJIS and BIG5 handlers ***/
4620
4621 /* Although SJIS and BIG5 are not ISO coding systems, they are used
4622    quite widely.  So, for the moment, Emacs supports them in the bare
4623    C code.  But, in the future, they may be supported only by CCL.  */
4624
4625 /* SJIS is a coding system encoding three character sets: ASCII, right
4626    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4627    as is.  A character of charset katakana-jisx0201 is encoded by
4628    "position-code + 0x80".  A character of charset japanese-jisx0208
4629    is encoded in two bytes, but the two position-codes are divided and
4630    shifted so that they fit in the ranges below.
4631
4632    --- CODE RANGE of SJIS ---
4633    (character set)      (range)
4634    ASCII                0x00 .. 0x7F
4635    KATAKANA-JISX0201    0xA0 .. 0xDF
4636    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4637             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4638    -------------------------------
4639
4640 */
4641
4642 /* BIG5 is a coding system encoding two character sets: ASCII and
4643    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4644    character set and is encoded in two bytes.
4645
4646    --- CODE RANGE of BIG5 ---
4647    (character set)      (range)
4648    ASCII                0x00 .. 0x7F
4649    Big5 (1st byte)      0xA1 .. 0xFE
4650         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4651    --------------------------
4652
4653   */
4654
4655 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4656    Check if a text is encoded in SJIS.  If a byte sequence that is
4657    not valid SJIS is found, add CATEGORY_MASK_SJIS to
        DETECT_INFO->rejected and return 0.  Otherwise return 1, having
        OR-ed CATEGORY_MASK_SJIS into DETECT_INFO->found if any code
        with a byte >= 0x80 was accepted.  */
4658
4659 static int
4660 detect_coding_sjis (struct coding_system *coding,
4661                     struct coding_detection_info *detect_info)
4662 {
4663   const unsigned char *src = coding->source, *src_base;
4664   const unsigned char *src_end = coding->source + coding->src_bytes;
4665   int multibytep = coding->src_multibyte;
4666   int consumed_chars = 0;
4667   int found = 0;
4668   int c;
4669   Lisp_Object attrs, charset_list;
4670   int max_first_byte_of_2_byte_code;
4671
4672   CODING_GET_INFO (coding, attrs, charset_list);
       /* With more than three charsets in the list, first bytes up to
          0xFC are accepted; otherwise only up to 0xEF.  */
4673   max_first_byte_of_2_byte_code
4674     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4675
4676   detect_info->checked |= CATEGORY_MASK_SJIS;
4677   /* A coding system of this category is always ASCII compatible.  */
4678   src += coding->head_ascii;
4679
       /* ONE_MORE_BYTE is defined elsewhere; presumably it jumps to
          `no_more_source' when SRC reaches SRC_END.  */
4680   while (1)
4681     {
4682       src_base = src;
4683       ONE_MORE_BYTE (c);
4684       if (c < 0x80)
4685         continue;
4686       if ((c >= 0x81 && c <= 0x9F)
4687           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4688         {
               /* First byte of a two-byte code; the second byte must be
                  in 0x40..0xFC and must not be 0x7F.  */
4689           ONE_MORE_BYTE (c);
4690           if (c < 0x40 || c == 0x7F || c > 0xFC)
4691             break;
4692           found = CATEGORY_MASK_SJIS;
4693         }
4694       else if (c >= 0xA0 && c < 0xE0)
             /* Single byte in 0xA0..0xDF.  */
4695         found = CATEGORY_MASK_SJIS;
4696       else
4697         break;
4698     }
       /* The loop was left because of an invalid byte.  */
4699   detect_info->rejected |= CATEGORY_MASK_SJIS;
4700   return 0;
4701
4702  no_more_source:
       /* SRC_BASE < SRC means bytes were consumed since the start of the
          current code, i.e. input ended inside a code; on the last block
          that is a rejection.  */
4703   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4704     {
4705       detect_info->rejected |= CATEGORY_MASK_SJIS;
4706       return 0;
4707     }
4708   detect_info->found |= found;
4709   return 1;
4710 }
4711
4712 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4713    Check if a text is encoded in BIG5.  Return 1 from the
4714    `no_more_source' exit, having OR-ed CATEGORY_MASK_BIG5 into
        DETECT_INFO->found if a two-byte code was accepted.  Return 0
        otherwise; CATEGORY_MASK_BIG5 is then added to
        DETECT_INFO->rejected, except on the early return taken for an
        out-of-range second byte (see the note in the loop).  */
4715
4716 static int
4717 detect_coding_big5 (struct coding_system *coding,
4718                     struct coding_detection_info *detect_info)
4719 {
4720   const unsigned char *src = coding->source, *src_base;
4721   const unsigned char *src_end = coding->source + coding->src_bytes;
4722   int multibytep = coding->src_multibyte;
4723   int consumed_chars = 0;
4724   int found = 0;
4725   int c;
4726
4727   detect_info->checked |= CATEGORY_MASK_BIG5;
4728   /* A coding system of this category is always ASCII compatible.  */
4729   src += coding->head_ascii;
4730
       /* ONE_MORE_BYTE is defined elsewhere; presumably it jumps to
          `no_more_source' when SRC reaches SRC_END.  */
4731   while (1)
4732     {
4733       src_base = src;
4734       ONE_MORE_BYTE (c);
4735       if (c < 0x80)
4736         continue;
4737       if (c >= 0xA1)
4738         {
               /* First byte of a two-byte code.  A second byte below 0x40
                  or in 0x7F..0xA0 is invalid.
                  NOTE(review): this path returns 0 without adding
                  CATEGORY_MASK_BIG5 to DETECT_INFO->rejected, unlike the
                  `break' path below -- confirm that this is intended.  */
4739           ONE_MORE_BYTE (c);
4740           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4741             return 0;
4742           found = CATEGORY_MASK_BIG5;
4743         }
4744       else
4745         break;
4746     }
       /* A byte in 0x80..0xA0 was seen as the first byte of a code.  */
4747   detect_info->rejected |= CATEGORY_MASK_BIG5;
4748   return 0;
4749
4750  no_more_source:
       /* SRC_BASE < SRC means input ended inside a two-byte code; on the
          last block that is a rejection.  */
4751   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4752     {
4753       detect_info->rejected |= CATEGORY_MASK_BIG5;
4754       return 0;
4755     }
4756   detect_info->found |= found;
4757   return 1;
4758 }
4759
4760 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4761    Decode SJIS text from CODING->source into CODING->charbuf.  A byte
        that does not start a valid code is stored through BYTE8_TO_CHAR
        and counted in CODING->errors.  */
4762
4763 static void
4764 decode_coding_sjis (struct coding_system *coding)
4765 {
4766   const unsigned char *src = coding->source + coding->consumed;
4767   const unsigned char *src_end = coding->source + coding->src_bytes;
4768   const unsigned char *src_base;
4769   int *charbuf = coding->charbuf + coding->charbuf_used;
4770   /* We may produce one charset annotation in one loop and one more at
4771      the end.  */
4772   int *charbuf_end
4773     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4774   int consumed_chars = 0, consumed_chars_base;
4775   int multibytep = coding->src_multibyte;
4776   struct charset *charset_roman, *charset_kanji, *charset_kana;
4777   struct charset *charset_kanji2;
4778   Lisp_Object attrs, charset_list, val;
4779   int char_offset = coding->produced_char;
       /* LAST_ID is the charset id of the current run of characters and
          LAST_OFFSET the character offset at which that run started;
          they feed ADD_CHARSET_DATA below.  */
4780   int last_offset = char_offset;
4781   int last_id = charset_ascii;
4782   int eol_crlf =
4783     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte already read after a CR (only when EOL_CRLF), or -1.  */
4784   int byte_after_cr = -1;
4785
4786   CODING_GET_INFO (coding, attrs, charset_list);
4787
       /* The charset list is read in the order roman, kana, kanji, and
          an optional fourth element used for first bytes above 0xEF.  */
4788   val = charset_list;
4789   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4790   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4791   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4792   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4793
4794   while (1)
4795     {
4796       int c, c1;
4797       struct charset *charset;
4798
           /* Remember where this code starts so that it can be re-read
              (invalid code) or left unconsumed (end of source).  */
4799       src_base = src;
4800       consumed_chars_base = consumed_chars;
4801
4802       if (charbuf >= charbuf_end)
4803         {
               /* Output buffer full.  Step back over a byte that was read
                  ahead after a CR so that it is not reported as
                  consumed.  */
4804           if (byte_after_cr >= 0)
4805             src_base--;
4806           break;
4807         }
4808
4809       if (byte_after_cr >= 0)
4810         c = byte_after_cr, byte_after_cr = -1;
4811       else
4812         ONE_MORE_BYTE (c);
4813       if (c < 0)
4814         goto invalid_code;
4815       if (c < 0x80)
4816         {
4817           if (eol_crlf && c == '\r')
4818             ONE_MORE_BYTE (byte_after_cr);
4819           charset = charset_roman;
4820         }
4821       else if (c == 0x80 || c == 0xA0)
4822         goto invalid_code;
4823       else if (c >= 0xA1 && c <= 0xDF)
4824         {
4825           /* SJIS -> JISX0201-Kana */
4826           c &= 0x7F;
4827           charset = charset_kana;
4828         }
4829       else if (c <= 0xEF)
4830         {
4831           /* SJIS -> JISX0208 */
4832           ONE_MORE_BYTE (c1);
4833           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4834             goto invalid_code;
4835           c = (c << 8) | c1;
4836           SJIS_TO_JIS (c);
4837           charset = charset_kanji;
4838         }
4839       else if (c <= 0xFC && charset_kanji2)
4840         {
4841           /* SJIS -> JISX0213-2 */
4842           ONE_MORE_BYTE (c1);
4843           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4844             goto invalid_code;
4845           c = (c << 8) | c1;
4846           SJIS_TO_JIS2 (c);
4847           charset = charset_kanji2;
4848         }
4849       else
4850         goto invalid_code;
           /* When a different non-ASCII charset starts, record the run
              that just ended (unless that run was ASCII).  */
4851       if (charset->id != charset_ascii
4852           && last_id != charset->id)
4853         {
4854           if (last_id != charset_ascii)
4855             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4856           last_id = charset->id;
4857           last_offset = char_offset;
4858         }
4859       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4860       *charbuf++ = c;
4861       char_offset++;
4862       continue;
4863
4864     invalid_code:
           /* Rewind to the start of the offending code, re-read a single
              byte and store it undecoded.  */
4865       src = src_base;
4866       consumed_chars = consumed_chars_base;
4867       ONE_MORE_BYTE (c);
4868       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4869       char_offset++;
4870       coding->errors++;
4871     }
4872
4873  no_more_source:
       /* Record the still-open charset run and report consumption up to
          SRC_BASE, the start of the last code that was not completed.  */
4874   if (last_id != charset_ascii)
4875     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4876   coding->consumed_char += consumed_chars_base;
4877   coding->consumed = src_base - coding->source;
4878   coding->charbuf_used = charbuf - coding->charbuf;
4879 }
4880
4881 static void
4882 decode_coding_big5 (struct coding_system *coding)
4883 {
       /* Decode BIG5 text from CODING->source into CODING->charbuf.  A
          byte that does not start a valid code is stored through
          BYTE8_TO_CHAR and counted in CODING->errors.  */
4884   const unsigned char *src = coding->source + coding->consumed;
4885   const unsigned char *src_end = coding->source + coding->src_bytes;
4886   const unsigned char *src_base;
4887   int *charbuf = coding->charbuf + coding->charbuf_used;
4888   /* We may produce one charset annotation in one loop and one more at
4889      the end.  */
4890   int *charbuf_end
4891     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4892   int consumed_chars = 0, consumed_chars_base;
4893   int multibytep = coding->src_multibyte;
4894   struct charset *charset_roman, *charset_big5;
4895   Lisp_Object attrs, charset_list, val;
4896   int char_offset = coding->produced_char;
       /* LAST_ID is the charset id of the current run of characters and
          LAST_OFFSET the character offset at which that run started;
          they feed ADD_CHARSET_DATA below.  */
4897   int last_offset = char_offset;
4898   int last_id = charset_ascii;
4899   int eol_crlf =
4900     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte already read after a CR (only when EOL_CRLF), or -1.  */
4901   int byte_after_cr = -1;
4902
4903   CODING_GET_INFO (coding, attrs, charset_list);
       /* The charset list is read in the order roman, big5.  */
4904   val = charset_list;
4905   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4906   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4907
4908   while (1)
4909     {
4910       int c, c1;
4911       struct charset *charset;
4912
           /* Remember where this code starts so that it can be re-read
              (invalid code) or left unconsumed (end of source).  */
4913       src_base = src;
4914       consumed_chars_base = consumed_chars;
4915
4916       if (charbuf >= charbuf_end)
4917         {
               /* Output buffer full.  Step back over a byte that was read
                  ahead after a CR so that it is not reported as
                  consumed.  */
4918           if (byte_after_cr >= 0)
4919             src_base--;
4920           break;
4921         }
4922
4923       if (byte_after_cr >= 0)
4924         c = byte_after_cr, byte_after_cr = -1;
4925       else
4926         ONE_MORE_BYTE (c);
4927
4928       if (c < 0)
4929         goto invalid_code;
4930       if (c < 0x80)
4931         {
4932           if (eol_crlf && c == '\r')
4933             ONE_MORE_BYTE (byte_after_cr);
4934           charset = charset_roman;
4935         }
4936       else
4937         {
4938           /* BIG5 -> Big5 */
               /* First byte must be in 0xA1..0xFE; second byte in
                  0x40..0x7E or 0xA1..0xFE.  */
4939           if (c < 0xA1 || c > 0xFE)
4940             goto invalid_code;
4941           ONE_MORE_BYTE (c1);
4942           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4943             goto invalid_code;
4944           c = c << 8 | c1;
4945           charset = charset_big5;
4946         }
           /* When a different non-ASCII charset starts, record the run
              that just ended (unless that run was ASCII).  */
4947       if (charset->id != charset_ascii
4948           && last_id != charset->id)
4949         {
4950           if (last_id != charset_ascii)
4951             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4952           last_id = charset->id;
4953           last_offset = char_offset;
4954         }
4955       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4956       *charbuf++ = c;
4957       char_offset++;
4958       continue;
4959
4960     invalid_code:
           /* Rewind to the start of the offending code, re-read a single
              byte and store it undecoded.  */
4961       src = src_base;
4962       consumed_chars = consumed_chars_base;
4963       ONE_MORE_BYTE (c);
4964       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4965       char_offset++;
4966       coding->errors++;
4967     }
4968
4969  no_more_source:
       /* Record the still-open charset run and report consumption up to
          SRC_BASE, the start of the last code that was not completed.  */
4970   if (last_id != charset_ascii)
4971     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4972   coding->consumed_char += consumed_chars_base;
4973   coding->consumed = src_base - coding->source;
4974   coding->charbuf_used = charbuf - coding->charbuf;
4975 }
4976
4977 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4978    Encode the characters of CODING->charbuf as SJIS text.  The
4979    charsets handled are taken from the coding system's charset list,
4980    read in the order roman, kana, kanji and an optional second kanji
4981    charset.  A character contained in none of them is replaced by the
4982    code CODING_INHIBIT_CHARACTER_SUBSTITUTION when
4983    CODING_MODE_SAFE_ENCODING is set, else by CODING->default_char.  */
4984
4985 static int
4986 encode_coding_sjis (struct coding_system *coding)
4987 {
4988   int multibytep = coding->dst_multibyte;
4989   int *charbuf = coding->charbuf;
4990   int *charbuf_end = charbuf + coding->charbuf_used;
4991   unsigned char *dst = coding->destination + coding->produced;
4992   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Room requested from ASSURE_DESTINATION before each character.  */
4993   int safe_room = 4;
4994   int produced_chars = 0;
4995   Lisp_Object attrs, charset_list, val;
4996   int ascii_compatible;
4997   struct charset *charset_roman, *charset_kanji, *charset_kana;
4998   struct charset *charset_kanji2;
4999   int c;
5000
5001   CODING_GET_INFO (coding, attrs, charset_list);
       /* NOTE(review): CHARSET_ROMAN is assigned here but not referenced
          again in this function.  */
5002   val = charset_list;
5003   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5004   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5005   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5006   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
5007
5008   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5009
5010   while (charbuf < charbuf_end)
5011     {
5012       ASSURE_DESTINATION (safe_room);
5013       c = *charbuf++;
5014       /* Now encode the character C.  */
5015       if (ASCII_CHAR_P (c) && ascii_compatible)
5016         EMIT_ONE_ASCII_BYTE (c);
5017       else if (CHAR_BYTE8_P (c))
5018         {
               /* A raw 8-bit byte character is emitted as that byte.  */
5019           c = CHAR_TO_BYTE8 (c);
5020           EMIT_ONE_BYTE (c);
5021         }
5022       else
5023         {
5024           unsigned code;
5025           struct charset *charset = char_charset (c, charset_list, &code);
5026
5027           if (!charset)
5028             {
5029               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5030                 {
5031                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5032                   charset = CHARSET_FROM_ID (charset_ascii);
5033                 }
5034               else
5035                 {
5036                   c = coding->default_char;
5037                   charset = char_charset (c, charset_list, &code);
5038                 }
5039             }
5040           if (code == CHARSET_INVALID_CODE (charset))
5041             abort ();
5042           if (charset == charset_kanji)
5043             {
                   /* Convert the code with JIS_TO_SJIS and emit its high
                      and low bytes.  */
5044               int c1, c2;
5045               JIS_TO_SJIS (code);
5046               c1 = code >> 8, c2 = code & 0xFF;
5047               EMIT_TWO_BYTES (c1, c2);
5048             }
5049           else if (charset == charset_kana)
5050             EMIT_ONE_BYTE (code | 0x80);
5051           else if (charset_kanji2 && charset == charset_kanji2)
5052             {
5053               int c1, c2;
5054
                   /* Only codes whose high byte passes the test below are
                      converted with JIS_TO_SJIS2; for the others the low
                      seven bits of the code are emitted as one byte.  */
5055               c1 = code >> 8;
5056               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5057                   || c1 == 0x28
5058                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5059                 {
5060                   JIS_TO_SJIS2 (code);
5061                   c1 = code >> 8, c2 = code & 0xFF;
5062                   EMIT_TWO_BYTES (c1, c2);
5063                 }
5064               else
5065                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5066             }
5067           else
                 /* Any other charset (including the substitution case
                    above): emit the low seven bits of the code.  */
5068             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5069         }
5070     }
5071   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5072   coding->produced_char += produced_chars;
5073   coding->produced = dst - coding->destination;
5074   return 0;
5075 }
5076
5077 static int
5078 encode_coding_big5 (struct coding_system *coding)
5079 {
       /* Encode the characters of CODING->charbuf as BIG5 text.  The
          charset list is read in the order roman, big5.  A character
          contained in neither is replaced by the code
          CODING_INHIBIT_CHARACTER_SUBSTITUTION when
          CODING_MODE_SAFE_ENCODING is set, else by CODING->default_char.
          Returns 0 on the paths visible here.  */
5080   int multibytep = coding->dst_multibyte;
5081   int *charbuf = coding->charbuf;
5082   int *charbuf_end = charbuf + coding->charbuf_used;
5083   unsigned char *dst = coding->destination + coding->produced;
5084   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Room requested from ASSURE_DESTINATION before each character.  */
5085   int safe_room = 4;
5086   int produced_chars = 0;
5087   Lisp_Object attrs, charset_list, val;
5088   int ascii_compatible;
5089   struct charset *charset_roman, *charset_big5;
5090   int c;
5091
5092   CODING_GET_INFO (coding, attrs, charset_list);
       /* NOTE(review): CHARSET_ROMAN is assigned here but not referenced
          again in this function.  */
5093   val = charset_list;
5094   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5095   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5096   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5097
5098   while (charbuf < charbuf_end)
5099     {
5100       ASSURE_DESTINATION (safe_room);
5101       c = *charbuf++;
5102       /* Now encode the character C.  */
5103       if (ASCII_CHAR_P (c) && ascii_compatible)
5104         EMIT_ONE_ASCII_BYTE (c);
5105       else if (CHAR_BYTE8_P (c))
5106         {
               /* A raw 8-bit byte character is emitted as that byte.  */
5107           c = CHAR_TO_BYTE8 (c);
5108           EMIT_ONE_BYTE (c);
5109         }
5110       else
5111         {
5112           unsigned code;
5113           struct charset *charset = char_charset (c, charset_list, &code);
5114
5115           if (! charset)
5116             {
5117               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5118                 {
5119                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5120                   charset = CHARSET_FROM_ID (charset_ascii);
5121                 }
5122               else
5123                 {
5124                   c = coding->default_char;
5125                   charset = char_charset (c, charset_list, &code);
5126                 }
5127             }
5128           if (code == CHARSET_INVALID_CODE (charset))
5129             abort ();
5130           if (charset == charset_big5)
5131             {
                   /* Emit the high and low bytes of the Big5 code.  */
5132               int c1, c2;
5133
5134               c1 = code >> 8, c2 = code & 0xFF;
5135               EMIT_TWO_BYTES (c1, c2);
5136             }
5137           else
                 /* Any other charset (including the substitution case
                    above): emit the low seven bits of the code.  */
5138             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5139         }
5140     }
5141   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5142   coding->produced_char += produced_chars;
5143   coding->produced = dst - coding->destination;
5144   return 0;
5145 }
5146
5147 \f
5148 /*** 9. CCL handlers ***/
5149
5150 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5151    Check if a text is encoded in a coding system of which
5152    encoder/decoder are written in CCL program.  If a byte that the
5153    valid-byte table marks with zero is found, add CATEGORY_MASK_CCL
        to DETECT_INFO->rejected and return 0.  Otherwise return 1,
        having OR-ed CATEGORY_MASK_CCL into DETECT_INFO->found if any
        byte whose table entry is greater than 1 was seen.  */
5154
5155 static int
5156 detect_coding_ccl (struct coding_system *coding,
5157                    struct coding_detection_info *detect_info)
5158 {
5159   const unsigned char *src = coding->source, *src_base;
5160   const unsigned char *src_end = coding->source + coding->src_bytes;
5161   int multibytep = coding->src_multibyte;
5162   int consumed_chars = 0;
5163   int found = 0;
5164   unsigned char *valids;
5165   int head_ascii = coding->head_ascii;
5166   Lisp_Object attrs;
5167
5168   detect_info->checked |= CATEGORY_MASK_CCL;
5169
       /* From here on CODING refers to the coding system registered for
          the CCL category, not to the argument; the source pointers were
          taken from the argument above.  */
5170   coding = &coding_categories[coding_category_ccl];
5171   valids = CODING_CCL_VALIDS (coding);
5172   attrs = CODING_ID_ATTRS (coding->id);
       /* Skip the leading ASCII part only for an ASCII-compatible
          coding system.  */
5173   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5174     src += head_ascii;
5175
       /* ONE_MORE_BYTE is defined elsewhere; presumably it jumps to
          `no_more_source' when SRC reaches SRC_END.  */
5176   while (1)
5177     {
5178       int c;
5179
5180       src_base = src;
5181       ONE_MORE_BYTE (c);
5182       if (c < 0 || ! valids[c])
5183         break;
5184       if ((valids[c] > 1))
5185         found = CATEGORY_MASK_CCL;
5186     }
5187   detect_info->rejected |= CATEGORY_MASK_CCL;
5188   return 0;
5189
5190  no_more_source:
5191   detect_info->found |= found;
5192   return 1;
5193 }
5194
5195 static void
5196 decode_coding_ccl (struct coding_system *coding)
5197 {
       /* Decode CODING->source by running the CCL program of
          CODING->spec.ccl over it in chunks of at most 1024 elements,
          storing the output in CODING->charbuf.  The final CCL status is
          translated into a conversion result at the end.  */
5198   const unsigned char *src = coding->source + coding->consumed;
5199   const unsigned char *src_end = coding->source + coding->src_bytes;
5200   int *charbuf = coding->charbuf + coding->charbuf_used;
5201   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5202   int consumed_chars = 0;
5203   int multibytep = coding->src_multibyte;
5204   struct ccl_program *ccl = &coding->spec.ccl->ccl;
       /* One chunk of input for the CCL program.  For multibyte source,
          SOURCE_BYTEIDX[i] is the byte offset from SRC of the i-th
          element of the chunk; the extra slot holds the offset just past
          the last element.  */
5205   int source_charbuf[1024];
5206   int source_byteidx[1025];
5207   Lisp_Object attrs, charset_list;
5208
5209   CODING_GET_INFO (coding, attrs, charset_list);
5210
5211   while (1)
5212     {
5213       const unsigned char *p = src;
5214       int i = 0;
5215
           /* Fill the chunk: one character per element for multibyte
              source, one byte per element otherwise.  */
5216       if (multibytep)
5217         {
5218           while (i < 1024 && p < src_end)
5219             {
5220               source_byteidx[i] = p - src;
5221               source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5222             }
5223           source_byteidx[i] = p - src;
5224         }
5225       else
5226         while (i < 1024 && p < src_end)
5227           source_charbuf[i++] = *p++;
5228
           /* Tell the program this is the final input once the chunk
              reaches the end of the last block.  */
5229       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5230         ccl->last_block = 1;
5231       ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5232                   charset_list);
           /* Advance by what the program reports as produced and
              consumed; for multibyte source the consumed element count
              is mapped back to a byte offset.  */
5233       charbuf += ccl->produced;
5234       if (multibytep)
5235         src += source_byteidx[ccl->consumed];
5236       else
5237         src += ccl->consumed;
5238       consumed_chars += ccl->consumed;
           /* Go on only while input remains and the program stopped for
              lack of source.  */
5239       if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5240         break;
5241     }
5242
5243   switch (ccl->status)
5244     {
5245     case CCL_STAT_SUSPEND_BY_SRC:
5246       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5247       break;
5248     case CCL_STAT_SUSPEND_BY_DST:
5249       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5250       break;
5251     case CCL_STAT_QUIT:
5252     case CCL_STAT_INVALID_CMD:
5253       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5254       break;
5255     default:
5256       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5257       break;
5258     }
5259   coding->consumed_char += consumed_chars;
5260   coding->consumed = src - coding->source;
5261   coding->charbuf_used = charbuf - coding->charbuf;
5262 }
5263
5264 static int
5265 encode_coding_ccl (struct coding_system *coding)
5266 {
       /* Encode CODING->charbuf by running the CCL program of
          CODING->spec.ccl over it.  The program writes into a local
          buffer of 1024 elements; the low 8 bits of each element are
          then stored in the destination.  Returns 0 on the paths
          visible here.  */
5267   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5268   int multibytep = coding->dst_multibyte;
5269   int *charbuf = coding->charbuf;
5270   int *charbuf_end = charbuf + coding->charbuf_used;
5271   unsigned char *dst = coding->destination + coding->produced;
5272   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5273   int destination_charbuf[1024];
5274   int i, produced_chars = 0;
5275   Lisp_Object attrs, charset_list;
5276
5277   CODING_GET_INFO (coding, attrs, charset_list);
       /* Tell the program this is the final input when the whole source
          has been consumed and this is the last block.  */
5278   if (coding->consumed_char == coding->src_chars
5279       && coding->mode & CODING_MODE_LAST_BLOCK)
5280     ccl->last_block = 1;
5281
5282   while (charbuf < charbuf_end)
5283     {
5284       ccl_driver (ccl, charbuf, destination_charbuf,
5285                   charbuf_end - charbuf, 1024, charset_list);
5286       if (multibytep)
5287         {
               /* Twice the produced count is requested here; presumably
                  one byte may occupy two bytes in a multibyte
                  destination -- confirm against EMIT_ONE_BYTE.  */
5288           ASSURE_DESTINATION (ccl->produced * 2);
5289           for (i = 0; i < ccl->produced; i++)
5290             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5291         }
5292       else
5293         {
5294           ASSURE_DESTINATION (ccl->produced);
5295           for (i = 0; i < ccl->produced; i++)
5296             *dst++ = destination_charbuf[i] & 0xFF;
5297           produced_chars += ccl->produced;
5298         }
5299       charbuf += ccl->consumed;
           /* Stop early only when the program quit or hit an invalid
              command.  */
5300       if (ccl->status == CCL_STAT_QUIT
5301           || ccl->status == CCL_STAT_INVALID_CMD)
5302         break;
5303     }
5304
5305   switch (ccl->status)
5306     {
5307     case CCL_STAT_SUSPEND_BY_SRC:
5308       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5309       break;
5310     case CCL_STAT_SUSPEND_BY_DST:
5311       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5312       break;
5313     case CCL_STAT_QUIT:
5314     case CCL_STAT_INVALID_CMD:
5315       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5316       break;
5317     default:
5318       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5319       break;
5320     }
5321
5322   coding->produced_char += produced_chars;
5323   coding->produced = dst - coding->destination;
5324   return 0;
5325 }
5326
5327
5328 \f
5329 /*** 10, 11. no-conversion handlers ***/
5330
5331 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Raw text is not converted element by element: the whole source
        is marked as consumed and CODING->chars_at_source is set.  With
        DOS end-of-line conversion in effect, a CR that is the last
        source byte is left unconsumed and CODING_RESULT_INSUFFICIENT_SRC
        is recorded (presumably so that it can be paired with an LF that
        arrives later); otherwise CODING_RESULT_SUCCESS is recorded.  */
5332
5333 static void
5334 decode_coding_raw_text (struct coding_system *coding)
5335 {
5336   int eol_crlf =
5337     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5338
5339   coding->chars_at_source = 1;
5340   coding->consumed_char = coding->src_chars;
5341   coding->consumed = coding->src_bytes;
       /* Look at the last byte only if there is one: with an empty
          source, SOURCE[SRC_BYTES - 1] would be read out of bounds.  */
5342   if (eol_crlf
           && coding->src_bytes > 0
           && coding->source[coding->src_bytes - 1] == '\r')
5343     {
5344       coding->consumed_char--;
5345       coding->consumed--;
5346       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5347     }
5348   else
5349     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5350 }
5351
5352 static int
5353 encode_coding_raw_text (struct coding_system *coding)
5354 {
       /* Copy the elements of CODING->charbuf to the destination without
          any charset conversion.  The four cases below are selected by
          whether the destination and the source are multibyte.  Returns
          0 on the paths visible here.  */
5355   int multibytep = coding->dst_multibyte;
5356   int *charbuf = coding->charbuf;
5357   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5358   unsigned char *dst = coding->destination + coding->produced;
5359   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5360   int produced_chars = 0;
5361   int c;
5362
5363   if (multibytep)
5364     {
5365       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5366
5367       if (coding->src_multibyte)
             /* Multibyte source, multibyte destination: ASCII and raw
                8-bit characters become one byte each; any other
                character is expanded with CHAR_STRING_ADVANCE and each
                resulting byte is emitted separately.  */
5368         while (charbuf < charbuf_end)
5369           {
5370             ASSURE_DESTINATION (safe_room);
5371             c = *charbuf++;
5372             if (ASCII_CHAR_P (c))
5373               EMIT_ONE_ASCII_BYTE (c);
5374             else if (CHAR_BYTE8_P (c))
5375               {
5376                 c = CHAR_TO_BYTE8 (c);
5377                 EMIT_ONE_BYTE (c);
5378               }
5379             else
5380               {
5381                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5382
5383                 CHAR_STRING_ADVANCE (c, p1);
5384                 while (p0 < p1)
5385                   {
5386                     EMIT_ONE_BYTE (*p0);
5387                     p0++;
5388                   }
5389               }
5390           }
5391       else
             /* Unibyte source, multibyte destination: every element is
                emitted as one byte.  */
5392         while (charbuf < charbuf_end)
5393           {
5394             ASSURE_DESTINATION (safe_room);
5395             c = *charbuf++;
5396             EMIT_ONE_BYTE (c);
5397           }
5398     }
5399   else
5400     {
5401       if (coding->src_multibyte)
5402         {
5403           int safe_room = MAX_MULTIBYTE_LENGTH;
5404
               /* Multibyte source, unibyte destination: bytes are stored
                  directly at DST.  */
5405           while (charbuf < charbuf_end)
5406             {
5407               ASSURE_DESTINATION (safe_room);
5408               c = *charbuf++;
5409               if (ASCII_CHAR_P (c))
5410                 *dst++ = c;
5411               else if (CHAR_BYTE8_P (c))
5412                 *dst++ = CHAR_TO_BYTE8 (c);
5413               else
5414                 CHAR_STRING_ADVANCE (c, dst);
5415             }
5416         }
5417       else
5418         {
               /* Unibyte source, unibyte destination: room for the whole
                  buffer is requested once, then elements are copied one
                  per byte, stopping at DST_END at the latest.  */
5419           ASSURE_DESTINATION (charbuf_end - charbuf);
5420           while (charbuf < charbuf_end && dst < dst_end)
5421             *dst++ = *charbuf++;
5422         }
           /* For a unibyte destination the number of produced characters
              is the number of bytes written by this call.  */
5423       produced_chars = dst - (coding->destination + coding->produced);
5424     }
5425   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5426   coding->produced_char += produced_chars;
5427   coding->produced = dst - coding->destination;
5428   return 0;
5429 }
5430
5431 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5432    Check if a text is encoded in a charset-based coding system.  If it
5433    is, return 1, else return 0.  */
5434
5435 static int
5436 detect_coding_charset (struct coding_system *coding,
5437                        struct coding_detection_info *detect_info)
5438 {
5439   const unsigned char *src = coding->source, *src_base;
5440   const unsigned char *src_end = coding->source + coding->src_bytes;
5441   int multibytep = coding->src_multibyte;
5442   int consumed_chars = 0;
5443   Lisp_Object attrs, valids, name;
5444   int found = 0;
5445   int head_ascii = coding->head_ascii;
5446   int check_latin_extra = 0;
5447
5448   detect_info->checked |= CATEGORY_MASK_CHARSET;
5449
5450   coding = &coding_categories[coding_category_charset];
5451   attrs = CODING_ID_ATTRS (coding->id);
5452   valids = AREF (attrs, coding_attr_charset_valids);
5453   name = CODING_ID_NAME (coding->id);
5454   if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5455                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5456       || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5457                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5458     check_latin_extra = 1;
5459
5460   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5461     src += head_ascii;
5462
5463   while (1)
5464     {
5465       int c;
5466       Lisp_Object val;
5467       struct charset *charset;
5468       int dim, idx;
5469
5470       src_base = src;
5471       ONE_MORE_BYTE (c);
5472       if (c < 0)
5473         continue;
5474       val = AREF (valids, c);
5475       if (NILP (val))
5476         break;
5477       if (c >= 0x80)
5478         {
5479           if (c < 0xA0
5480               && check_latin_extra
5481               && (!VECTORP (Vlatin_extra_code_table)
5482                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5483             break;
5484           found = CATEGORY_MASK_CHARSET;
5485         }
5486       if (INTEGERP (val))
5487         {
5488           charset = CHARSET_FROM_ID (XFASTINT (val));
5489           dim = CHARSET_DIMENSION (charset);
5490           for (idx = 1; idx < dim; idx++)
5491             {
5492               if (src == src_end)
5493                 goto too_short;
5494               ONE_MORE_BYTE (c);
5495               if (c < charset->code_space[(dim - 1 - idx) * 2]
5496                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5497                 break;
5498             }
5499           if (idx < dim)
5500             break;
5501         }
5502       else
5503         {
5504           idx = 1;
5505           for (; CONSP (val); val = XCDR (val))
5506             {
5507               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5508               dim = CHARSET_DIMENSION (charset);
5509               while (idx < dim)
5510                 {
5511                   if (src == src_end)
5512                     goto too_short;
5513                   ONE_MORE_BYTE (c);
5514                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5515                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5516                     break;
5517                   idx++;
5518                 }
5519               if (idx == dim)
5520                 {
5521                   val = Qnil;
5522                   break;
5523                 }
5524             }
5525           if (CONSP (val))
5526             break;
5527         }
5528     }
5529  too_short:
5530   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5531   return 0;
5532
5533  no_more_source:
5534   detect_info->found |= found;
5535   return 1;
5536 }
5537
5538 static void
5539 decode_coding_charset (struct coding_system *coding)
5540 {
5541   const unsigned char *src = coding->source + coding->consumed;
5542   const unsigned char *src_end = coding->source + coding->src_bytes;
5543   const unsigned char *src_base;
5544   int *charbuf = coding->charbuf + coding->charbuf_used;
5545   /* We may produce one charset annocation in one loop and one more at
5546      the end.  */
5547   int *charbuf_end
5548     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5549   int consumed_chars = 0, consumed_chars_base;
5550   int multibytep = coding->src_multibyte;
5551   Lisp_Object attrs, charset_list, valids;
5552   int char_offset = coding->produced_char;
5553   int last_offset = char_offset;
5554   int last_id = charset_ascii;
5555   int eol_crlf =
5556     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5557   int byte_after_cr = -1;
5558
5559   CODING_GET_INFO (coding, attrs, charset_list);
5560   valids = AREF (attrs, coding_attr_charset_valids);
5561
5562   while (1)
5563     {
5564       int c;
5565       Lisp_Object val;
5566       struct charset *charset;
5567       int dim;
5568       int len = 1;
5569       unsigned code;
5570
5571       src_base = src;
5572       consumed_chars_base = consumed_chars;
5573
5574       if (charbuf >= charbuf_end)
5575         {
5576           if (byte_after_cr >= 0)
5577             src_base--;
5578           break;
5579         }
5580
5581       if (byte_after_cr >= 0)
5582         {
5583           c = byte_after_cr;
5584           byte_after_cr = -1;
5585         }
5586       else
5587         {
5588           ONE_MORE_BYTE (c);
5589           if (eol_crlf && c == '\r')
5590             ONE_MORE_BYTE (byte_after_cr);
5591         }
5592       if (c < 0)
5593         goto invalid_code;
5594       code = c;
5595
5596       val = AREF (valids, c);
5597       if (! INTEGERP (val) && ! CONSP (val))
5598         goto invalid_code;
5599       if (INTEGERP (val))
5600         {
5601           charset = CHARSET_FROM_ID (XFASTINT (val));
5602           dim = CHARSET_DIMENSION (charset);
5603           while (len < dim)
5604             {
5605               ONE_MORE_BYTE (c);
5606               code = (code << 8) | c;
5607               len++;
5608             }
5609           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5610                               charset, code, c);
5611         }
5612       else
5613         {
5614           /* VAL is a list of charset IDs.  It is assured that the
5615              list is sorted by charset dimensions (smaller one
5616              comes first).  */
5617           while (CONSP (val))
5618             {
5619               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5620               dim = CHARSET_DIMENSION (charset);
5621               while (len < dim)
5622                 {
5623                   ONE_MORE_BYTE (c);
5624                   code = (code << 8) | c;
5625                   len++;
5626                 }
5627               CODING_DECODE_CHAR (coding, src, src_base,
5628                                   src_end, charset, code, c);
5629               if (c >= 0)
5630                 break;
5631               val = XCDR (val);
5632             }
5633         }
5634       if (c < 0)
5635         goto invalid_code;
5636       if (charset->id != charset_ascii
5637           && last_id != charset->id)
5638         {
5639           if (last_id != charset_ascii)
5640             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5641           last_id = charset->id;
5642           last_offset = char_offset;
5643         }
5644
5645       *charbuf++ = c;
5646       char_offset++;
5647       continue;
5648
5649     invalid_code:
5650       src = src_base;
5651       consumed_chars = consumed_chars_base;
5652       ONE_MORE_BYTE (c);
5653       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5654       char_offset++;
5655       coding->errors++;
5656     }
5657
5658  no_more_source:
5659   if (last_id != charset_ascii)
5660     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5661   coding->consumed_char += consumed_chars_base;
5662   coding->consumed = src_base - coding->source;
5663   coding->charbuf_used = charbuf - coding->charbuf;
5664 }
5665
5666 static int
5667 encode_coding_charset (struct coding_system *coding)
5668 {
5669   int multibytep = coding->dst_multibyte;
5670   int *charbuf = coding->charbuf;
5671   int *charbuf_end = charbuf + coding->charbuf_used;
5672   unsigned char *dst = coding->destination + coding->produced;
5673   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5674   int safe_room = MAX_MULTIBYTE_LENGTH;
5675   int produced_chars = 0;
5676   Lisp_Object attrs, charset_list;
5677   int ascii_compatible;
5678   int c;
5679
5680   CODING_GET_INFO (coding, attrs, charset_list);
5681   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5682
5683   while (charbuf < charbuf_end)
5684     {
5685       struct charset *charset;
5686       unsigned code;
5687
5688       ASSURE_DESTINATION (safe_room);
5689       c = *charbuf++;
5690       if (ascii_compatible && ASCII_CHAR_P (c))
5691         EMIT_ONE_ASCII_BYTE (c);
5692       else if (CHAR_BYTE8_P (c))
5693         {
5694           c = CHAR_TO_BYTE8 (c);
5695           EMIT_ONE_BYTE (c);
5696         }
5697       else
5698         {
5699           charset = char_charset (c, charset_list, &code);
5700           if (charset)
5701             {
5702               if (CHARSET_DIMENSION (charset) == 1)
5703                 EMIT_ONE_BYTE (code);
5704               else if (CHARSET_DIMENSION (charset) == 2)
5705                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5706               else if (CHARSET_DIMENSION (charset) == 3)
5707                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5708               else
5709                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5710                                  (code >> 8) & 0xFF, code & 0xFF);
5711             }
5712           else
5713             {
5714               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5715                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5716               else
5717                 c = coding->default_char;
5718               EMIT_ONE_BYTE (c);
5719             }
5720         }
5721     }
5722
5723   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5724   coding->produced_char += produced_chars;
5725   coding->produced = dst - coding->destination;
5726   return 0;
5727 }
5728
5729 \f
5730 /*** 10. C library functions ***/
5731
5732 /* Setup coding context CODING from information about CODING_SYSTEM.
5733    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5734    CODING_SYSTEM is invalid, signal an error.  */
5735
5736 void
5737 setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5738 {
5739   Lisp_Object attrs;
5740   Lisp_Object eol_type;
5741   Lisp_Object coding_type;
5742   Lisp_Object val;
5743
5744   if (NILP (coding_system))
5745     coding_system = Qundecided;
5746
5747   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5748
5749   attrs = CODING_ID_ATTRS (coding->id);
5750   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5751
5752   coding->mode = 0;
5753   coding->head_ascii = -1;
5754   if (VECTORP (eol_type))
5755     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5756                             | CODING_REQUIRE_DETECTION_MASK);
5757   else if (! EQ (eol_type, Qunix))
5758     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5759                             | CODING_REQUIRE_ENCODING_MASK);
5760   else
5761     coding->common_flags = 0;
5762   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5763     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5764   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5765     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5766   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5767     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5768
5769   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5770   coding->max_charset_id = SCHARS (val) - 1;
5771   coding->safe_charsets = SDATA (val);
5772   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5773   coding->carryover_bytes = 0;
5774
5775   coding_type = CODING_ATTR_TYPE (attrs);
5776   if (EQ (coding_type, Qundecided))
5777     {
5778       coding->detector = NULL;
5779       coding->decoder = decode_coding_raw_text;
5780       coding->encoder = encode_coding_raw_text;
5781       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5782     }
5783   else if (EQ (coding_type, Qiso_2022))
5784     {
5785       int i;
5786       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5787
5788       /* Invoke graphic register 0 to plane 0.  */
5789       CODING_ISO_INVOCATION (coding, 0) = 0;
5790       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5791       CODING_ISO_INVOCATION (coding, 1)
5792         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5793       /* Setup the initial status of designation.  */
5794       for (i = 0; i < 4; i++)
5795         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5796       /* Not single shifting initially.  */
5797       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5798       /* Beginning of buffer should also be regarded as bol. */
5799       CODING_ISO_BOL (coding) = 1;
5800       coding->detector = detect_coding_iso_2022;
5801       coding->decoder = decode_coding_iso_2022;
5802       coding->encoder = encode_coding_iso_2022;
5803       if (flags & CODING_ISO_FLAG_SAFE)
5804         coding->mode |= CODING_MODE_SAFE_ENCODING;
5805       coding->common_flags
5806         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5807             | CODING_REQUIRE_FLUSHING_MASK);
5808       if (flags & CODING_ISO_FLAG_COMPOSITION)
5809         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5810       if (flags & CODING_ISO_FLAG_DESIGNATION)
5811         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5812       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5813         {
5814           setup_iso_safe_charsets (attrs);
5815           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5816           coding->max_charset_id = SCHARS (val) - 1;
5817           coding->safe_charsets = SDATA (val);
5818         }
5819       CODING_ISO_FLAGS (coding) = flags;
5820       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5821       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5822       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5823       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5824     }
5825   else if (EQ (coding_type, Qcharset))
5826     {
5827       coding->detector = detect_coding_charset;
5828       coding->decoder = decode_coding_charset;
5829       coding->encoder = encode_coding_charset;
5830       coding->common_flags
5831         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5832     }
5833   else if (EQ (coding_type, Qutf_8))
5834     {
5835       val = AREF (attrs, coding_attr_utf_bom);
5836       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5837                                    : EQ (val, Qt) ? utf_with_bom
5838                                    : utf_without_bom);
5839       coding->detector = detect_coding_utf_8;
5840       coding->decoder = decode_coding_utf_8;
5841       coding->encoder = encode_coding_utf_8;
5842       coding->common_flags
5843         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5844       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5845         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5846     }
5847   else if (EQ (coding_type, Qutf_16))
5848     {
5849       val = AREF (attrs, coding_attr_utf_bom);
5850       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5851                                     : EQ (val, Qt) ? utf_with_bom
5852                                     : utf_without_bom);
5853       val = AREF (attrs, coding_attr_utf_16_endian);
5854       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5855                                        : utf_16_little_endian);
5856       CODING_UTF_16_SURROGATE (coding) = 0;
5857       coding->detector = detect_coding_utf_16;
5858       coding->decoder = decode_coding_utf_16;
5859       coding->encoder = encode_coding_utf_16;
5860       coding->common_flags
5861         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5862       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5863         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5864     }
5865   else if (EQ (coding_type, Qccl))
5866     {
5867       coding->detector = detect_coding_ccl;
5868       coding->decoder = decode_coding_ccl;
5869       coding->encoder = encode_coding_ccl;
5870       coding->common_flags
5871         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5872             | CODING_REQUIRE_FLUSHING_MASK);
5873     }
5874   else if (EQ (coding_type, Qemacs_mule))
5875     {
5876       coding->detector = detect_coding_emacs_mule;
5877       coding->decoder = decode_coding_emacs_mule;
5878       coding->encoder = encode_coding_emacs_mule;
5879       coding->common_flags
5880         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5881       coding->spec.emacs_mule.full_support = 1;
5882       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5883           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5884         {
5885           Lisp_Object tail, safe_charsets;
5886           int max_charset_id = 0;
5887
5888           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5889                tail = XCDR (tail))
5890             if (max_charset_id < XFASTINT (XCAR (tail)))
5891               max_charset_id = XFASTINT (XCAR (tail));
5892           safe_charsets = make_uninit_string (max_charset_id + 1);
5893           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5894           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5895                tail = XCDR (tail))
5896             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5897           coding->max_charset_id = max_charset_id;
5898           coding->safe_charsets = SDATA (safe_charsets);
5899           coding->spec.emacs_mule.full_support = 1;
5900         }
5901       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5902       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5903     }
5904   else if (EQ (coding_type, Qshift_jis))
5905     {
5906       coding->detector = detect_coding_sjis;
5907       coding->decoder = decode_coding_sjis;
5908       coding->encoder = encode_coding_sjis;
5909       coding->common_flags
5910         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5911     }
5912   else if (EQ (coding_type, Qbig5))
5913     {
5914       coding->detector = detect_coding_big5;
5915       coding->decoder = decode_coding_big5;
5916       coding->encoder = encode_coding_big5;
5917       coding->common_flags
5918         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5919     }
5920   else                          /* EQ (coding_type, Qraw_text) */
5921     {
5922       coding->detector = NULL;
5923       coding->decoder = decode_coding_raw_text;
5924       coding->encoder = encode_coding_raw_text;
5925       if (! EQ (eol_type, Qunix))
5926         {
5927           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5928           if (! VECTORP (eol_type))
5929             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5930         }
5931
5932     }
5933
5934   return;
5935 }
5936
5937 /* Return a list of charsets supported by CODING.  */
5938
5939 Lisp_Object
5940 coding_charset_list (struct coding_system *coding)
5941 {
5942   Lisp_Object attrs, charset_list;
5943
5944   CODING_GET_INFO (coding, attrs, charset_list);
5945   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5946     {
5947       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5948
5949       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5950         charset_list = Viso_2022_charset_list;
5951     }
5952   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5953     {
5954       charset_list = Vemacs_mule_charset_list;
5955     }
5956   return charset_list;
5957 }
5958
5959
5960 /* Return a list of charsets supported by CODING-SYSTEM.  */
5961
5962 Lisp_Object
5963 coding_system_charset_list (Lisp_Object coding_system)
5964 {
5965   int id;
5966   Lisp_Object attrs, charset_list;
5967
5968   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5969   attrs = CODING_ID_ATTRS (id);
5970
5971   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5972     {
5973       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5974
5975       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5976         charset_list = Viso_2022_charset_list;
5977       else
5978         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5979     }
5980   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5981     {
5982       charset_list = Vemacs_mule_charset_list;
5983     }
5984   else
5985     {
5986       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5987     }
5988   return charset_list;
5989 }
5990
5991
5992 /* Return raw-text or one of its subsidiaries that has the same
5993    eol_type as CODING-SYSTEM.  */
5994
5995 Lisp_Object
5996 raw_text_coding_system (Lisp_Object coding_system)
5997 {
5998   Lisp_Object spec, attrs;
5999   Lisp_Object eol_type, raw_text_eol_type;
6000
6001   if (NILP (coding_system))
6002     return Qraw_text;
6003   spec = CODING_SYSTEM_SPEC (coding_system);
6004   attrs = AREF (spec, 0);
6005
6006   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6007     return coding_system;
6008
6009   eol_type = AREF (spec, 2);
6010   if (VECTORP (eol_type))
6011     return Qraw_text;
6012   spec = CODING_SYSTEM_SPEC (Qraw_text);
6013   raw_text_eol_type = AREF (spec, 2);
6014   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6015           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6016           : AREF (raw_text_eol_type, 2));
6017 }
6018
6019
6020 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6021    does, return one of the subsidiary that has the same eol-spec as
6022    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
6023    inherit end-of-line format from the system's setting
6024    (system_eol_type).  */
6025
6026 Lisp_Object
6027 coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
6028 {
6029   Lisp_Object spec, eol_type;
6030
6031   if (NILP (coding_system))
6032     coding_system = Qraw_text;
6033   spec = CODING_SYSTEM_SPEC (coding_system);
6034   eol_type = AREF (spec, 2);
6035   if (VECTORP (eol_type))
6036     {
6037       Lisp_Object parent_eol_type;
6038
6039       if (! NILP (parent))
6040         {
6041           Lisp_Object parent_spec;
6042
6043           parent_spec = CODING_SYSTEM_SPEC (parent);
6044           parent_eol_type = AREF (parent_spec, 2);
6045         }
6046       else
6047         parent_eol_type = system_eol_type;
6048       if (EQ (parent_eol_type, Qunix))
6049         coding_system = AREF (eol_type, 0);
6050       else if (EQ (parent_eol_type, Qdos))
6051         coding_system = AREF (eol_type, 1);
6052       else if (EQ (parent_eol_type, Qmac))
6053         coding_system = AREF (eol_type, 2);
6054     }
6055   return coding_system;
6056 }
6057
6058 /* Emacs has a mechanism to automatically detect a coding system if it
6059    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
6060    it's impossible to distinguish some coding systems accurately
6061    because they use the same range of codes.  So, at first, coding
6062    systems are grouped into the following categories:
6063
6064    o coding-category-emacs-mule
6065
6066         The category for a coding system which has the same code range
6067         as Emacs' internal format.  Assigned the coding-system (Lisp
6068         symbol) `emacs-mule' by default.
6069
6070    o coding-category-sjis
6071
6072         The category for a coding system which has the same code range
6073         as SJIS.  Assigned the coding-system (Lisp
6074         symbol) `japanese-shift-jis' by default.
6075
6076    o coding-category-iso-7
6077
6078         The category for a coding system which has the same code range
6079         as ISO2022 of 7-bit environment.  This doesn't use any locking
6080         shift and single shift functions.  This can encode/decode all
6081         charsets.  Assigned the coding-system (Lisp symbol)
6082         `iso-2022-7bit' by default.
6083
6084    o coding-category-iso-7-tight
6085
6086         Same as coding-category-iso-7 except that this can
6087         encode/decode only the specified charsets.
6088
6089    o coding-category-iso-8-1
6090
6091         The category for a coding system which has the same code range
6092         as ISO2022 of 8-bit environment and graphic plane 1 used only
6093         for DIMENSION1 charset.  This doesn't use any locking shift
6094         and single shift functions.  Assigned the coding-system (Lisp
6095         symbol) `iso-latin-1' by default.
6096
6097    o coding-category-iso-8-2
6098
6099         The category for a coding system which has the same code range
6100         as ISO2022 of 8-bit environment and graphic plane 1 used only
6101         for DIMENSION2 charset.  This doesn't use any locking shift
6102         and single shift functions.  Assigned the coding-system (Lisp
6103         symbol) `japanese-iso-8bit' by default.
6104
6105    o coding-category-iso-7-else
6106
6107         The category for a coding system which has the same code range
6108         as ISO2022 of 7-bit environment but uses locking shift or
6109         single shift functions.  Assigned the coding-system (Lisp
6110         symbol) `iso-2022-7bit-lock' by default.
6111
6112    o coding-category-iso-8-else
6113
6114         The category for a coding system which has the same code range
6115         as ISO2022 of 8-bit environment but uses locking shift or
6116         single shift functions.  Assigned the coding-system (Lisp
6117         symbol) `iso-2022-8bit-ss2' by default.
6118
6119    o coding-category-big5
6120
6121         The category for a coding system which has the same code range
6122         as BIG5.  Assigned the coding-system (Lisp symbol)
6123         `cn-big5' by default.
6124
6125    o coding-category-utf-8
6126
6127         The category for a coding system which has the same code range
6128         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6129         symbol) `utf-8' by default.
6130
6131    o coding-category-utf-16-be
6132
6133         The category for a coding system in which a text has an
6134         Unicode signature (cf. Unicode Standard) in the order of BIG
6135         endian at the head.  Assigned the coding-system (Lisp symbol)
6136         `utf-16-be' by default.
6137
6138    o coding-category-utf-16-le
6139
6140         The category for a coding system in which a text has an
6141         Unicode signature (cf. Unicode Standard) in the order of
6142         LITTLE endian at the head.  Assigned the coding-system (Lisp
6143         symbol) `utf-16-le' by default.
6144
6145    o coding-category-ccl
6146
6147         The category for a coding system of which encoder/decoder is
6148         written in CCL programs.  The default value is nil, i.e., no
6149         coding system is assigned.
6150
6151    o coding-category-binary
6152
6153         The category for a coding system not categorized in any of the
6154         above.  Assigned the coding-system (Lisp symbol)
6155         `no-conversion' by default.
6156
6157    Each of them is a Lisp symbol whose value is an actual
6158    `coding-system' (also a Lisp symbol) assigned by a user.
6159    What Emacs does actually is to detect a category of coding system.
6160    Then, it uses a `coding-system' assigned to it.  If Emacs can't
6161    decide only one possible category, it selects a category of the
6162    highest priority.  Priorities of categories are also specified by a
6163    user in a Lisp variable `coding-category-list'.
6164
6165 */
6166
6167 #define EOL_SEEN_NONE   0
6168 #define EOL_SEEN_LF     1
6169 #define EOL_SEEN_CR     2
6170 #define EOL_SEEN_CRLF   4
6171
6172 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6173    SOURCE is encoded.  If CATEGORY is one of
6174    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6175    two-byte, else they are encoded by one-byte.
6176
6177    Return one of EOL_SEEN_XXX.  */
6178
6179 #define MAX_EOL_CHECK_COUNT 3
6180
6181 static int
6182 detect_eol (const unsigned char *source, EMACS_INT src_bytes,
6183             enum coding_category category)
6184 {
6185   const unsigned char *src = source, *src_end = src + src_bytes;
6186   unsigned char c;
6187   int total  = 0;
6188   int eol_seen = EOL_SEEN_NONE;
6189
6190   if ((1 << category) & CATEGORY_MASK_UTF_16)
6191     {
6192       int msb, lsb;
6193
6194       msb = category == (coding_category_utf_16_le
6195                          | coding_category_utf_16_le_nosig);
6196       lsb = 1 - msb;
6197
6198       while (src + 1 < src_end)
6199         {
6200           c = src[lsb];
6201           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6202             {
6203               int this_eol;
6204
6205               if (c == '\n')
6206                 this_eol = EOL_SEEN_LF;
6207               else if (src + 3 >= src_end
6208                        || src[msb + 2] != 0
6209                        || src[lsb + 2] != '\n')
6210                 this_eol = EOL_SEEN_CR;
6211               else
6212                 {
6213                   this_eol = EOL_SEEN_CRLF;
6214                   src += 2;
6215                 }
6216
6217               if (eol_seen == EOL_SEEN_NONE)
6218                 /* This is the first end-of-line.  */
6219                 eol_seen = this_eol;
6220               else if (eol_seen != this_eol)
6221                 {
6222                   /* The found type is different from what found before.
6223                      Allow for stray ^M characters in DOS EOL files.  */
6224                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6225                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6226                     eol_seen = EOL_SEEN_CRLF;
6227                   else
6228                     {
6229                       eol_seen = EOL_SEEN_LF;
6230                       break;
6231                     }
6232                 }
6233               if (++total == MAX_EOL_CHECK_COUNT)
6234                 break;
6235             }
6236           src += 2;
6237         }
6238     }
6239   else
6240     {
6241       while (src < src_end)
6242         {
6243           c = *src++;
6244           if (c == '\n' || c == '\r')
6245             {
6246               int this_eol;
6247
6248               if (c == '\n')
6249                 this_eol = EOL_SEEN_LF;
6250               else if (src >= src_end || *src != '\n')
6251                 this_eol = EOL_SEEN_CR;
6252               else
6253                 this_eol = EOL_SEEN_CRLF, src++;
6254
6255               if (eol_seen == EOL_SEEN_NONE)
6256                 /* This is the first end-of-line.  */
6257                 eol_seen = this_eol;
6258               else if (eol_seen != this_eol)
6259                 {
6260                   /* The found type is different from what found before.
6261                      Allow for stray ^M characters in DOS EOL files.  */
6262                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6263                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6264                     eol_seen = EOL_SEEN_CRLF;
6265                   else
6266                     {
6267                       eol_seen = EOL_SEEN_LF;
6268                       break;
6269                     }
6270                 }
6271               if (++total == MAX_EOL_CHECK_COUNT)
6272                 break;
6273             }
6274         }
6275     }
6276   return eol_seen;
6277 }
6278
6279
6280 static Lisp_Object
6281 adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6282 {
6283   Lisp_Object eol_type;
6284
6285   eol_type = CODING_ID_EOL_TYPE (coding->id);
6286   if (eol_seen & EOL_SEEN_LF)
6287     {
6288       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6289       eol_type = Qunix;
6290     }
6291   else if (eol_seen & EOL_SEEN_CRLF)
6292     {
6293       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6294       eol_type = Qdos;
6295     }
6296   else if (eol_seen & EOL_SEEN_CR)
6297     {
6298       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6299       eol_type = Qmac;
6300     }
6301   return eol_type;
6302 }
6303
6304 /* Detect how a text specified in CODING is encoded.  If a coding
6305    system is detected, update fields of CODING by the detected coding
6306    system.  */
6307
6308 void
6309 detect_coding (struct coding_system *coding)
6310 {
6311   const unsigned char *src, *src_end;
6312   int saved_mode = coding->mode;
6313
6314   coding->consumed = coding->consumed_char = 0;
6315   coding->produced = coding->produced_char = 0;
6316   coding_set_source (coding);
6317
6318   src_end = coding->source + coding->src_bytes;
6319   coding->head_ascii = 0;
6320
6321   /* If we have not yet decided the text encoding type, detect it
6322      now.  */
6323   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6324     {
6325       int c, i;
6326       struct coding_detection_info detect_info;
6327       int null_byte_found = 0, eight_bit_found = 0;
6328
6329       detect_info.checked = detect_info.found = detect_info.rejected = 0;
6330       for (src = coding->source; src < src_end; src++)
6331         {
6332           c = *src;
6333           if (c & 0x80)
6334             {
6335               eight_bit_found = 1;
6336               if (null_byte_found)
6337                 break;
6338             }
6339           else if (c < 0x20)
6340             {
6341               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6342                   && ! inhibit_iso_escape_detection
6343                   && ! detect_info.checked)
6344                 {
6345                   if (detect_coding_iso_2022 (coding, &detect_info))
6346                     {
6347                       /* We have scanned the whole data.  */
6348                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6349                         {
6350                           /* We didn't find an 8-bit code.  We may
6351                              have found a null-byte, but it's very
6352                              rare that a binary file conforms to
6353                              ISO-2022.  */
6354                           src = src_end;
6355                           coding->head_ascii = src - coding->source;
6356                         }
6357                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6358                       break;
6359                     }
6360                 }
6361               else if (! c && !inhibit_null_byte_detection)
6362                 {
6363                   null_byte_found = 1;
6364                   if (eight_bit_found)
6365                     break;
6366                 }
6367               if (! eight_bit_found)
6368                 coding->head_ascii++;
6369             }
6370           else if (! eight_bit_found)
6371             coding->head_ascii++;
6372         }
6373
6374       if (null_byte_found || eight_bit_found
6375           || coding->head_ascii < coding->src_bytes
6376           || detect_info.found)
6377         {
6378           enum coding_category category;
6379           struct coding_system *this;
6380
6381           if (coding->head_ascii == coding->src_bytes)
6382             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6383             for (i = 0; i < coding_category_raw_text; i++)
6384               {
6385                 category = coding_priorities[i];
6386                 this = coding_categories + category;
6387                 if (detect_info.found & (1 << category))
6388                   break;
6389               }
6390           else
6391             {
6392               if (null_byte_found)
6393                 {
6394                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6395                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6396                 }
6397               for (i = 0; i < coding_category_raw_text; i++)
6398                 {
6399                   category = coding_priorities[i];
6400                   this = coding_categories + category;
6401                   if (this->id < 0)
6402                     {
6403                       /* No coding system of this category is defined.  */
6404                       detect_info.rejected |= (1 << category);
6405                     }
6406                   else if (category >= coding_category_raw_text)
6407                     continue;
6408                   else if (detect_info.checked & (1 << category))
6409                     {
6410                       if (detect_info.found & (1 << category))
6411                         break;
6412                     }
6413                   else if ((*(this->detector)) (coding, &detect_info)
6414                            && detect_info.found & (1 << category))
6415                     {
6416                       if (category == coding_category_utf_16_auto)
6417                         {
6418                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6419                             category = coding_category_utf_16_le;
6420                           else
6421                             category = coding_category_utf_16_be;
6422                         }
6423                       break;
6424                     }
6425                 }
6426             }
6427
6428           if (i < coding_category_raw_text)
6429             setup_coding_system (CODING_ID_NAME (this->id), coding);
6430           else if (null_byte_found)
6431             setup_coding_system (Qno_conversion, coding);
6432           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6433                    == CATEGORY_MASK_ANY)
6434             setup_coding_system (Qraw_text, coding);
6435           else if (detect_info.rejected)
6436             for (i = 0; i < coding_category_raw_text; i++)
6437               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6438                 {
6439                   this = coding_categories + coding_priorities[i];
6440                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6441                   break;
6442                 }
6443         }
6444     }
6445   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6446            == coding_category_utf_8_auto)
6447     {
6448       Lisp_Object coding_systems;
6449       struct coding_detection_info detect_info;
6450
6451       coding_systems
6452         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6453       detect_info.found = detect_info.rejected = 0;
6454       coding->head_ascii = 0;
6455       if (CONSP (coding_systems)
6456           && detect_coding_utf_8 (coding, &detect_info))
6457         {
6458           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6459             setup_coding_system (XCAR (coding_systems), coding);
6460           else
6461             setup_coding_system (XCDR (coding_systems), coding);
6462         }
6463     }
6464   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6465            == coding_category_utf_16_auto)
6466     {
6467       Lisp_Object coding_systems;
6468       struct coding_detection_info detect_info;
6469
6470       coding_systems
6471         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6472       detect_info.found = detect_info.rejected = 0;
6473       coding->head_ascii = 0;
6474       if (CONSP (coding_systems)
6475           && detect_coding_utf_16 (coding, &detect_info))
6476         {
6477           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6478             setup_coding_system (XCAR (coding_systems), coding);
6479           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6480             setup_coding_system (XCDR (coding_systems), coding);
6481         }
6482     }
6483   coding->mode = saved_mode;
6484 }
6485
6486
6487 static void
6488 decode_eol (struct coding_system *coding)
6489 {
6490   Lisp_Object eol_type;
6491   unsigned char *p, *pbeg, *pend;
6492
6493   eol_type = CODING_ID_EOL_TYPE (coding->id);
6494   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6495     return;
6496
6497   if (NILP (coding->dst_object))
6498     pbeg = coding->destination;
6499   else
6500     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6501   pend = pbeg + coding->produced;
6502
6503   if (VECTORP (eol_type))
6504     {
6505       int eol_seen = EOL_SEEN_NONE;
6506
6507       for (p = pbeg; p < pend; p++)
6508         {
6509           if (*p == '\n')
6510             eol_seen |= EOL_SEEN_LF;
6511           else if (*p == '\r')
6512             {
6513               if (p + 1 < pend && *(p + 1) == '\n')
6514                 {
6515                   eol_seen |= EOL_SEEN_CRLF;
6516                   p++;
6517                 }
6518               else
6519                 eol_seen |= EOL_SEEN_CR;
6520             }
6521         }
6522       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6523       if ((eol_seen & EOL_SEEN_CRLF) != 0
6524           && (eol_seen & EOL_SEEN_CR) != 0
6525           && (eol_seen & EOL_SEEN_LF) == 0)
6526         eol_seen = EOL_SEEN_CRLF;
6527       else if (eol_seen != EOL_SEEN_NONE
6528           && eol_seen != EOL_SEEN_LF
6529           && eol_seen != EOL_SEEN_CRLF
6530           && eol_seen != EOL_SEEN_CR)
6531         eol_seen = EOL_SEEN_LF;
6532       if (eol_seen != EOL_SEEN_NONE)
6533         eol_type = adjust_coding_eol_type (coding, eol_seen);
6534     }
6535
6536   if (EQ (eol_type, Qmac))
6537     {
6538       for (p = pbeg; p < pend; p++)
6539         if (*p == '\r')
6540           *p = '\n';
6541     }
6542   else if (EQ (eol_type, Qdos))
6543     {
6544       int n = 0;
6545
6546       if (NILP (coding->dst_object))
6547         {
6548           /* Start deleting '\r' from the tail to minimize the memory
6549              movement.  */
6550           for (p = pend - 2; p >= pbeg; p--)
6551             if (*p == '\r')
6552               {
6553                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6554                 n++;
6555               }
6556         }
6557       else
6558         {
6559           int pos_byte = coding->dst_pos_byte;
6560           int pos = coding->dst_pos;
6561           int pos_end = pos + coding->produced_char - 1;
6562
6563           while (pos < pos_end)
6564             {
6565               p = BYTE_POS_ADDR (pos_byte);
6566               if (*p == '\r' && p[1] == '\n')
6567                 {
6568                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6569                   n++;
6570                   pos_end--;
6571                 }
6572               pos++;
6573               if (coding->dst_multibyte)
6574                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6575               else
6576                 pos_byte++;
6577             }
6578         }
6579       coding->produced -= n;
6580       coding->produced_char -= n;
6581     }
6582 }
6583
6584
6585 /* Return a translation table (or list of them) from coding system
6586    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6587    decoding (ENCODEP is zero). */
6588
6589 static Lisp_Object
6590 get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6591 {
6592   Lisp_Object standard, translation_table;
6593   Lisp_Object val;
6594
6595   if (NILP (Venable_character_translation))
6596     {
6597       if (max_lookup)
6598         *max_lookup = 0;
6599       return Qnil;
6600     }
6601   if (encodep)
6602     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6603       standard = Vstandard_translation_table_for_encode;
6604   else
6605     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6606       standard = Vstandard_translation_table_for_decode;
6607   if (NILP (translation_table))
6608     translation_table = standard;
6609   else
6610     {
6611       if (SYMBOLP (translation_table))
6612         translation_table = Fget (translation_table, Qtranslation_table);
6613       else if (CONSP (translation_table))
6614         {
6615           translation_table = Fcopy_sequence (translation_table);
6616           for (val = translation_table; CONSP (val); val = XCDR (val))
6617             if (SYMBOLP (XCAR (val)))
6618               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6619         }
6620       if (CHAR_TABLE_P (standard))
6621         {
6622           if (CONSP (translation_table))
6623             translation_table = nconc2 (translation_table,
6624                                         Fcons (standard, Qnil));
6625           else
6626             translation_table = Fcons (translation_table,
6627                                        Fcons (standard, Qnil));
6628         }
6629     }
6630
6631   if (max_lookup)
6632     {
6633       *max_lookup = 1;
6634       if (CHAR_TABLE_P (translation_table)
6635           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6636         {
6637           val = XCHAR_TABLE (translation_table)->extras[1];
6638           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6639             *max_lookup = XFASTINT (val);
6640         }
6641       else if (CONSP (translation_table))
6642         {
6643           Lisp_Object tail, val;
6644
6645           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6646             if (CHAR_TABLE_P (XCAR (tail))
6647                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6648               {
6649                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6650                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6651                   *max_lookup = XFASTINT (val);
6652               }
6653         }
6654     }
6655   return translation_table;
6656 }
6657
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables, and store the outcome in C and TRANS.  C and TRANS must
   be lvalues; TABLE is evaluated more than once.

   If a table maps C to a character, C is replaced by that character
   and TRANS is set to Qnil.  In a list of tables the search then
   continues with the new C, so the tables are applied in sequence.
   If a table maps C to any other non-nil value, that value is left in
   TRANS and the search stops.  If nothing is found, TRANS is Qnil and
   C is unchanged.

   Elements of the list that are not char-tables are skipped.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    (trans) = Qnil;                                                     \
    if (CHAR_TABLE_P (table))                                           \
      {                                                                 \
        (trans) = CHAR_TABLE_REF ((table), (c));                        \
        if (CHARACTERP (trans))                                         \
          (c) = XFASTINT (trans), (trans) = Qnil;                       \
      }                                                                 \
    else if (CONSP (table))                                             \
      {                                                                 \
        Lisp_Object lookup_tail_;                                       \
                                                                        \
        for (lookup_tail_ = (table); CONSP (lookup_tail_);              \
             lookup_tail_ = XCDR (lookup_tail_))                        \
          if (CHAR_TABLE_P (XCAR (lookup_tail_)))                       \
            {                                                           \
              (trans) = CHAR_TABLE_REF (XCAR (lookup_tail_), (c));      \
              if (CHARACTERP (trans))                                   \
                (c) = XFASTINT (trans), (trans) = Qnil;                 \
              else if (! NILP (trans))                                  \
                break;                                                  \
            }                                                           \
      }                                                                 \
  } while (0)
6682
6683
6684 /* Return a translation of character(s) at BUF according to TRANS.
6685    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6686    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6687    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6688    translation is found, and Qnil if not found..
6689    If BUF is too short to lookup characters in FROM, return Qt.  */
6690
6691 static Lisp_Object
6692 get_translation (Lisp_Object trans, int *buf, int *buf_end)
6693 {
6694
6695   if (INTEGERP (trans))
6696     return trans;
6697   for (; CONSP (trans); trans = XCDR (trans))
6698     {
6699       Lisp_Object val = XCAR (trans);
6700       Lisp_Object from = XCAR (val);
6701       int len = ASIZE (from);
6702       int i;
6703
6704       for (i = 0; i < len; i++)
6705         {
6706           if (buf + i == buf_end)
6707             return Qt;
6708           if (XINT (AREF (from, i)) != buf[i])
6709             break;
6710         }
6711       if (i == len)
6712         return val;
6713     }
6714   return Qnil;
6715 }
6716
6717
6718 static int
6719 produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6720                int last_block)
6721 {
6722   unsigned char *dst = coding->destination + coding->produced;
6723   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6724   EMACS_INT produced;
6725   EMACS_INT produced_chars = 0;
6726   int carryover = 0;
6727
6728   if (! coding->chars_at_source)
6729     {
6730       /* Source characters are in coding->charbuf.  */
6731       int *buf = coding->charbuf;
6732       int *buf_end = buf + coding->charbuf_used;
6733
6734       if (EQ (coding->src_object, coding->dst_object))
6735         {
6736           coding_set_source (coding);
6737           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6738         }
6739
6740       while (buf < buf_end)
6741         {
6742           int c = *buf, i;
6743
6744           if (c >= 0)
6745             {
6746               int from_nchars = 1, to_nchars = 1;
6747               Lisp_Object trans = Qnil;
6748
6749               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6750               if (! NILP (trans))
6751                 {
6752                   trans = get_translation (trans, buf, buf_end);
6753                   if (INTEGERP (trans))
6754                     c = XINT (trans);
6755                   else if (CONSP (trans))
6756                     {
6757                       from_nchars = ASIZE (XCAR (trans));
6758                       trans = XCDR (trans);
6759                       if (INTEGERP (trans))
6760                         c = XINT (trans);
6761                       else
6762                         {
6763                           to_nchars = ASIZE (trans);
6764                           c = XINT (AREF (trans, 0));
6765                         }
6766                     }
6767                   else if (EQ (trans, Qt) && ! last_block)
6768                     break;
6769                 }
6770
6771               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6772                 {
6773                   dst = alloc_destination (coding,
6774                                            buf_end - buf
6775                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6776                                            dst);
6777                   if (EQ (coding->src_object, coding->dst_object))
6778                     {
6779                       coding_set_source (coding);
6780                       dst_end = (((unsigned char *) coding->source)
6781                                  + coding->consumed);
6782                     }
6783                   else
6784                     dst_end = coding->destination + coding->dst_bytes;
6785                 }
6786
6787               for (i = 0; i < to_nchars; i++)
6788                 {
6789                   if (i > 0)
6790                     c = XINT (AREF (trans, i));
6791                   if (coding->dst_multibyte
6792                       || ! CHAR_BYTE8_P (c))
6793                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6794                   else
6795                     *dst++ = CHAR_TO_BYTE8 (c);
6796                 }
6797               produced_chars += to_nchars;
6798               buf += from_nchars;
6799             }
6800           else
6801             /* This is an annotation datum.  (-C) is the length.  */
6802             buf += -c;
6803         }
6804       carryover = buf_end - buf;
6805     }
6806   else
6807     {
6808       /* Source characters are at coding->source.  */
6809       const unsigned char *src = coding->source;
6810       const unsigned char *src_end = src + coding->consumed;
6811
6812       if (EQ (coding->dst_object, coding->src_object))
6813         dst_end = (unsigned char *) src;
6814       if (coding->src_multibyte != coding->dst_multibyte)
6815         {
6816           if (coding->src_multibyte)
6817             {
6818               int multibytep = 1;
6819               EMACS_INT consumed_chars = 0;
6820
6821               while (1)
6822                 {
6823                   const unsigned char *src_base = src;
6824                   int c;
6825
6826                   ONE_MORE_BYTE (c);
6827                   if (dst == dst_end)
6828                     {
6829                       if (EQ (coding->src_object, coding->dst_object))
6830                         dst_end = (unsigned char *) src;
6831                       if (dst == dst_end)
6832                         {
6833                           EMACS_INT offset = src - coding->source;
6834
6835                           dst = alloc_destination (coding, src_end - src + 1,
6836                                                    dst);
6837                           dst_end = coding->destination + coding->dst_bytes;
6838                           coding_set_source (coding);
6839                           src = coding->source + offset;
6840                           src_end = coding->source + coding->src_bytes;
6841                           if (EQ (coding->src_object, coding->dst_object))
6842                             dst_end = (unsigned char *) src;
6843                         }
6844                     }
6845                   *dst++ = c;
6846                   produced_chars++;
6847                 }
6848             no_more_source:
6849               ;
6850             }
6851           else
6852             while (src < src_end)
6853               {
6854                 int multibytep = 1;
6855                 int c = *src++;
6856
6857                 if (dst >= dst_end - 1)
6858                   {
6859                     if (EQ (coding->src_object, coding->dst_object))
6860                       dst_end = (unsigned char *) src;
6861                     if (dst >= dst_end - 1)
6862                       {
6863                         EMACS_INT offset = src - coding->source;
6864                         EMACS_INT more_bytes;
6865
6866                         if (EQ (coding->src_object, coding->dst_object))
6867                           more_bytes = ((src_end - src) / 2) + 2;
6868                         else
6869                           more_bytes = src_end - src + 2;
6870                         dst = alloc_destination (coding, more_bytes, dst);
6871                         dst_end = coding->destination + coding->dst_bytes;
6872                         coding_set_source (coding);
6873                         src = coding->source + offset;
6874                         src_end = coding->source + coding->src_bytes;
6875                         if (EQ (coding->src_object, coding->dst_object))
6876                           dst_end = (unsigned char *) src;
6877                       }
6878                   }
6879                 EMIT_ONE_BYTE (c);
6880               }
6881         }
6882       else
6883         {
6884           if (!EQ (coding->src_object, coding->dst_object))
6885             {
6886               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6887
6888               if (require > 0)
6889                 {
6890                   EMACS_INT offset = src - coding->source;
6891
6892                   dst = alloc_destination (coding, require, dst);
6893                   coding_set_source (coding);
6894                   src = coding->source + offset;
6895                   src_end = coding->source + coding->src_bytes;
6896                 }
6897             }
6898           produced_chars = coding->consumed_char;
6899           while (src < src_end)
6900             *dst++ = *src++;
6901         }
6902     }
6903
6904   produced = dst - (coding->destination + coding->produced);
6905   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6906     insert_from_gap (produced_chars, produced);
6907   coding->produced += produced;
6908   coding->produced_char += produced_chars;
6909   return carryover;
6910 }
6911
6912 /* Compose text in CODING->object according to the annotation data at
6913    CHARBUF.  CHARBUF is an array:
6914      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6915  */
6916
6917 static INLINE void
6918 produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6919 {
6920   int len;
6921   EMACS_INT to;
6922   enum composition_method method;
6923   Lisp_Object components;
6924
6925   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6926   to = pos + charbuf[2];
6927   method = (enum composition_method) (charbuf[4]);
6928
6929   if (method == COMPOSITION_RELATIVE)
6930     components = Qnil;
6931   else
6932     {
6933       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6934       int i, j;
6935
6936       if (method == COMPOSITION_WITH_RULE)
6937         len = charbuf[2] * 3 - 2;
6938       charbuf += MAX_ANNOTATION_LENGTH;
6939       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6940       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6941         {
6942           if (charbuf[i] >= 0)
6943             args[j] = make_number (charbuf[i]);
6944           else
6945             {
6946               i++;
6947               args[j] = make_number (charbuf[i] % 0x100);
6948             }
6949         }
6950       components = (i == j ? Fstring (j, args) : Fvector (j, args));
6951     }
6952   compose_text (pos, to, components, Qnil, coding->dst_object);
6953 }
6954
6955
6956 /* Put `charset' property on text in CODING->object according to
6957    the annotation data at CHARBUF.  CHARBUF is an array:
6958      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6959  */
6960
6961 static INLINE void
6962 produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6963 {
6964   EMACS_INT from = pos - charbuf[2];
6965   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6966
6967   Fput_text_property (make_number (from), make_number (pos),
6968                       Qcharset, CHARSET_NAME (charset),
6969                       coding->dst_object);
6970 }
6971
6972
/* Number of elements first requested for the conversion work area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work area CODING->charbuf with alloca, and record the
   number of elements in CODING->charbuf_size.  CHARBUF_SIZE elements
   are tried first; the request is halved after each failure for as
   long as it stays above 1024 elements.

   This must remain a macro: the area lives in the stack frame of the
   function that uses the macro and disappears when that function
   returns.

   Beware of the hidden control flow: if no area could be allocated,
   CODING_RESULT_INSUFFICIENT_MEM is recorded in CODING and the
   *calling function* returns CODING->result.  The macro can therefore
   only be used in a function whose return type accepts that value.

   NOTE(review): alloca normally gives no indication of failure, so
   the retry loop and the error path are presumably never taken --
   confirm for the supported platforms.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int work_area_size_ = CHARBUF_SIZE;                                 \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (work_area_size_ > 1024)                                      \
      {                                                                 \
        (coding)->charbuf                                               \
          = (int *) alloca (sizeof (int) * work_area_size_);            \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        work_area_size_ >>= 1;                                          \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        record_conversion_result ((coding),                             \
                                  CODING_RESULT_INSUFFICIENT_MEM);      \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = work_area_size_;                           \
  } while (0)
6994
6995
6996 static void
6997 produce_annotation (struct coding_system *coding, EMACS_INT pos)
6998 {
6999   int *charbuf = coding->charbuf;
7000   int *charbuf_end = charbuf + coding->charbuf_used;
7001
7002   if (NILP (coding->dst_object))
7003     return;
7004
7005   while (charbuf < charbuf_end)
7006     {
7007       if (*charbuf >= 0)
7008         pos++, charbuf++;
7009       else
7010         {
7011           int len = -*charbuf;
7012
7013           if (len > 2)
7014             switch (charbuf[1])
7015               {
7016               case CODING_ANNOTATE_COMPOSITION_MASK:
7017                 produce_composition (coding, charbuf, pos);
7018                 break;
7019               case CODING_ANNOTATE_CHARSET_MASK:
7020                 produce_charset (coding, charbuf, pos);
7021                 break;
7022               }
7023           charbuf += len;
7024         }
7025     }
7026 }
7027
7028 /* Decode the data at CODING->src_object into CODING->dst_object.
7029    CODING->src_object is a buffer, a string, or nil.
7030    CODING->dst_object is a buffer.
7031
7032    If CODING->src_object is a buffer, it must be the current buffer.
7033    In this case, if CODING->src_pos is positive, it is a position of
7034    the source text in the buffer, otherwise, the source text is in the
7035    gap area of the buffer, and CODING->src_pos specifies the offset of
7036    the text from GPT (which must be the same as PT).  If this is the
7037    same buffer as CODING->dst_object, CODING->src_pos must be
7038    negative.
7039
7040    If CODING->src_object is a string, CODING->src_pos is an index to
7041    that string.
7042
7043    If CODING->src_object is nil, CODING->source must already point to
7044    the non-relocatable memory area.  In this case, CODING->src_pos is
7045    an offset from CODING->source.
7046
7047    The decoded data is inserted at the current point of the buffer
7048    CODING->dst_object.
7049 */
7050
7051 static int
7052 decode_coding (struct coding_system *coding)
7053 {
7054   Lisp_Object attrs;
7055   Lisp_Object undo_list;
7056   Lisp_Object translation_table;
7057   struct ccl_spec cclspec;
7058   int carryover;
7059   int i;
7060
7061   if (BUFFERP (coding->src_object)
7062       && coding->src_pos > 0
7063       && coding->src_pos < GPT
7064       && coding->src_pos + coding->src_chars > GPT)
7065     move_gap_both (coding->src_pos, coding->src_pos_byte);
7066
7067   undo_list = Qt;
7068   if (BUFFERP (coding->dst_object))
7069     {
7070       if (current_buffer != XBUFFER (coding->dst_object))
7071         set_buffer_internal (XBUFFER (coding->dst_object));
7072       if (GPT != PT)
7073         move_gap_both (PT, PT_BYTE);
7074       undo_list = current_buffer->undo_list;
7075       current_buffer->undo_list = Qt;
7076     }
7077
7078   coding->consumed = coding->consumed_char = 0;
7079   coding->produced = coding->produced_char = 0;
7080   coding->chars_at_source = 0;
7081   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7082   coding->errors = 0;
7083
7084   ALLOC_CONVERSION_WORK_AREA (coding);
7085
7086   attrs = CODING_ID_ATTRS (coding->id);
7087   translation_table = get_translation_table (attrs, 0, NULL);
7088
7089   carryover = 0;
7090   if (coding->decoder == decode_coding_ccl)
7091     {
7092       coding->spec.ccl = &cclspec;
7093       setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7094     }
7095   do
7096     {
7097       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7098
7099       coding_set_source (coding);
7100       coding->annotated = 0;
7101       coding->charbuf_used = carryover;
7102       (*(coding->decoder)) (coding);
7103       coding_set_destination (coding);
7104       carryover = produce_chars (coding, translation_table, 0);
7105       if (coding->annotated)
7106         produce_annotation (coding, pos);
7107       for (i = 0; i < carryover; i++)
7108         coding->charbuf[i]
7109           = coding->charbuf[coding->charbuf_used - carryover + i];
7110     }
7111   while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7112          || (coding->consumed < coding->src_bytes
7113              && (coding->result == CODING_RESULT_SUCCESS
7114                  || coding->result == CODING_RESULT_INVALID_SRC)));
7115
7116   if (carryover > 0)
7117     {
7118       coding_set_destination (coding);
7119       coding->charbuf_used = carryover;
7120       produce_chars (coding, translation_table, 1);
7121     }
7122
7123   coding->carryover_bytes = 0;
7124   if (coding->consumed < coding->src_bytes)
7125     {
7126       int nbytes = coding->src_bytes - coding->consumed;
7127       const unsigned char *src;
7128
7129       coding_set_source (coding);
7130       coding_set_destination (coding);
7131       src = coding->source + coding->consumed;
7132
7133       if (coding->mode & CODING_MODE_LAST_BLOCK)
7134         {
7135           /* Flush out unprocessed data as binary chars.  We are sure
7136              that the number of data is less than the size of
7137              coding->charbuf.  */
7138           coding->charbuf_used = 0;
7139           coding->chars_at_source = 0;
7140
7141           while (nbytes-- > 0)
7142             {
7143               int c = *src++;
7144
7145               if (c & 0x80)
7146                 c = BYTE8_TO_CHAR (c);
7147               coding->charbuf[coding->charbuf_used++] = c;
7148             }
7149           produce_chars (coding, Qnil, 1);
7150         }
7151       else
7152         {
7153           /* Record unprocessed bytes in coding->carryover.  We are
7154              sure that the number of data is less than the size of
7155              coding->carryover.  */
7156           unsigned char *p = coding->carryover;
7157
7158           if (nbytes > sizeof coding->carryover)
7159             nbytes = sizeof coding->carryover;
7160           coding->carryover_bytes = nbytes;
7161           while (nbytes-- > 0)
7162             *p++ = *src++;
7163         }
7164       coding->consumed = coding->src_bytes;
7165     }
7166
7167   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7168       && !inhibit_eol_conversion)
7169     decode_eol (coding);
7170   if (BUFFERP (coding->dst_object))
7171     {
7172       current_buffer->undo_list = undo_list;
7173       record_insert (coding->dst_pos, coding->produced_char);
7174     }
7175   return coding->result;
7176 }
7177
7178
7179 /* Extract an annotation datum from a composition starting at POS and
7180    ending before LIMIT of CODING->src_object (buffer or string), store
7181    the data in BUF, set *STOP to a starting position of the next
7182    composition (if any) or to LIMIT, and return the address of the
7183    next element of BUF.
7184
7185    If such an annotation is not found, set *STOP to a starting
7186    position of a composition after POS (if any) or to LIMIT, and
7187    return BUF.  */
7188
7189 static INLINE int *
7190 handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
7191                                struct coding_system *coding, int *buf,
7192                                EMACS_INT *stop)
7193 {
7194   EMACS_INT start, end;
7195   Lisp_Object prop;
7196
7197   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7198       || end > limit)
7199     *stop = limit;
7200   else if (start > pos)
7201     *stop = start;
7202   else
7203     {
7204       if (start == pos)
7205         {
7206           /* We found a composition.  Store the corresponding
7207              annotation data in BUF.  */
7208           int *head = buf;
7209           enum composition_method method = COMPOSITION_METHOD (prop);
7210           int nchars = COMPOSITION_LENGTH (prop);
7211
7212           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7213           if (method != COMPOSITION_RELATIVE)
7214             {
7215               Lisp_Object components;
7216               int len, i, i_byte;
7217
7218               components = COMPOSITION_COMPONENTS (prop);
7219               if (VECTORP (components))
7220                 {
7221                   len = XVECTOR (components)->size;
7222                   for (i = 0; i < len; i++)
7223                     *buf++ = XINT (AREF (components, i));
7224                 }
7225               else if (STRINGP (components))
7226                 {
7227                   len = SCHARS (components);
7228                   i = i_byte = 0;
7229                   while (i < len)
7230                     {
7231                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7232                       buf++;
7233                     }
7234                 }
7235               else if (INTEGERP (components))
7236                 {
7237                   len = 1;
7238                   *buf++ = XINT (components);
7239                 }
7240               else if (CONSP (components))
7241                 {
7242                   for (len = 0; CONSP (components);
7243                        len++, components = XCDR (components))
7244                     *buf++ = XINT (XCAR (components));
7245                 }
7246               else
7247                 abort ();
7248               *head -= len;
7249             }
7250         }
7251
7252       if (find_composition (end, limit, &start, &end, &prop,
7253                             coding->src_object)
7254           && end <= limit)
7255         *stop = start;
7256       else
7257         *stop = limit;
7258     }
7259   return buf;
7260 }
7261
7262
7263 /* Extract an annotation datum from a text property `charset' at POS of
7264    CODING->src_object (buffer of string), store the data in BUF, set
7265    *STOP to the position where the value of `charset' property changes
7266    (limiting by LIMIT), and return the address of the next element of
7267    BUF.
7268
7269    If the property value is nil, set *STOP to the position where the
7270    property value is non-nil (limiting by LIMIT), and return BUF.  */
7271
7272 static INLINE int *
7273 handle_charset_annotation (EMACS_INT pos, EMACS_INT limit,
7274                            struct coding_system *coding, int *buf,
7275                            EMACS_INT *stop)
7276 {
7277   Lisp_Object val, next;
7278   int id;
7279
7280   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7281   if (! NILP (val) && CHARSETP (val))
7282     id = XINT (CHARSET_SYMBOL_ID (val));
7283   else
7284     id = -1;
7285   ADD_CHARSET_DATA (buf, 0, id);
7286   next = Fnext_single_property_change (make_number (pos), Qcharset,
7287                                        coding->src_object,
7288                                        make_number (limit));
7289   *stop = XINT (next);
7290   return buf;
7291 }
7292
7293
7294 static void
7295 consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7296                int max_lookup)
7297 {
7298   int *buf = coding->charbuf;
7299   int *buf_end = coding->charbuf + coding->charbuf_size;
7300   const unsigned char *src = coding->source + coding->consumed;
7301   const unsigned char *src_end = coding->source + coding->src_bytes;
7302   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7303   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7304   int multibytep = coding->src_multibyte;
7305   Lisp_Object eol_type;
7306   int c;
7307   EMACS_INT stop, stop_composition, stop_charset;
7308   int *lookup_buf = NULL;
7309
7310   if (! NILP (translation_table))
7311     lookup_buf = alloca (sizeof (int) * max_lookup);
7312
7313   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7314   if (VECTORP (eol_type))
7315     eol_type = Qunix;
7316
7317   /* Note: composition handling is not yet implemented.  */
7318   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7319
7320   if (NILP (coding->src_object))
7321     stop = stop_composition = stop_charset = end_pos;
7322   else
7323     {
7324       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7325         stop = stop_composition = pos;
7326       else
7327         stop = stop_composition = end_pos;
7328       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7329         stop = stop_charset = pos;
7330       else
7331         stop_charset = end_pos;
7332     }
7333
7334   /* Compensate for CRLF and conversion.  */
7335   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7336   while (buf < buf_end)
7337     {
7338       Lisp_Object trans;
7339
7340       if (pos == stop)
7341         {
7342           if (pos == end_pos)
7343             break;
7344           if (pos == stop_composition)
7345             buf = handle_composition_annotation (pos, end_pos, coding,
7346                                                  buf, &stop_composition);
7347           if (pos == stop_charset)
7348             buf = handle_charset_annotation (pos, end_pos, coding,
7349                                              buf, &stop_charset);
7350           stop = (stop_composition < stop_charset
7351                   ? stop_composition : stop_charset);
7352         }
7353
7354       if (! multibytep)
7355         {
7356           EMACS_INT bytes;
7357
7358           if (coding->encoder == encode_coding_raw_text
7359               || coding->encoder == encode_coding_ccl)
7360             c = *src++, pos++;
7361           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7362             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7363           else
7364             c = BYTE8_TO_CHAR (*src), src++, pos++;
7365         }
7366       else
7367         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7368       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7369         c = '\n';
7370       if (! EQ (eol_type, Qunix))
7371         {
7372           if (c == '\n')
7373             {
7374               if (EQ (eol_type, Qdos))
7375                 *buf++ = '\r';
7376               else
7377                 c = '\r';
7378             }
7379         }
7380
7381       trans = Qnil;
7382       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7383       if (NILP (trans))
7384         *buf++ = c;
7385       else
7386         {
7387           int from_nchars = 1, to_nchars = 1;
7388           int *lookup_buf_end;
7389           const unsigned char *p = src;
7390           int i;
7391
7392           lookup_buf[0] = c;
7393           for (i = 1; i < max_lookup && p < src_end; i++)
7394             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7395           lookup_buf_end = lookup_buf + i;
7396           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7397           if (INTEGERP (trans))
7398             c = XINT (trans);
7399           else if (CONSP (trans))
7400             {
7401               from_nchars = ASIZE (XCAR (trans));
7402               trans = XCDR (trans);
7403               if (INTEGERP (trans))
7404                 c = XINT (trans);
7405               else
7406                 {
7407                   to_nchars = ASIZE (trans);
7408                   if (buf + to_nchars > buf_end)
7409                     break;
7410                   c = XINT (AREF (trans, 0));
7411                 }
7412             }
7413           else
7414             break;
7415           *buf++ = c;
7416           for (i = 1; i < to_nchars; i++)
7417             *buf++ = XINT (AREF (trans, i));
7418           for (i = 1; i < from_nchars; i++, pos++)
7419             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7420         }
7421     }
7422
7423   coding->consumed = src - coding->source;
7424   coding->consumed_char = pos - coding->src_pos;
7425   coding->charbuf_used = buf - coding->charbuf;
7426   coding->chars_at_source = 0;
7427 }
7428
7429
7430 /* Encode the text at CODING->src_object into CODING->dst_object.
7431    CODING->src_object is a buffer or a string.
7432    CODING->dst_object is a buffer or nil.
7433
7434    If CODING->src_object is a buffer, it must be the current buffer.
7435    In this case, if CODING->src_pos is positive, it is a position of
7436    the source text in the buffer, otherwise. the source text is in the
7437    gap area of the buffer, and coding->src_pos specifies the offset of
7438    the text from GPT (which must be the same as PT).  If this is the
7439    same buffer as CODING->dst_object, CODING->src_pos must be
7440    negative and CODING should not have `pre-write-conversion'.
7441
7442    If CODING->src_object is a string, CODING should not have
7443    `pre-write-conversion'.
7444
7445    If CODING->dst_object is a buffer, the encoded data is inserted at
7446    the current point of that buffer.
7447
7448    If CODING->dst_object is nil, the encoded data is placed at the
7449    memory area specified by CODING->destination.  */
7450
7451 static int
7452 encode_coding (struct coding_system *coding)
7453 {
7454   Lisp_Object attrs;
7455   Lisp_Object translation_table;
7456   int max_lookup;
7457   struct ccl_spec cclspec;
7458
7459   attrs = CODING_ID_ATTRS (coding->id);
7460   if (coding->encoder == encode_coding_raw_text)
7461     translation_table = Qnil, max_lookup = 0;
7462   else
7463     translation_table = get_translation_table (attrs, 1, &max_lookup);
7464
7465   if (BUFFERP (coding->dst_object))
7466     {
7467       set_buffer_internal (XBUFFER (coding->dst_object));
7468       coding->dst_multibyte
7469         = ! NILP (current_buffer->enable_multibyte_characters);
7470     }
7471
7472   coding->consumed = coding->consumed_char = 0;
7473   coding->produced = coding->produced_char = 0;
7474   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7475   coding->errors = 0;
7476
7477   ALLOC_CONVERSION_WORK_AREA (coding);
7478
7479   if (coding->encoder == encode_coding_ccl)
7480     {
7481       coding->spec.ccl = &cclspec;
7482       setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7483     }
7484   do {
7485     coding_set_source (coding);
7486     consume_chars (coding, translation_table, max_lookup);
7487     coding_set_destination (coding);
7488     (*(coding->encoder)) (coding);
7489   } while (coding->consumed_char < coding->src_chars);
7490
7491   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7492     insert_from_gap (coding->produced_char, coding->produced);
7493
7494   return (coding->result);
7495 }
7496
7497
7498 /* Name (or base name) of the work buffers used for code conversion.  */
7499 static Lisp_Object Vcode_conversion_workbuf_name;
7500
7501 /* A working buffer used by the top level conversion.  Once it is
7502    created, it is never destroyed.  It has the name
7503    Vcode_conversion_workbuf_name.  The other working buffers are
7504    destroyed after the use is finished, and their names are modified
7505    versions of Vcode_conversion_workbuf_name.  */
7506 static Lisp_Object Vcode_conversion_reused_workbuf;
7507
7508 /* Nonzero iff Vcode_conversion_reused_workbuf is already in use.  It is incremented by every call of make_conversion_work_buffer, and reset to 0 by code_conversion_restore when the reused buffer is released.  */
7509 static int reused_workbuf_in_use;
7510
7511
7512 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7513    multibyteness of returning buffer.  */
7514
7515 static Lisp_Object
7516 make_conversion_work_buffer (int multibyte)
7517 {
7518   Lisp_Object name, workbuf;
7519   struct buffer *current;
7520
7521   if (reused_workbuf_in_use++)
7522     {
7523       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7524       workbuf = Fget_buffer_create (name);
7525     }
7526   else
7527     {
7528       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7529         Vcode_conversion_reused_workbuf
7530           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7531       workbuf = Vcode_conversion_reused_workbuf;
7532     }
7533   current = current_buffer;
7534   set_buffer_internal (XBUFFER (workbuf));
7535   /* We can't allow modification hooks to run in the work buffer.  For
7536      instance, directory_files_internal assumes that file decoding
7537      doesn't compile new regexps.  */
7538   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7539   Ferase_buffer ();
7540   current_buffer->undo_list = Qt;
7541   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7542   set_buffer_internal (current);
7543   return workbuf;
7544 }
7545
7546
7547 static Lisp_Object
7548 code_conversion_restore (Lisp_Object arg)
7549 {
7550   Lisp_Object current, workbuf;
7551   struct gcpro gcpro1;
7552
7553   GCPRO1 (arg);
7554   current = XCAR (arg);
7555   workbuf = XCDR (arg);
7556   if (! NILP (workbuf))
7557     {
7558       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7559         reused_workbuf_in_use = 0;
7560       else if (! NILP (Fbuffer_live_p (workbuf)))
7561         Fkill_buffer (workbuf);
7562     }
7563   set_buffer_internal (XBUFFER (current));
7564   UNGCPRO;
7565   return Qnil;
7566 }
7567
7568 Lisp_Object
7569 code_conversion_save (int with_work_buf, int multibyte)
7570 {
7571   Lisp_Object workbuf = Qnil;
7572
7573   if (with_work_buf)
7574     workbuf = make_conversion_work_buffer (multibyte);
7575   record_unwind_protect (code_conversion_restore,
7576                          Fcons (Fcurrent_buffer (), workbuf));
7577   return workbuf;
7578 }
7579
7580 int
7581 decode_coding_gap (struct coding_system *coding,
7582                    EMACS_INT chars, EMACS_INT bytes)
7583 {
7584   int count = SPECPDL_INDEX ();
7585   Lisp_Object attrs;
7586
7587   code_conversion_save (0, 0);
7588
7589   coding->src_object = Fcurrent_buffer ();
7590   coding->src_chars = chars;
7591   coding->src_bytes = bytes;
7592   coding->src_pos = -chars;
7593   coding->src_pos_byte = -bytes;
7594   coding->src_multibyte = chars < bytes;
7595   coding->dst_object = coding->src_object;
7596   coding->dst_pos = PT;
7597   coding->dst_pos_byte = PT_BYTE;
7598   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7599
7600   if (CODING_REQUIRE_DETECTION (coding))
7601     detect_coding (coding);
7602
7603   coding->mode |= CODING_MODE_LAST_BLOCK;
7604   current_buffer->text->inhibit_shrinking = 1;
7605   decode_coding (coding);
7606   current_buffer->text->inhibit_shrinking = 0;
7607
7608   attrs = CODING_ID_ATTRS (coding->id);
7609   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7610     {
7611       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7612       Lisp_Object val;
7613
7614       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7615       val = call1 (CODING_ATTR_POST_READ (attrs),
7616                    make_number (coding->produced_char));
7617       CHECK_NATNUM (val);
7618       coding->produced_char += Z - prev_Z;
7619       coding->produced += Z_BYTE - prev_Z_BYTE;
7620     }
7621
7622   unbind_to (count, Qnil);
7623   return coding->result;
7624 }
7625
7626 int
7627 encode_coding_gap (struct coding_system *coding,
7628                    EMACS_INT chars, EMACS_INT bytes)
7629 {
7630   int count = SPECPDL_INDEX ();
7631
7632   code_conversion_save (0, 0);
7633
7634   coding->src_object = Fcurrent_buffer ();
7635   coding->src_chars = chars;
7636   coding->src_bytes = bytes;
7637   coding->src_pos = -chars;
7638   coding->src_pos_byte = -bytes;
7639   coding->src_multibyte = chars < bytes;
7640   coding->dst_object = coding->src_object;
7641   coding->dst_pos = PT;
7642   coding->dst_pos_byte = PT_BYTE;
7643
7644   encode_coding (coding);
7645
7646   unbind_to (count, Qnil);
7647   return coding->result;
7648 }
7649
7650
7651 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7652    SRC_OBJECT into DST_OBJECT by coding context CODING.
7653
7654    SRC_OBJECT is a buffer, a string, or Qnil.
7655
7656    If it is a buffer, the text is at point of the buffer.  FROM and TO
7657    are positions in the buffer.
7658
7659    If it is a string, the text is at the beginning of the string.
7660    FROM and TO are indices to the string.
7661
7662    If it is nil, the text is at coding->source.  FROM and TO are
7663    indices to coding->source.
7664
7665    DST_OBJECT is a buffer, Qt, or Qnil.
7666
7667    If it is a buffer, the decoded text is inserted at point of the
7668    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7669    is deleted.
7670
7671    If it is Qt, a string is made from the decoded text, and
7672    set in CODING->dst_object.
7673
7674    If it is Qnil, the decoded text is stored at CODING->destination.
7675    The caller must allocate CODING->dst_bytes bytes at
7676    CODING->destination by xmalloc.  If the decoded text is longer than
7677    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7678  */
7679
7680 void
7681 decode_coding_object (struct coding_system *coding,
7682                       Lisp_Object src_object,
7683                       EMACS_INT from, EMACS_INT from_byte,
7684                       EMACS_INT to, EMACS_INT to_byte,
7685                       Lisp_Object dst_object)
7686 {
7687   int count = SPECPDL_INDEX ();
7688   unsigned char *destination;
7689   EMACS_INT dst_bytes;
7690   EMACS_INT chars = to - from;
7691   EMACS_INT bytes = to_byte - from_byte;
7692   Lisp_Object attrs;
7693   int saved_pt = -1, saved_pt_byte;
7694   int need_marker_adjustment = 0;
7695   Lisp_Object old_deactivate_mark;
7696
7697   old_deactivate_mark = Vdeactivate_mark;
7698
7699   if (NILP (dst_object))
7700     {
7701       destination = coding->destination;
7702       dst_bytes = coding->dst_bytes;
7703     }
7704
7705   coding->src_object = src_object;
7706   coding->src_chars = chars;
7707   coding->src_bytes = bytes;
7708   coding->src_multibyte = chars < bytes;
7709
7710   if (STRINGP (src_object))
7711     {
7712       coding->src_pos = from;
7713       coding->src_pos_byte = from_byte;
7714     }
7715   else if (BUFFERP (src_object))
7716     {
7717       set_buffer_internal (XBUFFER (src_object));
7718       if (from != GPT)
7719         move_gap_both (from, from_byte);
7720       if (EQ (src_object, dst_object))
7721         {
7722           struct Lisp_Marker *tail;
7723
7724           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7725             {
7726               tail->need_adjustment
7727                 = tail->charpos == (tail->insertion_type ? from : to);
7728               need_marker_adjustment |= tail->need_adjustment;
7729             }
7730           saved_pt = PT, saved_pt_byte = PT_BYTE;
7731           TEMP_SET_PT_BOTH (from, from_byte);
7732           current_buffer->text->inhibit_shrinking = 1;
7733           del_range_both (from, from_byte, to, to_byte, 1);
7734           coding->src_pos = -chars;
7735           coding->src_pos_byte = -bytes;
7736         }
7737       else
7738         {
7739           coding->src_pos = from;
7740           coding->src_pos_byte = from_byte;
7741         }
7742     }
7743
7744   if (CODING_REQUIRE_DETECTION (coding))
7745     detect_coding (coding);
7746   attrs = CODING_ID_ATTRS (coding->id);
7747
7748   if (EQ (dst_object, Qt)
7749       || (! NILP (CODING_ATTR_POST_READ (attrs))
7750           && NILP (dst_object)))
7751     {
7752       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7753       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7754       coding->dst_pos = BEG;
7755       coding->dst_pos_byte = BEG_BYTE;
7756     }
7757   else if (BUFFERP (dst_object))
7758     {
7759       code_conversion_save (0, 0);
7760       coding->dst_object = dst_object;
7761       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7762       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7763       coding->dst_multibyte
7764         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7765     }
7766   else
7767     {
7768       code_conversion_save (0, 0);
7769       coding->dst_object = Qnil;
7770       /* Most callers presume this will return a multibyte result, and they
7771          won't use `binary' or `raw-text' anyway, so let's not worry about
7772          CODING_FOR_UNIBYTE.  */
7773       coding->dst_multibyte = 1;
7774     }
7775
7776   decode_coding (coding);
7777
7778   if (BUFFERP (coding->dst_object))
7779     set_buffer_internal (XBUFFER (coding->dst_object));
7780
7781   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7782     {
7783       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7784       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7785       Lisp_Object val;
7786
7787       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7788       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7789               old_deactivate_mark);
7790       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7791                         make_number (coding->produced_char));
7792       UNGCPRO;
7793       CHECK_NATNUM (val);
7794       coding->produced_char += Z - prev_Z;
7795       coding->produced += Z_BYTE - prev_Z_BYTE;
7796     }
7797
7798   if (EQ (dst_object, Qt))
7799     {
7800       coding->dst_object = Fbuffer_string ();
7801     }
7802   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7803     {
7804       set_buffer_internal (XBUFFER (coding->dst_object));
7805       if (dst_bytes < coding->produced)
7806         {
7807           destination = xrealloc (destination, coding->produced);
7808           if (! destination)
7809             {
7810               record_conversion_result (coding,
7811                                         CODING_RESULT_INSUFFICIENT_MEM);
7812               unbind_to (count, Qnil);
7813               return;
7814             }
7815           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7816             move_gap_both (BEGV, BEGV_BYTE);
7817           bcopy (BEGV_ADDR, destination, coding->produced);
7818           coding->destination = destination;
7819         }
7820     }
7821
7822   if (saved_pt >= 0)
7823     {
7824       /* This is the case of:
7825          (BUFFERP (src_object) && EQ (src_object, dst_object))
7826          As we have moved PT while replacing the original buffer
7827          contents, we must recover it now.  */
7828       set_buffer_internal (XBUFFER (src_object));
7829       current_buffer->text->inhibit_shrinking = 0;
7830       if (saved_pt < from)
7831         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7832       else if (saved_pt < from + chars)
7833         TEMP_SET_PT_BOTH (from, from_byte);
7834       else if (! NILP (current_buffer->enable_multibyte_characters))
7835         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7836                           saved_pt_byte + (coding->produced - bytes));
7837       else
7838         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7839                           saved_pt_byte + (coding->produced - bytes));
7840
7841       if (need_marker_adjustment)
7842         {
7843           struct Lisp_Marker *tail;
7844
7845           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7846             if (tail->need_adjustment)
7847               {
7848                 tail->need_adjustment = 0;
7849                 if (tail->insertion_type)
7850                   {
7851                     tail->bytepos = from_byte;
7852                     tail->charpos = from;
7853                   }
7854                 else
7855                   {
7856                     tail->bytepos = from_byte + coding->produced;
7857                     tail->charpos
7858                       = (NILP (current_buffer->enable_multibyte_characters)
7859                          ? tail->bytepos : from + coding->produced_char);
7860                   }
7861               }
7862         }
7863     }
7864
7865   Vdeactivate_mark = old_deactivate_mark;
7866   unbind_to (count, coding->dst_object);
7867 }
7868
7869
7870 void
7871 encode_coding_object (struct coding_system *coding,
7872                       Lisp_Object src_object,
7873                       EMACS_INT from, EMACS_INT from_byte,
7874                       EMACS_INT to, EMACS_INT to_byte,
7875                       Lisp_Object dst_object)
7876 {
7877   int count = SPECPDL_INDEX ();
7878   EMACS_INT chars = to - from;
7879   EMACS_INT bytes = to_byte - from_byte;
7880   Lisp_Object attrs;
7881   int saved_pt = -1, saved_pt_byte;
7882   int need_marker_adjustment = 0;
7883   int kill_src_buffer = 0;
7884   Lisp_Object old_deactivate_mark;
7885
7886   old_deactivate_mark = Vdeactivate_mark;
7887
7888   coding->src_object = src_object;
7889   coding->src_chars = chars;
7890   coding->src_bytes = bytes;
7891   coding->src_multibyte = chars < bytes;
7892
7893   attrs = CODING_ID_ATTRS (coding->id);
7894
7895   if (EQ (src_object, dst_object))
7896     {
7897       struct Lisp_Marker *tail;
7898
7899       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7900         {
7901           tail->need_adjustment
7902             = tail->charpos == (tail->insertion_type ? from : to);
7903           need_marker_adjustment |= tail->need_adjustment;
7904         }
7905     }
7906
7907   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7908     {
7909       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7910       set_buffer_internal (XBUFFER (coding->src_object));
7911       if (STRINGP (src_object))
7912         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7913       else if (BUFFERP (src_object))
7914         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7915       else
7916         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7917
7918       if (EQ (src_object, dst_object))
7919         {
7920           set_buffer_internal (XBUFFER (src_object));
7921           saved_pt = PT, saved_pt_byte = PT_BYTE;
7922           del_range_both (from, from_byte, to, to_byte, 1);
7923           set_buffer_internal (XBUFFER (coding->src_object));
7924         }
7925
7926       {
7927         Lisp_Object args[3];
7928         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7929
7930         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7931                 old_deactivate_mark);
7932         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7933         args[1] = make_number (BEG);
7934         args[2] = make_number (Z);
7935         safe_call (3, args);
7936         UNGCPRO;
7937       }
7938       if (XBUFFER (coding->src_object) != current_buffer)
7939         kill_src_buffer = 1;
7940       coding->src_object = Fcurrent_buffer ();
7941       if (BEG != GPT)
7942         move_gap_both (BEG, BEG_BYTE);
7943       coding->src_chars = Z - BEG;
7944       coding->src_bytes = Z_BYTE - BEG_BYTE;
7945       coding->src_pos = BEG;
7946       coding->src_pos_byte = BEG_BYTE;
7947       coding->src_multibyte = Z < Z_BYTE;
7948     }
7949   else if (STRINGP (src_object))
7950     {
7951       code_conversion_save (0, 0);
7952       coding->src_pos = from;
7953       coding->src_pos_byte = from_byte;
7954     }
7955   else if (BUFFERP (src_object))
7956     {
7957       code_conversion_save (0, 0);
7958       set_buffer_internal (XBUFFER (src_object));
7959       if (EQ (src_object, dst_object))
7960         {
7961           saved_pt = PT, saved_pt_byte = PT_BYTE;
7962           coding->src_object = del_range_1 (from, to, 1, 1);
7963           coding->src_pos = 0;
7964           coding->src_pos_byte = 0;
7965         }
7966       else
7967         {
7968           if (from < GPT && to >= GPT)
7969             move_gap_both (from, from_byte);
7970           coding->src_pos = from;
7971           coding->src_pos_byte = from_byte;
7972         }
7973     }
7974   else
7975     code_conversion_save (0, 0);
7976
7977   if (BUFFERP (dst_object))
7978     {
7979       coding->dst_object = dst_object;
7980       if (EQ (src_object, dst_object))
7981         {
7982           coding->dst_pos = from;
7983           coding->dst_pos_byte = from_byte;
7984         }
7985       else
7986         {
7987           struct buffer *current = current_buffer;
7988
7989           set_buffer_temp (XBUFFER (dst_object));
7990           coding->dst_pos = PT;
7991           coding->dst_pos_byte = PT_BYTE;
7992           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7993           set_buffer_temp (current);
7994         }
7995       coding->dst_multibyte
7996         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7997     }
7998   else if (EQ (dst_object, Qt))
7999     {
8000       coding->dst_object = Qnil;
8001       coding->dst_bytes = coding->src_chars;
8002       if (coding->dst_bytes == 0)
8003         coding->dst_bytes = 1;
8004       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
8005       coding->dst_multibyte = 0;
8006     }
8007   else
8008     {
8009       coding->dst_object = Qnil;
8010       coding->dst_multibyte = 0;
8011     }
8012
8013   encode_coding (coding);
8014
8015   if (EQ (dst_object, Qt))
8016     {
8017       if (BUFFERP (coding->dst_object))
8018         coding->dst_object = Fbuffer_string ();
8019       else
8020         {
8021           coding->dst_object
8022             = make_unibyte_string ((char *) coding->destination,
8023                                    coding->produced);
8024           xfree (coding->destination);
8025         }
8026     }
8027
8028   if (saved_pt >= 0)
8029     {
8030       /* This is the case of:
8031          (BUFFERP (src_object) && EQ (src_object, dst_object))
8032          As we have moved PT while replacing the original buffer
8033          contents, we must recover it now.  */
8034       set_buffer_internal (XBUFFER (src_object));
8035       if (saved_pt < from)
8036         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8037       else if (saved_pt < from + chars)
8038         TEMP_SET_PT_BOTH (from, from_byte);
8039       else if (! NILP (current_buffer->enable_multibyte_characters))
8040         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8041                           saved_pt_byte + (coding->produced - bytes));
8042       else
8043         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8044                           saved_pt_byte + (coding->produced - bytes));
8045
8046       if (need_marker_adjustment)
8047         {
8048           struct Lisp_Marker *tail;
8049
8050           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8051             if (tail->need_adjustment)
8052               {
8053                 tail->need_adjustment = 0;
8054                 if (tail->insertion_type)
8055                   {
8056                     tail->bytepos = from_byte;
8057                     tail->charpos = from;
8058                   }
8059                 else
8060                   {
8061                     tail->bytepos = from_byte + coding->produced;
8062                     tail->charpos
8063                       = (NILP (current_buffer->enable_multibyte_characters)
8064                          ? tail->bytepos : from + coding->produced_char);
8065                   }
8066               }
8067         }
8068     }
8069
8070   if (kill_src_buffer)
8071     Fkill_buffer (coding->src_object);
8072
8073   Vdeactivate_mark = old_deactivate_mark;
8074   unbind_to (count, Qnil);
8075 }
8076
8077
8078 Lisp_Object
8079 preferred_coding_system (void)
8080 {
8081   int id = coding_categories[coding_priorities[0]].id;
8082
8083   return CODING_ID_NAME (id);
8084 }
8085
8086 \f
8087 #ifdef emacs
8088 /*** 11. Emacs Lisp library functions ***/
8089
8090 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8091        doc: /* Return t if OBJECT is nil or a coding-system.
8092 See the documentation of `define-coding-system' for information
8093 about coding-system objects.  */)
8094      (object)
8095      Lisp_Object object;
8096 {
8097   if (NILP (object)
8098       || CODING_SYSTEM_ID (object) >= 0)
8099     return Qt;
8100   if (! SYMBOLP (object)
8101       || NILP (Fget (object, Qcoding_system_define_form)))
8102     return Qnil;
8103   return Qt;
8104 }
8105
8106 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8107        Sread_non_nil_coding_system, 1, 1, 0,
8108        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8109      (prompt)
8110      Lisp_Object prompt;
8111 {
8112   Lisp_Object val;
8113   do
8114     {
8115       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8116                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8117     }
8118   while (SCHARS (val) == 0);
8119   return (Fintern (val, Qnil));
8120 }
8121
8122 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8123        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8124 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8125 Ignores case when completing coding systems (all Emacs coding systems
8126 are lower-case).  */)
8127      (prompt, default_coding_system)
8128      Lisp_Object prompt, default_coding_system;
8129 {
8130   Lisp_Object val;
8131   int count = SPECPDL_INDEX ();
8132
8133   if (SYMBOLP (default_coding_system))
8134     default_coding_system = SYMBOL_NAME (default_coding_system);
8135   specbind (Qcompletion_ignore_case, Qt);
8136   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8137                           Qt, Qnil, Qcoding_system_history,
8138                           default_coding_system, Qnil);
8139   unbind_to (count, Qnil);
8140   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8141 }
8142
8143 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8144        1, 1, 0,
8145        doc: /* Check validity of CODING-SYSTEM.
8146 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8147 It is valid if it is nil or a symbol defined as a coding system by the
8148 function `define-coding-system'.  */)
8149   (coding_system)
8150      Lisp_Object coding_system;
8151 {
8152   Lisp_Object define_form;
8153
8154   define_form = Fget (coding_system, Qcoding_system_define_form);
8155   if (! NILP (define_form))
8156     {
8157       Fput (coding_system, Qcoding_system_define_form, Qnil);
8158       safe_eval (define_form);
8159     }
8160   if (!NILP (Fcoding_system_p (coding_system)))
8161     return coding_system;
8162   xsignal1 (Qcoding_system_error, coding_system);
8163 }
8164
8165 \f
8166 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8167    HIGHEST is nonzero, return the coding system of the highest
8168    priority among the detected coding systems.  Otherwise return a
8169    list of detected coding systems sorted by their priorities.  If
8170    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8171    multibyte form but contain only ASCII and eight-bit chars.
8172    Otherwise, the bytes are raw bytes.
8173
8174    CODING-SYSTEM controls the detection as below:
8175
8176    If it is nil, detect both text-format and eol-format.  If the
8177    text-format part of CODING-SYSTEM is already specified
8178    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8179    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8180    detect only text-format.  With HIGHEST set, nil is returned if no candidate remains.  */
8181
8182 Lisp_Object
8183 detect_coding_system (const unsigned char *src,
8184                       EMACS_INT src_chars, EMACS_INT src_bytes,
8185                       int highest, int multibytep,
8186                       Lisp_Object coding_system)
8187 {
8188   const unsigned char *src_end = src + src_bytes;
8189   Lisp_Object attrs, eol_type;
8190   Lisp_Object val = Qnil;
8191   struct coding_system coding;
8192   int id;
8193   struct coding_detection_info detect_info;
8194   enum coding_category base_category;
8195   int null_byte_found = 0, eight_bit_found = 0;
8196
8197   if (NILP (coding_system))
8198     coding_system = Qundecided;  /* nil is handled as Qundecided.  */
8199   setup_coding_system (coding_system, &coding);
8200   attrs = CODING_ID_ATTRS (coding.id);
8201   eol_type = CODING_ID_EOL_TYPE (coding.id);
8202   coding_system = CODING_ATTR_BASE_NAME (attrs);
8203
8204   coding.source = src;
8205   coding.src_chars = src_chars;
8206   coding.src_bytes = src_bytes;
8207   coding.src_multibyte = multibytep;
8208   coding.consumed = 0;
8209   coding.mode |= CODING_MODE_LAST_BLOCK;
8210   coding.head_ascii = 0;
8211
8212   detect_info.checked = detect_info.found = detect_info.rejected = 0;  /* No category judged yet.  */
8213
8214   /* At first, detect text-format if necessary.  */
8215   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8216   if (base_category == coding_category_undecided)  /* Text-format is not yet decided.  */
8217     {
8218       enum coding_category category;
8219       struct coding_system *this;
8220       int c, i;
8221
8222       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
8223       for (; src < src_end; src++)
8224         {
8225           c = *src;
8226           if (c & 0x80)  /* High bit set: an 8-bit byte.  */
8227             {
8228               eight_bit_found = 1;
8229               if (null_byte_found)
8230                 break;  /* Null byte and 8-bit byte both seen.  */
8231             }
8232           else if (c < 0x20)
8233             {
8234               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8235                   && ! inhibit_iso_escape_detection
8236                   && ! detect_info.checked)
8237                 {
8238                   if (detect_coding_iso_2022 (&coding, &detect_info))
8239                     {
8240                       /* We have scanned the whole data.  */
8241                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8242                         {
8243                           /* We didn't find an 8-bit code.  We may
8244                              have found a null-byte, but it's very
8245                              rare that a binary file conforms to
8246                              ISO-2022.  */
8247                           src = src_end;
8248                           coding.head_ascii = src - coding.source;
8249                         }
8250                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8251                       break;
8252                     }
8253                 }
8254               else if (! c && !inhibit_null_byte_detection)
8255                 {
8256                   null_byte_found = 1;
8257                   if (eight_bit_found)
8258                     break;  /* Null byte and 8-bit byte both seen.  */
8259                 }
8260               if (! eight_bit_found)
8261                 coding.head_ascii++;  /* Still within the leading 7-bit run.  */
8262             }
8263           else if (! eight_bit_found)
8264             coding.head_ascii++;
8265         }
8266
8267       if (null_byte_found || eight_bit_found
8268           || coding.head_ascii < coding.src_bytes
8269           || detect_info.found)
8270         {
8271           if (coding.head_ascii == coding.src_bytes)
8272             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
8273             for (i = 0; i < coding_category_raw_text; i++)
8274               {
8275                 category = coding_priorities[i];
8276                 this = coding_categories + category;
8277                 if (detect_info.found & (1 << category))
8278                   break;  /* First found category in priority order.  */
8279               }
8280           else
8281             {
8282               if (null_byte_found)
8283                 {
8284                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8285                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8286                 }
8287               for (i = 0; i < coding_category_raw_text; i++)
8288                 {
8289                   category = coding_priorities[i];
8290                   this = coding_categories + category;
8291
8292                   if (this->id < 0)
8293                     {
8294                       /* No coding system of this category is defined.  */
8295                       detect_info.rejected |= (1 << category);
8296                     }
8297                   else if (category >= coding_category_raw_text)
8298                     continue;
8299                   else if (detect_info.checked & (1 << category))
8300                     {
8301                       if (highest
8302                           && (detect_info.found & (1 << category)))
8303                         break;
8304                     }
8305                   else if ((*(this->detector)) (&coding, &detect_info)  /* Run its detector.  */
8306                            && highest
8307                            && (detect_info.found & (1 << category)))
8308                     {
8309                       if (category == coding_category_utf_16_auto)  /* Pick the byte order found.  */
8310                         {
8311                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8312                             category = coding_category_utf_16_le;
8313                           else
8314                             category = coding_category_utf_16_be;
8315                         }
8316                       break;
8317                     }
8318                 }
8319             }
8320         }
8321
8322       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8323           || null_byte_found)  /* Fall back to Qno_conversion.  */
8324         {
8325           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8326           id = CODING_SYSTEM_ID (Qno_conversion);
8327           val = Fcons (make_number (id), Qnil);
8328         }
8329       else if (! detect_info.rejected && ! detect_info.found)  /* No evidence either way.  */
8330         {
8331           detect_info.found = CATEGORY_MASK_ANY;
8332           id = coding_categories[coding_category_undecided].id;
8333           val = Fcons (make_number (id), Qnil);
8334         }
8335       else if (highest)
8336         {
8337           if (detect_info.found)
8338             {
8339               detect_info.found = 1 << category;
8340               val = Fcons (make_number (this->id), Qnil);
8341             }
8342           else
8343             for (i = 0; i < coding_category_raw_text; i++)  /* First category not rejected.  */
8344               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8345                 {
8346                   detect_info.found = 1 << coding_priorities[i];
8347                   id = coding_categories[coding_priorities[i]].id;
8348                   val = Fcons (make_number (id), Qnil);
8349                   break;
8350                 }
8351         }
8352       else
8353         {
8354           int mask = detect_info.rejected | detect_info.found;  /* Categories already judged.  */
8355           int found = 0;
8356
8357           for (i = coding_category_raw_text - 1; i >= 0; i--)  /* Cons the unjudged categories.  */
8358             {
8359               category = coding_priorities[i];
8360               if (! (mask & (1 << category)))
8361                 {
8362                   found |= 1 << category;
8363                   id = coding_categories[category].id;
8364                   if (id >= 0)
8365                     val = Fcons (make_number (id), val);
8366                 }
8367             }
8368           for (i = coding_category_raw_text - 1; i >= 0; i--)  /* Then put the found ones in front.  */
8369             {
8370               category = coding_priorities[i];
8371               if (detect_info.found & (1 << category))
8372                 {
8373                   id = coding_categories[category].id;
8374                   val = Fcons (make_number (id), val);
8375                 }
8376             }
8377           detect_info.found |= found;
8378         }
8379     }
8380   else if (base_category == coding_category_utf_8_auto)
8381     {
8382       if (detect_coding_utf_8 (&coding, &detect_info))
8383         {
8384           struct coding_system *this;
8385
8386           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8387             this = coding_categories + coding_category_utf_8_sig;
8388           else
8389             this = coding_categories + coding_category_utf_8_nosig;
8390           val = Fcons (make_number (this->id), Qnil);
8391         }
8392     }
8393   else if (base_category == coding_category_utf_16_auto)
8394     {
8395       if (detect_coding_utf_16 (&coding, &detect_info))
8396         {
8397           struct coding_system *this;
8398
8399           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8400             this = coding_categories + coding_category_utf_16_le;
8401           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8402             this = coding_categories + coding_category_utf_16_be;
8403           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8404             this = coding_categories + coding_category_utf_16_be_nosig;
8405           else
8406             this = coding_categories + coding_category_utf_16_le_nosig;
8407           val = Fcons (make_number (this->id), Qnil);
8408         }
8409     }
8410   else
8411     {
8412       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8413       val = Fcons (make_number (coding.id), Qnil);
8414     }
8415
8416   /* Then, detect eol-format if necessary.  */
8417   {
8418     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8419     Lisp_Object tail;
8420
8421     if (VECTORP (eol_type))
8422       {
8423         if (detect_info.found & ~CATEGORY_MASK_UTF_16)  /* Some non-UTF-16 candidate exists.  */
8424           {
8425             if (null_byte_found)
8426               normal_eol = EOL_SEEN_LF;
8427             else
8428               normal_eol = detect_eol (coding.source, src_bytes,
8429                                        coding_category_raw_text);
8430           }
8431         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8432                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8433           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8434                                       coding_category_utf_16_be);
8435         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8436                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8437           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8438                                       coding_category_utf_16_le);
8439       }
8440     else
8441       {
8442         if (EQ (eol_type, Qunix))
8443           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8444         else if (EQ (eol_type, Qdos))
8445           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8446         else
8447           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8448       }
8449
8450     for (tail = val; CONSP (tail); tail = XCDR (tail))  /* Replace each id stored in VAL.  */
8451       {
8452         enum coding_category category;
8453         int this_eol;
8454
8455         id = XINT (XCAR (tail));
8456         attrs = CODING_ID_ATTRS (id);
8457         category = XINT (CODING_ATTR_CATEGORY (attrs));
8458         eol_type = CODING_ID_EOL_TYPE (id);
8459         if (VECTORP (eol_type))
8460           {
8461             if (category == coding_category_utf_16_be
8462                 || category == coding_category_utf_16_be_nosig)
8463               this_eol = utf_16_be_eol;
8464             else if (category == coding_category_utf_16_le
8465                      || category == coding_category_utf_16_le_nosig)
8466               this_eol = utf_16_le_eol;
8467             else
8468               this_eol = normal_eol;
8469
8470             if (this_eol == EOL_SEEN_LF)
8471               XSETCAR (tail, AREF (eol_type, 0));
8472             else if (this_eol == EOL_SEEN_CRLF)
8473               XSETCAR (tail, AREF (eol_type, 1));
8474             else if (this_eol == EOL_SEEN_CR)
8475               XSETCAR (tail, AREF (eol_type, 2));
8476             else
8477               XSETCAR (tail, CODING_ID_NAME (id));
8478           }
8479         else
8480           XSETCAR (tail, CODING_ID_NAME (id));
8481       }
8482   }
8483
8484   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8485 }
8486
8487
8488 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8489        2, 3, 0,
8490        doc: /* Detect coding system of the text in the region between START and END.
8491 Return a list of possible coding systems ordered by priority.
8492 The coding systems to try and their priorities follows what
8493 the function `coding-system-priority-list' (which see) returns.
8494
8495 If only ASCII characters are found (except for such ISO-2022 control
8496 characters as ESC), it returns a list of single element `undecided'
8497 or its subsidiary coding system according to a detected end-of-line
8498 format.
8499
8500 If optional argument HIGHEST is non-nil, return the coding system of
8501 highest priority.  */)
8502      (start, end, highest)
8503      Lisp_Object start, end, highest;
8504 {
8505   int from, to;
8506   int from_byte, to_byte;
8507
8508   CHECK_NUMBER_COERCE_MARKER (start);
8509   CHECK_NUMBER_COERCE_MARKER (end);
8510
8511   validate_region (&start, &end);
8512   from = XINT (start), to = XINT (end);
8513   from_byte = CHAR_TO_BYTE (from);
8514   to_byte = CHAR_TO_BYTE (to);
8515
8516   if (from < GPT && to >= GPT)
8517     move_gap_both (to, to_byte);
8518
8519   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8520                                to - from, to_byte - from_byte,
8521                                !NILP (highest),
8522                                !NILP (current_buffer
8523                                       ->enable_multibyte_characters),
8524                                Qnil);
8525 }
8526
8527 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8528        1, 2, 0,
8529        doc: /* Detect coding system of the text in STRING.
8530 Return a list of possible coding systems ordered by priority.
8531 The coding systems to try and their priorities follows what
8532 the function `coding-system-priority-list' (which see) returns.
8533
8534 If only ASCII characters are found (except for such ISO-2022 control
8535 characters as ESC), it returns a list of single element `undecided'
8536 or its subsidiary coding system according to a detected end-of-line
8537 format.
8538
8539 If optional argument HIGHEST is non-nil, return the coding system of
8540 highest priority.  */)
8541      (string, highest)
8542      Lisp_Object string, highest;
8543 {
8544   CHECK_STRING (string);
8545
8546   return detect_coding_system (SDATA (string),
8547                                SCHARS (string), SBYTES (string),
8548                                !NILP (highest), STRING_MULTIBYTE (string),
8549                                Qnil);
8550 }
8551
8552
8553 static INLINE int
8554 char_encodable_p (int c, Lisp_Object attrs)
8555 {
8556   Lisp_Object tail;
8557   struct charset *charset;
8558   Lisp_Object translation_table;
8559
8560   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8561   if (! NILP (translation_table))
8562     c = translate_char (translation_table, c);
8563   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8564        CONSP (tail); tail = XCDR (tail))
8565     {
8566       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8567       if (CHAR_CHARSET_P (c, charset))
8568         break;
8569     }
8570   return (! NILP (tail));
8571 }
8572
8573
8574 /* Return a list of coding systems that safely encode the text between
8575    START and END.  If EXCLUDE is non-nil, it is a list of coding
8576    systems not to check.  The returned list doesn't contain any such
8577    coding systems.  In any case, if the text contains only ASCII or is
8578    unibyte, return t.  */
8579
8580 DEFUN ("find-coding-systems-region-internal",
8581        Ffind_coding_systems_region_internal,
8582        Sfind_coding_systems_region_internal, 2, 3, 0,
8583        doc: /* Internal use only.  */)
8584      (start, end, exclude)
8585      Lisp_Object start, end, exclude;
8586 {
8587   Lisp_Object coding_attrs_list, safe_codings;
8588   EMACS_INT start_byte, end_byte;
8589   const unsigned char *p, *pbeg, *pend;
8590   int c;
8591   Lisp_Object tail, elt, work_table;
8592
8593   if (STRINGP (start))
8594     {
8595       if (!STRING_MULTIBYTE (start)
8596           || SCHARS (start) == SBYTES (start))
8597         return Qt;
8598       start_byte = 0;
8599       end_byte = SBYTES (start);
8600     }
8601   else
8602     {
8603       CHECK_NUMBER_COERCE_MARKER (start);
8604       CHECK_NUMBER_COERCE_MARKER (end);
8605       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8606         args_out_of_range (start, end);
8607       if (NILP (current_buffer->enable_multibyte_characters))
8608         return Qt;
8609       start_byte = CHAR_TO_BYTE (XINT (start));
8610       end_byte = CHAR_TO_BYTE (XINT (end));
8611       if (XINT (end) - XINT (start) == end_byte - start_byte)
8612         return Qt;
8613
8614       if (XINT (start) < GPT && XINT (end) > GPT)
8615         {
8616           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8617             move_gap_both (XINT (start), start_byte);
8618           else
8619             move_gap_both (XINT (end), end_byte);
8620         }
8621     }
8622
8623   coding_attrs_list = Qnil;
8624   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8625     if (NILP (exclude)
8626         || NILP (Fmemq (XCAR (tail), exclude)))
8627       {
8628         Lisp_Object attrs;
8629
8630         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8631         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8632             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8633           {
8634             ASET (attrs, coding_attr_trans_tbl,
8635                   get_translation_table (attrs, 1, NULL));
8636             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8637           }
8638       }
8639
8640   if (STRINGP (start))
8641     p = pbeg = SDATA (start);
8642   else
8643     p = pbeg = BYTE_POS_ADDR (start_byte);
8644   pend = p + (end_byte - start_byte);
8645
8646   while (p < pend && ASCII_BYTE_P (*p)) p++;
8647   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8648
8649   work_table = Fmake_char_table (Qnil, Qnil);
8650   while (p < pend)
8651     {
8652       if (ASCII_BYTE_P (*p))
8653         p++;
8654       else
8655         {
8656           c = STRING_CHAR_ADVANCE (p);
8657           if (!NILP (char_table_ref (work_table, c)))
8658             /* This character was already checked.  Ignore it.  */
8659             continue;
8660
8661           charset_map_loaded = 0;
8662           for (tail = coding_attrs_list; CONSP (tail);)
8663             {
8664               elt = XCAR (tail);
8665               if (NILP (elt))
8666                 tail = XCDR (tail);
8667               else if (char_encodable_p (c, elt))
8668                 tail = XCDR (tail);
8669               else if (CONSP (XCDR (tail)))
8670                 {
8671                   XSETCAR (tail, XCAR (XCDR (tail)));
8672                   XSETCDR (tail, XCDR (XCDR (tail)));
8673                 }
8674               else
8675                 {
8676                   XSETCAR (tail, Qnil);
8677                   tail = XCDR (tail);
8678                 }
8679             }
8680           if (charset_map_loaded)
8681             {
8682               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8683
8684               if (STRINGP (start))
8685                 pbeg = SDATA (start);
8686               else
8687                 pbeg = BYTE_POS_ADDR (start_byte);
8688               p = pbeg + p_offset;
8689               pend = pbeg + pend_offset;
8690             }
8691           char_table_set (work_table, c, Qt);
8692         }
8693     }
8694
8695   safe_codings = list2 (Qraw_text, Qno_conversion);
8696   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8697     if (! NILP (XCAR (tail)))
8698       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8699
8700   return safe_codings;
8701 }
8702
8703
8704 DEFUN ("unencodable-char-position", Funencodable_char_position,
8705        Sunencodable_char_position, 3, 5, 0,
8706        doc: /*
8707 Return position of first un-encodable character in a region.
8708 START and END specify the region and CODING-SYSTEM specifies the
8709 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8710
8711 If optional 4th argument COUNT is non-nil, it specifies at most how
8712 many un-encodable characters to search.  In this case, the value is a
8713 list of positions.
8714
8715 If optional 5th argument STRING is non-nil, it is a string to search
8716 for un-encodable characters.  In that case, START and END are indexes
8717 to the string.  */)
8718      (start, end, coding_system, count, string)
8719      Lisp_Object start, end, coding_system, count, string;
8720 {
8721   int n;
8722   struct coding_system coding;
8723   Lisp_Object attrs, charset_list, translation_table;
8724   Lisp_Object positions;
8725   int from, to;
8726   const unsigned char *p, *stop, *pend;
8727   int ascii_compatible;
8728
8729   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8730   attrs = CODING_ID_ATTRS (coding.id);
8731   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8732     return Qnil;
8733   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8734   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8735   translation_table = get_translation_table (attrs, 1, NULL);
8736
8737   if (NILP (string))
8738     {
8739       validate_region (&start, &end);
8740       from = XINT (start);
8741       to = XINT (end);
8742       if (NILP (current_buffer->enable_multibyte_characters)
8743           || (ascii_compatible
8744               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8745         return Qnil;
8746       p = CHAR_POS_ADDR (from);
8747       pend = CHAR_POS_ADDR (to);
8748       if (from < GPT && to >= GPT)
8749         stop = GPT_ADDR;
8750       else
8751         stop = pend;
8752     }
8753   else
8754     {
8755       CHECK_STRING (string);
8756       CHECK_NATNUM (start);
8757       CHECK_NATNUM (end);
8758       from = XINT (start);
8759       to = XINT (end);
8760       if (from > to
8761           || to > SCHARS (string))
8762         args_out_of_range_3 (string, start, end);
8763       if (! STRING_MULTIBYTE (string))
8764         return Qnil;
8765       p = SDATA (string) + string_char_to_byte (string, from);
8766       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8767       if (ascii_compatible && (to - from) == (pend - p))
8768         return Qnil;
8769     }
8770
8771   if (NILP (count))
8772     n = 1;
8773   else
8774     {
8775       CHECK_NATNUM (count);
8776       n = XINT (count);
8777     }
8778
8779   positions = Qnil;
8780   while (1)
8781     {
8782       int c;
8783
8784       if (ascii_compatible)
8785         while (p < stop && ASCII_BYTE_P (*p))
8786           p++, from++;
8787       if (p >= stop)
8788         {
8789           if (p >= pend)
8790             break;
8791           stop = pend;
8792           p = GAP_END_ADDR;
8793         }
8794
8795       c = STRING_CHAR_ADVANCE (p);
8796       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8797           && ! char_charset (translate_char (translation_table, c),
8798                              charset_list, NULL))
8799         {
8800           positions = Fcons (make_number (from), positions);
8801           n--;
8802           if (n == 0)
8803             break;
8804         }
8805
8806       from++;
8807     }
8808
8809   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8810 }
8811
8812
8813 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8814        Scheck_coding_systems_region, 3, 3, 0,
8815        doc: /* Check if the region is encodable by coding systems.
8816
8817 START and END are buffer positions specifying the region.
8818 CODING-SYSTEM-LIST is a list of coding systems to check.
8819
8820 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8821 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8822 whole region, POS0, POS1, ... are buffer positions where non-encodable
8823 characters are found.
8824
8825 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8826 value is nil.
8827
8828 START may be a string.  In that case, check if the string is
8829 encodable, and the value contains indices to the string instead of
8830 buffer positions.  END is ignored.
8831
8832 If the current buffer (or START if it is a string) is unibyte, the value
8833 is nil.  */)
8834      (start, end, coding_system_list)
8835      Lisp_Object start, end, coding_system_list;
8836 {
8837   Lisp_Object list;
8838   EMACS_INT start_byte, end_byte;
8839   int pos;
8840   const unsigned char *p, *pbeg, *pend;
8841   int c;
8842   Lisp_Object tail, elt, attrs;
8843
8844   if (STRINGP (start))
8845     {
8846       if (!STRING_MULTIBYTE (start)
8847           || SCHARS (start) == SBYTES (start))
8848         return Qnil;
8849       start_byte = 0;
8850       end_byte = SBYTES (start);
8851       pos = 0;
8852     }
8853   else
8854     {
8855       CHECK_NUMBER_COERCE_MARKER (start);
8856       CHECK_NUMBER_COERCE_MARKER (end);
8857       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8858         args_out_of_range (start, end);
8859       if (NILP (current_buffer->enable_multibyte_characters))
8860         return Qnil;
8861       start_byte = CHAR_TO_BYTE (XINT (start));
8862       end_byte = CHAR_TO_BYTE (XINT (end));
8863       if (XINT (end) - XINT (start) == end_byte - start_byte)
8864         return Qnil;
8865
8866       if (XINT (start) < GPT && XINT (end) > GPT)
8867         {
8868           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8869             move_gap_both (XINT (start), start_byte);
8870           else
8871             move_gap_both (XINT (end), end_byte);
8872         }
8873       pos = XINT (start);
8874     }
8875
8876   list = Qnil;
8877   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8878     {
8879       elt = XCAR (tail);
8880       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8881       ASET (attrs, coding_attr_trans_tbl,
8882             get_translation_table (attrs, 1, NULL));
8883       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8884     }
8885
8886   if (STRINGP (start))
8887     p = pbeg = SDATA (start);
8888   else
8889     p = pbeg = BYTE_POS_ADDR (start_byte);
8890   pend = p + (end_byte - start_byte);
8891
8892   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8893   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8894
8895   while (p < pend)
8896     {
8897       if (ASCII_BYTE_P (*p))
8898         p++;
8899       else
8900         {
8901           c = STRING_CHAR_ADVANCE (p);
8902
8903           charset_map_loaded = 0;
8904           for (tail = list; CONSP (tail); tail = XCDR (tail))
8905             {
8906               elt = XCDR (XCAR (tail));
8907               if (! char_encodable_p (c, XCAR (elt)))
8908                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8909             }
8910           if (charset_map_loaded)
8911             {
8912               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8913
8914               if (STRINGP (start))
8915                 pbeg = SDATA (start);
8916               else
8917                 pbeg = BYTE_POS_ADDR (start_byte);
8918               p = pbeg + p_offset;
8919               pend = pbeg + pend_offset;
8920             }
8921         }
8922       pos++;
8923     }
8924
8925   tail = list;
8926   list = Qnil;
8927   for (; CONSP (tail); tail = XCDR (tail))
8928     {
8929       elt = XCAR (tail);
8930       if (CONSP (XCDR (XCDR (elt))))
8931         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8932                       list);
8933     }
8934
8935   return list;
8936 }
8937
8938
8939 Lisp_Object
8940 code_convert_region (Lisp_Object start, Lisp_Object end,
8941                      Lisp_Object coding_system, Lisp_Object dst_object,
8942                      int encodep, int norecord)
8943 {
8944   struct coding_system coding;
8945   EMACS_INT from, from_byte, to, to_byte;
8946   Lisp_Object src_object;
8947
8948   CHECK_NUMBER_COERCE_MARKER (start);
8949   CHECK_NUMBER_COERCE_MARKER (end);
8950   if (NILP (coding_system))
8951     coding_system = Qno_conversion;
8952   else
8953     CHECK_CODING_SYSTEM (coding_system);
8954   src_object = Fcurrent_buffer ();
8955   if (NILP (dst_object))
8956     dst_object = src_object;
8957   else if (! EQ (dst_object, Qt))
8958     CHECK_BUFFER (dst_object);
8959
8960   validate_region (&start, &end);
8961   from = XFASTINT (start);
8962   from_byte = CHAR_TO_BYTE (from);
8963   to = XFASTINT (end);
8964   to_byte = CHAR_TO_BYTE (to);
8965
8966   setup_coding_system (coding_system, &coding);
8967   coding.mode |= CODING_MODE_LAST_BLOCK;
8968
8969   if (encodep)
8970     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8971                           dst_object);
8972   else
8973     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8974                           dst_object);
8975   if (! norecord)
8976     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8977
8978   return (BUFFERP (dst_object)
8979           ? make_number (coding.produced_char)
8980           : coding.dst_object);
8981 }
8982
8983
8984 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8985        3, 4, "r\nzCoding system: ",
8986        doc: /* Decode the current region from the specified coding system.
8987 When called from a program, takes four arguments:
8988         START, END, CODING-SYSTEM, and DESTINATION.
8989 START and END are buffer positions.
8990
8991 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8992 If nil, the region between START and END is replaced by the decoded text.
8993 If buffer, the decoded text is inserted in that buffer after point (point
8994 does not move).
8995 In those cases, the length of the decoded text is returned.
8996 If DESTINATION is t, the decoded text is returned.
8997
8998 This function sets `last-coding-system-used' to the precise coding system
8999 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9000 not fully specified.)  */)
9001      (start, end, coding_system, destination)
9002      Lisp_Object start, end, coding_system, destination;
9003 {
9004   return code_convert_region (start, end, coding_system, destination, 0, 0);
9005 }
9006
9007 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
9008        3, 4, "r\nzCoding system: ",
9009        doc: /* Encode the current region by specified coding system.
9010 When called from a program, takes four arguments:
9011         START, END, CODING-SYSTEM and DESTINATION.
9012 START and END are buffer positions.
9013
9014 Optional 4th arguments DESTINATION specifies where the encoded text goes.
9015 If nil, the region between START and END is replace by the encoded text.
9016 If buffer, the encoded text is inserted in that buffer after point (point
9017 does not move).
9018 In those cases, the length of the encoded text is returned.
9019 If DESTINATION is t, the encoded text is returned.
9020
9021 This function sets `last-coding-system-used' to the precise coding system
9022 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9023 not fully specified.)  */)
9024   (start, end, coding_system, destination)
9025      Lisp_Object start, end, coding_system, destination;
9026 {
9027   return code_convert_region (start, end, coding_system, destination, 1, 0);
9028 }
9029
9030 Lisp_Object
9031 code_convert_string (string, coding_system, dst_object,
9032                      encodep, nocopy, norecord)
9033      Lisp_Object string, coding_system, dst_object;
9034      int encodep, nocopy, norecord;
9035 {
9036   struct coding_system coding;
9037   EMACS_INT chars, bytes;
9038
9039   CHECK_STRING (string);
9040   if (NILP (coding_system))
9041     {
9042       if (! norecord)
9043         Vlast_coding_system_used = Qno_conversion;
9044       if (NILP (dst_object))
9045         return (nocopy ? Fcopy_sequence (string) : string);
9046     }
9047
9048   if (NILP (coding_system))
9049     coding_system = Qno_conversion;
9050   else
9051     CHECK_CODING_SYSTEM (coding_system);
9052   if (NILP (dst_object))
9053     dst_object = Qt;
9054   else if (! EQ (dst_object, Qt))
9055     CHECK_BUFFER (dst_object);
9056
9057   setup_coding_system (coding_system, &coding);
9058   coding.mode |= CODING_MODE_LAST_BLOCK;
9059   chars = SCHARS (string);
9060   bytes = SBYTES (string);
9061   if (encodep)
9062     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9063   else
9064     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9065   if (! norecord)
9066     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9067
9068   return (BUFFERP (dst_object)
9069           ? make_number (coding.produced_char)
9070           : coding.dst_object);
9071 }
9072
9073
9074 /* Encode or decode STRING according to CODING_SYSTEM.
9075    Do not set Vlast_coding_system_used.
9076
9077    This function is called only from macros DECODE_FILE and
9078    ENCODE_FILE, thus we ignore character composition.  */
9079
9080 Lisp_Object
9081 code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9082                               int encodep)
9083 {
9084   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9085 }
9086
9087
9088 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9089        2, 4, 0,
9090        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9091
9092 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9093 if the decoding operation is trivial.
9094
9095 Optional fourth arg BUFFER non-nil means that the decoded text is
9096 inserted in that buffer after point (point does not move).  In this
9097 case, the return value is the length of the decoded text.
9098
9099 This function sets `last-coding-system-used' to the precise coding system
9100 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9101 not fully specified.)  */)
9102   (string, coding_system, nocopy, buffer)
9103      Lisp_Object string, coding_system, nocopy, buffer;
9104 {
9105   return code_convert_string (string, coding_system, buffer,
9106                               0, ! NILP (nocopy), 0);
9107 }
9108
9109 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9110        2, 4, 0,
9111        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9112
9113 Optional third arg NOCOPY non-nil means it is OK to return STRING
9114 itself if the encoding operation is trivial.
9115
9116 Optional fourth arg BUFFER non-nil means that the encoded text is
9117 inserted in that buffer after point (point does not move).  In this
9118 case, the return value is the length of the encoded text.
9119
9120 This function sets `last-coding-system-used' to the precise coding system
9121 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9122 not fully specified.)  */)
9123      (string, coding_system, nocopy, buffer)
9124      Lisp_Object string, coding_system, nocopy, buffer;
9125 {
9126   return code_convert_string (string, coding_system, buffer,
9127                               1, ! NILP (nocopy), 1);
9128 }
9129
9130 \f
9131 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9132        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9133 Return the corresponding character.  */)
9134      (code)
9135      Lisp_Object code;
9136 {
9137   Lisp_Object spec, attrs, val;
9138   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9139   int c;
9140
9141   CHECK_NATNUM (code);
9142   c = XFASTINT (code);
9143   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9144   attrs = AREF (spec, 0);
9145
9146   if (ASCII_BYTE_P (c)
9147       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9148     return code;
9149
9150   val = CODING_ATTR_CHARSET_LIST (attrs);
9151   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9152   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9153   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9154
9155   if (c <= 0x7F)
9156     charset = charset_roman;
9157   else if (c >= 0xA0 && c < 0xDF)
9158     {
9159       charset = charset_kana;
9160       c -= 0x80;
9161     }
9162   else
9163     {
9164       int s1 = c >> 8, s2 = c & 0xFF;
9165
9166       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9167           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9168         error ("Invalid code: %d", code);
9169       SJIS_TO_JIS (c);
9170       charset = charset_kanji;
9171     }
9172   c = DECODE_CHAR (charset, c);
9173   if (c < 0)
9174     error ("Invalid code: %d", code);
9175   return make_number (c);
9176 }
9177
9178
9179 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9180        doc: /* Encode a Japanese character CH to shift_jis encoding.
9181 Return the corresponding code in SJIS.  */)
9182      (ch)
9183     Lisp_Object ch;
9184 {
9185   Lisp_Object spec, attrs, charset_list;
9186   int c;
9187   struct charset *charset;
9188   unsigned code;
9189
9190   CHECK_CHARACTER (ch);
9191   c = XFASTINT (ch);
9192   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9193   attrs = AREF (spec, 0);
9194
9195   if (ASCII_CHAR_P (c)
9196       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9197     return ch;
9198
9199   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9200   charset = char_charset (c, charset_list, &code);
9201   if (code == CHARSET_INVALID_CODE (charset))
9202     error ("Can't encode by shift_jis encoding: %d", c);
9203   JIS_TO_SJIS (code);
9204
9205   return make_number (code);
9206 }
9207
9208 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9209        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9210 Return the corresponding character.  */)
9211      (code)
9212      Lisp_Object code;
9213 {
9214   Lisp_Object spec, attrs, val;
9215   struct charset *charset_roman, *charset_big5, *charset;
9216   int c;
9217
9218   CHECK_NATNUM (code);
9219   c = XFASTINT (code);
9220   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9221   attrs = AREF (spec, 0);
9222
9223   if (ASCII_BYTE_P (c)
9224       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9225     return code;
9226
9227   val = CODING_ATTR_CHARSET_LIST (attrs);
9228   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9229   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9230
9231   if (c <= 0x7F)
9232     charset = charset_roman;
9233   else
9234     {
9235       int b1 = c >> 8, b2 = c & 0x7F;
9236       if (b1 < 0xA1 || b1 > 0xFE
9237           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9238         error ("Invalid code: %d", code);
9239       charset = charset_big5;
9240     }
9241   c = DECODE_CHAR (charset, (unsigned )c);
9242   if (c < 0)
9243     error ("Invalid code: %d", code);
9244   return make_number (c);
9245 }
9246
9247 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9248        doc: /* Encode the Big5 character CH to BIG5 coding system.
9249 Return the corresponding character code in Big5.  */)
9250      (ch)
9251      Lisp_Object ch;
9252 {
9253   Lisp_Object spec, attrs, charset_list;
9254   struct charset *charset;
9255   int c;
9256   unsigned code;
9257
9258   CHECK_CHARACTER (ch);
9259   c = XFASTINT (ch);
9260   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9261   attrs = AREF (spec, 0);
9262   if (ASCII_CHAR_P (c)
9263       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9264     return ch;
9265
9266   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9267   charset = char_charset (c, charset_list, &code);
9268   if (code == CHARSET_INVALID_CODE (charset))
9269     error ("Can't encode by Big5 encoding: %d", c);
9270
9271   return make_number (code);
9272 }
9273
9274 \f
9275 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9276        Sset_terminal_coding_system_internal, 1, 2, 0,
9277        doc: /* Internal use only.  */)
9278      (coding_system, terminal)
9279      Lisp_Object coding_system;
9280      Lisp_Object terminal;
9281 {
9282   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9283   CHECK_SYMBOL (coding_system);
9284   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9285   /* We had better not send unsafe characters to terminal.  */
9286   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9287   /* Characer composition should be disabled.  */
9288   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9289   terminal_coding->src_multibyte = 1;
9290   terminal_coding->dst_multibyte = 0;
9291   return Qnil;
9292 }
9293
9294 DEFUN ("set-safe-terminal-coding-system-internal",
9295        Fset_safe_terminal_coding_system_internal,
9296        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9297        doc: /* Internal use only.  */)
9298      (coding_system)
9299      Lisp_Object coding_system;
9300 {
9301   CHECK_SYMBOL (coding_system);
9302   setup_coding_system (Fcheck_coding_system (coding_system),
9303                        &safe_terminal_coding);
9304   /* Characer composition should be disabled.  */
9305   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9306   safe_terminal_coding.src_multibyte = 1;
9307   safe_terminal_coding.dst_multibyte = 0;
9308   return Qnil;
9309 }
9310
9311 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9312        Sterminal_coding_system, 0, 1, 0,
9313        doc: /* Return coding system specified for terminal output on the given terminal.
9314 TERMINAL may be a terminal object, a frame, or nil for the selected
9315 frame's terminal device.  */)
9316      (terminal)
9317      Lisp_Object terminal;
9318 {
9319   struct coding_system *terminal_coding
9320     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9321   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9322
9323   /* For backward compatibility, return nil if it is `undecided'. */
9324   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9325 }
9326
9327 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9328        Sset_keyboard_coding_system_internal, 1, 2, 0,
9329        doc: /* Internal use only.  */)
9330      (coding_system, terminal)
9331      Lisp_Object coding_system;
9332      Lisp_Object terminal;
9333 {
9334   struct terminal *t = get_terminal (terminal, 1);
9335   CHECK_SYMBOL (coding_system);
9336   if (NILP (coding_system))
9337     coding_system = Qno_conversion;
9338   else
9339     Fcheck_coding_system (coding_system);
9340   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9341   /* Characer composition should be disabled.  */
9342   TERMINAL_KEYBOARD_CODING (t)->common_flags
9343     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9344   return Qnil;
9345 }
9346
9347 DEFUN ("keyboard-coding-system",
9348        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9349        doc: /* Return coding system specified for decoding keyboard input.  */)
9350      (terminal)
9351      Lisp_Object terminal;
9352 {
9353   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9354                          (get_terminal (terminal, 1))->id);
9355 }
9356
9357 \f
9358 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9359        Sfind_operation_coding_system,  1, MANY, 0,
9360        doc: /* Choose a coding system for an operation based on the target name.
9361 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9362 DECODING-SYSTEM is the coding system to use for decoding
9363 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9364 for encoding (in case OPERATION does encoding).
9365
9366 The first argument OPERATION specifies an I/O primitive:
9367   For file I/O, `insert-file-contents' or `write-region'.
9368   For process I/O, `call-process', `call-process-region', or `start-process'.
9369   For network I/O, `open-network-stream'.
9370
9371 The remaining arguments should be the same arguments that were passed
9372 to the primitive.  Depending on which primitive, one of those arguments
9373 is selected as the TARGET.  For example, if OPERATION does file I/O,
9374 whichever argument specifies the file name is TARGET.
9375
9376 TARGET has a meaning which depends on OPERATION:
9377   For file I/O, TARGET is a file name (except for the special case below).
9378   For process I/O, TARGET is a process name.
9379   For network I/O, TARGET is a service name or a port number.
9380
9381 This function looks up what is specified for TARGET in
9382 `file-coding-system-alist', `process-coding-system-alist',
9383 or `network-coding-system-alist' depending on OPERATION.
9384 They may specify a coding system, a cons of coding systems,
9385 or a function symbol to call.
9386 In the last case, we call the function with one argument,
9387 which is a list of all the arguments given to this function.
9388 If the function can't decide a coding system, it can return
9389 `undecided' so that the normal code-detection is performed.
9390
9391 If OPERATION is `insert-file-contents', the argument corresponding to
9392 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9393 file name to look up, and BUFFER is a buffer that contains the file's
9394 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9395 function to call for FILENAME, that function should examine the
9396 contents of BUFFER instead of reading the file.
9397
9398 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9399      (nargs, args)
9400      int nargs;
9401      Lisp_Object *args;
9402 {
9403   Lisp_Object operation, target_idx, target, val;
9404   register Lisp_Object chain;
9405
9406   if (nargs < 2)
9407     error ("Too few arguments");
9408   operation = args[0];
9409   if (!SYMBOLP (operation)
9410       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9411     error ("Invalid first argument");
9412   if (nargs < 1 + XINT (target_idx))
9413     error ("Too few arguments for operation: %s",
9414            SDATA (SYMBOL_NAME (operation)));
9415   target = args[XINT (target_idx) + 1];
9416   if (!(STRINGP (target)
9417         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9418             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9419         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9420     error ("Invalid %dth argument", XINT (target_idx) + 1);
9421   if (CONSP (target))
9422     target = XCAR (target);
9423
9424   chain = ((EQ (operation, Qinsert_file_contents)
9425             || EQ (operation, Qwrite_region))
9426            ? Vfile_coding_system_alist
9427            : (EQ (operation, Qopen_network_stream)
9428               ? Vnetwork_coding_system_alist
9429               : Vprocess_coding_system_alist));
9430   if (NILP (chain))
9431     return Qnil;
9432
9433   for (; CONSP (chain); chain = XCDR (chain))
9434     {
9435       Lisp_Object elt;
9436
9437       elt = XCAR (chain);
9438       if (CONSP (elt)
9439           && ((STRINGP (target)
9440                && STRINGP (XCAR (elt))
9441                && fast_string_match (XCAR (elt), target) >= 0)
9442               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9443         {
9444           val = XCDR (elt);
9445           /* Here, if VAL is both a valid coding system and a valid
9446              function symbol, we return VAL as a coding system.  */
9447           if (CONSP (val))
9448             return val;
9449           if (! SYMBOLP (val))
9450             return Qnil;
9451           if (! NILP (Fcoding_system_p (val)))
9452             return Fcons (val, val);
9453           if (! NILP (Ffboundp (val)))
9454             {
9455               /* We use call1 rather than safe_call1
9456                  so as to get bug reports about functions called here
9457                  which don't handle the current interface.  */
9458               val = call1 (val, Flist (nargs, args));
9459               if (CONSP (val))
9460                 return val;
9461               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9462                 return Fcons (val, val);
9463             }
9464           return Qnil;
9465         }
9466     }
9467   return Qnil;
9468 }
9469
9470 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9471        Sset_coding_system_priority, 0, MANY, 0,
9472        doc: /* Assign higher priority to the coding systems given as arguments.
9473 If multiple coding systems belong to the same category,
9474 all but the first one are ignored.
9475
9476 usage: (set-coding-system-priority &rest coding-systems)  */)
9477      (nargs, args)
9478      int nargs;
9479      Lisp_Object *args;
9480 {
9481   int i, j;
9482   int changed[coding_category_max];
9483   enum coding_category priorities[coding_category_max];
9484
9485   bzero (changed, sizeof changed);
9486
9487   for (i = j = 0; i < nargs; i++)
9488     {
9489       enum coding_category category;
9490       Lisp_Object spec, attrs;
9491
9492       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9493       attrs = AREF (spec, 0);
9494       category = XINT (CODING_ATTR_CATEGORY (attrs));
9495       if (changed[category])
9496         /* Ignore this coding system because a coding system of the
9497            same category already had a higher priority.  */
9498         continue;
9499       changed[category] = 1;
9500       priorities[j++] = category;
9501       if (coding_categories[category].id >= 0
9502           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9503         setup_coding_system (args[i], &coding_categories[category]);
9504       Fset (AREF (Vcoding_category_table, category), args[i]);
9505     }
9506
9507   /* Now we have decided top J priorities.  Reflect the order of the
9508      original priorities to the remaining priorities.  */
9509
9510   for (i = j, j = 0; i < coding_category_max; i++, j++)
9511     {
9512       while (j < coding_category_max
9513              && changed[coding_priorities[j]])
9514         j++;
9515       if (j == coding_category_max)
9516         abort ();
9517       priorities[i] = coding_priorities[j];
9518     }
9519
9520   bcopy (priorities, coding_priorities, sizeof priorities);
9521
9522   /* Update `coding-category-list'.  */
9523   Vcoding_category_list = Qnil;
9524   for (i = coding_category_max - 1; i >= 0; i--)
9525     Vcoding_category_list
9526       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9527                Vcoding_category_list);
9528
9529   return Qnil;
9530 }
9531
9532 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9533        Scoding_system_priority_list, 0, 1, 0,
9534        doc: /* Return a list of coding systems ordered by their priorities.
9535 The list contains a subset of coding systems; i.e. coding systems
9536 assigned to each coding category (see `coding-category-list').
9537
9538 HIGHESTP non-nil means just return the highest priority one.  */)
9539      (highestp)
9540      Lisp_Object highestp;
9541 {
9542   int i;
9543   Lisp_Object val;
9544
9545   for (i = 0, val = Qnil; i < coding_category_max; i++)
9546     {
9547       enum coding_category category = coding_priorities[i];
9548       int id = coding_categories[category].id;
9549       Lisp_Object attrs;
9550
9551       if (id < 0)
9552         continue;
9553       attrs = CODING_ID_ATTRS (id);
9554       if (! NILP (highestp))
9555         return CODING_ATTR_BASE_NAME (attrs);
9556       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9557     }
9558   return Fnreverse (val);
9559 }
9560
9561 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9562
9563 static Lisp_Object
9564 make_subsidiaries (Lisp_Object base)
9565 {
9566   Lisp_Object subsidiaries;
9567   int base_name_len = SBYTES (SYMBOL_NAME (base));
9568   char *buf = (char *) alloca (base_name_len + 6);
9569   int i;
9570
9571   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9572   subsidiaries = Fmake_vector (make_number (3), Qnil);
9573   for (i = 0; i < 3; i++)
9574     {
9575       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9576       ASET (subsidiaries, i, intern (buf));
9577     }
9578   return subsidiaries;
9579 }
9580
9581
9582 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9583        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9584        doc: /* For internal use only.
9585 usage: (define-coding-system-internal ...)  */)
9586      (nargs, args)
9587      int nargs;
9588      Lisp_Object *args;
9589 {
9590   Lisp_Object name;
9591   Lisp_Object spec_vec;         /* [ ATTRS ALIASES EOL_TYPE ] */
9592   Lisp_Object attrs;            /* Vector of attributes.  */
9593   Lisp_Object eol_type;
9594   Lisp_Object aliases;
9595   Lisp_Object coding_type, charset_list, safe_charsets;
9596   enum coding_category category;
9597   Lisp_Object tail, val;
9598   int max_charset_id = 0;
9599   int i;
9600   /* Every coding type needs at least the common arguments.  */
9601   if (nargs < coding_arg_max)
9602     goto short_args;
9603
9604   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9605   /* Name: must be a symbol; it is stored as the base name.  */
9606   name = args[coding_arg_name];
9607   CHECK_SYMBOL (name);
9608   CODING_ATTR_BASE_NAME (attrs) = name;
9609   /* Mnemonic: a string or a character.  */
9610   val = args[coding_arg_mnemonic];
9611   if (! STRINGP (val))
9612     CHECK_CHARACTER (val);
9613   CODING_ATTR_MNEMONIC (attrs) = val;
9614
9615   coding_type = args[coding_arg_coding_type];
9616   CHECK_SYMBOL (coding_type);
9617   CODING_ATTR_TYPE (attrs) = coding_type;
9618   /* Symbols iso-2022 and emacs-mule stand for the global lists.  */
9619   charset_list = args[coding_arg_charset_list];
9620   if (SYMBOLP (charset_list))
9621     {
9622       if (EQ (charset_list, Qiso_2022))
9623         {
9624           if (! EQ (coding_type, Qiso_2022))
9625             error ("Invalid charset-list");
9626           charset_list = Viso_2022_charset_list;
9627         }
9628       else if (EQ (charset_list, Qemacs_mule))
9629         {
9630           if (! EQ (coding_type, Qemacs_mule))
9631             error ("Invalid charset-list");
9632           charset_list = Vemacs_mule_charset_list;
9633         }
9634       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9635         if (max_charset_id < XFASTINT (XCAR (tail)))
9636           max_charset_id = XFASTINT (XCAR (tail));
9637     }
9638   else
9639     {
9640       charset_list = Fcopy_sequence (charset_list);  /* Cars change below.  */
9641       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9642         {
9643           struct charset *charset;
9644
9645           val = XCAR (tail);
9646           CHECK_CHARSET_GET_CHARSET (val, charset);
9647           if (EQ (coding_type, Qiso_2022)
9648               ? CHARSET_ISO_FINAL (charset) < 0
9649               : EQ (coding_type, Qemacs_mule)
9650               ? CHARSET_EMACS_MULE_ID (charset) < 0
9651               : 0)
9652             error ("Can't handle charset `%s'",
9653                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9654           /* Store the charset ID in place of the charset.  */
9655           XSETCAR (tail, make_number (charset->id));
9656           if (max_charset_id < charset->id)
9657             max_charset_id = charset->id;
9658         }
9659     }
9660   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9661   /* Safe-charsets string: 0 at each listed charset ID, 255 elsewhere.  */
9662   safe_charsets = make_uninit_string (max_charset_id + 1);
9663   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9664   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9665     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9666   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9667
9668   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9669   /* Translation tables: a char-table, a cons, or a symbol.  */
9670   val = args[coding_arg_decode_translation_table];
9671   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9672     CHECK_SYMBOL (val);
9673   CODING_ATTR_DECODE_TBL (attrs) = val;
9674
9675   val = args[coding_arg_encode_translation_table];
9676   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9677     CHECK_SYMBOL (val);
9678   CODING_ATTR_ENCODE_TBL (attrs) = val;
9679   /* Post-read and pre-write conversions must be symbols.  */
9680   val = args[coding_arg_post_read_conversion];
9681   CHECK_SYMBOL (val);
9682   CODING_ATTR_POST_READ (attrs) = val;
9683
9684   val = args[coding_arg_pre_write_conversion];
9685   CHECK_SYMBOL (val);
9686   CODING_ATTR_PRE_WRITE (attrs) = val;
9687   /* Default character: a space when nil is given.  */
9688   val = args[coding_arg_default_char];
9689   if (NILP (val))
9690     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9691   else
9692     {
9693       CHECK_CHARACTER (val);
9694       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9695     }
9696
9697   val = args[coding_arg_for_unibyte];
9698   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9699
9700   val = args[coding_arg_plist];
9701   CHECK_LIST (val);
9702   CODING_ATTR_PLIST (attrs) = val;
9703   /* Type-specific attributes and the coding category.  */
9704   if (EQ (coding_type, Qcharset))
9705     {
9706       /* Generate a lisp vector of 256 elements.  Each element is nil,
9707          integer, or a list of charset IDs.
9708
9709          If Nth element is nil, the byte code N is invalid in this
9710          coding system.
9711
9712          If Nth element is a number NUM, N is the first byte of a
9713          charset whose ID is NUM.
9714
9715          If Nth element is a list of charset IDs, N is the first byte
9716          of one of them.  The list is sorted by dimensions of the
9717          charsets.  A charset of smaller dimension comes first.  */
9718       val = Fmake_vector (make_number (256), Qnil);
9719
9720       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9721         {
9722           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9723           int dim = CHARSET_DIMENSION (charset);
9724           int idx = (dim - 1) * 4;
9725           /* One ASCII-compatible charset suffices.  */
9726           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9727             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9728           /* Walk the byte range code_space[idx] .. code_space[idx + 1].  */
9729           for (i = charset->code_space[idx];
9730                i <= charset->code_space[idx + 1]; i++)
9731             {
9732               Lisp_Object tmp, tmp2;
9733               int dim2;
9734
9735               tmp = AREF (val, i);
9736               if (NILP (tmp))
9737                 tmp = XCAR (tail);
9738               else if (NUMBERP (tmp))
9739                 {
9740                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9741                   if (dim < dim2)
9742                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9743                   else
9744                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9745                 }
9746               else
9747                 {
9748                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9749                     {
9750                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9751                       if (dim < dim2)
9752                         break;
9753                     }
9754                   if (NILP (tmp2))
9755                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9756                   else  /* Insert in front of TMP2, in place.  */
9757                     {
9758                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9759                       XSETCAR (tmp2, XCAR (tail));
9760                     }
9761                 }
9762               ASET (val, i, tmp);
9763             }
9764         }
9765       ASET (attrs, coding_attr_charset_valids, val);
9766       category = coding_category_charset;
9767     }
9768   else if (EQ (coding_type, Qccl))
9769     {
9770       Lisp_Object valids;
9771
9772       if (nargs < coding_arg_ccl_max)
9773         goto short_args;
9774
9775       val = args[coding_arg_ccl_decoder];
9776       CHECK_CCL_PROGRAM (val);
9777       if (VECTORP (val))
9778         val = Fcopy_sequence (val);
9779       ASET (attrs, coding_attr_ccl_decoder, val);
9780
9781       val = args[coding_arg_ccl_encoder];
9782       CHECK_CCL_PROGRAM (val);
9783       if (VECTORP (val))
9784         val = Fcopy_sequence (val);
9785       ASET (attrs, coding_attr_ccl_encoder, val);
9786       /* VALIDS: 256-byte string; byte N becomes 1 for each listed code N.  */
9787       val = args[coding_arg_ccl_valids];
9788       valids = Fmake_string (make_number (256), make_number (0));
9789       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9790         {
9791           int from, to;
9792
9793           val = Fcar (tail);
9794           if (INTEGERP (val))
9795             {
9796               from = to = XINT (val);
9797               if (from < 0 || from > 255)
9798                 args_out_of_range_3 (val, make_number (0), make_number (255));
9799             }
9800           else
9801             {
9802               CHECK_CONS (val);
9803               CHECK_NATNUM_CAR (val);
9804               CHECK_NATNUM_CDR (val);
9805               from = XINT (XCAR (val));
9806               if (from > 255)
9807                 args_out_of_range_3 (XCAR (val),
9808                                      make_number (0), make_number (255));
9809               to = XINT (XCDR (val));
9810               if (to < from || to > 255)
9811                 args_out_of_range_3 (XCDR (val),
9812                                      XCAR (val), make_number (255));
9813             }
9814           for (i = from; i <= to; i++)
9815             SSET (valids, i, 1);
9816         }
9817       ASET (attrs, coding_attr_ccl_valids, valids);
9818
9819       category = coding_category_ccl;
9820     }
9821   else if (EQ (coding_type, Qutf_16))
9822     {
9823       Lisp_Object bom, endian;
9824
9825       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9826
9827       if (nargs < coding_arg_utf16_max)
9828         goto short_args;
9829
9830       bom = args[coding_arg_utf16_bom];
9831       if (! NILP (bom) && ! EQ (bom, Qt))
9832         {
9833           CHECK_CONS (bom);
9834           val = XCAR (bom);
9835           CHECK_CODING_SYSTEM (val);
9836           val = XCDR (bom);
9837           CHECK_CODING_SYSTEM (val);
9838         }
9839       ASET (attrs, coding_attr_utf_bom, bom);
9840       /* Endian defaults to big when nil.  */
9841       endian = args[coding_arg_utf16_endian];
9842       CHECK_SYMBOL (endian);
9843       if (NILP (endian))
9844         endian = Qbig;
9845       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9846         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9847       ASET (attrs, coding_attr_utf_16_endian, endian);
9848       /* A cons BOM selects the auto category, nil the nosig ones.  */
9849       category = (CONSP (bom)
9850                   ? coding_category_utf_16_auto
9851                   : NILP (bom)
9852                   ? (EQ (endian, Qbig)
9853                      ? coding_category_utf_16_be_nosig
9854                      : coding_category_utf_16_le_nosig)
9855                   : (EQ (endian, Qbig)
9856                      ? coding_category_utf_16_be
9857                      : coding_category_utf_16_le));
9858     }
9859   else if (EQ (coding_type, Qiso_2022))
9860     {
9861       Lisp_Object initial, reg_usage, request, flags;
9862       int i;  /* Shadows the function-level I.  */
9863
9864       if (nargs < coding_arg_iso2022_max)
9865         goto short_args;
9866       /* INITIAL: four slots, each a charset (kept as ID) or nil (-1).  */
9867       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9868       CHECK_VECTOR (initial);
9869       for (i = 0; i < 4; i++)
9870         {
9871           val = Faref (initial, make_number (i));
9872           if (! NILP (val))
9873             {
9874               struct charset *charset;
9875
9876               CHECK_CHARSET_GET_CHARSET (val, charset);
9877               ASET (initial, i, make_number (CHARSET_ID (charset)));
9878               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9879                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9880             }
9881           else
9882             ASET (initial, i, make_number (-1));
9883         }
9884
9885       reg_usage = args[coding_arg_iso2022_reg_usage];
9886       CHECK_CONS (reg_usage);
9887       CHECK_NUMBER_CAR (reg_usage);
9888       CHECK_NUMBER_CDR (reg_usage);
9889       /* REQUEST: list of (CHARSET . REGISTER) with REGISTER below 4.  */
9890       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9891       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9892         {
9893           int id;
9894           Lisp_Object tmp;
9895
9896           val = Fcar (tail);
9897           CHECK_CONS (val);
9898           tmp = XCAR (val);
9899           CHECK_CHARSET_GET_ID (tmp, id);
9900           CHECK_NATNUM_CDR (val);
9901           if (XINT (XCDR (val)) >= 4)
9902             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9903           XSETCAR (val, make_number (id));
9904         }
9905       /* The full iso-2022 charset list turns on the full-support flag.  */
9906       flags = args[coding_arg_iso2022_flags];
9907       CHECK_NATNUM (flags);
9908       i = XINT (flags);
9909       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9910         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9911
9912       ASET (attrs, coding_attr_iso_initial, initial);
9913       ASET (attrs, coding_attr_iso_usage, reg_usage);
9914       ASET (attrs, coding_attr_iso_request, request);
9915       ASET (attrs, coding_attr_iso_flags, flags);
9916       setup_iso_safe_charsets (attrs);
9917
9918       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9919         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9920                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9921                     ? coding_category_iso_7_else
9922                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9923                     ? coding_category_iso_7
9924                     : coding_category_iso_7_tight);
9925       else
9926         {
9927           int id = XINT (AREF (initial, 1));
9928
9929           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9930                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9931                        || id < 0)
9932                       ? coding_category_iso_8_else
9933                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9934                       ? coding_category_iso_8_1
9935                       : coding_category_iso_8_2);
9936         }
9937       if (category != coding_category_iso_8_1
9938           && category != coding_category_iso_8_2)
9939         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9940     }
9941   else if (EQ (coding_type, Qemacs_mule))
9942     {
9943       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9944         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9945       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9946       category = coding_category_emacs_mule;
9947     }
9948   else if (EQ (coding_type, Qshift_jis))
9949     {
9950
9951       struct charset *charset;
9952       /* Expect charsets of dimension 1, 1, 2 and optionally one more of 2.  */
9953       if (XINT (Flength (charset_list)) != 3
9954           && XINT (Flength (charset_list)) != 4)
9955         error ("There should be three or four charsets");
9956
9957       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9958       if (CHARSET_DIMENSION (charset) != 1)
9959         error ("Dimension of charset %s is not one",
9960                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9961       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9962         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9963
9964       charset_list = XCDR (charset_list);
9965       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9966       if (CHARSET_DIMENSION (charset) != 1)
9967         error ("Dimension of charset %s is not one",
9968                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9969
9970       charset_list = XCDR (charset_list);
9971       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9972       if (CHARSET_DIMENSION (charset) != 2)
9973         error ("Dimension of charset %s is not two",
9974                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9975
9976       charset_list = XCDR (charset_list);
9977       if (! NILP (charset_list))
9978         {
9979           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9980           if (CHARSET_DIMENSION (charset) != 2)
9981             error ("Dimension of charset %s is not two",
9982                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9983         }
9984
9985       category = coding_category_sjis;
9986       Vsjis_coding_system = name;
9987     }
9988   else if (EQ (coding_type, Qbig5))
9989     {
9990       struct charset *charset;
9991       /* Expect exactly two charsets, of dimension 1 and 2.  */
9992       if (XINT (Flength (charset_list)) != 2)
9993         error ("There should be just two charsets");
9994
9995       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9996       if (CHARSET_DIMENSION (charset) != 1)
9997         error ("Dimension of charset %s is not one",
9998                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9999       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10000         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10001
10002       charset_list = XCDR (charset_list);
10003       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10004       if (CHARSET_DIMENSION (charset) != 2)
10005         error ("Dimension of charset %s is not two",
10006                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10007
10008       category = coding_category_big5;
10009       Vbig5_coding_system = name;
10010     }
10011   else if (EQ (coding_type, Qraw_text))
10012     {
10013       category = coding_category_raw_text;
10014       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10015     }
10016   else if (EQ (coding_type, Qutf_8))
10017     {
10018       Lisp_Object bom;
10019
10020       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10021
10022       if (nargs < coding_arg_utf8_max)
10023         goto short_args;
10024
10025       bom = args[coding_arg_utf8_bom];
10026       if (! NILP (bom) && ! EQ (bom, Qt))
10027         {
10028           CHECK_CONS (bom);
10029           val = XCAR (bom);
10030           CHECK_CODING_SYSTEM (val);
10031           val = XCDR (bom);
10032           CHECK_CODING_SYSTEM (val);
10033         }
10034       ASET (attrs, coding_attr_utf_bom, bom);
10035       /* BOM: a cons -> auto, nil -> nosig, t -> sig.  */
10036       category = (CONSP (bom) ? coding_category_utf_8_auto
10037                   : NILP (bom) ? coding_category_utf_8_nosig
10038                   : coding_category_utf_8_sig);
10039     }
10040   else if (EQ (coding_type, Qundecided))
10041     category = coding_category_undecided;
10042   else
10043     error ("Invalid coding system type: %s",
10044            SDATA (SYMBOL_NAME (coding_type)));
10045   /* Record the category; mirror it and ASCII compatibility in the plist.  */
10046   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10047   CODING_ATTR_PLIST (attrs)
10048     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10049                                 CODING_ATTR_PLIST (attrs)));
10050   CODING_ATTR_PLIST (attrs)
10051     = Fcons (QCascii_compatible_p,
10052              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10053                     CODING_ATTR_PLIST (attrs)));
10054   /* EOL type must be nil, unix, dos, or mac.  */
10055   eol_type = args[coding_arg_eol_type];
10056   if (! NILP (eol_type)
10057       && ! EQ (eol_type, Qunix)
10058       && ! EQ (eol_type, Qdos)
10059       && ! EQ (eol_type, Qmac))
10060     error ("Invalid eol-type");
10061
10062   aliases = Fcons (name, Qnil);
10063   /* With a nil EOL type, also define the three subsidiary coding systems.  */
10064   if (NILP (eol_type))
10065     {
10066       eol_type = make_subsidiaries (name);
10067       for (i = 0; i < 3; i++)
10068         {
10069           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10070
10071           this_name = AREF (eol_type, i);
10072           this_aliases = Fcons (this_name, Qnil);
10073           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10074           this_spec = Fmake_vector (make_number (3), attrs);
10075           ASET (this_spec, 1, this_aliases);
10076           ASET (this_spec, 2, this_eol_type);
10077           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10078           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10079           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10080           if (NILP (val))
10081             Vcoding_system_alist
10082               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10083                        Vcoding_system_alist);
10084         }
10085     }
10086   /* Register the coding system itself.  */
10087   spec_vec = Fmake_vector (make_number (3), attrs);
10088   ASET (spec_vec, 1, aliases);
10089   ASET (spec_vec, 2, eol_type);
10090
10091   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10092   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10093   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10094   if (NILP (val))
10095     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10096                                   Vcoding_system_alist);
10097   /* (Re)initialize the category entry if unset or already bound to NAME.  */
10098   {
10099     int id = coding_categories[category].id;
10100
10101     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10102       setup_coding_system (name, &coding_categories[category]);
10103   }
10104
10105   return Qnil;
10106
10107  short_args:
10108   return Fsignal (Qwrong_number_of_arguments,
10109                   Fcons (intern ("define-coding-system-internal"),
10110                          make_number (nargs)));
10111 }
10112
10113
10114 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10115        3, 3, 0,
10116        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10117      (coding_system, prop, val)
10118      Lisp_Object coding_system, prop, val;
10119 {
10120   Lisp_Object spec, attrs;
10121   /* Properties mirrored in ATTRS are validated and stored there first.  */
10122   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10123   attrs = AREF (spec, 0);
10124   if (EQ (prop, QCmnemonic))
10125     {
10126       if (! STRINGP (val))
10127         CHECK_CHARACTER (val);
10128       CODING_ATTR_MNEMONIC (attrs) = val;
10129     }
10130   else if (EQ (prop, QCdefault_char))
10131     {
10132       if (NILP (val))
10133         val = make_number (' ');  /* nil means a space.  */
10134       else
10135         CHECK_CHARACTER (val);
10136       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10137     }
10138   else if (EQ (prop, QCdecode_translation_table))
10139     {
10140       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10141         CHECK_SYMBOL (val);
10142       CODING_ATTR_DECODE_TBL (attrs) = val;
10143     }
10144   else if (EQ (prop, QCencode_translation_table))
10145     {
10146       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10147         CHECK_SYMBOL (val);
10148       CODING_ATTR_ENCODE_TBL (attrs) = val;
10149     }
10150   else if (EQ (prop, QCpost_read_conversion))
10151     {
10152       CHECK_SYMBOL (val);
10153       CODING_ATTR_POST_READ (attrs) = val;
10154     }
10155   else if (EQ (prop, QCpre_write_conversion))
10156     {
10157       CHECK_SYMBOL (val);
10158       CODING_ATTR_PRE_WRITE (attrs) = val;
10159     }
10160   else if (EQ (prop, QCascii_compatible_p))
10161     {
10162       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10163     }
10164   /* Whatever PROP is, record it in the property list as well.  */
10165   CODING_ATTR_PLIST (attrs)
10166     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10167   return val;
10168 }
10169
10170
10171 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10172        Sdefine_coding_system_alias, 2, 2, 0,
10173        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10174      (alias, coding_system)
10175      Lisp_Object alias, coding_system;
10176 {
10177   Lisp_Object spec, aliases, eol_type, val;
10178
10179   CHECK_SYMBOL (alias);
10180   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10181   aliases = AREF (spec, 1);
10182   /* ALIASES should be a list of length more than zero, and the first
10183      element is a base coding system.  Append ALIAS at the tail of the
10184      list.  */
10185   while (!NILP (XCDR (aliases)))
10186     aliases = XCDR (aliases);
10187   XSETCDR (aliases, Fcons (alias, Qnil));
10188   /* A vector EOL type lists subsidiaries; alias each of them in turn.  */
10189   eol_type = AREF (spec, 2);
10190   if (VECTORP (eol_type))
10191     {
10192       Lisp_Object subsidiaries;
10193       int i;
10194
10195       subsidiaries = make_subsidiaries (alias);
10196       for (i = 0; i < 3; i++)
10197         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10198                                      AREF (eol_type, i));
10199     }
10200   /* ALIAS is registered with the very same spec vector as CODING-SYSTEM.  */
10201   Fputhash (alias, spec, Vcoding_system_hash_table);
10202   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10203   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10204   if (NILP (val))
10205     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10206                                   Vcoding_system_alist);
10207
10208   return Qnil;
10209 }
10210
10211 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10212        1, 1, 0,
10213        doc: /* Return the base of CODING-SYSTEM.
10214 Any alias or subsidiary coding system is not a base coding system.  */)
10215      (coding_system)
10216      Lisp_Object coding_system;
10217 {
10218   Lisp_Object spec, attrs;
10219   /* nil is answered with no-conversion, without any spec lookup.  */
10220   if (NILP (coding_system))
10221     return (Qno_conversion);
10222   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10223   attrs = AREF (spec, 0);
10224   return CODING_ATTR_BASE_NAME (attrs);
10225 }
10226
10227 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10228        1, 1, 0,
10229        doc: /* Return the property list of CODING-SYSTEM.  */)
10230      (coding_system)
10231      Lisp_Object coding_system;
10232 {
10233   Lisp_Object spec, attrs;
10234   /* nil stands for no-conversion.  */
10235   if (NILP (coding_system))
10236     coding_system = Qno_conversion;
10237   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10238   attrs = AREF (spec, 0);  /* Slot 0 of the spec is the attribute vector.  */
10239   return CODING_ATTR_PLIST (attrs);
10240 }
10241
10242
10243 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10244        1, 1, 0,
10245        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10246      (coding_system)
10247      Lisp_Object coding_system;
10248 {
10249   Lisp_Object spec;
10250   /* nil stands for no-conversion.  */
10251   if (NILP (coding_system))
10252     coding_system = Qno_conversion;
10253   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10254   return AREF (spec, 1);  /* The stored list itself, not a copy.  */
10255 }
10256
10257 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10258        Scoding_system_eol_type, 1, 1, 0,
10259        doc: /* Return eol-type of CODING-SYSTEM.
10260 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10261
10262 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10263 and CR respectively.
10264
10265 A vector value indicates that a format of end-of-line should be
10266 detected automatically.  Nth element of the vector is the subsidiary
10267 coding system whose eol-type is N.  */)
10268      (coding_system)
10269      Lisp_Object coding_system;
10270 {
10271   Lisp_Object spec, eol_type;
10272   int n;
10273   /* nil stands for no-conversion; a non-coding-system yields nil.  */
10274   if (NILP (coding_system))
10275     coding_system = Qno_conversion;
10276   if (! CODING_SYSTEM_P (coding_system))
10277     return Qnil;
10278   spec = CODING_SYSTEM_SPEC (coding_system);
10279   eol_type = AREF (spec, 2);
10280   if (VECTORP (eol_type))
10281     return Fcopy_sequence (eol_type);  /* A copy, not the stored vector.  */
10282   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10283   return make_number (n);
10284 }
10285
10286 #endif /* emacs */
10287
10288 \f
10289 /*** 12. Postamble ***/
10290 /* Initialize the category, ISO code-class and emacs-mule byte tables.  */
10291 void
10292 init_coding_once (void)
10293 {
10294   int i;
10295   /* Category IDs start at -1; priorities start in index order.  */
10296   for (i = 0; i < coding_category_max; i++)
10297     {
10298       coding_categories[i].id = -1;
10299       coding_priorities[i] = i;
10300     }
10301
10302   /* ISO2022 specific initialize routine.  */
10303   for (i = 0; i < 0x20; i++)
10304     iso_code_class[i] = ISO_control_0;
10305   for (i = 0x21; i < 0x7F; i++)  /* 0x20 and 0x7F are set below.  */
10306     iso_code_class[i] = ISO_graphic_plane_0;
10307   for (i = 0x80; i < 0xA0; i++)
10308     iso_code_class[i] = ISO_control_1;
10309   for (i = 0xA1; i < 0xFF; i++)  /* 0xA0 and 0xFF are set below.  */
10310     iso_code_class[i] = ISO_graphic_plane_1;
10311   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10312   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10313   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10314   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10315   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10316   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10317   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10318   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10319   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10320   /* Every entry defaults to 1; the private leading codes get 3 or 4.  */
10321   for (i = 0; i < 256; i++)
10322     {
10323       emacs_mule_bytes[i] = 1;
10324     }
10325   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10326   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10327   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10328   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10329 }
10330
10331 #ifdef emacs
10332
10333 void
10334 syms_of_coding (void)
10335 {
10336   staticpro (&Vcoding_system_hash_table);
10337   {
10338     Lisp_Object args[2];
10339     args[0] = QCtest;
10340     args[1] = Qeq;
10341     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10342   }
10343
10344   staticpro (&Vsjis_coding_system);
10345   Vsjis_coding_system = Qnil;
10346
10347   staticpro (&Vbig5_coding_system);
10348   Vbig5_coding_system = Qnil;
10349
10350   staticpro (&Vcode_conversion_reused_workbuf);
10351   Vcode_conversion_reused_workbuf = Qnil;
10352
10353   staticpro (&Vcode_conversion_workbuf_name);
10354   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10355
10356   reused_workbuf_in_use = 0;
10357
10358   DEFSYM (Qcharset, "charset");
10359   DEFSYM (Qtarget_idx, "target-idx");
10360   DEFSYM (Qcoding_system_history, "coding-system-history");
10361   Fset (Qcoding_system_history, Qnil);
10362
10363   /* Target FILENAME is the first argument.  */
10364   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10365   /* Target FILENAME is the third argument.  */
10366   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10367
10368   DEFSYM (Qcall_process, "call-process");
10369   /* Target PROGRAM is the first argument.  */
10370   Fput (Qcall_process, Qtarget_idx, make_number (0));
10371
10372   DEFSYM (Qcall_process_region, "call-process-region");
10373   /* Target PROGRAM is the third argument.  */
10374   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10375
10376   DEFSYM (Qstart_process, "start-process");
10377   /* Target PROGRAM is the third argument.  */
10378   Fput (Qstart_process, Qtarget_idx, make_number (2));
10379
10380   DEFSYM (Qopen_network_stream, "open-network-stream");
10381   /* Target SERVICE is the fourth argument.  */
10382   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10383
10384   DEFSYM (Qcoding_system, "coding-system");
10385   DEFSYM (Qcoding_aliases, "coding-aliases");
10386
10387   DEFSYM (Qeol_type, "eol-type");
10388   DEFSYM (Qunix, "unix");
10389   DEFSYM (Qdos, "dos");
10390
10391   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10392   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10393   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10394   DEFSYM (Qdefault_char, "default-char");
10395   DEFSYM (Qundecided, "undecided");
10396   DEFSYM (Qno_conversion, "no-conversion");
10397   DEFSYM (Qraw_text, "raw-text");
10398
10399   DEFSYM (Qiso_2022, "iso-2022");
10400
10401   DEFSYM (Qutf_8, "utf-8");
10402   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10403
10404   DEFSYM (Qutf_16, "utf-16");
10405   DEFSYM (Qbig, "big");
10406   DEFSYM (Qlittle, "little");
10407
10408   DEFSYM (Qshift_jis, "shift-jis");
10409   DEFSYM (Qbig5, "big5");
10410
10411   DEFSYM (Qcoding_system_p, "coding-system-p");
10412
10413   DEFSYM (Qcoding_system_error, "coding-system-error");
10414   Fput (Qcoding_system_error, Qerror_conditions,
10415         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10416   Fput (Qcoding_system_error, Qerror_message,
10417         make_pure_c_string ("Invalid coding system"));
10418
10419   /* Intern this now in case it isn't already done.
10420      Setting this variable twice is harmless.
10421      But don't staticpro it here--that is done in alloc.c.  */
10422   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10423
10424   DEFSYM (Qtranslation_table, "translation-table");
10425   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10426   DEFSYM (Qtranslation_table_id, "translation-table-id");
10427   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10428   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10429
10430   DEFSYM (Qvalid_codes, "valid-codes");
10431
10432   DEFSYM (Qemacs_mule, "emacs-mule");
10433
10434   DEFSYM (QCcategory, ":category");
10435   DEFSYM (QCmnemonic, ":mnemonic");
10436   DEFSYM (QCdefault_char, ":default-char");
10437   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10438   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10439   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10440   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10441   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10442
10443   Vcoding_category_table
10444     = Fmake_vector (make_number (coding_category_max), Qnil);
10445   staticpro (&Vcoding_category_table);
10446   /* Followings are target of code detection.  */
10447   ASET (Vcoding_category_table, coding_category_iso_7,
10448         intern_c_string ("coding-category-iso-7"));
10449   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10450         intern_c_string ("coding-category-iso-7-tight"));
10451   ASET (Vcoding_category_table, coding_category_iso_8_1,
10452         intern_c_string ("coding-category-iso-8-1"));
10453   ASET (Vcoding_category_table, coding_category_iso_8_2,
10454         intern_c_string ("coding-category-iso-8-2"));
10455   ASET (Vcoding_category_table, coding_category_iso_7_else,
10456         intern_c_string ("coding-category-iso-7-else"));
10457   ASET (Vcoding_category_table, coding_category_iso_8_else,
10458         intern_c_string ("coding-category-iso-8-else"));
10459   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10460         intern_c_string ("coding-category-utf-8-auto"));
10461   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10462         intern_c_string ("coding-category-utf-8"));
10463   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10464         intern_c_string ("coding-category-utf-8-sig"));
10465   ASET (Vcoding_category_table, coding_category_utf_16_be,
10466         intern_c_string ("coding-category-utf-16-be"));
10467   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10468         intern_c_string ("coding-category-utf-16-auto"));
10469   ASET (Vcoding_category_table, coding_category_utf_16_le,
10470         intern_c_string ("coding-category-utf-16-le"));
10471   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10472         intern_c_string ("coding-category-utf-16-be-nosig"));
10473   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10474         intern_c_string ("coding-category-utf-16-le-nosig"));
10475   ASET (Vcoding_category_table, coding_category_charset,
10476         intern_c_string ("coding-category-charset"));
10477   ASET (Vcoding_category_table, coding_category_sjis,
10478         intern_c_string ("coding-category-sjis"));
10479   ASET (Vcoding_category_table, coding_category_big5,
10480         intern_c_string ("coding-category-big5"));
10481   ASET (Vcoding_category_table, coding_category_ccl,
10482         intern_c_string ("coding-category-ccl"));
10483   ASET (Vcoding_category_table, coding_category_emacs_mule,
10484         intern_c_string ("coding-category-emacs-mule"));
10485   /* The following are NOT targets of code detection.  */
10486   ASET (Vcoding_category_table, coding_category_raw_text,
10487         intern_c_string ("coding-category-raw-text"));
10488   ASET (Vcoding_category_table, coding_category_undecided,
10489         intern_c_string ("coding-category-undecided"));
10490
10491   DEFSYM (Qinsufficient_source, "insufficient-source");
10492   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10493   DEFSYM (Qinvalid_source, "invalid-source");
10494   DEFSYM (Qinterrupted, "interrupted");
10495   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10496   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10497
10498   defsubr (&Scoding_system_p);
10499   defsubr (&Sread_coding_system);
10500   defsubr (&Sread_non_nil_coding_system);
10501   defsubr (&Scheck_coding_system);
10502   defsubr (&Sdetect_coding_region);
10503   defsubr (&Sdetect_coding_string);
10504   defsubr (&Sfind_coding_systems_region_internal);
10505   defsubr (&Sunencodable_char_position);
10506   defsubr (&Scheck_coding_systems_region);
10507   defsubr (&Sdecode_coding_region);
10508   defsubr (&Sencode_coding_region);
10509   defsubr (&Sdecode_coding_string);
10510   defsubr (&Sencode_coding_string);
10511   defsubr (&Sdecode_sjis_char);
10512   defsubr (&Sencode_sjis_char);
10513   defsubr (&Sdecode_big5_char);
10514   defsubr (&Sencode_big5_char);
10515   defsubr (&Sset_terminal_coding_system_internal);
10516   defsubr (&Sset_safe_terminal_coding_system_internal);
10517   defsubr (&Sterminal_coding_system);
10518   defsubr (&Sset_keyboard_coding_system_internal);
10519   defsubr (&Skeyboard_coding_system);
10520   defsubr (&Sfind_operation_coding_system);
10521   defsubr (&Sset_coding_system_priority);
10522   defsubr (&Sdefine_coding_system_internal);
10523   defsubr (&Sdefine_coding_system_alias);
10524   defsubr (&Scoding_system_put);
10525   defsubr (&Scoding_system_base);
10526   defsubr (&Scoding_system_plist);
10527   defsubr (&Scoding_system_aliases);
10528   defsubr (&Scoding_system_eol_type);
10529   defsubr (&Scoding_system_priority_list);
10530
10531   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10532                doc: /* List of coding systems.
10533
10534 Do not alter the value of this variable manually.  This variable should be
10535 updated by the functions `define-coding-system' and
10536 `define-coding-system-alias'.  */);
10537   Vcoding_system_list = Qnil;
10538
10539   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10540                doc: /* Alist of coding system names.
10541 Each element is one element list of coding system name.
10542 This variable is given to `completing-read' as COLLECTION argument.
10543
10544 Do not alter the value of this variable manually.  This variable should be
10545 updated by the functions `make-coding-system' and
10546 `define-coding-system-alias'.  */);
10547   Vcoding_system_alist = Qnil;
10548
10549   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10550                doc: /* List of coding-categories (symbols) ordered by priority.
10551
10552 On detecting a coding system, Emacs tries code detection algorithms
10553 associated with each coding-category one by one in this order.  When
10554 one algorithm agrees with a byte sequence of source text, the coding
10555 system bound to the corresponding coding-category is selected.
10556
10557 Don't modify this variable directly, but use `set-coding-system-priority'.  */);
10558   {
10559     int i;
10560
10561     Vcoding_category_list = Qnil;
10562     for (i = coding_category_max - 1; i >= 0; i--)
10563       Vcoding_category_list
10564         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10565                  Vcoding_category_list);
10566   }
10567
10568   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10569                doc: /* Specify the coding system for read operations.
10570 It is useful to bind this variable with `let', but do not set it globally.
10571 If the value is a coding system, it is used for decoding on read operation.
10572 If not, an appropriate element is used from one of the coding system alists.
10573 There are three such tables: `file-coding-system-alist',
10574 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10575   Vcoding_system_for_read = Qnil;
10576
10577   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10578                doc: /* Specify the coding system for write operations.
10579 Programs bind this variable with `let', but you should not set it globally.
10580 If the value is a coding system, it is used for encoding of output,
10581 when writing it to a file and when sending it to a file or subprocess.
10582
10583 If this does not specify a coding system, an appropriate element
10584 is used from one of the coding system alists.
10585 There are three such tables: `file-coding-system-alist',
10586 `process-coding-system-alist', and `network-coding-system-alist'.
10587 For output to files, if the above procedure does not specify a coding system,
10588 the value of `buffer-file-coding-system' is used.  */);
10589   Vcoding_system_for_write = Qnil;
10590
10591   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10592                doc: /*
10593 Coding system used in the latest file or process I/O.  */);
10594   Vlast_coding_system_used = Qnil;
10595
10596   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10597                doc: /*
10598 Error status of the last code conversion.
10599
10600 When an error was detected in the last code conversion, this variable
10601 is set to one of the following symbols.
10602   `insufficient-source'
10603   `inconsistent-eol'
10604   `invalid-source'
10605   `interrupted'
10606   `insufficient-memory'
10607 When no error was detected, the value doesn't change.  So, to check
10608 the error status of a code conversion by this variable, you must
10609 explicitly set this variable to nil before performing code
10610 conversion.  */);
10611   Vlast_code_conversion_error = Qnil;
10612
10613   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10614                doc: /*
10615 *Non-nil means always inhibit code conversion of end-of-line format.
10616 See info node `Coding Systems' and info node `Text and Binary' concerning
10617 such conversion.  */);
10618   inhibit_eol_conversion = 0;
10619
10620   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10621                doc: /*
10622 Non-nil means process buffer inherits coding system of process output.
10623 Bind it to t if the process output is to be treated as if it were a file
10624 read from some filesystem.  */);
10625   inherit_process_coding_system = 0;
10626
10627   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10628                doc: /*
10629 Alist to decide a coding system to use for a file I/O operation.
10630 The format is ((PATTERN . VAL) ...),
10631 where PATTERN is a regular expression matching a file name,
10632 VAL is a coding system, a cons of coding systems, or a function symbol.
10633 If VAL is a coding system, it is used for both decoding and encoding
10634 the file contents.
10635 If VAL is a cons of coding systems, the car part is used for decoding,
10636 and the cdr part is used for encoding.
10637 If VAL is a function symbol, the function must return a coding system
10638 or a cons of coding systems which are used as above.  The function is
10639 called with an argument that is a list of the arguments with which
10640 `find-operation-coding-system' was called.  If the function can't decide
10641 a coding system, it can return `undecided' so that the normal
10642 code-detection is performed.
10643
10644 See also the function `find-operation-coding-system'
10645 and the variable `auto-coding-alist'.  */);
10646   Vfile_coding_system_alist = Qnil;
10647
10648   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10649                doc: /*
10650 Alist to decide a coding system to use for a process I/O operation.
10651 The format is ((PATTERN . VAL) ...),
10652 where PATTERN is a regular expression matching a program name,
10653 VAL is a coding system, a cons of coding systems, or a function symbol.
10654 If VAL is a coding system, it is used for both decoding what received
10655 from the program and encoding what sent to the program.
10656 If VAL is a cons of coding systems, the car part is used for decoding,
10657 and the cdr part is used for encoding.
10658 If VAL is a function symbol, the function must return a coding system
10659 or a cons of coding systems which are used as above.
10660
10661 See also the function `find-operation-coding-system'.  */);
10662   Vprocess_coding_system_alist = Qnil;
10663
10664   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10665                doc: /*
10666 Alist to decide a coding system to use for a network I/O operation.
10667 The format is ((PATTERN . VAL) ...),
10668 where PATTERN is a regular expression matching a network service name
10669 or is a port number to connect to,
10670 VAL is a coding system, a cons of coding systems, or a function symbol.
10671 If VAL is a coding system, it is used for both decoding what received
10672 from the network stream and encoding what sent to the network stream.
10673 If VAL is a cons of coding systems, the car part is used for decoding,
10674 and the cdr part is used for encoding.
10675 If VAL is a function symbol, the function must return a coding system
10676 or a cons of coding systems which are used as above.
10677
10678 See also the function `find-operation-coding-system'.  */);
10679   Vnetwork_coding_system_alist = Qnil;
10680
10681   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10682                doc: /* Coding system to use with system messages.
10683 Also used for decoding keyboard input on X Window system.  */);
10684   Vlocale_coding_system = Qnil;
10685
10686   /* The eol mnemonics are reset in startup.el system-dependently.  */
10687   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10688                doc: /*
10689 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10690   eol_mnemonic_unix = make_pure_c_string (":");
10691
10692   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10693                doc: /*
10694 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10695   eol_mnemonic_dos = make_pure_c_string ("\\");
10696
10697   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10698                doc: /*
10699 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10700   eol_mnemonic_mac = make_pure_c_string ("/");
10701
10702   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10703                doc: /*
10704 *String displayed in mode line when end-of-line format is not yet determined.  */);
10705   eol_mnemonic_undecided = make_pure_c_string (":");
10706
10707   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10708                doc: /*
10709 *Non-nil enables character translation while encoding and decoding.  */);
10710   Venable_character_translation = Qt;
10711
10712   DEFVAR_LISP ("standard-translation-table-for-decode",
10713                &Vstandard_translation_table_for_decode,
10714                doc: /* Table for translating characters while decoding.  */);
10715   Vstandard_translation_table_for_decode = Qnil;
10716
10717   DEFVAR_LISP ("standard-translation-table-for-encode",
10718                &Vstandard_translation_table_for_encode,
10719                doc: /* Table for translating characters while encoding.  */);
10720   Vstandard_translation_table_for_encode = Qnil;
10721
10722   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10723                doc: /* Alist of charsets vs revision numbers.
10724 While encoding, if a charset (car part of an element) is found,
10725 designate it with the escape sequence identifying revision (cdr part
10726 of the element).  */);
10727   Vcharset_revision_table = Qnil;
10728
10729   DEFVAR_LISP ("default-process-coding-system",
10730                &Vdefault_process_coding_system,
10731                doc: /* Cons of coding systems used for process I/O by default.
10732 The car part is used for decoding a process output,
10733 the cdr part is used for encoding a text to be sent to a process.  */);
10734   Vdefault_process_coding_system = Qnil;
10735
10736   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10737                doc: /*
10738 Table of extra Latin codes in the range 128..159 (inclusive).
10739 This is a vector of length 256.
10740 If Nth element is non-nil, the existence of code N in a file
10741 \(or output of subprocess) doesn't prevent it from being detected as
10742 a coding system of ISO 2022 variant which has a flag
10743 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10744 or reading output of a subprocess.
10745 Only 128th through 159th elements have a meaning.  */);
10746   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10747
10748   DEFVAR_LISP ("select-safe-coding-system-function",
10749                &Vselect_safe_coding_system_function,
10750                doc: /*
10751 Function to call to select safe coding system for encoding a text.
10752
10753 If set, this function is called to force a user to select a proper
10754 coding system which can encode the text in the case that a default
10755 coding system used in each operation can't encode the text.  The
10756 function should take care that the buffer is not modified while
10757 the coding system is being selected.
10758
10759 The default value is `select-safe-coding-system' (which see).  */);
10760   Vselect_safe_coding_system_function = Qnil;
10761
10762   DEFVAR_BOOL ("coding-system-require-warning",
10763                &coding_system_require_warning,
10764                doc: /* Internal use only.
10765 If non-nil, on writing a file, `select-safe-coding-system-function' is
10766 called even if `coding-system-for-write' is non-nil.  The command
10767 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10768   coding_system_require_warning = 0;
10769
10770
10771   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10772                &inhibit_iso_escape_detection,
10773                doc: /*
10774 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10775
10776 When Emacs reads text, it tries to detect how the text is encoded.
10777 This code detection is sensitive to escape sequences.  If Emacs sees
10778 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10779 of the ISO2022 encodings, and decodes text by the corresponding coding
10780 system (e.g. `iso-2022-7bit').
10781
10782 However, there may be a case that you want to read escape sequences in
10783 a file as is.  In such a case, you can set this variable to non-nil.
10784 Then the code detection will ignore any escape sequences, and no text is
10785 detected as encoded in some ISO-2022 encoding.  The result is that all
10786 escape sequences become visible in a buffer.
10787
10788 The default value is nil, and it is strongly recommended not to change
10789 it.  That is because many Emacs Lisp source files that contain
10790 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10791 in Emacs's distribution, and they won't be decoded correctly on
10792 reading if you suppress escape sequence detection.
10793
10794 The other way to read escape sequences in a file without decoding is
10795 to explicitly specify some coding system that doesn't use ISO-2022
10796 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument].  */);
10797   inhibit_iso_escape_detection = 0;
10798
10799   DEFVAR_BOOL ("inhibit-null-byte-detection",
10800                &inhibit_null_byte_detection,
10801                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10802 By default, Emacs treats it as binary data, and does not attempt to
10803 decode it.  The effect is as if you specified `no-conversion' for
10804 reading that text.
10805
10806 Set this to non-nil when a regular text happens to include null bytes.
10807 Examples are Index nodes of Info files and null-byte delimited output
10808 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10809 decode text as usual.  */);
10810   inhibit_null_byte_detection = 0;
10811
10812   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10813                doc: /* Char table for translating self-inserting characters.
10814 This is applied to the result of input methods, not their input.
10815 See also `keyboard-translate-table'.
10816
10817 Use of this variable for character code unification was rendered
10818 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10819 internal character representation.  */);
10820     Vtranslation_table_for_input = Qnil;
10821
10822   {
10823     Lisp_Object args[coding_arg_max];
10824     Lisp_Object plist[16];
10825     int i;
10826
10827     for (i = 0; i < coding_arg_max; i++)
10828       args[i] = Qnil;
10829
10830     plist[0] = intern_c_string (":name");
10831     plist[1] = args[coding_arg_name] = Qno_conversion;
10832     plist[2] = intern_c_string (":mnemonic");
10833     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10834     plist[4] = intern_c_string (":coding-type");
10835     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10836     plist[6] = intern_c_string (":ascii-compatible-p");
10837     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10838     plist[8] = intern_c_string (":default-char");
10839     plist[9] = args[coding_arg_default_char] = make_number (0);
10840     plist[10] = intern_c_string (":for-unibyte");
10841     plist[11] = args[coding_arg_for_unibyte] = Qt;
10842     plist[12] = intern_c_string (":docstring");
10843     plist[13] = make_pure_c_string ("Do no conversion.\n\
10844 \n\
10845 When you visit a file with this coding, the file is read into a\n\
10846 unibyte buffer as is, thus each byte of a file is treated as a\n\
10847 character.");
10848     plist[14] = intern_c_string (":eol-type");
10849     plist[15] = args[coding_arg_eol_type] = Qunix;
10850     args[coding_arg_plist] = Flist (16, plist);
10851     Fdefine_coding_system_internal (coding_arg_max, args);
10852
10853     plist[1] = args[coding_arg_name] = Qundecided;
10854     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10855     plist[5] = args[coding_arg_coding_type] = Qundecided;
10856     /* This is already set.
10857        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10858     plist[8] = intern_c_string (":charset-list");
10859     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10860     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10861     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10862     plist[15] = args[coding_arg_eol_type] = Qnil;
10863     args[coding_arg_plist] = Flist (16, plist);
10864     Fdefine_coding_system_internal (coding_arg_max, args);
10865   }
10866
10867   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10868
10869   {
10870     int i;
10871
10872     for (i = 0; i < coding_category_max; i++)
10873       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10874   }
10875 #if defined (DOS_NT)
10876   system_eol_type = Qdos;
10877 #else
10878   system_eol_type = Qunix;
10879 #endif
10880   staticpro (&system_eol_type);
10881 }
10882
10883 char *
10884 emacs_strerror (int error_number)
10885 {
10886   char *str;
10887
10888   synchronize_system_messages_locale ();
10889   str = strerror (error_number);
10890
10891   if (! NILP (Vlocale_coding_system))
10892     {
10893       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10894                                                       Vlocale_coding_system,
10895                                                       0);
10896       str = (char *) SDATA (dec);
10897     }
10898
10899   return str;
10900 }
10901
10902 #endif /* emacs */
10903
10904 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10905    (do not change this comment) */