src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from coding system symbol to attributes vector is done by looking up
  63   Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we set up a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;        /* Category bits to report on success.  */
 166   ...;                  /* E.g. C and SRC_BASE, used by ONE_MORE_BYTE.  */
 167   /* Scan until the source is used up or a nonconforming byte is seen.  */
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the source is exhausted, jump
 171          to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (__C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;      /* Positive evidence for XXX.  */
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source was exhausted without finding an invalid byte.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size;
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219   /* C and CONSUMED_CHARS, which ONE_MORE_BYTE uses, are omitted here.  */
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf >= charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that source area and destination area are
 258   overlapped, which means that we can produce encoded text until it
 259   reaches the head of the not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274   /* Stop early enough that one more iteration cannot overrun DST_END.  */
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292
 293 #include "lisp.h"
 294 #include "buffer.h"
 295 #include "character.h"
 296 #include "charset.h"
 297 #include "ccl.h"
 298 #include "composite.h"
 299 #include "coding.h"
 300 #include "window.h"
 301 #include "frame.h"
 302 #include "termhooks.h"
 303
 304 Lisp_Object Vcoding_system_hash_table;  /* Presumably coding system symbol -> attribute vector; confirm.  */
 305 /* NOTE(review): the Q... and QC... objects below are presumably symbols and keywords (naming convention) -- confirm.  */
 306 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 307 Lisp_Object Qunix, Qdos;
 308 extern Lisp_Object Qmac;        /* frame.c */
 309 Lisp_Object Qbuffer_file_coding_system;
 310 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 311 Lisp_Object Qdefault_char;
 312 Lisp_Object Qno_conversion, Qundecided;
 313 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 314 Lisp_Object Qbig, Qlittle;
 315 Lisp_Object Qcoding_system_history;
 316 Lisp_Object Qvalid_codes;
 317 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;  /* sic: `defalut'; rename only together with every use.  */
 318 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 319 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 320 Lisp_Object QCascii_compatible_p;
 321
 322 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 323 Lisp_Object Qcall_process, Qcall_process_region;
 324 Lisp_Object Qstart_process, Qopen_network_stream;
 325 Lisp_Object Qtarget_idx;
 326 /* NOTE(review): the names below parallel the CODING_RESULT_XXX codes; presumably their symbols -- confirm.  */
 327 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 328 Lisp_Object Qinterrupted, Qinsufficient_memory;
 329
 330 extern Lisp_Object Qcompletion_ignore_case;
 331
 332 /* If a symbol has this property, evaluate the value to define the
 333    symbol as a coding system.  */
 334 static Lisp_Object Qcoding_system_define_form;
 335 /* NOTE(review): the next two variables are undocumented here; check their uses before relying on them.  */
 336 int coding_system_require_warning;
 337
 338 Lisp_Object Vselect_safe_coding_system_function;
 339
 340 /* Mnemonic string for each format of end-of-line.  */
 341 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 342 /* Mnemonic string to indicate that the format of end-of-line is not
 343    yet decided.  */
 344 Lisp_Object eol_mnemonic_undecided;
 345
 346 /* Format of end-of-line decided by the system.  This is Qunix on
 347    Unix and Mac, Qdos on DOS/Windows.
 348    This has an effect only for external encoding (i.e. for output to
 349    file and process), not for in-buffer or Lisp string encoding.  */
 350 static Lisp_Object system_eol_type;
 351
 352 #ifdef emacs
 353 /* NOTE(review): presumably the list and alist of defined coding systems -- confirm.  */
 354 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 355
 356 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 357
 358 /* The coding systems emacs-mule and raw-text are for converting only
 359    the end-of-line format.  */
 360 Lisp_Object Qemacs_mule, Qraw_text;
 361 Lisp_Object Qutf_8_emacs;
 362
 363 /* Coding systems are handed between Emacs Lisp programs and C internal
 364    routines by the following three variables.  */
 365 /* Coding system for reading files and receiving data from a process.  */
 366 Lisp_Object Vcoding_system_for_read;
 367 /* Coding system for writing files and sending data to a process.  */
 368 Lisp_Object Vcoding_system_for_write;
 369 /* Coding system actually used in the latest I/O.  */
 370 Lisp_Object Vlast_coding_system_used;
 371 /* Set to non-nil when an error is detected during code conversion.  */
 372 Lisp_Object Vlast_code_conversion_error;
 373 /* A vector of length 256 which contains information about special
 374    Latin codes (especially for dealing with Microsoft codes).  */
 375 Lisp_Object Vlatin_extra_code_table;
 376
 377 /* Flag to inhibit code conversion of end-of-line format.  */
 378 int inhibit_eol_conversion;
 379
 380 /* Flag to inhibit ISO2022 escape sequence detection.  */
 381 int inhibit_iso_escape_detection;
 382
 383 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 384 int inherit_process_coding_system;
 385
 386 /* Coding system to be used to encode text for terminal display when
 387    the terminal coding system is nil.  */
 388 struct coding_system safe_terminal_coding;
 389 /* NOTE(review): presumably alists for choosing a coding system for file, process and network I/O -- confirm.  */
 390 Lisp_Object Vfile_coding_system_alist;
 391 Lisp_Object Vprocess_coding_system_alist;
 392 Lisp_Object Vnetwork_coding_system_alist;
 393
 394 Lisp_Object Vlocale_coding_system;
 395
 396 #endif /* emacs */
 397
 398 /* Flag to tell whether we look up translation tables on character
 399    code conversion.  */
 400 Lisp_Object Venable_character_translation;
 401 /* Standard translation table to look up on decoding (reading).  */
 402 Lisp_Object Vstandard_translation_table_for_decode;
 403 /* Standard translation table to look up on encoding (writing).  */
 404 Lisp_Object Vstandard_translation_table_for_encode;
 405 /* NOTE(review): presumably symbols naming translation-table properties -- confirm.  */
 406 Lisp_Object Qtranslation_table;
 407 Lisp_Object Qtranslation_table_id;
 408 Lisp_Object Qtranslation_table_for_decode;
 409 Lisp_Object Qtranslation_table_for_encode;
 410
 411 /* Alist mapping charsets to revision numbers.  */
 412 static Lisp_Object Vcharset_revision_table;
 413
 414 /* Default coding systems used for process I/O.  */
 415 Lisp_Object Vdefault_process_coding_system;
 416
 417 /* Char table for translating Quail and self-inserting input.  */
 418 Lisp_Object Vtranslation_table_for_input;
 419
 420 /* Two special coding systems: one for Shift-JIS, one for big5.  */
 421 Lisp_Object Vsjis_coding_system;
 422 Lisp_Object Vbig5_coding_system;
 423
 424 /* ISO2022 section */
 425 /* Integer stored at index REG of CODING's `coding_attr_iso_initial' attribute.  */
 426 #define CODING_ISO_INITIAL(coding, reg)                 \
 427   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 428                      coding_attr_iso_initial),          \
 429                reg)))
 430 /* Value of CODING->safe_charsets[CHARSET_ID], or -1 when CHARSET_ID
 431    exceeds CODING->max_charset_id.  CHARSET_ID is evaluated twice.  */
 432 #define CODING_ISO_REQUEST(coding, charset_id)  \
 433   (((charset_id) <= (coding)->max_charset_id    \
 434     ? (coding)->safe_charsets[charset_id]       \
 435     : -1))
 436 /* Accessors for the ISO2022 specific state kept in CODING->spec.iso_2022.
 437    CODING_ISO_INVOKED_CHARSET is the designation of the register invoked to PLANE.  */
 438 #define CODING_ISO_FLAGS(coding)        \
 439   ((coding)->spec.iso_2022.flags)
 440 #define CODING_ISO_DESIGNATION(coding, reg)     \
 441   ((coding)->spec.iso_2022.current_designation[reg])
 442 #define CODING_ISO_INVOCATION(coding, plane)    \
 443   ((coding)->spec.iso_2022.current_invocation[plane])
 444 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 445   ((coding)->spec.iso_2022.single_shifting)
 446 #define CODING_ISO_BOL(coding)  \
 447   ((coding)->spec.iso_2022.bol)
 448 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 449   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 450
 451 /* Control characters of ISO2022.  */
 452                         /* code */      /* function */
 453 #define ISO_CODE_LF     0x0A            /* line-feed */
 454 #define ISO_CODE_CR     0x0D            /* carriage-return */
 455 #define ISO_CODE_SO     0x0E            /* shift-out */
 456 #define ISO_CODE_SI     0x0F            /* shift-in */
 457 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 458 #define ISO_CODE_ESC    0x1B            /* escape */
 459 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 460 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 461 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 462
 463 /* Every 1-byte code of ISO2022 is classified into one of the
 464    following classes.  */
 465 enum iso_code_class_type
 466   {
 467     ISO_control_0,              /* Control codes in the range
 468                                    0x00..0x1F and 0x7F, except for the
 469                                    following 4 codes.  */
 470     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 471     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 472     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 473     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 474     ISO_control_1,              /* Control codes in the range
 475                                    0x80..0x9F, except for the
 476                                    following 3 codes.  */
 477     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 478     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 479     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 480     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 481     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 482     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 483     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 484   };
 485
 486 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
 487     `iso-flags' attribute of an iso2022 coding system.  */
 488
 489 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 490    instead of the correct short-form sequence (e.g. ESC $ A).  */
 491 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 492
 493 /* If set, reset graphic planes and registers at end-of-line to the
 494    initial state.  */
 495 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 496
 497 /* If set, reset graphic planes and registers before any control
 498    characters to the initial state.  */
 499 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 500
 501 /* If set, encode by 7-bit environment.  */
 502 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 503
 504 /* If set, use locking-shift function.  */
 505 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 506
 507 /* If set, use single-shift function.  Overwrite
 508    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 509 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 510
 511 /* If set, use designation escape sequence.  */
 512 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 513
 514 /* If set, produce revision number sequence.  */
 515 #define CODING_ISO_FLAG_REVISION        0x0080
 516
 517 /* If set, produce ISO6429's direction specifying sequence.  */
 518 #define CODING_ISO_FLAG_DIRECTION       0x0100
 519
 520 /* If set, assume designation states are reset at beginning of line on
 521    output.  */
 522 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 523
 524 /* If set, designation sequence should be placed at beginning of line
 525    on output.  */
 526 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 527
 528 /* If set, do not encode unsafe characters on output.  */
 529 #define CODING_ISO_FLAG_SAFE            0x0800
 530
 531 /* If set, extra latin codes (128..159) are accepted as a valid code
 532    on input.  */
 533 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 534 /* NOTE(review): the five flags below have no description here; confirm their meaning at their uses.  */
 535 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 536
 537 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 538
 539 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 540
 541 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 542 /* NOTE(review): the value jumps from 0x10000 to 0x100000; bits 0x20000..0x80000 are not assigned here.  */
 543 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 544
 545 /* A character to be produced on output if encoding of the original
 546    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 547 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 548
 549 /* UTF-8 section: accessor for the utf_8_bom member of CODING->spec.  */
 550 #define CODING_UTF_8_BOM(coding)        \
 551   ((coding)->spec.utf_8_bom)
 552
 553 /* UTF-16 section: accessors for the members of CODING->spec.utf_16.  */
 554 #define CODING_UTF_16_BOM(coding)       \
 555   ((coding)->spec.utf_16.bom)
 556
 557 #define CODING_UTF_16_ENDIAN(coding)    \
 558   ((coding)->spec.utf_16.endian)
 559
 560 #define CODING_UTF_16_SURROGATE(coding) \
 561   ((coding)->spec.utf_16.surrogate)
 562
 563
 564 /* CCL section: these read entries of the attribute vector found via CODING->id.  */
 565 #define CODING_CCL_DECODER(coding)      \
 566   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 567 #define CODING_CCL_ENCODER(coding)      \
 568   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 569 #define CODING_CCL_VALIDS(coding)                                          \
 570   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 571
 572 /* Index for each coding category in `coding_categories'.  */
 573 /* Value N is also the bit position of the matching CATEGORY_MASK_XXX flag.  */
 574 enum coding_category
 575   {
 576     coding_category_iso_7,
 577     coding_category_iso_7_tight,
 578     coding_category_iso_8_1,
 579     coding_category_iso_8_2,
 580     coding_category_iso_7_else,
 581     coding_category_iso_8_else,
 582     coding_category_utf_8_auto,
 583     coding_category_utf_8_nosig,
 584     coding_category_utf_8_sig,
 585     coding_category_utf_16_auto,
 586     coding_category_utf_16_be,
 587     coding_category_utf_16_le,
 588     coding_category_utf_16_be_nosig,
 589     coding_category_utf_16_le_nosig,
 590     coding_category_charset,
 591     coding_category_sjis,
 592     coding_category_big5,
 593     coding_category_ccl,
 594     coding_category_emacs_mule,
 595     /* All above are targets of code detection.  */
 596     coding_category_raw_text,
 597     coding_category_undecided,
 598     coding_category_max         /* Count of categories; sizes coding_priorities and coding_categories.  */
 599   };
 600
 601 /* Definitions of flag bits used in detect_coding_XXXX: one bit per enum coding_category value.  */
 602 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 603 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 604 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 605 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 606 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 607 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 608 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 609 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 610 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 611 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 612 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 613 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 614 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 615 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 616 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 617 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 618 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 619 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 620 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 621 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 622
 623 /* Union of all the masks above except CATEGORY_MASK_RAW_TEXT.  This value is
 624    returned if detect_coding_mask () finds nothing other than ASCII characters.  */
 625 #define CATEGORY_MASK_ANY               \
 626   (CATEGORY_MASK_ISO_7                  \
 627    | CATEGORY_MASK_ISO_7_TIGHT          \
 628    | CATEGORY_MASK_ISO_8_1              \
 629    | CATEGORY_MASK_ISO_8_2              \
 630    | CATEGORY_MASK_ISO_7_ELSE           \
 631    | CATEGORY_MASK_ISO_8_ELSE           \
 632    | CATEGORY_MASK_UTF_8_AUTO           \
 633    | CATEGORY_MASK_UTF_8_NOSIG          \
 634    | CATEGORY_MASK_UTF_8_SIG            \
 635    | CATEGORY_MASK_UTF_16_AUTO          \
 636    | CATEGORY_MASK_UTF_16_BE            \
 637    | CATEGORY_MASK_UTF_16_LE            \
 638    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 639    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 640    | CATEGORY_MASK_CHARSET              \
 641    | CATEGORY_MASK_SJIS                 \
 642    | CATEGORY_MASK_BIG5                 \
 643    | CATEGORY_MASK_CCL                  \
 644    | CATEGORY_MASK_EMACS_MULE)
 645
 646 /* Unions of related category bits, grouped by coding system family.  */
 647 #define CATEGORY_MASK_ISO_7BIT \
 648   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 649
 650 #define CATEGORY_MASK_ISO_8BIT \
 651   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 652
 653 #define CATEGORY_MASK_ISO_ELSE \
 654   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 655
 656 #define CATEGORY_MASK_ISO_ESCAPE        \
 657   (CATEGORY_MASK_ISO_7                  \
 658    | CATEGORY_MASK_ISO_7_TIGHT          \
 659    | CATEGORY_MASK_ISO_7_ELSE           \
 660    | CATEGORY_MASK_ISO_8_ELSE)
 661
 662 #define CATEGORY_MASK_ISO       \
 663   (  CATEGORY_MASK_ISO_7BIT     \
 664      | CATEGORY_MASK_ISO_8BIT   \
 665      | CATEGORY_MASK_ISO_ELSE)
 666
 667 #define CATEGORY_MASK_UTF_16            \
 668   (CATEGORY_MASK_UTF_16_AUTO            \
 669    | CATEGORY_MASK_UTF_16_BE            \
 670    | CATEGORY_MASK_UTF_16_LE            \
 671    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 672    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 673
 674 #define CATEGORY_MASK_UTF_8     \
 675   (CATEGORY_MASK_UTF_8_AUTO     \
 676    | CATEGORY_MASK_UTF_8_NOSIG  \
 677    | CATEGORY_MASK_UTF_8_SIG)
 678
 679 /* List of symbols `coding-category-xxx' ordered by priority.  This
 680    variable is exposed to Emacs Lisp.  */
 681 static Lisp_Object Vcoding_category_list;
 682
 683 /* Table of coding categories (Lisp symbols).  This variable is for
 684    internal use only.  */
 685 static Lisp_Object Vcoding_category_table;
 686
 687 /* Table of coding categories ordered by priority.  */
 688 static enum coding_category coding_priorities[coding_category_max];
 689
 690 /* Nth element is a coding context for the coding system bound to the
 691    Nth coding category.  */
 692 static struct coding_system coding_categories[coding_category_max];
 693
 694 /*** Commonly used macros and functions ***/
 695 /* min and max evaluate one of their arguments twice; avoid arguments with side effects.  */
 696 #ifndef min
 697 #define min(a, b) ((a) < (b) ? (a) : (b))
 698 #endif
 699 #ifndef max
 700 #define max(a, b) ((a) > (b) ? (a) : (b))
 701 #endif
 702 /* Set ATTRS from CODING's id, then CHARSET_LIST from ATTRS; both are assigned, so must be lvalues.  */
 703 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 704   do {                                                  \
 705     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 706     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 707   } while (0)
 708
 709
 710 /* Safely get one byte from the source text pointed by SRC which ends
 711    at SRC_END, and set C to that byte.  If no byte is left, jump to
 712    `no_more_source' (first recording CODING_RESULT_INSUFFICIENT_SRC if
 713    SRC_BASE < SRC).  If multibytep is nonzero and the byte has bit 0x80
 714    set, either merge a 0xC0/0xC1 lead byte with the next byte into C, or
 715    set C to the negated character code and record CODING_RESULT_INVALID_SRC.
 716    Caller provides: coding, src, src_base, src_end, multibytep, consumed_chars */
 717 /* NOTE(review): the byte after a 0xC0/0xC1 lead is read without a SRC_END check -- confirm that is safe.  */
 718 #define ONE_MORE_BYTE(c)                                \
 719   do {                                                  \
 720     if (src == src_end)                                 \
 721       {                                                 \
 722         if (src_base < src)                             \
 723           record_conversion_result                      \
 724             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 725         goto no_more_source;                            \
 726       }                                                 \
 727     c = *src++;                                         \
 728     if (multibytep && (c & 0x80))                       \
 729       {                                                 \
 730         if ((c & 0xFE) == 0xC0)                         \
 731           c = ((c & 1) << 6) | *src++;                  \
 732         else                                            \
 733           {                                             \
 734             src--;                                      \
 735             c = - string_char (src, &src, NULL);        \
 736             record_conversion_result                    \
 737               (coding, CODING_RESULT_INVALID_SRC);      \
 738           }                                             \
 739       }                                                 \
 740     consumed_chars++;                                   \
 741   } while (0)
 742
 743 /* Like ONE_MORE_BYTE, but without the SRC == SRC_END test, so it never jumps to `no_more_source'.  */
 744 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 745   do {                                                  \
 746     c = *src++;                                         \
 747     if (multibytep && (c & 0x80))                       \
 748       {                                                 \
 749         if ((c & 0xFE) == 0xC0)                         \
 750           c = ((c & 1) << 6) | *src++;                  \
 751         else                                            \
 752           {                                             \
 753             src--;                                      \
 754             c = - string_char (src, &src, NULL);        \
 755             record_conversion_result                    \
 756               (coding, CODING_RESULT_INVALID_SRC);      \
 757           }                                             \
 758       }                                                 \
 759     consumed_chars++;                                   \
 760   } while (0)
 761
 762
 763 /* Store a byte C in the place pointed by DST and increment DST to the
 764    next free point, and increment PRODUCED_CHARS.  The caller should
 765    ensure that C is 0..127, and declare and set the variables `dst'
 766    and `produced_chars' appropriately in advance.
 767 */
 768
 769
 770 #define EMIT_ONE_ASCII_BYTE(c)  \
 771   do {                          \
 772     produced_chars++;           \
 773     *dst++ = (c);               \
 774   } while (0)
 775
 776
 777 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 778
 779 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 780   do {                                  \
 781     produced_chars += 2;                \
 782     *dst++ = (c1), *dst++ = (c2);       \
 783   } while (0)
 784
 785
 786 /* Store a byte C in the place pointed by DST and increment DST to the
 787    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 788    nonzero, store in an appropriate multibyte from.  The caller should
 789    declare and set the variables `dst' and `multibytep' appropriately
 790    in advance.  */
 791
 792 #define EMIT_ONE_BYTE(c)                \
 793   do {                                  \
 794     produced_chars++;                   \
 795     if (multibytep)                     \
 796       {                                 \
 797         int ch = (c);                   \
 798         if (ch >= 0x80)                 \
 799           ch = BYTE8_TO_CHAR (ch);      \
 800         CHAR_STRING_ADVANCE (ch, dst);  \
 801       }                                 \
 802     else                                \
 803       *dst++ = (c);                     \
 804   } while (0)
 805
 806
 807 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 808
 809 #define EMIT_TWO_BYTES(c1, c2)          \
 810   do {                                  \
 811     produced_chars += 2;                \
 812     if (multibytep)                     \
 813       {                                 \
 814         int ch;                         \
 815                                         \
 816         ch = (c1);                      \
 817         if (ch >= 0x80)                 \
 818           ch = BYTE8_TO_CHAR (ch);      \
 819         CHAR_STRING_ADVANCE (ch, dst);  \
 820         ch = (c2);                      \
 821         if (ch >= 0x80)                 \
 822           ch = BYTE8_TO_CHAR (ch);      \
 823         CHAR_STRING_ADVANCE (ch, dst);  \
 824       }                                 \
 825     else                                \
 826       {                                 \
 827         *dst++ = (c1);                  \
 828         *dst++ = (c2);                  \
 829       }                                 \
 830   } while (0)
 831
 832
 833 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 834   do {                                  \
 835     EMIT_ONE_BYTE (c1);                 \
 836     EMIT_TWO_BYTES (c2, c3);            \
 837   } while (0)
 838
 839
 840 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 841   do {                                          \
 842     EMIT_TWO_BYTES (c1, c2);                    \
 843     EMIT_TWO_BYTES (c3, c4);                    \
 844   } while (0)
 845
 846
/* Prototypes for static functions.  */

/* Recording of the result code of a conversion step.  */
static void record_conversion_result P_ ((struct coding_system *coding,
                                          enum coding_result_code result));

/* The groups below each declare the detect/decode/encode entry points
   of one format; the format is the suffix of the function names.  */
static int detect_coding_utf_8 P_ ((struct coding_system *,
                                    struct coding_detection_info *info));
static void decode_coding_utf_8 P_ ((struct coding_system *));
static int encode_coding_utf_8 P_ ((struct coding_system *));

static int detect_coding_utf_16 P_ ((struct coding_system *,
                                     struct coding_detection_info *info));
static void decode_coding_utf_16 P_ ((struct coding_system *));
static int encode_coding_utf_16 P_ ((struct coding_system *));

static int detect_coding_iso_2022 P_ ((struct coding_system *,
                                       struct coding_detection_info *info));
static void decode_coding_iso_2022 P_ ((struct coding_system *));
static int encode_coding_iso_2022 P_ ((struct coding_system *));

static int detect_coding_emacs_mule P_ ((struct coding_system *,
                                         struct coding_detection_info *info));
static void decode_coding_emacs_mule P_ ((struct coding_system *));
static int encode_coding_emacs_mule P_ ((struct coding_system *));

static int detect_coding_sjis P_ ((struct coding_system *,
                                   struct coding_detection_info *info));
static void decode_coding_sjis P_ ((struct coding_system *));
static int encode_coding_sjis P_ ((struct coding_system *));

static int detect_coding_big5 P_ ((struct coding_system *,
                                   struct coding_detection_info *info));
static void decode_coding_big5 P_ ((struct coding_system *));
static int encode_coding_big5 P_ ((struct coding_system *));

static int detect_coding_ccl P_ ((struct coding_system *,
                                  struct coding_detection_info *info));
static void decode_coding_ccl P_ ((struct coding_system *));
static int encode_coding_ccl P_ ((struct coding_system *));

/* Raw text has a decoder and an encoder but no detector here.  */
static void decode_coding_raw_text P_ ((struct coding_system *));
static int encode_coding_raw_text P_ ((struct coding_system *));

/* Maintenance of the source/destination pointers of a coding system
   and growth of the destination area.  */
static void coding_set_source P_ ((struct coding_system *));
static void coding_set_destination P_ ((struct coding_system *));
static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
static void coding_alloc_by_making_gap P_ ((struct coding_system *,
                                            EMACS_INT, EMACS_INT));
static unsigned char *alloc_destination P_ ((struct coding_system *,
                                             EMACS_INT, unsigned char *));

/* Remaining helpers.  */
static void setup_iso_safe_charsets P_ ((Lisp_Object));
static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
                                                     int *, int *,
                                                     unsigned char *));
static int detect_eol P_ ((const unsigned char *,
                           EMACS_INT, enum coding_category));
static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
static void decode_eol P_ ((struct coding_system *));
static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
                                        int, int *, int *));
static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
static INLINE void produce_composition P_ ((struct coding_system *, int *,
                                            EMACS_INT));
static INLINE void produce_charset P_ ((struct coding_system *, int *,
                                        EMACS_INT));
static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
static int decode_coding P_ ((struct coding_system *));
static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
                                                      struct coding_system *,
                                                      int *, EMACS_INT *));
static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
                                                  struct coding_system *,
                                                  int *, EMACS_INT *));
static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
static int encode_coding P_ ((struct coding_system *));
static Lisp_Object make_conversion_work_buffer P_ ((int));
static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
static INLINE int char_encodable_p P_ ((int, Lisp_Object));
static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 925
 926 static void
 927 record_conversion_result (struct coding_system *coding,
 928                           enum coding_result_code result)
 929 {
 930   coding->result = result;
 931   switch (result)
 932     {
 933     case CODING_RESULT_INSUFFICIENT_SRC:
 934       Vlast_code_conversion_error = Qinsufficient_source;
 935       break;
 936     case CODING_RESULT_INCONSISTENT_EOL:
 937       Vlast_code_conversion_error = Qinconsistent_eol;
 938       break;
 939     case CODING_RESULT_INVALID_SRC:
 940       Vlast_code_conversion_error = Qinvalid_source;
 941       break;
 942     case CODING_RESULT_INTERRUPT:
 943       Vlast_code_conversion_error = Qinterrupted;
 944       break;
 945     case CODING_RESULT_INSUFFICIENT_MEM:
 946       Vlast_code_conversion_error = Qinsufficient_memory;
 947       break;
 948     default:
 949       Vlast_code_conversion_error = intern ("Unknown error");
 950     }
 951 }
 952
/* Decode CODE of CHARSET into the character C by DECODE_CHAR.

   `charset_map_loaded' is cleared before the call and tested after
   it.  When it has become nonzero (presumably DECODE_CHAR had to load
   a charset map, which may have moved the source text -- confirm
   against the charset code), CODING->source is recomputed with
   coding_set_source, and SRC, SRC_BASE and SRC_END are shifted by the
   distance CODING->source has moved.  SRC, SRC_BASE and SRC_END must
   therefore be assignable pointer lvalues into the source.  */

#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    c = DECODE_CHAR (charset, code);                                         \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *orig = coding->source;                          \
        EMACS_INT offset;                                                    \
                                                                             \
        coding_set_source (coding);                                          \
        offset = coding->source - orig;                                      \
        src += offset;                                                       \
        src_base += offset;                                                  \
        src_end += offset;                                                   \
      }                                                                      \
  } while (0)
 969
 970
/* Make sure that more than BYTES bytes of room remain at dst.  If
   dst + BYTES reaches or passes dst_end, enlarge coding->destination
   by (charbuf_end - charbuf + BYTES) bytes through alloc_destination
   and update dst and dst_end.  We don't have to take care of
   coding->source which will be relocated.  It is handled by calling
   coding_set_source in encode_coding.

   The caller must have `coding', `charbuf', `charbuf_end', `dst' and
   `dst_end' in scope.  */

#define ASSURE_DESTINATION(bytes)                               \
  do {                                                          \
    if (dst + (bytes) >= dst_end)                               \
      {                                                         \
        int more_bytes = charbuf_end - charbuf + (bytes);       \
                                                                \
        dst = alloc_destination (coding, more_bytes, dst);      \
        dst_end = coding->destination + coding->dst_bytes;      \
      }                                                         \
  } while (0)
 986
 987
 988 /* Store multibyte form of the character C in P, and advance P to the
 989    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
 990    never calls MAYBE_UNIFY_CHAR.  */
 991
 992 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
 993   do {                                          \
 994     if ((c) <= MAX_1_BYTE_CHAR)                 \
 995       *(p)++ = (c);                             \
 996     else if ((c) <= MAX_2_BYTE_CHAR)            \
 997       *(p)++ = (0xC0 | ((c) >> 6)),             \
 998         *(p)++ = (0x80 | ((c) & 0x3F));         \
 999     else if ((c) <= MAX_3_BYTE_CHAR)            \
1000       *(p)++ = (0xE0 | ((c) >> 12)),            \
1001         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1002         *(p)++ = (0x80 | ((c) & 0x3F));         \
1003     else if ((c) <= MAX_4_BYTE_CHAR)            \
1004       *(p)++ = (0xF0 | (c >> 18)),              \
1005         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1006         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1007         *(p)++ = (0x80 | (c & 0x3F));           \
1008     else if ((c) <= MAX_5_BYTE_CHAR)            \
1009       *(p)++ = 0xF8,                            \
1010         *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
1011         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1012         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1013         *(p)++ = (0x80 | (c & 0x3F));           \
1014     else                                        \
1015       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1016   } while (0)
1017
1018
/* Return the character code of character whose multibyte form is at
   P, and advance P to the end of the multibyte form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length of the form is chosen from the lead byte P[0]:
     bit 0x80 clear -> 1 byte, returned as is;
     bit 0x20 clear -> 2 bytes; when the lead byte is below 0xC2
                       (0xC0 or 0xC1 in well-formed text) 0x3FFF80 is
                       OR-ed into the result, which puts it in the
                       range that CHAR_STRING_ADVANCE_NO_UNIFY writes
                       through BYTE8_STRING;
     bit 0x10 clear -> 3 bytes;
     bit 0x08 clear -> 4 bytes;
     otherwise      -> 5 bytes; only the four trailing bytes
                       contribute bits, the lead byte itself does not.

   Trailing bytes are masked, not validated, and nothing checks that
   enough bytes follow P[0].  P is evaluated several times and is
   modified, so it must be a side-effect-free pointer lvalue.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  (!((p)[0] & 0x80)                                             \
   ? *(p)++                                                     \
   : ! ((p)[0] & 0x20)                                          \
   ? ((p) += 2,                                                 \
      ((((p)[-2] & 0x1F) << 6)                                  \
       | ((p)[-1] & 0x3F)                                       \
       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
   : ! ((p)[0] & 0x10)                                          \
   ? ((p) += 3,                                                 \
      ((((p)[-3] & 0x0F) << 12)                                 \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ! ((p)[0] & 0x08)                                          \
   ? ((p) += 4,                                                 \
      ((((p)[-4] & 0xF) << 18)                                  \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p) += 5,                                                 \
      ((((p)[-4] & 0x3F) << 18)                                 \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F))))
1047
1048
1049 static void
1050 coding_set_source (coding)
1051      struct coding_system *coding;
1052 {
1053   if (BUFFERP (coding->src_object))
1054     {
1055       struct buffer *buf = XBUFFER (coding->src_object);
1056
1057       if (coding->src_pos < 0)
1058         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1059       else
1060         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1061     }
1062   else if (STRINGP (coding->src_object))
1063     {
1064       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1065     }
1066   else
1067     /* Otherwise, the source is C string and is never relocated
1068        automatically.  Thus we don't have to update anything.  */
1069     ;
1070 }
1071
1072 static void
1073 coding_set_destination (coding)
1074      struct coding_system *coding;
1075 {
1076   if (BUFFERP (coding->dst_object))
1077     {
1078       if (coding->src_pos < 0)
1079         {
1080           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1081           coding->dst_bytes = (GAP_END_ADDR
1082                                - (coding->src_bytes - coding->consumed)
1083                                - coding->destination);
1084         }
1085       else
1086         {
1087           /* We are sure that coding->dst_pos_byte is before the gap
1088              of the buffer. */
1089           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1090                                  + coding->dst_pos_byte - BEG_BYTE);
1091           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1092                                - coding->destination);
1093         }
1094     }
1095   else
1096     /* Otherwise, the destination is C string and is never relocated
1097        automatically.  Thus we don't have to update anything.  */
1098     ;
1099 }
1100
1101
1102 static void
1103 coding_alloc_by_realloc (coding, bytes)
1104      struct coding_system *coding;
1105      EMACS_INT bytes;
1106 {
1107   coding->destination = (unsigned char *) xrealloc (coding->destination,
1108                                                     coding->dst_bytes + bytes);
1109   coding->dst_bytes += bytes;
1110 }
1111
/* Enlarge the gap of the destination buffer of CODING by calling
   make_gap (BYTES).

   GAP_HEAD_USED is the number of bytes at the head of the gap that
   already hold produced data; alloc_destination passes the distance
   from the gap start of the destination buffer to its current write
   position.  It is only used when source and destination are the
   same object.  */
static void
coding_alloc_by_making_gap (coding, gap_head_used, bytes)
     struct coding_system *coding;
     EMACS_INT gap_head_used, bytes;
{
  if (EQ (coding->src_object, coding->dst_object))
    {
      /* The gap may contain the produced data at the head and not-yet
         consumed data at the tail.  To preserve those data, we at
         first make the gap size to zero, then increase the gap
         size.  */
      EMACS_INT add = GAP_SIZE;

      /* Step the gap start over the produced data, and account for
         the old gap as if it were text, so that make_gap keeps its
         contents.  */
      GPT += gap_head_used, GPT_BYTE += gap_head_used;
      GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
      make_gap (bytes);
      /* Undo the temporary accounting in reverse order; the gap is
         now the old gap plus whatever make_gap added.  */
      GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
      GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
    }
  else
    {
      Lisp_Object this_buffer;

      /* Make the destination buffer current around the make_gap
         call, then switch back to the buffer that was current.  */
      this_buffer = Fcurrent_buffer ();
      set_buffer_internal (XBUFFER (coding->dst_object));
      make_gap (bytes);
      set_buffer_internal (XBUFFER (this_buffer));
    }
}
1141
1142
1143 static unsigned char *
1144 alloc_destination (coding, nbytes, dst)
1145      struct coding_system *coding;
1146      EMACS_INT nbytes;
1147      unsigned char *dst;
1148 {
1149   EMACS_INT offset = dst - coding->destination;
1150
1151   if (BUFFERP (coding->dst_object))
1152     {
1153       struct buffer *buf = XBUFFER (coding->dst_object);
1154
1155       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1156     }
1157   else
1158     coding_alloc_by_realloc (coding, nbytes);
1159   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1160   coding_set_destination (coding);
1161   dst = coding->destination + offset;
1162   return dst;
1163 }
1164
1165 /** Macros for annotations.  */
1166
/* Maximum length of annotation data (sum of annotations for
   composition and charset), counted in elements of coding->charbuf.
   NOTE(review): the terms presumably break down as 4 elements for a
   composition annotation up to and including METHOD, at most
   MAX_COMPOSITION_COMPONENTS * 2 - 1 component elements, and 4
   elements for a charset annotation -- confirm against the code that
   stores the composition components.  */
#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1170
/* Annotation data is stored in the array coding->charbuf in this
   format:
     [ -LENGTH ANNOTATION_MASK NCHARS ... ]
   LENGTH is the number of elements in the annotation.
   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
   NCHARS is the number of characters in the text annotated.

   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... METHOD [ COMPOSITION-COMPONENTS ... ]
   METHOD is one of enum composition_method.
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.

   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
   follows.

   ADD_ANNOTATION_DATA stores the three leading elements (-LEN, MASK,
   NCHARS) at BUF, advances BUF past them, and sets coding->annotated.
   BUF must be an assignable pointer lvalue and `coding' must be in
   scope.  The expansion is a single statement without a trailing
   semicolon, so the macro can be used as the body of an unbraced
   `if' that has an `else'.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1197
/* Store at BUF a composition annotation covering NCHARS characters:
   the common annotation header with length 4 and mask
   CODING_ANNOTATE_COMPOSITION_MASK, followed by METHOD.  BUF is
   advanced past the four elements written.  BUF and METHOD are
   parenthesized at every use, so BUF may be any assignable pointer
   lvalue expression (e.g. `*cursor').  */

#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)
1203
1204
/* Store at BUF a charset annotation covering NCHARS characters: the
   common annotation header with length 4 and mask
   CODING_ANNOTATE_CHARSET_MASK, followed by the charset ID.  BUF is
   advanced past the four elements written.  BUF and ID are
   parenthesized at every use, so BUF may be any assignable pointer
   lvalue expression (e.g. `*cursor').  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1210
1211 \f
1212 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1213
1214
1215
1216 \f
1217 /*** 3. UTF-8 ***/
1218
1219 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1220    Check if a text is encoded in UTF-8.  If it is, return 1, else
1221    return 0.  */
1222
/* Classification of one octet C of UTF-8 text by its leading bits:
   a complete one-octet character (below 0x80, which includes any
   negative value), a trailing octet (10xxxxxx), or the leading octet
   of a 2-, 3-, 4- or 5-octet sequence.  */
#define UTF_8_1_OCTET_P(c)         (0x80 > (c))
#define UTF_8_EXTRA_OCTET_P(c)     (0x80 == ((c) & 0xC0))
#define UTF_8_2_OCTET_LEADING_P(c) (0xC0 == ((c) & 0xE0))
#define UTF_8_3_OCTET_LEADING_P(c) (0xE0 == ((c) & 0xF0))
#define UTF_8_4_OCTET_LEADING_P(c) (0xF0 == ((c) & 0xF8))
#define UTF_8_5_OCTET_LEADING_P(c) (0xF8 == ((c) & 0xFC))
1229
/* The byte order mark as a character code.  */
#define UTF_BOM 0xFEFF
/* The three octets EF BB BF, which are the UTF-8 form of UTF_BOM:
   ((0xEF & 0x0F) << 12) | ((0xBB & 0x3F) << 6) | (0xBF & 0x3F)
   == 0xFEFF.  */
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1234
/* Check whether the source text of CODING can be UTF-8 and record
   the verdict in DETECT_INFO.

   CATEGORY_MASK_UTF_8 is always added to DETECT_INFO->checked.  The
   scan starts after the first CODING->head_ascii bytes and verifies
   that every non-ASCII lead octet is followed by the right number of
   trailing octets (sequences of 2 to 5 octets are accepted).

   Return 0 and add CATEGORY_MASK_UTF_8 to DETECT_INFO->rejected when
   a malformed sequence is met, or when the source ends inside a
   sequence and CODING_MODE_LAST_BLOCK is set.  Otherwise return 1
   with CATEGORY_MASK_UTF_8_NOSIG added to DETECT_INFO->found; the SIG
   variant is added to `found' too if the source begins with the
   octets EF BB BF, and to `rejected' if it does not.  */
static int
detect_coding_utf_8 (coding, detect_info)
     struct coding_system *coding;
     struct coding_detection_info *detect_info;
{
  const unsigned char *src = coding->source, *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  int bom_found = 0;
  /* NOTE(review): `found' is set below but never read in this
     function.  */
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_UTF_8;
  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  /* The loop is left either by `break' (malformed sequence) or from
     inside ONE_MORE_BYTE; no statement below names the label
     `no_more_source', so that macro evidently jumps there when the
     source is exhausted.  */
  while (1)
    {
      int c, c1, c2, c3, c4;

      /* SRC_BASE marks the start of the sequence being examined.  */
      src_base = src;
      ONE_MORE_BYTE (c);
      if (c < 0 || UTF_8_1_OCTET_P (c))
        continue;
      ONE_MORE_BYTE (c1);
      if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
        break;
      if (UTF_8_2_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c2);
      if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
        break;
      if (UTF_8_3_OCTET_LEADING_P (c))
        {
          found = 1;
          /* Only a sequence at the very beginning of the source can
             be a signature; this requires head_ascii to be zero.  */
          if (src_base == coding->source
              && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
            bom_found = 1;
          continue;
        }
      ONE_MORE_BYTE (c3);
      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
        break;
      if (UTF_8_4_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c4);
      if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
        break;
      if (UTF_8_5_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      break;
    }
  /* Reached by `break' only: a malformed sequence was seen.  */
  detect_info->rejected |= CATEGORY_MASK_UTF_8;
  return 0;

 no_more_source:
  /* SRC_BASE < SRC means the source ended inside a sequence; that is
     an error only if no further block will follow.  */
  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8;
      return 0;
    }
  if (bom_found)
    {
      /* The first character 0xFEFF doesn't necessarily mean a BOM.  */
      detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
    }
  else
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
      detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
    }
  return 1;
}
1317
1318
1319 static void
1320 decode_coding_utf_8 (coding)
1321      struct coding_system *coding;
1322 {
1323   const unsigned char *src = coding->source + coding->consumed;
1324   const unsigned char *src_end = coding->source + coding->src_bytes;
1325   const unsigned char *src_base;
1326   int *charbuf = coding->charbuf + coding->charbuf_used;
1327   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1328   int consumed_chars = 0, consumed_chars_base;
1329   int multibytep = coding->src_multibyte;
1330   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1331   Lisp_Object attr, charset_list;
1332   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1333   int byte_after_cr = -1;
1334
1335   CODING_GET_INFO (coding, attr, charset_list);
1336
1337   if (bom != utf_without_bom)
1338     {
1339       int c1, c2, c3;
1340
1341       src_base = src;
1342       ONE_MORE_BYTE (c1);
1343       if (! UTF_8_3_OCTET_LEADING_P (c1))
1344         src = src_base;
1345       else
1346         {
1347           ONE_MORE_BYTE (c2);
1348           if (! UTF_8_EXTRA_OCTET_P (c2))
1349             src = src_base;
1350           else
1351             {
1352               ONE_MORE_BYTE (c3);
1353               if (! UTF_8_EXTRA_OCTET_P (c3))
1354                 src = src_base;
1355               else
1356                 {
1357                   if ((c1 != UTF_8_BOM_1)
1358                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1359                     src = src_base;
1360                   else
1361                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1362                 }
1363             }
1364         }
1365     }
1366   CODING_UTF_8_BOM (coding) = utf_without_bom;
1367
1368
1369
1370   while (1)
1371     {
1372       int c, c1, c2, c3, c4, c5;
1373
1374       src_base = src;
1375       consumed_chars_base = consumed_chars;
1376
1377       if (charbuf >= charbuf_end)
1378         break;
1379
1380       if (byte_after_cr >= 0)
1381         c1 = byte_after_cr, byte_after_cr = -1;
1382       else
1383         ONE_MORE_BYTE (c1);
1384       if (c1 < 0)
1385         {
1386           c = - c1;
1387         }
1388       else if (UTF_8_1_OCTET_P(c1))
1389         {
1390           if (eol_crlf && c1 == '\r')
1391             ONE_MORE_BYTE (byte_after_cr);
1392           c = c1;
1393         }
1394       else
1395         {
1396           ONE_MORE_BYTE (c2);
1397           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1398             goto invalid_code;
1399           if (UTF_8_2_OCTET_LEADING_P (c1))
1400             {
1401               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1402               /* Reject overlong sequences here and below.  Encoders
1403                  producing them are incorrect, they can be misleading,
1404                  and they mess up read/write invariance.  */
1405               if (c < 128)
1406                 goto invalid_code;
1407             }
1408           else
1409             {
1410               ONE_MORE_BYTE (c3);
1411               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1412                 goto invalid_code;
1413               if (UTF_8_3_OCTET_LEADING_P (c1))
1414                 {
1415                   c = (((c1 & 0xF) << 12)
1416                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1417                   if (c < 0x800
1418                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1419                     goto invalid_code;
1420                 }
1421               else
1422                 {
1423                   ONE_MORE_BYTE (c4);
1424                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1425                     goto invalid_code;
1426                   if (UTF_8_4_OCTET_LEADING_P (c1))
1427                     {
1428                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1429                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1430                     if (c < 0x10000)
1431                       goto invalid_code;
1432                     }
1433                   else
1434                     {
1435                       ONE_MORE_BYTE (c5);
1436                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1437                         goto invalid_code;
1438                       if (UTF_8_5_OCTET_LEADING_P (c1))
1439                         {
1440                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1441                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1442                                | (c5 & 0x3F));
1443                           if ((c > MAX_CHAR) || (c < 0x200000))
1444                             goto invalid_code;
1445                         }
1446                       else
1447                         goto invalid_code;
1448                     }
1449                 }
1450             }
1451         }
1452
1453       *charbuf++ = c;
1454       continue;
1455
1456     invalid_code:
1457       src = src_base;
1458       consumed_chars = consumed_chars_base;
1459       ONE_MORE_BYTE (c);
1460       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1461       coding->errors++;
1462     }
1463
1464  no_more_source:
1465   coding->consumed_char += consumed_chars_base;
1466   coding->consumed = src_base - coding->source;
1467   coding->charbuf_used = charbuf - coding->charbuf;
1468 }
1469
1470
1471 static int
1472 encode_coding_utf_8 (coding)
1473      struct coding_system *coding;
1474 {
1475   int multibytep = coding->dst_multibyte;
1476   int *charbuf = coding->charbuf;
1477   int *charbuf_end = charbuf + coding->charbuf_used;
1478   unsigned char *dst = coding->destination + coding->produced;
1479   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1480   int produced_chars = 0;
1481   int c;
1482
1483   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1484     {
1485       ASSURE_DESTINATION (3);
1486       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1487       CODING_UTF_8_BOM (coding) = utf_without_bom;
1488     }
1489
1490   if (multibytep)
1491     {
1492       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1493
1494       while (charbuf < charbuf_end)
1495         {
1496           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1497
1498           ASSURE_DESTINATION (safe_room);
1499           c = *charbuf++;
1500           if (CHAR_BYTE8_P (c))
1501             {
1502               c = CHAR_TO_BYTE8 (c);
1503               EMIT_ONE_BYTE (c);
1504             }
1505           else
1506             {
1507               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1508               for (p = str; p < pend; p++)
1509                 EMIT_ONE_BYTE (*p);
1510             }
1511         }
1512     }
1513   else
1514     {
1515       int safe_room = MAX_MULTIBYTE_LENGTH;
1516
1517       while (charbuf < charbuf_end)
1518         {
1519           ASSURE_DESTINATION (safe_room);
1520           c = *charbuf++;
1521           if (CHAR_BYTE8_P (c))
1522             *dst++ = CHAR_TO_BYTE8 (c);
1523           else
1524             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1525           produced_chars++;
1526         }
1527     }
1528   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1529   coding->produced_char += produced_chars;
1530   coding->produced = dst - coding->destination;
1531   return 0;
1532 }
1533
1534
1535 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1536    Check if a text is encoded in one of UTF-16 based coding systems.
1537    If it is, return 1, else return 0.  */
1538
/* Nonzero if the top six bits of the low 16 bits of VAL are 110110,
   i.e. VAL (taken as a 16-bit unit) is in 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) (0xD800 == ((val) & 0xFC00))

/* Nonzero if the top six bits of the low 16 bits of VAL are 110111,
   i.e. VAL (taken as a 16-bit unit) is in 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) (0xDC00 == ((val) & 0xFC00))

/* Nonzero if VAL is 0xFFFE, 0xFFFF or a low surrogate.  */
#define UTF_16_INVALID_P(val)           \
  (UTF_16_LOW_SURROGATE_P (val)         \
   || (0xFFFF == (val))                 \
   || (0xFFFE == (val)))
1549
1550
1551 static int
1552 detect_coding_utf_16 (coding, detect_info)
1553      struct coding_system *coding;
1554      struct coding_detection_info *detect_info;
1555 {
1556   const unsigned char *src = coding->source, *src_base = src;
1557   const unsigned char *src_end = coding->source + coding->src_bytes;
1558   int multibytep = coding->src_multibyte;
1559   int consumed_chars = 0;
1560   int c1, c2;
1561
1562   detect_info->checked |= CATEGORY_MASK_UTF_16;
1563   if (coding->mode & CODING_MODE_LAST_BLOCK
1564       && (coding->src_chars & 1))
1565     {
1566       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1567       return 0;
1568     }
1569
1570   ONE_MORE_BYTE (c1);
1571   ONE_MORE_BYTE (c2);
1572   if ((c1 == 0xFF) && (c2 == 0xFE))
1573     {
1574       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1575                              | CATEGORY_MASK_UTF_16_AUTO);
1576       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1577                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1578                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1579     }
1580   else if ((c1 == 0xFE) && (c2 == 0xFF))
1581     {
1582       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1583                              | CATEGORY_MASK_UTF_16_AUTO);
1584       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1585                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1586                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1587     }
1588   else
1589     {
1590       /* We check the dispersion of Eth and Oth bytes where E is even and
1591          O is odd.  If both are high, we assume binary data.*/
1592       unsigned char e[256], o[256];
1593       unsigned e_num = 1, o_num = 1;
1594
1595       memset (e, 0, 256);
1596       memset (o, 0, 256);
1597       e[c1] = 1;
1598       o[c2] = 1;
1599
1600       detect_info->rejected
1601         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1602
1603       while (1)
1604         {
1605           ONE_MORE_BYTE (c1);
1606           ONE_MORE_BYTE (c2);
1607           if (! e[c1])
1608             {
1609               e[c1] = 1;
1610               e_num++;
1611               if (e_num >= 128)
1612                 break;
1613             }
1614           if (! o[c2])
1615             {
1616               o[c1] = 1;
1617               o_num++;
1618               if (o_num >= 128)
1619                 break;
1620             }
1621         }
1622       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1623       return 0;
1624     }
1625
1626  no_more_source:
1627   return 1;
1628 }
1629
1630 static void
1631 decode_coding_utf_16 (coding)
1632      struct coding_system *coding;
1633 {
1634   const unsigned char *src = coding->source + coding->consumed;
1635   const unsigned char *src_end = coding->source + coding->src_bytes;
1636   const unsigned char *src_base;
1637   int *charbuf = coding->charbuf + coding->charbuf_used;
1638   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1639   int consumed_chars = 0, consumed_chars_base;
1640   int multibytep = coding->src_multibyte;
1641   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1642   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1643   int surrogate = CODING_UTF_16_SURROGATE (coding);
1644   Lisp_Object attr, charset_list;
1645   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1646   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1647
1648   CODING_GET_INFO (coding, attr, charset_list);
1649
1650   if (bom == utf_with_bom)
1651     {
1652       int c, c1, c2;
1653
1654       src_base = src;
1655       ONE_MORE_BYTE (c1);
1656       ONE_MORE_BYTE (c2);
1657       c = (c1 << 8) | c2;
1658
1659       if (endian == utf_16_big_endian
1660           ? c != 0xFEFF : c != 0xFFFE)
1661         {
1662           /* The first two bytes are not BOM.  Treat them as bytes
1663              for a normal character.  */
1664           src = src_base;
1665           coding->errors++;
1666         }
1667       CODING_UTF_16_BOM (coding) = utf_without_bom;
1668     }
1669   else if (bom == utf_detect_bom)
1670     {
1671       /* We have already tried to detect BOM and failed in
1672          detect_coding.  */
1673       CODING_UTF_16_BOM (coding) = utf_without_bom;
1674     }
1675
1676   while (1)
1677     {
1678       int c, c1, c2;
1679
1680       src_base = src;
1681       consumed_chars_base = consumed_chars;
1682
1683       if (charbuf + 2 >= charbuf_end)
1684         break;
1685
1686       if (byte_after_cr1 >= 0)
1687         c1 = byte_after_cr1, byte_after_cr1 = -1;
1688       else
1689         ONE_MORE_BYTE (c1);
1690       if (c1 < 0)
1691         {
1692           *charbuf++ = -c1;
1693           continue;
1694         }
1695       if (byte_after_cr2 >= 0)
1696         c2 = byte_after_cr2, byte_after_cr2 = -1;
1697       else
1698         ONE_MORE_BYTE (c2);
1699       if (c2 < 0)
1700         {
1701           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1702           *charbuf++ = -c2;
1703           continue;
1704         }
1705       c = (endian == utf_16_big_endian
1706            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1707
1708       if (surrogate)
1709         {
1710           if (! UTF_16_LOW_SURROGATE_P (c))
1711             {
1712               if (endian == utf_16_big_endian)
1713                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1714               else
1715                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1716               *charbuf++ = c1;
1717               *charbuf++ = c2;
1718               coding->errors++;
1719               if (UTF_16_HIGH_SURROGATE_P (c))
1720                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1721               else
1722                 *charbuf++ = c;
1723             }
1724           else
1725             {
1726               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1727               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1728               *charbuf++ = 0x10000 + c;
1729             }
1730         }
1731       else
1732         {
1733           if (UTF_16_HIGH_SURROGATE_P (c))
1734             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1735           else
1736             {
1737               if (eol_crlf && c == '\r')
1738                 {
1739                   ONE_MORE_BYTE (byte_after_cr1);
1740                   ONE_MORE_BYTE (byte_after_cr2);
1741                 }
1742               *charbuf++ = c;
1743             }
1744         }
1745     }
1746
1747  no_more_source:
1748   coding->consumed_char += consumed_chars_base;
1749   coding->consumed = src_base - coding->source;
1750   coding->charbuf_used = charbuf - coding->charbuf;
1751 }
1752
1753 static int
1754 encode_coding_utf_16 (coding)
1755      struct coding_system *coding;
1756 {
1757   int multibytep = coding->dst_multibyte;
1758   int *charbuf = coding->charbuf;
1759   int *charbuf_end = charbuf + coding->charbuf_used;
1760   unsigned char *dst = coding->destination + coding->produced;
1761   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1762   int safe_room = 8;
1763   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1764   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1765   int produced_chars = 0;
1766   Lisp_Object attrs, charset_list;
1767   int c;
1768
1769   CODING_GET_INFO (coding, attrs, charset_list);
1770
1771   if (bom != utf_without_bom)
1772     {
1773       ASSURE_DESTINATION (safe_room);
1774       if (big_endian)
1775         EMIT_TWO_BYTES (0xFE, 0xFF);
1776       else
1777         EMIT_TWO_BYTES (0xFF, 0xFE);
1778       CODING_UTF_16_BOM (coding) = utf_without_bom;
1779     }
1780
1781   while (charbuf < charbuf_end)
1782     {
1783       ASSURE_DESTINATION (safe_room);
1784       c = *charbuf++;
1785       if (c >= MAX_UNICODE_CHAR)
1786         c = coding->default_char;
1787
1788       if (c < 0x10000)
1789         {
1790           if (big_endian)
1791             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1792           else
1793             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1794         }
1795       else
1796         {
1797           int c1, c2;
1798
1799           c -= 0x10000;
1800           c1 = (c >> 10) + 0xD800;
1801           c2 = (c & 0x3FF) + 0xDC00;
1802           if (big_endian)
1803             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1804           else
1805             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1806         }
1807     }
1808   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1809   coding->produced = dst - coding->destination;
1810   coding->produced_char += produced_chars;
1811   return 0;
1812 }
1813
1814 \f
1815 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1816
1817 /* Emacs' internal format for representation of multiple character
1818    sets is a kind of multi-byte encoding, i.e. characters are
1819    represented by variable-length sequences of one-byte codes.
1820
1821    ASCII characters and control characters (e.g. `tab', `newline') are
1822    represented by one-byte sequences which are their ASCII codes, in
1823    the range 0x00 through 0x7F.
1824
1825    8-bit characters of the range 0x80..0x9F are represented by
1826    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1827    code + 0x20).
1828
1829    8-bit characters of the range 0xA0..0xFF are represented by
1830    one-byte sequences which are their 8-bit code.
1831
1832    The other characters are represented by a sequence of `base
1833    leading-code', optional `extended leading-code', and one or two
1834    `position-code's.  The length of the sequence is determined by the
1835    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1836    whereas extended leading-code and position-code take the range 0xA0
1837    through 0xFF.  See `charset.h' for more details about leading-code
1838    and position-code.
1839
1840    --- CODE RANGE of Emacs' internal format ---
1841    character set        range
1842    -------------        -----
1843    ascii                0x00..0x7F
1844    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1845    eight-bit-graphic    0xA0..0xBF
1846    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1847    ---------------------------------------------
1848
1849    As this is the internal character representation, the format is
1850    usually not used externally (i.e. in a file or in a data sent to a
1851    process).  But, it is possible to have a text externally in this
1852    format (i.e. by encoding by the coding system `emacs-mule').
1853
1854    In that case, a sequence of one-byte codes has a slightly different
1855    form.
1856
1857    At first, all characters in eight-bit-control are represented by
1858    one-byte sequences which are their 8-bit code.
1859
   Next, character composition data are represented by a byte
   sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
   where,
        METHOD is 0xF2 plus one of the composition methods (enum
        composition_method),

        BYTES is 0xA0 plus the byte length of this composition data,

        CHARS is 0xA0 plus the number of characters composed by this
        data,

        COMPONENTs are characters in multibyte form or composition
        rules encoded as two bytes of ASCII codes.
1873
1874    In addition, for backward compatibility, the following formats are
1875    also recognized as composition data on decoding.
1876
1877    0x80 MSEQ ...
1878    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1879
1880    Here,
1881         MSEQ is a multibyte form but in these special format:
1882           ASCII: 0xA0 ASCII_CODE+0x80,
1883           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1884         RULE is a one byte code of the range 0xA0..0xF0 that
1885         represents a composition rule.
1886   */
1887
/* Table indexed by the first byte of an emacs-mule byte sequence.
   The entry is the total number of bytes in that sequence;
   emacs_mule_char handles the values 1 through 4 and aborts on
   anything else.
   NOTE(review): the table is filled in elsewhere -- not visible
   here.  */
char emacs_mule_bytes[256];
1889
1890 int
1891 emacs_mule_char (coding, src, nbytes, nchars, id)
1892      struct coding_system *coding;
1893      const unsigned char *src;
1894      int *nbytes, *nchars, *id;
1895 {
1896   const unsigned char *src_end = coding->source + coding->src_bytes;
1897   const unsigned char *src_base = src;
1898   int multibytep = coding->src_multibyte;
1899   struct charset *charset;
1900   unsigned code;
1901   int c;
1902   int consumed_chars = 0;
1903
1904   ONE_MORE_BYTE (c);
1905   if (c < 0)
1906     {
1907       c = -c;
1908       charset = emacs_mule_charset[0];
1909     }
1910   else
1911     {
1912       if (c >= 0xA0)
1913         {
1914           /* Old style component character of a composition.  */
1915           if (c == 0xA0)
1916             {
1917               ONE_MORE_BYTE (c);
1918               c -= 0x80;
1919             }
1920           else
1921             c -= 0x20;
1922         }
1923
1924       switch (emacs_mule_bytes[c])
1925         {
1926         case 2:
1927           if (! (charset = emacs_mule_charset[c]))
1928             goto invalid_code;
1929           ONE_MORE_BYTE (c);
1930           if (c < 0xA0)
1931             goto invalid_code;
1932           code = c & 0x7F;
1933           break;
1934
1935         case 3:
1936           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1937               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1938             {
1939               ONE_MORE_BYTE (c);
1940               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1941                 goto invalid_code;
1942               ONE_MORE_BYTE (c);
1943               if (c < 0xA0)
1944                 goto invalid_code;
1945               code = c & 0x7F;
1946             }
1947           else
1948             {
1949               if (! (charset = emacs_mule_charset[c]))
1950                 goto invalid_code;
1951               ONE_MORE_BYTE (c);
1952               if (c < 0xA0)
1953                 goto invalid_code;
1954               code = (c & 0x7F) << 8;
1955               ONE_MORE_BYTE (c);
1956               if (c < 0xA0)
1957                 goto invalid_code;
1958               code |= c & 0x7F;
1959             }
1960           break;
1961
1962         case 4:
1963           ONE_MORE_BYTE (c);
1964           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1965             goto invalid_code;
1966           ONE_MORE_BYTE (c);
1967           if (c < 0xA0)
1968             goto invalid_code;
1969           code = (c & 0x7F) << 8;
1970           ONE_MORE_BYTE (c);
1971           if (c < 0xA0)
1972             goto invalid_code;
1973           code |= c & 0x7F;
1974           break;
1975
1976         case 1:
1977           code = c;
1978           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1979                                      ? charset_ascii : charset_eight_bit);
1980           break;
1981
1982         default:
1983           abort ();
1984         }
1985       c = DECODE_CHAR (charset, code);
1986       if (c < 0)
1987         goto invalid_code;
1988     }
1989   *nbytes = src - src_base;
1990   *nchars = consumed_chars;
1991   if (id)
1992     *id = charset->id;
1993   return c;
1994
1995  no_more_source:
1996   return -2;
1997
1998  invalid_code:
1999   return -1;
2000 }
2001
2002
2003 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2004    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
2005    else return 0.  */
2006
2007 static int
2008 detect_coding_emacs_mule (coding, detect_info)
2009      struct coding_system *coding;
2010      struct coding_detection_info *detect_info;
2011 {
2012   const unsigned char *src = coding->source, *src_base;
2013   const unsigned char *src_end = coding->source + coding->src_bytes;
2014   int multibytep = coding->src_multibyte;
2015   int consumed_chars = 0;
2016   int c;
2017   int found = 0;
2018
2019   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
2020   /* A coding system of this category is always ASCII compatible.  */
2021   src += coding->head_ascii;
2022
2023   while (1)
2024     {
2025       src_base = src;
2026       ONE_MORE_BYTE (c);
2027       if (c < 0)
2028         continue;
2029       if (c == 0x80)
2030         {
2031           /* Perhaps the start of composite character.  We simple skip
2032              it because analyzing it is too heavy for detecting.  But,
2033              at least, we check that the composite character
2034              constitutes of more than 4 bytes.  */
2035           const unsigned char *src_base;
2036
2037         repeat:
2038           src_base = src;
2039           do
2040             {
2041               ONE_MORE_BYTE (c);
2042             }
2043           while (c >= 0xA0);
2044
2045           if (src - src_base <= 4)
2046             break;
2047           found = CATEGORY_MASK_EMACS_MULE;
2048           if (c == 0x80)
2049             goto repeat;
2050         }
2051
2052       if (c < 0x80)
2053         {
2054           if (c < 0x20
2055               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2056             break;
2057         }
2058       else
2059         {
2060           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2061
2062           while (more_bytes > 0)
2063             {
2064               ONE_MORE_BYTE (c);
2065               if (c < 0xA0)
2066                 {
2067                   src--;        /* Unread the last byte.  */
2068                   break;
2069                 }
2070               more_bytes--;
2071             }
2072           if (more_bytes != 0)
2073             break;
2074           found = CATEGORY_MASK_EMACS_MULE;
2075         }
2076     }
2077   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2078   return 0;
2079
2080  no_more_source:
2081   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2082     {
2083       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2084       return 0;
2085     }
2086   detect_info->found |= found;
2087   return 1;
2088 }
2089
2090
2091 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2092
/* Decode one character that is a component of a composition sequence
   at SRC, using emacs_mule_char.  On success store the character in
   *BUF and advance BUF, SRC and consumed_chars.

   If SRC is already at SRC_END, or emacs_mule_char returns -2 (the
   source ends inside the character), do nothing at all.  If
   emacs_mule_char returns any other negative value, jump to the label
   `invalid_code' of the enclosing function.

   The expansion uses `coding', `src', `src_end', `consumed_chars' and
   the label `invalid_code' from the surrounding scope.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
  do                                                            \
    {                                                           \
      int c;                                                    \
      int nbytes, nchars;                                       \
                                                                \
      if (src == src_end)                                       \
        break;                                                  \
      c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
      if (c < 0)                                                \
        {                                                       \
          if (c == -2)                                          \
            break;                                              \
          goto invalid_code;                                    \
        }                                                       \
      *buf++ = c;                                               \
      src += nbytes;                                            \
      consumed_chars += nchars;                                 \
    }                                                           \
  while (0)
2120
2121
/* Decode a composition rule represented as a component of composition
   sequence of Emacs 20 style at SRC.  The rule is a single byte whose
   value minus 0xA0 must be in the range 0..80; that value is split
   into GREF (quotient by 9) and NREF (remainder by 9).  Store the
   rule encoded by COMPOSITION_ENCODE_RULE in *BUF, and increment BUF.

   If no byte is left at SRC, or the byte is out of range, jump to the
   label `invalid_code' of the enclosing function.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
  do {                                                  \
    int c, gref, nref;                                  \
                                                        \
    if (src >= src_end)                                 \
      goto invalid_code;                                \
    ONE_MORE_BYTE_NO_CHECK (c);                         \
    c -= 0xA0;                                          \
    if (c < 0 || c >= 81)                               \
      goto invalid_code;                                \
                                                        \
    gref = c / 9, nref = c % 9;                         \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
2141
2142
/* Decode a composition rule represented as a component of composition
   sequence of Emacs 21 style at SRC.  The rule takes two bytes: GREF
   plus 0x20 followed by NREF plus 0x20.  Store the rule encoded by
   COMPOSITION_ENCODE_RULE in *BUF, and increment BUF.

   If fewer than two bytes are left before SRC_END, or either value is
   outside the range 0..80, jump to the label `invalid_code' of the
   enclosing function.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    if (src + 1>= src_end)                              \
      goto invalid_code;                                \
    ONE_MORE_BYTE_NO_CHECK (gref);                      \
    gref -= 0x20;                                       \
    ONE_MORE_BYTE_NO_CHECK (nref);                      \
    nref -= 0x20;                                       \
    if (gref < 0 || gref >= 81                          \
        || nref < 0 || nref >= 81)                      \
      goto invalid_code;                                \
    *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
  } while (0)
2163
2164
/* Decode an Emacs 21 style composition.  C holds the METHOD byte that
   has just been read; the BYTES and CHARS bytes are read here.  A
   negative read, or a BYTES value below 3, jumps to `invalid_code'.

   The composition is recorded with ADD_COMPOSITION_DATA.  Unless the
   method is COMPOSITION_RELATIVE, components are then decoded into
   `charbuf' until consumed_chars reaches consumed_chars_base + BYTES.
   Every odd-numbered component is a rule, except for
   COMPOSITION_WITH_ALTCHARS, where all components are characters.
   Finally the number of decoded components is subtracted from
   charbuf_base[0], the first element written by ADD_COMPOSITION_DATA
   (presumably the negated annotation length -- confirm against that
   macro).

   NOTE(review): DECODE_EMACS_MULE_COMPOSITION_CHAR does nothing when
   the source is exhausted, so consumed_chars does not advance in that
   case -- confirm that the `while' loop below cannot spin.  The test
   after the loop can never be true, since the loop only ends when it
   is false.  */

#define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
  do {                                                                  \
    /* Emacs 21 style format.  The first three bytes at SRC are         \
       (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
       the byte length of this composition information, CHARS is the    \
       number of characters composed by this composition.  */           \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int consumed_chars_limit;                                           \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nchars = c - 0xA0;                                                  \
    ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
    consumed_chars_limit = consumed_chars_base + nbytes;                \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        int i = 0;                                                      \
        while (consumed_chars < consumed_chars_limit)                   \
          {                                                             \
            if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
              DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
            else                                                        \
              DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
            i++;                                                        \
          }                                                             \
        if (consumed_chars < consumed_chars_limit)                      \
          goto invalid_code;                                            \
        charbuf_base[0] -= i;                                           \
      }                                                                 \
  } while (0)
2204
2205
/* Decode an Emacs 20 style relative composition (0x80 MSEQ ...).
   SRC is rewound to `src_base' and the 0x80 byte is read again; then
   up to MAX_COMPOSITION_COMPONENTS characters are decoded for as long
   as the next byte is 0xA0 or above.  Fewer than two loop iterations
   jumps to `invalid_code'.  Otherwise the composition is recorded
   with ADD_COMPOSITION_DATA and the characters are appended to
   `charbuf'.

   NOTE(review): the loop condition reads *SRC without first comparing
   SRC with SRC_END, and consumed_chars is not rewound together with
   SRC -- confirm both are intended.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
  do {                                                                  \
    /* Emacs 20 style format for relative composition.  */              \
    /* Store multibyte form of characters to be composed.  */           \
    enum composition_method method = COMPOSITION_RELATIVE;              \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
    int *buf = components;                                              \
    int i, j;                                                           \
                                                                        \
    src = src_base;                                                     \
    ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
    for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++)    \
      DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
    if (i < 2)                                                          \
      goto invalid_code;                                                \
    ADD_COMPOSITION_DATA (charbuf, i, method);                          \
    for (j = 0; j < i; j++)                                             \
      *charbuf++ = components[j];                                       \
  } while (0)
2225
2226
/* Decode an Emacs 20 style rule-base composition
   (0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ), with SRC just after the
   0xFF byte.  One character is decoded, then RULE/character pairs for
   as long as the next byte is 0xA0 or above, up to
   MAX_COMPOSITION_COMPONENTS characters in all.

   A single character only, or an even number of stored components,
   jumps to `invalid_code'.  If `charbuf' lacks room for the result,
   jump to `no_more_source'.  Otherwise the composition is recorded
   with ADD_COMPOSITION_DATA, followed by the interleaved characters
   and rules; their count is subtracted from charbuf_base[0]; and the
   characters alone are appended after them.

   NOTE(review): *SRC is read without first comparing SRC with
   SRC_END -- confirm this is safe for the callers' buffers.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
  do {                                                          \
    /* Emacs 20 style format for rule-base composition.  */     \
    /* Store multibyte form of characters to be composed.  */   \
    enum composition_method method = COMPOSITION_WITH_RULE;     \
    int *charbuf_base = charbuf;                                \
    int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
    int *buf = components;                                      \
    int i, j;                                                   \
                                                                \
    DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
    for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
      {                                                         \
        if (*src < 0xA0)                                        \
          break;                                                \
        DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
        DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
      }                                                         \
    if (i <= 1 || (buf - components) % 2 == 0)                  \
      goto invalid_code;                                        \
    if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
      goto no_more_source;                                      \
    ADD_COMPOSITION_DATA (charbuf, i, method);                  \
    i = i * 2 - 1;                                              \
    for (j = 0; j < i; j++)                                     \
      *charbuf++ = components[j];                               \
    charbuf_base[0] -= i;                                       \
    for (j = 0; j < i; j += 2)                                  \
      *charbuf++ = components[j];                               \
  } while (0)
2257
2258
2259 static void
2260 decode_coding_emacs_mule (coding)
2261      struct coding_system *coding;
2262 {
2263   const unsigned char *src = coding->source + coding->consumed;
2264   const unsigned char *src_end = coding->source + coding->src_bytes;
2265   const unsigned char *src_base;
2266   int *charbuf = coding->charbuf + coding->charbuf_used;
2267   int *charbuf_end
2268     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2269   int consumed_chars = 0, consumed_chars_base;
2270   int multibytep = coding->src_multibyte;
2271   Lisp_Object attrs, charset_list;
2272   int char_offset = coding->produced_char;
2273   int last_offset = char_offset;
2274   int last_id = charset_ascii;
2275   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2276   int byte_after_cr = -1;
2277
2278   CODING_GET_INFO (coding, attrs, charset_list);
2279
2280   while (1)
2281     {
2282       int c;
2283
2284       src_base = src;
2285       consumed_chars_base = consumed_chars;
2286
2287       if (charbuf >= charbuf_end)
2288         break;
2289
2290       if (byte_after_cr >= 0)
2291         c = byte_after_cr, byte_after_cr = -1;
2292       else
2293         ONE_MORE_BYTE (c);
2294       if (c < 0)
2295         {
2296           *charbuf++ = -c;
2297           char_offset++;
2298         }
2299       else if (c < 0x80)
2300         {
2301           if (eol_crlf && c == '\r')
2302             ONE_MORE_BYTE (byte_after_cr);
2303           *charbuf++ = c;
2304           char_offset++;
2305         }
2306       else if (c == 0x80)
2307         {
2308           ONE_MORE_BYTE (c);
2309           if (c < 0)
2310             goto invalid_code;
2311           if (c - 0xF2 >= COMPOSITION_RELATIVE
2312               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2313             DECODE_EMACS_MULE_21_COMPOSITION (c);
2314           else if (c < 0xC0)
2315             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2316           else if (c == 0xFF)
2317             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2318           else
2319             goto invalid_code;
2320         }
2321       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2322         {
2323           int nbytes, nchars;
2324           int id;
2325
2326           src = src_base;
2327           consumed_chars = consumed_chars_base;
2328           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2329           if (c < 0)
2330             {
2331               if (c == -2)
2332                 break;
2333               goto invalid_code;
2334             }
2335           if (last_id != id)
2336             {
2337               if (last_id != charset_ascii)
2338                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2339               last_id = id;
2340               last_offset = char_offset;
2341             }
2342           *charbuf++ = c;
2343           src += nbytes;
2344           consumed_chars += nchars;
2345           char_offset++;
2346         }
2347       else
2348         goto invalid_code;
2349       continue;
2350
2351     invalid_code:
2352       src = src_base;
2353       consumed_chars = consumed_chars_base;
2354       ONE_MORE_BYTE (c);
2355       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2356       char_offset++;
2357       coding->errors++;
2358     }
2359
2360  no_more_source:
2361   if (last_id != charset_ascii)
2362     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2363   coding->consumed_char += consumed_chars_base;
2364   coding->consumed = src_base - coding->source;
2365   coding->charbuf_used = charbuf - coding->charbuf;
2366 }
2367
2368
/* Store in CODES[0] and CODES[1] the emacs-mule leading code(s) of the
   charset whose emacs-mule id is ID.

   An id below 0xA0 is its own single leading code, and CODES[1] is
   set to 0.  Any other id is stored in CODES[1] behind a prefix byte
   in CODES[0] selected by the range of the id:
        0xA0..0xDF -> 0x9A      0xE0..0xEF -> 0x9B
        0xF0..0xF4 -> 0x9C      0xF5 and above -> 0x9D

   ID may be evaluated several times, so it must be free of side
   effects.  The expansion deliberately carries no trailing semicolon:
   the caller's own `;' completes the statement, which keeps the macro
   usable as the body of an `if' that has an `else'.  */
#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
2382
2383
2384 static int
2385 encode_coding_emacs_mule (coding)
2386      struct coding_system *coding;
2387 {
2388   int multibytep = coding->dst_multibyte;
2389   int *charbuf = coding->charbuf;
2390   int *charbuf_end = charbuf + coding->charbuf_used;
2391   unsigned char *dst = coding->destination + coding->produced;
2392   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2393   int safe_room = 8;
2394   int produced_chars = 0;
2395   Lisp_Object attrs, charset_list;
2396   int c;
2397   int preferred_charset_id = -1;
2398
2399   CODING_GET_INFO (coding, attrs, charset_list);
2400   if (! EQ (charset_list, Vemacs_mule_charset_list))
2401     {
2402       CODING_ATTR_CHARSET_LIST (attrs)
2403         = charset_list = Vemacs_mule_charset_list;
2404     }
2405
2406   while (charbuf < charbuf_end)
2407     {
2408       ASSURE_DESTINATION (safe_room);
2409       c = *charbuf++;
2410
2411       if (c < 0)
2412         {
2413           /* Handle an annotation.  */
2414           switch (*charbuf)
2415             {
2416             case CODING_ANNOTATE_COMPOSITION_MASK:
2417               /* Not yet implemented.  */
2418               break;
2419             case CODING_ANNOTATE_CHARSET_MASK:
2420               preferred_charset_id = charbuf[3];
2421               if (preferred_charset_id >= 0
2422                   && NILP (Fmemq (make_number (preferred_charset_id),
2423                                   charset_list)))
2424                 preferred_charset_id = -1;
2425               break;
2426             default:
2427               abort ();
2428             }
2429           charbuf += -c - 1;
2430           continue;
2431         }
2432
2433       if (ASCII_CHAR_P (c))
2434         EMIT_ONE_ASCII_BYTE (c);
2435       else if (CHAR_BYTE8_P (c))
2436         {
2437           c = CHAR_TO_BYTE8 (c);
2438           EMIT_ONE_BYTE (c);
2439         }
2440       else
2441         {
2442           struct charset *charset;
2443           unsigned code;
2444           int dimension;
2445           int emacs_mule_id;
2446           unsigned char leading_codes[2];
2447
2448           if (preferred_charset_id >= 0)
2449             {
2450               charset = CHARSET_FROM_ID (preferred_charset_id);
2451               if (! CHAR_CHARSET_P (c, charset))
2452                 charset = char_charset (c, charset_list, NULL);
2453             }
2454           else
2455             charset = char_charset (c, charset_list, &code);
2456           if (! charset)
2457             {
2458               c = coding->default_char;
2459               if (ASCII_CHAR_P (c))
2460                 {
2461                   EMIT_ONE_ASCII_BYTE (c);
2462                   continue;
2463                 }
2464               charset = char_charset (c, charset_list, &code);
2465             }
2466           dimension = CHARSET_DIMENSION (charset);
2467           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2468           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2469           EMIT_ONE_BYTE (leading_codes[0]);
2470           if (leading_codes[1])
2471             EMIT_ONE_BYTE (leading_codes[1]);
2472           if (dimension == 1)
2473             EMIT_ONE_BYTE (code | 0x80);
2474           else
2475             {
2476               code |= 0x8080;
2477               EMIT_ONE_BYTE (code >> 8);
2478               EMIT_ONE_BYTE (code & 0xFF);
2479             }
2480         }
2481     }
2482   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2483   coding->produced_char += produced_chars;
2484   coding->produced = dst - coding->destination;
2485   return 0;
2486 }
2487
2488 \f
2489 /*** 7. ISO2022 handlers ***/
2490
2491 /* The following note describes the coding system ISO2022 briefly.
2492    Since the intention of this note is to help understand the
2493    functions in this file, some parts are NOT ACCURATE or are OVERLY
2494    SIMPLIFIED.  For thorough understanding, please refer to the
2495    original document of ISO2022.  This is equivalent to the standard
2496    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2497
2498    ISO2022 provides many mechanisms to encode several character sets
2499    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2500    is encoded using bytes less than 128.  This may make the encoded
2501    text a little bit longer, but the text passes more easily through
2502    several types of gateway, some of which strip off the MSB (Most
2503    Significant Bit).
2504
2505    There are two kinds of character sets: control character sets and
2506    graphic character sets.  The former contain control characters such
2507    as `newline' and `escape' to provide control functions (control
2508    functions are also provided by escape sequences).  The latter
2509    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2510    two control character sets and many graphic character sets.
2511
2512    Graphic character sets are classified into one of the following
2513    four classes, according to the number of bytes (DIMENSION) and
2514    number of characters in one dimension (CHARS) of the set:
2515    - DIMENSION1_CHARS94
2516    - DIMENSION1_CHARS96
2517    - DIMENSION2_CHARS94
2518    - DIMENSION2_CHARS96
2519
2520    In addition, each character set is assigned an identification tag,
2521    unique for each set, called the "final character" (denoted as <F>
2522    hereafter).  The <F> of each character set is decided by ECMA(*)
2523    when it is registered in ISO.  The code range of <F> is 0x30..0x7E
2524    (0x30..0x3F are for private use only).
2525
2526    Note (*): ECMA = European Computer Manufacturers Association
2527
2528    Here are examples of graphic character sets [NAME(<F>)]:
2529         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2530         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2531         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2532         o DIMENSION2_CHARS96 -- none for the moment
2533
2534    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2535         C0 [0x00..0x1F] -- control character plane 0
2536         GL [0x20..0x7F] -- graphic character plane 0
2537         C1 [0x80..0x9F] -- control character plane 1
2538         GR [0xA0..0xFF] -- graphic character plane 1
2539
2540    A control character set is directly designated and invoked to C0 or
2541    C1 by an escape sequence.  The most common case is that:
2542    - ISO646's  control character set is designated/invoked to C0, and
2543    - ISO6429's control character set is designated/invoked to C1,
2544    and usually these designations/invocations are omitted in encoded
2545    text.  In a 7-bit environment, only C0 can be used, and a control
2546    character for C1 is encoded by an appropriate escape sequence to
2547    fit into the environment.  All control characters for C1 are
2548    defined to have corresponding escape sequences.
2549
2550    A graphic character set is at first designated to one of four
2551    graphic registers (G0 through G3), then these graphic registers are
2552    invoked to GL or GR.  These designations and invocations can be
2553    done independently.  The most common case is that G0 is invoked to
2554    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2555    these invocations and designations are omitted in encoded text.
2556    In a 7-bit environment, only GL can be used.
2557
2558    When a graphic character set of CHARS94 is invoked to GL, codes
2559    0x20 and 0x7F of the GL area work as control characters SPACE and
2560    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2561    be used.
2562
2563    There are two ways of invocation: locking-shift and single-shift.
2564    With locking-shift, the invocation lasts until the next different
2565    invocation, whereas with single-shift, the invocation affects the
2566    following character only and doesn't affect the locking-shift
2567    state.  Invocations are done by the following control characters or
2568    escape sequences:
2569
2570    ----------------------------------------------------------------------
2571    abbrev  function                  cntrl escape seq   description
2572    ----------------------------------------------------------------------
2573    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2574    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2575    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2576    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2577    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2578    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2579    LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
2580    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2581    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2582    ----------------------------------------------------------------------
2583    (*) These are not used by any known coding system.
2584
2585    Control characters for these functions are defined by macros
2586    ISO_CODE_XXX in `coding.h'.
2587
2588    Designations are done by the following escape sequences:
2589    ----------------------------------------------------------------------
2590    escape sequence      description
2591    ----------------------------------------------------------------------
2592    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2593    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2594    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2595    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2596    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2597    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2598    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2599    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2600    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2601    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2602    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2603    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2604    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2605    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2606    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2607    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2608    ----------------------------------------------------------------------
2609
2610    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2611    of dimension 1, chars 94, and final character <F>, etc...
2612
2613    Note (*): Although these designations are not allowed in ISO2022,
2614    Emacs accepts them on decoding, and produces them on encoding
2615    CHARS96 character sets in a coding system which is characterized as
2616    7-bit environment, non-locking-shift, and non-single-shift.
2617
2618    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2619    '(' must be omitted.  We refer to this as "short-form" hereafter.
2620
2621    Now you may notice that there are a lot of ways of encoding the
2622    same multilingual text in ISO2022.  Actually, there exist many
2623    coding systems such as Compound Text (used in X11's inter-client
2624    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2625    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2626    localized platforms), and all of these are variants of ISO2022.
2627
2628    In addition to the above, Emacs handles two more kinds of escape
2629    sequences: ISO6429's direction specification and Emacs' private
2630    sequence for specifying character composition.
2631
2632    ISO6429's direction specification takes the following form:
2633         o CSI ']'      -- end of the current direction
2634         o CSI '0' ']'  -- end of the current direction
2635         o CSI '1' ']'  -- start of left-to-right text
2636         o CSI '2' ']'  -- start of right-to-left text
2637    The control character CSI (0x9B: control sequence introducer) is
2638    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2639
2640    Character composition specification takes the following form:
2641         o ESC '0' -- start relative composition
2642         o ESC '1' -- end composition
2643         o ESC '2' -- start rule-base composition (*)
2644         o ESC '3' -- start relative composition with alternate chars  (**)
2645         o ESC '4' -- start rule-base composition with alternate chars  (**)
2646   Since these are not standard escape sequences of any ISO standard,
2647   the use of them with these meanings is restricted to Emacs only.
2648
2649   (*) This form is used only in Emacs 20.7 and older versions,
2650   but newer versions can safely decode it.
2651   (**) This form is used only in Emacs 21.1 and newer versions,
2652   and older versions can't decode it.
2653
2654   Here's a list of example usages of these composition escape
2655   sequences (categorized by `enum composition_method').
2656
2657   COMPOSITION_RELATIVE:
2658         ESC 0 CHAR [ CHAR ] ESC 1
2659   COMPOSITION_WITH_RULE:
2660         ESC 2 CHAR [ RULE CHAR ] ESC 1
2661   COMPOSITION_WITH_ALTCHARS:
2662         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2663   COMPOSITION_WITH_RULE_ALTCHARS:
2664         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2665
/* Table holding one `enum iso_code_class_type' value for each of the
   256 possible byte values.
   NOTE(review): presumably the ISO-2022 code class of each byte; the
   table is neither filled in nor read in this part of the file, so
   confirm against the code that initializes it.  */
2666 enum iso_code_class_type iso_code_class[256];
2667
/* Nonzero if the charset whose id is ID may be used with CODING.

   CODING->safe_charsets holds one byte per charset id up to
   CODING->max_charset_id; the byte 255 marks a charset that is not
   safe.  The byte is compared as an unsigned value because the table
   is accessed through a `char' pointer: where plain `char' is
   unsigned, a test for a negative value never fires and every charset
   would be reported as safe.

   ID must not be negative.  */
#define SAFE_CHARSET_P(coding, id)                              \
  ((id) <= (coding)->max_charset_id                             \
   && (unsigned char) (coding)->safe_charsets[id] != 255)
2671
2672
/* Nonzero if CODING_ISO_INITIAL, applied to the coding system
   registered for coding category CATEGORY and to register 1, yields a
   non-negative value.
   NOTE(review): presumably this means that a charset is initially
   designated to graphic register G1, so that shift-out is meaningful;
   confirm against the definition of CODING_ISO_INITIAL.  */
#define SHIFT_OUT_OK(category)  \
  (! (CODING_ISO_INITIAL (&coding_categories[(category)], 1) < 0))
2675
2676 static void
2677 setup_iso_safe_charsets (attrs)
2678      Lisp_Object attrs;
2679 {
2680   Lisp_Object charset_list, safe_charsets;
2681   Lisp_Object request;
2682   Lisp_Object reg_usage;
2683   Lisp_Object tail;
2684   int reg94, reg96;
2685   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2686   int max_charset_id;
2687
2688   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2689   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2690       && ! EQ (charset_list, Viso_2022_charset_list))
2691     {
2692       CODING_ATTR_CHARSET_LIST (attrs)
2693         = charset_list = Viso_2022_charset_list;
2694       ASET (attrs, coding_attr_safe_charsets, Qnil);
2695     }
2696
2697   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2698     return;
2699
2700   max_charset_id = 0;
2701   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2702     {
2703       int id = XINT (XCAR (tail));
2704       if (max_charset_id < id)
2705         max_charset_id = id;
2706     }
2707
2708   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2709                                 make_number (255));
2710   request = AREF (attrs, coding_attr_iso_request);
2711   reg_usage = AREF (attrs, coding_attr_iso_usage);
2712   reg94 = XINT (XCAR (reg_usage));
2713   reg96 = XINT (XCDR (reg_usage));
2714
2715   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2716     {
2717       Lisp_Object id;
2718       Lisp_Object reg;
2719       struct charset *charset;
2720
2721       id = XCAR (tail);
2722       charset = CHARSET_FROM_ID (XINT (id));
2723       reg = Fcdr (Fassq (id, request));
2724       if (! NILP (reg))
2725         SSET (safe_charsets, XINT (id), XINT (reg));
2726       else if (charset->iso_chars_96)
2727         {
2728           if (reg96 < 4)
2729             SSET (safe_charsets, XINT (id), reg96);
2730         }
2731       else
2732         {
2733           if (reg94 < 4)
2734             SSET (safe_charsets, XINT (id), reg94);
2735         }
2736     }
2737   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2738 }
2739
2740
2741 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2742    Check if a text is encoded in one of ISO-2022 based codig systems.
2743    If it is, return 1, else return 0.  */
2744
2745 static int
2746 detect_coding_iso_2022 (coding, detect_info)
2747      struct coding_system *coding;
2748      struct coding_detection_info *detect_info;
2749 {
2750   const unsigned char *src = coding->source, *src_base = src;
2751   const unsigned char *src_end = coding->source + coding->src_bytes;
2752   int multibytep = coding->src_multibyte;
2753   int single_shifting = 0;
2754   int id;
2755   int c, c1;
2756   int consumed_chars = 0;
2757   int i;
2758   int rejected = 0;
2759   int found = 0;
2760
2761   detect_info->checked |= CATEGORY_MASK_ISO;
2762
2763   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2764     {
2765       struct coding_system *this = &(coding_categories[i]);
2766       Lisp_Object attrs, val;
2767
2768       if (this->id < 0)
2769         continue;
2770       attrs = CODING_ID_ATTRS (this->id);
2771       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2772           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2773         setup_iso_safe_charsets (attrs);
2774       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2775       this->max_charset_id = SCHARS (val) - 1;
2776       this->safe_charsets = (char *) SDATA (val);
2777     }
2778
2779   /* A coding system of this category is always ASCII compatible.  */
2780   src += coding->head_ascii;
2781
2782   while (rejected != CATEGORY_MASK_ISO)
2783     {
2784       src_base = src;
2785       ONE_MORE_BYTE (c);
2786       switch (c)
2787         {
2788         case ISO_CODE_ESC:
2789           if (inhibit_iso_escape_detection)
2790             break;
2791           single_shifting = 0;
2792           ONE_MORE_BYTE (c);
2793           if (c >= '(' && c <= '/')
2794             {
2795               /* Designation sequence for a charset of dimension 1.  */
2796               ONE_MORE_BYTE (c1);
2797               if (c1 < ' ' || c1 >= 0x80
2798                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2799                 /* Invalid designation sequence.  Just ignore.  */
2800                 break;
2801             }
2802           else if (c == '$')
2803             {
2804               /* Designation sequence for a charset of dimension 2.  */
2805               ONE_MORE_BYTE (c);
2806               if (c >= '@' && c <= 'B')
2807                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2808                 id = iso_charset_table[1][0][c];
2809               else if (c >= '(' && c <= '/')
2810                 {
2811                   ONE_MORE_BYTE (c1);
2812                   if (c1 < ' ' || c1 >= 0x80
2813                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2814                     /* Invalid designation sequence.  Just ignore.  */
2815                     break;
2816                 }
2817               else
2818                 /* Invalid designation sequence.  Just ignore it.  */
2819                 break;
2820             }
2821           else if (c == 'N' || c == 'O')
2822             {
2823               /* ESC <Fe> for SS2 or SS3.  */
2824               single_shifting = 1;
2825               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2826               break;
2827             }
2828           else if (c >= '0' && c <= '4')
2829             {
2830               /* ESC <Fp> for start/end composition.  */
2831               found |= CATEGORY_MASK_ISO;
2832               break;
2833             }
2834           else
2835             {
2836               /* Invalid escape sequence.  Just ignore it.  */
2837               break;
2838             }
2839
2840           /* We found a valid designation sequence for CHARSET.  */
2841           rejected |= CATEGORY_MASK_ISO_8BIT;
2842           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2843                               id))
2844             found |= CATEGORY_MASK_ISO_7;
2845           else
2846             rejected |= CATEGORY_MASK_ISO_7;
2847           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2848                               id))
2849             found |= CATEGORY_MASK_ISO_7_TIGHT;
2850           else
2851             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2852           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2853                               id))
2854             found |= CATEGORY_MASK_ISO_7_ELSE;
2855           else
2856             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2857           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2858                               id))
2859             found |= CATEGORY_MASK_ISO_8_ELSE;
2860           else
2861             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2862           break;
2863
2864         case ISO_CODE_SO:
2865         case ISO_CODE_SI:
2866           /* Locking shift out/in.  */
2867           if (inhibit_iso_escape_detection)
2868             break;
2869           single_shifting = 0;
2870           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2871           break;
2872
2873         case ISO_CODE_CSI:
2874           /* Control sequence introducer.  */
2875           single_shifting = 0;
2876           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2877           found |= CATEGORY_MASK_ISO_8_ELSE;
2878           goto check_extra_latin;
2879
2880         case ISO_CODE_SS2:
2881         case ISO_CODE_SS3:
2882           /* Single shift.   */
2883           if (inhibit_iso_escape_detection)
2884             break;
2885           single_shifting = 0;
2886           rejected |= CATEGORY_MASK_ISO_7BIT;
2887           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2888               & CODING_ISO_FLAG_SINGLE_SHIFT)
2889             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2890           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2891               & CODING_ISO_FLAG_SINGLE_SHIFT)
2892             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2893           if (single_shifting)
2894             break;
2895           goto check_extra_latin;
2896
2897         default:
2898           if (c < 0)
2899             continue;
2900           if (c < 0x80)
2901             {
2902               single_shifting = 0;
2903               break;
2904             }
2905           if (c >= 0xA0)
2906             {
2907               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2908               found |= CATEGORY_MASK_ISO_8_1;
2909               /* Check the length of succeeding codes of the range
2910                  0xA0..0FF.  If the byte length is even, we include
2911                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2912                  only when we are not single shifting.  */
2913               if (! single_shifting
2914                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2915                 {
2916                   int i = 1;
2917                   while (src < src_end)
2918                     {
2919                       ONE_MORE_BYTE (c);
2920                       if (c < 0xA0)
2921                         break;
2922                       i++;
2923                     }
2924
2925                   if (i & 1 && src < src_end)
2926                     rejected |= CATEGORY_MASK_ISO_8_2;
2927                   else
2928                     found |= CATEGORY_MASK_ISO_8_2;
2929                 }
2930               break;
2931             }
2932         check_extra_latin:
2933           single_shifting = 0;
2934           if (! VECTORP (Vlatin_extra_code_table)
2935               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2936             {
2937               rejected = CATEGORY_MASK_ISO;
2938               break;
2939             }
2940           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2941               & CODING_ISO_FLAG_LATIN_EXTRA)
2942             found |= CATEGORY_MASK_ISO_8_1;
2943           else
2944             rejected |= CATEGORY_MASK_ISO_8_1;
2945           rejected |= CATEGORY_MASK_ISO_8_2;
2946         }
2947     }
2948   detect_info->rejected |= CATEGORY_MASK_ISO;
2949   return 0;
2950
2951  no_more_source:
2952   detect_info->rejected |= rejected;
2953   detect_info->found |= (found & ~rejected);
2954   return 1;
2955 }
2956
2957
/* Record in CODING the designation to graphic register REG of the
   charset identified by DIM, CHARS_96 and the final byte FINAL.

   If FINAL is outside '0'..127, if no charset is registered for it,
   or if the charset is not safe for CODING, the designation of REG is
   set to -2 and CHARS_96 is set to -1.  Otherwise the charset id is
   stored as the designation of REG, after replacing JISX0201-Roman by
   ASCII when CODING has CODING_ISO_FLAG_USE_ROMAN, and JISX0208.1978
   by JISX0208 when it has CODING_ISO_FLAG_USE_OLDJIS.

   CHARS_96 must be a variable: it is set to -1 to tell the caller
   that the escape sequence should be kept.

   The expansion uses the variable `coding' of the calling function.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id, prev;                                                       \
                                                                        \
    if (final >= '0' && final < 128                                     \
        && (id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0         \
        && SAFE_CHARSET_P (coding, id))                                 \
      {                                                                 \
        prev = CODING_ISO_DESIGNATION (coding, reg);                    \
        if (id == charset_jisx0201_roman)                               \
          id = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN) \
                ? charset_ascii : charset_jisx0201_roman);              \
        else if (id == charset_jisx0208_1978)                           \
          id = ((CODING_ISO_FLAGS (coding)                              \
                 & CODING_ISO_FLAG_USE_OLDJIS)                          \
                ? charset_jisx0208 : charset_jisx0208_1978);            \
        CODING_ISO_DESIGNATION (coding, reg) = id;                      \
        /* A designation of ASCII that follows an invalid designation   \
           to the same register must be kept as well.  */               \
        if (prev == -2 && id == charset_ascii)                          \
          chars_96 = -1;                                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Invalid or unsafe designation.  */                           \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
      }                                                                 \
  } while (0)
2990
2991
/* If a composition is in progress, close it without composing: the
   characters collected so far in `components' are stored in the
   character buffer as ordinary characters and the composition state
   is reset to COMPOSING_NO.  If no composition is in progress, do
   nothing.

   For COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS every
   component is stored.  For the other methods only every second
   component, starting with the first, is stored.
   NOTE(review): presumably the skipped components are the
   composition rules -- confirm against the code that fills
   `components'.

   `char_offset' is advanced by exactly the number of characters
   stored.  For the every-second-component case that number is
   (component_idx + 1) / 2, also when component_idx is even.

   If the character buffer cannot take `component_idx' more elements,
   jump to `no_more_source' without changing anything.

   The expansion uses these variables of the calling function:
   composition_state, method, components, component_idx, charbuf,
   charbuf_end and char_offset.  */
#define MAYBE_FINISH_COMPOSITION()                              \
  do {                                                          \
    int i;                                                      \
    if (composition_state == COMPOSING_NO)                      \
      break;                                                    \
    /* It is assured that we have enough room for producing     \
       characters stored in the table `components'.  */         \
    if (charbuf + component_idx > charbuf_end)                  \
      goto no_more_source;                                      \
    composition_state = COMPOSING_NO;                           \
    if (method == COMPOSITION_RELATIVE                          \
        || method == COMPOSITION_WITH_ALTCHARS)                 \
      {                                                         \
        for (i = 0; i < component_idx; i++)                     \
          *charbuf++ = components[i];                           \
        char_offset += component_idx;                           \
      }                                                         \
    else                                                        \
      {                                                         \
        for (i = 0; i < component_idx; i += 2)                  \
          *charbuf++ = components[i];                           \
        char_offset += (component_idx + 1) / 2;                 \
      }                                                         \
  } while (0)
3016
3017
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte that followed ESC.

   If C1 is '0' while the composition state is
   COMPOSING_COMPONENT_RULE, the sequence only ends the list of
   alternate characters of the composition in progress: the number of
   components collected so far is remembered in `component_len' and
   the state becomes COMPOSING_CHAR.

   Otherwise a new composition starts.  A composition still in
   progress is closed by MAYBE_FINISH_COMPOSITION first.  The new
   composition is accepted only if the character buffer has room for
   MAX_COMPOSITION_COMPONENTS elements and the rest of the source
   contains the end sequence ESC 1.  If there is no room, jump to
   `no_more_source'.  If the end sequence is missing -- which includes
   the case that no source byte is left at all -- record
   CODING_RESULT_INSUFFICIENT_SRC and jump to `no_more_source'.

   The expansion uses these variables of the calling function: coding,
   src, src_end, charbuf, charbuf_end, composition_state, method,
   component_idx and component_len, plus those used by
   MAYBE_FINISH_COMPOSITION.  */

#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if (c1 == '0'                                                       \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const unsigned char *p;                                         \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        /* Look for ESC 1 as long as two bytes remain at P.  */         \
        for (p = src; src_end - p >= 2; p++)                            \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        if (src_end - p < 2)                                            \
          {                                                             \
            /* The current composition doesn't end in the current       \
               source.  */                                              \
            record_conversion_result                                    \
              (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
                  : c1 == '2' ? COMPOSITION_WITH_RULE                   \
                  : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
3062
3063
3064 /* Handle composition end sequence ESC 1.  */
3065
/* Finish the composition collected in COMPONENTS (ESC 1 was read).

   A composition annotation is added to CHARBUF with
   ADD_COMPOSITION_DATA; for every method except COMPOSITION_RELATIVE
   the component elements are appended to it and the first element of
   the annotation is overwritten with the (negative) distance from the
   end of the annotation back to its start.  The composed characters
   are then stored in CHARBUF, CHAR_OFFSET is advanced once per
   character, and the state returns to COMPOSING_NO.  */
#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int *annotation_head = charbuf;                                     \
    int composed_chars;                                                 \
    int k;                                                              \
                                                                        \
    if (component_len > 0)                                              \
      composed_chars = component_idx - component_len;                   \
    else if (method == COMPOSITION_RELATIVE)                            \
      composed_chars = component_idx;                                   \
    else                                                                \
      composed_chars = (component_idx + 1) / 2;                         \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, composed_chars, method);             \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        /* Without an explicit component part every collected           \
           element is a component.  */                                  \
        int n_components                                                \
          = component_len == 0 ? component_idx : component_len;         \
                                                                        \
        for (k = 0; k < n_components; k++)                              \
          *charbuf++ = components[k];                                   \
        *annotation_head = annotation_head - charbuf;                   \
      }                                                                 \
    {                                                                   \
      /* With COMPOSITION_WITH_RULE the characters sit at the even      \
         indices; otherwise they follow the component part.  */         \
      int first = method == COMPOSITION_WITH_RULE ? 0 : component_len;  \
      int step = method == COMPOSITION_WITH_RULE ? 2 : 1;               \
                                                                        \
      for (k = first; k < component_idx; k += step)                     \
        {                                                               \
          *charbuf++ = components[k];                                   \
          char_offset++;                                                \
        }                                                               \
    }                                                                   \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
3094
3095
3096 /* Decode a composition rule from the byte C1 (and maybe one more byte
3097    from SRC) and store the encoded composition rule back in C1 (0 if
3098    the byte does not start a valid rule).  */
3099
/* Decode the composition rule whose first byte is C1 and replace C1
   (which must be a modifiable lvalue) with the value produced by
   COMPOSITION_ENCODE_RULE, or with 0 when the byte is not a valid
   rule code.

   After subtracting 32, a value below 81 is a one-byte rule in the
   old format, and a value from 81 to 92 is the first byte of a
   two-byte rule in the new format.  The second byte is read with
   ONE_MORE_BYTE into the variable `c2', which the expansion site must
   provide.  */
#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if ((c1) < 81)              /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (c1) / 9;                                            \
        int nref = (c1) % 9;                                            \
        if (gref == 4) gref = 10;                                       \
        if (nref == 4) nref = 10;                                       \
        (c1) = COMPOSITION_ENCODE_RULE (gref, nref);                    \
      }                                                                 \
    else if ((c1) < 93)         /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        (c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32);            \
      }                                                                 \
    else                                                                \
      (c1) = 0;                                                         \
  } while (0)
3119
3120
3121 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3122
3123 static void
3124 decode_coding_iso_2022 (coding)
3125      struct coding_system *coding;
3126 {
3127   const unsigned char *src = coding->source + coding->consumed;
3128   const unsigned char *src_end = coding->source + coding->src_bytes;
3129   const unsigned char *src_base;
3130   int *charbuf = coding->charbuf + coding->charbuf_used;
3131   int *charbuf_end
3132     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
3133   int consumed_chars = 0, consumed_chars_base;
3134   int multibytep = coding->src_multibyte;
3135   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3136   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3137   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3138   int charset_id_2, charset_id_3;
3139   struct charset *charset;
3140   int c;
3141   /* For handling composition sequence.  */
3142 #define COMPOSING_NO                    0
3143 #define COMPOSING_CHAR                  1
3144 #define COMPOSING_RULE                  2
3145 #define COMPOSING_COMPONENT_CHAR        3
3146 #define COMPOSING_COMPONENT_RULE        4
3147
3148   int composition_state = COMPOSING_NO;
3149   enum composition_method method;
3150   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3151   int component_idx;
3152   int component_len;
3153   Lisp_Object attrs, charset_list;
3154   int char_offset = coding->produced_char;
3155   int last_offset = char_offset;
3156   int last_id = charset_ascii;
3157   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3158   int byte_after_cr = -1;
3159
3160   CODING_GET_INFO (coding, attrs, charset_list);
3161   setup_iso_safe_charsets (attrs);
3162   /* Charset list may have been changed.  */
3163   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3164   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3165
3166   while (1)
3167     {
3168       int c1, c2;
3169
3170       src_base = src;
3171       consumed_chars_base = consumed_chars;
3172
3173       if (charbuf >= charbuf_end)
3174         break;
3175
3176       if (byte_after_cr >= 0)
3177         c1 = byte_after_cr, byte_after_cr = -1;
3178       else
3179         ONE_MORE_BYTE (c1);
3180       if (c1 < 0)
3181         goto invalid_code;
3182
3183       /* We produce at most one character.  */
3184       switch (iso_code_class [c1])
3185         {
3186         case ISO_0x20_or_0x7F:
3187           if (composition_state != COMPOSING_NO)
3188             {
3189               if (composition_state == COMPOSING_RULE
3190                   || composition_state == COMPOSING_COMPONENT_RULE)
3191                 {
3192                   DECODE_COMPOSITION_RULE (c1);
3193                   components[component_idx++] = c1;
3194                   composition_state--;
3195                   continue;
3196                 }
3197             }
3198           if (charset_id_0 < 0
3199               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3200             /* This is SPACE or DEL.  */
3201             charset = CHARSET_FROM_ID (charset_ascii);
3202           else
3203             charset = CHARSET_FROM_ID (charset_id_0);
3204           break;
3205
3206         case ISO_graphic_plane_0:
3207           if (composition_state != COMPOSING_NO)
3208             {
3209               if (composition_state == COMPOSING_RULE
3210                   || composition_state == COMPOSING_COMPONENT_RULE)
3211                 {
3212                   DECODE_COMPOSITION_RULE (c1);
3213                   components[component_idx++] = c1;
3214                   composition_state--;
3215                   continue;
3216                 }
3217             }
3218           if (charset_id_0 < 0)
3219             charset = CHARSET_FROM_ID (charset_ascii);
3220           else
3221             charset = CHARSET_FROM_ID (charset_id_0);
3222           break;
3223
3224         case ISO_0xA0_or_0xFF:
3225           if (charset_id_1 < 0
3226               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3227               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3228             goto invalid_code;
3229           /* This is a graphic character, we fall down ... */
3230
3231         case ISO_graphic_plane_1:
3232           if (charset_id_1 < 0)
3233             goto invalid_code;
3234           charset = CHARSET_FROM_ID (charset_id_1);
3235           break;
3236
3237         case ISO_control_0:
3238           if (eol_crlf && c1 == '\r')
3239             ONE_MORE_BYTE (byte_after_cr);
3240           MAYBE_FINISH_COMPOSITION ();
3241           charset = CHARSET_FROM_ID (charset_ascii);
3242           break;
3243
3244         case ISO_control_1:
3245           MAYBE_FINISH_COMPOSITION ();
3246           goto invalid_code;
3247
3248         case ISO_shift_out:
3249           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3250               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3251             goto invalid_code;
3252           CODING_ISO_INVOCATION (coding, 0) = 1;
3253           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3254           continue;
3255
3256         case ISO_shift_in:
3257           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3258             goto invalid_code;
3259           CODING_ISO_INVOCATION (coding, 0) = 0;
3260           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3261           continue;
3262
3263         case ISO_single_shift_2_7:
3264         case ISO_single_shift_2:
3265           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3266             goto invalid_code;
3267           /* SS2 is handled as an escape sequence of ESC 'N' */
3268           c1 = 'N';
3269           goto label_escape_sequence;
3270
3271         case ISO_single_shift_3:
3272           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3273             goto invalid_code;
3274           /* SS2 is handled as an escape sequence of ESC 'O' */
3275           c1 = 'O';
3276           goto label_escape_sequence;
3277
3278         case ISO_control_sequence_introducer:
3279           /* CSI is handled as an escape sequence of ESC '[' ...  */
3280           c1 = '[';
3281           goto label_escape_sequence;
3282
3283         case ISO_escape:
3284           ONE_MORE_BYTE (c1);
3285         label_escape_sequence:
3286           /* Escape sequences handled here are invocation,
3287              designation, direction specification, and character
3288              composition specification.  */
3289           switch (c1)
3290             {
3291             case '&':           /* revision of following character set */
3292               ONE_MORE_BYTE (c1);
3293               if (!(c1 >= '@' && c1 <= '~'))
3294                 goto invalid_code;
3295               ONE_MORE_BYTE (c1);
3296               if (c1 != ISO_CODE_ESC)
3297                 goto invalid_code;
3298               ONE_MORE_BYTE (c1);
3299               goto label_escape_sequence;
3300
3301             case '$':           /* designation of 2-byte character set */
3302               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3303                 goto invalid_code;
3304               {
3305                 int reg, chars96;
3306
3307                 ONE_MORE_BYTE (c1);
3308                 if (c1 >= '@' && c1 <= 'B')
3309                   {     /* designation of JISX0208.1978, GB2312.1980,
3310                            or JISX0208.1980 */
3311                     reg = 0, chars96 = 0;
3312                   }
3313                 else if (c1 >= 0x28 && c1 <= 0x2B)
3314                   { /* designation of DIMENSION2_CHARS94 character set */
3315                     reg = c1 - 0x28, chars96 = 0;
3316                     ONE_MORE_BYTE (c1);
3317                   }
3318                 else if (c1 >= 0x2C && c1 <= 0x2F)
3319                   { /* designation of DIMENSION2_CHARS96 character set */
3320                     reg = c1 - 0x2C, chars96 = 1;
3321                     ONE_MORE_BYTE (c1);
3322                   }
3323                 else
3324                   goto invalid_code;
3325                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3326                 /* We must update these variables now.  */
3327                 if (reg == 0)
3328                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3329                 else if (reg == 1)
3330                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3331                 if (chars96 < 0)
3332                   goto invalid_code;
3333               }
3334               continue;
3335
3336             case 'n':           /* invocation of locking-shift-2 */
3337               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3338                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3339                 goto invalid_code;
3340               CODING_ISO_INVOCATION (coding, 0) = 2;
3341               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3342               continue;
3343
3344             case 'o':           /* invocation of locking-shift-3 */
3345               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3346                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3347                 goto invalid_code;
3348               CODING_ISO_INVOCATION (coding, 0) = 3;
3349               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3350               continue;
3351
3352             case 'N':           /* invocation of single-shift-2 */
3353               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3354                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3355                 goto invalid_code;
3356               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3357               if (charset_id_2 < 0)
3358                 charset = CHARSET_FROM_ID (charset_ascii);
3359               else
3360                 charset = CHARSET_FROM_ID (charset_id_2);
3361               ONE_MORE_BYTE (c1);
3362               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3363                 goto invalid_code;
3364               break;
3365
3366             case 'O':           /* invocation of single-shift-3 */
3367               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3368                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3369                 goto invalid_code;
3370               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3371               if (charset_id_3 < 0)
3372                 charset = CHARSET_FROM_ID (charset_ascii);
3373               else
3374                 charset = CHARSET_FROM_ID (charset_id_3);
3375               ONE_MORE_BYTE (c1);
3376               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3377                 goto invalid_code;
3378               break;
3379
3380             case '0': case '2': case '3': case '4': /* start composition */
3381               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3382                 goto invalid_code;
3383               DECODE_COMPOSITION_START (c1);
3384               continue;
3385
3386             case '1':           /* end composition */
3387               if (composition_state == COMPOSING_NO)
3388                 goto invalid_code;
3389               DECODE_COMPOSITION_END ();
3390               continue;
3391
3392             case '[':           /* specification of direction */
3393               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3394                 goto invalid_code;
3395               /* For the moment, nested direction is not supported.
3396                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3397                  left-to-right, and nozero means right-to-left.  */
3398               ONE_MORE_BYTE (c1);
3399               switch (c1)
3400                 {
3401                 case ']':       /* end of the current direction */
3402                   coding->mode &= ~CODING_MODE_DIRECTION;
3403
3404                 case '0':       /* end of the current direction */
3405                 case '1':       /* start of left-to-right direction */
3406                   ONE_MORE_BYTE (c1);
3407                   if (c1 == ']')
3408                     coding->mode &= ~CODING_MODE_DIRECTION;
3409                   else
3410                     goto invalid_code;
3411                   break;
3412
3413                 case '2':       /* start of right-to-left direction */
3414                   ONE_MORE_BYTE (c1);
3415                   if (c1 == ']')
3416                     coding->mode |= CODING_MODE_DIRECTION;
3417                   else
3418                     goto invalid_code;
3419                   break;
3420
3421                 default:
3422                   goto invalid_code;
3423                 }
3424               continue;
3425
3426             case '%':
3427               ONE_MORE_BYTE (c1);
3428               if (c1 == '/')
3429                 {
3430                   /* CTEXT extended segment:
3431                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3432                      We keep these bytes as is for the moment.
3433                      They may be decoded by post-read-conversion.  */
3434                   int dim, M, L;
3435                   int size;
3436
3437                   ONE_MORE_BYTE (dim);
3438                   ONE_MORE_BYTE (M);
3439                   ONE_MORE_BYTE (L);
3440                   size = ((M - 128) * 128) + (L - 128);
3441                   if (charbuf + 8 + size > charbuf_end)
3442                     goto break_loop;
3443                   *charbuf++ = ISO_CODE_ESC;
3444                   *charbuf++ = '%';
3445                   *charbuf++ = '/';
3446                   *charbuf++ = dim;
3447                   *charbuf++ = BYTE8_TO_CHAR (M);
3448                   *charbuf++ = BYTE8_TO_CHAR (L);
3449                   while (size-- > 0)
3450                     {
3451                       ONE_MORE_BYTE (c1);
3452                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3453                     }
3454                 }
3455               else if (c1 == 'G')
3456                 {
3457                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3458                      ESC % G --UTF-8-BYTES-- ESC % @
3459                      We keep these bytes as is for the moment.
3460                      They may be decoded by post-read-conversion.  */
3461                   int *p = charbuf;
3462
3463                   if (p + 6 > charbuf_end)
3464                     goto break_loop;
3465                   *p++ = ISO_CODE_ESC;
3466                   *p++ = '%';
3467                   *p++ = 'G';
3468                   while (p < charbuf_end)
3469                     {
3470                       ONE_MORE_BYTE (c1);
3471                       if (c1 == ISO_CODE_ESC
3472                           && src + 1 < src_end
3473                           && src[0] == '%'
3474                           && src[1] == '@')
3475                         {
3476                           src += 2;
3477                           break;
3478                         }
3479                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3480                     }
3481                   if (p + 3 > charbuf_end)
3482                     goto break_loop;
3483                   *p++ = ISO_CODE_ESC;
3484                   *p++ = '%';
3485                   *p++ = '@';
3486                   charbuf = p;
3487                 }
3488               else
3489                 goto invalid_code;
3490               continue;
3491               break;
3492
3493             default:
3494               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3495                 goto invalid_code;
3496               {
3497                 int reg, chars96;
3498
3499                 if (c1 >= 0x28 && c1 <= 0x2B)
3500                   { /* designation of DIMENSION1_CHARS94 character set */
3501                     reg = c1 - 0x28, chars96 = 0;
3502                     ONE_MORE_BYTE (c1);
3503                   }
3504                 else if (c1 >= 0x2C && c1 <= 0x2F)
3505                   { /* designation of DIMENSION1_CHARS96 character set */
3506                     reg = c1 - 0x2C, chars96 = 1;
3507                     ONE_MORE_BYTE (c1);
3508                   }
3509                 else
3510                   goto invalid_code;
3511                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3512                 /* We must update these variables now.  */
3513                 if (reg == 0)
3514                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3515                 else if (reg == 1)
3516                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3517                 if (chars96 < 0)
3518                   goto invalid_code;
3519               }
3520               continue;
3521             }
3522         }
3523
3524       if (charset->id != charset_ascii
3525           && last_id != charset->id)
3526         {
3527           if (last_id != charset_ascii)
3528             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3529           last_id = charset->id;
3530           last_offset = char_offset;
3531         }
3532
3533       /* Now we know CHARSET and 1st position code C1 of a character.
3534          Produce a decoded character while getting 2nd position code
3535          C2 if necessary.  */
3536       c1 &= 0x7F;
3537       if (CHARSET_DIMENSION (charset) > 1)
3538         {
3539           ONE_MORE_BYTE (c2);
3540           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3541             /* C2 is not in a valid range.  */
3542             goto invalid_code;
3543           c1 = (c1 << 8) | (c2 & 0x7F);
3544           if (CHARSET_DIMENSION (charset) > 2)
3545             {
3546               ONE_MORE_BYTE (c2);
3547               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3548                 /* C2 is not in a valid range.  */
3549                 goto invalid_code;
3550               c1 = (c1 << 8) | (c2 & 0x7F);
3551             }
3552         }
3553
3554       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3555       if (c < 0)
3556         {
3557           MAYBE_FINISH_COMPOSITION ();
3558           for (; src_base < src; src_base++, char_offset++)
3559             {
3560               if (ASCII_BYTE_P (*src_base))
3561                 *charbuf++ = *src_base;
3562               else
3563                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3564             }
3565         }
3566       else if (composition_state == COMPOSING_NO)
3567         {
3568           *charbuf++ = c;
3569           char_offset++;
3570         }
3571       else
3572         {
3573           components[component_idx++] = c;
3574           if (method == COMPOSITION_WITH_RULE
3575               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3576                   && composition_state == COMPOSING_COMPONENT_CHAR))
3577             composition_state++;
3578         }
3579       continue;
3580
3581     invalid_code:
3582       MAYBE_FINISH_COMPOSITION ();
3583       src = src_base;
3584       consumed_chars = consumed_chars_base;
3585       ONE_MORE_BYTE (c);
3586       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3587       char_offset++;
3588       coding->errors++;
3589       continue;
3590
3591     break_loop:
3592       break;
3593     }
3594
3595  no_more_source:
3596   if (last_id != charset_ascii)
3597     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3598   coding->consumed_char += consumed_chars_base;
3599   coding->consumed = src_base - coding->source;
3600   coding->charbuf_used = charbuf - coding->charbuf;
3601 }
3602
3603
3604 /* ISO2022 encoding stuff.  */
3605
3606 /*
3607    It is not enough to say just "ISO2022" on encoding, we have to
3608    specify more details.  In Emacs, each coding system of ISO2022
3609    variant has the following specifications:
3610         1. Initial designation to G0 thru G3.
3611         2. Allows short-form designation?
3612         3. ASCII should be designated to G0 before control characters?
3613         4. ASCII should be designated to G0 at end of line?
3614         5. 7-bit environment or 8-bit environment?
3615         6. Use locking-shift?
3616         7. Use Single-shift?
3617    And the following two are only for Japanese:
3618         8. Use ASCII in place of JIS0201-1976-Roman?
3619         9. Use JISX0208-1983 in place of JISX0208-1978?
3620    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3621    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3622    details.
3623 */
3624
3625 /* Produce codes (escape sequence) for designating CHARSET to graphic
3626    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3627    '@', 'A', or 'B' and the coding system CODING allows, produce
3628    designation sequence of short-form.  */
3629
/* Emit the escape sequence that designates CHARSET to graphic
   register REG, then record the designation in CODING.

   The sequence is: an optional revision prefix (ESC & <'@'+revision>)
   when CODING has CODING_ISO_FLAG_REVISION and CHARSET has a
   non-negative revision; ESC; `$' for a charset whose dimension is
   not 1; an intermediate byte selecting REG (from "()*+" for a
   94-character set, ",-./" for a 96-character set); and the final
   byte of CHARSET.  For a multi-dimension 94-character set designated
   to register 0 with final byte '@', 'A' or 'B', the intermediate
   byte is omitted unless CODING_ISO_FLAG_LONG_FORM is set.  */
#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    const char *intermediate_char_94 = "()*+";                          \
    const char *intermediate_char_96 = ",-./";                          \
    int revision                                                        \
      = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)         \
         ? CHARSET_ISO_REVISION (charset)                               \
         : -1);                                                         \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          {                                                             \
            EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);            \
          }                                                             \
        else if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM) \
                 || (reg) != 0                                          \
                 || final_char < '@' || final_char > 'B')               \
          {                                                             \
            /* The short form is not applicable.  */                    \
            EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);            \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        int c = (CHARSET_ISO_CHARS_96 (charset)                         \
                 ? intermediate_char_96[reg]                            \
                 : intermediate_char_94[reg]);                          \
                                                                        \
        EMIT_ONE_ASCII_BYTE (c);                                        \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3672
3673
3674 /* The following two macros produce codes (control character or escape
3675    sequence) for ISO2022 single-shift functions (single-shift-2 and
3676    single-shift-3).  */
3677
/* Emit single-shift-2: the two bytes ESC 'N' when CODING has
   CODING_ISO_FLAG_SEVEN_BITS, the single byte ISO_CODE_SS2 otherwise.
   Then mark CODING as single-shifting; the next character emitted by
   ENCODE_ISO_CHARACTER_DIMENSION1 clears that mark.  */
#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS2);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3686
3687
/* Emit single-shift-3: the two bytes ESC 'O' when CODING has
   CODING_ISO_FLAG_SEVEN_BITS, the single byte ISO_CODE_SS3 otherwise.
   Then mark CODING as single-shifting; the next character emitted by
   ENCODE_ISO_CHARACTER_DIMENSION1 clears that mark.  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3696
3697
3698 /* The following four macros produce codes (control character or
3699    escape sequence) for ISO2022 locking-shift functions (shift-in,
3700    shift-out, locking-shift-2, and locking-shift-3).  */
3701
/* Emit the shift-in control code and record that graphic plane 0 of
   CODING now invokes graphic register 0.  */
#define ENCODE_SHIFT_IN                                                 \
  do                                                                    \
    {                                                                   \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);        /* SI */                \
      CODING_ISO_INVOCATION (coding, 0) = 0;    /* plane 0 <- G0 */     \
    }                                                                   \
  while (0)
3707
3708
/* Emit the shift-out control code and record that graphic plane 0 of
   CODING now invokes graphic register 1.  */
#define ENCODE_SHIFT_OUT                                                \
  do                                                                    \
    {                                                                   \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);        /* SO */                \
      CODING_ISO_INVOCATION (coding, 0) = 1;    /* plane 0 <- G1 */     \
    }                                                                   \
  while (0)
3714
3715
/* Emit locking-shift-2 (ESC 'n') and record that graphic plane 0 of
   CODING now invokes graphic register 2.  */
#define ENCODE_LOCKING_SHIFT_2                                          \
  do                                                                    \
    {                                                                   \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');  /* LS2 */              \
      CODING_ISO_INVOCATION (coding, 0) = 2;     /* plane 0 <- G2 */    \
    }                                                                   \
  while (0)
3721
3722
/* Emit locking-shift-3 (ESC 'o') and record that graphic plane 0 of
   CODING now invokes graphic register 3.

   The final byte must be 'o': ESC 'n' is locking-shift-2, and the
   decoder (decode_coding_iso_2022) maps 'n' to register 2 and 'o' to
   register 3.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3728
3729
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: it is replaced by JISX0201-Roman when
   the coding system asks for that instead of ASCII.  C1 may be any
   expression; it is parenthesized at each use, as the arguments of
   ENCODE_ISO_CHARACTER_DIMENSION2 are.  The macro relies on `coding',
   `dst' and `produced_chars' being in scope at the point of use.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift is in effect: it covers exactly this one     \
           character, so clear the flag once the byte is out.  */       \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3772
3773
/* Emit the two position codes C1 and C2 of a DIMENSION2 character
   belonging to CHARSET.  If CHARSET is not currently reachable through
   graphic plane 0 or 1 (and no single shift is pending), the needed
   designation and invocation sequences are produced first and the
   attempt is repeated.  CHARSET must be an lvalue: it is replaced by
   the 1978 version of JISX0208 when the coding system requests old
   JIS.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    for (;;)                                                            \
      {                                                                 \
        int id = CHARSET_ID (charset);                                  \
                                                                        \
        if (id == charset_jisx0208                                      \
            && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)) \
          {                                                             \
            id = charset_jisx0208_1978;                                 \
            charset = CHARSET_FROM_ID (id);                             \
          }                                                             \
                                                                        \
        if (! CODING_ISO_SINGLE_SHIFTING (coding)                       \
            && id != CODING_ISO_INVOKED_CHARSET (coding, 0)             \
            && id != CODING_ISO_INVOKED_CHARSET (coding, 1))            \
          {                                                             \
            /* CHARSET is on neither graphic plane: designate and/or    \
               invoke it, then try again.  */                           \
            dst = encode_invocation_designation (charset, coding, dst,  \
                                                 &produced_chars);      \
            continue;                                                   \
          }                                                             \
                                                                        \
        if (CODING_ISO_SINGLE_SHIFTING (coding))                        \
          {                                                             \
            /* The pending single shift covers this character only.  */ \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) \
              EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);          \
            else                                                        \
              EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                \
            CODING_ISO_SINGLE_SHIFTING (coding) = 0;                    \
          }                                                             \
        else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))          \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        break;                                                          \
      }                                                                 \
  } while (0)
3816
3817
/* Encode character C in CHARSET: obtain its code point with
   ENCODE_CHAR, then hand it to the one-byte producer when CHARSET has
   dimension 1, or split it into high and low bytes for the two-byte
   producer otherwise.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do                                                                       \
    {                                                                      \
      int code = ENCODE_CHAR ((charset),(c));                              \
                                                                           \
      if (CHARSET_DIMENSION (charset) != 1)                                \
        ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
      else                                                                 \
        ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                 \
    }                                                                      \
  while (0)
3827
3828
3829 /* Produce designation and invocation codes at a place pointed by DST
3830    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3831    Return new DST.  */
3832
3833 unsigned char *
3834 encode_invocation_designation (charset, coding, dst, p_nchars)
3835      struct charset *charset;
3836      struct coding_system *coding;
3837      unsigned char *dst;
3838      int *p_nchars;
3839 {
3840   int multibytep = coding->dst_multibyte;
3841   int produced_chars = *p_nchars;
3842   int reg;                      /* graphic register number */
3843   int id = CHARSET_ID (charset);
3844
3845   /* At first, check designations.  */
3846   for (reg = 0; reg < 4; reg++)
3847     if (id == CODING_ISO_DESIGNATION (coding, reg))
3848       break;
3849
3850   if (reg >= 4)
3851     {
3852       /* CHARSET is not yet designated to any graphic registers.  */
3853       /* At first check the requested designation.  */
3854       reg = CODING_ISO_REQUEST (coding, id);
3855       if (reg < 0)
3856         /* Since CHARSET requests no special designation, designate it
3857            to graphic register 0.  */
3858         reg = 0;
3859
3860       ENCODE_DESIGNATION (charset, reg, coding);
3861     }
3862
3863   if (CODING_ISO_INVOCATION (coding, 0) != reg
3864       && CODING_ISO_INVOCATION (coding, 1) != reg)
3865     {
3866       /* Since the graphic register REG is not invoked to any graphic
3867          planes, invoke it to graphic plane 0.  */
3868       switch (reg)
3869         {
3870         case 0:                 /* graphic register 0 */
3871           ENCODE_SHIFT_IN;
3872           break;
3873
3874         case 1:                 /* graphic register 1 */
3875           ENCODE_SHIFT_OUT;
3876           break;
3877
3878         case 2:                 /* graphic register 2 */
3879           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3880             ENCODE_SINGLE_SHIFT_2;
3881           else
3882             ENCODE_LOCKING_SHIFT_2;
3883           break;
3884
3885         case 3:                 /* graphic register 3 */
3886           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3887             ENCODE_SINGLE_SHIFT_3;
3888           else
3889             ENCODE_LOCKING_SHIFT_3;
3890           break;
3891         }
3892     }
3893
3894   *p_nchars = produced_chars;
3895   return dst;
3896 }
3897
3898 /* The following three macros produce codes for indicating direction
3899    of text.  */
/* Emit a control sequence introducer: ESC '[' when the coding system
   is restricted to seven bits, the one-byte CSI code otherwise.
   CODING_ISO_FLAGS is a bit mask, so the seven-bit flag is tested with
   `&' as everywhere else in this file; comparing the whole mask with
   `==' would pick the 8-bit form whenever any other flag is set.
   This is an object-like macro: use it without an argument list.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
3907
3908
/* Emit the control sequence CSI '2' ']' (right-to-left direction).
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it is
   used without an argument list; writing `(dst)' after it would leave
   a stray parenthesized expression after its `while (0)'.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
3914
3915
/* Emit the control sequence CSI '0' ']' (left-to-right direction).
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it is
   used without an argument list; writing `(dst)' after it would leave
   a stray parenthesized expression after its `while (0)'.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
3921
3922
/* Bring the graphic planes and registers back to their initial state:
   shift in if graphic plane 0 does not hold register 0, then
   re-designate every register whose current charset differs from its
   initial one.  Registers with a negative initial charset are left
   alone.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    struct charset *charset;                                            \
    int reg;                                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
                                                                        \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        if (CODING_ISO_INITIAL (coding, reg) < 0)                       \
          continue;                                                     \
        if (CODING_ISO_DESIGNATION (coding, reg)                        \
            == CODING_ISO_INITIAL (coding, reg))                        \
          continue;                                                     \
        charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));   \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
3941
3942
3943 /* Produce designation sequences of charsets in the line started from
3944    SRC to a place pointed by DST, and return updated DST.
3945
3946    If the current block ends before any end-of-line, we may fail to
3947    find all the necessary designations.  */
3948
3949 static unsigned char *
3950 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3951      struct coding_system *coding;
3952      int *charbuf, *charbuf_end;
3953      unsigned char *dst;
3954 {
3955   struct charset *charset;
3956   /* Table of charsets to be designated to each graphic register.  */
3957   int r[4];
3958   int c, found = 0, reg;
3959   int produced_chars = 0;
3960   int multibytep = coding->dst_multibyte;
3961   Lisp_Object attrs;
3962   Lisp_Object charset_list;
3963
3964   attrs = CODING_ID_ATTRS (coding->id);
3965   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3966   if (EQ (charset_list, Qiso_2022))
3967     charset_list = Viso_2022_charset_list;
3968
3969   for (reg = 0; reg < 4; reg++)
3970     r[reg] = -1;
3971
3972   while (found < 4)
3973     {
3974       int id;
3975
3976       c = *charbuf++;
3977       if (c == '\n')
3978         break;
3979       charset = char_charset (c, charset_list, NULL);
3980       id = CHARSET_ID (charset);
3981       reg = CODING_ISO_REQUEST (coding, id);
3982       if (reg >= 0 && r[reg] < 0)
3983         {
3984           found++;
3985           r[reg] = id;
3986         }
3987     }
3988
3989   if (found)
3990     {
3991       for (reg = 0; reg < 4; reg++)
3992         if (r[reg] >= 0
3993             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3994           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3995     }
3996
3997   return dst;
3998 }
3999
4000 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4001
4002 static int
4003 encode_coding_iso_2022 (coding)
4004      struct coding_system *coding;
4005 {
4006   int multibytep = coding->dst_multibyte;
4007   int *charbuf = coding->charbuf;
4008   int *charbuf_end = charbuf + coding->charbuf_used;
4009   unsigned char *dst = coding->destination + coding->produced;
4010   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4011   int safe_room = 16;
4012   int bol_designation
4013     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4014        && CODING_ISO_BOL (coding));
4015   int produced_chars = 0;
4016   Lisp_Object attrs, eol_type, charset_list;
4017   int ascii_compatible;
4018   int c;
4019   int preferred_charset_id = -1;
4020
4021   CODING_GET_INFO (coding, attrs, charset_list);
4022   eol_type = CODING_ID_EOL_TYPE (coding->id);
4023   if (VECTORP (eol_type))
4024     eol_type = Qunix;
4025
4026   setup_iso_safe_charsets (attrs);
4027   /* Charset list may have been changed.  */
4028   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4029   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
4030
4031   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4032
4033   while (charbuf < charbuf_end)
4034     {
4035       ASSURE_DESTINATION (safe_room);
4036
4037       if (bol_designation)
4038         {
4039           unsigned char *dst_prev = dst;
4040
4041           /* We have to produce designation sequences if any now.  */
4042           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4043           bol_designation = 0;
4044           /* We are sure that designation sequences are all ASCII bytes.  */
4045           produced_chars += dst - dst_prev;
4046         }
4047
4048       c = *charbuf++;
4049
4050       if (c < 0)
4051         {
4052           /* Handle an annotation.  */
4053           switch (*charbuf)
4054             {
4055             case CODING_ANNOTATE_COMPOSITION_MASK:
4056               /* Not yet implemented.  */
4057               break;
4058             case CODING_ANNOTATE_CHARSET_MASK:
4059               preferred_charset_id = charbuf[2];
4060               if (preferred_charset_id >= 0
4061                   && NILP (Fmemq (make_number (preferred_charset_id),
4062                                   charset_list)))
4063                 preferred_charset_id = -1;
4064               break;
4065             default:
4066               abort ();
4067             }
4068           charbuf += -c - 1;
4069           continue;
4070         }
4071
4072       /* Now encode the character C.  */
4073       if (c < 0x20 || c == 0x7F)
4074         {
4075           if (c == '\n'
4076               || (c == '\r' && EQ (eol_type, Qmac)))
4077             {
4078               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4079                 ENCODE_RESET_PLANE_AND_REGISTER ();
4080               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4081                 {
4082                   int i;
4083
4084                   for (i = 0; i < 4; i++)
4085                     CODING_ISO_DESIGNATION (coding, i)
4086                       = CODING_ISO_INITIAL (coding, i);
4087                 }
4088               bol_designation
4089                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4090             }
4091           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4092             ENCODE_RESET_PLANE_AND_REGISTER ();
4093           EMIT_ONE_ASCII_BYTE (c);
4094         }
4095       else if (ASCII_CHAR_P (c))
4096         {
4097           if (ascii_compatible)
4098             EMIT_ONE_ASCII_BYTE (c);
4099           else
4100             {
4101               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4102               ENCODE_ISO_CHARACTER (charset, c);
4103             }
4104         }
4105       else if (CHAR_BYTE8_P (c))
4106         {
4107           c = CHAR_TO_BYTE8 (c);
4108           EMIT_ONE_BYTE (c);
4109         }
4110       else
4111         {
4112           struct charset *charset;
4113
4114           if (preferred_charset_id >= 0)
4115             {
4116               charset = CHARSET_FROM_ID (preferred_charset_id);
4117               if (! CHAR_CHARSET_P (c, charset))
4118                 charset = char_charset (c, charset_list, NULL);
4119             }
4120           else
4121             charset = char_charset (c, charset_list, NULL);
4122           if (!charset)
4123             {
4124               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4125                 {
4126                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4127                   charset = CHARSET_FROM_ID (charset_ascii);
4128                 }
4129               else
4130                 {
4131                   c = coding->default_char;
4132                   charset = char_charset (c, charset_list, NULL);
4133                 }
4134             }
4135           ENCODE_ISO_CHARACTER (charset, c);
4136         }
4137     }
4138
4139   if (coding->mode & CODING_MODE_LAST_BLOCK
4140       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4141     {
4142       ASSURE_DESTINATION (safe_room);
4143       ENCODE_RESET_PLANE_AND_REGISTER ();
4144     }
4145   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4146   CODING_ISO_BOL (coding) = bol_designation;
4147   coding->produced_char += produced_chars;
4148   coding->produced = dst - coding->destination;
4149   return 0;
4150 }
4151
4152 \f
/*** 8. SJIS and BIG5 handlers ***/
4154
4155 /* Although SJIS and BIG5 are not ISO's coding system, they are used
4156    quite widely.  So, for the moment, Emacs supports them in the bare
4157    C code.  But, in the future, they may be supported only by CCL.  */
4158
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.

   --- CODE RANGE of SJIS ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   KATAKANA-JISX0201    0xA1 .. 0xDF
   JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
            (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
   -------------------------------

*/
4175
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and each of its characters is encoded in two bytes.
4179
4180    --- CODE RANGE of BIG5 ---
4181    (character set)      (range)
4182    ASCII                0x00 .. 0x7F
4183    Big5 (1st byte)      0xA1 .. 0xFE
4184         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4185    --------------------------
4186
4187   */
4188
4189 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4190    Check if a text is encoded in SJIS.  If it is, return
4191    CATEGORY_MASK_SJIS, else return 0.  */
4192
4193 static int
4194 detect_coding_sjis (coding, detect_info)
4195      struct coding_system *coding;
4196      struct coding_detection_info *detect_info;
4197 {
4198   const unsigned char *src = coding->source, *src_base;
4199   const unsigned char *src_end = coding->source + coding->src_bytes;
4200   int multibytep = coding->src_multibyte;
4201   int consumed_chars = 0;
4202   int found = 0;
4203   int c;
4204
4205   detect_info->checked |= CATEGORY_MASK_SJIS;
4206   /* A coding system of this category is always ASCII compatible.  */
4207   src += coding->head_ascii;
4208
4209   while (1)
4210     {
4211       src_base = src;
4212       ONE_MORE_BYTE (c);
4213       if (c < 0x80)
4214         continue;
4215       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4216         {
4217           ONE_MORE_BYTE (c);
4218           if (c < 0x40 || c == 0x7F || c > 0xFC)
4219             break;
4220           found = CATEGORY_MASK_SJIS;
4221         }
4222       else if (c >= 0xA0 && c < 0xE0)
4223         found = CATEGORY_MASK_SJIS;
4224       else
4225         break;
4226     }
4227   detect_info->rejected |= CATEGORY_MASK_SJIS;
4228   return 0;
4229
4230  no_more_source:
4231   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4232     {
4233       detect_info->rejected |= CATEGORY_MASK_SJIS;
4234       return 0;
4235     }
4236   detect_info->found |= found;
4237   return 1;
4238 }
4239
4240 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4241    Check if a text is encoded in BIG5.  If it is, return
4242    CATEGORY_MASK_BIG5, else return 0.  */
4243
4244 static int
4245 detect_coding_big5 (coding, detect_info)
4246      struct coding_system *coding;
4247      struct coding_detection_info *detect_info;
4248 {
4249   const unsigned char *src = coding->source, *src_base;
4250   const unsigned char *src_end = coding->source + coding->src_bytes;
4251   int multibytep = coding->src_multibyte;
4252   int consumed_chars = 0;
4253   int found = 0;
4254   int c;
4255
4256   detect_info->checked |= CATEGORY_MASK_BIG5;
4257   /* A coding system of this category is always ASCII compatible.  */
4258   src += coding->head_ascii;
4259
4260   while (1)
4261     {
4262       src_base = src;
4263       ONE_MORE_BYTE (c);
4264       if (c < 0x80)
4265         continue;
4266       if (c >= 0xA1)
4267         {
4268           ONE_MORE_BYTE (c);
4269           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4270             return 0;
4271           found = CATEGORY_MASK_BIG5;
4272         }
4273       else
4274         break;
4275     }
4276   detect_info->rejected |= CATEGORY_MASK_BIG5;
4277   return 0;
4278
4279  no_more_source:
4280   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4281     {
4282       detect_info->rejected |= CATEGORY_MASK_BIG5;
4283       return 0;
4284     }
4285   detect_info->found |= found;
4286   return 1;
4287 }
4288
4289 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4290    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4291
4292 static void
4293 decode_coding_sjis (coding)
4294      struct coding_system *coding;
4295 {
4296   const unsigned char *src = coding->source + coding->consumed;
4297   const unsigned char *src_end = coding->source + coding->src_bytes;
4298   const unsigned char *src_base;
4299   int *charbuf = coding->charbuf + coding->charbuf_used;
4300   int *charbuf_end
4301     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4302   int consumed_chars = 0, consumed_chars_base;
4303   int multibytep = coding->src_multibyte;
4304   struct charset *charset_roman, *charset_kanji, *charset_kana;
4305   struct charset *charset_kanji2;
4306   Lisp_Object attrs, charset_list, val;
4307   int char_offset = coding->produced_char;
4308   int last_offset = char_offset;
4309   int last_id = charset_ascii;
4310   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4311   int byte_after_cr = -1;
4312
4313   CODING_GET_INFO (coding, attrs, charset_list);
4314
4315   val = charset_list;
4316   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4317   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4318   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4319   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4320
4321   while (1)
4322     {
4323       int c, c1;
4324       struct charset *charset;
4325
4326       src_base = src;
4327       consumed_chars_base = consumed_chars;
4328
4329       if (charbuf >= charbuf_end)
4330         break;
4331
4332       if (byte_after_cr >= 0)
4333         c = byte_after_cr, byte_after_cr = -1;
4334       else
4335         ONE_MORE_BYTE (c);
4336       if (c < 0)
4337         goto invalid_code;
4338       if (c < 0x80)
4339         {
4340           if (eol_crlf && c == '\r')
4341             ONE_MORE_BYTE (byte_after_cr);
4342           charset = charset_roman;
4343         }
4344       else if (c == 0x80 || c == 0xA0)
4345         goto invalid_code;
4346       else if (c >= 0xA1 && c <= 0xDF)
4347         {
4348           /* SJIS -> JISX0201-Kana */
4349           c &= 0x7F;
4350           charset = charset_kana;
4351         }
4352       else if (c <= 0xEF)
4353         {
4354           /* SJIS -> JISX0208 */
4355           ONE_MORE_BYTE (c1);
4356           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4357             goto invalid_code;
4358           c = (c << 8) | c1;
4359           SJIS_TO_JIS (c);
4360           charset = charset_kanji;
4361         }
4362       else if (c <= 0xFC && charset_kanji2)
4363         {
4364           /* SJIS -> JISX0213-2 */
4365           ONE_MORE_BYTE (c1);
4366           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4367             goto invalid_code;
4368           c = (c << 8) | c1;
4369           SJIS_TO_JIS2 (c);
4370           charset = charset_kanji2;
4371         }
4372       else
4373         goto invalid_code;
4374       if (charset->id != charset_ascii
4375           && last_id != charset->id)
4376         {
4377           if (last_id != charset_ascii)
4378             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4379           last_id = charset->id;
4380           last_offset = char_offset;
4381         }
4382       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4383       *charbuf++ = c;
4384       char_offset++;
4385       continue;
4386
4387     invalid_code:
4388       src = src_base;
4389       consumed_chars = consumed_chars_base;
4390       ONE_MORE_BYTE (c);
4391       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4392       char_offset++;
4393       coding->errors++;
4394     }
4395
4396  no_more_source:
4397   if (last_id != charset_ascii)
4398     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4399   coding->consumed_char += consumed_chars_base;
4400   coding->consumed = src_base - coding->source;
4401   coding->charbuf_used = charbuf - coding->charbuf;
4402 }
4403
4404 static void
4405 decode_coding_big5 (coding)
4406      struct coding_system *coding;
4407 {
4408   const unsigned char *src = coding->source + coding->consumed;
4409   const unsigned char *src_end = coding->source + coding->src_bytes;
4410   const unsigned char *src_base;
4411   int *charbuf = coding->charbuf + coding->charbuf_used;
4412   int *charbuf_end
4413     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4414   int consumed_chars = 0, consumed_chars_base;
4415   int multibytep = coding->src_multibyte;
4416   struct charset *charset_roman, *charset_big5;
4417   Lisp_Object attrs, charset_list, val;
4418   int char_offset = coding->produced_char;
4419   int last_offset = char_offset;
4420   int last_id = charset_ascii;
4421   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4422   int byte_after_cr = -1;
4423
4424   CODING_GET_INFO (coding, attrs, charset_list);
4425   val = charset_list;
4426   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4427   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4428
4429   while (1)
4430     {
4431       int c, c1;
4432       struct charset *charset;
4433
4434       src_base = src;
4435       consumed_chars_base = consumed_chars;
4436
4437       if (charbuf >= charbuf_end)
4438         break;
4439
4440       if (byte_after_cr >= 0)
4441         c = byte_after_cr, byte_after_cr = -1;
4442       else
4443         ONE_MORE_BYTE (c);
4444
4445       if (c < 0)
4446         goto invalid_code;
4447       if (c < 0x80)
4448         {
4449           if (eol_crlf && c == '\r')
4450             ONE_MORE_BYTE (byte_after_cr);
4451           charset = charset_roman;
4452         }
4453       else
4454         {
4455           /* BIG5 -> Big5 */
4456           if (c < 0xA1 || c > 0xFE)
4457             goto invalid_code;
4458           ONE_MORE_BYTE (c1);
4459           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4460             goto invalid_code;
4461           c = c << 8 | c1;
4462           charset = charset_big5;
4463         }
4464       if (charset->id != charset_ascii
4465           && last_id != charset->id)
4466         {
4467           if (last_id != charset_ascii)
4468             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4469           last_id = charset->id;
4470           last_offset = char_offset;
4471         }
4472       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4473       *charbuf++ = c;
4474       char_offset++;
4475       continue;
4476
4477     invalid_code:
4478       src = src_base;
4479       consumed_chars = consumed_chars_base;
4480       ONE_MORE_BYTE (c);
4481       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4482       char_offset++;
4483       coding->errors++;
4484     }
4485
4486  no_more_source:
4487   if (last_id != charset_ascii)
4488     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4489   coding->consumed_char += consumed_chars_base;
4490   coding->consumed = src_base - coding->source;
4491   coding->charbuf_used = charbuf - coding->charbuf;
4492 }
4493
4494 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4495    This function can encode charsets `ascii', `katakana-jisx0201',
4496    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4497    are sure that all these charsets are registered as official charset
4498    (i.e. do not have extended leading-codes).  Characters of other
4499    charsets are produced without any encoding.  If SJIS_P is 1, encode
4500    SJIS text, else encode BIG5 text.  */
4501
4502 static int
4503 encode_coding_sjis (coding)
4504      struct coding_system *coding;
4505 {
4506   int multibytep = coding->dst_multibyte;
4507   int *charbuf = coding->charbuf;
4508   int *charbuf_end = charbuf + coding->charbuf_used;
4509   unsigned char *dst = coding->destination + coding->produced;
4510   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4511   int safe_room = 4;
4512   int produced_chars = 0;
4513   Lisp_Object attrs, charset_list, val;
4514   int ascii_compatible;
4515   struct charset *charset_roman, *charset_kanji, *charset_kana;
4516   struct charset *charset_kanji2;
4517   int c;
4518
4519   CODING_GET_INFO (coding, attrs, charset_list);
4520   val = charset_list;
4521   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4522   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4523   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4524   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4525
4526   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4527
4528   while (charbuf < charbuf_end)
4529     {
4530       ASSURE_DESTINATION (safe_room);
4531       c = *charbuf++;
4532       /* Now encode the character C.  */
4533       if (ASCII_CHAR_P (c) && ascii_compatible)
4534         EMIT_ONE_ASCII_BYTE (c);
4535       else if (CHAR_BYTE8_P (c))
4536         {
4537           c = CHAR_TO_BYTE8 (c);
4538           EMIT_ONE_BYTE (c);
4539         }
4540       else
4541         {
4542           unsigned code;
4543           struct charset *charset = char_charset (c, charset_list, &code);
4544
4545           if (!charset)
4546             {
4547               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4548                 {
4549                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4550                   charset = CHARSET_FROM_ID (charset_ascii);
4551                 }
4552               else
4553                 {
4554                   c = coding->default_char;
4555                   charset = char_charset (c, charset_list, &code);
4556                 }
4557             }
4558           if (code == CHARSET_INVALID_CODE (charset))
4559             abort ();
4560           if (charset == charset_kanji)
4561             {
4562               int c1, c2;
4563               JIS_TO_SJIS (code);
4564               c1 = code >> 8, c2 = code & 0xFF;
4565               EMIT_TWO_BYTES (c1, c2);
4566             }
4567           else if (charset == charset_kana)
4568             EMIT_ONE_BYTE (code | 0x80);
4569           else if (charset_kanji2 && charset == charset_kanji2)
4570             {
4571               int c1, c2;
4572
4573               c1 = code >> 8;
4574               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4575                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4576                 {
4577                   JIS_TO_SJIS2 (code);
4578                   c1 = code >> 8, c2 = code & 0xFF;
4579                   EMIT_TWO_BYTES (c1, c2);
4580                 }
4581               else
4582                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4583             }
4584           else
4585             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4586         }
4587     }
4588   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4589   coding->produced_char += produced_chars;
4590   coding->produced = dst - coding->destination;
4591   return 0;
4592 }
4593
4594 static int
4595 encode_coding_big5 (coding)
4596      struct coding_system *coding;
4597 {
4598   int multibytep = coding->dst_multibyte;
4599   int *charbuf = coding->charbuf;
4600   int *charbuf_end = charbuf + coding->charbuf_used;
4601   unsigned char *dst = coding->destination + coding->produced;
4602   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4603   int safe_room = 4;
4604   int produced_chars = 0;
4605   Lisp_Object attrs, charset_list, val;
4606   int ascii_compatible;
4607   struct charset *charset_roman, *charset_big5;
4608   int c;
4609
4610   CODING_GET_INFO (coding, attrs, charset_list);
4611   val = charset_list;
4612   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4613   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4614   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4615
4616   while (charbuf < charbuf_end)
4617     {
4618       ASSURE_DESTINATION (safe_room);
4619       c = *charbuf++;
4620       /* Now encode the character C.  */
4621       if (ASCII_CHAR_P (c) && ascii_compatible)
4622         EMIT_ONE_ASCII_BYTE (c);
4623       else if (CHAR_BYTE8_P (c))
4624         {
4625           c = CHAR_TO_BYTE8 (c);
4626           EMIT_ONE_BYTE (c);
4627         }
4628       else
4629         {
4630           unsigned code;
4631           struct charset *charset = char_charset (c, charset_list, &code);
4632
4633           if (! charset)
4634             {
4635               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4636                 {
4637                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4638                   charset = CHARSET_FROM_ID (charset_ascii);
4639                 }
4640               else
4641                 {
4642                   c = coding->default_char;
4643                   charset = char_charset (c, charset_list, &code);
4644                 }
4645             }
4646           if (code == CHARSET_INVALID_CODE (charset))
4647             abort ();
4648           if (charset == charset_big5)
4649             {
4650               int c1, c2;
4651
4652               c1 = code >> 8, c2 = code & 0xFF;
4653               EMIT_TWO_BYTES (c1, c2);
4654             }
4655           else
4656             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4657         }
4658     }
4659   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4660   coding->produced_char += produced_chars;
4661   coding->produced = dst - coding->destination;
4662   return 0;
4663 }
4664
4665 \f
4666 /*** 10. CCL handlers ***/
4667
4668 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4669    Check if a text is encoded in a coding system of which
4670    encoder/decoder are written in CCL program.  If it is, return
4671    CATEGORY_MASK_CCL, else return 0.  */
4672
4673 static int
4674 detect_coding_ccl (coding, detect_info)
4675      struct coding_system *coding;
4676      struct coding_detection_info *detect_info;
4677 {
4678   const unsigned char *src = coding->source, *src_base;
4679   const unsigned char *src_end = coding->source + coding->src_bytes;
4680   int multibytep = coding->src_multibyte;
4681   int consumed_chars = 0;
4682   int found = 0;
4683   unsigned char *valids;
4684   int head_ascii = coding->head_ascii;
4685   Lisp_Object attrs;
4686
4687   detect_info->checked |= CATEGORY_MASK_CCL;
4688
4689   coding = &coding_categories[coding_category_ccl];
4690   valids = CODING_CCL_VALIDS (coding);
4691   attrs = CODING_ID_ATTRS (coding->id);
4692   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4693     src += head_ascii;
4694
4695   while (1)
4696     {
4697       int c;
4698
4699       src_base = src;
4700       ONE_MORE_BYTE (c);
4701       if (c < 0 || ! valids[c])
4702         break;
4703       if ((valids[c] > 1))
4704         found = CATEGORY_MASK_CCL;
4705     }
4706   detect_info->rejected |= CATEGORY_MASK_CCL;
4707   return 0;
4708
4709  no_more_source:
4710   detect_info->found |= found;
4711   return 1;
4712 }
4713
4714 static void
4715 decode_coding_ccl (coding)
4716      struct coding_system *coding;
4717 {
4718   const unsigned char *src = coding->source + coding->consumed;
4719   const unsigned char *src_end = coding->source + coding->src_bytes;
4720   int *charbuf = coding->charbuf + coding->charbuf_used;
4721   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4722   int consumed_chars = 0;
4723   int multibytep = coding->src_multibyte;
4724   struct ccl_program ccl;
4725   int source_charbuf[1024];
4726   int source_byteidx[1024];
4727   Lisp_Object attrs, charset_list;
4728
4729   CODING_GET_INFO (coding, attrs, charset_list);
4730   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4731
4732   while (src < src_end)
4733     {
4734       const unsigned char *p = src;
4735       int *source, *source_end;
4736       int i = 0;
4737
4738       if (multibytep)
4739         while (i < 1024 && p < src_end)
4740           {
4741             source_byteidx[i] = p - src;
4742             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4743           }
4744       else
4745         while (i < 1024 && p < src_end)
4746           source_charbuf[i++] = *p++;
4747
4748       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4749         ccl.last_block = 1;
4750
4751       source = source_charbuf;
4752       source_end = source + i;
4753       while (source < source_end)
4754         {
4755           ccl_driver (&ccl, source, charbuf,
4756                       source_end - source, charbuf_end - charbuf,
4757                       charset_list);
4758           source += ccl.consumed;
4759           charbuf += ccl.produced;
4760           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4761             break;
4762         }
4763       if (source < source_end)
4764         src += source_byteidx[source - source_charbuf];
4765       else
4766         src = p;
4767       consumed_chars += source - source_charbuf;
4768
4769       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4770           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4771         break;
4772     }
4773
4774   switch (ccl.status)
4775     {
4776     case CCL_STAT_SUSPEND_BY_SRC:
4777       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4778       break;
4779     case CCL_STAT_SUSPEND_BY_DST:
4780       break;
4781     case CCL_STAT_QUIT:
4782     case CCL_STAT_INVALID_CMD:
4783       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4784       break;
4785     default:
4786       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4787       break;
4788     }
4789   coding->consumed_char += consumed_chars;
4790   coding->consumed = src - coding->source;
4791   coding->charbuf_used = charbuf - coding->charbuf;
4792 }
4793
4794 static int
4795 encode_coding_ccl (coding)
4796      struct coding_system *coding;
4797 {
4798   struct ccl_program ccl;
4799   int multibytep = coding->dst_multibyte;
4800   int *charbuf = coding->charbuf;
4801   int *charbuf_end = charbuf + coding->charbuf_used;
4802   unsigned char *dst = coding->destination + coding->produced;
4803   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4804   int destination_charbuf[1024];
4805   int i, produced_chars = 0;
4806   Lisp_Object attrs, charset_list;
4807
4808   CODING_GET_INFO (coding, attrs, charset_list);
4809   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4810
4811   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4812   ccl.dst_multibyte = coding->dst_multibyte;
4813
4814   while (charbuf < charbuf_end)
4815     {
4816       ccl_driver (&ccl, charbuf, destination_charbuf,
4817                   charbuf_end - charbuf, 1024, charset_list);
4818       if (multibytep)
4819         {
4820           ASSURE_DESTINATION (ccl.produced * 2);
4821           for (i = 0; i < ccl.produced; i++)
4822             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4823         }
4824       else
4825         {
4826           ASSURE_DESTINATION (ccl.produced);
4827           for (i = 0; i < ccl.produced; i++)
4828             *dst++ = destination_charbuf[i] & 0xFF;
4829           produced_chars += ccl.produced;
4830         }
4831       charbuf += ccl.consumed;
4832       if (ccl.status == CCL_STAT_QUIT
4833           || ccl.status == CCL_STAT_INVALID_CMD)
4834         break;
4835     }
4836
4837   switch (ccl.status)
4838     {
4839     case CCL_STAT_SUSPEND_BY_SRC:
4840       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4841       break;
4842     case CCL_STAT_SUSPEND_BY_DST:
4843       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4844       break;
4845     case CCL_STAT_QUIT:
4846     case CCL_STAT_INVALID_CMD:
4847       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4848       break;
4849     default:
4850       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4851       break;
4852     }
4853
4854   coding->produced_char += produced_chars;
4855   coding->produced = dst - coding->destination;
4856   return 0;
4857 }
4858
4859
4860 \f
4861 /*** 10, 11. no-conversion handlers ***/
4862
4863 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4864
4865 static void
4866 decode_coding_raw_text (coding)
4867      struct coding_system *coding;
4868 {
4869   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4870
4871   coding->chars_at_source = 1;
4872   coding->consumed_char = coding->src_chars;
4873   coding->consumed = coding->src_bytes;
4874   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4875     {
4876       coding->consumed_char--;
4877       coding->consumed--;
4878       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4879     }
4880   else
4881     record_conversion_result (coding, CODING_RESULT_SUCCESS);
4882 }
4883
4884 static int
4885 encode_coding_raw_text (coding)
4886      struct coding_system *coding;
4887 {
4888   int multibytep = coding->dst_multibyte;
4889   int *charbuf = coding->charbuf;
4890   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4891   unsigned char *dst = coding->destination + coding->produced;
4892   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4893   int produced_chars = 0;
4894   int c;
4895
4896   if (multibytep)
4897     {
4898       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4899
4900       if (coding->src_multibyte)
4901         while (charbuf < charbuf_end)
4902           {
4903             ASSURE_DESTINATION (safe_room);
4904             c = *charbuf++;
4905             if (ASCII_CHAR_P (c))
4906               EMIT_ONE_ASCII_BYTE (c);
4907             else if (CHAR_BYTE8_P (c))
4908               {
4909                 c = CHAR_TO_BYTE8 (c);
4910                 EMIT_ONE_BYTE (c);
4911               }
4912             else
4913               {
4914                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4915
4916                 CHAR_STRING_ADVANCE (c, p1);
4917                 while (p0 < p1)
4918                   {
4919                     EMIT_ONE_BYTE (*p0);
4920                     p0++;
4921                   }
4922               }
4923           }
4924       else
4925         while (charbuf < charbuf_end)
4926           {
4927             ASSURE_DESTINATION (safe_room);
4928             c = *charbuf++;
4929             EMIT_ONE_BYTE (c);
4930           }
4931     }
4932   else
4933     {
4934       if (coding->src_multibyte)
4935         {
4936           int safe_room = MAX_MULTIBYTE_LENGTH;
4937
4938           while (charbuf < charbuf_end)
4939             {
4940               ASSURE_DESTINATION (safe_room);
4941               c = *charbuf++;
4942               if (ASCII_CHAR_P (c))
4943                 *dst++ = c;
4944               else if (CHAR_BYTE8_P (c))
4945                 *dst++ = CHAR_TO_BYTE8 (c);
4946               else
4947                 CHAR_STRING_ADVANCE (c, dst);
4948             }
4949         }
4950       else
4951         {
4952           ASSURE_DESTINATION (charbuf_end - charbuf);
4953           while (charbuf < charbuf_end && dst < dst_end)
4954             *dst++ = *charbuf++;
4955         }
4956       produced_chars = dst - (coding->destination + coding->produced);
4957     }
4958   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4959   coding->produced_char += produced_chars;
4960   coding->produced = dst - coding->destination;
4961   return 0;
4962 }
4963
4964 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4965    Check if a text is encoded in a charset-based coding system.  If it
4966    is, return 1, else return 0.  */
4967
4968 static int
4969 detect_coding_charset (coding, detect_info)
4970      struct coding_system *coding;
4971      struct coding_detection_info *detect_info;
4972 {
4973   const unsigned char *src = coding->source, *src_base;
4974   const unsigned char *src_end = coding->source + coding->src_bytes;
4975   int multibytep = coding->src_multibyte;
4976   int consumed_chars = 0;
4977   Lisp_Object attrs, valids;
4978   int found = 0;
4979   int head_ascii = coding->head_ascii;
4980
4981   detect_info->checked |= CATEGORY_MASK_CHARSET;
4982
4983   coding = &coding_categories[coding_category_charset];
4984   attrs = CODING_ID_ATTRS (coding->id);
4985   valids = AREF (attrs, coding_attr_charset_valids);
4986
4987   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4988     src += head_ascii;
4989
4990   while (1)
4991     {
4992       int c;
4993       Lisp_Object val;
4994       struct charset *charset;
4995       int dim, idx;
4996
4997       src_base = src;
4998       ONE_MORE_BYTE (c);
4999       if (c < 0)
5000         continue;
5001       val = AREF (valids, c);
5002       if (NILP (val))
5003         break;
5004       if (c >= 0x80)
5005         found = CATEGORY_MASK_CHARSET;
5006       if (INTEGERP (val))
5007         {
5008           charset = CHARSET_FROM_ID (XFASTINT (val));
5009           dim = CHARSET_DIMENSION (charset);
5010           for (idx = 1; idx < dim; idx++)
5011             {
5012               if (src == src_end)
5013                 goto too_short;
5014               ONE_MORE_BYTE (c);
5015               if (c < charset->code_space[(dim - 1 - idx) * 2]
5016                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5017                 break;
5018             }
5019           if (idx < dim)
5020             break;
5021         }
5022       else
5023         {
5024           idx = 1;
5025           for (; CONSP (val); val = XCDR (val))
5026             {
5027               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5028               dim = CHARSET_DIMENSION (charset);
5029               while (idx < dim)
5030                 {
5031                   if (src == src_end)
5032                     goto too_short;
5033                   ONE_MORE_BYTE (c);
5034                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5035                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5036                     break;
5037                   idx++;
5038                 }
5039               if (idx == dim)
5040                 {
5041                   val = Qnil;
5042                   break;
5043                 }
5044             }
5045           if (CONSP (val))
5046             break;
5047         }
5048     }
5049  too_short:
5050   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5051   return 0;
5052
5053  no_more_source:
5054   detect_info->found |= found;
5055   return 1;
5056 }
5057
5058 static void
5059 decode_coding_charset (coding)
5060      struct coding_system *coding;
5061 {
5062   const unsigned char *src = coding->source + coding->consumed;
5063   const unsigned char *src_end = coding->source + coding->src_bytes;
5064   const unsigned char *src_base;
5065   int *charbuf = coding->charbuf + coding->charbuf_used;
5066   int *charbuf_end
5067     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
5068   int consumed_chars = 0, consumed_chars_base;
5069   int multibytep = coding->src_multibyte;
5070   Lisp_Object attrs, charset_list, valids;
5071   int char_offset = coding->produced_char;
5072   int last_offset = char_offset;
5073   int last_id = charset_ascii;
5074   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5075   int byte_after_cr = -1;
5076
5077   CODING_GET_INFO (coding, attrs, charset_list);
5078   valids = AREF (attrs, coding_attr_charset_valids);
5079
5080   while (1)
5081     {
5082       int c;
5083       Lisp_Object val;
5084       struct charset *charset;
5085       int dim;
5086       int len = 1;
5087       unsigned code;
5088
5089       src_base = src;
5090       consumed_chars_base = consumed_chars;
5091
5092       if (charbuf >= charbuf_end)
5093         break;
5094
5095       if (byte_after_cr >= 0)
5096         {
5097           c = byte_after_cr;
5098           byte_after_cr = -1;
5099         }
5100       else
5101         {
5102           ONE_MORE_BYTE (c);
5103           if (eol_crlf && c == '\r')
5104             ONE_MORE_BYTE (byte_after_cr);
5105         }
5106       if (c < 0)
5107         goto invalid_code;
5108       code = c;
5109
5110       val = AREF (valids, c);
5111       if (NILP (val))
5112         goto invalid_code;
5113       if (INTEGERP (val))
5114         {
5115           charset = CHARSET_FROM_ID (XFASTINT (val));
5116           dim = CHARSET_DIMENSION (charset);
5117           while (len < dim)
5118             {
5119               ONE_MORE_BYTE (c);
5120               code = (code << 8) | c;
5121               len++;
5122             }
5123           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5124                               charset, code, c);
5125         }
5126       else
5127         {
5128           /* VAL is a list of charset IDs.  It is assured that the
5129              list is sorted by charset dimensions (smaller one
5130              comes first).  */
5131           while (CONSP (val))
5132             {
5133               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5134               dim = CHARSET_DIMENSION (charset);
5135               while (len < dim)
5136                 {
5137                   ONE_MORE_BYTE (c);
5138                   code = (code << 8) | c;
5139                   len++;
5140                 }
5141               CODING_DECODE_CHAR (coding, src, src_base,
5142                                   src_end, charset, code, c);
5143               if (c >= 0)
5144                 break;
5145               val = XCDR (val);
5146             }
5147         }
5148       if (c < 0)
5149         goto invalid_code;
5150       if (charset->id != charset_ascii
5151           && last_id != charset->id)
5152         {
5153           if (last_id != charset_ascii)
5154             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5155           last_id = charset->id;
5156           last_offset = char_offset;
5157         }
5158
5159       *charbuf++ = c;
5160       char_offset++;
5161       continue;
5162
5163     invalid_code:
5164       src = src_base;
5165       consumed_chars = consumed_chars_base;
5166       ONE_MORE_BYTE (c);
5167       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5168       char_offset++;
5169       coding->errors++;
5170     }
5171
5172  no_more_source:
5173   if (last_id != charset_ascii)
5174     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5175   coding->consumed_char += consumed_chars_base;
5176   coding->consumed = src_base - coding->source;
5177   coding->charbuf_used = charbuf - coding->charbuf;
5178 }
5179
5180 static int
5181 encode_coding_charset (coding)
5182      struct coding_system *coding;
5183 {
5184   int multibytep = coding->dst_multibyte;
5185   int *charbuf = coding->charbuf;
5186   int *charbuf_end = charbuf + coding->charbuf_used;
5187   unsigned char *dst = coding->destination + coding->produced;
5188   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5189   int safe_room = MAX_MULTIBYTE_LENGTH;
5190   int produced_chars = 0;
5191   Lisp_Object attrs, charset_list;
5192   int ascii_compatible;
5193   int c;
5194
5195   CODING_GET_INFO (coding, attrs, charset_list);
5196   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5197
5198   while (charbuf < charbuf_end)
5199     {
5200       struct charset *charset;
5201       unsigned code;
5202
5203       ASSURE_DESTINATION (safe_room);
5204       c = *charbuf++;
5205       if (ascii_compatible && ASCII_CHAR_P (c))
5206         EMIT_ONE_ASCII_BYTE (c);
5207       else if (CHAR_BYTE8_P (c))
5208         {
5209           c = CHAR_TO_BYTE8 (c);
5210           EMIT_ONE_BYTE (c);
5211         }
5212       else
5213         {
5214           charset = char_charset (c, charset_list, &code);
5215           if (charset)
5216             {
5217               if (CHARSET_DIMENSION (charset) == 1)
5218                 EMIT_ONE_BYTE (code);
5219               else if (CHARSET_DIMENSION (charset) == 2)
5220                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5221               else if (CHARSET_DIMENSION (charset) == 3)
5222                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5223               else
5224                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5225                                  (code >> 8) & 0xFF, code & 0xFF);
5226             }
5227           else
5228             {
5229               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5230                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5231               else
5232                 c = coding->default_char;
5233               EMIT_ONE_BYTE (c);
5234             }
5235         }
5236     }
5237
5238   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5239   coding->produced_char += produced_chars;
5240   coding->produced = dst - coding->destination;
5241   return 0;
5242 }
5243
5244 \f
5245 /*** 7. C library functions ***/
5246
5247 /* Setup coding context CODING from information about CODING_SYSTEM.
5248    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5249    CODING_SYSTEM is invalid, signal an error.  */
5250
5251 void
5252 setup_coding_system (coding_system, coding)
5253      Lisp_Object coding_system;
5254      struct coding_system *coding;
5255 {
5256   Lisp_Object attrs;
5257   Lisp_Object eol_type;
5258   Lisp_Object coding_type;
5259   Lisp_Object val;
5260
5261   if (NILP (coding_system))
5262     coding_system = Qundecided;
5263
5264   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5265
5266   attrs = CODING_ID_ATTRS (coding->id);
5267   eol_type = CODING_ID_EOL_TYPE (coding->id);
5268
5269   coding->mode = 0;
5270   coding->head_ascii = -1;
5271   if (VECTORP (eol_type))
5272     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5273                             | CODING_REQUIRE_DETECTION_MASK);
5274   else if (! EQ (eol_type, Qunix))
5275     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5276                             | CODING_REQUIRE_ENCODING_MASK);
5277   else
5278     coding->common_flags = 0;
5279   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5280     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5281   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5282     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5283   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5284     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5285
5286   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5287   coding->max_charset_id = SCHARS (val) - 1;
5288   coding->safe_charsets = (char *) SDATA (val);
5289   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5290
5291   coding_type = CODING_ATTR_TYPE (attrs);
5292   if (EQ (coding_type, Qundecided))
5293     {
5294       coding->detector = NULL;
5295       coding->decoder = decode_coding_raw_text;
5296       coding->encoder = encode_coding_raw_text;
5297       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5298     }
5299   else if (EQ (coding_type, Qiso_2022))
5300     {
5301       int i;
5302       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5303
5304       /* Invoke graphic register 0 to plane 0.  */
5305       CODING_ISO_INVOCATION (coding, 0) = 0;
5306       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5307       CODING_ISO_INVOCATION (coding, 1)
5308         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5309       /* Setup the initial status of designation.  */
5310       for (i = 0; i < 4; i++)
5311         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5312       /* Not single shifting initially.  */
5313       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5314       /* Beginning of buffer should also be regarded as bol. */
5315       CODING_ISO_BOL (coding) = 1;
5316       coding->detector = detect_coding_iso_2022;
5317       coding->decoder = decode_coding_iso_2022;
5318       coding->encoder = encode_coding_iso_2022;
5319       if (flags & CODING_ISO_FLAG_SAFE)
5320         coding->mode |= CODING_MODE_SAFE_ENCODING;
5321       coding->common_flags
5322         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5323             | CODING_REQUIRE_FLUSHING_MASK);
5324       if (flags & CODING_ISO_FLAG_COMPOSITION)
5325         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5326       if (flags & CODING_ISO_FLAG_DESIGNATION)
5327         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5328       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5329         {
5330           setup_iso_safe_charsets (attrs);
5331           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5332           coding->max_charset_id = SCHARS (val) - 1;
5333           coding->safe_charsets = (char *) SDATA (val);
5334         }
5335       CODING_ISO_FLAGS (coding) = flags;
5336     }
5337   else if (EQ (coding_type, Qcharset))
5338     {
5339       coding->detector = detect_coding_charset;
5340       coding->decoder = decode_coding_charset;
5341       coding->encoder = encode_coding_charset;
5342       coding->common_flags
5343         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5344     }
5345   else if (EQ (coding_type, Qutf_8))
5346     {
5347       val = AREF (attrs, coding_attr_utf_bom);
5348       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5349                                    : EQ (val, Qt) ? utf_with_bom
5350                                    : utf_without_bom);
5351       coding->detector = detect_coding_utf_8;
5352       coding->decoder = decode_coding_utf_8;
5353       coding->encoder = encode_coding_utf_8;
5354       coding->common_flags
5355         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5356       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5357         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5358     }
5359   else if (EQ (coding_type, Qutf_16))
5360     {
5361       val = AREF (attrs, coding_attr_utf_bom);
5362       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5363                                     : EQ (val, Qt) ? utf_with_bom
5364                                     : utf_without_bom);
5365       val = AREF (attrs, coding_attr_utf_16_endian);
5366       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5367                                        : utf_16_little_endian);
5368       CODING_UTF_16_SURROGATE (coding) = 0;
5369       coding->detector = detect_coding_utf_16;
5370       coding->decoder = decode_coding_utf_16;
5371       coding->encoder = encode_coding_utf_16;
5372       coding->common_flags
5373         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5374       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5375         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5376     }
5377   else if (EQ (coding_type, Qccl))
5378     {
5379       coding->detector = detect_coding_ccl;
5380       coding->decoder = decode_coding_ccl;
5381       coding->encoder = encode_coding_ccl;
5382       coding->common_flags
5383         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5384             | CODING_REQUIRE_FLUSHING_MASK);
5385     }
5386   else if (EQ (coding_type, Qemacs_mule))
5387     {
5388       coding->detector = detect_coding_emacs_mule;
5389       coding->decoder = decode_coding_emacs_mule;
5390       coding->encoder = encode_coding_emacs_mule;
5391       coding->common_flags
5392         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5393       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5394           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5395         {
5396           Lisp_Object tail, safe_charsets;
5397           int max_charset_id = 0;
5398
5399           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5400                tail = XCDR (tail))
5401             if (max_charset_id < XFASTINT (XCAR (tail)))
5402               max_charset_id = XFASTINT (XCAR (tail));
5403           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5404                                         make_number (255));
5405           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5406                tail = XCDR (tail))
5407             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5408           coding->max_charset_id = max_charset_id;
5409           coding->safe_charsets = (char *) SDATA (safe_charsets);
5410         }
5411     }
5412   else if (EQ (coding_type, Qshift_jis))
5413     {
5414       coding->detector = detect_coding_sjis;
5415       coding->decoder = decode_coding_sjis;
5416       coding->encoder = encode_coding_sjis;
5417       coding->common_flags
5418         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5419     }
5420   else if (EQ (coding_type, Qbig5))
5421     {
5422       coding->detector = detect_coding_big5;
5423       coding->decoder = decode_coding_big5;
5424       coding->encoder = encode_coding_big5;
5425       coding->common_flags
5426         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5427     }
5428   else                          /* EQ (coding_type, Qraw_text) */
5429     {
5430       coding->detector = NULL;
5431       coding->decoder = decode_coding_raw_text;
5432       coding->encoder = encode_coding_raw_text;
5433       if (! EQ (eol_type, Qunix))
5434         {
5435           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5436           if (! VECTORP (eol_type))
5437             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5438         }
5439
5440     }
5441
5442   return;
5443 }
5444
5445 /* Return a list of charsets supported by CODING.  */
5446
5447 Lisp_Object
5448 coding_charset_list (coding)
5449      struct coding_system *coding;
5450 {
5451   Lisp_Object attrs, charset_list;
5452
5453   CODING_GET_INFO (coding, attrs, charset_list);
5454   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5455     {
5456       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5457
5458       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5459         charset_list = Viso_2022_charset_list;
5460     }
5461   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5462     {
5463       charset_list = Vemacs_mule_charset_list;
5464     }
5465   return charset_list;
5466 }
5467
5468
5469 /* Return raw-text or one of its subsidiaries that has the same
5470    eol_type as CODING-SYSTEM.  */
5471
5472 Lisp_Object
5473 raw_text_coding_system (coding_system)
5474      Lisp_Object coding_system;
5475 {
5476   Lisp_Object spec, attrs;
5477   Lisp_Object eol_type, raw_text_eol_type;
5478
5479   if (NILP (coding_system))
5480     return Qraw_text;
5481   spec = CODING_SYSTEM_SPEC (coding_system);
5482   attrs = AREF (spec, 0);
5483
5484   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5485     return coding_system;
5486
5487   eol_type = AREF (spec, 2);
5488   if (VECTORP (eol_type))
5489     return Qraw_text;
5490   spec = CODING_SYSTEM_SPEC (Qraw_text);
5491   raw_text_eol_type = AREF (spec, 2);
5492   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5493           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5494           : AREF (raw_text_eol_type, 2));
5495 }
5496
5497
5498 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5499    does, return one of the subsidiary that has the same eol-spec as
5500    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
5501    inherit end-of-line format from the system's setting
5502    (system_eol_type).  */
5503
5504 Lisp_Object
5505 coding_inherit_eol_type (coding_system, parent)
5506      Lisp_Object coding_system, parent;
5507 {
5508   Lisp_Object spec, eol_type;
5509
5510   if (NILP (coding_system))
5511     coding_system = Qraw_text;
5512   spec = CODING_SYSTEM_SPEC (coding_system);
5513   eol_type = AREF (spec, 2);
5514   if (VECTORP (eol_type))
5515     {
5516       Lisp_Object parent_eol_type;
5517
5518       if (! NILP (parent))
5519         {
5520           Lisp_Object parent_spec;
5521
5522           parent_spec = CODING_SYSTEM_SPEC (parent);
5523           parent_eol_type = AREF (parent_spec, 2);
5524         }
5525       else
5526         parent_eol_type = system_eol_type;
5527       if (EQ (parent_eol_type, Qunix))
5528         coding_system = AREF (eol_type, 0);
5529       else if (EQ (parent_eol_type, Qdos))
5530         coding_system = AREF (eol_type, 1);
5531       else if (EQ (parent_eol_type, Qmac))
5532         coding_system = AREF (eol_type, 2);
5533     }
5534   return coding_system;
5535 }
5536
/* Emacs has a mechanism to automatically detect a coding system if it
   is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But
   it's impossible to distinguish some coding systems accurately
   because they use the same range of codes.  So, coding systems are
   first grouped into the following categories:
5542
5543    o coding-category-emacs-mule
5544
5545         The category for a coding system which has the same code range
5546         as Emacs' internal format.  Assigned the coding-system (Lisp
5547         symbol) `emacs-mule' by default.
5548
5549    o coding-category-sjis
5550
5551         The category for a coding system which has the same code range
5552         as SJIS.  Assigned the coding-system (Lisp
5553         symbol) `japanese-shift-jis' by default.
5554
5555    o coding-category-iso-7
5556
5557         The category for a coding system which has the same code range
5558         as ISO2022 of 7-bit environment.  This doesn't use any locking
5559         shift and single shift functions.  This can encode/decode all
5560         charsets.  Assigned the coding-system (Lisp symbol)
5561         `iso-2022-7bit' by default.
5562
5563    o coding-category-iso-7-tight
5564
5565         Same as coding-category-iso-7 except that this can
5566         encode/decode only the specified charsets.
5567
5568    o coding-category-iso-8-1
5569
5570         The category for a coding system which has the same code range
5571         as ISO2022 of 8-bit environment and graphic plane 1 used only
5572         for DIMENSION1 charset.  This doesn't use any locking shift
5573         and single shift functions.  Assigned the coding-system (Lisp
5574         symbol) `iso-latin-1' by default.
5575
5576    o coding-category-iso-8-2
5577
5578         The category for a coding system which has the same code range
5579         as ISO2022 of 8-bit environment and graphic plane 1 used only
5580         for DIMENSION2 charset.  This doesn't use any locking shift
5581         and single shift functions.  Assigned the coding-system (Lisp
5582         symbol) `japanese-iso-8bit' by default.
5583
5584    o coding-category-iso-7-else
5585
5586         The category for a coding system which has the same code range
        as ISO2022 of 7-bit environment but uses locking shift or
5588         single shift functions.  Assigned the coding-system (Lisp
5589         symbol) `iso-2022-7bit-lock' by default.
5590
5591    o coding-category-iso-8-else
5592
5593         The category for a coding system which has the same code range
        as ISO2022 of 8-bit environment but uses locking shift or
5595         single shift functions.  Assigned the coding-system (Lisp
5596         symbol) `iso-2022-8bit-ss2' by default.
5597
5598    o coding-category-big5
5599
5600         The category for a coding system which has the same code range
5601         as BIG5.  Assigned the coding-system (Lisp symbol)
5602         `cn-big5' by default.
5603
5604    o coding-category-utf-8
5605
5606         The category for a coding system which has the same code range
5607         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5608         symbol) `utf-8' by default.
5609
5610    o coding-category-utf-16-be
5611
        The category for a coding system in which a text has a
5613         Unicode signature (cf. Unicode Standard) in the order of BIG
5614         endian at the head.  Assigned the coding-system (Lisp symbol)
5615         `utf-16-be' by default.
5616
5617    o coding-category-utf-16-le
5618
        The category for a coding system in which a text has a
5620         Unicode signature (cf. Unicode Standard) in the order of
5621         LITTLE endian at the head.  Assigned the coding-system (Lisp
5622         symbol) `utf-16-le' by default.
5623
5624    o coding-category-ccl
5625
5626         The category for a coding system of which encoder/decoder is
5627         written in CCL programs.  The default value is nil, i.e., no
5628         coding system is assigned.
5629
5630    o coding-category-binary
5631
5632         The category for a coding system not categorized in any of the
5633         above.  Assigned the coding-system (Lisp symbol)
5634         `no-conversion' by default.
5635
   Each of them is a Lisp symbol whose value is the actual
   `coding-system' (also a Lisp symbol) assigned to it by a user.
   What Emacs actually does is detect the category of a coding system
   and then use the `coding-system' assigned to that category.  If
   Emacs can't narrow the result down to a single category, it selects
   the candidate category of the highest priority.  Priorities of
   categories are also specified by a user in the Lisp variable
   `coding-category-list'.
5643
5644 */
5645
5646 #define EOL_SEEN_NONE   0
5647 #define EOL_SEEN_LF     1
5648 #define EOL_SEEN_CR     2
5649 #define EOL_SEEN_CRLF   4
5650
5651 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5652    SOURCE is encoded.  If CATEGORY is one of
5653    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5654    two-byte, else they are encoded by one-byte.
5655
5656    Return one of EOL_SEEN_XXX.  */
5657
5658 #define MAX_EOL_CHECK_COUNT 3
5659
5660 static int
5661 detect_eol (source, src_bytes, category)
5662      const unsigned char *source;
5663      EMACS_INT src_bytes;
5664      enum coding_category category;
5665 {
5666   const unsigned char *src = source, *src_end = src + src_bytes;
5667   unsigned char c;
5668   int total  = 0;
5669   int eol_seen = EOL_SEEN_NONE;
5670
5671   if ((1 << category) & CATEGORY_MASK_UTF_16)
5672     {
5673       int msb, lsb;
5674
5675       msb = category == (coding_category_utf_16_le
5676                          | coding_category_utf_16_le_nosig);
5677       lsb = 1 - msb;
5678
5679       while (src + 1 < src_end)
5680         {
5681           c = src[lsb];
5682           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5683             {
5684               int this_eol;
5685
5686               if (c == '\n')
5687                 this_eol = EOL_SEEN_LF;
5688               else if (src + 3 >= src_end
5689                        || src[msb + 2] != 0
5690                        || src[lsb + 2] != '\n')
5691                 this_eol = EOL_SEEN_CR;
5692               else
5693                 this_eol = EOL_SEEN_CRLF;
5694
5695               if (eol_seen == EOL_SEEN_NONE)
5696                 /* This is the first end-of-line.  */
5697                 eol_seen = this_eol;
5698               else if (eol_seen != this_eol)
5699                 {
5700                   /* The found type is different from what found before.  */
5701                   eol_seen = EOL_SEEN_LF;
5702                   break;
5703                 }
5704               if (++total == MAX_EOL_CHECK_COUNT)
5705                 break;
5706             }
5707           src += 2;
5708         }
5709     }
5710   else
5711     {
5712       while (src < src_end)
5713         {
5714           c = *src++;
5715           if (c == '\n' || c == '\r')
5716             {
5717               int this_eol;
5718
5719               if (c == '\n')
5720                 this_eol = EOL_SEEN_LF;
5721               else if (src >= src_end || *src != '\n')
5722                 this_eol = EOL_SEEN_CR;
5723               else
5724                 this_eol = EOL_SEEN_CRLF, src++;
5725
5726               if (eol_seen == EOL_SEEN_NONE)
5727                 /* This is the first end-of-line.  */
5728                 eol_seen = this_eol;
5729               else if (eol_seen != this_eol)
5730                 {
5731                   /* The found type is different from what found before.  */
5732                   eol_seen = EOL_SEEN_LF;
5733                   break;
5734                 }
5735               if (++total == MAX_EOL_CHECK_COUNT)
5736                 break;
5737             }
5738         }
5739     }
5740   return eol_seen;
5741 }
5742
5743
5744 static Lisp_Object
5745 adjust_coding_eol_type (coding, eol_seen)
5746      struct coding_system *coding;
5747      int eol_seen;
5748 {
5749   Lisp_Object eol_type;
5750
5751   eol_type = CODING_ID_EOL_TYPE (coding->id);
5752   if (eol_seen & EOL_SEEN_LF)
5753     {
5754       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5755       eol_type = Qunix;
5756     }
5757   else if (eol_seen & EOL_SEEN_CRLF)
5758     {
5759       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5760       eol_type = Qdos;
5761     }
5762   else if (eol_seen & EOL_SEEN_CR)
5763     {
5764       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5765       eol_type = Qmac;
5766     }
5767   return eol_type;
5768 }
5769
/* Detect how a text specified in CODING is encoded.  If a coding
   system is detected, update fields of CODING by the detected coding
   system.

   The consumed/produced counters and head_ascii of CODING are reset
   first.  Three cases are handled, depending on CODING's attributes:
   an `undecided' coding system (full category detection), the
   utf-8-auto category, and the utf-16-auto category (for both, one of
   the two coding systems stored in the coding_attr_utf_bom attribute
   is chosen).  Any other coding system is left as is.  */

void
detect_coding (coding)
     struct coding_system *coding;
{
  const unsigned char *src, *src_end;

  coding->consumed = coding->consumed_char = 0;
  coding->produced = coding->produced_char = 0;
  coding_set_source (coding);

  src_end = coding->source + coding->src_bytes;
  coding->head_ascii = 0;

  /* If we have not yet decided the text encoding type, detect it
     now.  */
  if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
    {
      int c, i;
      struct coding_detection_info detect_info;
      int null_byte_found = 0, eight_bit_found = 0;

      detect_info.checked = detect_info.found = detect_info.rejected = 0;
      /* Preliminary scan: note whether a null byte and/or an 8-bit
         byte occurs, and count the 7-bit bytes in head_ascii.  The
         scan stops early once both kinds have been seen, or once
         detect_coding_iso_2022 has reported a result.  */
      for (src = coding->source; src < src_end; src++)
        {
          c = *src;
          if (c & 0x80)
            {
              eight_bit_found = 1;
              if (null_byte_found)
                break;
            }
          else if (c < 0x20)
            {
              /* ISO-2022 detection is tried at most once (while no
                 category has been checked yet), and only when it is
                 not inhibited.  */
              if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
                  && ! inhibit_iso_escape_detection
                  && ! detect_info.checked)
                {
                  if (detect_coding_iso_2022 (coding, &detect_info))
                    {
                      /* We have scanned the whole data.  */
                      if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
                        {
                          /* We didn't find an 8-bit code.  We may
                             have found a null-byte, but it's very
                             rare that a binary file conform to
                             ISO-2022.  */
                          src = src_end;
                          coding->head_ascii = src - coding->source;
                        }
                      /* Reject every category outside
                         CATEGORY_MASK_ISO_ESCAPE.  */
                      detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
                      break;
                    }
                }
              else if (! c)
                {
                  null_byte_found = 1;
                  if (eight_bit_found)
                    break;
                }
              coding->head_ascii++;
            }
          else
            coding->head_ascii++;
        }

      /* Category selection is needed unless the whole text was plain
         7-bit data for which nothing was found.  */
      if (null_byte_found || eight_bit_found
          || coding->head_ascii < coding->src_bytes
          || detect_info.found)
        {
          enum coding_category category;
          struct coding_system *this;

          if (coding->head_ascii == coding->src_bytes)
            /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
            for (i = 0; i < coding_category_raw_text; i++)
              {
                category = coding_priorities[i];
                this = coding_categories + category;
                if (detect_info.found & (1 << category))
                  break;
              }
          else
            {
              if (null_byte_found)
                {
                  /* With a null byte present, only the UTF-16
                     categories remain candidates.  */
                  detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                  detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                }
              /* Try the categories in priority order; the loop is
                 left by `break' (with I below
                 coding_category_raw_text) as soon as one is found.  */
              for (i = 0; i < coding_category_raw_text; i++)
                {
                  category = coding_priorities[i];
                  this = coding_categories + category;
                  if (this->id < 0)
                    {
                      /* No coding system of this category is defined.  */
                      detect_info.rejected |= (1 << category);
                    }
                  else if (category >= coding_category_raw_text)
                    continue;
                  else if (detect_info.checked & (1 << category))
                    {
                      /* Already examined by an earlier detector run;
                         just consult the recorded result.  */
                      if (detect_info.found & (1 << category))
                        break;
                    }
                  else if ((*(this->detector)) (coding, &detect_info)
                           && detect_info.found & (1 << category))
                    {
                      /* NOTE(review): CATEGORY is reassigned here but
                         THIS is not, and CATEGORY is not read again
                         before the function returns, so this
                         assignment appears to have no effect on the
                         coding system set up below -- confirm the
                         intent.  */
                      if (category == coding_category_utf_16_auto)
                        {
                          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
                            category = coding_category_utf_16_le;
                          else
                            category = coding_category_utf_16_be;
                        }
                      break;
                    }
                }
            }

          if (i < coding_category_raw_text)
            /* One of the loops above found a category.  */
            setup_coding_system (CODING_ID_NAME (this->id), coding);
          else if (null_byte_found)
            setup_coding_system (Qno_conversion, coding);
          else if ((detect_info.rejected & CATEGORY_MASK_ANY)
                   == CATEGORY_MASK_ANY)
            setup_coding_system (Qraw_text, coding);
          else if (detect_info.rejected)
            /* Nothing was positively found: fall back to the highest
               priority category that was not rejected.  */
            for (i = 0; i < coding_category_raw_text; i++)
              if (! (detect_info.rejected & (1 << coding_priorities[i])))
                {
                  this = coding_categories + coding_priorities[i];
                  setup_coding_system (CODING_ID_NAME (this->id), coding);
                  break;
                }
        }
    }
  else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
           == coding_category_utf_8_auto)
    {
      Lisp_Object coding_systems;
      struct coding_detection_info detect_info;

      /* When the attribute is a cons, its car is the coding system
         used if a UTF-8 signature is found and its cdr the one used
         otherwise.  */
      coding_systems
        = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
      /* NOTE(review): detect_info.checked is left uninitialized here;
         presumably the detector does not read it -- verify.  */
      detect_info.found = detect_info.rejected = 0;
      coding->head_ascii = 0;
      if (CONSP (coding_systems)
          && detect_coding_utf_8 (coding, &detect_info))
        {
          if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
            setup_coding_system (XCAR (coding_systems), coding);
          else
            setup_coding_system (XCDR (coding_systems), coding);
        }
    }
  else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
           == coding_category_utf_16_auto)
    {
      Lisp_Object coding_systems;
      struct coding_detection_info detect_info;

      /* When the attribute is a cons, its car is the coding system
         used if little endian is detected and its cdr the one used if
         big endian is detected.  If neither is detected, CODING is
         left unchanged.  */
      coding_systems
        = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
      /* NOTE(review): detect_info.checked is left uninitialized here;
         presumably the detector does not read it -- verify.  */
      detect_info.found = detect_info.rejected = 0;
      coding->head_ascii = 0;
      if (CONSP (coding_systems)
          && detect_coding_utf_16 (coding, &detect_info))
        {
          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
            setup_coding_system (XCAR (coding_systems), coding);
          else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
            setup_coding_system (XCDR (coding_systems), coding);
        }
    }
}
5949
5950
/* Convert the end-of-lines of the text just produced for CODING to
   LF, in place.

   The text is the coding->produced bytes at coding->destination when
   coding->dst_object is nil, otherwise the bytes starting at
   coding->dst_pos_byte.  Nothing is done for a unix eol type.  If the
   eol type is still a vector, the produced text is scanned to decide
   it (via adjust_coding_eol_type).  For mac, each CR becomes LF; for
   dos, CRs are deleted and coding->produced and coding->produced_char
   are decreased by the number of deleted bytes.  */

static void
decode_eol (coding)
     struct coding_system *coding;
{
  Lisp_Object eol_type;
  unsigned char *p, *pbeg, *pend;

  eol_type = CODING_ID_EOL_TYPE (coding->id);
  if (EQ (eol_type, Qunix))
    return;

  if (NILP (coding->dst_object))
    pbeg = coding->destination;
  else
    pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
  pend = pbeg + coding->produced;

  if (VECTORP (eol_type))
    {
      /* The eol type is not decided yet: collect every convention
         that occurs in the produced text.  */
      int eol_seen = EOL_SEEN_NONE;

      for (p = pbeg; p < pend; p++)
        {
          if (*p == '\n')
            eol_seen |= EOL_SEEN_LF;
          else if (*p == '\r')
            {
              if (p + 1 < pend && *(p + 1) == '\n')
                {
                  eol_seen |= EOL_SEEN_CRLF;
                  p++;
                }
              else
                eol_seen |= EOL_SEEN_CR;
            }
        }
      /* More than one convention was seen: treat the text as LF.  */
      if (eol_seen != EOL_SEEN_NONE
          && eol_seen != EOL_SEEN_LF
          && eol_seen != EOL_SEEN_CRLF
          && eol_seen != EOL_SEEN_CR)
        eol_seen = EOL_SEEN_LF;
      if (eol_seen != EOL_SEEN_NONE)
        eol_type = adjust_coding_eol_type (coding, eol_seen);
    }

  if (EQ (eol_type, Qmac))
    {
      for (p = pbeg; p < pend; p++)
        if (*p == '\r')
          *p = '\n';
    }
  else if (EQ (eol_type, Qdos))
    {
      /* Number of CR bytes removed.  */
      int n = 0;

      if (NILP (coding->dst_object))
        {
          /* Start deleting '\r' from the tail to minimize the memory
             movement.  */
          /* NOTE(review): this path removes every '\r' found (the
             last byte is never examined), without checking that a
             '\n' follows, whereas the path below only removes a '\r'
             that is followed by '\n' -- confirm that the difference
             is intended.  */
          for (p = pend - 2; p >= pbeg; p--)
            if (*p == '\r')
              {
                safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
                n++;
              }
        }
      else
        {
          int pos_byte = coding->dst_pos_byte;
          int pos = coding->dst_pos;
          /* The last character is excluded so that p[1] below is
             always within the produced text.  */
          int pos_end = pos + coding->produced_char - 1;

          while (pos < pos_end)
            {
              p = BYTE_POS_ADDR (pos_byte);
              if (*p == '\r' && p[1] == '\n')
                {
                  del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
                  n++;
                  pos_end--;
                }
              pos++;
              if (coding->dst_multibyte)
                pos_byte += BYTES_BY_CHAR_HEAD (*p);
              else
                pos_byte++;
            }
        }
      coding->produced -= n;
      coding->produced_char -= n;
    }
}
6043
6044
6045 /* Return a translation table (or list of them) from coding system
6046    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6047    decoding (ENCODEP is zero). */
6048
6049 static Lisp_Object
6050 get_translation_table (attrs, encodep, max_lookup)
6051      Lisp_Object attrs;
6052      int encodep, *max_lookup;
6053 {
6054   Lisp_Object standard, translation_table;
6055   Lisp_Object val;
6056
6057   if (encodep)
6058     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6059       standard = Vstandard_translation_table_for_encode;
6060   else
6061     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6062       standard = Vstandard_translation_table_for_decode;
6063   if (NILP (translation_table))
6064     translation_table = standard;
6065   else
6066     {
6067       if (SYMBOLP (translation_table))
6068         translation_table = Fget (translation_table, Qtranslation_table);
6069       else if (CONSP (translation_table))
6070         {
6071           translation_table = Fcopy_sequence (translation_table);
6072           for (val = translation_table; CONSP (val); val = XCDR (val))
6073             if (SYMBOLP (XCAR (val)))
6074               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6075         }
6076       if (CHAR_TABLE_P (standard))
6077         {
6078           if (CONSP (translation_table))
6079             translation_table = nconc2 (translation_table,
6080                                         Fcons (standard, Qnil));
6081           else
6082             translation_table = Fcons (translation_table,
6083                                        Fcons (standard, Qnil));
6084         }
6085     }
6086
6087   if (max_lookup)
6088     {
6089       *max_lookup = 1;
6090       if (CHAR_TABLE_P (translation_table)
6091           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6092         {
6093           val = XCHAR_TABLE (translation_table)->extras[1];
6094           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6095             *max_lookup = XFASTINT (val);
6096         }
6097       else if (CONSP (translation_table))
6098         {
6099           Lisp_Object tail, val;
6100
6101           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6102             if (CHAR_TABLE_P (XCAR (tail))
6103                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6104               {
6105                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6106                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6107                   *max_lookup = XFASTINT (val);
6108               }
6109         }
6110     }
6111   return translation_table;
6112 }
6113
/* Look up character C in TABLE, which is a char-table or a list whose
   char-table elements are consulted in order.

   If an entry is a character, C is replaced by it and TRANS is reset
   to nil; with a list, the lookup then continues in the next table
   using the new C.  Any other non-nil entry is left in TRANS, and
   with a list it stops the search.  If TABLE is neither a char-table
   nor a cons, TRANS is nil and C is unchanged.

   C and TRANS are assigned to, so they must be lvalues; the arguments
   are expanded more than once.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
  do {                                                          \
    trans = Qnil;                                               \
    if (CHAR_TABLE_P (table))                                   \
      {                                                         \
        trans = CHAR_TABLE_REF (table, c);                      \
        if (CHARACTERP (trans))                                 \
          c = XFASTINT (trans), trans = Qnil;                   \
      }                                                         \
    else if (CONSP (table))                                     \
      {                                                         \
        Lisp_Object tail;                                       \
                                                                \
        for (tail = table; CONSP (tail); tail = XCDR (tail))    \
          if (CHAR_TABLE_P (XCAR (tail)))                       \
            {                                                   \
              trans = CHAR_TABLE_REF (XCAR (tail), c);          \
              if (CHARACTERP (trans))                           \
                c = XFASTINT (trans), trans = Qnil;             \
              else if (! NILP (trans))                          \
                break;                                          \
            }                                                   \
      }                                                         \
  } while (0)
6138
6139
6140 static Lisp_Object
6141 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
6142      Lisp_Object val;
6143      int *buf, *buf_end;
6144      int last_block;
6145      int *from_nchars, *to_nchars;
6146 {
6147   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
6148      [TO-CHAR ...].  */
6149   if (CONSP (val))
6150     {
6151       Lisp_Object from, tail;
6152       int i, len;
6153
6154       for (tail = val; CONSP (tail); tail = XCDR (tail))
6155         {
6156           val = XCAR (tail);
6157           from = XCAR (val);
6158           len = ASIZE (from);
6159           for (i = 0; i < len; i++)
6160             {
6161               if (buf + i == buf_end)
6162                 {
6163                   if (! last_block)
6164                     return Qt;
6165                   break;
6166                 }
6167               if (XINT (AREF (from, i)) != buf[i])
6168                 break;
6169             }
6170           if (i == len)
6171             {
6172               val = XCDR (val);
6173               *from_nchars = len;
6174               break;
6175             }
6176         }
6177       if (! CONSP (tail))
6178         return Qnil;
6179     }
6180   if (VECTORP (val))
6181     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6182   else
6183     *buf = XINT (val);
6184   return val;
6185 }
6186
6187
6188 static int
6189 produce_chars (coding, translation_table, last_block)
6190      struct coding_system *coding;
6191      Lisp_Object translation_table;
6192      int last_block;
6193 {
6194   unsigned char *dst = coding->destination + coding->produced;
6195   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6196   EMACS_INT produced;
6197   EMACS_INT produced_chars = 0;
6198   int carryover = 0;
6199
6200   if (! coding->chars_at_source)
6201     {
6202       /* Source characters are in coding->charbuf.  */
6203       int *buf = coding->charbuf;
6204       int *buf_end = buf + coding->charbuf_used;
6205
6206       if (EQ (coding->src_object, coding->dst_object))
6207         {
6208           coding_set_source (coding);
6209           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6210         }
6211
6212       while (buf < buf_end)
6213         {
6214           int c = *buf, i;
6215
6216           if (c >= 0)
6217             {
6218               int from_nchars = 1, to_nchars = 1;
6219               Lisp_Object trans = Qnil;
6220
6221               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6222               if (! NILP (trans))
6223                 {
6224                   trans = get_translation (trans, buf, buf_end, last_block,
6225                                            &from_nchars, &to_nchars);
6226                   if (EQ (trans, Qt))
6227                     break;
6228                   c = *buf;
6229                 }
6230
6231               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6232                 {
6233                   dst = alloc_destination (coding,
6234                                            buf_end - buf
6235                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6236                                            dst);
6237                   if (EQ (coding->src_object, coding->dst_object))
6238                     {
6239                       coding_set_source (coding);
6240                       dst_end = ((unsigned char *) coding->source) + coding->consumed;
6241                     }
6242                   else
6243                     dst_end = coding->destination + coding->dst_bytes;
6244                 }
6245
6246               for (i = 0; i < to_nchars; i++)
6247                 {
6248                   if (i > 0)
6249                     c = XINT (AREF (trans, i));
6250                   if (coding->dst_multibyte
6251                       || ! CHAR_BYTE8_P (c))
6252                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6253                   else
6254                     *dst++ = CHAR_TO_BYTE8 (c);
6255                 }
6256               produced_chars += to_nchars;
6257               *buf++ = to_nchars;
6258               while (--from_nchars > 0)
6259                 *buf++ = 0;
6260             }
6261           else
6262             /* This is an annotation datum.  (-C) is the length.  */
6263             buf += -c;
6264         }
6265       carryover = buf_end - buf;
6266     }
6267   else
6268     {
6269       /* Source characters are at coding->source.  */
6270       const unsigned char *src = coding->source;
6271       const unsigned char *src_end = src + coding->consumed;
6272
6273       if (EQ (coding->dst_object, coding->src_object))
6274         dst_end = (unsigned char *) src;
6275       if (coding->src_multibyte != coding->dst_multibyte)
6276         {
6277           if (coding->src_multibyte)
6278             {
6279               int multibytep = 1;
6280               EMACS_INT consumed_chars;
6281
6282               while (1)
6283                 {
6284                   const unsigned char *src_base = src;
6285                   int c;
6286
6287                   ONE_MORE_BYTE (c);
6288                   if (dst == dst_end)
6289                     {
6290                       if (EQ (coding->src_object, coding->dst_object))
6291                         dst_end = (unsigned char *) src;
6292                       if (dst == dst_end)
6293                         {
6294                           EMACS_INT offset = src - coding->source;
6295
6296                           dst = alloc_destination (coding, src_end - src + 1,
6297                                                    dst);
6298                           dst_end = coding->destination + coding->dst_bytes;
6299                           coding_set_source (coding);
6300                           src = coding->source + offset;
6301                           src_end = coding->source + coding->src_bytes;
6302                           if (EQ (coding->src_object, coding->dst_object))
6303                             dst_end = (unsigned char *) src;
6304                         }
6305                     }
6306                   *dst++ = c;
6307                   produced_chars++;
6308                 }
6309             no_more_source:
6310               ;
6311             }
6312           else
6313             while (src < src_end)
6314               {
6315                 int multibytep = 1;
6316                 int c = *src++;
6317
6318                 if (dst >= dst_end - 1)
6319                   {
6320                     if (EQ (coding->src_object, coding->dst_object))
6321                       dst_end = (unsigned char *) src;
6322                     if (dst >= dst_end - 1)
6323                       {
6324                         EMACS_INT offset = src - coding->source;
6325                         EMACS_INT more_bytes;
6326
6327                         if (EQ (coding->src_object, coding->dst_object))
6328                           more_bytes = ((src_end - src) / 2) + 2;
6329                         else
6330                           more_bytes = src_end - src + 2;
6331                         dst = alloc_destination (coding, more_bytes, dst);
6332                         dst_end = coding->destination + coding->dst_bytes;
6333                         coding_set_source (coding);
6334                         src = coding->source + offset;
6335                         src_end = coding->source + coding->src_bytes;
6336                         if (EQ (coding->src_object, coding->dst_object))
6337                           dst_end = (unsigned char *) src;
6338                       }
6339                   }
6340                 EMIT_ONE_BYTE (c);
6341               }
6342         }
6343       else
6344         {
6345           if (!EQ (coding->src_object, coding->dst_object))
6346             {
6347               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6348
6349               if (require > 0)
6350                 {
6351                   EMACS_INT offset = src - coding->source;
6352
6353                   dst = alloc_destination (coding, require, dst);
6354                   coding_set_source (coding);
6355                   src = coding->source + offset;
6356                   src_end = coding->source + coding->src_bytes;
6357                 }
6358             }
6359           produced_chars = coding->consumed_char;
6360           while (src < src_end)
6361             *dst++ = *src++;
6362         }
6363     }
6364
6365   produced = dst - (coding->destination + coding->produced);
6366   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6367     insert_from_gap (produced_chars, produced);
6368   coding->produced += produced;
6369   coding->produced_char += produced_chars;
6370   return carryover;
6371 }
6372
6373 /* Compose text in CODING->object according to the annotation data at
6374    CHARBUF.  CHARBUF is an array:
6375      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
6376  */
6377
6378 static INLINE void
6379 produce_composition (coding, charbuf, pos)
6380      struct coding_system *coding;
6381      int *charbuf;
6382      EMACS_INT pos;
6383 {
6384   int len;
6385   EMACS_INT to;
6386   enum composition_method method;
6387   Lisp_Object components;
6388
6389   len = -charbuf[0];
6390   to = pos + charbuf[2];
6391   if (to <= pos)
6392     return;
6393   method = (enum composition_method) (charbuf[3]);
6394
6395   if (method == COMPOSITION_RELATIVE)
6396     components = Qnil;
6397   else if (method >= COMPOSITION_WITH_RULE
6398            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6399     {
6400       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6401       int i;
6402
6403       len -= 4;
6404       charbuf += 4;
6405       for (i = 0; i < len; i++)
6406         {
6407           args[i] = make_number (charbuf[i]);
6408           if (charbuf[i] < 0)
6409             return;
6410         }
6411       components = (method == COMPOSITION_WITH_ALTCHARS
6412                     ? Fstring (len, args) : Fvector (len, args));
6413     }
6414   else
6415     return;
6416   compose_text (pos, to, components, Qnil, coding->dst_object);
6417 }
6418
6419
6420 /* Put `charset' property on text in CODING->object according to
6421    the annotation data at CHARBUF.  CHARBUF is an array:
6422      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6423  */
6424
6425 static INLINE void
6426 produce_charset (coding, charbuf, pos)
6427      struct coding_system *coding;
6428      int *charbuf;
6429      EMACS_INT pos;
6430 {
6431   EMACS_INT from = pos - charbuf[2];
6432   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6433
6434   Fput_text_property (make_number (from), make_number (pos),
6435                       Qcharset, CHARSET_NAME (charset),
6436                       coding->dst_object);
6437 }
6438
6439
/* Number of ints first requested for the conversion work area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work area CODING->charbuf with alloca.  The first
   request is for CHARBUF_SIZE ints; while the result is null and the
   size is still above 1024 the request is halved and retried.  On
   success the number of ints is stored in CODING->charbuf_size.

   If no area could be obtained, CODING_RESULT_INSUFFICIENT_MEM is
   recorded and the macro executes `return CODING->result', i.e. it
   returns from the function in which it is expanded.  Use it only in
   a function whose return type can hold CODING->result.

   NOTE(review): alloca usually gives no failure indication, so the
   retry/failure path is probably never taken -- confirm.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        record_conversion_result ((coding),                             \
                                  CODING_RESULT_INSUFFICIENT_MEM);      \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
6461
6462
6463 static void
6464 produce_annotation (coding, pos)
6465      struct coding_system *coding;
6466      EMACS_INT pos;
6467 {
6468   int *charbuf = coding->charbuf;
6469   int *charbuf_end = charbuf + coding->charbuf_used;
6470
6471   if (NILP (coding->dst_object))
6472     return;
6473
6474   while (charbuf < charbuf_end)
6475     {
6476       if (*charbuf >= 0)
6477         pos += *charbuf++;
6478       else
6479         {
6480           int len = -*charbuf;
6481           switch (charbuf[1])
6482             {
6483             case CODING_ANNOTATE_COMPOSITION_MASK:
6484               produce_composition (coding, charbuf, pos);
6485               break;
6486             case CODING_ANNOTATE_CHARSET_MASK:
6487               produce_charset (coding, charbuf, pos);
6488               break;
6489             default:
6490               abort ();
6491             }
6492           charbuf += len;
6493         }
6494     }
6495 }
6496
6497 /* Decode the data at CODING->src_object into CODING->dst_object.
6498    CODING->src_object is a buffer, a string, or nil.
6499    CODING->dst_object is a buffer.
6500
6501    If CODING->src_object is a buffer, it must be the current buffer.
6502    In this case, if CODING->src_pos is positive, it is a position of
6503    the source text in the buffer, otherwise, the source text is in the
6504    gap area of the buffer, and CODING->src_pos specifies the offset of
6505    the text from GPT (which must be the same as PT).  If this is the
6506    same buffer as CODING->dst_object, CODING->src_pos must be
6507    negative.
6508
6509    If CODING->src_object is a string, CODING->src_pos is an index to
6510    that string.
6511
6512    If CODING->src_object is nil, CODING->source must already point to
6513    the non-relocatable memory area.  In this case, CODING->src_pos is
6514    an offset from CODING->source.
6515
6516    The decoded data is inserted at the current point of the buffer
6517    CODING->dst_object.

        The consumed/produced counters, the error count and the result
        of CODING are reset before decoding starts.  Source bytes left
        undecoded at the end are either flushed as raw bytes (when
        CODING_MODE_LAST_BLOCK is set) or saved in CODING->carryover.

        Return CODING->result.
6518 */
6519
6520 static int
6521 decode_coding (coding)
6522      struct coding_system *coding;
6523 {
6524   Lisp_Object attrs;
6525   Lisp_Object undo_list;
6526   Lisp_Object translation_table;
6527   int carryover;
6528   int i;
6529
       /* If the source text lies in a buffer and straddles the gap,
          move the gap to the start of the source text.  */
6530   if (BUFFERP (coding->src_object)
6531       && coding->src_pos > 0
6532       && coding->src_pos < GPT
6533       && coding->src_pos + coding->src_chars > GPT)
6534     move_gap_both (coding->src_pos, coding->src_pos_byte);
6535
       /* Make the destination buffer current with the gap at point,
          and switch off undo recording while decoding; the saved list
          is put back at the end of this function and the insertion is
          recorded there in one step.  */
6536   undo_list = Qt;
6537   if (BUFFERP (coding->dst_object))
6538     {
6539       if (current_buffer != XBUFFER (coding->dst_object))
6540         set_buffer_internal (XBUFFER (coding->dst_object));
6541       if (GPT != PT)
6542         move_gap_both (PT, PT_BYTE);
6543       undo_list = current_buffer->undo_list;
6544       current_buffer->undo_list = Qt;
6545     }
6546
6547   coding->consumed = coding->consumed_char = 0;
6548   coding->produced = coding->produced_char = 0;
6549   coding->chars_at_source = 0;
6550   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6551   coding->errors = 0;
6552
       /* May return CODING->result from this function on failure.  */
6553   ALLOC_CONVERSION_WORK_AREA (coding);
6554
6555   attrs = CODING_ID_ATTRS (coding->id);
6556   translation_table = get_translation_table (attrs, 0, NULL);
6557
       /* Main loop: the decoder fills CODING->charbuf, produce_chars
          writes the result to the destination and returns how many
          trailing charbuf elements it left unprocessed (CARRYOVER);
          those are moved to the front of charbuf so that the next
          round starts with them.  */
6558   carryover = 0;
6559   do
6560     {
           /* Destination position matching the start of this round's
              output; used as the base position for annotations.  */
6561       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6562
6563       coding_set_source (coding);
6564       coding->annotated = 0;
6565       coding->charbuf_used = carryover;
6566       (*(coding->decoder)) (coding);
6567       coding_set_destination (coding);
6568       carryover = produce_chars (coding, translation_table, 0);
6569       if (coding->annotated)
6570         produce_annotation (coding, pos);
6571       for (i = 0; i < carryover; i++)
6572         coding->charbuf[i]
6573           = coding->charbuf[coding->charbuf_used - carryover + i];
6574     }
6575   while (coding->consumed < coding->src_bytes
6576          && (coding->result == CODING_RESULT_SUCCESS
6577              || coding->result == CODING_RESULT_INVALID_SRC));
6578
       /* Produce whatever is still carried over, this time passing 1
          as the last argument (presumably the "last block" flag --
          confirm against produce_chars).  */
6579   if (carryover > 0)
6580     {
6581       coding_set_destination (coding);
6582       coding->charbuf_used = carryover;
6583       produce_chars (coding, translation_table, 1);
6584     }
6585
       /* Deal with source bytes the decoder did not consume.  */
6586   coding->carryover_bytes = 0;
6587   if (coding->consumed < coding->src_bytes)
6588     {
6589       int nbytes = coding->src_bytes - coding->consumed;
6590       const unsigned char *src;
6591
6592       coding_set_source (coding);
6593       coding_set_destination (coding);
6594       src = coding->source + coding->consumed;
6595
6596       if (coding->mode & CODING_MODE_LAST_BLOCK)
6597         {
6598           /* Flush out unprocessed data as binary chars.  We are sure
6599              that the number of data is less than the size of
6600              coding->charbuf.  */
6601           coding->charbuf_used = 0;
6602           while (nbytes-- > 0)
6603             {
6604               int c = *src++;
6605
                   /* Bytes with the high bit set become raw-byte
                      (BYTE8) characters.  */
6606               if (c & 0x80)
6607                 c = BYTE8_TO_CHAR (c);
6608               coding->charbuf[coding->charbuf_used++] = c;
6609             }
               /* No translation table is applied to the flushed bytes.  */
6610           produce_chars (coding, Qnil, 1);
6611         }
6612       else
6613         {
6614           /* Record unprocessed bytes in coding->carryover.  We are
6615              sure that the number of data is less than the size of
6616              coding->carryover.  */
6617           unsigned char *p = coding->carryover;
6618
6619           coding->carryover_bytes = nbytes;
6620           while (nbytes-- > 0)
6621             *p++ = *src++;
6622         }
           /* Either way, all source bytes now count as consumed.  */
6623       coding->consumed = coding->src_bytes;
6624     }
6625
       /* EOL decoding is needed unless the EOL type is `unix'.  */
6626   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6627     decode_eol (coding);
       /* Restore the undo list saved above and record the whole
          insertion at once.  */
6628   if (BUFFERP (coding->dst_object))
6629     {
6630       current_buffer->undo_list = undo_list;
6631       record_insert (coding->dst_pos, coding->produced_char);
6632     }
6633   return coding->result;
6634 }
6635
6636
6637 /* Extract an annotation datum from a composition starting at POS and
6638    ending before LIMIT of CODING->src_object (buffer or string), store
6639    the data in BUF, set *STOP to a starting position of the next
6640    composition (if any) or to LIMIT, and return the address of the
6641    next element of BUF.
6642
6643    If such an annotation is not found, set *STOP to a starting
6644    position of a composition after POS (if any) or to LIMIT, and
6645    return BUF.  */
6646
6647 static INLINE int *
6648 handle_composition_annotation (pos, limit, coding, buf, stop)
6649      EMACS_INT pos, limit;
6650      struct coding_system *coding;
6651      int *buf;
6652      EMACS_INT *stop;
6653 {
6654   EMACS_INT start, end;
6655   Lisp_Object prop;
6656
6657   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6658       || end > limit)
6659     *stop = limit;
6660   else if (start > pos)
6661     *stop = start;
6662   else
6663     {
6664       if (start == pos)
6665         {
6666           /* We found a composition.  Store the corresponding
6667              annotation data in BUF.  */
6668           int *head = buf;
6669           enum composition_method method = COMPOSITION_METHOD (prop);
6670           int nchars = COMPOSITION_LENGTH (prop);
6671
6672           ADD_COMPOSITION_DATA (buf, nchars, method);
6673           if (method != COMPOSITION_RELATIVE)
6674             {
6675               Lisp_Object components;
6676               int len, i, i_byte;
6677
6678               components = COMPOSITION_COMPONENTS (prop);
6679               if (VECTORP (components))
6680                 {
6681                   len = XVECTOR (components)->size;
6682                   for (i = 0; i < len; i++)
6683                     *buf++ = XINT (AREF (components, i));
6684                 }
6685               else if (STRINGP (components))
6686                 {
6687                   len = SCHARS (components);
6688                   i = i_byte = 0;
6689                   while (i < len)
6690                     {
6691                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6692                       buf++;
6693                     }
6694                 }
6695               else if (INTEGERP (components))
6696                 {
6697                   len = 1;
6698                   *buf++ = XINT (components);
6699                 }
6700               else if (CONSP (components))
6701                 {
6702                   for (len = 0; CONSP (components);
6703                        len++, components = XCDR (components))
6704                     *buf++ = XINT (XCAR (components));
6705                 }
6706               else
6707                 abort ();
6708               *head -= len;
6709             }
6710         }
6711
6712       if (find_composition (end, limit, &start, &end, &prop,
6713                             coding->src_object)
6714           && end <= limit)
6715         *stop = start;
6716       else
6717         *stop = limit;
6718     }
6719   return buf;
6720 }
6721
6722
6723 /* Extract an annotation datum from a text property `charset' at POS of
6724    CODING->src_object (buffer of string), store the data in BUF, set
6725    *STOP to the position where the value of `charset' property changes
6726    (limiting by LIMIT), and return the address of the next element of
6727    BUF.
6728
6729    If the property value is nil, set *STOP to the position where the
6730    property value is non-nil (limiting by LIMIT), and return BUF.  */
6731
6732 static INLINE int *
6733 handle_charset_annotation (pos, limit, coding, buf, stop)
6734      EMACS_INT pos, limit;
6735      struct coding_system *coding;
6736      int *buf;
6737      EMACS_INT *stop;
6738 {
6739   Lisp_Object val, next;
6740   int id;
6741
6742   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6743   if (! NILP (val) && CHARSETP (val))
6744     id = XINT (CHARSET_SYMBOL_ID (val));
6745   else
6746     id = -1;
6747   ADD_CHARSET_DATA (buf, 0, id);
6748   next = Fnext_single_property_change (make_number (pos), Qcharset,
6749                                        coding->src_object,
6750                                        make_number (limit));
6751   *stop = XINT (next);
6752   return buf;
6753 }
6754
6755
     /* Fill CODING->charbuf with characters read from CODING->source,
        starting CODING->consumed bytes into it, until the work area
        is (nearly) full or the end position of the source is reached.

        On the way, end-of-line conversion is applied according to the
        EOL type of CODING, `charset' annotation data are inserted when
        CODING->common_flags asks for them, and each character is
        passed through TRANSLATION_TABLE when that is non-nil.
        MAX_LOOKUP is the number of ints allocated for the look-ahead
        buffer that is handed to get_translation.

        On return CODING->consumed, CODING->consumed_char and
        CODING->charbuf_used reflect what was read and stored, and
        CODING->chars_at_source is 0.  */
6756 static void
6757 consume_chars (coding, translation_table, max_lookup)
6758      struct coding_system *coding;
6759      Lisp_Object translation_table;
6760      int max_lookup;
6761 {
6762   int *buf = coding->charbuf;
6763   int *buf_end = coding->charbuf + coding->charbuf_size;
6764   const unsigned char *src = coding->source + coding->consumed;
6765   const unsigned char *src_end = coding->source + coding->src_bytes;
6766   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6767   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6768   int multibytep = coding->src_multibyte;
6769   Lisp_Object eol_type;
6770   int c;
6771   EMACS_INT stop, stop_composition, stop_charset;
6772   int *lookup_buf = NULL;
6773
       /* The look-ahead buffer is only needed when translating.  */
6774   if (! NILP (translation_table))
6775     lookup_buf = alloca (sizeof (int) * max_lookup);
6776
       /* An EOL type that is still a vector is handled like `unix'.  */
6777   eol_type = CODING_ID_EOL_TYPE (coding->id);
6778   if (VECTORP (eol_type))
6779     eol_type = Qunix;
6780
6781   /* Note: composition handling is not yet implemented.  */
6782   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6783
       /* STOP is the next position at which annotations must be looked
          for.  With no source object there is nothing to annotate, so
          every stop position is the end of the text.  */
6784   if (NILP (coding->src_object))
6785     stop = stop_composition = stop_charset = end_pos;
6786   else
6787     {
6788       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6789         stop = stop_composition = pos;
6790       else
6791         stop = stop_composition = end_pos;
6792       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6793         stop = stop_charset = pos;
6794       else
6795         stop_charset = end_pos;
6796     }
6797
6798   /* Compensate for CRLF and conversion.  */
6799   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6800   while (buf < buf_end)
6801     {
6802       Lisp_Object trans;
6803
           /* At a stop position: finish if the text is exhausted,
              otherwise emit the annotations due here and compute the
              next stop position.  */
6804       if (pos == stop)
6805         {
6806           if (pos == end_pos)
6807             break;
6808           if (pos == stop_composition)
6809             buf = handle_composition_annotation (pos, end_pos, coding,
6810                                                  buf, &stop_composition);
6811           if (pos == stop_charset)
6812             buf = handle_charset_annotation (pos, end_pos, coding,
6813                                              buf, &stop_charset);
6814           stop = (stop_composition < stop_charset
6815                   ? stop_composition : stop_charset);
6816         }
6817
           /* Fetch the next character C and advance SRC and POS.  For
              a unibyte source, POS advances by the number of bytes
              read: raw-text takes each byte as is; otherwise a valid
              multibyte sequence is read as one character, and any
              other byte becomes a raw-byte (BYTE8) character.  */
6818       if (! multibytep)
6819         {
6820           EMACS_INT bytes;
6821
6822           if (coding->encoder == encode_coding_raw_text)
6823             c = *src++, pos++;
6824           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6825             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
6826           else
6827             c = BYTE8_TO_CHAR (*src), src++, pos++;
6828         }
6829       else
6830         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
           /* Under selective display a CR is treated as a newline.  */
6831       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6832         c = '\n';
           /* EOL conversion: for `dos' a CR is stored in front of the
              newline; for any other non-unix type the newline itself
              is replaced by CR.  */
6833       if (! EQ (eol_type, Qunix))
6834         {
6835           if (c == '\n')
6836             {
6837               if (EQ (eol_type, Qdos))
6838                 *buf++ = '\r';
6839               else
6840                 c = '\r';
6841             }
6842         }
6843
6844       trans = Qnil;
6845       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6846       if (NILP (trans))
6847         *buf++ = c;
6848       else
6849         {
6850           int from_nchars = 1, to_nchars = 1;
6851           int *lookup_buf_end;
6852           const unsigned char *p = src;
6853           int i;
6854
               /* Collect C and up to MAX_LOOKUP - 1 following source
                  characters (read through P, so SRC is not advanced)
                  for get_translation to match against.  */
6855           lookup_buf[0] = c;
6856           for (i = 1; i < max_lookup && p < src_end; i++)
6857             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6858           lookup_buf_end = lookup_buf + i;
6859           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6860                                    &from_nchars, &to_nchars);
               /* NOTE(review): C has already been read from SRC (and
                  POS advanced) when we break out here -- confirm that
                  the character is not lost.  */
6861           if (EQ (trans, Qt)
6862               || buf + to_nchars > buf_end)
6863             break;
               /* The first output character is taken from
                  LOOKUP_BUF[0] (presumably updated by get_translation
                  -- confirm); any further ones come from TRANS.  */
6864           *buf++ = *lookup_buf;
6865           for (i = 1; i < to_nchars; i++)
6866             *buf++ = XINT (AREF (trans, i));
               /* Skip the additional source characters that took part
                  in the translation.  */
6867           for (i = 1; i < from_nchars; i++, pos++)
6868             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6869         }
6870     }
6871
6872   coding->consumed = src - coding->source;
6873   coding->consumed_char = pos - coding->src_pos;
6874   coding->charbuf_used = buf - coding->charbuf;
6875   coding->chars_at_source = 0;
6876 }
6877
6878
6879 /* Encode the text at CODING->src_object into CODING->dst_object.
6880    CODING->src_object is a buffer or a string.
6881    CODING->dst_object is a buffer or nil.
6882
6883    If CODING->src_object is a buffer, it must be the current buffer.
6884    In this case, if CODING->src_pos is positive, it is a position of
6885    the source text in the buffer, otherwise. the source text is in the
6886    gap area of the buffer, and coding->src_pos specifies the offset of
6887    the text from GPT (which must be the same as PT).  If this is the
6888    same buffer as CODING->dst_object, CODING->src_pos must be
6889    negative and CODING should not have `pre-write-conversion'.
6890
6891    If CODING->src_object is a string, CODING should not have
6892    `pre-write-conversion'.
6893
6894    If CODING->dst_object is a buffer, the encoded data is inserted at
6895    the current point of that buffer.
6896
6897    If CODING->dst_object is nil, the encoded data is placed at the
6898    memory area specified by CODING->destination.  */
6899
6900 static int
6901 encode_coding (coding)
6902      struct coding_system *coding;
6903 {
6904   Lisp_Object attrs;
6905   Lisp_Object translation_table;
6906   int max_lookup;
6907
6908   attrs = CODING_ID_ATTRS (coding->id);
6909   if (coding->encoder == encode_coding_raw_text)
6910     translation_table = Qnil, max_lookup = 0;
6911   else
6912     translation_table = get_translation_table (attrs, 1, &max_lookup);
6913
6914   if (BUFFERP (coding->dst_object))
6915     {
6916       set_buffer_internal (XBUFFER (coding->dst_object));
6917       coding->dst_multibyte
6918         = ! NILP (current_buffer->enable_multibyte_characters);
6919     }
6920
6921   coding->consumed = coding->consumed_char = 0;
6922   coding->produced = coding->produced_char = 0;
6923   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6924   coding->errors = 0;
6925
6926   ALLOC_CONVERSION_WORK_AREA (coding);
6927
6928   do {
6929     coding_set_source (coding);
6930     consume_chars (coding, translation_table, max_lookup);
6931     coding_set_destination (coding);
6932     (*(coding->encoder)) (coding);
6933   } while (coding->consumed_char < coding->src_chars);
6934
6935   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
6936     insert_from_gap (coding->produced_char, coding->produced);
6937
6938   return (coding->result);
6939 }
6940
6941
6942 /* Name (or base name) of work buffer for code conversion.  */
6943 static Lisp_Object Vcode_conversion_workbuf_name;
6944
6945 /* A working buffer used by the top level conversion.  Once it is
6946    created, it is never destroyed.  It has the name
6947    Vcode_conversion_workbuf_name.  The other working buffers are
6948    destroyed after the use is finished, and their names are modified
6949    versions of Vcode_conversion_workbuf_name.  */
6950 static Lisp_Object Vcode_conversion_reused_workbuf;
6951
6952 /* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
        make_conversion_work_buffer increments it on every call, so it
        may grow beyond 1; code_conversion_restore resets it to 0 when
        the reused buffer is released.  */
6953 static int reused_workbuf_in_use;
6954
6955
6956 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6957    multibyteness of returning buffer.  */
6958
6959 static Lisp_Object
6960 make_conversion_work_buffer (multibyte)
6961      int multibyte;
6962 {
6963   Lisp_Object name, workbuf;
6964   struct buffer *current;
6965
6966   if (reused_workbuf_in_use++)
6967     {
6968       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6969       workbuf = Fget_buffer_create (name);
6970     }
6971   else
6972     {
6973       name = Vcode_conversion_workbuf_name;
6974       workbuf = Fget_buffer_create (name);
6975       if (NILP (Vcode_conversion_reused_workbuf))
6976         Vcode_conversion_reused_workbuf = workbuf;
6977     }
6978   current = current_buffer;
6979   set_buffer_internal (XBUFFER (workbuf));
6980   Ferase_buffer ();
6981   current_buffer->undo_list = Qt;
6982   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6983   set_buffer_internal (current);
6984   return workbuf;
6985 }
6986
6987
6988 static Lisp_Object
6989 code_conversion_restore (arg)
6990      Lisp_Object arg;
6991 {
6992   Lisp_Object current, workbuf;
6993   struct gcpro gcpro1;
6994
6995   GCPRO1 (arg);
6996   current = XCAR (arg);
6997   workbuf = XCDR (arg);
6998   if (! NILP (workbuf))
6999     {
7000       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7001         reused_workbuf_in_use = 0;
7002       else if (! NILP (Fbuffer_live_p (workbuf)))
7003         Fkill_buffer (workbuf);
7004     }
7005   set_buffer_internal (XBUFFER (current));
7006   UNGCPRO;
7007   return Qnil;
7008 }
7009
7010 Lisp_Object
7011 code_conversion_save (with_work_buf, multibyte)
7012      int with_work_buf, multibyte;
7013 {
7014   Lisp_Object workbuf = Qnil;
7015
7016   if (with_work_buf)
7017     workbuf = make_conversion_work_buffer (multibyte);
7018   record_unwind_protect (code_conversion_restore,
7019                          Fcons (Fcurrent_buffer (), workbuf));
7020   return workbuf;
7021 }
7022
7023 int
7024 decode_coding_gap (coding, chars, bytes)
7025      struct coding_system *coding;
7026      EMACS_INT chars, bytes;
7027 {
7028   int count = specpdl_ptr - specpdl;
7029   Lisp_Object attrs;
7030
7031   code_conversion_save (0, 0);
7032
7033   coding->src_object = Fcurrent_buffer ();
7034   coding->src_chars = chars;
7035   coding->src_bytes = bytes;
7036   coding->src_pos = -chars;
7037   coding->src_pos_byte = -bytes;
7038   coding->src_multibyte = chars < bytes;
7039   coding->dst_object = coding->src_object;
7040   coding->dst_pos = PT;
7041   coding->dst_pos_byte = PT_BYTE;
7042   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7043
7044   if (CODING_REQUIRE_DETECTION (coding))
7045     detect_coding (coding);
7046
7047   coding->mode |= CODING_MODE_LAST_BLOCK;
7048   current_buffer->text->inhibit_shrinking = 1;
7049   decode_coding (coding);
7050   current_buffer->text->inhibit_shrinking = 0;
7051
7052   attrs = CODING_ID_ATTRS (coding->id);
7053   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7054     {
7055       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7056       Lisp_Object val;
7057
7058       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7059       val = call1 (CODING_ATTR_POST_READ (attrs),
7060                    make_number (coding->produced_char));
7061       CHECK_NATNUM (val);
7062       coding->produced_char += Z - prev_Z;
7063       coding->produced += Z_BYTE - prev_Z_BYTE;
7064     }
7065
7066   unbind_to (count, Qnil);
7067   return coding->result;
7068 }
7069
7070 int
7071 encode_coding_gap (coding, chars, bytes)
7072      struct coding_system *coding;
7073      EMACS_INT chars, bytes;
7074 {
7075   int count = specpdl_ptr - specpdl;
7076
7077   code_conversion_save (0, 0);
7078
7079   coding->src_object = Fcurrent_buffer ();
7080   coding->src_chars = chars;
7081   coding->src_bytes = bytes;
7082   coding->src_pos = -chars;
7083   coding->src_pos_byte = -bytes;
7084   coding->src_multibyte = chars < bytes;
7085   coding->dst_object = coding->src_object;
7086   coding->dst_pos = PT;
7087   coding->dst_pos_byte = PT_BYTE;
7088
7089   encode_coding (coding);
7090
7091   unbind_to (count, Qnil);
7092   return coding->result;
7093 }
7094
7095
7096 /* Decode the text in the range FROM/FROM_BYTE to TO/TO_BYTE of
7097    SRC_OBJECT into DST_OBJECT using the coding context CODING.
7098
7099    SRC_OBJECT is a buffer, a string, or Qnil.
7100
7101    If it is a buffer, the text is at point of the buffer.  FROM and TO
7102    are positions in the buffer.
7103
7104    If it is a string, the text is at the beginning of the string.
7105    FROM and TO are indices to the string.
7106
7107    If it is nil, the text is at coding->source.  FROM and TO are
7108    indices to coding->source.
7109
7110    DST_OBJECT is a buffer, Qt, or Qnil.
7111
7112    If it is a buffer, the decoded text is inserted at point of the
7113    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7114    is deleted.
7115
7116    If it is Qt, a string is made from the decoded text, and
7117    set in CODING->dst_object.
7118
7119    If it is Qnil, the decoded text is stored at CODING->destination.
7120    The caller must allocate CODING->dst_bytes bytes at
7121    CODING->destination by xmalloc.  If the decoded text is longer than
7122    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7123    The value of Vdeactivate_mark is preserved across the call.  */
7124
7125 void
7126 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7127                       dst_object)
7128      struct coding_system *coding;
7129      Lisp_Object src_object;
7130      EMACS_INT from, from_byte, to, to_byte;
7131      Lisp_Object dst_object;
7132 {
7133   int count = specpdl_ptr - specpdl;  /* Binding-stack depth to unwind to.  */
7134   unsigned char *destination = NULL;  /* Caller's output area; meaningful only if DST_OBJECT is nil.  */
7135   EMACS_INT dst_bytes = 0;
7136   EMACS_INT chars = to - from;
7137   EMACS_INT bytes = to_byte - from_byte;
7138   Lisp_Object attrs;
7139   EMACS_INT saved_pt = -1, saved_pt_byte = -1;  /* Same width as PT; negative means PT was not moved.  */
7140   int need_marker_adjustment = 0;
7141   Lisp_Object old_deactivate_mark;
7142
7143   old_deactivate_mark = Vdeactivate_mark;
7144
7145   if (NILP (dst_object))
7146     {
7147       destination = coding->destination;
7148       dst_bytes = coding->dst_bytes;
7149     }
7150
7151   coding->src_object = src_object;
7152   coding->src_chars = chars;
7153   coding->src_bytes = bytes;
7154   coding->src_multibyte = chars < bytes;  /* Fewer chars than bytes: multibyte source.  */
7155
7156   if (STRINGP (src_object))
7157     {
7158       coding->src_pos = from;
7159       coding->src_pos_byte = from_byte;
7160     }
7161   else if (BUFFERP (src_object))
7162     {
7163       set_buffer_internal (XBUFFER (src_object));
7164       if (from != GPT)
7165         move_gap_both (from, from_byte);
7166       if (EQ (src_object, dst_object))
7167         {
7168           struct Lisp_Marker *tail;
7169           /* Flag the markers at FROM (insertion-type ones) or at TO (the others); they are repositioned at the end.  */
7170           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7171             {
7172               tail->need_adjustment
7173                 = tail->charpos == (tail->insertion_type ? from : to);
7174               need_marker_adjustment |= tail->need_adjustment;
7175             }
7176           saved_pt = PT, saved_pt_byte = PT_BYTE;
7177           TEMP_SET_PT_BOTH (from, from_byte);
7178           current_buffer->text->inhibit_shrinking = 1;  /* Cleared again when PT is restored below.  */
7179           del_range_both (from, from_byte, to, to_byte, 1);
7180           coding->src_pos = -chars;  /* NOTE(review): negative positions presumably address the just-deleted text -- confirm in decode_coding.  */
7181           coding->src_pos_byte = -bytes;
7182         }
7183       else
7184         {
7185           coding->src_pos = from;
7186           coding->src_pos_byte = from_byte;
7187         }
7188     }
7189
7190   if (CODING_REQUIRE_DETECTION (coding))
7191     detect_coding (coding);
7192   attrs = CODING_ID_ATTRS (coding->id);  /* NOTE(review): read after detect_coding, presumably because detection can change coding->id.  */
7193
7194   if (EQ (dst_object, Qt)
7195       || (! NILP (CODING_ATTR_POST_READ (attrs))
7196           && NILP (dst_object)))
7197     {
7198       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7199       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);  /* Decode into the object returned here.  */
7200       coding->dst_pos = BEG;
7201       coding->dst_pos_byte = BEG_BYTE;
7202     }
7203   else if (BUFFERP (dst_object))
7204     {
7205       code_conversion_save (0, 0);
7206       coding->dst_object = dst_object;
7207       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7208       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7209       coding->dst_multibyte
7210         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7211     }
7212   else
7213     {
7214       code_conversion_save (0, 0);
7215       coding->dst_object = Qnil;
7216       /* Most callers presume this will return a multibyte result, and they
7217          won't use `binary' or `raw-text' anyway, so let's not worry about
7218          CODING_FOR_UNIBYTE.  */
7219       coding->dst_multibyte = 1;
7220     }
7221
7222   decode_coding (coding);
7223
7224   if (BUFFERP (coding->dst_object))
7225     set_buffer_internal (XBUFFER (coding->dst_object));
7226
7227   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7228     {
7229       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7230       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7231       Lisp_Object val;
7232       /* Call the post-read function with point at the start of the decoded text and its length in characters.  */
7233       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7234       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7235               old_deactivate_mark);
7236       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7237                         make_number (coding->produced_char));
7238       UNGCPRO;
7239       CHECK_NATNUM (val);
7240       coding->produced_char += Z - prev_Z;  /* Account for what the function added or removed.  */
7241       coding->produced += Z_BYTE - prev_Z_BYTE;
7242     }
7243
7244   if (EQ (dst_object, Qt))
7245     {
7246       coding->dst_object = Fbuffer_string ();
7247     }
7248   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7249     {
7250       set_buffer_internal (XBUFFER (coding->dst_object));
7251       if (dst_bytes < coding->produced)  /* NOTE(review): nothing is copied out when the caller's area is already big enough -- confirm intended.  */
7252         {
7253           destination = xrealloc (destination, coding->produced);
7254           if (! destination)
7255             {
7256               record_conversion_result (coding,
7257                                         CODING_RESULT_INSUFFICIENT_DST);
7258               unbind_to (count, Qnil);
7259               return;
7260             }
7261           if (BEGV < GPT && GPT < BEGV + coding->produced_char)  /* Gap inside the text to copy: move it out of the way.  */
7262             move_gap_both (BEGV, BEGV_BYTE);
7263           bcopy (BEGV_ADDR, destination, coding->produced);
7264           coding->destination = destination;
7265         }
7266     }
7267
7268   if (saved_pt >= 0)
7269     {
7270       /* This is the case of:
7271          (BUFFERP (src_object) && EQ (src_object, dst_object))
7272          As we have moved PT while replacing the original buffer
7273          contents, we must recover it now.  */
7274       set_buffer_internal (XBUFFER (src_object));
7275       current_buffer->text->inhibit_shrinking = 0;
7276       if (saved_pt < from)  /* PT was before the region: unchanged.  */
7277         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7278       else if (saved_pt < from + chars)  /* PT was inside the region: go to its start.  */
7279         TEMP_SET_PT_BOTH (from, from_byte);
7280       else if (! NILP (current_buffer->enable_multibyte_characters))  /* PT was after the region: shift by the size change.  */
7281         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7282                           saved_pt_byte + (coding->produced - bytes));
7283       else
7284         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7285                           saved_pt_byte + (coding->produced - bytes));
7286
7287       if (need_marker_adjustment)
7288         {
7289           struct Lisp_Marker *tail;
7290           /* Put the flagged markers at the start (insertion-type) or at the end (others) of the decoded text.  */
7291           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7292             if (tail->need_adjustment)
7293               {
7294                 tail->need_adjustment = 0;
7295                 if (tail->insertion_type)
7296                   {
7297                     tail->bytepos = from_byte;
7298                     tail->charpos = from;
7299                   }
7300                 else
7301                   {
7302                     tail->bytepos = from_byte + coding->produced;
7303                     tail->charpos
7304                       = (NILP (current_buffer->enable_multibyte_characters)
7305                          ? tail->bytepos : from + coding->produced_char);
7306                   }
7307               }
7308         }
7309     }
7310
7311   Vdeactivate_mark = old_deactivate_mark;
7312   unbind_to (count, coding->dst_object);
7313 }
7314 /* Encode the text in the range FROM/FROM_BYTE to TO/TO_BYTE of SRC_OBJECT (a buffer, a string, or else the bytes at CODING->source) using the coding context CODING.
7315    DST_OBJECT is a buffer (if it is SRC_OBJECT itself, the source text is deleted and output goes to FROM; otherwise output goes to that buffer's point), Qt (CODING->dst_object receives the result as a string), or anything else (CODING->dst_object is set to Qnil).  Vdeactivate_mark is preserved.  */
7316 void
7317 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7318                       dst_object)
7319      struct coding_system *coding;
7320      Lisp_Object src_object;
7321      EMACS_INT from, from_byte, to, to_byte;
7322      Lisp_Object dst_object;
7323 {
7324   int count = specpdl_ptr - specpdl;  /* Binding-stack depth to unwind to.  */
7325   EMACS_INT chars = to - from;
7326   EMACS_INT bytes = to_byte - from_byte;
7327   Lisp_Object attrs;
7328   EMACS_INT saved_pt = -1, saved_pt_byte = -1;  /* Same width as PT; negative means PT was not saved.  */
7329   int need_marker_adjustment = 0;
7330   int kill_src_buffer = 0;
7331   Lisp_Object old_deactivate_mark;
7332   int same_buffer = BUFFERP (src_object) && EQ (src_object, dst_object);  /* In-place encoding; a bare EQ would also match two Qnil arguments.  */
7333   old_deactivate_mark = Vdeactivate_mark;
7334
7335   coding->src_object = src_object;
7336   coding->src_chars = chars;
7337   coding->src_bytes = bytes;
7338   coding->src_multibyte = chars < bytes;  /* Fewer chars than bytes: multibyte source.  */
7339
7340   attrs = CODING_ID_ATTRS (coding->id);
7341
7342   if (same_buffer)
7343     {
7344       struct Lisp_Marker *tail;
7345       /* Flag the markers of the source buffer (the list walked again at the end) that sit at FROM (insertion-type) or TO (others).  */
7346       for (tail = BUF_MARKERS (XBUFFER (src_object)); tail; tail = tail->next)
7347         {
7348           tail->need_adjustment
7349             = tail->charpos == (tail->insertion_type ? from : to);
7350           need_marker_adjustment |= tail->need_adjustment;
7351         }
7352     }
7353
7354   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7355     {
7356       coding->src_object = code_conversion_save (1, coding->src_multibyte);  /* Copy the source here so the pre-write function can edit it.  */
7357       set_buffer_internal (XBUFFER (coding->src_object));
7358       if (STRINGP (src_object))
7359         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7360       else if (BUFFERP (src_object))
7361         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7362       else
7363         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7364
7365       if (same_buffer)
7366         {
7367           set_buffer_internal (XBUFFER (src_object));
7368           saved_pt = PT, saved_pt_byte = PT_BYTE;
7369           del_range_both (from, from_byte, to, to_byte, 1);
7370           set_buffer_internal (XBUFFER (coding->src_object));
7371         }
7372
7373       {
7374         Lisp_Object args[3];
7375         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7376
7377         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7378                 old_deactivate_mark);
7379         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7380         args[1] = make_number (BEG);
7381         args[2] = make_number (Z);
7382         safe_call (3, args);
7383         UNGCPRO;
7384       }
7385       if (XBUFFER (coding->src_object) != current_buffer)  /* The function left another buffer current: encode that one and kill it afterwards.  */
7386         kill_src_buffer = 1;
7387       coding->src_object = Fcurrent_buffer ();
7388       if (BEG != GPT)
7389         move_gap_both (BEG, BEG_BYTE);
7390       coding->src_chars = Z - BEG;
7391       coding->src_bytes = Z_BYTE - BEG_BYTE;
7392       coding->src_pos = BEG;
7393       coding->src_pos_byte = BEG_BYTE;
7394       coding->src_multibyte = Z < Z_BYTE;
7395     }
7396   else if (STRINGP (src_object))
7397     {
7398       code_conversion_save (0, 0);
7399       coding->src_pos = from;
7400       coding->src_pos_byte = from_byte;
7401     }
7402   else if (BUFFERP (src_object))
7403     {
7404       code_conversion_save (0, 0);
7405       set_buffer_internal (XBUFFER (src_object));
7406       if (same_buffer)
7407         {
7408           saved_pt = PT, saved_pt_byte = PT_BYTE;
7409           coding->src_object = del_range_1 (from, to, 1, 1);  /* The value returned for the deleted range becomes the source.  */
7410           coding->src_pos = 0;
7411           coding->src_pos_byte = 0;
7412         }
7413       else
7414         {
7415           if (from < GPT && to >= GPT)  /* Region straddles the gap: move the gap to its start.  */
7416             move_gap_both (from, from_byte);
7417           coding->src_pos = from;
7418           coding->src_pos_byte = from_byte;
7419         }
7420     }
7421   else
7422     code_conversion_save (0, 0);
7423
7424   if (BUFFERP (dst_object))
7425     {
7426       coding->dst_object = dst_object;
7427       if (same_buffer)
7428         {
7429           coding->dst_pos = from;
7430           coding->dst_pos_byte = from_byte;
7431         }
7432       else
7433         {
7434           struct buffer *current = current_buffer;
7435
7436           set_buffer_temp (XBUFFER (dst_object));
7437           coding->dst_pos = PT;
7438           coding->dst_pos_byte = PT_BYTE;
7439           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7440           set_buffer_temp (current);
7441         }
7442       coding->dst_multibyte
7443         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7444     }
7445   else if (EQ (dst_object, Qt))
7446     {
7447       coding->dst_object = Qnil;
7448       coding->dst_bytes = coding->src_chars;  /* Initial size of the output area; never zero.  */
7449       if (coding->dst_bytes == 0)
7450         coding->dst_bytes = 1;
7451       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7452       coding->dst_multibyte = 0;
7453     }
7454   else
7455     {
7456       coding->dst_object = Qnil;
7457       coding->dst_multibyte = 0;
7458     }
7459
7460   encode_coding (coding);
7461
7462   if (EQ (dst_object, Qt))
7463     {
7464       if (BUFFERP (coding->dst_object))
7465         coding->dst_object = Fbuffer_string ();
7466       else
7467         {
7468           coding->dst_object
7469             = make_unibyte_string ((char *) coding->destination,
7470                                    coding->produced);
7471           xfree (coding->destination);  /* Allocated above; the string holds its own copy.  */
7472         }
7473     }
7474
7475   if (saved_pt >= 0)
7476     {
7477       /* This is the case of:
7478          (BUFFERP (src_object) && EQ (src_object, dst_object))
7479          As we have moved PT while replacing the original buffer
7480          contents, we must recover it now.  */
7481       set_buffer_internal (XBUFFER (src_object));
7482       if (saved_pt < from)  /* PT was before the region: unchanged.  */
7483         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7484       else if (saved_pt < from + chars)  /* PT was inside the region: go to its start.  */
7485         TEMP_SET_PT_BOTH (from, from_byte);
7486       else if (! NILP (current_buffer->enable_multibyte_characters))  /* PT was after the region: shift by the size change.  */
7487         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7488                           saved_pt_byte + (coding->produced - bytes));
7489       else
7490         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7491                           saved_pt_byte + (coding->produced - bytes));
7492
7493       if (need_marker_adjustment)
7494         {
7495           struct Lisp_Marker *tail;
7496           /* Put the flagged markers at the start (insertion-type) or at the end (others) of the encoded text.  */
7497           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7498             if (tail->need_adjustment)
7499               {
7500                 tail->need_adjustment = 0;
7501                 if (tail->insertion_type)
7502                   {
7503                     tail->bytepos = from_byte;
7504                     tail->charpos = from;
7505                   }
7506                 else
7507                   {
7508                     tail->bytepos = from_byte + coding->produced;
7509                     tail->charpos
7510                       = (NILP (current_buffer->enable_multibyte_characters)
7511                          ? tail->bytepos : from + coding->produced_char);
7512                   }
7513               }
7514         }
7515     }
7516
7517   if (kill_src_buffer)
7518     Fkill_buffer (coding->src_object);
7519
7520   Vdeactivate_mark = old_deactivate_mark;
7521   unbind_to (count, Qnil);
7522 }
7523
7524 /* Return the name of the coding system recorded for the coding category that comes first in coding_priorities.  */
7525 Lisp_Object
7526 preferred_coding_system ()
7527 {
7528   int top_category = coding_priorities[0];
7529
7530   return CODING_ID_NAME (coding_categories[top_category].id);
7531 }
7532
7533 \f
7534 #ifdef emacs
7535 /*** 11. Emacs Lisp library functions ***/
7536
7537 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
7538        doc: /* Return t if OBJECT is nil or a coding-system.
7539 See the documentation of `define-coding-system' for information
7540 about coding-system objects.  */)
7541      (object)
7542      Lisp_Object object;
7543 {
7544   /* nil, or anything that already has a coding system id, qualifies at once.  */
7545   if (NILP (object) || CODING_SYSTEM_ID (object) >= 0)
7546     return Qt;
7547   /* Otherwise only a symbol with a non-nil Qcoding_system_define_form property qualifies.  */
7548   return ((SYMBOLP (object)
7549            && ! NILP (Fget (object, Qcoding_system_define_form)))
7550           ? Qt : Qnil);
7551 }
7552
7553 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7554        Sread_non_nil_coding_system, 1, 1, 0,
7555        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7556      (prompt)
7557      Lisp_Object prompt;
7558 {
7559   Lisp_Object input;
7560
7561   /* Ask again for as long as the user submits an empty string.  */
7562   do
7563     input = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, Qt, Qnil,
7564                               Qcoding_system_history, Qnil, Qnil);
7565   while (SCHARS (input) == 0);
7566   return Fintern (input, Qnil);
7567 }
7568
7569 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7570        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7571 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7572 Ignores case when completing coding systems (all Emacs coding systems
7573 are lower-case).  */)
7574      (prompt, default_coding_system)
7575      Lisp_Object prompt, default_coding_system;
7576 {
7577   int depth = SPECPDL_INDEX ();  /* Where to unwind the specbind below.  */
7578   Lisp_Object answer;
7579
7580   if (SYMBOLP (default_coding_system))  /* A symbol default is handed over as its name.  */
7581     default_coding_system = SYMBOL_NAME (default_coding_system);
7582   specbind (Qcompletion_ignore_case, Qt);
7583   answer = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, Qt, Qnil,
7584                              Qcoding_system_history, default_coding_system, Qnil);
7585   unbind_to (depth, Qnil);
7586   return (SCHARS (answer) != 0
7587           ? Fintern (answer, Qnil) : Qnil);
7588 }
7589
7590 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7591        1, 1, 0,
7592        doc: /* Check validity of CODING-SYSTEM.
7593 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7594 It is valid if it is nil or a symbol defined as a coding system by the
7595 function `define-coding-system'.  */)
7596   (coding_system)
7597      Lisp_Object coding_system;
7598 {
7599   Lisp_Object pending = Fget (coding_system, Qcoding_system_define_form);
7600
7601   /* A stored definition form is cleared from the symbol first and then evaluated.  */
7602   if (! NILP (pending))
7603     {
7604       Fput (coding_system, Qcoding_system_define_form, Qnil);
7605       safe_eval (pending);
7606     }
7607   if (NILP (Fcoding_system_p (coding_system)))
7608     xsignal1 (Qcoding_system_error, coding_system);
7609   return coding_system;
7610 }
7611
7612 \f
7613 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7614    HIGHEST is nonzero, return the coding system of the highest
7615    priority among the detected coding systems.  Otherwise return a
7616    list of detected coding systems sorted by their priorities.  If
7617    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7618    multibyte form but contain only ASCII and eight-bit chars.
7619    Otherwise, the bytes are raw bytes.
7620
7621    CODING-SYSTEM controls the detection as below:
7622
7623    If it is nil, detect both text-format and eol-format.  If the
7624    text-format part of CODING-SYSTEM is already specified
7625    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7626    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7627    detect only text-format.  Return Qnil when no candidate was found.  */
7628
7629 Lisp_Object
7630 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7631                       coding_system)
7632      const unsigned char *src;
7633      EMACS_INT src_chars, src_bytes;
7634      int highest;
7635      int multibytep;
7636      Lisp_Object coding_system;
7637 {
7638   const unsigned char *src_end = src + src_bytes;
7639   Lisp_Object attrs, eol_type;
7640   Lisp_Object val = Qnil;  /* Stays Qnil on the paths that find no candidate.  */
7641   struct coding_system coding;
7642   int id;
7643   struct coding_detection_info detect_info;
7644   enum coding_category base_category;
7645   int null_byte_found = 0, eight_bit_found = 0;
7646
7647   if (NILP (coding_system))
7648     coding_system = Qundecided;
7649   setup_coding_system (coding_system, &coding);
7650   attrs = CODING_ID_ATTRS (coding.id);
7651   eol_type = CODING_ID_EOL_TYPE (coding.id);
7652   coding_system = CODING_ATTR_BASE_NAME (attrs);
7653
7654   coding.source = src;
7655   coding.src_chars = src_chars;
7656   coding.src_bytes = src_bytes;
7657   coding.src_multibyte = multibytep;
7658   coding.consumed = 0;
7659   coding.mode |= CODING_MODE_LAST_BLOCK;
7660   coding.head_ascii = 0;
7661
7662   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7663
7664   /* At first, detect text-format if necessary.  */
7665   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7666   if (base_category == coding_category_undecided)
7667     {
7668       enum coding_category category;
7669       struct coding_system *this;
7670       int c, i;
7671
7672       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7673       for (; src < src_end; src++)
7674         {
7675           c = *src;
7676           if (c & 0x80)
7677             {
7678               eight_bit_found = 1;
7679               if (null_byte_found)  /* Both a null byte and an 8-bit byte seen: stop scanning.  */
7680                 break;
7681             }
7682           else if (c < 0x20)
7683             {
7684               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7685                   && ! inhibit_iso_escape_detection
7686                   && ! detect_info.checked)
7687                 {
7688                   if (detect_coding_iso_2022 (&coding, &detect_info))
7689                     {
7690                       /* We have scanned the whole data.  */
7691                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7692                         {
7693                           /* We didn't find an 8-bit code.  We may
7694                              have found a null-byte, but it's very
7695                              rare that a binary file conforms to
7696                              ISO-2022.  */
7697                           src = src_end;
7698                           coding.head_ascii = src - coding.source;
7699                         }
7700                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
7701                       break;
7702                     }
7703                 }
7704               else if (! c)
7705                 {
7706                   null_byte_found = 1;
7707                   if (eight_bit_found)  /* Same stop condition as above.  */
7708                     break;
7709                 }
7710               coding.head_ascii++;
7711             }
7712           else
7713             coding.head_ascii++;
7714         }
7715       /* Consult the per-category detectors unless nothing but plain 7-bit text was seen.  */
7716       if (null_byte_found || eight_bit_found
7717           || coding.head_ascii < coding.src_bytes
7718           || detect_info.found)
7719         {
7720           if (coding.head_ascii == coding.src_bytes)
7721             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
7722             for (i = 0; i < coding_category_raw_text; i++)
7723               {
7724                 category = coding_priorities[i];
7725                 this = coding_categories + category;
7726                 if (detect_info.found & (1 << category))
7727                   break;
7728               }
7729           else
7730             {
7731               if (null_byte_found)
7732                 {
7733                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;  /* With a null byte only the UTF-16 categories stay candidates.  */
7734                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
7735                 }
7736               for (i = 0; i < coding_category_raw_text; i++)
7737                 {
7738                   category = coding_priorities[i];
7739                   this = coding_categories + category;
7740
7741                   if (this->id < 0)
7742                     {
7743                       /* No coding system of this category is defined.  */
7744                       detect_info.rejected |= (1 << category);
7745                     }
7746                   else if (category >= coding_category_raw_text)
7747                     continue;
7748                   else if (detect_info.checked & (1 << category))
7749                     {
7750                       if (highest
7751                           && (detect_info.found & (1 << category)))
7752                         break;
7753                     }
7754                   else if ((*(this->detector)) (&coding, &detect_info)
7755                            && highest
7756                            && (detect_info.found & (1 << category)))
7757                     {
7758                       if (category == coding_category_utf_16_auto)
7759                         {
7760                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7761                             category = coding_category_utf_16_le;
7762                           else
7763                             category = coding_category_utf_16_be;
7764                         }
7765                       break;
7766                     }
7767                 }
7768             }
7769         }
7770       /* Turn the found/rejected masks into VAL, a list of coding system ids.  */
7771       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
7772         {
7773           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7774           id = coding_categories[coding_category_raw_text].id;
7775           val = Fcons (make_number (id), Qnil);
7776         }
7777       else if (! detect_info.rejected && ! detect_info.found)
7778         {
7779           detect_info.found = CATEGORY_MASK_ANY;
7780           id = coding_categories[coding_category_undecided].id;
7781           val = Fcons (make_number (id), Qnil);
7782         }
7783       else if (highest)
7784         {
7785           if (detect_info.found)
7786             {
7787               detect_info.found = 1 << category;
7788               val = Fcons (make_number (this->id), Qnil);
7789             }
7790           else  /* Nothing positively found: take the first category in priority order that was not rejected.  */
7791             for (i = 0; i < coding_category_raw_text; i++)
7792               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7793                 {
7794                   detect_info.found = 1 << coding_priorities[i];
7795                   id = coding_categories[coding_priorities[i]].id;
7796                   val = Fcons (make_number (id), Qnil);
7797                   break;
7798                 }
7799         }
7800       else
7801         {
7802           int mask = detect_info.rejected | detect_info.found;
7803           int found = 0;
7804           val = Qnil;
7805           /* Both loops run from lowest to highest priority and cons onto the front, so VAL ends up with the found categories first, each group in priority order.  */
7806           for (i = coding_category_raw_text - 1; i >= 0; i--)
7807             {
7808               category = coding_priorities[i];
7809               if (! (mask & (1 << category)))
7810                 {
7811                   found |= 1 << category;
7812                   id = coding_categories[category].id;
7813                   if (id >= 0)
7814                     val = Fcons (make_number (id), val);
7815                 }
7816             }
7817           for (i = coding_category_raw_text - 1; i >= 0; i--)
7818             {
7819               category = coding_priorities[i];
7820               if (detect_info.found & (1 << category))
7821                 {
7822                   id = coding_categories[category].id;
7823                   val = Fcons (make_number (id), val);
7824                 }
7825             }
7826           detect_info.found |= found;
7827         }
7828     }
7829   else if (base_category == coding_category_utf_8_auto)
7830     {
7831       if (detect_coding_utf_8 (&coding, &detect_info))
7832         {
7833           struct coding_system *this;
7834
7835           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
7836             this = coding_categories + coding_category_utf_8_sig;
7837           else
7838             this = coding_categories + coding_category_utf_8_nosig;
7839           val = Fcons (make_number (this->id), Qnil);
7840         }
7841     }
7842   else if (base_category == coding_category_utf_16_auto)
7843     {
7844       if (detect_coding_utf_16 (&coding, &detect_info))
7845         {
7846           struct coding_system *this;
7847
7848           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7849             this = coding_categories + coding_category_utf_16_le;
7850           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7851             this = coding_categories + coding_category_utf_16_be;
7852           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7853             this = coding_categories + coding_category_utf_16_be_nosig;
7854           else
7855             this = coding_categories + coding_category_utf_16_le_nosig;
7856           val = Fcons (make_number (this->id), Qnil);
7857         }
7858     }
7859   else
7860     {
7861       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7862       val = Fcons (make_number (coding.id), Qnil);
7863     }
7864
7865   /* Then, detect eol-format if necessary.  */
7866   {
7867     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;  /* -1: not determined; all three start defined.  */
7868     Lisp_Object tail;
7869
7870     if (VECTORP (eol_type))
7871       {
7872         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7873           {
7874             if (null_byte_found)
7875               normal_eol = EOL_SEEN_LF;
7876             else
7877               normal_eol = detect_eol (coding.source, src_bytes,
7878                                        coding_category_raw_text);
7879           }
7880         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7881                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7882           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7883                                       coding_category_utf_16_be);
7884         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7885                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7886           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7887                                       coding_category_utf_16_le);
7888       }
7889     else
7890       {
7891         if (EQ (eol_type, Qunix))
7892           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7893         else if (EQ (eol_type, Qdos))
7894           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7895         else
7896           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7897       }
7898     /* Replace each id in VAL by a coding system name, using the eol-specific variant when one eol format was seen.  */
7899     for (tail = val; CONSP (tail); tail = XCDR (tail))
7900       {
7901         enum coding_category category;
7902         int this_eol;
7903
7904         id = XINT (XCAR (tail));
7905         attrs = CODING_ID_ATTRS (id);
7906         category = XINT (CODING_ATTR_CATEGORY (attrs));
7907         eol_type = CODING_ID_EOL_TYPE (id);
7908         if (VECTORP (eol_type))
7909           {
7910             if (category == coding_category_utf_16_be
7911                 || category == coding_category_utf_16_be_nosig)
7912               this_eol = utf_16_be_eol;
7913             else if (category == coding_category_utf_16_le
7914                      || category == coding_category_utf_16_le_nosig)
7915               this_eol = utf_16_le_eol;
7916             else
7917               this_eol = normal_eol;
7918
7919             if (this_eol == EOL_SEEN_LF)
7920               XSETCAR (tail, AREF (eol_type, 0));
7921             else if (this_eol == EOL_SEEN_CRLF)
7922               XSETCAR (tail, AREF (eol_type, 1));
7923             else if (this_eol == EOL_SEEN_CR)
7924               XSETCAR (tail, AREF (eol_type, 2));
7925             else
7926               XSETCAR (tail, CODING_ID_NAME (id));
7927           }
7928         else
7929           XSETCAR (tail, CODING_ID_NAME (id));
7930       }
7931   }
7932   /* VAL may still be Qnil here, so take its car only when it is a cons.  */
7933   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
7934 }
7935
7936
7937 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7938        2, 3, 0,
7939        doc: /* Detect coding system of the text in the region between START and END.
7940 Return a list of possible coding systems ordered by priority.
7941
7942 If only ASCII characters are found (except for such ISO-2022 control
7943 characters as ESC), it returns a list of single element `undecided'
7944 or its subsidiary coding system according to a detected end-of-line
7945 format.
7946
7947 If optional argument HIGHEST is non-nil, return the coding system of
7948 highest priority.  */)
7949      (start, end, highest)
7950      Lisp_Object start, end, highest;
7951 {
7952   EMACS_INT from, to;  /* EMACS_INT, the type detect_coding_system takes for its counts.  */
7953   EMACS_INT from_byte, to_byte;
7954
7955   CHECK_NUMBER_COERCE_MARKER (start);
7956   CHECK_NUMBER_COERCE_MARKER (end);
7957
7958   validate_region (&start, &end);
7959   from = XINT (start), to = XINT (end);
7960   from_byte = CHAR_TO_BYTE (from);
7961   to_byte = CHAR_TO_BYTE (to);
7962   /* If the gap lies inside the region, move it to the region's end so the bytes from FROM_BYTE on are contiguous.  */
7963   if (from < GPT && to >= GPT)
7964     move_gap_both (to, to_byte);
7965
7966   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7967                                to - from, to_byte - from_byte,
7968                                !NILP (highest),
7969                                !NILP (current_buffer
7970                                       ->enable_multibyte_characters),
7971                                Qnil);
7972 }
7973
7974 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7975        1, 2, 0,
7976        doc: /* Detect coding system of the text in STRING.
7977 Return a list of possible coding systems ordered by priority.
7978
7979 If only ASCII characters are found (except for such ISO-2022 control
7980 characters as ESC), it returns a list of single element `undecided'
7981 or its subsidiary coding system according to a detected end-of-line
7982 format.
7983
7984 If optional argument HIGHEST is non-nil, return the coding system of
7985 highest priority.  */)
7986      (string, highest)
7987      Lisp_Object string, highest;
7988 {
7989   CHECK_STRING (string);
7990
7991   return detect_coding_system (SDATA (string),
7992                                SCHARS (string), SBYTES (string),
7993                                !NILP (highest), STRING_MULTIBYTE (string),
7994                                Qnil);
7995 }
7996
7997
7998 static INLINE int
7999 char_encodable_p (c, attrs)
8000      int c;
8001      Lisp_Object attrs;
8002 {
8003   Lisp_Object tail;
8004   struct charset *charset;
8005   Lisp_Object translation_table;
8006
8007   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8008   if (! NILP (translation_table))
8009     c = translate_char (translation_table, c);
8010   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8011        CONSP (tail); tail = XCDR (tail))
8012     {
8013       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8014       if (CHAR_CHARSET_P (c, charset))
8015         break;
8016     }
8017   return (! NILP (tail));
8018 }
8019
8020
8021 /* Return a list of coding systems that safely encode the text between
8022    START and END.  If EXCLUDE is non-nil, it is a list of coding
8023    systems not to check.  The returned list doesn't contain any such
8024    coding systems.  In any case, if the text contains only ASCII or is
8025    unibyte, return t.  */
8026
8027 DEFUN ("find-coding-systems-region-internal",
8028        Ffind_coding_systems_region_internal,
8029        Sfind_coding_systems_region_internal, 2, 3, 0,
8030        doc: /* Internal use only.  */)
8031      (start, end, exclude)
8032      Lisp_Object start, end, exclude;
8033 {
8034   Lisp_Object coding_attrs_list, safe_codings;
8035   EMACS_INT start_byte, end_byte;
8036   const unsigned char *p, *pbeg, *pend;
8037   int c;
8038   Lisp_Object tail, elt;
8039
8040   if (STRINGP (start))
8041     {
8042       if (!STRING_MULTIBYTE (start)
8043           || SCHARS (start) == SBYTES (start))
8044         return Qt;
8045       start_byte = 0;
8046       end_byte = SBYTES (start);
8047     }
8048   else
8049     {
8050       CHECK_NUMBER_COERCE_MARKER (start);
8051       CHECK_NUMBER_COERCE_MARKER (end);
8052       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8053         args_out_of_range (start, end);
8054       if (NILP (current_buffer->enable_multibyte_characters))
8055         return Qt;
8056       start_byte = CHAR_TO_BYTE (XINT (start));
8057       end_byte = CHAR_TO_BYTE (XINT (end));
8058       if (XINT (end) - XINT (start) == end_byte - start_byte)
8059         return Qt;
8060
8061       if (XINT (start) < GPT && XINT (end) > GPT)
8062         {
8063           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8064             move_gap_both (XINT (start), start_byte);
8065           else
8066             move_gap_both (XINT (end), end_byte);
8067         }
8068     }
8069
8070   coding_attrs_list = Qnil;
8071   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8072     if (NILP (exclude)
8073         || NILP (Fmemq (XCAR (tail), exclude)))
8074       {
8075         Lisp_Object attrs;
8076
8077         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8078         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8079             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8080           {
8081             ASET (attrs, coding_attr_trans_tbl,
8082                   get_translation_table (attrs, 1, NULL));
8083             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8084           }
8085       }
8086
8087   if (STRINGP (start))
8088     p = pbeg = SDATA (start);
8089   else
8090     p = pbeg = BYTE_POS_ADDR (start_byte);
8091   pend = p + (end_byte - start_byte);
8092
8093   while (p < pend && ASCII_BYTE_P (*p)) p++;
8094   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8095
8096   while (p < pend)
8097     {
8098       if (ASCII_BYTE_P (*p))
8099         p++;
8100       else
8101         {
8102           c = STRING_CHAR_ADVANCE (p);
8103
8104           charset_map_loaded = 0;
8105           for (tail = coding_attrs_list; CONSP (tail);)
8106             {
8107               elt = XCAR (tail);
8108               if (NILP (elt))
8109                 tail = XCDR (tail);
8110               else if (char_encodable_p (c, elt))
8111                 tail = XCDR (tail);
8112               else if (CONSP (XCDR (tail)))
8113                 {
8114                   XSETCAR (tail, XCAR (XCDR (tail)));
8115                   XSETCDR (tail, XCDR (XCDR (tail)));
8116                 }
8117               else
8118                 {
8119                   XSETCAR (tail, Qnil);
8120                   tail = XCDR (tail);
8121                 }
8122             }
8123           if (charset_map_loaded)
8124             {
8125               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8126
8127               if (STRINGP (start))
8128                 pbeg = SDATA (start);
8129               else
8130                 pbeg = BYTE_POS_ADDR (start_byte);
8131               p = pbeg + p_offset;
8132               pend = pbeg + pend_offset;
8133             }
8134         }
8135     }
8136
8137   safe_codings = list2 (Qraw_text, Qno_conversion);
8138   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8139     if (! NILP (XCAR (tail)))
8140       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8141
8142   return safe_codings;
8143 }
8144
8145
8146 DEFUN ("unencodable-char-position", Funencodable_char_position,
8147        Sunencodable_char_position, 3, 5, 0,
8148        doc: /*
8149 Return position of first un-encodable character in a region.
8150 START and END specify the region and CODING-SYSTEM specifies the
8151 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8152
8153 If optional 4th argument COUNT is non-nil, it specifies at most how
8154 many un-encodable characters to search.  In this case, the value is a
8155 list of positions.
8156
8157 If optional 5th argument STRING is non-nil, it is a string to search
8158 for un-encodable characters.  In that case, START and END are indexes
8159 to the string.  */)
8160      (start, end, coding_system, count, string)
8161      Lisp_Object start, end, coding_system, count, string;
8162 {
8163   int n;
8164   struct coding_system coding;
8165   Lisp_Object attrs, charset_list, translation_table;
8166   Lisp_Object positions;
8167   int from, to;
8168   const unsigned char *p, *stop, *pend;
8169   int ascii_compatible;
8170
8171   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8172   attrs = CODING_ID_ATTRS (coding.id);
8173   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8174     return Qnil;
8175   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8176   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8177   translation_table = get_translation_table (attrs, 1, NULL);
8178
8179   if (NILP (string))
8180     {
8181       validate_region (&start, &end);
8182       from = XINT (start);
8183       to = XINT (end);
8184       if (NILP (current_buffer->enable_multibyte_characters)
8185           || (ascii_compatible
8186               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8187         return Qnil;
8188       p = CHAR_POS_ADDR (from);
8189       pend = CHAR_POS_ADDR (to);
8190       if (from < GPT && to >= GPT)
8191         stop = GPT_ADDR;
8192       else
8193         stop = pend;
8194     }
8195   else
8196     {
8197       CHECK_STRING (string);
8198       CHECK_NATNUM (start);
8199       CHECK_NATNUM (end);
8200       from = XINT (start);
8201       to = XINT (end);
8202       if (from > to
8203           || to > SCHARS (string))
8204         args_out_of_range_3 (string, start, end);
8205       if (! STRING_MULTIBYTE (string))
8206         return Qnil;
8207       p = SDATA (string) + string_char_to_byte (string, from);
8208       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8209       if (ascii_compatible && (to - from) == (pend - p))
8210         return Qnil;
8211     }
8212
8213   if (NILP (count))
8214     n = 1;
8215   else
8216     {
8217       CHECK_NATNUM (count);
8218       n = XINT (count);
8219     }
8220
8221   positions = Qnil;
8222   while (1)
8223     {
8224       int c;
8225
8226       if (ascii_compatible)
8227         while (p < stop && ASCII_BYTE_P (*p))
8228           p++, from++;
8229       if (p >= stop)
8230         {
8231           if (p >= pend)
8232             break;
8233           stop = pend;
8234           p = GAP_END_ADDR;
8235         }
8236
8237       c = STRING_CHAR_ADVANCE (p);
8238       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8239           && ! char_charset (translate_char (translation_table, c),
8240                              charset_list, NULL))
8241         {
8242           positions = Fcons (make_number (from), positions);
8243           n--;
8244           if (n == 0)
8245             break;
8246         }
8247
8248       from++;
8249     }
8250
8251   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8252 }
8253
8254
8255 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8256        Scheck_coding_systems_region, 3, 3, 0,
8257        doc: /* Check if the region is encodable by coding systems.
8258
8259 START and END are buffer positions specifying the region.
8260 CODING-SYSTEM-LIST is a list of coding systems to check.
8261
8262 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8263 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8264 whole region, POS0, POS1, ... are buffer positions where non-encodable
8265 characters are found.
8266
8267 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8268 value is nil.
8269
8270 START may be a string.  In that case, check if the string is
8271 encodable, and the value contains indices to the string instead of
8272 buffer positions.  END is ignored.  */)
8273      (start, end, coding_system_list)
8274      Lisp_Object start, end, coding_system_list;
8275 {
8276   Lisp_Object list;
8277   EMACS_INT start_byte, end_byte;
8278   int pos;
8279   const unsigned char *p, *pbeg, *pend;
8280   int c;
8281   Lisp_Object tail, elt, attrs;
8282
8283   if (STRINGP (start))
8284     {
8285       if (!STRING_MULTIBYTE (start)
8286           && SCHARS (start) != SBYTES (start))
8287         return Qnil;
8288       start_byte = 0;
8289       end_byte = SBYTES (start);
8290       pos = 0;
8291     }
8292   else
8293     {
8294       CHECK_NUMBER_COERCE_MARKER (start);
8295       CHECK_NUMBER_COERCE_MARKER (end);
8296       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8297         args_out_of_range (start, end);
8298       if (NILP (current_buffer->enable_multibyte_characters))
8299         return Qnil;
8300       start_byte = CHAR_TO_BYTE (XINT (start));
8301       end_byte = CHAR_TO_BYTE (XINT (end));
8302       if (XINT (end) - XINT (start) == end_byte - start_byte)
8303         return Qt;
8304
8305       if (XINT (start) < GPT && XINT (end) > GPT)
8306         {
8307           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8308             move_gap_both (XINT (start), start_byte);
8309           else
8310             move_gap_both (XINT (end), end_byte);
8311         }
8312       pos = XINT (start);
8313     }
8314
8315   list = Qnil;
8316   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8317     {
8318       elt = XCAR (tail);
8319       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8320       ASET (attrs, coding_attr_trans_tbl,
8321             get_translation_table (attrs, 1, NULL));
8322       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8323     }
8324
8325   if (STRINGP (start))
8326     p = pbeg = SDATA (start);
8327   else
8328     p = pbeg = BYTE_POS_ADDR (start_byte);
8329   pend = p + (end_byte - start_byte);
8330
8331   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8332   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8333
8334   while (p < pend)
8335     {
8336       if (ASCII_BYTE_P (*p))
8337         p++;
8338       else
8339         {
8340           c = STRING_CHAR_ADVANCE (p);
8341
8342           charset_map_loaded = 0;
8343           for (tail = list; CONSP (tail); tail = XCDR (tail))
8344             {
8345               elt = XCDR (XCAR (tail));
8346               if (! char_encodable_p (c, XCAR (elt)))
8347                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8348             }
8349           if (charset_map_loaded)
8350             {
8351               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8352
8353               if (STRINGP (start))
8354                 pbeg = SDATA (start);
8355               else
8356                 pbeg = BYTE_POS_ADDR (start_byte);
8357               p = pbeg + p_offset;
8358               pend = pbeg + pend_offset;
8359             }
8360         }
8361       pos++;
8362     }
8363
8364   tail = list;
8365   list = Qnil;
8366   for (; CONSP (tail); tail = XCDR (tail))
8367     {
8368       elt = XCAR (tail);
8369       if (CONSP (XCDR (XCDR (elt))))
8370         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8371                       list);
8372     }
8373
8374   return list;
8375 }
8376
8377
8378 Lisp_Object
8379 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8380      Lisp_Object start, end, coding_system, dst_object;
8381      int encodep, norecord;
8382 {
8383   struct coding_system coding;
8384   EMACS_INT from, from_byte, to, to_byte;
8385   Lisp_Object src_object;
8386
8387   CHECK_NUMBER_COERCE_MARKER (start);
8388   CHECK_NUMBER_COERCE_MARKER (end);
8389   if (NILP (coding_system))
8390     coding_system = Qno_conversion;
8391   else
8392     CHECK_CODING_SYSTEM (coding_system);
8393   src_object = Fcurrent_buffer ();
8394   if (NILP (dst_object))
8395     dst_object = src_object;
8396   else if (! EQ (dst_object, Qt))
8397     CHECK_BUFFER (dst_object);
8398
8399   validate_region (&start, &end);
8400   from = XFASTINT (start);
8401   from_byte = CHAR_TO_BYTE (from);
8402   to = XFASTINT (end);
8403   to_byte = CHAR_TO_BYTE (to);
8404
8405   setup_coding_system (coding_system, &coding);
8406   coding.mode |= CODING_MODE_LAST_BLOCK;
8407
8408   if (encodep)
8409     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8410                           dst_object);
8411   else
8412     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8413                           dst_object);
8414   if (! norecord)
8415     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8416
8417   return (BUFFERP (dst_object)
8418           ? make_number (coding.produced_char)
8419           : coding.dst_object);
8420 }
8421
8422
8423 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8424        3, 4, "r\nzCoding system: ",
8425        doc: /* Decode the current region from the specified coding system.
8426 When called from a program, takes four arguments:
8427         START, END, CODING-SYSTEM, and DESTINATION.
8428 START and END are buffer positions.
8429
8430 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8431 If nil, the region between START and END is replaced by the decoded text.
8432 If buffer, the decoded text is inserted in the buffer.
8433 In those cases, the length of the decoded text is returned.
8434 If DESTINATION is t, the decoded text is returned.
8435
8436 This function sets `last-coding-system-used' to the precise coding system
8437 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8438 not fully specified.)  */)
8439      (start, end, coding_system, destination)
8440      Lisp_Object start, end, coding_system, destination;
8441 {
8442   return code_convert_region (start, end, coding_system, destination, 0, 0);
8443 }
8444
8445 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8446        3, 4, "r\nzCoding system: ",
8447        doc: /* Encode the current region by specified coding system.
8448 When called from a program, takes four arguments:
8449         START, END, CODING-SYSTEM and DESTINATION.
8450 START and END are buffer positions.
8451
8452 Optional 4th arguments DESTINATION specifies where the encoded text goes.
8453 If nil, the region between START and END is replace by the encoded text.
8454 If buffer, the encoded text is inserted in the buffer.
8455 In those cases, the length of the encoded text is returned.
8456 If DESTINATION is t, the encoded text is returned.
8457
8458 This function sets `last-coding-system-used' to the precise coding system
8459 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8460 not fully specified.)  */)
8461   (start, end, coding_system, destination)
8462      Lisp_Object start, end, coding_system, destination;
8463 {
8464   return code_convert_region (start, end, coding_system, destination, 1, 0);
8465 }
8466
8467 Lisp_Object
8468 code_convert_string (string, coding_system, dst_object,
8469                      encodep, nocopy, norecord)
8470      Lisp_Object string, coding_system, dst_object;
8471      int encodep, nocopy, norecord;
8472 {
8473   struct coding_system coding;
8474   EMACS_INT chars, bytes;
8475
8476   CHECK_STRING (string);
8477   if (NILP (coding_system))
8478     {
8479       if (! norecord)
8480         Vlast_coding_system_used = Qno_conversion;
8481       if (NILP (dst_object))
8482         return (nocopy ? Fcopy_sequence (string) : string);
8483     }
8484
8485   if (NILP (coding_system))
8486     coding_system = Qno_conversion;
8487   else
8488     CHECK_CODING_SYSTEM (coding_system);
8489   if (NILP (dst_object))
8490     dst_object = Qt;
8491   else if (! EQ (dst_object, Qt))
8492     CHECK_BUFFER (dst_object);
8493
8494   setup_coding_system (coding_system, &coding);
8495   coding.mode |= CODING_MODE_LAST_BLOCK;
8496   chars = SCHARS (string);
8497   bytes = SBYTES (string);
8498   if (encodep)
8499     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8500   else
8501     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8502   if (! norecord)
8503     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8504
8505   return (BUFFERP (dst_object)
8506           ? make_number (coding.produced_char)
8507           : coding.dst_object);
8508 }
8509
8510
8511 /* Encode or decode STRING according to CODING_SYSTEM.
8512    Do not set Vlast_coding_system_used.
8513
8514    This function is called only from macros DECODE_FILE and
8515    ENCODE_FILE, thus we ignore character composition.  */
8516
8517 Lisp_Object
8518 code_convert_string_norecord (string, coding_system, encodep)
8519      Lisp_Object string, coding_system;
8520      int encodep;
8521 {
8522   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
8523 }
8524
8525
8526 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8527        2, 4, 0,
8528        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8529
8530 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8531 if the decoding operation is trivial.
8532
8533 Optional fourth arg BUFFER non-nil means that the decoded text is
8534 inserted in BUFFER instead of returned as a string.  In this case,
8535 the return value is the length of the decoded text.
8536
8537 This function sets `last-coding-system-used' to the precise coding system
8538 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8539 not fully specified.)  */)
8540   (string, coding_system, nocopy, buffer)
8541      Lisp_Object string, coding_system, nocopy, buffer;
8542 {
8543   return code_convert_string (string, coding_system, buffer,
8544                               0, ! NILP (nocopy), 0);
8545 }
8546
8547 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8548        2, 4, 0,
8549        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8550
8551 Optional third arg NOCOPY non-nil means it is OK to return STRING
8552 itself if the encoding operation is trivial.
8553
8554 Optional fourth arg BUFFER non-nil means that the encoded text is
8555 inserted in BUFFER instead of returned as a string.  In this case,
8556 the return value is the length of the encoded text.
8557
8558 This function sets `last-coding-system-used' to the precise coding system
8559 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8560 not fully specified.)  */)
8561      (string, coding_system, nocopy, buffer)
8562      Lisp_Object string, coding_system, nocopy, buffer;
8563 {
8564   return code_convert_string (string, coding_system, buffer,
8565                               1, ! NILP (nocopy), 1);
8566 }
8567
8568 \f
8569 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
8570        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8571 Return the corresponding character.  */)
8572      (code)
8573      Lisp_Object code;
8574 {
8575   Lisp_Object spec, attrs, val;
8576   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8577   int c;
8578
8579   CHECK_NATNUM (code);
8580   c = XFASTINT (code);
8581   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8582   attrs = AREF (spec, 0);
8583
8584   if (ASCII_BYTE_P (c)
8585       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8586     return code;
8587
8588   val = CODING_ATTR_CHARSET_LIST (attrs);
8589   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8590   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8591   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
8592
8593   if (c <= 0x7F)
8594     charset = charset_roman;
8595   else if (c >= 0xA0 && c < 0xDF)
8596     {
8597       charset = charset_kana;
8598       c -= 0x80;
8599     }
8600   else
8601     {
8602       int s1 = c >> 8, s2 = c & 0xFF;
8603
8604       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8605           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8606         error ("Invalid code: %d", code);
8607       SJIS_TO_JIS (c);
8608       charset = charset_kanji;
8609     }
8610   c = DECODE_CHAR (charset, c);
8611   if (c < 0)
8612     error ("Invalid code: %d", code);
8613   return make_number (c);
8614 }
8615
8616
8617 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8618        doc: /* Encode a Japanese character CH to shift_jis encoding.
8619 Return the corresponding code in SJIS.  */)
8620      (ch)
8621     Lisp_Object ch;
8622 {
8623   Lisp_Object spec, attrs, charset_list;
8624   int c;
8625   struct charset *charset;
8626   unsigned code;
8627
8628   CHECK_CHARACTER (ch);
8629   c = XFASTINT (ch);
8630   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8631   attrs = AREF (spec, 0);
8632
8633   if (ASCII_CHAR_P (c)
8634       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8635     return ch;
8636
8637   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8638   charset = char_charset (c, charset_list, &code);
8639   if (code == CHARSET_INVALID_CODE (charset))
8640     error ("Can't encode by shift_jis encoding: %d", c);
8641   JIS_TO_SJIS (code);
8642
8643   return make_number (code);
8644 }
8645
8646 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8647        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8648 Return the corresponding character.  */)
8649      (code)
8650      Lisp_Object code;
8651 {
8652   Lisp_Object spec, attrs, val;
8653   struct charset *charset_roman, *charset_big5, *charset;
8654   int c;
8655
8656   CHECK_NATNUM (code);
8657   c = XFASTINT (code);
8658   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8659   attrs = AREF (spec, 0);
8660
8661   if (ASCII_BYTE_P (c)
8662       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8663     return code;
8664
8665   val = CODING_ATTR_CHARSET_LIST (attrs);
8666   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8667   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8668
8669   if (c <= 0x7F)
8670     charset = charset_roman;
8671   else
8672     {
8673       int b1 = c >> 8, b2 = c & 0x7F;
8674       if (b1 < 0xA1 || b1 > 0xFE
8675           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8676         error ("Invalid code: %d", code);
8677       charset = charset_big5;
8678     }
8679   c = DECODE_CHAR (charset, (unsigned )c);
8680   if (c < 0)
8681     error ("Invalid code: %d", code);
8682   return make_number (c);
8683 }
8684
8685 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8686        doc: /* Encode the Big5 character CH to BIG5 coding system.
8687 Return the corresponding character code in Big5.  */)
8688      (ch)
8689      Lisp_Object ch;
8690 {
8691   Lisp_Object spec, attrs, charset_list;
8692   struct charset *charset;
8693   int c;
8694   unsigned code;
8695
8696   CHECK_CHARACTER (ch);
8697   c = XFASTINT (ch);
8698   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8699   attrs = AREF (spec, 0);
8700   if (ASCII_CHAR_P (c)
8701       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8702     return ch;
8703
8704   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8705   charset = char_charset (c, charset_list, &code);
8706   if (code == CHARSET_INVALID_CODE (charset))
8707     error ("Can't encode by Big5 encoding: %d", c);
8708
8709   return make_number (code);
8710 }
8711
8712 \f
8713 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
8714        Sset_terminal_coding_system_internal, 1, 2, 0,
8715        doc: /* Internal use only.  */)
8716      (coding_system, terminal)
8717      Lisp_Object coding_system;
8718      Lisp_Object terminal;
8719 {
8720   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8721   CHECK_SYMBOL (coding_system);
8722   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
8723   /* We had better not send unsafe characters to terminal.  */
8724   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
8725   /* Characer composition should be disabled.  */
8726   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8727   terminal_coding->src_multibyte = 1;
8728   terminal_coding->dst_multibyte = 0;
8729   return Qnil;
8730 }
8731
8732 DEFUN ("set-safe-terminal-coding-system-internal",
8733        Fset_safe_terminal_coding_system_internal,
8734        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8735        doc: /* Internal use only.  */)
8736      (coding_system)
8737      Lisp_Object coding_system;
8738 {
8739   CHECK_SYMBOL (coding_system);
8740   setup_coding_system (Fcheck_coding_system (coding_system),
8741                        &safe_terminal_coding);
8742   /* Characer composition should be disabled.  */
8743   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8744   safe_terminal_coding.src_multibyte = 1;
8745   safe_terminal_coding.dst_multibyte = 0;
8746   return Qnil;
8747 }
8748
8749 DEFUN ("terminal-coding-system", Fterminal_coding_system,
8750        Sterminal_coding_system, 0, 1, 0,
8751        doc: /* Return coding system specified for terminal output on the given terminal.
8752 TERMINAL may be a terminal id, a frame, or nil for the selected
8753 frame's terminal device.  */)
8754      (terminal)
8755      Lisp_Object terminal;
8756 {
8757   struct coding_system *terminal_coding
8758     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8759   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
8760
8761   /* For backward compatibility, return nil if it is `undecided'. */
8762   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
8763 }
8764
8765 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
8766        Sset_keyboard_coding_system_internal, 1, 2, 0,
8767        doc: /* Internal use only.  */)
8768      (coding_system, terminal)
8769      Lisp_Object coding_system;
8770      Lisp_Object terminal;
8771 {
8772   struct terminal *t = get_terminal (terminal, 1);
8773   CHECK_SYMBOL (coding_system);
8774   setup_coding_system (Fcheck_coding_system (coding_system),
8775                        TERMINAL_KEYBOARD_CODING (t));
8776   /* Characer composition should be disabled.  */
8777   TERMINAL_KEYBOARD_CODING (t)->common_flags
8778     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8779   return Qnil;
8780 }
8781
8782 DEFUN ("keyboard-coding-system",
8783        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
8784        doc: /* Return coding system specified for decoding keyboard input.  */)
8785      (terminal)
8786      Lisp_Object terminal;
8787 {
8788   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8789                          (get_terminal (terminal, 1))->id);
8790 }
8791
8792 \f
8793 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8794        Sfind_operation_coding_system,  1, MANY, 0,
8795        doc: /* Choose a coding system for an operation based on the target name.
8796 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8797 DECODING-SYSTEM is the coding system to use for decoding
8798 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8799 for encoding (in case OPERATION does encoding).
8800
8801 The first argument OPERATION specifies an I/O primitive:
8802   For file I/O, `insert-file-contents' or `write-region'.
8803   For process I/O, `call-process', `call-process-region', or `start-process'.
8804   For network I/O, `open-network-stream'.
8805
8806 The remaining arguments should be the same arguments that were passed
8807 to the primitive.  Depending on which primitive, one of those arguments
8808 is selected as the TARGET.  For example, if OPERATION does file I/O,
8809 whichever argument specifies the file name is TARGET.
8810
8811 TARGET has a meaning which depends on OPERATION:
8812   For file I/O, TARGET is a file name (except for the special case below).
8813   For process I/O, TARGET is a process name.
8814   For network I/O, TARGET is a service name or a port number.
8815
8816 This function looks up what is specified for TARGET in
8817 `file-coding-system-alist', `process-coding-system-alist',
8818 or `network-coding-system-alist' depending on OPERATION.
8819 They may specify a coding system, a cons of coding systems,
8820 or a function symbol to call.
8821 In the last case, we call the function with one argument,
8822 which is a list of all the arguments given to this function.
8823 If the function can't decide a coding system, it can return
8824 `undecided' so that the normal code-detection is performed.
8825
8826 If OPERATION is `insert-file-contents', the argument corresponding to
8827 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
8828 file name to look up, and BUFFER is a buffer that contains the file's
8829 contents (not yet decoded).  If `file-coding-system-alist' specifies a
8830 function to call for FILENAME, that function should examine the
8831 contents of BUFFER instead of reading the file.
8832
8833 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
8834      (nargs, args)
8835      int nargs;
8836      Lisp_Object *args;
8837 {
8838   Lisp_Object operation, target_idx, target, val;
8839   register Lisp_Object chain;
8840
8841   if (nargs < 2)
8842     error ("Too few arguments");
8843   operation = args[0];
8844   if (!SYMBOLP (operation)
8845       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8846     error ("Invalid first argument");
8847   if (nargs < 1 + XINT (target_idx))
8848     error ("Too few arguments for operation: %s",
8849            SDATA (SYMBOL_NAME (operation)));
8850   target = args[XINT (target_idx) + 1];
8851   if (!(STRINGP (target)
8852         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8853             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
8854         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8855     error ("Invalid %dth argument", XINT (target_idx) + 1);
8856   if (CONSP (target))
8857     target = XCAR (target);
8858
8859   chain = ((EQ (operation, Qinsert_file_contents)
8860             || EQ (operation, Qwrite_region))
8861            ? Vfile_coding_system_alist
8862            : (EQ (operation, Qopen_network_stream)
8863               ? Vnetwork_coding_system_alist
8864               : Vprocess_coding_system_alist));
8865   if (NILP (chain))
8866     return Qnil;
8867
8868   for (; CONSP (chain); chain = XCDR (chain))
8869     {
8870       Lisp_Object elt;
8871
8872       elt = XCAR (chain);
8873       if (CONSP (elt)
8874           && ((STRINGP (target)
8875                && STRINGP (XCAR (elt))
8876                && fast_string_match (XCAR (elt), target) >= 0)
8877               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8878         {
8879           val = XCDR (elt);
8880           /* Here, if VAL is both a valid coding system and a valid
8881              function symbol, we return VAL as a coding system.  */
8882           if (CONSP (val))
8883             return val;
8884           if (! SYMBOLP (val))
8885             return Qnil;
8886           if (! NILP (Fcoding_system_p (val)))
8887             return Fcons (val, val);
8888           if (! NILP (Ffboundp (val)))
8889             {
8890               /* We use call1 rather than safe_call1
8891                  so as to get bug reports about functions called here
8892                  which don't handle the current interface.  */
8893               val = call1 (val, Flist (nargs, args));
8894               if (CONSP (val))
8895                 return val;
8896               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8897                 return Fcons (val, val);
8898             }
8899           return Qnil;
8900         }
8901     }
8902   return Qnil;
8903 }
8904
8905 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8906        Sset_coding_system_priority, 0, MANY, 0,
8907        doc: /* Assign higher priority to the coding systems given as arguments.
8908 If multiple coding systems belong to the same category,
8909 all but the first one are ignored.
8910
8911 usage: (set-coding-system-priority &rest coding-systems)  */)
8912      (nargs, args)
8913      int nargs;
8914      Lisp_Object *args;
8915 {
8916   int i, j;
8917   int changed[coding_category_max];
8918   enum coding_category priorities[coding_category_max];
8919
8920   bzero (changed, sizeof changed);
8921
8922   for (i = j = 0; i < nargs; i++)
8923     {
8924       enum coding_category category;
8925       Lisp_Object spec, attrs;
8926
8927       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8928       attrs = AREF (spec, 0);
8929       category = XINT (CODING_ATTR_CATEGORY (attrs));
8930       if (changed[category])
8931         /* Ignore this coding system because a coding system of the
8932            same category already had a higher priority.  */
8933         continue;
8934       changed[category] = 1;
8935       priorities[j++] = category;
8936       if (coding_categories[category].id >= 0
8937           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8938         setup_coding_system (args[i], &coding_categories[category]);
8939       Fset (AREF (Vcoding_category_table, category), args[i]);
8940     }
8941
8942   /* Now we have decided top J priorities.  Reflect the order of the
8943      original priorities to the remaining priorities.  */
8944
8945   for (i = j, j = 0; i < coding_category_max; i++, j++)
8946     {
8947       while (j < coding_category_max
8948              && changed[coding_priorities[j]])
8949         j++;
8950       if (j == coding_category_max)
8951         abort ();
8952       priorities[i] = coding_priorities[j];
8953     }
8954
8955   bcopy (priorities, coding_priorities, sizeof priorities);
8956
8957   /* Update `coding-category-list'.  */
8958   Vcoding_category_list = Qnil;
8959   for (i = coding_category_max - 1; i >= 0; i--)
8960     Vcoding_category_list
8961       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8962                Vcoding_category_list);
8963
8964   return Qnil;
8965 }
8966
8967 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8968        Scoding_system_priority_list, 0, 1, 0,
8969        doc: /* Return a list of coding systems ordered by their priorities.
8970 HIGHESTP non-nil means just return the highest priority one.  */)
8971      (highestp)
8972      Lisp_Object highestp;
8973 {
8974   int i;
8975   Lisp_Object val;
8976
8977   for (i = 0, val = Qnil; i < coding_category_max; i++)
8978     {
8979       enum coding_category category = coding_priorities[i];
8980       int id = coding_categories[category].id;
8981       Lisp_Object attrs;
8982
8983       if (id < 0)
8984         continue;
8985       attrs = CODING_ID_ATTRS (id);
8986       if (! NILP (highestp))
8987         return CODING_ATTR_BASE_NAME (attrs);
8988       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8989     }
8990   return Fnreverse (val);
8991 }
8992
8993 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8994
8995 static Lisp_Object
8996 make_subsidiaries (base)
8997      Lisp_Object base;
8998 {
8999   Lisp_Object subsidiaries;
9000   int base_name_len = SBYTES (SYMBOL_NAME (base));
9001   char *buf = (char *) alloca (base_name_len + 6);
9002   int i;
9003
9004   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9005   subsidiaries = Fmake_vector (make_number (3), Qnil);
9006   for (i = 0; i < 3; i++)
9007     {
9008       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9009       ASET (subsidiaries, i, intern (buf));
9010     }
9011   return subsidiaries;
9012 }
9013
9014
/* Build the attribute vector of a new coding system from ARGS (indexed
   by the coding_arg_* constants), validate the arguments that are
   specific to its coding type, and register the coding system (and,
   when no eol-type is given, its three subsidiaries) in
   Vcoding_system_hash_table, Vcoding_system_list and
   Vcoding_system_alist.  Signals wrong-number-of-arguments when ARGS
   is too short for the coding type.  Always returns nil otherwise.  */
DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
       Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
       doc: /* For internal use only.
usage: (define-coding-system-internal ...)  */)
     (nargs, args)
     int nargs;
     Lisp_Object *args;
{
  Lisp_Object name;
  Lisp_Object spec_vec;		/* [ ATTRS ALIASES EOL_TYPE ] */
  Lisp_Object attrs;		/* Vector of attributes.  */
  Lisp_Object eol_type;
  Lisp_Object aliases;
  Lisp_Object coding_type, charset_list, safe_charsets;
  enum coding_category category;
  Lisp_Object tail, val;
  int max_charset_id = 0;
  int i;

  if (nargs < coding_arg_max)
    goto short_args;

  attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);

  /* --- Attributes common to every coding type.  --- */

  name = args[coding_arg_name];
  CHECK_SYMBOL (name);
  CODING_ATTR_BASE_NAME (attrs) = name;

  /* The mnemonic may be a string or a character.  */
  val = args[coding_arg_mnemonic];
  if (! STRINGP (val))
    CHECK_CHARACTER (val);
  CODING_ATTR_MNEMONIC (attrs) = val;

  coding_type = args[coding_arg_coding_type];
  CHECK_SYMBOL (coding_type);
  CODING_ATTR_TYPE (attrs) = coding_type;

  /* Normalize the charset list into a list of charset IDs and record
     the largest ID seen in MAX_CHARSET_ID.  */
  charset_list = args[coding_arg_charset_list];
  if (SYMBOLP (charset_list))
    {
      /* The symbols `iso-2022' and `emacs-mule' stand for the
	 predefined lists, and are only accepted for the coding type
	 of the same name.  */
      if (EQ (charset_list, Qiso_2022))
	{
	  if (! EQ (coding_type, Qiso_2022))
	    error ("Invalid charset-list");
	  charset_list = Viso_2022_charset_list;
	}
      else if (EQ (charset_list, Qemacs_mule))
	{
	  if (! EQ (coding_type, Qemacs_mule))
	    error ("Invalid charset-list");
	  charset_list = Vemacs_mule_charset_list;
	}
      for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
	if (max_charset_id < XFASTINT (XCAR (tail)))
	  max_charset_id = XFASTINT (XCAR (tail));
    }
  else
    {
      /* Work on a copy, replacing each charset by its ID.  */
      charset_list = Fcopy_sequence (charset_list);
      for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
	{
	  struct charset *charset;

	  val = XCAR (tail);
	  CHECK_CHARSET_GET_CHARSET (val, charset);
	  /* An iso-2022 coding system needs charsets with a
	     non-negative ISO final value, and an emacs-mule one needs
	     charsets with a non-negative emacs-mule ID.  */
	  if (EQ (coding_type, Qiso_2022)
	      ? CHARSET_ISO_FINAL (charset) < 0
	      : EQ (coding_type, Qemacs_mule)
	      ? CHARSET_EMACS_MULE_ID (charset) < 0
	      : 0)
	    error ("Can't handle charset `%s'",
		   SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));

	  XSETCAR (tail, make_number (charset->id));
	  if (max_charset_id < charset->id)
	    max_charset_id = charset->id;
	}
    }
  CODING_ATTR_CHARSET_LIST (attrs) = charset_list;

  /* SAFE_CHARSETS is a string of MAX_CHARSET_ID + 1 elements, all 255
     except for the positions of the listed charset IDs, which are 0.  */
  safe_charsets = Fmake_string (make_number (max_charset_id + 1),
				make_number (255));
  for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
    SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
  CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;

  /* This initial value may be overridden by the type-specific code
     below.  */
  CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];

  /* A translation table is a char-table, a cons, or a symbol.  */
  val = args[coding_arg_decode_translation_table];
  if (! CHAR_TABLE_P (val) && ! CONSP (val))
    CHECK_SYMBOL (val);
  CODING_ATTR_DECODE_TBL (attrs) = val;

  val = args[coding_arg_encode_translation_table];
  if (! CHAR_TABLE_P (val) && ! CONSP (val))
    CHECK_SYMBOL (val);
  CODING_ATTR_ENCODE_TBL (attrs) = val;

  val = args[coding_arg_post_read_conversion];
  CHECK_SYMBOL (val);
  CODING_ATTR_POST_READ (attrs) = val;

  val = args[coding_arg_pre_write_conversion];
  CHECK_SYMBOL (val);
  CODING_ATTR_PRE_WRITE (attrs) = val;

  /* The default character is a space when none is given.  */
  val = args[coding_arg_default_char];
  if (NILP (val))
    CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
  else
    {
      CHECK_CHARACTER (val);
      CODING_ATTR_DEFAULT_CHAR (attrs) = val;
    }

  /* Any non-nil value is stored as t.  */
  val = args[coding_arg_for_unibyte];
  CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;

  val = args[coding_arg_plist];
  CHECK_LIST (val);
  CODING_ATTR_PLIST (attrs) = val;

  /* --- Attributes specific to the coding type.  Each branch also
     decides CATEGORY.  --- */

  if (EQ (coding_type, Qcharset))
    {
      /* Generate a lisp vector of 256 elements.  Each element is nil,
	 integer, or a list of charset IDs.

	 If Nth element is nil, the byte code N is invalid in this
	 coding system.

	 If Nth element is a number NUM, N is the first byte of a
	 charset whose ID is NUM.

	 If Nth element is a list of charset IDs, N is the first byte
	 of one of them.  The list is sorted by dimensions of the
	 charsets.  A charset of smaller dimension comes first. */
      val = Fmake_vector (make_number (256), Qnil);

      for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
	{
	  struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
	  int dim = CHARSET_DIMENSION (charset);
	  /* NOTE(review): code_space[idx] and code_space[idx + 1] are
	     presumably the minimum and maximum of the charset's first
	     byte -- confirm against the charset definition.  */
	  int idx = (dim - 1) * 4;

	  /* One ASCII compatible charset makes the whole coding system
	     ASCII compatible.  */
	  if (CHARSET_ASCII_COMPATIBLE_P (charset))
	    CODING_ATTR_ASCII_COMPAT (attrs) = Qt;

	  for (i = charset->code_space[idx];
	       i <= charset->code_space[idx + 1]; i++)
	    {
	      Lisp_Object tmp, tmp2;
	      int dim2;

	      tmp = AREF (val, i);
	      if (NILP (tmp))
		/* First charset to use this byte.  */
		tmp = XCAR (tail);
	      else if (NUMBERP (tmp))
		{
		  /* Second charset for this byte: make a two-element
		     list, smaller dimension first.  */
		  dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
		  if (dim < dim2)
		    tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
		  else
		    tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
		}
	      else
		{
		  /* Already a list: find the first element whose
		     dimension is larger than DIM.  */
		  for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
		    {
		      dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
		      if (dim < dim2)
			break;
		    }
		  if (NILP (tmp2))
		    /* No such element; append at the end.  */
		    tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
		  else
		    {
		      /* Insert before TMP2 in place: push a copy of
			 TMP2's car behind it, then overwrite its car
			 with the new charset ID.  */
		      XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
		      XSETCAR (tmp2, XCAR (tail));
		    }
		}
	      ASET (val, i, tmp);
	    }
	}
      ASET (attrs, coding_attr_charset_valids, val);
      category = coding_category_charset;
    }
  else if (EQ (coding_type, Qccl))
    {
      Lisp_Object valids;

      if (nargs < coding_arg_ccl_max)
	goto short_args;

      /* CCL programs given as vectors are stored as copies.  */
      val = args[coding_arg_ccl_decoder];
      CHECK_CCL_PROGRAM (val);
      if (VECTORP (val))
	val = Fcopy_sequence (val);
      ASET (attrs, coding_attr_ccl_decoder, val);

      val = args[coding_arg_ccl_encoder];
      CHECK_CCL_PROGRAM (val);
      if (VECTORP (val))
	val = Fcopy_sequence (val);
      ASET (attrs, coding_attr_ccl_encoder, val);

      /* VALIDS is a 256-element string; element N is set to 1 when N
	 is listed in the argument, either as an integer or inside a
	 (FROM . TO) range, and is 0 otherwise.  */
      val = args[coding_arg_ccl_valids];
      valids = Fmake_string (make_number (256), make_number (0));
      for (tail = val; !NILP (tail); tail = Fcdr (tail))
	{
	  int from, to;

	  val = Fcar (tail);
	  if (INTEGERP (val))
	    {
	      from = to = XINT (val);
	      if (from < 0 || from > 255)
		args_out_of_range_3 (val, make_number (0), make_number (255));
	    }
	  else
	    {
	      /* A range: 0 <= FROM <= TO <= 255.  */
	      CHECK_CONS (val);
	      CHECK_NATNUM_CAR (val);
	      CHECK_NATNUM_CDR (val);
	      from = XINT (XCAR (val));
	      if (from > 255)
		args_out_of_range_3 (XCAR (val),
				     make_number (0), make_number (255));
	      to = XINT (XCDR (val));
	      if (to < from || to > 255)
		args_out_of_range_3 (XCDR (val),
				     XCAR (val), make_number (255));
	    }
	  for (i = from; i <= to; i++)
	    SSET (valids, i, 1);
	}
      ASET (attrs, coding_attr_ccl_valids, valids);

      category = coding_category_ccl;
    }
  else if (EQ (coding_type, Qutf_16))
    {
      Lisp_Object bom, endian;

      CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;

      if (nargs < coding_arg_utf16_max)
	goto short_args;

      /* BOM is nil, t, or a cons of two coding systems.  */
      bom = args[coding_arg_utf16_bom];
      if (! NILP (bom) && ! EQ (bom, Qt))
	{
	  CHECK_CONS (bom);
	  val = XCAR (bom);
	  CHECK_CODING_SYSTEM (val);
	  val = XCDR (bom);
	  CHECK_CODING_SYSTEM (val);
	}
      ASET (attrs, coding_attr_utf_bom, bom);

      /* ENDIAN is `big' or `little'; nil means `big'.  */
      endian = args[coding_arg_utf16_endian];
      CHECK_SYMBOL (endian);
      if (NILP (endian))
	endian = Qbig;
      else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
	error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
      ASET (attrs, coding_attr_utf_16_endian, endian);

      /* A cons BOM selects the auto category; otherwise the category
	 depends on the endian and on whether BOM is nil (nosig) or
	 t.  */
      category = (CONSP (bom)
		  ? coding_category_utf_16_auto
		  : NILP (bom)
		  ? (EQ (endian, Qbig)
		     ? coding_category_utf_16_be_nosig
		     : coding_category_utf_16_le_nosig)
		  : (EQ (endian, Qbig)
		     ? coding_category_utf_16_be
		     : coding_category_utf_16_le));
    }
  else if (EQ (coding_type, Qiso_2022))
    {
      Lisp_Object initial, reg_usage, request, flags;
      /* This I shadows the function-level I inside this branch.  */
      int i;

      if (nargs < coding_arg_iso2022_max)
	goto short_args;

      /* In a copy of the INITIAL vector, replace each of the first
	 four elements by a charset ID, or by -1 if it is nil.  */
      initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
      CHECK_VECTOR (initial);
      for (i = 0; i < 4; i++)
	{
	  val = Faref (initial, make_number (i));
	  if (! NILP (val))
	    {
	      struct charset *charset;

	      CHECK_CHARSET_GET_CHARSET (val, charset);
	      ASET (initial, i, make_number (CHARSET_ID (charset)));
	      if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
		CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
	    }
	  else
	    ASET (initial, i, make_number (-1));
	}

      /* REG_USAGE is a cons of two numbers.  */
      reg_usage = args[coding_arg_iso2022_reg_usage];
      CHECK_CONS (reg_usage);
      CHECK_NUMBER_CAR (reg_usage);
      CHECK_NUMBER_CDR (reg_usage);

      /* REQUEST is a list of (CHARSET . REG) with REG less than 4; each
	 CHARSET is replaced by its ID.
	 NOTE(review): Fcopy_sequence presumably copies only the list
	 spine, in which case the XSETCAR below also modifies the
	 caller's conses -- confirm.  */
      request = Fcopy_sequence (args[coding_arg_iso2022_request]);
      for (tail = request; ! NILP (tail); tail = Fcdr (tail))
	{
	  int id;
	  Lisp_Object tmp;

	  val = Fcar (tail);
	  CHECK_CONS (val);
	  tmp = XCAR (val);
	  CHECK_CHARSET_GET_ID (tmp, id);
	  CHECK_NATNUM_CDR (val);
	  if (XINT (XCDR (val)) >= 4)
	    error ("Invalid graphic register number: %d", XINT (XCDR (val)));
	  XSETCAR (val, make_number (id));
	}

      /* The stored flags gain CODING_ISO_FLAG_FULL_SUPPORT when the
	 charset list argument was the symbol `iso-2022'.  I keeps the
	 flags as given and is what the category tests below use.  */
      flags = args[coding_arg_iso2022_flags];
      CHECK_NATNUM (flags);
      i = XINT (flags);
      if (EQ (args[coding_arg_charset_list], Qiso_2022))
	flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);

      ASET (attrs, coding_attr_iso_initial, initial);
      ASET (attrs, coding_attr_iso_usage, reg_usage);
      ASET (attrs, coding_attr_iso_request, request);
      ASET (attrs, coding_attr_iso_flags, flags);
      setup_iso_safe_charsets (attrs);

      /* Choose among the iso-7* categories when the seven-bits flag
	 is set, and among the iso-8* categories otherwise.  */
      if (i & CODING_ISO_FLAG_SEVEN_BITS)
	category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
			  | CODING_ISO_FLAG_SINGLE_SHIFT))
		    ? coding_category_iso_7_else
		    : EQ (args[coding_arg_charset_list], Qiso_2022)
		    ? coding_category_iso_7
		    : coding_category_iso_7_tight);
      else
	{
	  /* ID of the charset in element 1 of INITIAL, or -1.  */
	  int id = XINT (AREF (initial, 1));

	  category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
		       || EQ (args[coding_arg_charset_list], Qiso_2022)
		       || id < 0)
		      ? coding_category_iso_8_else
		      : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
		      ? coding_category_iso_8_1
		      : coding_category_iso_8_2);
	}
      /* Only the iso-8-1 and iso-8-2 categories keep the ASCII
	 compatibility computed so far.  */
      if (category != coding_category_iso_8_1
	  && category != coding_category_iso_8_2)
	CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
    }
  else if (EQ (coding_type, Qemacs_mule))
    {
      if (EQ (args[coding_arg_charset_list], Qemacs_mule))
	ASET (attrs, coding_attr_emacs_mule_full, Qt);
      CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
      category = coding_category_emacs_mule;
    }
  else if (EQ (coding_type, Qshift_jis))
    {

      struct charset *charset;

      /* Expect three or four charsets whose dimensions are 1, 1, 2
	 and, if present, 2.  CHARSET_LIST is advanced as a cursor
	 here; the full list is already stored in ATTRS.  */
      if (XINT (Flength (charset_list)) != 3
	  && XINT (Flength (charset_list)) != 4)
	error ("There should be three or four charsets");

      charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
      if (CHARSET_DIMENSION (charset) != 1)
	error ("Dimension of charset %s is not one",
	       SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
      if (CHARSET_ASCII_COMPATIBLE_P (charset))
	CODING_ATTR_ASCII_COMPAT (attrs) = Qt;

      charset_list = XCDR (charset_list);
      charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
      if (CHARSET_DIMENSION (charset) != 1)
	error ("Dimension of charset %s is not one",
	       SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));

      charset_list = XCDR (charset_list);
      charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
      if (CHARSET_DIMENSION (charset) != 2)
	error ("Dimension of charset %s is not two",
	       SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));

      charset_list = XCDR (charset_list);
      if (! NILP (charset_list))
	{
	  charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
	  if (CHARSET_DIMENSION (charset) != 2)
	    error ("Dimension of charset %s is not two",
		   SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
	}

      category = coding_category_sjis;
      Vsjis_coding_system = name;
    }
  else if (EQ (coding_type, Qbig5))
    {
      struct charset *charset;

      /* Expect exactly two charsets, of dimension 1 and 2.  */
      if (XINT (Flength (charset_list)) != 2)
	error ("There should be just two charsets");

      charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
      if (CHARSET_DIMENSION (charset) != 1)
	error ("Dimension of charset %s is not one",
	       SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
      if (CHARSET_ASCII_COMPATIBLE_P (charset))
	CODING_ATTR_ASCII_COMPAT (attrs) = Qt;

      charset_list = XCDR (charset_list);
      charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
      if (CHARSET_DIMENSION (charset) != 2)
	error ("Dimension of charset %s is not two",
	       SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));

      category = coding_category_big5;
      Vbig5_coding_system = name;
    }
  else if (EQ (coding_type, Qraw_text))
    {
      category = coding_category_raw_text;
      CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
    }
  else if (EQ (coding_type, Qutf_8))
    {
      Lisp_Object bom;

      CODING_ATTR_ASCII_COMPAT (attrs) = Qt;

      if (nargs < coding_arg_utf8_max)
	goto short_args;

      /* BOM is nil, t, or a cons of two coding systems.  */
      bom = args[coding_arg_utf8_bom];
      if (! NILP (bom) && ! EQ (bom, Qt))
	{
	  CHECK_CONS (bom);
	  val = XCAR (bom);
	  CHECK_CODING_SYSTEM (val);
	  val = XCDR (bom);
	  CHECK_CODING_SYSTEM (val);
	}
      ASET (attrs, coding_attr_utf_bom, bom);

      category = (CONSP (bom) ? coding_category_utf_8_auto
		  : NILP (bom) ? coding_category_utf_8_nosig
		  : coding_category_utf_8_sig);
    }
  else if (EQ (coding_type, Qundecided))
    category = coding_category_undecided;
  else
    error ("Invalid coding system type: %s",
	   SDATA (SYMBOL_NAME (coding_type)));

  /* --- Record the category, and push the QCcategory and
     QCascii_compatible_p entries onto the front of the plist.  --- */

  CODING_ATTR_CATEGORY (attrs) = make_number (category);
  CODING_ATTR_PLIST (attrs)
    = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
				CODING_ATTR_PLIST (attrs)));
  CODING_ATTR_PLIST (attrs)
    = Fcons (QCascii_compatible_p,
	     Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
		    CODING_ATTR_PLIST (attrs)));

  /* EOL_TYPE must be nil, `unix', `dos' or `mac'.  */
  eol_type = args[coding_arg_eol_type];
  if (! NILP (eol_type)
      && ! EQ (eol_type, Qunix)
      && ! EQ (eol_type, Qdos)
      && ! EQ (eol_type, Qmac))
    error ("Invalid eol-type");

  aliases = Fcons (name, Qnil);

  if (NILP (eol_type))
    {
      /* No eol-type given: EOL_TYPE becomes the vector of the three
	 subsidiary names, and each subsidiary is registered with the
	 same ATTRS vector and a fixed eol-type.  */
      eol_type = make_subsidiaries (name);
      for (i = 0; i < 3; i++)
	{
	  Lisp_Object this_spec, this_name, this_aliases, this_eol_type;

	  this_name = AREF (eol_type, i);
	  this_aliases = Fcons (this_name, Qnil);
	  this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
	  this_spec = Fmake_vector (make_number (3), attrs);
	  ASET (this_spec, 1, this_aliases);
	  ASET (this_spec, 2, this_eol_type);
	  Fputhash (this_name, this_spec, Vcoding_system_hash_table);
	  Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
	  /* Add the name to the alist only if it is not there yet.  */
	  val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
	  if (NILP (val))
	    Vcoding_system_alist
	      = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
		       Vcoding_system_alist);
	}
    }

  /* Register the coding system itself.  Element 0 of SPEC_VEC is
     ATTRS because the vector is created filled with it.  */
  spec_vec = Fmake_vector (make_number (3), attrs);
  ASET (spec_vec, 1, aliases);
  ASET (spec_vec, 2, eol_type);

  Fputhash (name, spec_vec, Vcoding_system_hash_table);
  Vcoding_system_list = Fcons (name, Vcoding_system_list);
  val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
  if (NILP (val))
    Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
				  Vcoding_system_alist);

  {
    int id = coding_categories[category].id;

    /* Set up the category's coding system when the category has none
       yet (negative ID), or when it is the one being defined here.  */
    if (id < 0 || EQ (name, CODING_ID_NAME (id)))
      setup_coding_system (name, &coding_categories[category]);
  }

  return Qnil;

  /* Reached when ARGS is shorter than the coding type requires.  */
 short_args:
  return Fsignal (Qwrong_number_of_arguments,
		  Fcons (intern ("define-coding-system-internal"),
			 make_number (nargs)));
}
9545
9546
9547 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9548        3, 3, 0,
9549        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
9550   (coding_system, prop, val)
9551      Lisp_Object coding_system, prop, val;
9552 {
9553   Lisp_Object spec, attrs;
9554
9555   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9556   attrs = AREF (spec, 0);
9557   if (EQ (prop, QCmnemonic))
9558     {
9559       if (! STRINGP (val))
9560         CHECK_CHARACTER (val);
9561       CODING_ATTR_MNEMONIC (attrs) = val;
9562     }
9563   else if (EQ (prop, QCdefalut_char))
9564     {
9565       if (NILP (val))
9566         val = make_number (' ');
9567       else
9568         CHECK_CHARACTER (val);
9569       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9570     }
9571   else if (EQ (prop, QCdecode_translation_table))
9572     {
9573       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9574         CHECK_SYMBOL (val);
9575       CODING_ATTR_DECODE_TBL (attrs) = val;
9576     }
9577   else if (EQ (prop, QCencode_translation_table))
9578     {
9579       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9580         CHECK_SYMBOL (val);
9581       CODING_ATTR_ENCODE_TBL (attrs) = val;
9582     }
9583   else if (EQ (prop, QCpost_read_conversion))
9584     {
9585       CHECK_SYMBOL (val);
9586       CODING_ATTR_POST_READ (attrs) = val;
9587     }
9588   else if (EQ (prop, QCpre_write_conversion))
9589     {
9590       CHECK_SYMBOL (val);
9591       CODING_ATTR_PRE_WRITE (attrs) = val;
9592     }
9593   else if (EQ (prop, QCascii_compatible_p))
9594     {
9595       CODING_ATTR_ASCII_COMPAT (attrs) = val;
9596     }
9597
9598   CODING_ATTR_PLIST (attrs)
9599     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9600   return val;
9601 }
9602
9603
9604 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9605        Sdefine_coding_system_alias, 2, 2, 0,
9606        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
9607      (alias, coding_system)
9608      Lisp_Object alias, coding_system;
9609 {
9610   Lisp_Object spec, aliases, eol_type, val;
9611
9612   CHECK_SYMBOL (alias);
9613   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9614   aliases = AREF (spec, 1);
9615   /* ALIASES should be a list of length more than zero, and the first
9616      element is a base coding system.  Append ALIAS at the tail of the
9617      list.  */
9618   while (!NILP (XCDR (aliases)))
9619     aliases = XCDR (aliases);
9620   XSETCDR (aliases, Fcons (alias, Qnil));
9621
9622   eol_type = AREF (spec, 2);
9623   if (VECTORP (eol_type))
9624     {
9625       Lisp_Object subsidiaries;
9626       int i;
9627
9628       subsidiaries = make_subsidiaries (alias);
9629       for (i = 0; i < 3; i++)
9630         Fdefine_coding_system_alias (AREF (subsidiaries, i),
9631                                      AREF (eol_type, i));
9632     }
9633
9634   Fputhash (alias, spec, Vcoding_system_hash_table);
9635   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
9636   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9637   if (NILP (val))
9638     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9639                                   Vcoding_system_alist);
9640
9641   return Qnil;
9642 }
9643
9644 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9645        1, 1, 0,
9646        doc: /* Return the base of CODING-SYSTEM.
9647 Any alias or subsidiary coding system is not a base coding system.  */)
9648   (coding_system)
9649      Lisp_Object coding_system;
9650 {
9651   Lisp_Object spec, attrs;
9652
9653   if (NILP (coding_system))
9654     return (Qno_conversion);
9655   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9656   attrs = AREF (spec, 0);
9657   return CODING_ATTR_BASE_NAME (attrs);
9658 }
9659
9660 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9661        1, 1, 0,
9662        doc: "Return the property list of CODING-SYSTEM.")
9663      (coding_system)
9664      Lisp_Object coding_system;
9665 {
9666   Lisp_Object spec, attrs;
9667
9668   if (NILP (coding_system))
9669     coding_system = Qno_conversion;
9670   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9671   attrs = AREF (spec, 0);
9672   return CODING_ATTR_PLIST (attrs);
9673 }
9674
9675
9676 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9677        1, 1, 0,
9678        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
9679      (coding_system)
9680      Lisp_Object coding_system;
9681 {
9682   Lisp_Object spec;
9683
9684   if (NILP (coding_system))
9685     coding_system = Qno_conversion;
9686   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9687   return AREF (spec, 1);
9688 }
9689
9690 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9691        Scoding_system_eol_type, 1, 1, 0,
9692        doc: /* Return eol-type of CODING-SYSTEM.
9693 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
9694
9695 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9696 and CR respectively.
9697
9698 A vector value indicates that a format of end-of-line should be
9699 detected automatically.  Nth element of the vector is the subsidiary
9700 coding system whose eol-type is N.  */)
9701      (coding_system)
9702      Lisp_Object coding_system;
9703 {
9704   Lisp_Object spec, eol_type;
9705   int n;
9706
9707   if (NILP (coding_system))
9708     coding_system = Qno_conversion;
9709   if (! CODING_SYSTEM_P (coding_system))
9710     return Qnil;
9711   spec = CODING_SYSTEM_SPEC (coding_system);
9712   eol_type = AREF (spec, 2);
9713   if (VECTORP (eol_type))
9714     return Fcopy_sequence (eol_type);
9715   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9716   return make_number (n);
9717 }
9718
9719 #endif /* emacs */
9720
9721 \f
/*** 12. Postamble ***/
9723
9724 void
9725 init_coding_once ()
9726 {
9727   int i;
9728
9729   for (i = 0; i < coding_category_max; i++)
9730     {
9731       coding_categories[i].id = -1;
9732       coding_priorities[i] = i;
9733     }
9734
9735   /* ISO2022 specific initialize routine.  */
9736   for (i = 0; i < 0x20; i++)
9737     iso_code_class[i] = ISO_control_0;
9738   for (i = 0x21; i < 0x7F; i++)
9739     iso_code_class[i] = ISO_graphic_plane_0;
9740   for (i = 0x80; i < 0xA0; i++)
9741     iso_code_class[i] = ISO_control_1;
9742   for (i = 0xA1; i < 0xFF; i++)
9743     iso_code_class[i] = ISO_graphic_plane_1;
9744   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9745   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9746   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9747   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9748   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9749   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9750   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9751   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9752   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9753
9754   for (i = 0; i < 256; i++)
9755     {
9756       emacs_mule_bytes[i] = 1;
9757     }
9758   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9759   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9760   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9761   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9762 }
9763
9764 #ifdef emacs
9765
9766 void
9767 syms_of_coding ()
9768 {
9769   staticpro (&Vcoding_system_hash_table);
9770   {
9771     Lisp_Object args[2];
9772     args[0] = QCtest;
9773     args[1] = Qeq;
9774     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9775   }
9776
9777   staticpro (&Vsjis_coding_system);
9778   Vsjis_coding_system = Qnil;
9779
9780   staticpro (&Vbig5_coding_system);
9781   Vbig5_coding_system = Qnil;
9782
9783   staticpro (&Vcode_conversion_reused_workbuf);
9784   Vcode_conversion_reused_workbuf = Qnil;
9785
9786   staticpro (&Vcode_conversion_workbuf_name);
9787   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9788
9789   reused_workbuf_in_use = 0;
9790
9791   DEFSYM (Qcharset, "charset");
9792   DEFSYM (Qtarget_idx, "target-idx");
9793   DEFSYM (Qcoding_system_history, "coding-system-history");
9794   Fset (Qcoding_system_history, Qnil);
9795
9796   /* Target FILENAME is the first argument.  */
9797   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9798   /* Target FILENAME is the third argument.  */
9799   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9800
9801   DEFSYM (Qcall_process, "call-process");
9802   /* Target PROGRAM is the first argument.  */
9803   Fput (Qcall_process, Qtarget_idx, make_number (0));
9804
9805   DEFSYM (Qcall_process_region, "call-process-region");
9806   /* Target PROGRAM is the third argument.  */
9807   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9808
9809   DEFSYM (Qstart_process, "start-process");
9810   /* Target PROGRAM is the third argument.  */
9811   Fput (Qstart_process, Qtarget_idx, make_number (2));
9812
9813   DEFSYM (Qopen_network_stream, "open-network-stream");
9814   /* Target SERVICE is the fourth argument.  */
9815   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9816
9817   DEFSYM (Qcoding_system, "coding-system");
9818   DEFSYM (Qcoding_aliases, "coding-aliases");
9819
9820   DEFSYM (Qeol_type, "eol-type");
9821   DEFSYM (Qunix, "unix");
9822   DEFSYM (Qdos, "dos");
9823
9824   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9825   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9826   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9827   DEFSYM (Qdefault_char, "default-char");
9828   DEFSYM (Qundecided, "undecided");
9829   DEFSYM (Qno_conversion, "no-conversion");
9830   DEFSYM (Qraw_text, "raw-text");
9831
9832   DEFSYM (Qiso_2022, "iso-2022");
9833
9834   DEFSYM (Qutf_8, "utf-8");
9835   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9836
9837   DEFSYM (Qutf_16, "utf-16");
9838   DEFSYM (Qbig, "big");
9839   DEFSYM (Qlittle, "little");
9840
9841   DEFSYM (Qshift_jis, "shift-jis");
9842   DEFSYM (Qbig5, "big5");
9843
9844   DEFSYM (Qcoding_system_p, "coding-system-p");
9845
9846   DEFSYM (Qcoding_system_error, "coding-system-error");
9847   Fput (Qcoding_system_error, Qerror_conditions,
9848         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9849   Fput (Qcoding_system_error, Qerror_message,
9850         build_string ("Invalid coding system"));
9851
9852   /* Intern this now in case it isn't already done.
9853      Setting this variable twice is harmless.
9854      But don't staticpro it here--that is done in alloc.c.  */
9855   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9856
9857   DEFSYM (Qtranslation_table, "translation-table");
9858   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9859   DEFSYM (Qtranslation_table_id, "translation-table-id");
9860   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9861   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9862
9863   DEFSYM (Qvalid_codes, "valid-codes");
9864
9865   DEFSYM (Qemacs_mule, "emacs-mule");
9866
9867   DEFSYM (QCcategory, ":category");
9868   DEFSYM (QCmnemonic, ":mnemonic");
9869   DEFSYM (QCdefalut_char, ":default-char");
9870   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9871   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9872   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9873   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9874   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
9875
9876   Vcoding_category_table
9877     = Fmake_vector (make_number (coding_category_max), Qnil);
9878   staticpro (&Vcoding_category_table);
9879   /* Followings are target of code detection.  */
9880   ASET (Vcoding_category_table, coding_category_iso_7,
9881         intern ("coding-category-iso-7"));
9882   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9883         intern ("coding-category-iso-7-tight"));
9884   ASET (Vcoding_category_table, coding_category_iso_8_1,
9885         intern ("coding-category-iso-8-1"));
9886   ASET (Vcoding_category_table, coding_category_iso_8_2,
9887         intern ("coding-category-iso-8-2"));
9888   ASET (Vcoding_category_table, coding_category_iso_7_else,
9889         intern ("coding-category-iso-7-else"));
9890   ASET (Vcoding_category_table, coding_category_iso_8_else,
9891         intern ("coding-category-iso-8-else"));
9892   ASET (Vcoding_category_table, coding_category_utf_8_auto,
9893         intern ("coding-category-utf-8-auto"));
9894   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
9895         intern ("coding-category-utf-8"));
9896   ASET (Vcoding_category_table, coding_category_utf_8_sig,
9897         intern ("coding-category-utf-8-sig"));
9898   ASET (Vcoding_category_table, coding_category_utf_16_be,
9899         intern ("coding-category-utf-16-be"));
9900   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9901         intern ("coding-category-utf-16-auto"));
9902   ASET (Vcoding_category_table, coding_category_utf_16_le,
9903         intern ("coding-category-utf-16-le"));
9904   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9905         intern ("coding-category-utf-16-be-nosig"));
9906   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9907         intern ("coding-category-utf-16-le-nosig"));
9908   ASET (Vcoding_category_table, coding_category_charset,
9909         intern ("coding-category-charset"));
9910   ASET (Vcoding_category_table, coding_category_sjis,
9911         intern ("coding-category-sjis"));
9912   ASET (Vcoding_category_table, coding_category_big5,
9913         intern ("coding-category-big5"));
9914   ASET (Vcoding_category_table, coding_category_ccl,
9915         intern ("coding-category-ccl"));
9916   ASET (Vcoding_category_table, coding_category_emacs_mule,
9917         intern ("coding-category-emacs-mule"));
9918   /* Followings are NOT target of code detection.  */
9919   ASET (Vcoding_category_table, coding_category_raw_text,
9920         intern ("coding-category-raw-text"));
9921   ASET (Vcoding_category_table, coding_category_undecided,
9922         intern ("coding-category-undecided"));
9923
9924   DEFSYM (Qinsufficient_source, "insufficient-source");
9925   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9926   DEFSYM (Qinvalid_source, "invalid-source");
9927   DEFSYM (Qinterrupted, "interrupted");
9928   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9929   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
9930
9931   defsubr (&Scoding_system_p);
9932   defsubr (&Sread_coding_system);
9933   defsubr (&Sread_non_nil_coding_system);
9934   defsubr (&Scheck_coding_system);
9935   defsubr (&Sdetect_coding_region);
9936   defsubr (&Sdetect_coding_string);
9937   defsubr (&Sfind_coding_systems_region_internal);
9938   defsubr (&Sunencodable_char_position);
9939   defsubr (&Scheck_coding_systems_region);
9940   defsubr (&Sdecode_coding_region);
9941   defsubr (&Sencode_coding_region);
9942   defsubr (&Sdecode_coding_string);
9943   defsubr (&Sencode_coding_string);
9944   defsubr (&Sdecode_sjis_char);
9945   defsubr (&Sencode_sjis_char);
9946   defsubr (&Sdecode_big5_char);
9947   defsubr (&Sencode_big5_char);
9948   defsubr (&Sset_terminal_coding_system_internal);
9949   defsubr (&Sset_safe_terminal_coding_system_internal);
9950   defsubr (&Sterminal_coding_system);
9951   defsubr (&Sset_keyboard_coding_system_internal);
9952   defsubr (&Skeyboard_coding_system);
9953   defsubr (&Sfind_operation_coding_system);
9954   defsubr (&Sset_coding_system_priority);
9955   defsubr (&Sdefine_coding_system_internal);
9956   defsubr (&Sdefine_coding_system_alias);
9957   defsubr (&Scoding_system_put);
9958   defsubr (&Scoding_system_base);
9959   defsubr (&Scoding_system_plist);
9960   defsubr (&Scoding_system_aliases);
9961   defsubr (&Scoding_system_eol_type);
9962   defsubr (&Scoding_system_priority_list);
9963
9964   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
9965                doc: /* List of coding systems.
9966
9967 Do not alter the value of this variable manually.  This variable should be
9968 updated by the functions `define-coding-system' and
9969 `define-coding-system-alias'.  */);
9970   Vcoding_system_list = Qnil;
9971
9972   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
9973                doc: /* Alist of coding system names.
9974 Each element is one element list of coding system name.
9975 This variable is given to `completing-read' as COLLECTION argument.
9976
9977 Do not alter the value of this variable manually.  This variable should be
9978 updated by the functions `make-coding-system' and
9979 `define-coding-system-alias'.  */);
9980   Vcoding_system_alist = Qnil;
9981
9982   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
9983                doc: /* List of coding-categories (symbols) ordered by priority.
9984
9985 On detecting a coding system, Emacs tries code detection algorithms
9986 associated with each coding-category one by one in this order.  When
9987 one algorithm agrees with a byte sequence of source text, the coding
9988 system bound to the corresponding coding-category is selected.
9989
9990 Don't modify this variable directly, but use `set-coding-priority'.  */);
9991   {
9992     int i;
9993
9994     Vcoding_category_list = Qnil;
9995     for (i = coding_category_max - 1; i >= 0; i--)
9996       Vcoding_category_list
9997         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9998                  Vcoding_category_list);
9999   }
10000
10001   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10002                doc: /* Specify the coding system for read operations.
10003 It is useful to bind this variable with `let', but do not set it globally.
10004 If the value is a coding system, it is used for decoding on read operation.
10005 If not, an appropriate element is used from one of the coding system alists.
10006 There are three such tables: `file-coding-system-alist',
10007 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10008   Vcoding_system_for_read = Qnil;
10009
10010   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10011                doc: /* Specify the coding system for write operations.
10012 Programs bind this variable with `let', but you should not set it globally.
10013 If the value is a coding system, it is used for encoding of output,
10014 when writing it to a file and when sending it to a file or subprocess.
10015
10016 If this does not specify a coding system, an appropriate element
10017 is used from one of the coding system alists.
10018 There are three such tables: `file-coding-system-alist',
10019 `process-coding-system-alist', and `network-coding-system-alist'.
10020 For output to files, if the above procedure does not specify a coding system,
10021 the value of `buffer-file-coding-system' is used.  */);
10022   Vcoding_system_for_write = Qnil;
10023
10024   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10025                doc: /*
10026 Coding system used in the latest file or process I/O.  */);
10027   Vlast_coding_system_used = Qnil;
10028
10029   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10030                doc: /*
10031 Error status of the last code conversion.
10032
10033 When an error was detected in the last code conversion, this variable
10034 is set to one of the following symbols.
10035   `insufficient-source'
10036   `inconsistent-eol'
10037   `invalid-source'
10038   `interrupted'
10039   `insufficient-memory'
10040 When no error was detected, the value doesn't change.  So, to check
10041 the error status of a code conversion by this variable, you must
10042 explicitly set this variable to nil before performing code
10043 conversion.  */);
10044   Vlast_code_conversion_error = Qnil;
10045
10046   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10047                doc: /*
10048 *Non-nil means always inhibit code conversion of end-of-line format.
10049 See info node `Coding Systems' and info node `Text and Binary' concerning
10050 such conversion.  */);
10051   inhibit_eol_conversion = 0;
10052
10053   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10054                doc: /*
10055 Non-nil means process buffer inherits coding system of process output.
10056 Bind it to t if the process output is to be treated as if it were a file
10057 read from some filesystem.  */);
10058   inherit_process_coding_system = 0;
10059
10060   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10061                doc: /*
10062 Alist to decide a coding system to use for a file I/O operation.
10063 The format is ((PATTERN . VAL) ...),
10064 where PATTERN is a regular expression matching a file name,
10065 VAL is a coding system, a cons of coding systems, or a function symbol.
10066 If VAL is a coding system, it is used for both decoding and encoding
10067 the file contents.
10068 If VAL is a cons of coding systems, the car part is used for decoding,
10069 and the cdr part is used for encoding.
10070 If VAL is a function symbol, the function must return a coding system
10071 or a cons of coding systems which are used as above.  The function is
10072 called with an argument that is a list of the arguments with which
10073 `find-operation-coding-system' was called.  If the function can't decide
10074 a coding system, it can return `undecided' so that the normal
10075 code-detection is performed.
10076
10077 See also the function `find-operation-coding-system'
10078 and the variable `auto-coding-alist'.  */);
10079   Vfile_coding_system_alist = Qnil;
10080
10081   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10082                doc: /*
10083 Alist to decide a coding system to use for a process I/O operation.
10084 The format is ((PATTERN . VAL) ...),
10085 where PATTERN is a regular expression matching a program name,
10086 VAL is a coding system, a cons of coding systems, or a function symbol.
10087 If VAL is a coding system, it is used for both decoding what received
10088 from the program and encoding what sent to the program.
10089 If VAL is a cons of coding systems, the car part is used for decoding,
10090 and the cdr part is used for encoding.
10091 If VAL is a function symbol, the function must return a coding system
10092 or a cons of coding systems which are used as above.
10093
10094 See also the function `find-operation-coding-system'.  */);
10095   Vprocess_coding_system_alist = Qnil;
10096
10097   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10098                doc: /*
10099 Alist to decide a coding system to use for a network I/O operation.
10100 The format is ((PATTERN . VAL) ...),
10101 where PATTERN is a regular expression matching a network service name
10102 or is a port number to connect to,
10103 VAL is a coding system, a cons of coding systems, or a function symbol.
10104 If VAL is a coding system, it is used for both decoding what received
10105 from the network stream and encoding what sent to the network stream.
10106 If VAL is a cons of coding systems, the car part is used for decoding,
10107 and the cdr part is used for encoding.
10108 If VAL is a function symbol, the function must return a coding system
10109 or a cons of coding systems which are used as above.
10110
10111 See also the function `find-operation-coding-system'.  */);
10112   Vnetwork_coding_system_alist = Qnil;
10113
10114   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10115                doc: /* Coding system to use with system messages.
10116 Also used for decoding keyboard input on X Window system.  */);
10117   Vlocale_coding_system = Qnil;
10118
10119   /* The eol mnemonics are reset in startup.el system-dependently.  */
10120   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10121                doc: /*
10122 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10123   eol_mnemonic_unix = build_string (":");
10124
10125   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10126                doc: /*
10127 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10128   eol_mnemonic_dos = build_string ("\\");
10129
10130   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10131                doc: /*
10132 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10133   eol_mnemonic_mac = build_string ("/");
10134
10135   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10136                doc: /*
10137 *String displayed in mode line when end-of-line format is not yet determined.  */);
10138   eol_mnemonic_undecided = build_string (":");
10139
10140   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10141                doc: /*
10142 *Non-nil enables character translation while encoding and decoding.  */);
10143   Venable_character_translation = Qt;
10144
10145   DEFVAR_LISP ("standard-translation-table-for-decode",
10146                &Vstandard_translation_table_for_decode,
10147                doc: /* Table for translating characters while decoding.  */);
10148   Vstandard_translation_table_for_decode = Qnil;
10149
10150   DEFVAR_LISP ("standard-translation-table-for-encode",
10151                &Vstandard_translation_table_for_encode,
10152                doc: /* Table for translating characters while encoding.  */);
10153   Vstandard_translation_table_for_encode = Qnil;
10154
10155   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10156                doc: /* Alist of charsets vs revision numbers.
10157 While encoding, if a charset (car part of an element) is found,
10158 designate it with the escape sequence identifying revision (cdr part
10159 of the element).  */);
10160   Vcharset_revision_table = Qnil;
10161
10162   DEFVAR_LISP ("default-process-coding-system",
10163                &Vdefault_process_coding_system,
10164                doc: /* Cons of coding systems used for process I/O by default.
10165 The car part is used for decoding a process output,
10166 the cdr part is used for encoding a text to be sent to a process.  */);
10167   Vdefault_process_coding_system = Qnil;
10168
10169   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10170                doc: /*
10171 Table of extra Latin codes in the range 128..159 (inclusive).
10172 This is a vector of length 256.
10173 If Nth element is non-nil, the existence of code N in a file
10174 \(or output of subprocess) doesn't prevent it to be detected as
10175 a coding system of ISO 2022 variant which has a flag
10176 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10177 or reading output of a subprocess.
10178 Only 128th through 159th elements have a meaning.  */);
10179   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10180
10181   DEFVAR_LISP ("select-safe-coding-system-function",
10182                &Vselect_safe_coding_system_function,
10183                doc: /*
10184 Function to call to select safe coding system for encoding a text.
10185
10186 If set, this function is called to force a user to select a proper
10187 coding system which can encode the text in the case that a default
10188 coding system used in each operation can't encode the text.  The
10189 function should take care that the buffer is not modified while
10190 the coding system is being selected.
10191
10192 The default value is `select-safe-coding-system' (which see).  */);
10193   Vselect_safe_coding_system_function = Qnil;
10194
10195   DEFVAR_BOOL ("coding-system-require-warning",
10196                &coding_system_require_warning,
10197                doc: /* Internal use only.
10198 If non-nil, on writing a file, `select-safe-coding-system-function' is
10199 called even if `coding-system-for-write' is non-nil.  The command
10200 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10201   coding_system_require_warning = 0;
10202
10203
10204   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10205                &inhibit_iso_escape_detection,
10206                doc: /*
10207 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
10208
10209 By default, on reading a file, Emacs tries to detect how the text is
10210 encoded.  This code detection is sensitive to escape sequences.  If
10211 the sequence is valid as ISO2022, the code is determined as one of
10212 the ISO2022 encodings, and the file is decoded by the corresponding
10213 coding system (e.g. `iso-2022-7bit').
10214
10215 However, there may be a case that you want to read escape sequences in
10216 a file as is.  In such a case, you can set this variable to non-nil.
10217 Then, as the code detection ignores any escape sequences, no file is
10218 detected as encoded in some ISO2022 encoding.  The result is that all
10219 escape sequences become visible in a buffer.
10220
10221 The default value is nil, and it is strongly recommended not to change
10222 it.  That is because many Emacs Lisp source files that contain
10223 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10224 in Emacs's distribution, and they won't be decoded correctly on
10225 reading if you suppress escape sequence detection.
10226
10227 The other way to read escape sequences in a file without decoding is
10228 to explicitly specify some coding system that doesn't use ISO2022's
10229 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10230   inhibit_iso_escape_detection = 0;
10231
10232   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10233                doc: /* Char table for translating self-inserting characters.
10234 This is applied to the result of input methods, not their input.
10235 See also `keyboard-translate-table'.  */);
10236     Vtranslation_table_for_input = Qnil;
10237
10238   {
10239     Lisp_Object args[coding_arg_max];
10240     Lisp_Object plist[16];
10241     int i;
10242
10243     for (i = 0; i < coding_arg_max; i++)
10244       args[i] = Qnil;
10245
10246     plist[0] = intern (":name");
10247     plist[1] = args[coding_arg_name] = Qno_conversion;
10248     plist[2] = intern (":mnemonic");
10249     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10250     plist[4] = intern (":coding-type");
10251     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10252     plist[6] = intern (":ascii-compatible-p");
10253     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10254     plist[8] = intern (":default-char");
10255     plist[9] = args[coding_arg_default_char] = make_number (0);
10256     plist[10] = intern (":for-unibyte");
10257     plist[11] = args[coding_arg_for_unibyte] = Qt;
10258     plist[12] = intern (":docstring");
10259     plist[13] = build_string ("Do no conversion.\n\
10260 \n\
10261 When you visit a file with this coding, the file is read into a\n\
10262 unibyte buffer as is, thus each byte of a file is treated as a\n\
10263 character.");
10264     plist[14] = intern (":eol-type");
10265     plist[15] = args[coding_arg_eol_type] = Qunix;
10266     args[coding_arg_plist] = Flist (16, plist);
10267     Fdefine_coding_system_internal (coding_arg_max, args);
10268
10269     plist[1] = args[coding_arg_name] = Qundecided;
10270     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10271     plist[5] = args[coding_arg_coding_type] = Qundecided;
10272     /* This is already set.
10273        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10274     plist[8] = intern (":charset-list");
10275     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10276     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10277     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10278     plist[15] = args[coding_arg_eol_type] = Qnil;
10279     args[coding_arg_plist] = Flist (16, plist);
10280     Fdefine_coding_system_internal (coding_arg_max, args);
10281   }
10282
10283   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10284
10285   {
10286     int i;
10287
10288     for (i = 0; i < coding_category_max; i++)
10289       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10290   }
10291 #if defined (MSDOS) || defined (WINDOWSNT)
10292   system_eol_type = Qdos;
10293 #else
10294   system_eol_type = Qunix;
10295 #endif
10296   staticpro (&system_eol_type);
10297 }
10298
10299 char *
10300 emacs_strerror (error_number)
10301      int error_number;
10302 {
10303   char *str;
10304
10305   synchronize_system_messages_locale ();
10306   str = strerror (error_number);
10307
10308   if (! NILP (Vlocale_coding_system))
10309     {
10310       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10311                                                       Vlocale_coding_system,
10312                                                       0);
10313       str = (char *) SDATA (dec);
10314     }
10315
10316   return str;
10317 }
10318
10319 #endif /* emacs */
10320
10321 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10322    (do not change this comment) */