src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from coding system symbol to attributes vector is done by looking
  63   up Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we setup a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;
 166   ...;
 167   /* Scan until the source runs out or a nonconforming byte appears.  */
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the source is exhausted, jump
 171          to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173       /* Only a byte that is strong evidence of XXX marks it as found.  */
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (__C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source was exhausted successfully.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size.
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source code, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219   /* Decode one character per iteration until source or CHARBUF runs out.  */
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf >= charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that source area and destination area are
 258   overlapped, which means that we can produce encoded text until it
 259   reaches the head of the not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = coding->charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274   /* Stop early enough that one more iteration cannot overrun DST_END.  */
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292
 293 #include "lisp.h"
 294 #include "buffer.h"
 295 #include "character.h"
 296 #include "charset.h"
 297 #include "ccl.h"
 298 #include "composite.h"
 299 #include "coding.h"
 300 #include "window.h"
 301 #include "frame.h"
 302 #include "termhooks.h"
 303 /* Presumably the hash table, described in the general comments, that maps a coding system symbol to its attribute vector.  */
 304 Lisp_Object Vcoding_system_hash_table;
 305 /* Lisp objects named by the Q... / QC... convention (presumably symbols and keywords); they are not initialized in this view.  */
 306 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 307 Lisp_Object Qunix, Qdos;
 308 extern Lisp_Object Qmac;        /* frame.c */
 309 Lisp_Object Qbuffer_file_coding_system;
 310 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 311 Lisp_Object Qdefault_char;
 312 Lisp_Object Qno_conversion, Qundecided;
 313 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 314 Lisp_Object Qbig, Qlittle;
 315 Lisp_Object Qcoding_system_history;
 316 Lisp_Object Qvalid_codes;
 317 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 318 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 319 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 320 Lisp_Object QCascii_compatible_p;
 321 /* NOTE(review): presumably symbols naming I/O operations that use coding systems -- confirm against callers.  */
 322 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 323 Lisp_Object Qcall_process, Qcall_process_region;
 324 Lisp_Object Qstart_process, Qopen_network_stream;
 325 Lisp_Object Qtarget_idx;
 326 /* NOTE(review): presumably symbols describing how a code conversion ended (compare CODING_RESULT_XXX) -- confirm.  */
 327 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 328 Lisp_Object Qinterrupted, Qinsufficient_memory;
 329
 330 extern Lisp_Object Qcompletion_ignore_case;
 331
 332 /* If a symbol has this property, evaluate the value to define the
 333    symbol as a coding system.  */
 334 static Lisp_Object Qcoding_system_define_form;
 335 /* NOTE(review): undocumented flag; its users are outside this view -- TODO describe.  */
 336 int coding_system_require_warning;
 337 /* NOTE(review): undocumented; by its name, a function used to select a safe coding system -- confirm.  */
 338 Lisp_Object Vselect_safe_coding_system_function;
 339
 340 /* Mnemonic strings, one for each format of end-of-line.  */
 341 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 342 /* Mnemonic string to indicate that the format of end-of-line is not
 343    yet decided.  */
 344 Lisp_Object eol_mnemonic_undecided;
 345
 346 /* Format of end-of-line decided by system.  This is Qunix on
 347    Unix and Mac, Qdos on DOS/Windows.
 348    This has an effect only for external encoding (i.e. for output to
 349    file and process), not for in-buffer or Lisp string encoding.  */
 350 static Lisp_Object system_eol_type;
 351
 352 #ifdef emacs
 353 /* NOTE(review): presumably the list and the alist of defined coding systems -- confirm.  */
 354 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 355
 356 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 357
 358 /* Coding systems emacs-mule and raw-text are for converting only
 359    end-of-line format.  */
 360 Lisp_Object Qemacs_mule, Qraw_text;
 361 Lisp_Object Qutf_8_emacs;
 362
 363 /* Coding-systems are handed between Emacs Lisp programs and C internal
 364    routines by the following three variables.  */
 365 /* Coding-system for reading files and receiving data from process.  */
 366 Lisp_Object Vcoding_system_for_read;
 367 /* Coding-system for writing files and sending data to process.  */
 368 Lisp_Object Vcoding_system_for_write;
 369 /* Coding-system actually used in the latest I/O.  */
 370 Lisp_Object Vlast_coding_system_used;
 371 /* Set to non-nil when an error is detected during code conversion.  */
 372 Lisp_Object Vlast_code_conversion_error;
 373 /* A vector of length 256 which contains information about special
 374    Latin codes (especially for dealing with Microsoft codes).  */
 375 Lisp_Object Vlatin_extra_code_table;
 376
 377 /* Flag to inhibit code conversion of end-of-line format.  */
 378 int inhibit_eol_conversion;
 379
 380 /* Flag to inhibit ISO2022 escape sequence detection.  */
 381 int inhibit_iso_escape_detection;
 382
 383 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 384 int inherit_process_coding_system;
 385
 386 /* Coding system to be used to encode text for terminal display when
 387    terminal coding system is nil.  */
 388 struct coding_system safe_terminal_coding;
 389 /* NOTE(review): by their names, alists used to pick coding systems for file, process and network I/O -- confirm.  */
 390 Lisp_Object Vfile_coding_system_alist;
 391 Lisp_Object Vprocess_coding_system_alist;
 392 Lisp_Object Vnetwork_coding_system_alist;
 393 /* NOTE(review): by its name, the coding system associated with the locale -- confirm.  */
 394 Lisp_Object Vlocale_coding_system;
 395
 396 #endif /* emacs */
 397
 398 /* Flag to tell whether we look up translation tables on character
 399    code conversion.  */
 400 Lisp_Object Venable_character_translation;
 401 /* Standard translation table to look up on decoding (reading).  */
 402 Lisp_Object Vstandard_translation_table_for_decode;
 403 /* Standard translation table to look up on encoding (writing).  */
 404 Lisp_Object Vstandard_translation_table_for_encode;
 405 /* Lisp objects (Q prefix, presumably symbols) related to translation tables.  */
 406 Lisp_Object Qtranslation_table;
 407 Lisp_Object Qtranslation_table_id;
 408 Lisp_Object Qtranslation_table_for_decode;
 409 Lisp_Object Qtranslation_table_for_encode;
 410
 411 /* Alist of charsets vs revision number.  */
 412 static Lisp_Object Vcharset_revision_table;
 413
 414 /* Default coding systems used for process I/O.  */
 415 Lisp_Object Vdefault_process_coding_system;
 416
 417 /* Char table for translating Quail and self-inserting input.  */
 418 Lisp_Object Vtranslation_table_for_input;
 419
 420 /* Two special coding systems (for SJIS and BIG5, judging by the names).  */
 421 Lisp_Object Vsjis_coding_system;
 422 Lisp_Object Vbig5_coding_system;
 423
 424 /* ISO2022 section */
 425 /* Element REG of the `coding_attr_iso_initial' attribute of CODING, converted with XINT.  */
 426 #define CODING_ISO_INITIAL(coding, reg)                 \
 427   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 428                      coding_attr_iso_initial),          \
 429                reg)))
 430 /* Value of CODING->safe_charsets[CHARSET_ID], or -1 when CHARSET_ID is greater than CODING->max_charset_id.  */
 431 /* CHARSET_ID is parenthesized so that an expression argument compares as a whole.  */
 432 #define CODING_ISO_REQUEST(coding, charset_id)  \
 433   (((charset_id) <= (coding)->max_charset_id    \
 434     ? (coding)->safe_charsets[charset_id]       \
 435     : -1))
 436 /* Accessors for the ISO2022 state kept in CODING->spec.iso_2022.  */
 437 /* CODING_ISO_INVOKED_CHARSET yields the current designation indexed by the current invocation of PLANE.  */
 438 #define CODING_ISO_FLAGS(coding)        \
 439   ((coding)->spec.iso_2022.flags)
 440 #define CODING_ISO_DESIGNATION(coding, reg)     \
 441   ((coding)->spec.iso_2022.current_designation[reg])
 442 #define CODING_ISO_INVOCATION(coding, plane)    \
 443   ((coding)->spec.iso_2022.current_invocation[plane])
 444 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 445   ((coding)->spec.iso_2022.single_shifting)
 446 #define CODING_ISO_BOL(coding)  \
 447   ((coding)->spec.iso_2022.bol)
 448 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 449   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 450
 451 /* Control characters of ISO2022.  */
 452                         /* code */      /* function */
 453 #define ISO_CODE_LF     0x0A            /* line-feed */
 454 #define ISO_CODE_CR     0x0D            /* carriage-return */
 455 #define ISO_CODE_SO     0x0E            /* shift-out */
 456 #define ISO_CODE_SI     0x0F            /* shift-in */
 457 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 458 #define ISO_CODE_ESC    0x1B            /* escape */
 459 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 460 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 461 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 462
 463 /* Every 1-byte code of ISO2022 is classified into one of the
 464    following classes.  */
 465 enum iso_code_class_type
 466   {
 467     ISO_control_0,              /* Control codes in the range
 468                                    0x00..0x1F and 0x7F, except for the
 469                                    following 4 codes.  */
 470     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 471     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 472     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 473     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 474     ISO_control_1,              /* Control codes in the range
 475                                    0x80..0x9F, except for the
 476                                    following 3 codes.  */
 477     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 478     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 479     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 480     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 481     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 482     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 483     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 484   };
 485
 486 /** The macros CODING_ISO_FLAG_XXX define the flag bits of the
 487     `iso-flags' attribute of an iso2022 coding system.  */
 488
 489 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 490    instead of the correct short-form sequence (e.g. ESC $ A).  */
 491 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 492
 493 /* If set, reset graphic planes and registers at end-of-line to the
 494    initial state.  */
 495 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 496
 497 /* If set, reset graphic planes and registers before any control
 498    characters to the initial state.  */
 499 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 500
 501 /* If set, encode by 7-bit environment.  */
 502 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 503
 504 /* If set, use locking-shift function.  */
 505 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 506
 507 /* If set, use single-shift function.  Overwrite
 508    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 509 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 510
 511 /* If set, use designation escape sequence.  */
 512 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 513
 514 /* If set, produce revision number sequence.  */
 515 #define CODING_ISO_FLAG_REVISION        0x0080
 516
 517 /* If set, produce ISO6429's direction specifying sequence.  */
 518 #define CODING_ISO_FLAG_DIRECTION       0x0100
 519
 520 /* If set, assume designation states are reset at beginning of line on
 521    output.  */
 522 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 523
 524 /* If set, designation sequence should be placed at beginning of line
 525    on output.  */
 526 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 527
 528 /* If set, do not encode unsafe characters on output.  */
 529 #define CODING_ISO_FLAG_SAFE            0x0800
 530
 531 /* If set, extra latin codes (128..159) are accepted as a valid code
 532    on input.  */
 533 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 534 /* NOTE(review): the five flags below carry no description in this file -- TODO document.  */
 535 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 536
 537 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 538
 539 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 540
 541 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 542 /* Bits 0x20000..0x80000 are not assigned by any flag defined here.  */
 543 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 544
 545 /* A character to be produced on output if encoding of the original
 546    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 547 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 548
 549 /* UTF-8 section: accessor for the `utf_8_bom' member of CODING->spec.  */
 550 #define CODING_UTF_8_BOM(coding)        \
 551   ((coding)->spec.utf_8_bom)
 552
 553 /* UTF-16 section: accessors for the members of CODING->spec.utf_16.  */
 554 #define CODING_UTF_16_BOM(coding)       \
 555   ((coding)->spec.utf_16.bom)
 556
 557 #define CODING_UTF_16_ENDIAN(coding)    \
 558   ((coding)->spec.utf_16.endian)
 559
 560 #define CODING_UTF_16_SURROGATE(coding) \
 561   ((coding)->spec.utf_16.surrogate)
 562
 563
 564 /* CCL section: accessors for the CCL entries of CODING's attribute vector.  */
 565 #define CODING_CCL_DECODER(coding)      \
 566   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 567 #define CODING_CCL_ENCODER(coding)      \
 568   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 569 #define CODING_CCL_VALIDS(coding)                                          \
 570   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 571
 572 /* Index for each coding category in `coding_categories'.  */
 573 /* The position of each enumerator is also the bit position of its CATEGORY_MASK_XXX flag.  */
 574 enum coding_category
 575   {
 576     coding_category_iso_7,
 577     coding_category_iso_7_tight,
 578     coding_category_iso_8_1,
 579     coding_category_iso_8_2,
 580     coding_category_iso_7_else,
 581     coding_category_iso_8_else,
 582     coding_category_utf_8_auto,
 583     coding_category_utf_8_nosig,
 584     coding_category_utf_8_sig,
 585     coding_category_utf_16_auto,
 586     coding_category_utf_16_be,
 587     coding_category_utf_16_le,
 588     coding_category_utf_16_be_nosig,
 589     coding_category_utf_16_le_nosig,
 590     coding_category_charset,
 591     coding_category_sjis,
 592     coding_category_big5,
 593     coding_category_ccl,
 594     coding_category_emacs_mule,
 595     /* All above are targets of code detection.  */
 596     coding_category_raw_text,
 597     coding_category_undecided,
 598     coding_category_max         /* Number of categories; used as the size of the per-category arrays.  */
 599   };
 600
 601 /* Definitions of flag bits used in detect_coding_XXXX.  */
 602 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 603 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 604 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 605 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 606 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 607 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 608 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 609 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 610 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 611 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 612 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 613 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 614 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 615 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 616 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 617 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 618 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 619 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 620 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 621 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 622 /* CATEGORY_MASK_ANY is the union of every mask above except CATEGORY_MASK_RAW_TEXT.  */
 623 /* This value is returned if detect_coding_mask () finds nothing other
 624    than ASCII characters.  */
 625 #define CATEGORY_MASK_ANY               \
 626   (CATEGORY_MASK_ISO_7                  \
 627    | CATEGORY_MASK_ISO_7_TIGHT          \
 628    | CATEGORY_MASK_ISO_8_1              \
 629    | CATEGORY_MASK_ISO_8_2              \
 630    | CATEGORY_MASK_ISO_7_ELSE           \
 631    | CATEGORY_MASK_ISO_8_ELSE           \
 632    | CATEGORY_MASK_UTF_8_AUTO           \
 633    | CATEGORY_MASK_UTF_8_NOSIG          \
 634    | CATEGORY_MASK_UTF_8_SIG            \
 635    | CATEGORY_MASK_UTF_16_AUTO          \
 636    | CATEGORY_MASK_UTF_16_BE            \
 637    | CATEGORY_MASK_UTF_16_LE            \
 638    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 639    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 640    | CATEGORY_MASK_CHARSET              \
 641    | CATEGORY_MASK_SJIS                 \
 642    | CATEGORY_MASK_BIG5                 \
 643    | CATEGORY_MASK_CCL                  \
 644    | CATEGORY_MASK_EMACS_MULE)
 645
 646 /* Convenience unions of related category masks.  */
 647 #define CATEGORY_MASK_ISO_7BIT \
 648   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 649
 650 #define CATEGORY_MASK_ISO_8BIT \
 651   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 652
 653 #define CATEGORY_MASK_ISO_ELSE \
 654   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 655
 656 #define CATEGORY_MASK_ISO_ESCAPE        \
 657   (CATEGORY_MASK_ISO_7                  \
 658    | CATEGORY_MASK_ISO_7_TIGHT          \
 659    | CATEGORY_MASK_ISO_7_ELSE           \
 660    | CATEGORY_MASK_ISO_8_ELSE)
 661
 662 #define CATEGORY_MASK_ISO       \
 663   (  CATEGORY_MASK_ISO_7BIT     \
 664      | CATEGORY_MASK_ISO_8BIT   \
 665      | CATEGORY_MASK_ISO_ELSE)
 666
 667 #define CATEGORY_MASK_UTF_16            \
 668   (CATEGORY_MASK_UTF_16_AUTO            \
 669    | CATEGORY_MASK_UTF_16_BE            \
 670    | CATEGORY_MASK_UTF_16_LE            \
 671    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 672    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 673
 674 #define CATEGORY_MASK_UTF_8     \
 675   (CATEGORY_MASK_UTF_8_AUTO     \
 676    | CATEGORY_MASK_UTF_8_NOSIG  \
 677    | CATEGORY_MASK_UTF_8_SIG)
 678
 679 /* List of symbols `coding-category-xxx' ordered by priority.  This
 680    variable is exposed to Emacs Lisp.  */
 681 static Lisp_Object Vcoding_category_list;
 682
 683 /* Table of coding categories (Lisp symbols).  This variable is for
 684    internal use only.  */
 685 static Lisp_Object Vcoding_category_table;
 686
 687 /* Table of coding-categories ordered by priority.  */
 688 static enum coding_category coding_priorities[coding_category_max];
 689
 690 /* Nth element is a coding context for the coding system bound to the
 691    Nth coding category.  */
 692 static struct coding_system coding_categories[coding_category_max];
 693
 694 /*** Commonly used macros and functions ***/
 695 /* min and max evaluate each argument twice, so avoid arguments with side effects.  */
 696 #ifndef min
 697 #define min(a, b) ((a) < (b) ? (a) : (b))
 698 #endif
 699 #ifndef max
 700 #define max(a, b) ((a) > (b) ? (a) : (b))
 701 #endif
 702 /* Set ATTRS to the attribute vector of CODING, and CHARSET_LIST to the charset list taken from ATTRS.  */
 703 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 704   do {                                                  \
 705     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 706     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 707   } while (0)
 708
 709
 710 /* Safely get one byte from the source text pointed by SRC which ends
 711    at SRC_END, and set C to that byte.  If the source is exhausted,
 712    jump to `no_more_source', first recording
 713    CODING_RESULT_INSUFFICIENT_SRC when SRC_BASE < SRC.  If multibytep
 714    is nonzero, a 0xC0/0xC1 lead byte is merged with the byte after it
 715    into one value; any other multibyte character sets C to the negated
 716    character code and records CODING_RESULT_INVALID_SRC.  */
 717 /* The caller declares: src, src_end, src_base, multibytep, consumed_chars, coding.  */
 718 #define ONE_MORE_BYTE(c)                                \
 719   do {                                                  \
 720     if (src == src_end)                                 \
 721       {                                                 \
 722         if (src_base < src)                             \
 723           record_conversion_result                      \
 724             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 725         goto no_more_source;                            \
 726       }                                                 \
 727     c = *src++;                                         \
 728     if (multibytep && (c & 0x80))                       \
 729       {                                                 \
 730         if ((c & 0xFE) == 0xC0)                         \
 731           c = ((c & 1) << 6) | *src++;                  \
 732         else                                            \
 733           {                                             \
 734             src--;                                      \
 735             c = - string_char (src, &src, NULL);        \
 736             record_conversion_result                    \
 737               (coding, CODING_RESULT_INVALID_SRC);      \
 738           }                                             \
 739       }                                                 \
 740     consumed_chars++;                                   \
 741   } while (0)
 742 /* Like ONE_MORE_BYTE, but SRC is not compared against SRC_END and there is no jump to `no_more_source'.  */
 743 /* The caller declares: src, multibytep, consumed_chars, coding.  */
 744 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 745   do {                                                  \
 746     c = *src++;                                         \
 747     if (multibytep && (c & 0x80))                       \
 748       {                                                 \
 749         if ((c & 0xFE) == 0xC0)                         \
 750           c = ((c & 1) << 6) | *src++;                  \
 751         else                                            \
 752           {                                             \
 753             src--;                                      \
 754             c = - string_char (src, &src, NULL);        \
 755             record_conversion_result                    \
 756               (coding, CODING_RESULT_INVALID_SRC);      \
 757           }                                             \
 758       }                                                 \
 759     consumed_chars++;                                   \
 760   } while (0)
 761
 762
 763 /* Store a byte C in the place pointed by DST and increment DST to the
 764    next free point, and increment PRODUCED_CHARS.  The caller should
 765    ensure that C is 0..127, and declare and set the variables `dst'
 766    and `produced_chars' appropriately in advance.
 767 */
 768
 769
 770 #define EMIT_ONE_ASCII_BYTE(c)  \
 771   do {                          \
 772     produced_chars++;           \
 773     *dst++ = (c);               \
 774   } while (0)
 775
 776
 777 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 778
 779 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 780   do {                                  \
 781     produced_chars += 2;                \
 782     *dst++ = (c1), *dst++ = (c2);       \
 783   } while (0)
 784
 785
 786 /* Store a byte C in the place pointed by DST and increment DST to the
 787    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 788    nonzero, store in an appropriate multibyte from.  The caller should
 789    declare and set the variables `dst' and `multibytep' appropriately
 790    in advance.  */
 791
 792 #define EMIT_ONE_BYTE(c)                \
 793   do {                                  \
 794     produced_chars++;                   \
 795     if (multibytep)                     \
 796       {                                 \
 797         int ch = (c);                   \
 798         if (ch >= 0x80)                 \
 799           ch = BYTE8_TO_CHAR (ch);      \
 800         CHAR_STRING_ADVANCE (ch, dst);  \
 801       }                                 \
 802     else                                \
 803       *dst++ = (c);                     \
 804   } while (0)
 805
 806
 807 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 808
 809 #define EMIT_TWO_BYTES(c1, c2)          \
 810   do {                                  \
 811     produced_chars += 2;                \
 812     if (multibytep)                     \
 813       {                                 \
 814         int ch;                         \
 815                                         \
 816         ch = (c1);                      \
 817         if (ch >= 0x80)                 \
 818           ch = BYTE8_TO_CHAR (ch);      \
 819         CHAR_STRING_ADVANCE (ch, dst);  \
 820         ch = (c2);                      \
 821         if (ch >= 0x80)                 \
 822           ch = BYTE8_TO_CHAR (ch);      \
 823         CHAR_STRING_ADVANCE (ch, dst);  \
 824       }                                 \
 825     else                                \
 826       {                                 \
 827         *dst++ = (c1);                  \
 828         *dst++ = (c2);                  \
 829       }                                 \
 830   } while (0)
 831
 832
 833 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 834   do {                                  \
 835     EMIT_ONE_BYTE (c1);                 \
 836     EMIT_TWO_BYTES (c2, c3);            \
 837   } while (0)
 838
 839
 840 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 841   do {                                          \
 842     EMIT_TWO_BYTES (c1, c2);                    \
 843     EMIT_TWO_BYTES (c3, c4);                    \
 844   } while (0)
 845
 846
/* Prototypes for static functions.  */

/* Bookkeeping of the status of a conversion.  */
static void record_conversion_result P_ ((struct coding_system *coding,
                                          enum coding_result_code result));

/* Detector, decoder and encoder, one trio per kind of coding system.  */
static int detect_coding_utf_8 P_ ((struct coding_system *,
                                    struct coding_detection_info *info));
static void decode_coding_utf_8 P_ ((struct coding_system *));
static int encode_coding_utf_8 P_ ((struct coding_system *));

static int detect_coding_utf_16 P_ ((struct coding_system *,
                                     struct coding_detection_info *info));
static void decode_coding_utf_16 P_ ((struct coding_system *));
static int encode_coding_utf_16 P_ ((struct coding_system *));

static int detect_coding_iso_2022 P_ ((struct coding_system *,
                                       struct coding_detection_info *info));
static void decode_coding_iso_2022 P_ ((struct coding_system *));
static int encode_coding_iso_2022 P_ ((struct coding_system *));

static int detect_coding_emacs_mule P_ ((struct coding_system *,
                                         struct coding_detection_info *info));
static void decode_coding_emacs_mule P_ ((struct coding_system *));
static int encode_coding_emacs_mule P_ ((struct coding_system *));

static int detect_coding_sjis P_ ((struct coding_system *,
                                   struct coding_detection_info *info));
static void decode_coding_sjis P_ ((struct coding_system *));
static int encode_coding_sjis P_ ((struct coding_system *));

static int detect_coding_big5 P_ ((struct coding_system *,
                                   struct coding_detection_info *info));
static void decode_coding_big5 P_ ((struct coding_system *));
static int encode_coding_big5 P_ ((struct coding_system *));

static int detect_coding_ccl P_ ((struct coding_system *,
                                  struct coding_detection_info *info));
static void decode_coding_ccl P_ ((struct coding_system *));
static int encode_coding_ccl P_ ((struct coding_system *));

/* Raw text has a decoder and an encoder but no detector here.  */
static void decode_coding_raw_text P_ ((struct coding_system *));
static int encode_coding_raw_text P_ ((struct coding_system *));

/* Source/destination pointer maintenance and destination growth.  */
static void coding_set_source P_ ((struct coding_system *));
static void coding_set_destination P_ ((struct coding_system *));
static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
static void coding_alloc_by_making_gap P_ ((struct coding_system *,
                                            EMACS_INT, EMACS_INT));
static unsigned char *alloc_destination P_ ((struct coding_system *,
                                             EMACS_INT, unsigned char *));

/* Remaining helpers, defined later in this file.  */
static void setup_iso_safe_charsets P_ ((Lisp_Object));
static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
                                                     int *, int *,
                                                     unsigned char *));
static int detect_eol P_ ((const unsigned char *,
                           EMACS_INT, enum coding_category));
static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
static void decode_eol P_ ((struct coding_system *));
static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
                                        int, int *, int *));
static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
static INLINE void produce_composition P_ ((struct coding_system *, int *,
                                            EMACS_INT));
static INLINE void produce_charset P_ ((struct coding_system *, int *,
                                        EMACS_INT));
static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
static int decode_coding P_ ((struct coding_system *));
static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
                                                      struct coding_system *,
                                                      int *, EMACS_INT *));
static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
                                                  struct coding_system *,
                                                  int *, EMACS_INT *));
static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
static int encode_coding P_ ((struct coding_system *));
static Lisp_Object make_conversion_work_buffer P_ ((int));
static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
static INLINE int char_encodable_p P_ ((int, Lisp_Object));
static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 925
 926 static void
 927 record_conversion_result (struct coding_system *coding,
 928                           enum coding_result_code result)
 929 {
 930   coding->result = result;
 931   switch (result)
 932     {
 933     case CODING_RESULT_INSUFFICIENT_SRC:
 934       Vlast_code_conversion_error = Qinsufficient_source;
 935       break;
 936     case CODING_RESULT_INCONSISTENT_EOL:
 937       Vlast_code_conversion_error = Qinconsistent_eol;
 938       break;
 939     case CODING_RESULT_INVALID_SRC:
 940       Vlast_code_conversion_error = Qinvalid_source;
 941       break;
 942     case CODING_RESULT_INTERRUPT:
 943       Vlast_code_conversion_error = Qinterrupted;
 944       break;
 945     case CODING_RESULT_INSUFFICIENT_MEM:
 946       Vlast_code_conversion_error = Qinsufficient_memory;
 947       break;
 948     default:
 949       Vlast_code_conversion_error = intern ("Unknown error");
 950     }
 951 }
 952
 953 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 954   do {                                                                       \
 955     charset_map_loaded = 0;                                                  \
 956     c = DECODE_CHAR (charset, code);                                         \
 957     if (charset_map_loaded)                                                  \
 958       {                                                                      \
 959         const unsigned char *orig = coding->source;                          \
 960         EMACS_INT offset;                                                    \
 961                                                                              \
 962         coding_set_source (coding);                                          \
 963         offset = coding->source - orig;                                      \
 964         src += offset;                                                       \
 965         src_base += offset;                                                  \
 966         src_end += offset;                                                   \
 967       }                                                                      \
 968   } while (0)
 969
 970
 971 /* If there are at least BYTES length of room at dst, allocate memory
 972    for coding->destination and update dst and dst_end.  We don't have
 973    to take care of coding->source which will be relocated.  It is
 974    handled by calling coding_set_source in encode_coding.  */
 975
 976 #define ASSURE_DESTINATION(bytes)                               \
 977   do {                                                          \
 978     if (dst + (bytes) >= dst_end)                               \
 979       {                                                         \
 980         int more_bytes = charbuf_end - charbuf + (bytes);       \
 981                                                                 \
 982         dst = alloc_destination (coding, more_bytes, dst);      \
 983         dst_end = coding->destination + coding->dst_bytes;      \
 984       }                                                         \
 985   } while (0)
 986
 987
 988 /* Store multibyte form of the character C in P, and advance P to the
 989    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
 990    never calls MAYBE_UNIFY_CHAR.  */
 991
 992 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
 993   do {                                          \
 994     if ((c) <= MAX_1_BYTE_CHAR)                 \
 995       *(p)++ = (c);                             \
 996     else if ((c) <= MAX_2_BYTE_CHAR)            \
 997       *(p)++ = (0xC0 | ((c) >> 6)),             \
 998         *(p)++ = (0x80 | ((c) & 0x3F));         \
 999     else if ((c) <= MAX_3_BYTE_CHAR)            \
1000       *(p)++ = (0xE0 | ((c) >> 12)),            \
1001         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1002         *(p)++ = (0x80 | ((c) & 0x3F));         \
1003     else if ((c) <= MAX_4_BYTE_CHAR)            \
1004       *(p)++ = (0xF0 | (c >> 18)),              \
1005         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1006         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1007         *(p)++ = (0x80 | (c & 0x3F));           \
1008     else if ((c) <= MAX_5_BYTE_CHAR)            \
1009       *(p)++ = 0xF8,                            \
1010         *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
1011         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1012         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1013         *(p)++ = (0x80 | (c & 0x3F));           \
1014     else                                        \
1015       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1016   } while (0)
1017
1018
/* Yield the character code of the character whose multibyte form
   starts at P, and move P just past that form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length (1 to 5 bytes) is chosen from the high bits of the
   leading byte.  A 2-byte form whose leading byte is below 0xC2 gets
   0x3FFF80 ORed in, the offset that CHAR_STRING_ADVANCE_NO_UNIFY
   subtracts before calling BYTE8_STRING.  In the 5-byte form the
   leading byte contributes no bits.  P is evaluated many times and is
   modified, so it must be a side-effect-free pointer lvalue.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  (((p)[0] & 0x80) == 0                                         \
   ? *(p)++                                                     \
   : ((p)[0] & 0x20) == 0                                       \
   ? ((p) += 2,                                                 \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)        \
       | (((p)[-2] & 0x1F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p)[0] & 0x10) == 0                                       \
   ? ((p) += 3,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x0F) << 12)))                             \
   : ((p)[0] & 0x08) == 0                                       \
   ? ((p) += 4,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-4] & 0x0F) << 18)))                             \
   : ((p) += 5,                                                 \
      (((p)[-1] & 0x3F)                                         \
       | (((p)[-2] & 0x3F) << 6)                                \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-4] & 0x3F) << 18))))
1047
1048
1049 static void
1050 coding_set_source (coding)
1051      struct coding_system *coding;
1052 {
1053   if (BUFFERP (coding->src_object))
1054     {
1055       struct buffer *buf = XBUFFER (coding->src_object);
1056
1057       if (coding->src_pos < 0)
1058         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1059       else
1060         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1061     }
1062   else if (STRINGP (coding->src_object))
1063     {
1064       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1065     }
1066   else
1067     /* Otherwise, the source is C string and is never relocated
1068        automatically.  Thus we don't have to update anything.  */
1069     ;
1070 }
1071
1072 static void
1073 coding_set_destination (coding)
1074      struct coding_system *coding;
1075 {
1076   if (BUFFERP (coding->dst_object))
1077     {
1078       if (coding->src_pos < 0)
1079         {
1080           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1081           coding->dst_bytes = (GAP_END_ADDR
1082                                - (coding->src_bytes - coding->consumed)
1083                                - coding->destination);
1084         }
1085       else
1086         {
1087           /* We are sure that coding->dst_pos_byte is before the gap
1088              of the buffer. */
1089           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1090                                  + coding->dst_pos_byte - BEG_BYTE);
1091           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1092                                - coding->destination);
1093         }
1094     }
1095   else
1096     /* Otherwise, the destination is C string and is never relocated
1097        automatically.  Thus we don't have to update anything.  */
1098     ;
1099 }
1100
1101
1102 static void
1103 coding_alloc_by_realloc (coding, bytes)
1104      struct coding_system *coding;
1105      EMACS_INT bytes;
1106 {
1107   coding->destination = (unsigned char *) xrealloc (coding->destination,
1108                                                     coding->dst_bytes + bytes);
1109   coding->dst_bytes += bytes;
1110 }
1111
1112 static void
1113 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1114      struct coding_system *coding;
1115      EMACS_INT gap_head_used, bytes;
1116 {
1117   if (EQ (coding->src_object, coding->dst_object))
1118     {
1119       /* The gap may contain the produced data at the head and not-yet
1120          consumed data at the tail.  To preserve those data, we at
1121          first make the gap size to zero, then increase the gap
1122          size.  */
1123       EMACS_INT add = GAP_SIZE;
1124
1125       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1126       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1127       make_gap (bytes);
1128       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1129       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1130     }
1131   else
1132     {
1133       Lisp_Object this_buffer;
1134
1135       this_buffer = Fcurrent_buffer ();
1136       set_buffer_internal (XBUFFER (coding->dst_object));
1137       make_gap (bytes);
1138       set_buffer_internal (XBUFFER (this_buffer));
1139     }
1140 }
1141
1142
1143 static unsigned char *
1144 alloc_destination (coding, nbytes, dst)
1145      struct coding_system *coding;
1146      EMACS_INT nbytes;
1147      unsigned char *dst;
1148 {
1149   EMACS_INT offset = dst - coding->destination;
1150
1151   if (BUFFERP (coding->dst_object))
1152     {
1153       struct buffer *buf = XBUFFER (coding->dst_object);
1154
1155       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1156     }
1157   else
1158     coding_alloc_by_realloc (coding, nbytes);
1159   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1160   coding_set_destination (coding);
1161   dst = coding->destination + offset;
1162   return dst;
1163 }
1164
1165 /** Macros for annotations.  */
1166
/* Maximum length of annotation data (sum of annotations for
   composition and charset).  The first 4 matches the length given by
   ADD_COMPOSITION_DATA and the last 4 the one given by
   ADD_CHARSET_DATA; the middle term presumably allows for
   MAX_COMPOSITION_COMPONENTS components with a rule between each
   pair -- confirm against the composition code.  */
#define MAX_ANNOTATION_LENGTH \
  (4 + ((MAX_COMPOSITION_COMPONENTS * 2) - 1) + 4)
1170
/* Annotation data is stored in the array coding->charbuf in this
   format:
     [ -LENGTH ANNOTATION_MASK NCHARS ... ]
   LENGTH is the number of elements in the annotation.
   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
   NCHARS is the number of characters in the text annotated.

   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... METHOD [ COMPOSITION-COMPONENTS ... ]
   METHOD is one of enum composition_method.
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.

   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
   follows.  */
1189
/* Store the three leading elements of an annotation at BUF, namely
   -LEN, MASK and NCHARS, advance BUF past them, and set
   coding->annotated.  The caller must have `coding' in scope.

   The expansion deliberately ends without a semicolon, so that the
   caller's own `;' completes the statement and the macro can be used
   in an unbraced `if ... else'.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1197
/* Store a composition annotation at BUF and advance BUF past it: the
   three leading elements written by ADD_ANNOTATION_DATA (with length
   4 and CODING_ANNOTATE_COMPOSITION_MASK) followed by METHOD.  NCHARS
   is the number of characters annotated.  BUF and METHOD are
   parenthesized so that expression arguments such as `*pp' behave the
   same here as in ADD_ANNOTATION_DATA.  */

#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)
1203
1204
/* Store a charset annotation at BUF and advance BUF past it: the
   three leading elements written by ADD_ANNOTATION_DATA (with length
   4 and CODING_ANNOTATE_CHARSET_MASK) followed by the charset ID.
   NCHARS is the number of characters annotated.  BUF and ID are
   parenthesized so that expression arguments such as `*pp' behave the
   same here as in ADD_ANNOTATION_DATA.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1210
1211 \f
1212 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1213
1214
1215
1216 \f
1217 /*** 3. UTF-8 ***/
1218
1219 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1220    Check if a text is encoded in UTF-8.  If it is, return 1, else
1221    return 0.  */
1222
/* Predicates on one octet C of a UTF-8 sequence.  Each one looks only
   at the high bits of C.  Note that UTF_8_1_OCTET_P is also true for
   a negative C.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)            /* below 0x80 */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)  /* 10xxxxxx */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)  /* 110xxxxx */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)  /* 1110xxxx */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)  /* 11110xxx */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)  /* 111110xx */

/* The byte order mark as a character code, and the three octets that
   encode it in UTF-8.  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1234
/* Scan the source of CODING, starting after its CODING->head_ascii
   leading bytes, and report in DETECT_INFO whether it can be UTF-8.

   CATEGORY_MASK_UTF_8 is always added to DETECT_INFO->checked.  The
   function returns 0, with CATEGORY_MASK_UTF_8 added to
   DETECT_INFO->rejected, when it meets a malformed sequence, or when
   the last block ends in the middle of a sequence.  Otherwise it
   returns 1; DETECT_INFO->found and ->rejected are then updated
   according to whether a signature (BOM) starts the source and
   whether any multi-octet sequence was seen.  */

static int
detect_coding_utf_8 (coding, detect_info)
     struct coding_system *coding;
     struct coding_detection_info *detect_info;
{
  /* src_end, multibytep and consumed_chars are not used directly
     below; presumably they are referenced by ONE_MORE_BYTE.  */
  const unsigned char *src = coding->source, *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  int bom_found = 0;
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_UTF_8;
  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  /* Each iteration checks one sequence: a leading octet followed by
     as many extra octets as the leading octet calls for.  The loop is
     left by `break' on a malformed sequence; the only other exit is
     the label no_more_source, presumably reached from ONE_MORE_BYTE
     when the source is exhausted.  */
  while (1)
    {
      int c, c1, c2, c3, c4;

      src_base = src;
      ONE_MORE_BYTE (c);
      if (c < 0 || UTF_8_1_OCTET_P (c))
        continue;
      ONE_MORE_BYTE (c1);
      if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
        break;
      if (UTF_8_2_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c2);
      if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
        break;
      if (UTF_8_3_OCTET_LEADING_P (c))
        {
          found = 1;
          /* Only a sequence at the very start of the source can be a
             signature.  */
          if (src_base == coding->source
              && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
            bom_found = 1;
          continue;
        }
      ONE_MORE_BYTE (c3);
      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
        break;
      if (UTF_8_4_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c4);
      if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
        break;
      if (UTF_8_5_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      break;
    }
  /* A malformed sequence was found.  */
  detect_info->rejected |= CATEGORY_MASK_UTF_8;
  return 0;

 no_more_source:
  /* The source ran out.  If that happened in the middle of a sequence
     and no more data will follow, the text is not UTF-8.  */
  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8;
      return 0;
    }
  if (bom_found)
    {
      /* A leading U+FEFF (octets EF BB BF) doesn't necessarily mean a
         BOM, so both the with-signature and the without-signature
         categories stay possible.  */
      detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
    }
  else
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
      if (found)
        detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
    }
  return 1;
}
1318
1319
1320 static void
1321 decode_coding_utf_8 (coding)
1322      struct coding_system *coding;
1323 {
1324   const unsigned char *src = coding->source + coding->consumed;
1325   const unsigned char *src_end = coding->source + coding->src_bytes;
1326   const unsigned char *src_base;
1327   int *charbuf = coding->charbuf + coding->charbuf_used;
1328   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1329   int consumed_chars = 0, consumed_chars_base = 0;
1330   int multibytep = coding->src_multibyte;
1331   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1332   Lisp_Object attr, charset_list;
1333   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1334   int byte_after_cr = -1;
1335
1336   CODING_GET_INFO (coding, attr, charset_list);
1337
1338   if (bom != utf_without_bom)
1339     {
1340       int c1, c2, c3;
1341
1342       src_base = src;
1343       ONE_MORE_BYTE (c1);
1344       if (! UTF_8_3_OCTET_LEADING_P (c1))
1345         src = src_base;
1346       else
1347         {
1348           ONE_MORE_BYTE (c2);
1349           if (! UTF_8_EXTRA_OCTET_P (c2))
1350             src = src_base;
1351           else
1352             {
1353               ONE_MORE_BYTE (c3);
1354               if (! UTF_8_EXTRA_OCTET_P (c3))
1355                 src = src_base;
1356               else
1357                 {
1358                   if ((c1 != UTF_8_BOM_1)
1359                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1360                     src = src_base;
1361                   else
1362                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1363                 }
1364             }
1365         }
1366     }
1367   CODING_UTF_8_BOM (coding) = utf_without_bom;
1368
1369
1370
1371   while (1)
1372     {
1373       int c, c1, c2, c3, c4, c5;
1374
1375       src_base = src;
1376       consumed_chars_base = consumed_chars;
1377
1378       if (charbuf >= charbuf_end)
1379         {
1380           if (byte_after_cr >= 0)
1381             src_base--;
1382           break;
1383         }
1384
1385       if (byte_after_cr >= 0)
1386         c1 = byte_after_cr, byte_after_cr = -1;
1387       else
1388         ONE_MORE_BYTE (c1);
1389       if (c1 < 0)
1390         {
1391           c = - c1;
1392         }
1393       else if (UTF_8_1_OCTET_P(c1))
1394         {
1395           if (eol_crlf && c1 == '\r')
1396             ONE_MORE_BYTE (byte_after_cr);
1397           c = c1;
1398         }
1399       else
1400         {
1401           ONE_MORE_BYTE (c2);
1402           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1403             goto invalid_code;
1404           if (UTF_8_2_OCTET_LEADING_P (c1))
1405             {
1406               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1407               /* Reject overlong sequences here and below.  Encoders
1408                  producing them are incorrect, they can be misleading,
1409                  and they mess up read/write invariance.  */
1410               if (c < 128)
1411                 goto invalid_code;
1412             }
1413           else
1414             {
1415               ONE_MORE_BYTE (c3);
1416               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1417                 goto invalid_code;
1418               if (UTF_8_3_OCTET_LEADING_P (c1))
1419                 {
1420                   c = (((c1 & 0xF) << 12)
1421                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1422                   if (c < 0x800
1423                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1424                     goto invalid_code;
1425                 }
1426               else
1427                 {
1428                   ONE_MORE_BYTE (c4);
1429                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1430                     goto invalid_code;
1431                   if (UTF_8_4_OCTET_LEADING_P (c1))
1432                     {
1433                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1434                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1435                     if (c < 0x10000)
1436                       goto invalid_code;
1437                     }
1438                   else
1439                     {
1440                       ONE_MORE_BYTE (c5);
1441                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1442                         goto invalid_code;
1443                       if (UTF_8_5_OCTET_LEADING_P (c1))
1444                         {
1445                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1446                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1447                                | (c5 & 0x3F));
1448                           if ((c > MAX_CHAR) || (c < 0x200000))
1449                             goto invalid_code;
1450                         }
1451                       else
1452                         goto invalid_code;
1453                     }
1454                 }
1455             }
1456         }
1457
1458       *charbuf++ = c;
1459       continue;
1460
1461     invalid_code:
1462       src = src_base;
1463       consumed_chars = consumed_chars_base;
1464       ONE_MORE_BYTE (c);
1465       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1466       coding->errors++;
1467     }
1468
1469  no_more_source:
1470   coding->consumed_char += consumed_chars_base;
1471   coding->consumed = src_base - coding->source;
1472   coding->charbuf_used = charbuf - coding->charbuf;
1473 }
1474
1475
1476 static int
1477 encode_coding_utf_8 (coding)
1478      struct coding_system *coding;
1479 {
1480   int multibytep = coding->dst_multibyte;
1481   int *charbuf = coding->charbuf;
1482   int *charbuf_end = charbuf + coding->charbuf_used;
1483   unsigned char *dst = coding->destination + coding->produced;
1484   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1485   int produced_chars = 0;
1486   int c;
1487
1488   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1489     {
1490       ASSURE_DESTINATION (3);
1491       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1492       CODING_UTF_8_BOM (coding) = utf_without_bom;
1493     }
1494
1495   if (multibytep)
1496     {
1497       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1498
1499       while (charbuf < charbuf_end)
1500         {
1501           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1502
1503           ASSURE_DESTINATION (safe_room);
1504           c = *charbuf++;
1505           if (CHAR_BYTE8_P (c))
1506             {
1507               c = CHAR_TO_BYTE8 (c);
1508               EMIT_ONE_BYTE (c);
1509             }
1510           else
1511             {
1512               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1513               for (p = str; p < pend; p++)
1514                 EMIT_ONE_BYTE (*p);
1515             }
1516         }
1517     }
1518   else
1519     {
1520       int safe_room = MAX_MULTIBYTE_LENGTH;
1521
1522       while (charbuf < charbuf_end)
1523         {
1524           ASSURE_DESTINATION (safe_room);
1525           c = *charbuf++;
1526           if (CHAR_BYTE8_P (c))
1527             *dst++ = CHAR_TO_BYTE8 (c);
1528           else
1529             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1530           produced_chars++;
1531         }
1532     }
1533   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1534   coding->produced_char += produced_chars;
1535   coding->produced = dst - coding->destination;
1536   return 0;
1537 }
1538
1539
1540 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1541    Check if a text is encoded in one of UTF-16 based coding systems.
1542    If it is, return 1, else return 0.  */
1543
/* Nonzero if the bits of VAL selected by 0xFC00 equal 0xD800; for a
   16-bit VAL that is the range 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the bits of VAL selected by 0xFC00 equal 0xDC00; for a
   16-bit VAL that is the range 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)

/* Nonzero if VAL is 0xFFFE or 0xFFFF, or satisfies
   UTF_16_LOW_SURROGATE_P.  VAL is evaluated up to three times.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF)         \
   || UTF_16_LOW_SURROGATE_P (val))
1554
1555 /* Detect whether the source text of CODING may be encoded in UTF-16
        and record the result in DETECT_INFO.  Return 1 if it may be, 0 if
        the whole UTF-16 category has been rejected.

        If the first two bytes are FF FE or FE FF they are taken as a
        little-endian or big-endian BOM and the corresponding categories
        are marked as found.  Otherwise the signature-requiring categories
        are rejected and the remaining bytes are scanned pairwise,
        counting the distinct byte values seen at even offsets and at odd
        offsets.  As soon as either count reaches 128 the data is assumed
        to be binary and UTF-16 is rejected.  */
1556 static int
1557 detect_coding_utf_16 (coding, detect_info)
1558      struct coding_system *coding;
1559      struct coding_detection_info *detect_info;
1560 {
       /* src, src_base, src_end, multibytep and consumed_chars are not all
          referenced directly below; presumably ONE_MORE_BYTE (defined
          earlier in this file) uses them -- confirm before removing.  */
1561   const unsigned char *src = coding->source, *src_base = src;
1562   const unsigned char *src_end = coding->source + coding->src_bytes;
1563   int multibytep = coding->src_multibyte;
1564   int consumed_chars = 0;
1565   int c1, c2;
1566
1567   detect_info->checked |= CATEGORY_MASK_UTF_16;
       /* In the last block, an odd src_chars count cannot be a whole
          number of two-byte units.  */
1568   if (coding->mode & CODING_MODE_LAST_BLOCK
1569       && (coding->src_chars & 1))
1570     {
1571       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1572       return 0;
1573     }
1574
1575   ONE_MORE_BYTE (c1);
1576   ONE_MORE_BYTE (c2);
1577   if ((c1 == 0xFF) && (c2 == 0xFE))
1578     {
           /* Little-endian BOM.  */
1579       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1580                              | CATEGORY_MASK_UTF_16_AUTO);
1581       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1582                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1583                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1584     }
1585   else if ((c1 == 0xFE) && (c2 == 0xFF))
1586     {
           /* Big-endian BOM.  */
1587       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1588                              | CATEGORY_MASK_UTF_16_AUTO);
1589       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1590                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1591                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1592     }
1593   else
1594     {
1595       /* We check the dispersion of Eth and Oth bytes where E is even and
1596          O is odd.  If either is high (128 or more distinct values), we
              assume binary data.  */
           /* e[B] / o[B] is nonzero once byte value B has been seen at an
              even / odd offset; e_num and o_num count the distinct
              values.  NOTE(review): c1 and c2 are used as indices without
              a range check; confirm ONE_MORE_BYTE cannot yield a negative
              value here.  */
1597       unsigned char e[256], o[256];
1598       unsigned e_num = 1, o_num = 1;
1599
1600       memset (e, 0, 256);
1601       memset (o, 0, 256);
1602       e[c1] = 1;
1603       o[c2] = 1;
1604
           /* No BOM was found, so the categories that require one are
              rejected.  */
1605       detect_info->rejected
1606         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1607
           /* This loop is left either by `break' (too much dispersion) or,
              presumably, by ONE_MORE_BYTE jumping to no_more_source when
              the source is exhausted.  */
1608       while (1)
1609         {
1610           ONE_MORE_BYTE (c1);
1611           ONE_MORE_BYTE (c2);
1612           if (! e[c1])
1613             {
1614               e[c1] = 1;
1615               e_num++;
1616               if (e_num >= 128)
1617                 break;
1618             }
1619           if (! o[c2])
1620             {
                   /* Mark the odd-offset byte C2 itself.  Marking o[c1]
                      here would leave o[c2] clear, so every repetition of
                      the same odd byte would be counted again and
                      ordinary text would be rejected as binary.  */
1621               o[c2] = 1;
1622               o_num++;
1623               if (o_num >= 128)
1624                 break;
1625             }
1626         }
1627       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1628       return 0;
1629     }
1630
1631  no_more_source:
1632   return 1;
1633 }
1634 /* Decode the UTF-16 source of CODING, starting at offset
        coding->consumed, into character codes appended to
        coding->charbuf.  On return coding->consumed,
        coding->consumed_char and coding->charbuf_used describe how far
        decoding got.  The BOM state and a pending high surrogate are kept
        in CODING (CODING_UTF_16_BOM, CODING_UTF_16_SURROGATE), so they
        survive across calls.  */
1635 static void
1636 decode_coding_utf_16 (coding)
1637      struct coding_system *coding;
1638 {
1639   const unsigned char *src = coding->source + coding->consumed;
1640   const unsigned char *src_end = coding->source + coding->src_bytes;
1641   const unsigned char *src_base;
1642   int *charbuf = coding->charbuf + coding->charbuf_used;
1643   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1644   int consumed_chars = 0, consumed_chars_base = 0;
1645   int multibytep = coding->src_multibyte;
1646   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1647   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
       /* Nonzero while a high surrogate is waiting for its low half.  */
1648   int surrogate = CODING_UTF_16_SURROGATE (coding);
1649   Lisp_Object attr, charset_list;
1650   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The two bytes read ahead after a CR when EOL type is dos; -1
          when nothing has been read ahead.  */
1651   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1652
1653   CODING_GET_INFO (coding, attr, charset_list);
1654
1655   if (bom == utf_with_bom)
1656     {
1657       int c, c1, c2;
1658
1659       src_base = src;
1660       ONE_MORE_BYTE (c1);
1661       ONE_MORE_BYTE (c2);
1662       c = (c1 << 8) | c2;
1663
           /* The expected byte pair is FE FF for big endian and FF FE
              otherwise.  */
1664       if (endian == utf_16_big_endian
1665           ? c != 0xFEFF : c != 0xFFFE)
1666         {
1667           /* The first two bytes are not BOM.  Treat them as bytes
1668              for a normal character.  */
1669           src = src_base;
1670           coding->errors++;
1671         }
           /* The BOM is looked for only once per conversion.  */
1672       CODING_UTF_16_BOM (coding) = utf_without_bom;
1673     }
1674   else if (bom == utf_detect_bom)
1675     {
1676       /* We have already tried to detect BOM and failed in
1677          detect_coding.  */
1678       CODING_UTF_16_BOM (coding) = utf_without_bom;
1679     }
1680
1681   while (1)
1682     {
1683       int c, c1, c2;
1684
           /* Remember the start of this unit; only input before this
              point is reported as consumed.  */
1685       src_base = src;
1686       consumed_chars_base = consumed_chars;
1687
           /* One iteration may store up to three elements.  */
1688       if (charbuf + 2 >= charbuf_end)
1689         {
               /* Give back the two bytes that were read ahead after a
                  CR.  */
1690           if (byte_after_cr1 >= 0)
1691             src_base -= 2;
1692           break;
1693         }
1694
1695       if (byte_after_cr1 >= 0)
1696         c1 = byte_after_cr1, byte_after_cr1 = -1;
1697       else
1698         ONE_MORE_BYTE (c1);
           /* A negative value is stored negated as a character;
              presumably ONE_MORE_BYTE returns such a value for a
              non-byte character in a multibyte source -- confirm.  */
1699       if (c1 < 0)
1700         {
1701           *charbuf++ = -c1;
1702           continue;
1703         }
1704       if (byte_after_cr2 >= 0)
1705         c2 = byte_after_cr2, byte_after_cr2 = -1;
1706       else
1707         ONE_MORE_BYTE (c2);
1708       if (c2 < 0)
1709         {
1710           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1711           *charbuf++ = -c2;
1712           continue;
1713         }
1714       c = (endian == utf_16_big_endian
1715            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1716
1717       if (surrogate)
1718         {
1719           if (! UTF_16_LOW_SURROGATE_P (c))
1720             {
                   /* The pending high surrogate is unpaired.  Emit its
                      two bytes in source order and count an error.  */
1721               if (endian == utf_16_big_endian)
1722                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1723               else
1724                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1725               *charbuf++ = c1;
1726               *charbuf++ = c2;
1727               coding->errors++;
1728               if (UTF_16_HIGH_SURROGATE_P (c))
1729                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1730               else
                     {
                       /* The old surrogate has been flushed and C is an
                          ordinary unit, so nothing is pending any more.
                          Without this reset the stale surrogate would be
                          emitted again, with an error, for every
                          following unit.  */
                       CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1731                 *charbuf++ = c;
                     }
1732             }
1733           else
1734             {
                   /* Combine the surrogate pair into one code point.  */
1735               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1736               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1737               *charbuf++ = 0x10000 + c;
1738             }
1739         }
1740       else
1741         {
1742           if (UTF_16_HIGH_SURROGATE_P (c))
1743             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1744           else
1745             {
                   /* Read the unit after a CR ahead before storing the
                      CR.  Presumably this keeps a CR at the very end of
                      the source unconsumed (ONE_MORE_BYTE leaves via
                      no_more_source first) -- confirm.  */
1746               if (eol_crlf && c == '\r')
1747                 {
1748                   ONE_MORE_BYTE (byte_after_cr1);
1749                   ONE_MORE_BYTE (byte_after_cr2);
1750                 }
1751               *charbuf++ = c;
1752             }
1753         }
1754     }
1755
1756  no_more_source:
1757   coding->consumed_char += consumed_chars_base;
1758   coding->consumed = src_base - coding->source;
1759   coding->charbuf_used = charbuf - coding->charbuf;
1760 }
1761 /* Encode the characters in coding->charbuf as UTF-16 and append the
        bytes to the destination of CODING.  A BOM is written first unless
        the BOM type is utf_without_bom.  Characters below 0x10000 are
        written as one two-byte unit; larger ones as a surrogate pair.
        The byte order follows CODING_UTF_16_ENDIAN.  Return 0.  */
1762 static int
1763 encode_coding_utf_16 (coding)
1764      struct coding_system *coding;
1765 {
1766   int multibytep = coding->dst_multibyte;
1767   int *charbuf = coding->charbuf;
1768   int *charbuf_end = charbuf + coding->charbuf_used;
1769   unsigned char *dst = coding->destination + coding->produced;
1770   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Room requested before each character; at most four bytes are
          emitted per character below.  */
1771   int safe_room = 8;
1772   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1773   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1774   int produced_chars = 0;
1775   Lisp_Object attrs, charset_list;
1776   int c;
1777
1778   CODING_GET_INFO (coding, attrs, charset_list);
1779
1780   if (bom != utf_without_bom)
1781     {
1782       ASSURE_DESTINATION (safe_room);
1783       if (big_endian)
1784         EMIT_TWO_BYTES (0xFE, 0xFF);
1785       else
1786         EMIT_TWO_BYTES (0xFF, 0xFE);
           /* Write the BOM only once per conversion.  */
1787       CODING_UTF_16_BOM (coding) = utf_without_bom;
1788     }
1789
1790   while (charbuf < charbuf_end)
1791     {
1792       ASSURE_DESTINATION (safe_room);
1793       c = *charbuf++;
           /* Only characters beyond the Unicode range are replaced.
              MAX_UNICODE_CHAR (defined elsewhere, presumably 0x10FFFF) is
              itself representable as a surrogate pair, so the comparison
              must be strict.  */
1794       if (c > MAX_UNICODE_CHAR)
1795         c = coding->default_char;
1796
1797       if (c < 0x10000)
1798         {
1799           if (big_endian)
1800             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1801           else
1802             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1803         }
1804       else
1805         {
1806           int c1, c2;
1807
               /* Split into a high (C1) and a low (C2) surrogate.  */
1808           c -= 0x10000;
1809           c1 = (c >> 10) + 0xD800;
1810           c2 = (c & 0x3FF) + 0xDC00;
1811           if (big_endian)
1812             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1813           else
1814             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1815         }
1816     }
1817   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1818   coding->produced = dst - coding->destination;
1819   coding->produced_char += produced_chars;
1820   return 0;
1821 }
1822
1823 \f
1824 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1825
1826 /* Emacs' internal format for representation of multiple character
1827    sets is a kind of multi-byte encoding, i.e. characters are
1828    represented by variable-length sequences of one-byte codes.
1829
1830    ASCII characters and control characters (e.g. `tab', `newline') are
1831    represented by one-byte sequences which are their ASCII codes, in
1832    the range 0x00 through 0x7F.
1833
1834    8-bit characters of the range 0x80..0x9F are represented by
1835    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1836    code + 0x20).
1837
1838    8-bit characters of the range 0xA0..0xFF are represented by
1839    one-byte sequences which are their 8-bit code.
1840
1841    The other characters are represented by a sequence of `base
1842    leading-code', optional `extended leading-code', and one or two
1843    `position-code's.  The length of the sequence is determined by the
1844    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1845    whereas extended leading-code and position-code take the range 0xA0
1846    through 0xFF.  See `charset.h' for more details about leading-code
1847    and position-code.
1848
1849    --- CODE RANGE of Emacs' internal format ---
1850    character set        range
1851    -------------        -----
1852    ascii                0x00..0x7F
1853    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1854    eight-bit-graphic    0xA0..0xFF
1855    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1856    ---------------------------------------------
1857
1858    As this is the internal character representation, the format is
1859    usually not used externally (i.e. in a file or in a data sent to a
1860    process).  But, it is possible to have a text externally in this
1861    format (i.e. by encoding by the coding system `emacs-mule').
1862
1863    In that case, a sequence of one-byte codes has a slightly different
1864    form.
1865
1866    At first, all characters in eight-bit-control are represented by
1867    one-byte sequences which are their 8-bit code.
1868
1869    Next, character composition data are represented by a byte
1870    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1871    where,
1872         METHOD is 0xF2 plus one of the composition methods (enum
1873         composition_method),
1874
1875         BYTES is 0xA0 plus the byte length of this composition data,
1876
1877         CHARS is 0xA0 plus the number of characters composed by this
1878         data,
1879
1880         COMPONENTs are characters in multibyte form or composition
1881         rules encoded as two bytes of ASCII codes.
1882
1883    In addition, for backward compatibility, the following formats are
1884    also recognized as composition data on decoding.
1885
1886    0x80 MSEQ ...
1887    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1888
1889    Here,
1890         MSEQ is a multibyte form but in these special format:
1891           ASCII: 0xA0 ASCII_CODE+0x80,
1892           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1893         RULE is a one byte code of the range 0xA0..0xF0 that
1894         represents a composition rule.
1895   */
1896 /* Table indexed by a byte value.  emacs_mule_char () and
        detect_coding_emacs_mule () use the entry for a leading byte as the
        total length in bytes (1 to 4) of the emacs-mule sequence that the
        byte introduces.  The table is not filled in this part of the
        file.  */
1897 char emacs_mule_bytes[256];
1898 /* Decode one character in emacs-mule format starting at SRC, which
        must lie within the source of CODING.  On success return the
        character code, store the number of bytes read in *NBYTES and the
        value of consumed_chars in *NCHARS, and, if ID is not NULL, store
        the id of the character's charset in *ID.  Return -1 for an invalid
        byte sequence.  Return -2 via the label no_more_source, which
        ONE_MORE_BYTE presumably jumps to when the source ends inside the
        sequence.  */
1899 int
1900 emacs_mule_char (coding, src, nbytes, nchars, id)
1901      struct coding_system *coding;
1902      const unsigned char *src;
1903      int *nbytes, *nchars, *id;
1904 {
1905   const unsigned char *src_end = coding->source + coding->src_bytes;
1906   const unsigned char *src_base = src;
1907   int multibytep = coding->src_multibyte;
1908   struct charset *charset;
1909   unsigned code;
1910   int c;
1911   int consumed_chars = 0;
1912
1913   ONE_MORE_BYTE (c);
1914   if (c < 0)
1915     {
           /* A negative value is returned negated, with the charset
              taken from emacs_mule_charset[0].  */
1916       c = -c;
1917       charset = emacs_mule_charset[0];
1918     }
1919   else
1920     {
1921       if (c >= 0xA0)
1922         {
1923           /* Old style component character of a composition.  */
               /* 0xA0 is followed by ASCII_CODE + 0x80; any other byte is
                  LEADING_CODE + 0x20.  NOTE(review): the byte read after
                  0xA0 is not range-checked before it is used to index
                  emacs_mule_bytes below.  */
1924           if (c == 0xA0)
1925             {
1926               ONE_MORE_BYTE (c);
1927               c -= 0x80;
1928             }
1929           else
1930             c -= 0x20;
1931         }
1932
           /* Dispatch on the total length of the sequence.  */
1933       switch (emacs_mule_bytes[c])
1934         {
1935         case 2:
               /* Leading code and one position code.  */
1936           if (! (charset = emacs_mule_charset[c]))
1937             goto invalid_code;
1938           ONE_MORE_BYTE (c);
1939           if (c < 0xA0)
1940             goto invalid_code;
1941           code = c & 0x7F;
1942           break;
1943
1944         case 3:
1945           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1946               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1947             {
                   /* Leading code, extended leading code (which selects
                      the charset), and one position code.  */
1948               ONE_MORE_BYTE (c);
1949               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1950                 goto invalid_code;
1951               ONE_MORE_BYTE (c);
1952               if (c < 0xA0)
1953                 goto invalid_code;
1954               code = c & 0x7F;
1955             }
1956           else
1957             {
                   /* Leading code and two position codes.  */
1958               if (! (charset = emacs_mule_charset[c]))
1959                 goto invalid_code;
1960               ONE_MORE_BYTE (c);
1961               if (c < 0xA0)
1962                 goto invalid_code;
1963               code = (c & 0x7F) << 8;
1964               ONE_MORE_BYTE (c);
1965               if (c < 0xA0)
1966                 goto invalid_code;
1967               code |= c & 0x7F;
1968             }
1969           break;
1970
1971         case 4:
               /* Leading code, a byte that selects the charset, and two
                  position codes.  Unlike case 3, the charset-selecting
                  byte is only checked for being non-negative.  */
1972           ONE_MORE_BYTE (c);
1973           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1974             goto invalid_code;
1975           ONE_MORE_BYTE (c);
1976           if (c < 0xA0)
1977             goto invalid_code;
1978           code = (c & 0x7F) << 8;
1979           ONE_MORE_BYTE (c);
1980           if (c < 0xA0)
1981             goto invalid_code;
1982           code |= c & 0x7F;
1983           break;
1984
1985         case 1:
               /* A single byte: ASCII or an eight-bit byte.  */
1986           code = c;
1987           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1988                                      ? charset_ascii : charset_eight_bit);
1989           break;
1990
1991         default:
1992           abort ();
1993         }
1994       c = DECODE_CHAR (charset, code);
1995       if (c < 0)
1996         goto invalid_code;
1997     }
1998   *nbytes = src - src_base;
1999   *nchars = consumed_chars;
2000   if (id)
2001     *id = charset->id;
2002   return c;
2003
2004  no_more_source:
2005   return -2;
2006
2007  invalid_code:
2008   return -1;
2009 }
2010
2011
2012 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2013    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
2014    else return 0.  The scan starts after the first coding->head_ascii
        bytes.  CATEGORY_MASK_EMACS_MULE is added to DETECT_INFO->found
        only if at least one multibyte sequence or composition was seen;
        on failure it is added to DETECT_INFO->rejected.  */
2015
2016 static int
2017 detect_coding_emacs_mule (coding, detect_info)
2018      struct coding_system *coding;
2019      struct coding_detection_info *detect_info;
2020 {
2021   const unsigned char *src = coding->source, *src_base;
2022   const unsigned char *src_end = coding->source + coding->src_bytes;
2023   int multibytep = coding->src_multibyte;
2024   int consumed_chars = 0;
2025   int c;
2026   int found = 0;
2027
2028   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
2029   /* A coding system of this category is always ASCII compatible.  */
2030   src += coding->head_ascii;
2031
       /* The loop is left by `break' (reject) or, presumably, by
          ONE_MORE_BYTE jumping to no_more_source at the end of the
          source.  */
2032   while (1)
2033     {
2034       src_base = src;
2035       ONE_MORE_BYTE (c);
2036       if (c < 0)
2037         continue;
2038       if (c == 0x80)
2039         {
2040           /* Perhaps the start of composite character.  We simply skip
2041              it because analyzing it is too heavy for detecting.  But,
2042              at least, we check that the composite character
2043              consists of more than 4 bytes.  */
               /* NOTE(review): this declaration shadows the outer
                  src_base.  It is kept as is because ONE_MORE_BYTE may
                  refer to src_base by name -- confirm before renaming.  */
2044           const unsigned char *src_base;
2045
2046         repeat:
2047           src_base = src;
2048           do
2049             {
2050               ONE_MORE_BYTE (c);
2051             }
2052           while (c >= 0xA0);
2053
2054           if (src - src_base <= 4)
2055             break;
2056           found = CATEGORY_MASK_EMACS_MULE;
2057           if (c == 0x80)
2058             goto repeat;
2059         }
2060
2061       if (c < 0x80)
2062         {
               /* ESC, SI and SO are taken as evidence against
                  emacs-mule.  */
2063           if (c < 0x20
2064               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2065             break;
2066         }
2067       else
2068         {
               /* C is the leading byte of a sequence.  Index the length
                  table with C, not with *src_base: after a composition
                  has been skipped above, the outer src_base still points
                  at the 0x80 that started it rather than at this leading
                  byte.  */
2069           int more_bytes = emacs_mule_bytes[c] - 1;
2070
               /* Every following byte must be 0xA0 or above.  */
2071           while (more_bytes > 0)
2072             {
2073               ONE_MORE_BYTE (c);
2074               if (c < 0xA0)
2075                 {
2076                   src--;        /* Unread the last byte.  */
2077                   break;
2078                 }
2079               more_bytes--;
2080             }
2081           if (more_bytes != 0)
2082             break;
2083           found = CATEGORY_MASK_EMACS_MULE;
2084         }
2085     }
2086   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2087   return 0;
2088
2089  no_more_source:
       /* The source ended in the middle of a sequence; that is an error
          only in the last block.  */
2090   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2091     {
2092       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2093       return 0;
2094     }
2095   detect_info->found |= found;
2096   return 1;
2097 }
2098
2099
2100 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2101
2102 /* Decode a character represented as a component of composition
2103    sequence of Emacs 20/21 style at SRC by calling emacs_mule_char ().
2104    On success store the character in *BUF, increment BUF, and advance
2105    SRC and CONSUMED_CHARS past it.  If SRC is already at SRC_END, or
2106    emacs_mule_char () returns -2, do nothing.  For any other negative
2107    return value jump to the label `invalid_code'.  */
2108
2109 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
2110   do                                                            \
2111     {                                                           \
2112       int c;                                                    \
2113       int nbytes, nchars;                                       \
2114                                                                 \
2115       if (src == src_end)                                       \
2116         break;                                                  \
2117       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
2118       if (c < 0)                                                \
2119         {                                                       \
2120           if (c == -2)                                          \
2121             break;                                              \
2122           goto invalid_code;                                    \
2123         }                                                       \
2124       *buf++ = c;                                               \
2125       src += nbytes;                                            \
2126       consumed_chars += nchars;                                 \
2127     }                                                           \
2128   while (0)
2129
2130
2131 /* Decode a composition rule represented as a component of composition
2132    sequence of Emacs 20 style at SRC: one byte whose value is
2133    0xA0 + GREF * 9 + NREF.  Store the decoded rule in *BUF, and
2134    increment BUF.  If no byte is left at SRC or the byte is outside
        0xA0..0xF0, jump to the label `invalid_code'.  */
2135
2136 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
2137   do {                                                  \
2138     int c, gref, nref;                                  \
2139                                                         \
2140     if (src >= src_end)                                 \
2141       goto invalid_code;                                \
2142     ONE_MORE_BYTE_NO_CHECK (c);                         \
2143     c -= 0xA0;                                          \
2144     if (c < 0 || c >= 81)                               \
2145       goto invalid_code;                                \
2146                                                         \
2147     gref = c / 9, nref = c % 9;                         \
2148     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2149   } while (0)
2150
2151
2152 /* Decode a composition rule represented as a component of composition
2153    sequence of Emacs 21 style at SRC: two bytes, GREF + 0x20 followed
2154    by NREF + 0x20.  Store the decoded rule in *BUF, and increment BUF.
2155    If fewer than two bytes are left at SRC, or either value is outside
        0..80, jump to the label `invalid_code'.  */
2156
2157 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
2158   do {                                                  \
2159     int gref, nref;                                     \
2160                                                         \
2161     if (src + 1>= src_end)                              \
2162       goto invalid_code;                                \
2163     ONE_MORE_BYTE_NO_CHECK (gref);                      \
2164     gref -= 0x20;                                       \
2165     ONE_MORE_BYTE_NO_CHECK (nref);                      \
2166     nref -= 0x20;                                       \
2167     if (gref < 0 || gref >= 81                          \
2168         || nref < 0 || nref >= 81)                      \
2169       goto invalid_code;                                \
2170     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2171   } while (0)
2172
2173 /* Decode the header of an Emacs 21 style composition whose METHOD
        byte (0xF2 + method) has already been read into C.  Read the
        BYTES and CHARS bytes (each offset by 0xA0), and add composition
        data to CHARBUF with ADD_COMPOSITION_DATA.  Unless the method is
        COMPOSITION_RELATIVE, also decode the component characters and
        rules up to CONSUMED_CHARS_BASE + BYTES into CHARBUF, then
        subtract the number of decoded components from the first element
        written at the old CHARBUF position.  Jump to `invalid_code' on
        malformed input.

        NOTE(review): when DECODE_EMACS_MULE_COMPOSITION_CHAR stops at the
        end of the source it consumes nothing, so the `while' loop below
        makes no progress in that case -- confirm that it terminates.
        The test after the loop repeats the loop condition and so cannot
        be true when the loop exits normally.  */
2174 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
2175   do {                                                                  \
2176     /* Emacs 21 style format.  The first three bytes at SRC are         \
2177        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
2178        the byte length of this composition information, CHARS is the    \
2179        number of characters composed by this composition.  */           \
2180     enum composition_method method = c - 0xF2;                          \
2181     int *charbuf_base = charbuf;                                        \
2182     int consumed_chars_limit;                                           \
2183     int nbytes, nchars;                                                 \
2184                                                                         \
2185     ONE_MORE_BYTE (c);                                                  \
2186     if (c < 0)                                                          \
2187       goto invalid_code;                                                \
2188     nbytes = c - 0xA0;                                                  \
2189     if (nbytes < 3)                                                     \
2190       goto invalid_code;                                                \
2191     ONE_MORE_BYTE (c);                                                  \
2192     if (c < 0)                                                          \
2193       goto invalid_code;                                                \
2194     nchars = c - 0xA0;                                                  \
2195     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2196     consumed_chars_limit = consumed_chars_base + nbytes;                \
2197     if (method != COMPOSITION_RELATIVE)                                 \
2198       {                                                                 \
2199         int i = 0;                                                      \
2200         while (consumed_chars < consumed_chars_limit)                   \
2201           {                                                             \
2202             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
2203               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
2204             else                                                        \
2205               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
2206             i++;                                                        \
2207           }                                                             \
2208         if (consumed_chars < consumed_chars_limit)                      \
2209           goto invalid_code;                                            \
2210         charbuf_base[0] -= i;                                           \
2211       }                                                                 \
2212   } while (0)
2213
2214 /* Decode an Emacs 20 style relative composition.  Restart at
        SRC_BASE, skip the 0x80 byte, and decode up to
        MAX_COMPOSITION_COMPONENTS component characters for as long as the
        next byte is 0xA0 or above.  If fewer than two iterations were
        made, jump to `invalid_code'.  Otherwise add composition data to
        CHARBUF with ADD_COMPOSITION_DATA and append the decoded
        characters.  The argument C is used only as scratch for the
        skipped byte.

        NOTE(review): the loop condition reads *SRC without first
        comparing SRC with SRC_END -- confirm that a readable byte always
        follows the source.  */
2215 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
2216   do {                                                                  \
2217     /* Emacs 20 style format for relative composition.  */              \
2218     /* Store multibyte form of characters to be composed.  */           \
2219     enum composition_method method = COMPOSITION_RELATIVE;              \
2220     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
2221     int *buf = components;                                              \
2222     int i, j;                                                           \
2223                                                                         \
2224     src = src_base;                                                     \
2225     ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
2226     for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++)    \
2227       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
2228     if (i < 2)                                                          \
2229       goto invalid_code;                                                \
2230     ADD_COMPOSITION_DATA (charbuf, i, method);                          \
2231     for (j = 0; j < i; j++)                                             \
2232       *charbuf++ = components[j];                                       \
2233   } while (0)
2234
2235 /* Decode an Emacs 20 style rule-base composition, read after the
        bytes 0x80 0xFF: a component character followed by (rule,
        character) pairs for as long as the next byte is 0xA0 or above,
        up to MAX_COMPOSITION_COMPONENTS characters.  Jump to
        `invalid_code' if there is only one character or the sequence
        does not end with a character, and to `no_more_source' if CHARBUF
        has too little room.  Otherwise add composition data to CHARBUF
        with ADD_COMPOSITION_DATA, append the characters and rules in
        order, subtract their count from the first element written at the
        old CHARBUF position, and then append the characters alone once
        more.  The argument C is not used.

        NOTE(review): *SRC is read without first comparing SRC with
        SRC_END -- confirm that a readable byte always follows the
        source.  */
2236 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
2237   do {                                                          \
2238     /* Emacs 20 style format for rule-base composition.  */     \
2239     /* Store multibyte form of characters to be composed.  */   \
2240     enum composition_method method = COMPOSITION_WITH_RULE;     \
2241     int *charbuf_base = charbuf;                                \
2242     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
2243     int *buf = components;                                      \
2244     int i, j;                                                   \
2245                                                                 \
2246     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
2247     for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2248       {                                                         \
2249         if (*src < 0xA0)                                        \
2250           break;                                                \
2251         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
2252         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
2253       }                                                         \
2254     if (i <= 1 || (buf - components) % 2 == 0)                  \
2255       goto invalid_code;                                        \
2256     if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
2257       goto no_more_source;                                      \
2258     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
2259     i = i * 2 - 1;                                              \
2260     for (j = 0; j < i; j++)                                     \
2261       *charbuf++ = components[j];                               \
2262     charbuf_base[0] -= i;                                       \
2263     for (j = 0; j < i; j += 2)                                  \
2264       *charbuf++ = components[j];                               \
2265   } while (0)
2266
2267 /* Decode the emacs-mule source of CODING, starting at offset
        coding->consumed, into character codes appended to
        coding->charbuf.  Composition sequences (introduced by 0x80) are
        turned into composition data, and a run of characters of one
        non-ASCII charset is recorded with ADD_CHARSET_DATA.  A byte that
        cannot be decoded is stored as a single character and counted in
        coding->errors.  On return coding->consumed,
        coding->consumed_char and coding->charbuf_used describe how far
        decoding got.  */
2268 static void
2269 decode_coding_emacs_mule (coding)
2270      struct coding_system *coding;
2271 {
2272   const unsigned char *src = coding->source + coding->consumed;
2273   const unsigned char *src_end = coding->source + coding->src_bytes;
2274   const unsigned char *src_base;
2275   int *charbuf = coding->charbuf + coding->charbuf_used;
       /* Stop MAX_ANNOTATION_LENGTH elements before the real end of the
          buffer.  */
2276   int *charbuf_end
2277     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2278   int consumed_chars = 0, consumed_chars_base;
2279   int multibytep = coding->src_multibyte;
2280   Lisp_Object attrs, charset_list;
       /* char_offset counts the characters decoded so far; last_offset
          and last_id give the start and the charset id of the current
          run.  */
2281   int char_offset = coding->produced_char;
2282   int last_offset = char_offset;
2283   int last_id = charset_ascii;
2284   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The byte read ahead after a CR when EOL type is dos, else -1.  */
2285   int byte_after_cr = -1;
2286
2287   CODING_GET_INFO (coding, attrs, charset_list);
2288
2289   while (1)
2290     {
2291       int c;
2292
           /* Remember the start of this character; only input before
              this point is reported as consumed.  */
2293       src_base = src;
2294       consumed_chars_base = consumed_chars;
2295
2296       if (charbuf >= charbuf_end)
2297         {
               /* Give back the byte that was read ahead after a CR.  */
2298           if (byte_after_cr >= 0)
2299             src_base--;
2300           break;
2301         }
2302
2303       if (byte_after_cr >= 0)
2304         c = byte_after_cr, byte_after_cr = -1;
2305       else
2306         ONE_MORE_BYTE (c);
2307       if (c < 0)
2308         {
               /* A negative value is stored negated as a character.  */
2309           *charbuf++ = -c;
2310           char_offset++;
2311         }
2312       else if (c < 0x80)
2313         {
               /* ASCII.  The byte after a CR is read ahead before the CR
                  is stored.  */
2314           if (eol_crlf && c == '\r')
2315             ONE_MORE_BYTE (byte_after_cr);
2316           *charbuf++ = c;
2317           char_offset++;
2318         }
2319       else if (c == 0x80)
2320         {
               /* Composition data; the next byte selects the format.  */
2321           ONE_MORE_BYTE (c);
2322           if (c < 0)
2323             goto invalid_code;
2324           if (c - 0xF2 >= COMPOSITION_RELATIVE
2325               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2326             DECODE_EMACS_MULE_21_COMPOSITION (c);
2327           else if (c < 0xC0)
2328             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2329           else if (c == 0xFF)
2330             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2331           else
2332             goto invalid_code;
2333         }
2334       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2335         {
               /* Leading code of a multibyte character.  Rewind and let
                  emacs_mule_char () decode the whole sequence.  */
2336           int nbytes, nchars;
2337           int id;
2338
2339           src = src_base;
2340           consumed_chars = consumed_chars_base;
2341           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2342           if (c < 0)
2343             {
                   /* -2 means the source ended inside the sequence.  */
2344               if (c == -2)
2345                 break;
2346               goto invalid_code;
2347             }
               /* The charset changed: close the previous run (unless it
                  was ASCII) and start a new one here.  */
2348           if (last_id != id)
2349             {
2350               if (last_id != charset_ascii)
2351                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2352               last_id = id;
2353               last_offset = char_offset;
2354             }
2355           *charbuf++ = c;
2356           src += nbytes;
2357           consumed_chars += nchars;
2358           char_offset++;
2359         }
2360       else
2361         goto invalid_code;
2362       continue;
2363
2364     invalid_code:
           /* Rewind to the start of the bad sequence and pass its first
              byte through: unchanged if ASCII, else as BYTE8_TO_CHAR.  */
2365       src = src_base;
2366       consumed_chars = consumed_chars_base;
2367       ONE_MORE_BYTE (c);
2368       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2369       char_offset++;
2370       coding->errors++;
2371     }
2372
2373  no_more_source:
       /* Close the run that is still open.  */
2374   if (last_id != charset_ascii)
2375     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2376   coding->consumed_char += consumed_chars_base;
2377   coding->consumed = src_base - coding->source;
2378   coding->charbuf_used = charbuf - coding->charbuf;
2379 }
2380
2381
/* Store in CODES (an array of at least two elements) the leading
   code(s) for the emacs-mule charset id ID.

   An ID below 0xA0 is stored as is in CODES[0], and CODES[1] is set
   to 0.  Any other ID is stored in CODES[1], and CODES[0] is set to
   0x9A, 0x9B, 0x9C or 0x9D depending on whether ID is below 0xE0,
   below 0xF0, below 0xF5, or not below 0xF5.

   ID is evaluated exactly once; CODES may be evaluated more than
   once.  The expansion deliberately does not end with a semicolon
   (the caller supplies it), so the macro can be used as the body of
   an unbraced `if' that has an `else' part.  */
#define EMACS_MULE_LEADING_CODES(id, codes)				\
  do {									\
    int emacs_mule_leading_id_ = (id);					\
									\
    if (emacs_mule_leading_id_ < 0xA0)					\
      (codes)[0] = emacs_mule_leading_id_, (codes)[1] = 0;		\
    else if (emacs_mule_leading_id_ < 0xE0)				\
      (codes)[0] = 0x9A, (codes)[1] = emacs_mule_leading_id_;		\
    else if (emacs_mule_leading_id_ < 0xF0)				\
      (codes)[0] = 0x9B, (codes)[1] = emacs_mule_leading_id_;		\
    else if (emacs_mule_leading_id_ < 0xF5)				\
      (codes)[0] = 0x9C, (codes)[1] = emacs_mule_leading_id_;		\
    else								\
      (codes)[0] = 0x9D, (codes)[1] = emacs_mule_leading_id_;		\
  } while (0)
2395
2396
     /* Encode the elements of CODING->charbuf in the emacs-mule format
        and store the resulting bytes in CODING->destination, starting
        after the CODING->produced bytes that are already there.

        A negative element of the charbuf starts an annotation, which is
        skipped (a charset annotation additionally selects the charset
        preferred for the following characters).  An ASCII character is
        emitted as one byte, and an eight-bit character as its raw byte.
        Any other character is emitted as the leading code(s) of its
        charset followed by one or two code bytes with the 0x80 bit set.

        On completion CODING_RESULT_SUCCESS is recorded,
        CODING->produced_char and CODING->produced are updated, and 0 is
        returned.  */
2397 static int
2398 encode_coding_emacs_mule (coding)
2399      struct coding_system *coding;
2400 {
       /* NOTE(review): `multibytep', `dst', `dst_end' and `produced_chars'
          are presumably read and updated by the ASSURE_DESTINATION and
          EMIT_* macros, which are defined outside this function --
          confirm before renaming any of them.  */
2401   int multibytep = coding->dst_multibyte;
2402   int *charbuf = coding->charbuf;
2403   int *charbuf_end = charbuf + coding->charbuf_used;
2404   unsigned char *dst = coding->destination + coding->produced;
2405   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Passed to ASSURE_DESTINATION before each element is handled;
          presumably an upper bound on the bytes emitted for one
          element -- confirm.  */
2406   int safe_room = 8;
2407   int produced_chars = 0;
2408   Lisp_Object attrs, charset_list;
2409   int c;
       /* Charset id selected by the latest charset annotation, or -1 if
          there is no preference.  */
2410   int preferred_charset_id = -1;
2411
2412   CODING_GET_INFO (coding, attrs, charset_list);
       /* This encoder always works with Vemacs_mule_charset_list; install
          it in ATTRS if some other list is recorded there.  */
2413   if (! EQ (charset_list, Vemacs_mule_charset_list))
2414     {
2415       CODING_ATTR_CHARSET_LIST (attrs)
2416         = charset_list = Vemacs_mule_charset_list;
2417     }
2418
2419   while (charbuf < charbuf_end)
2420     {
2421       ASSURE_DESTINATION (safe_room);
2422       c = *charbuf++;
2423
2424       if (c < 0)
2425         {
2426           /* Handle an annotation.  */
               /* CHARBUF now points at the element following the negative
                  one; that element tells the kind of annotation.  */
2427           switch (*charbuf)
2428             {
2429             case CODING_ANNOTATE_COMPOSITION_MASK:
2430               /* Not yet implemented.  */
2431               break;
2432             case CODING_ANNOTATE_CHARSET_MASK:
                   /* NOTE(review): the index 3 is relative to the mask
                      element; confirm it against the layout produced by
                      ADD_CHARSET_DATA, which is not visible here.  */
2433               preferred_charset_id = charbuf[3];
                   /* Ignore a preference for a charset that is not in
                      CHARSET_LIST.  */
2434               if (preferred_charset_id >= 0
2435                   && NILP (Fmemq (make_number (preferred_charset_id),
2436                                   charset_list)))
2437                 preferred_charset_id = -1;
2438               break;
2439             default:
2440               abort ();
2441             }
               /* -C is the total length of the annotation; one element
                  of it has already been consumed above.  */
2442           charbuf += -c - 1;
2443           continue;
2444         }
2445
2446       if (ASCII_CHAR_P (c))
2447         EMIT_ONE_ASCII_BYTE (c);
2448       else if (CHAR_BYTE8_P (c))
2449         {
2450           c = CHAR_TO_BYTE8 (c);
2451           EMIT_ONE_BYTE (c);
2452         }
2453       else
2454         {
2455           struct charset *charset;
2456           unsigned code;
2457           int dimension;
2458           int emacs_mule_id;
2459           unsigned char leading_codes[2];
2460
               /* Use the preferred charset if it contains C; otherwise
                  search CHARSET_LIST.  */
2461           if (preferred_charset_id >= 0)
2462             {
2463               charset = CHARSET_FROM_ID (preferred_charset_id);
2464               if (CHAR_CHARSET_P (c, charset))
2465                 code = ENCODE_CHAR (charset, c);
2466               else
2467                 charset = char_charset (c, charset_list, &code);
2468             }
2469           else
2470             charset = char_charset (c, charset_list, &code);
2471           if (! charset)
2472             {
                   /* No charset was found for C; encode
                      CODING->default_char in its place.  */
2473               c = coding->default_char;
2474               if (ASCII_CHAR_P (c))
2475                 {
2476                   EMIT_ONE_ASCII_BYTE (c);
2477                   continue;
2478                 }
                   /* NOTE(review): the result is not checked for null
                      before it is used below; confirm that
                      char_charset cannot fail for the default
                      character.  */
2479               charset = char_charset (c, charset_list, &code);
2480             }
2481           dimension = CHARSET_DIMENSION (charset);
2482           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2483           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2484           EMIT_ONE_BYTE (leading_codes[0]);
               /* A zero second leading code means there is only one.  */
2485           if (leading_codes[1])
2486             EMIT_ONE_BYTE (leading_codes[1]);
2487           if (dimension == 1)
2488             EMIT_ONE_BYTE (code | 0x80);
2489           else
2490             {
                   /* Set the 0x80 bit of both code bytes, then emit the
                      high byte followed by the low byte.  */
2491               code |= 0x8080;
2492               EMIT_ONE_BYTE (code >> 8);
2493               EMIT_ONE_BYTE (code & 0xFF);
2494             }
2495         }
2496     }
2497   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2498   coding->produced_char += produced_chars;
2499   coding->produced = dst - coding->destination;
2500   return 0;
2501 }
2502
2503 \f
2504 /*** 7. ISO2022 handlers ***/
2505
2506 /* The following note describes the coding system ISO2022 briefly.
2507    Since the intention of this note is to help understand the
2508    functions in this file, some parts are NOT ACCURATE or are OVERLY
2509    SIMPLIFIED.  For thorough understanding, please refer to the
2510    original document of ISO2022.  This is equivalent to the standard
2511    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2512
2513    ISO2022 provides many mechanisms to encode several character sets
2514    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2515    is encoded using bytes less than 128.  This may make the encoded
2516    text a little bit longer, but the text passes more easily through
2517    several types of gateway, some of which strip off the MSB (Most
2518    Significant Bit).
2519
2520    There are two kinds of character sets: control character sets and
2521    graphic character sets.  The former contain control characters such
2522    as `newline' and `escape' to provide control functions (control
2523    functions are also provided by escape sequences).  The latter
2524    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2525    two control character sets and many graphic character sets.
2526
2527    Graphic character sets are classified into one of the following
2528    four classes, according to the number of bytes (DIMENSION) and
2529    number of characters in one dimension (CHARS) of the set:
2530    - DIMENSION1_CHARS94
2531    - DIMENSION1_CHARS96
2532    - DIMENSION2_CHARS94
2533    - DIMENSION2_CHARS96
2534
2535    In addition, each character set is assigned an identification tag,
2536    unique for each set, called the "final character" (denoted as <F>
2537    hereafter).  The <F> of each character set is decided by ECMA(*)
2538    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2539    (0x30..0x3F are for private use only).
2540
2541    Note (*): ECMA = European Computer Manufacturers Association
2542
2543    Here are examples of graphic character sets [NAME(<F>)]:
2544         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2545         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2546         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2547         o DIMENSION2_CHARS96 -- none for the moment
2548
2549    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2550         C0 [0x00..0x1F] -- control character plane 0
2551         GL [0x20..0x7F] -- graphic character plane 0
2552         C1 [0x80..0x9F] -- control character plane 1
2553         GR [0xA0..0xFF] -- graphic character plane 1
2554
2555    A control character set is directly designated and invoked to C0 or
2556    C1 by an escape sequence.  The most common case is that:
2557    - ISO646's  control character set is designated/invoked to C0, and
2558    - ISO6429's control character set is designated/invoked to C1,
2559    and usually these designations/invocations are omitted in encoded
2560    text.  In a 7-bit environment, only C0 can be used, and a control
2561    character for C1 is encoded by an appropriate escape sequence to
2562    fit into the environment.  All control characters for C1 are
2563    defined to have corresponding escape sequences.
2564
2565    A graphic character set is at first designated to one of four
2566    graphic registers (G0 through G3), then these graphic registers are
2567    invoked to GL or GR.  These designations and invocations can be
2568    done independently.  The most common case is that G0 is invoked to
2569    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2570    these invocations and designations are omitted in encoded text.
2571    In a 7-bit environment, only GL can be used.
2572
2573    When a graphic character set of CHARS94 is invoked to GL, codes
2574    0x20 and 0x7F of the GL area work as control characters SPACE and
2575    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2576    be used.
2577
2578    There are two ways of invocation: locking-shift and single-shift.
2579    With locking-shift, the invocation lasts until the next different
2580    invocation, whereas with single-shift, the invocation affects the
2581    following character only and doesn't affect the locking-shift
2582    state.  Invocations are done by the following control characters or
2583    escape sequences:
2584
2585    ----------------------------------------------------------------------
2586    abbrev  function                  cntrl escape seq   description
2587    ----------------------------------------------------------------------
2588    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2589    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2590    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2591    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2592    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2593    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2594    LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
2595    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2596    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2597    ----------------------------------------------------------------------
2598    (*) These are not used by any known coding system.
2599
2600    Control characters for these functions are defined by macros
2601    ISO_CODE_XXX in `coding.h'.
2602
2603    Designations are done by the following escape sequences:
2604    ----------------------------------------------------------------------
2605    escape sequence      description
2606    ----------------------------------------------------------------------
2607    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2608    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2609    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2610    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2611    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2612    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2613    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2614    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2615    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2616    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2617    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2618    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2619    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2620    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2621    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2622    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2623    ----------------------------------------------------------------------
2624
2625    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2626    of dimension 1, chars 94, and final character <F>, etc...
2627
2628    Note (*): Although these designations are not allowed in ISO2022,
2629    Emacs accepts them on decoding, and produces them on encoding
2630    CHARS96 character sets in a coding system which is characterized as
2631    7-bit environment, non-locking-shift, and non-single-shift.
2632
2633    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2634    '(' must be omitted.  We refer to this as "short-form" hereafter.
2635
2636    Now you may notice that there are a lot of ways of encoding the
2637    same multilingual text in ISO2022.  Actually, there exist many
2638    coding systems such as Compound Text (used in X11's inter client
2639    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2640    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2641    localized platforms), and all of these are variants of ISO2022.
2642
2643    In addition to the above, Emacs handles two more kinds of escape
2644    sequences: ISO6429's direction specification and Emacs' private
2645    sequence for specifying character composition.
2646
2647    ISO6429's direction specification takes the following form:
2648         o CSI ']'      -- end of the current direction
2649         o CSI '0' ']'  -- end of the current direction
2650         o CSI '1' ']'  -- start of left-to-right text
2651         o CSI '2' ']'  -- start of right-to-left text
2652    The control character CSI (0x9B: control sequence introducer) is
2653    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2654
2655    Character composition specification takes the following form:
2656         o ESC '0' -- start relative composition
2657         o ESC '1' -- end composition
2658         o ESC '2' -- start rule-base composition (*)
2659         o ESC '3' -- start relative composition with alternate chars  (**)
2660         o ESC '4' -- start rule-base composition with alternate chars  (**)
2661   Since these are not standard escape sequences of any ISO standard,
2662   the use of them with these meanings is restricted to Emacs only.
2663
2664   (*) This form is used only in Emacs 20.7 and older versions,
2665   but newer versions can safely decode it.
2666   (**) This form is used only in Emacs 21.1 and newer versions,
2667   and older versions can't decode it.
2668
2669   Here's a list of example usages of these composition escape
2670   sequences (categorized by `enum composition_method').
2671
2672   COMPOSITION_RELATIVE:
2673         ESC 0 CHAR [ CHAR ] ESC 1
2674   COMPOSITION_WITH_RULE:
2675         ESC 2 CHAR [ RULE CHAR ] ESC 1
2676   COMPOSITION_WITH_ALTCHARS:
2677         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2678   COMPOSITION_WITH_RULE_ALTCHARS:
2679         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2680
     /* Table of 256 `enum iso_code_class_type' values.
        NOTE(review): presumably indexed by a byte value and filled in
        elsewhere in this file -- confirm.  */
2681 enum iso_code_class_type iso_code_class[256];
2682
/* Return nonzero if ID is in the range 0..CODING->max_charset_id and
   the entry for ID in CODING->safe_charsets does not have its high
   bit set.

   The high bit is tested explicitly instead of comparing the entry
   with zero, because it is implementation-defined whether plain
   `char' is signed; where it is unsigned, a `>= 0' comparison would
   accept every entry.  A negative ID yields zero instead of being
   used as an index.  ID is evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id)			\
  ((id) >= 0						\
   && (id) <= (coding)->max_charset_id			\
   && ((coding)->safe_charsets[id] & 0x80) == 0)
2686
2687
/* Nonzero unless CODING_ISO_INITIAL, applied with index 1 to the
   entry of `coding_categories' for CATEGORY, is negative.
   NOTE(review): index 1 presumably stands for graphic register G1,
   the one invoked by shift-out -- confirm.  */
#define SHIFT_OUT_OK(category)						\
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
2690
2691 static void
2692 setup_iso_safe_charsets (attrs)
2693      Lisp_Object attrs;
2694 {
2695   Lisp_Object charset_list, safe_charsets;
2696   Lisp_Object request;
2697   Lisp_Object reg_usage;
2698   Lisp_Object tail;
2699   int reg94, reg96;
2700   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2701   int max_charset_id;
2702
2703   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2704   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2705       && ! EQ (charset_list, Viso_2022_charset_list))
2706     {
2707       CODING_ATTR_CHARSET_LIST (attrs)
2708         = charset_list = Viso_2022_charset_list;
2709       ASET (attrs, coding_attr_safe_charsets, Qnil);
2710     }
2711
2712   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2713     return;
2714
2715   max_charset_id = 0;
2716   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2717     {
2718       int id = XINT (XCAR (tail));
2719       if (max_charset_id < id)
2720         max_charset_id = id;
2721     }
2722
2723   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2724                                 make_number (255));
2725   request = AREF (attrs, coding_attr_iso_request);
2726   reg_usage = AREF (attrs, coding_attr_iso_usage);
2727   reg94 = XINT (XCAR (reg_usage));
2728   reg96 = XINT (XCDR (reg_usage));
2729
2730   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2731     {
2732       Lisp_Object id;
2733       Lisp_Object reg;
2734       struct charset *charset;
2735
2736       id = XCAR (tail);
2737       charset = CHARSET_FROM_ID (XINT (id));
2738       reg = Fcdr (Fassq (id, request));
2739       if (! NILP (reg))
2740         SSET (safe_charsets, XINT (id), XINT (reg));
2741       else if (charset->iso_chars_96)
2742         {
2743           if (reg96 < 4)
2744             SSET (safe_charsets, XINT (id), reg96);
2745         }
2746       else
2747         {
2748           if (reg94 < 4)
2749             SSET (safe_charsets, XINT (id), reg94);
2750         }
2751     }
2752   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2753 }
2754
2755
2756 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2757    Check if a text is encoded in one of ISO-2022 based codig systems.
2758    If it is, return 1, else return 0.  */
2759
2760 static int
2761 detect_coding_iso_2022 (coding, detect_info)
2762      struct coding_system *coding;
2763      struct coding_detection_info *detect_info;
2764 {
2765   const unsigned char *src = coding->source, *src_base = src;
2766   const unsigned char *src_end = coding->source + coding->src_bytes;
2767   int multibytep = coding->src_multibyte;
2768   int single_shifting = 0;
2769   int id;
2770   int c, c1;
2771   int consumed_chars = 0;
2772   int i;
2773   int rejected = 0;
2774   int found = 0;
2775   int composition_count = -1;
2776
2777   detect_info->checked |= CATEGORY_MASK_ISO;
2778
2779   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2780     {
2781       struct coding_system *this = &(coding_categories[i]);
2782       Lisp_Object attrs, val;
2783
2784       if (this->id < 0)
2785         continue;
2786       attrs = CODING_ID_ATTRS (this->id);
2787       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2788           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2789         setup_iso_safe_charsets (attrs);
2790       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2791       this->max_charset_id = SCHARS (val) - 1;
2792       this->safe_charsets = (char *) SDATA (val);
2793     }
2794
2795   /* A coding system of this category is always ASCII compatible.  */
2796   src += coding->head_ascii;
2797
2798   while (rejected != CATEGORY_MASK_ISO)
2799     {
2800       src_base = src;
2801       ONE_MORE_BYTE (c);
2802       switch (c)
2803         {
2804         case ISO_CODE_ESC:
2805           if (inhibit_iso_escape_detection)
2806             break;
2807           single_shifting = 0;
2808           ONE_MORE_BYTE (c);
2809           if (c >= '(' && c <= '/')
2810             {
2811               /* Designation sequence for a charset of dimension 1.  */
2812               ONE_MORE_BYTE (c1);
2813               if (c1 < ' ' || c1 >= 0x80
2814                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2815                 /* Invalid designation sequence.  Just ignore.  */
2816                 break;
2817             }
2818           else if (c == '$')
2819             {
2820               /* Designation sequence for a charset of dimension 2.  */
2821               ONE_MORE_BYTE (c);
2822               if (c >= '@' && c <= 'B')
2823                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2824                 id = iso_charset_table[1][0][c];
2825               else if (c >= '(' && c <= '/')
2826                 {
2827                   ONE_MORE_BYTE (c1);
2828                   if (c1 < ' ' || c1 >= 0x80
2829                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2830                     /* Invalid designation sequence.  Just ignore.  */
2831                     break;
2832                 }
2833               else
2834                 /* Invalid designation sequence.  Just ignore it.  */
2835                 break;
2836             }
2837           else if (c == 'N' || c == 'O')
2838             {
2839               /* ESC <Fe> for SS2 or SS3.  */
2840               single_shifting = 1;
2841               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2842               break;
2843             }
2844           else if (c == '1')
2845             {
2846               /* End of composition.  */
2847               if (composition_count < 0
2848                   || composition_count > MAX_COMPOSITION_COMPONENTS)
2849                 /* Invalid */
2850                 break;
2851               composition_count = -1;
2852               found |= CATEGORY_MASK_ISO;
2853             }
2854           else if (c >= '0' && c <= '4')
2855             {
2856               /* ESC <Fp> for start/end composition.  */
2857               composition_count = 0;
2858               break;
2859             }
2860           else
2861             {
2862               /* Invalid escape sequence.  Just ignore it.  */
2863               break;
2864             }
2865
2866           /* We found a valid designation sequence for CHARSET.  */
2867           rejected |= CATEGORY_MASK_ISO_8BIT;
2868           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2869                               id))
2870             found |= CATEGORY_MASK_ISO_7;
2871           else
2872             rejected |= CATEGORY_MASK_ISO_7;
2873           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2874                               id))
2875             found |= CATEGORY_MASK_ISO_7_TIGHT;
2876           else
2877             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2878           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2879                               id))
2880             found |= CATEGORY_MASK_ISO_7_ELSE;
2881           else
2882             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2883           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2884                               id))
2885             found |= CATEGORY_MASK_ISO_8_ELSE;
2886           else
2887             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2888           break;
2889
2890         case ISO_CODE_SO:
2891         case ISO_CODE_SI:
2892           /* Locking shift out/in.  */
2893           if (inhibit_iso_escape_detection)
2894             break;
2895           single_shifting = 0;
2896           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2897           break;
2898
2899         case ISO_CODE_CSI:
2900           /* Control sequence introducer.  */
2901           single_shifting = 0;
2902           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2903           found |= CATEGORY_MASK_ISO_8_ELSE;
2904           goto check_extra_latin;
2905
2906         case ISO_CODE_SS2:
2907         case ISO_CODE_SS3:
2908           /* Single shift.   */
2909           if (inhibit_iso_escape_detection)
2910             break;
2911           single_shifting = 0;
2912           rejected |= CATEGORY_MASK_ISO_7BIT;
2913           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2914               & CODING_ISO_FLAG_SINGLE_SHIFT)
2915             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2916           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2917               & CODING_ISO_FLAG_SINGLE_SHIFT)
2918             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2919           if (single_shifting)
2920             break;
2921           goto check_extra_latin;
2922
2923         default:
2924           if (c < 0)
2925             continue;
2926           if (c < 0x80)
2927             {
2928               if (composition_count >= 0)
2929                 composition_count++;
2930               single_shifting = 0;
2931               break;
2932             }
2933           if (c >= 0xA0)
2934             {
2935               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2936               found |= CATEGORY_MASK_ISO_8_1;
2937               /* Check the length of succeeding codes of the range
2938                  0xA0..0FF.  If the byte length is even, we include
2939                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2940                  only when we are not single shifting.  */
2941               if (! single_shifting
2942                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2943                 {
2944                   int i = 1;
2945                   while (src < src_end)
2946                     {
2947                       ONE_MORE_BYTE (c);
2948                       if (c < 0xA0)
2949                         break;
2950                       i++;
2951                     }
2952
2953                   if (i & 1 && src < src_end)
2954                     {
2955                       rejected |= CATEGORY_MASK_ISO_8_2;
2956                       if (composition_count >= 0)
2957                         composition_count += i;
2958                     }
2959                   else
2960                     {
2961                       found |= CATEGORY_MASK_ISO_8_2;
2962                       if (composition_count >= 0)
2963                         composition_count += i / 2;
2964                     }
2965                 }
2966               break;
2967             }
2968         check_extra_latin:
2969           single_shifting = 0;
2970           if (! VECTORP (Vlatin_extra_code_table)
2971               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2972             {
2973               rejected = CATEGORY_MASK_ISO;
2974               break;
2975             }
2976           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2977               & CODING_ISO_FLAG_LATIN_EXTRA)
2978             found |= CATEGORY_MASK_ISO_8_1;
2979           else
2980             rejected |= CATEGORY_MASK_ISO_8_1;
2981           rejected |= CATEGORY_MASK_ISO_8_2;
2982         }
2983     }
2984   detect_info->rejected |= CATEGORY_MASK_ISO;
2985   return 0;
2986
2987  no_more_source:
2988   detect_info->rejected |= rejected;
2989   detect_info->found |= (found & ~rejected);
2990   return 1;
2991 }
2992
2993
/* Record in CODING the designation to graphic register REG of the
   charset that ISO_CHARSET_TABLE gives for DIM, CHARS_96 and the
   final character FINAL.

   If FINAL is outside '0'..127, or the table has no charset for it,
   or the charset is not safe for CODING, the designation of REG is
   set to -2.  Otherwise it is set to the id of the charset, except
   that JISX0201-Roman is replaced by ASCII under
   CODING_ISO_FLAG_USE_ROMAN, and JISX0208.1978 by JISX0208 under
   CODING_ISO_FLAG_USE_OLDJIS.

   CHARS_96 is set to -1 if the escape sequence should be kept, that
   is, when the designation is invalid, or when ASCII is designated
   to a register whose previous designation was invalid.  CHARS_96
   must therefore be an lvalue.  The variable `coding' of the caller
   is used implicitly.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)			\
  do {									\
    int id, prev;							\
									\
    if (final >= '0' && final < 128					\
	&& (id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0		\
	&& SAFE_CHARSET_P (coding, id))					\
      {									\
	prev = CODING_ISO_DESIGNATION (coding, reg);			\
	if (id == charset_jisx0201_roman)				\
	  {								\
	    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)	\
	      id = charset_ascii;					\
	  }								\
	else if (id == charset_jisx0208_1978)				\
	  {								\
	    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)	\
	      id = charset_jisx0208;					\
	  }								\
	CODING_ISO_DESIGNATION (coding, reg) = id;			\
	/* Keep a sequence that designates ASCII to REG after an	\
	   invalid designation to the same register.  */		\
	if (prev == -2 && id == charset_ascii)				\
	  chars_96 = -1;						\
      }									\
    else								\
      {									\
	/* Invalid designation.  */					\
	CODING_ISO_DESIGNATION (coding, reg) = -2;			\
	chars_96 = -1;							\
      }									\
  } while (0)
3026
3027
/* If a composition is in progress (`composition_state' is not
   COMPOSING_NO), finish it: reset `composition_state' and copy the
   pending elements of `components' into `charbuf'.

   For COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS, all the
   COMPONENT_IDX elements are copied and `char_offset' is advanced by
   that count.  For the other methods only the elements at even
   indices are copied, and `char_offset' is advanced by
   COMPONENT_IDX / 2 + 1.

   Control goes to the label `no_more_source' of the caller, with
   nothing changed, if COMPONENT_IDX elements do not fit before
   `charbuf_end'.  All the variables mentioned above are those of the
   caller.  */
#define MAYBE_FINISH_COMPOSITION()					\
  do {									\
    if (composition_state != COMPOSING_NO)				\
      {									\
	int i;								\
									\
	if (charbuf + component_idx > charbuf_end)			\
	  goto no_more_source;						\
	composition_state = COMPOSING_NO;				\
	if (method != COMPOSITION_RELATIVE				\
	    && method != COMPOSITION_WITH_ALTCHARS)			\
	  {								\
	    /* Every other element is skipped.  */			\
	    i = 0;							\
	    while (i < component_idx)					\
	      {								\
		*charbuf++ = components[i];				\
		i += 2;							\
	      }								\
	    char_offset += (component_idx / 2) + 1;			\
	  }								\
	else								\
	  {								\
	    i = 0;							\
	    while (i < component_idx)					\
	      {								\
		*charbuf++ = components[i];				\
		i++;							\
	      }								\
	    char_offset += component_idx;				\
	  }								\
      }									\
  } while (0)
3052
3053
3054 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3055    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3056    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3057    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3058    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3059    Jumps to no_more_source or invalid_code unless ESC 1 follows.  */
3060
3061 #define DECODE_COMPOSITION_START(c1)                                    \
3062   do {                                                                  \
3063     if (c1 == '0'                                                       \
3064         && composition_state == COMPOSING_COMPONENT_RULE)               \
3065       {  /* ESC 0 after components: the composed chars follow.  */      \
3066         component_len = component_idx;                                  \
3067         composition_state = COMPOSING_CHAR;                             \
3068       }                                                                 \
3069     else                                                                \
3070       {                                                                 \
3071         const unsigned char *p;                                         \
3072         /* Look ahead for the ESC 1 that closes this composition.  */   \
3073         MAYBE_FINISH_COMPOSITION ();                                    \
3074         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
3075           goto no_more_source;                                          \
3076         for (p = src; p < src_end - 1; p++)                             \
3077           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
3078             break;                                                      \
3079         if (p >= src_end - 1)  /* not found (or no source left) */      \
3080           {                                                             \
3081             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
3082               goto invalid_code;                                        \
3083             /* The current composition doesn't end in the current       \
3084                source.  */                                              \
3085             record_conversion_result                                    \
3086               (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
3087             goto no_more_source;                                        \
3088           }                                                             \
3089                                                                         \
3090         /* This is surely the start of a composition.  */               \
3091         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
3092                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
3093                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
3094                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
3095         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
3096                              : COMPOSING_COMPONENT_CHAR);               \
3097         component_idx = component_len = 0;                              \
3098       }                                                                 \
3099   } while (0)
3100
3101
3102 /* Handle composition end sequence ESC 1: add the composition data
3103    and the collected components to CHARBUF.  */
3104 #define DECODE_COMPOSITION_END()                                        \
3105   do {                                                                  \
3106     int nchars = (component_len > 0 ? component_idx - component_len     \
3107                   : method == COMPOSITION_RELATIVE ? component_idx      \
3108                   : (component_idx + 1) / 2);                           \
3109     int i;                                                              \
3110     int *saved_charbuf = charbuf;                                       \
3111     /* NCHARS excludes rules and alternate components.  */              \
3112     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
3113     if (method != COMPOSITION_RELATIVE)                                 \
3114       {                                                                 \
3115         if (component_len == 0)                                         \
3116           for (i = 0; i < component_idx; i++)                           \
3117             *charbuf++ = components[i];                                 \
3118         else                                                            \
3119           for (i = 0; i < component_len; i++)                           \
3120             *charbuf++ = components[i];                                 \
3121         *saved_charbuf = saved_charbuf - charbuf; /* negated length */  \
3122       }                                                                 \
3123     if (method == COMPOSITION_WITH_RULE)                                \
3124       for (i = 0; i < component_idx; i += 2, char_offset++)             \
3125         *charbuf++ = components[i];                                     \
3126     else                                                                \
3127       for (i = component_len; i < component_idx; i++, char_offset++)    \
3128         *charbuf++ = components[i];                                     \
3129     coding->annotated = 1;                                              \
3130     composition_state = COMPOSING_NO;                                   \
3131   } while (0)
3132
3133
3134 /* Decode a composition rule from the byte C1 (and maybe one more byte
3135    from SRC) and store the encoded composition rule back in C1 (0 if
3136    C1 is out of range).  */
3137
3138 #define DECODE_COMPOSITION_RULE(c1)                                     \
3139   do {                                                                  \
3140     (c1) -= 32;                                                         \
3141     if (c1 < 81)                /* old format (before ver.21) */        \
3142       {                                                                 \
3143         int gref = (c1) / 9;                                            \
3144         int nref = (c1) % 9;                                            \
3145         if (gref == 4) gref = 10;                                       \
3146         if (nref == 4) nref = 10;                                       \
3147         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
3148       }                                                                 \
3149     else if (c1 < 93)           /* new format (after ver.21) */         \
3150       {                                                                 \
3151         ONE_MORE_BYTE (c2);                                             \
3152         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
3153       }                                                                 \
3154     else                                                                \
3155       c1 = 0;                                                           \
3156   } while (0)
3157
3158
3159 /* Decode ISO-2022 source of CODING into its charbuf.  See the above
3160    "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3161 static void
3162 decode_coding_iso_2022 (coding)
3163      struct coding_system *coding;
3164 {
3165   const unsigned char *src = coding->source + coding->consumed;
3166   const unsigned char *src_end = coding->source + coding->src_bytes;
3167   const unsigned char *src_base;
3168   int *charbuf = coding->charbuf + coding->charbuf_used;
3169   int *charbuf_end
3170     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
3171   int consumed_chars = 0, consumed_chars_base;
3172   int multibytep = coding->src_multibyte;
3173   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3174   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3175   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3176   int charset_id_2, charset_id_3;
3177   struct charset *charset;
3178   int c;
3179   /* For handling composition sequence.  */
3180 #define COMPOSING_NO                    0
3181 #define COMPOSING_CHAR                  1
3182 #define COMPOSING_RULE                  2
3183 #define COMPOSING_COMPONENT_CHAR        3
3184 #define COMPOSING_COMPONENT_RULE        4
3185
3186   int composition_state = COMPOSING_NO;
3187   enum composition_method method;
3188   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3189   int component_idx;
3190   int component_len;
3191   Lisp_Object attrs, charset_list;
3192   int char_offset = coding->produced_char;
3193   int last_offset = char_offset;
3194   int last_id = charset_ascii;
3195   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3196   int byte_after_cr = -1;
3197
3198   CODING_GET_INFO (coding, attrs, charset_list);
3199   setup_iso_safe_charsets (attrs);
3200   /* Charset list may have been changed.  */
3201   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3202   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3203
3204   while (1)
3205     {
3206       int c1, c2;
3207
3208       src_base = src;
3209       consumed_chars_base = consumed_chars;
3210
3211       if (charbuf >= charbuf_end)
3212         {
3213           if (byte_after_cr >= 0)
3214             src_base--;
3215           break;
3216         }
3217
3218       if (byte_after_cr >= 0)
3219         c1 = byte_after_cr, byte_after_cr = -1;
3220       else
3221         ONE_MORE_BYTE (c1);
3222       if (c1 < 0)
3223         goto invalid_code;
3224
3225       /* We produce at most one character.  */
3226       switch (iso_code_class [c1])
3227         {
3228         case ISO_0x20_or_0x7F:
3229           if (composition_state != COMPOSING_NO)
3230             {
3231               if (composition_state == COMPOSING_RULE
3232                   || composition_state == COMPOSING_COMPONENT_RULE)
3233                 {
3234                   if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3235                     {
3236                       DECODE_COMPOSITION_RULE (c1);
3237                       components[component_idx++] = c1;
3238                       composition_state--;
3239                       continue;
3240                     }
3241                   /* Too long composition.  */
3242                   MAYBE_FINISH_COMPOSITION ();
3243                 }
3244             }
3245           if (charset_id_0 < 0
3246               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3247             /* This is SPACE or DEL.  */
3248             charset = CHARSET_FROM_ID (charset_ascii);
3249           else
3250             charset = CHARSET_FROM_ID (charset_id_0);
3251           break;
3252
3253         case ISO_graphic_plane_0:
3254           if (composition_state != COMPOSING_NO)
3255             {
3256               if (composition_state == COMPOSING_RULE
3257                   || composition_state == COMPOSING_COMPONENT_RULE)
3258                 {
3259                   if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3260                     {
3261                       DECODE_COMPOSITION_RULE (c1);
3262                       components[component_idx++] = c1;
3263                       composition_state--;
3264                       continue;
3265                     }
3266                   MAYBE_FINISH_COMPOSITION ();
3267                 }
3268             }
3269           if (charset_id_0 < 0)
3270             charset = CHARSET_FROM_ID (charset_ascii);
3271           else
3272             charset = CHARSET_FROM_ID (charset_id_0);
3273           break;
3274
3275         case ISO_0xA0_or_0xFF:
3276           if (charset_id_1 < 0
3277               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3278               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3279             goto invalid_code;
3280           /* This is a graphic character, we fall down ... */
3281
3282         case ISO_graphic_plane_1:
3283           if (charset_id_1 < 0)
3284             goto invalid_code;
3285           charset = CHARSET_FROM_ID (charset_id_1);
3286           break;
3287
3288         case ISO_control_0:
3289           if (eol_crlf && c1 == '\r')
3290             ONE_MORE_BYTE (byte_after_cr);
3291           MAYBE_FINISH_COMPOSITION ();
3292           charset = CHARSET_FROM_ID (charset_ascii);
3293           break;
3294
3295         case ISO_control_1:
3296           MAYBE_FINISH_COMPOSITION ();
3297           goto invalid_code;
3298
3299         case ISO_shift_out:
3300           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3301               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3302             goto invalid_code;
3303           CODING_ISO_INVOCATION (coding, 0) = 1;
3304           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3305           continue;
3306
3307         case ISO_shift_in:
3308           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3309             goto invalid_code;
3310           CODING_ISO_INVOCATION (coding, 0) = 0;
3311           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3312           continue;
3313
3314         case ISO_single_shift_2_7:
3315         case ISO_single_shift_2:
3316           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3317             goto invalid_code;
3318           /* SS2 is handled as an escape sequence of ESC 'N' */
3319           c1 = 'N';
3320           goto label_escape_sequence;
3321
3322         case ISO_single_shift_3:
3323           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3324             goto invalid_code;
3325           /* SS3 is handled as an escape sequence of ESC 'O' */
3326           c1 = 'O';
3327           goto label_escape_sequence;
3328
3329         case ISO_control_sequence_introducer:
3330           /* CSI is handled as an escape sequence of ESC '[' ...  */
3331           c1 = '[';
3332           goto label_escape_sequence;
3333
3334         case ISO_escape:
3335           ONE_MORE_BYTE (c1);
3336         label_escape_sequence:
3337           /* Escape sequences handled here are invocation,
3338              designation, direction specification, and character
3339              composition specification.  */
3340           switch (c1)
3341             {
3342             case '&':           /* revision of following character set */
3343               ONE_MORE_BYTE (c1);
3344               if (!(c1 >= '@' && c1 <= '~'))
3345                 goto invalid_code;
3346               ONE_MORE_BYTE (c1);
3347               if (c1 != ISO_CODE_ESC)
3348                 goto invalid_code;
3349               ONE_MORE_BYTE (c1);
3350               goto label_escape_sequence;
3351
3352             case '$':           /* designation of 2-byte character set */
3353               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3354                 goto invalid_code;
3355               {
3356                 int reg, chars96;
3357
3358                 ONE_MORE_BYTE (c1);
3359                 if (c1 >= '@' && c1 <= 'B')
3360                   {     /* designation of JISX0208.1978, GB2312.1980,
3361                            or JISX0208.1980 */
3362                     reg = 0, chars96 = 0;
3363                   }
3364                 else if (c1 >= 0x28 && c1 <= 0x2B)
3365                   { /* designation of DIMENSION2_CHARS94 character set */
3366                     reg = c1 - 0x28, chars96 = 0;
3367                     ONE_MORE_BYTE (c1);
3368                   }
3369                 else if (c1 >= 0x2C && c1 <= 0x2F)
3370                   { /* designation of DIMENSION2_CHARS96 character set */
3371                     reg = c1 - 0x2C, chars96 = 1;
3372                     ONE_MORE_BYTE (c1);
3373                   }
3374                 else
3375                   goto invalid_code;
3376                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3377                 /* We must update these variables now.  */
3378                 if (reg == 0)
3379                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3380                 else if (reg == 1)
3381                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3382                 if (chars96 < 0)
3383                   goto invalid_code;
3384               }
3385               continue;
3386
3387             case 'n':           /* invocation of locking-shift-2 */
3388               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3389                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3390                 goto invalid_code;
3391               CODING_ISO_INVOCATION (coding, 0) = 2;
3392               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3393               continue;
3394
3395             case 'o':           /* invocation of locking-shift-3 */
3396               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3397                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3398                 goto invalid_code;
3399               CODING_ISO_INVOCATION (coding, 0) = 3;
3400               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3401               continue;
3402
3403             case 'N':           /* invocation of single-shift-2 */
3404               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3405                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3406                 goto invalid_code;
3407               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3408               if (charset_id_2 < 0)
3409                 charset = CHARSET_FROM_ID (charset_ascii);
3410               else
3411                 charset = CHARSET_FROM_ID (charset_id_2);
3412               ONE_MORE_BYTE (c1);
3413               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3414                 goto invalid_code;
3415               break;
3416
3417             case 'O':           /* invocation of single-shift-3 */
3418               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3419                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3420                 goto invalid_code;
3421               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3422               if (charset_id_3 < 0)
3423                 charset = CHARSET_FROM_ID (charset_ascii);
3424               else
3425                 charset = CHARSET_FROM_ID (charset_id_3);
3426               ONE_MORE_BYTE (c1);
3427               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3428                 goto invalid_code;
3429               break;
3430
3431             case '0': case '2': case '3': case '4': /* start composition */
3432               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3433                 goto invalid_code;
3434               DECODE_COMPOSITION_START (c1);
3435               continue;
3436
3437             case '1':           /* end composition */
3438               if (composition_state == COMPOSING_NO)
3439                 goto invalid_code;
3440               DECODE_COMPOSITION_END ();
3441               continue;
3442
3443             case '[':           /* specification of direction */
3444               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3445                 goto invalid_code;
3446               /* For the moment, nested direction is not supported.
3447                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3448                  left-to-right, and nonzero means right-to-left.  */
3449               ONE_MORE_BYTE (c1);
3450               switch (c1)
3451                 {
3452                 case ']':       /* end of the current direction */
3453                   coding->mode &= ~CODING_MODE_DIRECTION;
3454                   break;
3455                 case '0':       /* end of the current direction */
3456                 case '1':       /* start of left-to-right direction */
3457                   ONE_MORE_BYTE (c1);
3458                   if (c1 == ']')
3459                     coding->mode &= ~CODING_MODE_DIRECTION;
3460                   else
3461                     goto invalid_code;
3462                   break;
3463
3464                 case '2':       /* start of right-to-left direction */
3465                   ONE_MORE_BYTE (c1);
3466                   if (c1 == ']')
3467                     coding->mode |= CODING_MODE_DIRECTION;
3468                   else
3469                     goto invalid_code;
3470                   break;
3471
3472                 default:
3473                   goto invalid_code;
3474                 }
3475               continue;
3476
3477             case '%':
3478               ONE_MORE_BYTE (c1);
3479               if (c1 == '/')
3480                 {
3481                   /* CTEXT extended segment:
3482                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3483                      We keep these bytes as is for the moment.
3484                      They may be decoded by post-read-conversion.  */
3485                   int dim, M, L;
3486                   int size;
3487
3488                   ONE_MORE_BYTE (dim);
3489                   ONE_MORE_BYTE (M);
3490                   ONE_MORE_BYTE (L);
3491                   size = ((M - 128) * 128) + (L - 128);
3492                   if (charbuf + 8 + size > charbuf_end)
3493                     goto break_loop;
3494                   *charbuf++ = ISO_CODE_ESC;
3495                   *charbuf++ = '%';
3496                   *charbuf++ = '/';
3497                   *charbuf++ = dim;
3498                   *charbuf++ = BYTE8_TO_CHAR (M);
3499                   *charbuf++ = BYTE8_TO_CHAR (L);
3500                   while (size-- > 0)
3501                     {
3502                       ONE_MORE_BYTE (c1);
3503                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3504                     }
3505                 }
3506               else if (c1 == 'G')
3507                 {
3508                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3509                      ESC % G --UTF-8-BYTES-- ESC % @
3510                      We keep these bytes as is for the moment.
3511                      They may be decoded by post-read-conversion.  */
3512                   int *p = charbuf;
3513
3514                   if (p + 6 > charbuf_end)
3515                     goto break_loop;
3516                   *p++ = ISO_CODE_ESC;
3517                   *p++ = '%';
3518                   *p++ = 'G';
3519                   while (p < charbuf_end)
3520                     {
3521                       ONE_MORE_BYTE (c1);
3522                       if (c1 == ISO_CODE_ESC
3523                           && src + 1 < src_end
3524                           && src[0] == '%'
3525                           && src[1] == '@')
3526                         {
3527                           src += 2;
3528                           break;
3529                         }
3530                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3531                     }
3532                   if (p + 3 > charbuf_end)
3533                     goto break_loop;
3534                   *p++ = ISO_CODE_ESC;
3535                   *p++ = '%';
3536                   *p++ = '@';
3537                   charbuf = p;
3538                 }
3539               else
3540                 goto invalid_code;
3541               continue;
3542               break;
3543
3544             default:
3545               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3546                 goto invalid_code;
3547               {
3548                 int reg, chars96;
3549
3550                 if (c1 >= 0x28 && c1 <= 0x2B)
3551                   { /* designation of DIMENSION1_CHARS94 character set */
3552                     reg = c1 - 0x28, chars96 = 0;
3553                     ONE_MORE_BYTE (c1);
3554                   }
3555                 else if (c1 >= 0x2C && c1 <= 0x2F)
3556                   { /* designation of DIMENSION1_CHARS96 character set */
3557                     reg = c1 - 0x2C, chars96 = 1;
3558                     ONE_MORE_BYTE (c1);
3559                   }
3560                 else
3561                   goto invalid_code;
3562                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3563                 /* We must update these variables now.  */
3564                 if (reg == 0)
3565                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3566                 else if (reg == 1)
3567                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3568                 if (chars96 < 0)
3569                   goto invalid_code;
3570               }
3571               continue;
3572             }
3573         }
3574
3575       if (charset->id != charset_ascii
3576           && last_id != charset->id)
3577         {
3578           if (last_id != charset_ascii)
3579             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3580           last_id = charset->id;
3581           last_offset = char_offset;
3582         }
3583
3584       /* Now we know CHARSET and 1st position code C1 of a character.
3585          Produce a decoded character while getting 2nd position code
3586          C2 if necessary.  */
3587       c1 &= 0x7F;
3588       if (CHARSET_DIMENSION (charset) > 1)
3589         {
3590           ONE_MORE_BYTE (c2);
3591           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3592             /* C2 is not in a valid range.  */
3593             goto invalid_code;
3594           c1 = (c1 << 8) | (c2 & 0x7F);
3595           if (CHARSET_DIMENSION (charset) > 2)
3596             {
3597               ONE_MORE_BYTE (c2);
3598               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3599                 /* C2 is not in a valid range.  */
3600                 goto invalid_code;
3601               c1 = (c1 << 8) | (c2 & 0x7F);
3602             }
3603         }
3604
3605       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3606       if (c < 0)
3607         {
3608           MAYBE_FINISH_COMPOSITION ();
3609           for (; src_base < src; src_base++, char_offset++)
3610             {
3611               if (ASCII_BYTE_P (*src_base))
3612                 *charbuf++ = *src_base;
3613               else
3614                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3615             }
3616         }
3617       else if (composition_state == COMPOSING_NO)
3618         {
3619           *charbuf++ = c;
3620           char_offset++;
3621         }
3622       else
3623         {
3624           if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3625             {
3626               components[component_idx++] = c;
3627               if (method == COMPOSITION_WITH_RULE
3628                   || (method == COMPOSITION_WITH_RULE_ALTCHARS
3629                       && composition_state == COMPOSING_COMPONENT_CHAR))
3630                 composition_state++;
3631             }
3632           else
3633             {
3634               MAYBE_FINISH_COMPOSITION ();
3635               *charbuf++ = c;
3636               char_offset++;
3637             }
3638         }
3639       continue;
3640
3641     invalid_code:
3642       MAYBE_FINISH_COMPOSITION ();
3643       src = src_base;
3644       consumed_chars = consumed_chars_base;
3645       ONE_MORE_BYTE (c);
3646       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3647       char_offset++;
3648       coding->errors++;
3649       continue;
3650
3651     break_loop:
3652       break;
3653     }
3654
3655  no_more_source:
3656   if (last_id != charset_ascii)
3657     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3658   coding->consumed_char += consumed_chars_base;
3659   coding->consumed = src_base - coding->source;
3660   coding->charbuf_used = charbuf - coding->charbuf;
3661 }
3662
3663
3664 /* ISO2022 encoding stuff.  */
3665
3666 /*
3667    It is not enough to say just "ISO2022" on encoding, we have to
3668    specify more details.  In Emacs, each coding system of ISO2022
3669    variant has the following specifications:
3670         1. Initial designation to G0 thru G3.
3671         2. Allows short-form designation?
3672         3. ASCII should be designated to G0 before control characters?
3673         4. ASCII should be designated to G0 at end of line?
3674         5. 7-bit environment or 8-bit environment?
3675         6. Use locking-shift?
3676         7. Use Single-shift?
3677    And the following two are only for Japanese:
3678         8. Use ASCII in place of JIS0201-1976-Roman?
3679         9. Use JISX0208-1983 in place of JISX0208-1978?
3680    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3681    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3682    details.
3683 */
3684
3685 /* Produce codes (escape sequence) for designating CHARSET to graphic
3686    register REG at DST, and increment DST.  If CHARSET is a 94-char set
3687    of dimension other than 1 whose <final-char> is '@', 'A', or 'B', REG
3688    is 0, and CODING allows, produce designation sequence of short-form.
3689    Also record CHARSET as the one designated to REG in CODING.  */
3690 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3691   do {                                                                  \
3692     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3693     char *intermediate_char_94 = "()*+";                                \
3694     char *intermediate_char_96 = ",-./";                                \
3695     int revision = -1;                                                  \
3696     int c;                                                              \
3697     /* REVISION stays -1 unless CODING has the revision flag.  */       \
3698     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3699       revision = CHARSET_ISO_REVISION (charset);                        \
3700     /* Announce the revision as ESC & <'@' + revision>.  */             \
3701     if (revision >= 0)                                                  \
3702       {                                                                 \
3703         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3704         EMIT_ONE_BYTE ('@' + revision);                                 \
3705       }                                                                 \
3706     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3707     if (CHARSET_DIMENSION (charset) == 1)                               \
3708       {                                                                 \
3709         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3710           c = intermediate_char_94[reg];                                \
3711         else                                                            \
3712           c = intermediate_char_96[reg];                                \
3713         EMIT_ONE_ASCII_BYTE (c);                                        \
3714       }                                                                 \
3715     else                                                                \
3716       {                                                                 \
3717         EMIT_ONE_ASCII_BYTE ('$');                                      \
3718         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3719           {                                                             \
3720             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3721                 || reg != 0                                             \
3722                 || final_char < '@' || final_char > 'B')                \
3723               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3724           }                                                             \
3725         else                                                            \
3726           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3727       }                                                                 \
3728     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3729     /* Remember that CHARSET is now designated to REG.  */              \
3730     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3731   } while (0)
3732
3733
3734 /* The following two macros produce codes for ISO2022 single-shift
3735    functions (single-shift-2 and single-shift-3): an escape sequence
3736    if CODING has the 7-bit flag, otherwise a control character.  */
3737
3738 #define ENCODE_SINGLE_SHIFT_2                                           \
3739   do {                                                                  \
3740     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3741       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3742     else                                                                \
3743       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3744     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3745   } while (0)
3746
3747
/* Emit single-shift-3 and mark CODING as being in the single-shifting
   state.  When the coding system is flagged as seven-bit, the escape
   sequence ESC O is emitted; otherwise ISO_CODE_SS3 is emitted with
   EMIT_ONE_BYTE.  Uses the `coding' visible at the point of use.  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3756
3757
3758 /* The following four macros produce codes (control character or
3759    escape sequence) for ISO2022 locking-shift functions (shift-in,
3760    shift-out, locking-shift-2, and locking-shift-3).  */
3761
/* Emit ISO_CODE_SI (shift-in) and record that graphic register 0 is
   now the register invoked to graphic plane 0.  Uses the `coding'
   visible at the point of use.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)
3767
3768
/* Emit ISO_CODE_SO (shift-out) and record that graphic register 1 is
   now the register invoked to graphic plane 0.  Uses the `coding'
   visible at the point of use.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)
3774
3775
/* Emit the locking-shift-2 escape sequence (ESC n) and record that
   graphic register 2 is now the register invoked to graphic plane 0.
   Uses the `coding' visible at the point of use.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)
3781
3782
/* Emit the locking-shift-3 escape sequence (ESC o) and record that
   graphic register 3 is now the register invoked to graphic plane 0.
   Note that the final byte is 'o'; ESC n is locking-shift-2 and would
   make a reader invoke graphic register 2 instead.  Uses the `coding'
   visible at the point of use.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3788
3789
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an assignable expression: when it is ASCII and
   CODING_ISO_FLAG_USE_ROMAN is set, it is replaced by the charset
   whose id is charset_jisx0201_roman.  C1 may be any integer
   expression; it is parenthesized at every use.  The macro uses the
   `coding', `dst' and `produced_chars' visible at the point of use.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single-shift code was just produced; it covers only this  \
           one character, so clear the state afterwards.  */            \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3832
3833
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   CHARSET must be an assignable expression: when its id is
   charset_jisx0208 and CODING_ISO_FLAG_USE_OLDJIS is set, it is
   replaced by the charset whose id is charset_jisx0208_1978.  The
   macro uses the `coding', `dst' and `produced_chars' visible at the
   point of use.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
        && id == charset_jisx0208)                                      \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    /* The single-shifting state applies to this one character only    \
       and is cleared once the character has been emitted.  */          \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3876
3877
/* Encode character C in CHARSET: obtain its code point with
   ENCODE_CHAR, then hand it to the one- or two-dimension producer
   according to CHARSET_DIMENSION (CHARSET).  For a charset that is
   not of dimension 1, the code point is split into its second-lowest
   byte (CODE >> 8) and lowest byte (CODE & 0xFF).  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset),(c));                                \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      {                                                                    \
        ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8,             \
                                         code & 0xFF);                     \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                 \
      }                                                                    \
  } while (0)
3887
3888
3889 /* Produce designation and invocation codes at a place pointed by DST
3890    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3891    Return new DST.  */
3892
3893 unsigned char *
3894 encode_invocation_designation (charset, coding, dst, p_nchars)
3895      struct charset *charset;
3896      struct coding_system *coding;
3897      unsigned char *dst;
3898      int *p_nchars;
3899 {
3900   int multibytep = coding->dst_multibyte;
3901   int produced_chars = *p_nchars;
3902   int reg;                      /* graphic register number */
3903   int id = CHARSET_ID (charset);
3904
3905   /* At first, check designations.  */
3906   for (reg = 0; reg < 4; reg++)
3907     if (id == CODING_ISO_DESIGNATION (coding, reg))
3908       break;
3909
3910   if (reg >= 4)
3911     {
3912       /* CHARSET is not yet designated to any graphic registers.  */
3913       /* At first check the requested designation.  */
3914       reg = CODING_ISO_REQUEST (coding, id);
3915       if (reg < 0)
3916         /* Since CHARSET requests no special designation, designate it
3917            to graphic register 0.  */
3918         reg = 0;
3919
3920       ENCODE_DESIGNATION (charset, reg, coding);
3921     }
3922
3923   if (CODING_ISO_INVOCATION (coding, 0) != reg
3924       && CODING_ISO_INVOCATION (coding, 1) != reg)
3925     {
3926       /* Since the graphic register REG is not invoked to any graphic
3927          planes, invoke it to graphic plane 0.  */
3928       switch (reg)
3929         {
3930         case 0:                 /* graphic register 0 */
3931           ENCODE_SHIFT_IN;
3932           break;
3933
3934         case 1:                 /* graphic register 1 */
3935           ENCODE_SHIFT_OUT;
3936           break;
3937
3938         case 2:                 /* graphic register 2 */
3939           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3940             ENCODE_SINGLE_SHIFT_2;
3941           else
3942             ENCODE_LOCKING_SHIFT_2;
3943           break;
3944
3945         case 3:                 /* graphic register 3 */
3946           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3947             ENCODE_SINGLE_SHIFT_3;
3948           else
3949             ENCODE_LOCKING_SHIFT_3;
3950           break;
3951         }
3952     }
3953
3954   *p_nchars = produced_chars;
3955   return dst;
3956 }
3957
3958 /* The following three macros produce codes for indicating direction
3959    of text.  */
/* Emit the control sequence introducer: the escape sequence ESC [
   when the coding system is flagged as seven-bit, ISO_CODE_CSI (via
   EMIT_ONE_BYTE) otherwise.  Uses the `coding' visible at the point
   of use.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    /* Test the flag bit.  Comparing the whole flag word for equality  \
       would miss seven-bit mode whenever another flag is also set.  */ \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
3967
3968
/* Emit the right-to-left direction code: the control sequence
   introducer followed by the two ASCII bytes "2]".
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used without an argument list.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
3974
3975
/* Emit the left-to-right direction code: the control sequence
   introducer followed by the two ASCII bytes "0]".
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used without an argument list.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
3981
3982
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state.

   First, if graphic plane 0 has a register other than 0 invoked,
   shift-in is emitted.  Then every graphic register that has a
   non-negative initial charset and whose current designation differs
   from it is re-designated to that initial charset.  The macro
   declares its own `reg' and `charset' and uses the `coding' visible
   at the point of use.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
          && (CODING_ISO_DESIGNATION (coding, reg)                      \
              != CODING_ISO_INITIAL (coding, reg)))                     \
        {                                                               \
          charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
          ENCODE_DESIGNATION (charset, reg, coding);                    \
        }                                                               \
  } while (0)
4001
4002
4003 /* Produce designation sequences of charsets in the line started from
4004    SRC to a place pointed by DST, and return updated DST.
4005
4006    If the current block ends before any end-of-line, we may fail to
4007    find all the necessary designations.  */
4008
4009 static unsigned char *
4010 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
4011      struct coding_system *coding;
4012      int *charbuf, *charbuf_end;
4013      unsigned char *dst;
4014 {
4015   struct charset *charset;
4016   /* Table of charsets to be designated to each graphic register.  */
4017   int r[4];
4018   int c, found = 0, reg;
4019   int produced_chars = 0;
4020   int multibytep = coding->dst_multibyte;
4021   Lisp_Object attrs;
4022   Lisp_Object charset_list;
4023
4024   attrs = CODING_ID_ATTRS (coding->id);
4025   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4026   if (EQ (charset_list, Qiso_2022))
4027     charset_list = Viso_2022_charset_list;
4028
4029   for (reg = 0; reg < 4; reg++)
4030     r[reg] = -1;
4031
4032   while (found < 4)
4033     {
4034       int id;
4035
4036       c = *charbuf++;
4037       if (c == '\n')
4038         break;
4039       charset = char_charset (c, charset_list, NULL);
4040       id = CHARSET_ID (charset);
4041       reg = CODING_ISO_REQUEST (coding, id);
4042       if (reg >= 0 && r[reg] < 0)
4043         {
4044           found++;
4045           r[reg] = id;
4046         }
4047     }
4048
4049   if (found)
4050     {
4051       for (reg = 0; reg < 4; reg++)
4052         if (r[reg] >= 0
4053             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4054           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4055     }
4056
4057   return dst;
4058 }
4059
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".

   Encode the elements of CODING->charbuf as ISO-2022 bytes appended
   to CODING->destination (starting at offset CODING->produced).
   Designation, invocation and reset sequences are inserted as
   dictated by the flags of CODING.  On exit CODING->produced,
   CODING->produced_char and the beginning-of-line state are updated.
   Always returns 0.  */

static int
encode_coding_iso_2022 (coding)
     struct coding_system *coding;
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  int safe_room = 16;
  /* Nonzero if designation sequences must be produced before the
     next character: the coding system designates at beginning of
     line, and we are at one.  */
  int bol_designation
    = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
       && CODING_ISO_BOL (coding));
  int produced_chars = 0;
  Lisp_Object attrs, eol_type, charset_list;
  int ascii_compatible;
  int c;
  /* Charset id taken from the latest charset annotation, or -1 if
     there is none or it is not in the charset list.  */
  int preferred_charset_id = -1;

  CODING_GET_INFO (coding, attrs, charset_list);
  eol_type = CODING_ID_EOL_TYPE (coding->id);
  /* A vector-valued EOL type is treated as Qunix here.  */
  if (VECTORP (eol_type))
    eol_type = Qunix;

  setup_iso_safe_charsets (attrs);
  /* Charset list may have been changed.  */
  charset_list = CODING_ATTR_CHARSET_LIST (attrs);
  coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));

  ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));

  while (charbuf < charbuf_end)
    {
      ASSURE_DESTINATION (safe_room);

      if (bol_designation)
        {
          unsigned char *dst_prev = dst;

          /* We have to produce designation sequences if any now.  */
          dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
          bol_designation = 0;
          /* We are sure that designation sequences are all ASCII bytes.  */
          produced_chars += dst - dst_prev;
        }

      c = *charbuf++;

      if (c < 0)
        {
          /* Handle an annotation.  C is the negated length of the
             annotation; CHARBUF now points at its second element,
             which tells the annotation type.  */
          switch (*charbuf)
            {
            case CODING_ANNOTATE_COMPOSITION_MASK:
              /* Not yet implemented.  */
              break;
            case CODING_ANNOTATE_CHARSET_MASK:
              /* Remember the annotated charset, but only if this
                 coding system's charset list contains it.  */
              preferred_charset_id = charbuf[2];
              if (preferred_charset_id >= 0
                  && NILP (Fmemq (make_number (preferred_charset_id),
                                  charset_list)))
                preferred_charset_id = -1;
              break;
            default:
              abort ();
            }
          /* Skip the remaining elements of the annotation.  */
          charbuf += -c - 1;
          continue;
        }

      /* Now encode the character C.  */
      if (c < 0x20 || c == 0x7F)
        {
          /* A control character.  */
          if (c == '\n'
              || (c == '\r' && EQ (eol_type, Qmac)))
            {
              /* End of line.  */
              if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
                ENCODE_RESET_PLANE_AND_REGISTER ();
              if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
                {
                  int i;

                  /* Only the recorded state is reset here; no bytes
                     are produced.  */
                  for (i = 0; i < 4; i++)
                    CODING_ISO_DESIGNATION (coding, i)
                      = CODING_ISO_INITIAL (coding, i);
                }
              bol_designation
                = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
            }
          else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
            ENCODE_RESET_PLANE_AND_REGISTER ();
          EMIT_ONE_ASCII_BYTE (c);
        }
      else if (ASCII_CHAR_P (c))
        {
          if (ascii_compatible)
            EMIT_ONE_ASCII_BYTE (c);
          else
            {
              struct charset *charset = CHARSET_FROM_ID (charset_ascii);
              ENCODE_ISO_CHARACTER (charset, c);
            }
        }
      else if (CHAR_BYTE8_P (c))
        {
          /* A raw 8-bit byte is written out as is.  */
          c = CHAR_TO_BYTE8 (c);
          EMIT_ONE_BYTE (c);
        }
      else
        {
          struct charset *charset;

          /* Prefer the annotated charset when it contains C;
             otherwise search the charset list.  */
          if (preferred_charset_id >= 0)
            {
              charset = CHARSET_FROM_ID (preferred_charset_id);
              if (! CHAR_CHARSET_P (c, charset))
                charset = char_charset (c, charset_list, NULL);
            }
          else
            charset = char_charset (c, charset_list, NULL);
          if (!charset)
            {
              /* C is not encodable.  Substitute either the
                 inhibit-substitution character (as ASCII) or the
                 coding system's default character.  */
              if (coding->mode & CODING_MODE_SAFE_ENCODING)
                {
                  c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
                  charset = CHARSET_FROM_ID (charset_ascii);
                }
              else
                {
                  c = coding->default_char;
                  charset = char_charset (c, charset_list, NULL);
                }
            }
          ENCODE_ISO_CHARACTER (charset, c);
        }
    }

  /* At the very end of the text, return to the initial state if the
     coding system resets at end of line.  */
  if (coding->mode & CODING_MODE_LAST_BLOCK
      && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
    {
      ASSURE_DESTINATION (safe_room);
      ENCODE_RESET_PLANE_AND_REGISTER ();
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  /* Carry the beginning-of-line state over to the next call.  */
  CODING_ISO_BOL (coding) = bol_designation;
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
4211
4212 \f
/*** 8. Shift-JIS and BIG5 handlers ***/
4214
/* Although SJIS and BIG5 are not ISO coding systems, they are used
   quite widely.  So, for the moment, Emacs supports them in the bare
   C code.  But, in the future, they may be supported only by CCL.  */
4218
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.
4225
4226    --- CODE RANGE of SJIS ---
4227    (character set)      (range)
4228    ASCII                0x00 .. 0x7F
4229    KATAKANA-JISX0201    0xA0 .. 0xDF
4230    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4231             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4232    -------------------------------
4233
4234 */
4235
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and is encoded in two bytes.
4239
4240    --- CODE RANGE of BIG5 ---
4241    (character set)      (range)
4242    ASCII                0x00 .. 0x7F
4243    Big5 (1st byte)      0xA1 .. 0xFE
4244         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4245    --------------------------
4246
4247   */
4248
4249 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4250    Check if a text is encoded in SJIS.  If it is, return
4251    CATEGORY_MASK_SJIS, else return 0.  */
4252
4253 static int
4254 detect_coding_sjis (coding, detect_info)
4255      struct coding_system *coding;
4256      struct coding_detection_info *detect_info;
4257 {
4258   const unsigned char *src = coding->source, *src_base;
4259   const unsigned char *src_end = coding->source + coding->src_bytes;
4260   int multibytep = coding->src_multibyte;
4261   int consumed_chars = 0;
4262   int found = 0;
4263   int c;
4264
4265   detect_info->checked |= CATEGORY_MASK_SJIS;
4266   /* A coding system of this category is always ASCII compatible.  */
4267   src += coding->head_ascii;
4268
4269   while (1)
4270     {
4271       src_base = src;
4272       ONE_MORE_BYTE (c);
4273       if (c < 0x80)
4274         continue;
4275       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4276         {
4277           ONE_MORE_BYTE (c);
4278           if (c < 0x40 || c == 0x7F || c > 0xFC)
4279             break;
4280           found = CATEGORY_MASK_SJIS;
4281         }
4282       else if (c >= 0xA0 && c < 0xE0)
4283         found = CATEGORY_MASK_SJIS;
4284       else
4285         break;
4286     }
4287   detect_info->rejected |= CATEGORY_MASK_SJIS;
4288   return 0;
4289
4290  no_more_source:
4291   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4292     {
4293       detect_info->rejected |= CATEGORY_MASK_SJIS;
4294       return 0;
4295     }
4296   detect_info->found |= found;
4297   return 1;
4298 }
4299
4300 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4301    Check if a text is encoded in BIG5.  If it is, return
4302    CATEGORY_MASK_BIG5, else return 0.  */
4303
4304 static int
4305 detect_coding_big5 (coding, detect_info)
4306      struct coding_system *coding;
4307      struct coding_detection_info *detect_info;
4308 {
4309   const unsigned char *src = coding->source, *src_base;
4310   const unsigned char *src_end = coding->source + coding->src_bytes;
4311   int multibytep = coding->src_multibyte;
4312   int consumed_chars = 0;
4313   int found = 0;
4314   int c;
4315
4316   detect_info->checked |= CATEGORY_MASK_BIG5;
4317   /* A coding system of this category is always ASCII compatible.  */
4318   src += coding->head_ascii;
4319
4320   while (1)
4321     {
4322       src_base = src;
4323       ONE_MORE_BYTE (c);
4324       if (c < 0x80)
4325         continue;
4326       if (c >= 0xA1)
4327         {
4328           ONE_MORE_BYTE (c);
4329           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4330             return 0;
4331           found = CATEGORY_MASK_BIG5;
4332         }
4333       else
4334         break;
4335     }
4336   detect_info->rejected |= CATEGORY_MASK_BIG5;
4337   return 0;
4338
4339  no_more_source:
4340   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4341     {
4342       detect_info->rejected |= CATEGORY_MASK_BIG5;
4343       return 0;
4344     }
4345   detect_info->found |= found;
4346   return 1;
4347 }
4348
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   Decode SJIS text.

   Bytes are read from CODING->source starting at CODING->consumed
   and decoded characters are appended to CODING->charbuf.  The
   first three elements of the coding system's charset list are used
   as the roman, kana and kanji charsets; an optional fourth element
   is a second kanji charset for lead bytes above 0xEF.  A byte that
   does not start a valid code is stored as a raw byte and counted
   in CODING->errors.  */

static void
decode_coding_sjis (coding)
     struct coding_system *coding;
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* NOTE(review): the end is pulled in by MAX_ANNOTATION_LENGTH,
     presumably to leave room for the charset annotations added by
     ADD_CHARSET_DATA -- confirm.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
  int consumed_chars = 0, consumed_chars_base;
  int multibytep = coding->src_multibyte;
  struct charset *charset_roman, *charset_kanji, *charset_kana;
  struct charset *charset_kanji2;
  Lisp_Object attrs, charset_list, val;
  int char_offset = coding->produced_char;
  /* LAST_ID is the charset of the current run of non-ASCII
     characters and LAST_OFFSET the character position where that
     run started.  */
  int last_offset = char_offset;
  int last_id = charset_ascii;
  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR, or -1 if there is none.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attrs, charset_list);

  val = charset_list;
  charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));

  while (1)
    {
      int c, c1;
      struct charset *charset;

      /* Remember where this character starts so that an incomplete
         or invalid sequence can be re-read from here.  */
      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* The byte read ahead after a CR has not been decoded yet;
             step back so that it is not reported as consumed.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        c = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c);
      if (c < 0)
        goto invalid_code;
      if (c < 0x80)
        {
          /* With DOS end-of-line, read the byte following a CR right
             away; it is decoded on the next iteration.  */
          if (eol_crlf && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          charset = charset_roman;
        }
      else if (c == 0x80 || c == 0xA0)
        goto invalid_code;
      else if (c >= 0xA1 && c <= 0xDF)
        {
          /* SJIS -> JISX0201-Kana */
          c &= 0x7F;
          charset = charset_kana;
        }
      else if (c <= 0xEF)
        {
          /* SJIS -> JISX0208 */
          ONE_MORE_BYTE (c1);
          if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
            goto invalid_code;
          c = (c << 8) | c1;
          SJIS_TO_JIS (c);
          charset = charset_kanji;
        }
      else if (c <= 0xFC && charset_kanji2)
        {
          /* SJIS -> JISX0213-2 */
          ONE_MORE_BYTE (c1);
          if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
            goto invalid_code;
          c = (c << 8) | c1;
          SJIS_TO_JIS2 (c);
          charset = charset_kanji2;
        }
      else
        goto invalid_code;
      /* On a change to a different non-ASCII charset, close the
         previous run with a charset annotation and start a new one.  */
      if (charset->id != charset_ascii
          && last_id != charset->id)
        {
          if (last_id != charset_ascii)
            ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
          last_id = charset->id;
          last_offset = char_offset;
        }
      CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
      *charbuf++ = c;
      char_offset++;
      continue;

    invalid_code:
      /* Go back to the start of the offending sequence and pass its
         first byte through undecoded.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
      char_offset++;
      coding->errors++;
    }

 no_more_source:
  /* Close the pending charset run, then report how far we got.
     SRC_BASE marks the start of the first undecoded character.  */
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
4467
/* Decode BIG5 text.

   Bytes are read from CODING->source starting at CODING->consumed
   and decoded characters are appended to CODING->charbuf.  The
   first two elements of the coding system's charset list are used
   as the roman and Big5 charsets.  A byte that does not start a
   valid code is stored as a raw byte and counted in
   CODING->errors.  */

static void
decode_coding_big5 (coding)
     struct coding_system *coding;
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* NOTE(review): the end is pulled in by MAX_ANNOTATION_LENGTH,
     presumably to leave room for the charset annotations added by
     ADD_CHARSET_DATA -- confirm.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
  int consumed_chars = 0, consumed_chars_base;
  int multibytep = coding->src_multibyte;
  struct charset *charset_roman, *charset_big5;
  Lisp_Object attrs, charset_list, val;
  int char_offset = coding->produced_char;
  /* LAST_ID is the charset of the current run of non-ASCII
     characters and LAST_OFFSET the character position where that
     run started.  */
  int last_offset = char_offset;
  int last_id = charset_ascii;
  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR, or -1 if there is none.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attrs, charset_list);
  val = charset_list;
  charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));

  while (1)
    {
      int c, c1;
      struct charset *charset;

      /* Remember where this character starts so that an incomplete
         or invalid sequence can be re-read from here.  */
      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* The byte read ahead after a CR has not been decoded yet;
             step back so that it is not reported as consumed.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        c = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c);

      if (c < 0)
        goto invalid_code;
      if (c < 0x80)
        {
          /* With DOS end-of-line, read the byte following a CR right
             away; it is decoded on the next iteration.  */
          if (eol_crlf && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          charset = charset_roman;
        }
      else
        {
          /* BIG5 -> Big5 */
          if (c < 0xA1 || c > 0xFE)
            goto invalid_code;
          ONE_MORE_BYTE (c1);
          if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
            goto invalid_code;
          c = c << 8 | c1;
          charset = charset_big5;
        }
      /* On a change to a different non-ASCII charset, close the
         previous run with a charset annotation and start a new one.  */
      if (charset->id != charset_ascii
          && last_id != charset->id)
        {
          if (last_id != charset_ascii)
            ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
          last_id = charset->id;
          last_offset = char_offset;
        }
      CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
      *charbuf++ = c;
      char_offset++;
      continue;

    invalid_code:
      /* Go back to the start of the offending sequence and pass its
         first byte through undecoded.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
      char_offset++;
      coding->errors++;
    }

 no_more_source:
  /* Close the pending charset run, then report how far we got.
     SRC_BASE marks the start of the first undecoded character.  */
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
4561
4562 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4563    This function can encode charsets `ascii', `katakana-jisx0201',
4564    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4565    are sure that all these charsets are registered as official charset
4566    (i.e. do not have extended leading-codes).  Characters of other
4567    charsets are produced without any encoding.  If SJIS_P is 1, encode
4568    SJIS text, else encode BIG5 text.  */
4569
4570 static int
4571 encode_coding_sjis (coding)
4572      struct coding_system *coding;
4573 {
4574   int multibytep = coding->dst_multibyte;
4575   int *charbuf = coding->charbuf;
4576   int *charbuf_end = charbuf + coding->charbuf_used;
4577   unsigned char *dst = coding->destination + coding->produced;
4578   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4579   int safe_room = 4;
4580   int produced_chars = 0;
4581   Lisp_Object attrs, charset_list, val;
4582   int ascii_compatible;
4583   struct charset *charset_roman, *charset_kanji, *charset_kana;
4584   struct charset *charset_kanji2;
4585   int c;
4586
4587   CODING_GET_INFO (coding, attrs, charset_list);
4588   val = charset_list;
4589   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4590   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4591   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4592   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4593
4594   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4595
4596   while (charbuf < charbuf_end)
4597     {
4598       ASSURE_DESTINATION (safe_room);
4599       c = *charbuf++;
4600       /* Now encode the character C.  */
4601       if (ASCII_CHAR_P (c) && ascii_compatible)
4602         EMIT_ONE_ASCII_BYTE (c);
4603       else if (CHAR_BYTE8_P (c))
4604         {
4605           c = CHAR_TO_BYTE8 (c);
4606           EMIT_ONE_BYTE (c);
4607         }
4608       else
4609         {
4610           unsigned code;
4611           struct charset *charset = char_charset (c, charset_list, &code);
4612
4613           if (!charset)
4614             {
4615               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4616                 {
4617                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4618                   charset = CHARSET_FROM_ID (charset_ascii);
4619                 }
4620               else
4621                 {
4622                   c = coding->default_char;
4623                   charset = char_charset (c, charset_list, &code);
4624                 }
4625             }
4626           if (code == CHARSET_INVALID_CODE (charset))
4627             abort ();
4628           if (charset == charset_kanji)
4629             {
4630               int c1, c2;
4631               JIS_TO_SJIS (code);
4632               c1 = code >> 8, c2 = code & 0xFF;
4633               EMIT_TWO_BYTES (c1, c2);
4634             }
4635           else if (charset == charset_kana)
4636             EMIT_ONE_BYTE (code | 0x80);
4637           else if (charset_kanji2 && charset == charset_kanji2)
4638             {
4639               int c1, c2;
4640
4641               c1 = code >> 8;
4642               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4643                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4644                 {
4645                   JIS_TO_SJIS2 (code);
4646                   c1 = code >> 8, c2 = code & 0xFF;
4647                   EMIT_TWO_BYTES (c1, c2);
4648                 }
4649               else
4650                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4651             }
4652           else
4653             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4654         }
4655     }
4656   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4657   coding->produced_char += produced_chars;
4658   coding->produced = dst - coding->destination;
4659   return 0;
4660 }
4661
4662 static int
4663 encode_coding_big5 (coding)
4664      struct coding_system *coding;
4665 {
4666   int multibytep = coding->dst_multibyte;
4667   int *charbuf = coding->charbuf;
4668   int *charbuf_end = charbuf + coding->charbuf_used;
4669   unsigned char *dst = coding->destination + coding->produced;
4670   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4671   int safe_room = 4;
4672   int produced_chars = 0;
4673   Lisp_Object attrs, charset_list, val;
4674   int ascii_compatible;
4675   struct charset *charset_roman, *charset_big5;
4676   int c;
4677
4678   CODING_GET_INFO (coding, attrs, charset_list);
4679   val = charset_list;
4680   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4681   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4682   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4683
4684   while (charbuf < charbuf_end)
4685     {
4686       ASSURE_DESTINATION (safe_room);
4687       c = *charbuf++;
4688       /* Now encode the character C.  */
4689       if (ASCII_CHAR_P (c) && ascii_compatible)
4690         EMIT_ONE_ASCII_BYTE (c);
4691       else if (CHAR_BYTE8_P (c))
4692         {
4693           c = CHAR_TO_BYTE8 (c);
4694           EMIT_ONE_BYTE (c);
4695         }
4696       else
4697         {
4698           unsigned code;
4699           struct charset *charset = char_charset (c, charset_list, &code);
4700
4701           if (! charset)
4702             {
4703               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4704                 {
4705                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4706                   charset = CHARSET_FROM_ID (charset_ascii);
4707                 }
4708               else
4709                 {
4710                   c = coding->default_char;
4711                   charset = char_charset (c, charset_list, &code);
4712                 }
4713             }
4714           if (code == CHARSET_INVALID_CODE (charset))
4715             abort ();
4716           if (charset == charset_big5)
4717             {
4718               int c1, c2;
4719
4720               c1 = code >> 8, c2 = code & 0xFF;
4721               EMIT_TWO_BYTES (c1, c2);
4722             }
4723           else
4724             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4725         }
4726     }
4727   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4728   coding->produced_char += produced_chars;
4729   coding->produced = dst - coding->destination;
4730   return 0;
4731 }
4732
4733 \f
4734 /*** 9. CCL handlers ***/
4735
4736 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4737    Check if a text is encoded in a coding system of which
4738    encoder/decoder are written in CCL program.  If it is, return
4739    CATEGORY_MASK_CCL, else return 0.  */
4740
4741 static int
4742 detect_coding_ccl (coding, detect_info)
4743      struct coding_system *coding;
4744      struct coding_detection_info *detect_info;
4745 {
4746   const unsigned char *src = coding->source, *src_base;
4747   const unsigned char *src_end = coding->source + coding->src_bytes;
4748   int multibytep = coding->src_multibyte;
4749   int consumed_chars = 0;
4750   int found = 0;
4751   unsigned char *valids;
4752   int head_ascii = coding->head_ascii;
4753   Lisp_Object attrs;
4754
4755   detect_info->checked |= CATEGORY_MASK_CCL;
4756
4757   coding = &coding_categories[coding_category_ccl];
4758   valids = CODING_CCL_VALIDS (coding);
4759   attrs = CODING_ID_ATTRS (coding->id);
4760   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4761     src += head_ascii;
4762
4763   while (1)
4764     {
4765       int c;
4766
4767       src_base = src;
4768       ONE_MORE_BYTE (c);
4769       if (c < 0 || ! valids[c])
4770         break;
4771       if ((valids[c] > 1))
4772         found = CATEGORY_MASK_CCL;
4773     }
4774   detect_info->rejected |= CATEGORY_MASK_CCL;
4775   return 0;
4776
4777  no_more_source:
4778   detect_info->found |= found;
4779   return 1;
4780 }
4781
4782 static void
4783 decode_coding_ccl (coding)
4784      struct coding_system *coding;
4785 {
4786   const unsigned char *src = coding->source + coding->consumed;
4787   const unsigned char *src_end = coding->source + coding->src_bytes;
4788   int *charbuf = coding->charbuf + coding->charbuf_used;
4789   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4790   int consumed_chars = 0;
4791   int multibytep = coding->src_multibyte;
4792   struct ccl_program ccl;
4793   int source_charbuf[1024];
4794   int source_byteidx[1024];
4795   Lisp_Object attrs, charset_list;
4796
4797   CODING_GET_INFO (coding, attrs, charset_list);
4798   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4799
4800   while (src < src_end)
4801     {
4802       const unsigned char *p = src;
4803       int *source, *source_end;
4804       int i = 0;
4805
4806       if (multibytep)
4807         while (i < 1024 && p < src_end)
4808           {
4809             source_byteidx[i] = p - src;
4810             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4811           }
4812       else
4813         while (i < 1024 && p < src_end)
4814           source_charbuf[i++] = *p++;
4815
4816       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4817         ccl.last_block = 1;
4818
4819       source = source_charbuf;
4820       source_end = source + i;
4821       while (source < source_end)
4822         {
4823           ccl_driver (&ccl, source, charbuf,
4824                       source_end - source, charbuf_end - charbuf,
4825                       charset_list);
4826           source += ccl.consumed;
4827           charbuf += ccl.produced;
4828           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4829             break;
4830         }
4831       if (source < source_end)
4832         src += source_byteidx[source - source_charbuf];
4833       else
4834         src = p;
4835       consumed_chars += source - source_charbuf;
4836
4837       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4838           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4839         break;
4840     }
4841
4842   switch (ccl.status)
4843     {
4844     case CCL_STAT_SUSPEND_BY_SRC:
4845       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4846       break;
4847     case CCL_STAT_SUSPEND_BY_DST:
4848       break;
4849     case CCL_STAT_QUIT:
4850     case CCL_STAT_INVALID_CMD:
4851       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4852       break;
4853     default:
4854       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4855       break;
4856     }
4857   coding->consumed_char += consumed_chars;
4858   coding->consumed = src - coding->source;
4859   coding->charbuf_used = charbuf - coding->charbuf;
4860 }
4861
4862 static int
4863 encode_coding_ccl (coding)
4864      struct coding_system *coding;
4865 {
4866   struct ccl_program ccl;
4867   int multibytep = coding->dst_multibyte;
4868   int *charbuf = coding->charbuf;
4869   int *charbuf_end = charbuf + coding->charbuf_used;
4870   unsigned char *dst = coding->destination + coding->produced;
4871   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4872   int destination_charbuf[1024];
4873   int i, produced_chars = 0;
4874   Lisp_Object attrs, charset_list;
4875
4876   CODING_GET_INFO (coding, attrs, charset_list);
4877   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4878
4879   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4880   ccl.dst_multibyte = coding->dst_multibyte;
4881
4882   while (charbuf < charbuf_end)
4883     {
4884       ccl_driver (&ccl, charbuf, destination_charbuf,
4885                   charbuf_end - charbuf, 1024, charset_list);
4886       if (multibytep)
4887         {
4888           ASSURE_DESTINATION (ccl.produced * 2);
4889           for (i = 0; i < ccl.produced; i++)
4890             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4891         }
4892       else
4893         {
4894           ASSURE_DESTINATION (ccl.produced);
4895           for (i = 0; i < ccl.produced; i++)
4896             *dst++ = destination_charbuf[i] & 0xFF;
4897           produced_chars += ccl.produced;
4898         }
4899       charbuf += ccl.consumed;
4900       if (ccl.status == CCL_STAT_QUIT
4901           || ccl.status == CCL_STAT_INVALID_CMD)
4902         break;
4903     }
4904
4905   switch (ccl.status)
4906     {
4907     case CCL_STAT_SUSPEND_BY_SRC:
4908       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4909       break;
4910     case CCL_STAT_SUSPEND_BY_DST:
4911       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4912       break;
4913     case CCL_STAT_QUIT:
4914     case CCL_STAT_INVALID_CMD:
4915       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4916       break;
4917     default:
4918       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4919       break;
4920     }
4921
4922   coding->produced_char += produced_chars;
4923   coding->produced = dst - coding->destination;
4924   return 0;
4925 }
4926
4927
4928 \f
4929 /*** 10, 11. no-conversion handlers ***/
4930
4931 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4932
4933 static void
4934 decode_coding_raw_text (coding)
4935      struct coding_system *coding;
4936 {
4937   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4938
4939   coding->chars_at_source = 1;
4940   coding->consumed_char = coding->src_chars;
4941   coding->consumed = coding->src_bytes;
4942   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4943     {
4944       coding->consumed_char--;
4945       coding->consumed--;
4946       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4947     }
4948   else
4949     record_conversion_result (coding, CODING_RESULT_SUCCESS);
4950 }
4951
4952 static int
4953 encode_coding_raw_text (coding)
4954      struct coding_system *coding;
4955 {
4956   int multibytep = coding->dst_multibyte;
4957   int *charbuf = coding->charbuf;
4958   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4959   unsigned char *dst = coding->destination + coding->produced;
4960   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4961   int produced_chars = 0;
4962   int c;
4963
4964   if (multibytep)
4965     {
4966       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4967
4968       if (coding->src_multibyte)
4969         while (charbuf < charbuf_end)
4970           {
4971             ASSURE_DESTINATION (safe_room);
4972             c = *charbuf++;
4973             if (ASCII_CHAR_P (c))
4974               EMIT_ONE_ASCII_BYTE (c);
4975             else if (CHAR_BYTE8_P (c))
4976               {
4977                 c = CHAR_TO_BYTE8 (c);
4978                 EMIT_ONE_BYTE (c);
4979               }
4980             else
4981               {
4982                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4983
4984                 CHAR_STRING_ADVANCE (c, p1);
4985                 while (p0 < p1)
4986                   {
4987                     EMIT_ONE_BYTE (*p0);
4988                     p0++;
4989                   }
4990               }
4991           }
4992       else
4993         while (charbuf < charbuf_end)
4994           {
4995             ASSURE_DESTINATION (safe_room);
4996             c = *charbuf++;
4997             EMIT_ONE_BYTE (c);
4998           }
4999     }
5000   else
5001     {
5002       if (coding->src_multibyte)
5003         {
5004           int safe_room = MAX_MULTIBYTE_LENGTH;
5005
5006           while (charbuf < charbuf_end)
5007             {
5008               ASSURE_DESTINATION (safe_room);
5009               c = *charbuf++;
5010               if (ASCII_CHAR_P (c))
5011                 *dst++ = c;
5012               else if (CHAR_BYTE8_P (c))
5013                 *dst++ = CHAR_TO_BYTE8 (c);
5014               else
5015                 CHAR_STRING_ADVANCE (c, dst);
5016             }
5017         }
5018       else
5019         {
5020           ASSURE_DESTINATION (charbuf_end - charbuf);
5021           while (charbuf < charbuf_end && dst < dst_end)
5022             *dst++ = *charbuf++;
5023         }
5024       produced_chars = dst - (coding->destination + coding->produced);
5025     }
5026   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5027   coding->produced_char += produced_chars;
5028   coding->produced = dst - coding->destination;
5029   return 0;
5030 }
5031
5032 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5033    Check if a text is encoded in a charset-based coding system.  If it
5034    is, return 1, else return 0.  */
5035
5036 static int
5037 detect_coding_charset (coding, detect_info)
5038      struct coding_system *coding;
5039      struct coding_detection_info *detect_info;
5040 {
5041   const unsigned char *src = coding->source, *src_base;
5042   const unsigned char *src_end = coding->source + coding->src_bytes;
5043   int multibytep = coding->src_multibyte;
5044   int consumed_chars = 0;
5045   Lisp_Object attrs, valids, name;
5046   int found = 0;
5047   int head_ascii = coding->head_ascii;
5048   int check_latin_extra = 0;
5049
5050   detect_info->checked |= CATEGORY_MASK_CHARSET;
5051
5052   coding = &coding_categories[coding_category_charset];
5053   attrs = CODING_ID_ATTRS (coding->id);
5054   valids = AREF (attrs, coding_attr_charset_valids);
5055   name = CODING_ID_NAME (coding->id);
5056   if (VECTORP (Vlatin_extra_code_table)
5057       && strcmp ((char *) SDATA (SYMBOL_NAME (name)), "iso-8859-"))
5058     check_latin_extra = 1;
5059   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5060     src += head_ascii;
5061
5062   while (1)
5063     {
5064       int c;
5065       Lisp_Object val;
5066       struct charset *charset;
5067       int dim, idx;
5068
5069       src_base = src;
5070       ONE_MORE_BYTE (c);
5071       if (c < 0)
5072         continue;
5073       val = AREF (valids, c);
5074       if (NILP (val))
5075         break;
5076       if (c >= 0x80)
5077         {
5078           if (c < 0xA0
5079               && check_latin_extra
5080               && NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
5081             break;
5082           found = CATEGORY_MASK_CHARSET;
5083         }
5084       if (INTEGERP (val))
5085         {
5086           charset = CHARSET_FROM_ID (XFASTINT (val));
5087           dim = CHARSET_DIMENSION (charset);
5088           for (idx = 1; idx < dim; idx++)
5089             {
5090               if (src == src_end)
5091                 goto too_short;
5092               ONE_MORE_BYTE (c);
5093               if (c < charset->code_space[(dim - 1 - idx) * 2]
5094                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5095                 break;
5096             }
5097           if (idx < dim)
5098             break;
5099         }
5100       else
5101         {
5102           idx = 1;
5103           for (; CONSP (val); val = XCDR (val))
5104             {
5105               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5106               dim = CHARSET_DIMENSION (charset);
5107               while (idx < dim)
5108                 {
5109                   if (src == src_end)
5110                     goto too_short;
5111                   ONE_MORE_BYTE (c);
5112                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5113                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5114                     break;
5115                   idx++;
5116                 }
5117               if (idx == dim)
5118                 {
5119                   val = Qnil;
5120                   break;
5121                 }
5122             }
5123           if (CONSP (val))
5124             break;
5125         }
5126     }
5127  too_short:
5128   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5129   return 0;
5130
5131  no_more_source:
5132   detect_info->found |= found;
5133   return 1;
5134 }
5135
5136 static void
5137 decode_coding_charset (coding)
5138      struct coding_system *coding;
5139 {
5140   const unsigned char *src = coding->source + coding->consumed;
5141   const unsigned char *src_end = coding->source + coding->src_bytes;
5142   const unsigned char *src_base;
5143   int *charbuf = coding->charbuf + coding->charbuf_used;
5144   int *charbuf_end
5145     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
5146   int consumed_chars = 0, consumed_chars_base;
5147   int multibytep = coding->src_multibyte;
5148   Lisp_Object attrs, charset_list, valids;
5149   int char_offset = coding->produced_char;
5150   int last_offset = char_offset;
5151   int last_id = charset_ascii;
5152   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5153   int byte_after_cr = -1;
5154
5155   CODING_GET_INFO (coding, attrs, charset_list);
5156   valids = AREF (attrs, coding_attr_charset_valids);
5157
5158   while (1)
5159     {
5160       int c;
5161       Lisp_Object val;
5162       struct charset *charset;
5163       int dim;
5164       int len = 1;
5165       unsigned code;
5166
5167       src_base = src;
5168       consumed_chars_base = consumed_chars;
5169
5170       if (charbuf >= charbuf_end)
5171         {
5172           if (byte_after_cr >= 0)
5173             src_base--;
5174           break;
5175         }
5176
5177       if (byte_after_cr >= 0)
5178         {
5179           c = byte_after_cr;
5180           byte_after_cr = -1;
5181         }
5182       else
5183         {
5184           ONE_MORE_BYTE (c);
5185           if (eol_crlf && c == '\r')
5186             ONE_MORE_BYTE (byte_after_cr);
5187         }
5188       if (c < 0)
5189         goto invalid_code;
5190       code = c;
5191
5192       val = AREF (valids, c);
5193       if (! INTEGERP (val) && ! CONSP (val))
5194         goto invalid_code;
5195       if (INTEGERP (val))
5196         {
5197           charset = CHARSET_FROM_ID (XFASTINT (val));
5198           dim = CHARSET_DIMENSION (charset);
5199           while (len < dim)
5200             {
5201               ONE_MORE_BYTE (c);
5202               code = (code << 8) | c;
5203               len++;
5204             }
5205           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5206                               charset, code, c);
5207         }
5208       else
5209         {
5210           /* VAL is a list of charset IDs.  It is assured that the
5211              list is sorted by charset dimensions (smaller one
5212              comes first).  */
5213           while (CONSP (val))
5214             {
5215               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5216               dim = CHARSET_DIMENSION (charset);
5217               while (len < dim)
5218                 {
5219                   ONE_MORE_BYTE (c);
5220                   code = (code << 8) | c;
5221                   len++;
5222                 }
5223               CODING_DECODE_CHAR (coding, src, src_base,
5224                                   src_end, charset, code, c);
5225               if (c >= 0)
5226                 break;
5227               val = XCDR (val);
5228             }
5229         }
5230       if (c < 0)
5231         goto invalid_code;
5232       if (charset->id != charset_ascii
5233           && last_id != charset->id)
5234         {
5235           if (last_id != charset_ascii)
5236             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5237           last_id = charset->id;
5238           last_offset = char_offset;
5239         }
5240
5241       *charbuf++ = c;
5242       char_offset++;
5243       continue;
5244
5245     invalid_code:
5246       src = src_base;
5247       consumed_chars = consumed_chars_base;
5248       ONE_MORE_BYTE (c);
5249       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5250       char_offset++;
5251       coding->errors++;
5252     }
5253
5254  no_more_source:
5255   if (last_id != charset_ascii)
5256     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5257   coding->consumed_char += consumed_chars_base;
5258   coding->consumed = src_base - coding->source;
5259   coding->charbuf_used = charbuf - coding->charbuf;
5260 }
5261
5262 static int
5263 encode_coding_charset (coding)
5264      struct coding_system *coding;
5265 {
5266   int multibytep = coding->dst_multibyte;
5267   int *charbuf = coding->charbuf;
5268   int *charbuf_end = charbuf + coding->charbuf_used;
5269   unsigned char *dst = coding->destination + coding->produced;
5270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5271   int safe_room = MAX_MULTIBYTE_LENGTH;
5272   int produced_chars = 0;
5273   Lisp_Object attrs, charset_list;
5274   int ascii_compatible;
5275   int c;
5276
5277   CODING_GET_INFO (coding, attrs, charset_list);
5278   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5279
5280   while (charbuf < charbuf_end)
5281     {
5282       struct charset *charset;
5283       unsigned code;
5284
5285       ASSURE_DESTINATION (safe_room);
5286       c = *charbuf++;
5287       if (ascii_compatible && ASCII_CHAR_P (c))
5288         EMIT_ONE_ASCII_BYTE (c);
5289       else if (CHAR_BYTE8_P (c))
5290         {
5291           c = CHAR_TO_BYTE8 (c);
5292           EMIT_ONE_BYTE (c);
5293         }
5294       else
5295         {
5296           charset = char_charset (c, charset_list, &code);
5297           if (charset)
5298             {
5299               if (CHARSET_DIMENSION (charset) == 1)
5300                 EMIT_ONE_BYTE (code);
5301               else if (CHARSET_DIMENSION (charset) == 2)
5302                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5303               else if (CHARSET_DIMENSION (charset) == 3)
5304                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5305               else
5306                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5307                                  (code >> 8) & 0xFF, code & 0xFF);
5308             }
5309           else
5310             {
5311               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5312                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5313               else
5314                 c = coding->default_char;
5315               EMIT_ONE_BYTE (c);
5316             }
5317         }
5318     }
5319
5320   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5321   coding->produced_char += produced_chars;
5322   coding->produced = dst - coding->destination;
5323   return 0;
5324 }
5325
5326 \f
5327 /*** 10. C library functions ***/
5328
5329 /* Setup coding context CODING from information about CODING_SYSTEM.
5330    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5331    CODING_SYSTEM is invalid, signal an error.  */
5332
5333 void
5334 setup_coding_system (coding_system, coding)
5335      Lisp_Object coding_system;
5336      struct coding_system *coding;
5337 {
5338   Lisp_Object attrs;
5339   Lisp_Object eol_type;
5340   Lisp_Object coding_type;
5341   Lisp_Object val;
5342
5343   if (NILP (coding_system))
5344     coding_system = Qundecided;
5345
5346   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5347
5348   attrs = CODING_ID_ATTRS (coding->id);
5349   eol_type = CODING_ID_EOL_TYPE (coding->id);
5350
5351   coding->mode = 0;
5352   coding->head_ascii = -1;
5353   if (VECTORP (eol_type))
5354     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5355                             | CODING_REQUIRE_DETECTION_MASK);
5356   else if (! EQ (eol_type, Qunix))
5357     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5358                             | CODING_REQUIRE_ENCODING_MASK);
5359   else
5360     coding->common_flags = 0;
5361   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5362     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5363   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5364     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5365   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5366     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5367
5368   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5369   coding->max_charset_id = SCHARS (val) - 1;
5370   coding->safe_charsets = (char *) SDATA (val);
5371   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5372
5373   coding_type = CODING_ATTR_TYPE (attrs);
5374   if (EQ (coding_type, Qundecided))
5375     {
5376       coding->detector = NULL;
5377       coding->decoder = decode_coding_raw_text;
5378       coding->encoder = encode_coding_raw_text;
5379       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5380     }
5381   else if (EQ (coding_type, Qiso_2022))
5382     {
5383       int i;
5384       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5385
5386       /* Invoke graphic register 0 to plane 0.  */
5387       CODING_ISO_INVOCATION (coding, 0) = 0;
5388       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5389       CODING_ISO_INVOCATION (coding, 1)
5390         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5391       /* Setup the initial status of designation.  */
5392       for (i = 0; i < 4; i++)
5393         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5394       /* Not single shifting initially.  */
5395       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5396       /* Beginning of buffer should also be regarded as bol. */
5397       CODING_ISO_BOL (coding) = 1;
5398       coding->detector = detect_coding_iso_2022;
5399       coding->decoder = decode_coding_iso_2022;
5400       coding->encoder = encode_coding_iso_2022;
5401       if (flags & CODING_ISO_FLAG_SAFE)
5402         coding->mode |= CODING_MODE_SAFE_ENCODING;
5403       coding->common_flags
5404         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5405             | CODING_REQUIRE_FLUSHING_MASK);
5406       if (flags & CODING_ISO_FLAG_COMPOSITION)
5407         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5408       if (flags & CODING_ISO_FLAG_DESIGNATION)
5409         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5410       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5411         {
5412           setup_iso_safe_charsets (attrs);
5413           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5414           coding->max_charset_id = SCHARS (val) - 1;
5415           coding->safe_charsets = (char *) SDATA (val);
5416         }
5417       CODING_ISO_FLAGS (coding) = flags;
5418     }
5419   else if (EQ (coding_type, Qcharset))
5420     {
5421       coding->detector = detect_coding_charset;
5422       coding->decoder = decode_coding_charset;
5423       coding->encoder = encode_coding_charset;
5424       coding->common_flags
5425         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5426     }
5427   else if (EQ (coding_type, Qutf_8))
5428     {
5429       val = AREF (attrs, coding_attr_utf_bom);
5430       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5431                                    : EQ (val, Qt) ? utf_with_bom
5432                                    : utf_without_bom);
5433       coding->detector = detect_coding_utf_8;
5434       coding->decoder = decode_coding_utf_8;
5435       coding->encoder = encode_coding_utf_8;
5436       coding->common_flags
5437         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5438       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5439         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5440     }
5441   else if (EQ (coding_type, Qutf_16))
5442     {
5443       val = AREF (attrs, coding_attr_utf_bom);
5444       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5445                                     : EQ (val, Qt) ? utf_with_bom
5446                                     : utf_without_bom);
5447       val = AREF (attrs, coding_attr_utf_16_endian);
5448       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5449                                        : utf_16_little_endian);
5450       CODING_UTF_16_SURROGATE (coding) = 0;
5451       coding->detector = detect_coding_utf_16;
5452       coding->decoder = decode_coding_utf_16;
5453       coding->encoder = encode_coding_utf_16;
5454       coding->common_flags
5455         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5456       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5457         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5458     }
5459   else if (EQ (coding_type, Qccl))
5460     {
5461       coding->detector = detect_coding_ccl;
5462       coding->decoder = decode_coding_ccl;
5463       coding->encoder = encode_coding_ccl;
5464       coding->common_flags
5465         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5466             | CODING_REQUIRE_FLUSHING_MASK);
5467     }
5468   else if (EQ (coding_type, Qemacs_mule))
5469     {
5470       coding->detector = detect_coding_emacs_mule;
5471       coding->decoder = decode_coding_emacs_mule;
5472       coding->encoder = encode_coding_emacs_mule;
5473       coding->common_flags
5474         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5475       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5476           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5477         {
5478           Lisp_Object tail, safe_charsets;
5479           int max_charset_id = 0;
5480
5481           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5482                tail = XCDR (tail))
5483             if (max_charset_id < XFASTINT (XCAR (tail)))
5484               max_charset_id = XFASTINT (XCAR (tail));
5485           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5486                                         make_number (255));
5487           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5488                tail = XCDR (tail))
5489             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5490           coding->max_charset_id = max_charset_id;
5491           coding->safe_charsets = (char *) SDATA (safe_charsets);
5492         }
5493     }
5494   else if (EQ (coding_type, Qshift_jis))
5495     {
5496       coding->detector = detect_coding_sjis;
5497       coding->decoder = decode_coding_sjis;
5498       coding->encoder = encode_coding_sjis;
5499       coding->common_flags
5500         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5501     }
5502   else if (EQ (coding_type, Qbig5))
5503     {
5504       coding->detector = detect_coding_big5;
5505       coding->decoder = decode_coding_big5;
5506       coding->encoder = encode_coding_big5;
5507       coding->common_flags
5508         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5509     }
5510   else                          /* EQ (coding_type, Qraw_text) */
5511     {
5512       coding->detector = NULL;
5513       coding->decoder = decode_coding_raw_text;
5514       coding->encoder = encode_coding_raw_text;
5515       if (! EQ (eol_type, Qunix))
5516         {
5517           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5518           if (! VECTORP (eol_type))
5519             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5520         }
5521
5522     }
5523
5524   return;
5525 }
5526
5527 /* Return a list of charsets supported by CODING.  */
5528
5529 Lisp_Object
5530 coding_charset_list (coding)
5531      struct coding_system *coding;
5532 {
5533   Lisp_Object attrs, charset_list;
5534
5535   CODING_GET_INFO (coding, attrs, charset_list);
5536   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5537     {
5538       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5539
5540       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5541         charset_list = Viso_2022_charset_list;
5542     }
5543   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5544     {
5545       charset_list = Vemacs_mule_charset_list;
5546     }
5547   return charset_list;
5548 }
5549
5550
5551 /* Return raw-text or one of its subsidiaries that has the same
5552    eol_type as CODING-SYSTEM.  */
5553
5554 Lisp_Object
5555 raw_text_coding_system (coding_system)
5556      Lisp_Object coding_system;
5557 {
5558   Lisp_Object spec, attrs;
5559   Lisp_Object eol_type, raw_text_eol_type;
5560
5561   if (NILP (coding_system))
5562     return Qraw_text;
5563   spec = CODING_SYSTEM_SPEC (coding_system);
5564   attrs = AREF (spec, 0);
5565
5566   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5567     return coding_system;
5568
5569   eol_type = AREF (spec, 2);
5570   if (VECTORP (eol_type))
5571     return Qraw_text;
5572   spec = CODING_SYSTEM_SPEC (Qraw_text);
5573   raw_text_eol_type = AREF (spec, 2);
5574   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5575           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5576           : AREF (raw_text_eol_type, 2));
5577 }
5578
5579
5580 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5581    does, return one of the subsidiary that has the same eol-spec as
5582    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
5583    inherit end-of-line format from the system's setting
5584    (system_eol_type).  */
5585
5586 Lisp_Object
5587 coding_inherit_eol_type (coding_system, parent)
5588      Lisp_Object coding_system, parent;
5589 {
5590   Lisp_Object spec, eol_type;
5591
5592   if (NILP (coding_system))
5593     coding_system = Qraw_text;
5594   spec = CODING_SYSTEM_SPEC (coding_system);
5595   eol_type = AREF (spec, 2);
5596   if (VECTORP (eol_type))
5597     {
5598       Lisp_Object parent_eol_type;
5599
5600       if (! NILP (parent))
5601         {
5602           Lisp_Object parent_spec;
5603
5604           parent_spec = CODING_SYSTEM_SPEC (parent);
5605           parent_eol_type = AREF (parent_spec, 2);
5606         }
5607       else
5608         parent_eol_type = system_eol_type;
5609       if (EQ (parent_eol_type, Qunix))
5610         coding_system = AREF (eol_type, 0);
5611       else if (EQ (parent_eol_type, Qdos))
5612         coding_system = AREF (eol_type, 1);
5613       else if (EQ (parent_eol_type, Qmac))
5614         coding_system = AREF (eol_type, 2);
5615     }
5616   return coding_system;
5617 }
5618
5619 /* Emacs has a mechanism to automatically detect a coding system if it
5620    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5621    it's impossible to distinguish some coding systems accurately
5622    because they use the same range of codes.  So, at first, coding
5623    systems are categorized into the following categories:
5624
5625    o coding-category-emacs-mule
5626
5627         The category for a coding system which has the same code range
5628         as Emacs' internal format.  Assigned the coding-system (Lisp
5629         symbol) `emacs-mule' by default.
5630
5631    o coding-category-sjis
5632
5633         The category for a coding system which has the same code range
5634         as SJIS.  Assigned the coding-system (Lisp
5635         symbol) `japanese-shift-jis' by default.
5636
5637    o coding-category-iso-7
5638
5639         The category for a coding system which has the same code range
5640         as ISO2022 of 7-bit environment.  This doesn't use any locking
5641         shift and single shift functions.  This can encode/decode all
5642         charsets.  Assigned the coding-system (Lisp symbol)
5643         `iso-2022-7bit' by default.
5644
5645    o coding-category-iso-7-tight
5646
5647         Same as coding-category-iso-7 except that this can
5648         encode/decode only the specified charsets.
5649
5650    o coding-category-iso-8-1
5651
5652         The category for a coding system which has the same code range
5653         as ISO2022 of 8-bit environment and graphic plane 1 used only
5654         for DIMENSION1 charset.  This doesn't use any locking shift
5655         and single shift functions.  Assigned the coding-system (Lisp
5656         symbol) `iso-latin-1' by default.
5657
5658    o coding-category-iso-8-2
5659
5660         The category for a coding system which has the same code range
5661         as ISO2022 of 8-bit environment and graphic plane 1 used only
5662         for DIMENSION2 charset.  This doesn't use any locking shift
5663         and single shift functions.  Assigned the coding-system (Lisp
5664         symbol) `japanese-iso-8bit' by default.
5665
5666    o coding-category-iso-7-else
5667
5668         The category for a coding system which has the same code range
5669         as ISO2022 of 7-bit environment but uses locking shift or
5670         single shift functions.  Assigned the coding-system (Lisp
5671         symbol) `iso-2022-7bit-lock' by default.
5672
5673    o coding-category-iso-8-else
5674
5675         The category for a coding system which has the same code range
5676         as ISO2022 of 8-bit environment but uses locking shift or
5677         single shift functions.  Assigned the coding-system (Lisp
5678         symbol) `iso-2022-8bit-ss2' by default.
5679
5680    o coding-category-big5
5681
5682         The category for a coding system which has the same code range
5683         as BIG5.  Assigned the coding-system (Lisp symbol)
5684         `cn-big5' by default.
5685
5686    o coding-category-utf-8
5687
5688         The category for a coding system which has the same code range
5689         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5690         symbol) `utf-8' by default.
5691
5692    o coding-category-utf-16-be
5693
5694         The category for a coding system in which a text has an
5695         Unicode signature (cf. Unicode Standard) in the order of BIG
5696         endian at the head.  Assigned the coding-system (Lisp symbol)
5697         `utf-16-be' by default.
5698
5699    o coding-category-utf-16-le
5700
5701         The category for a coding system in which a text has an
5702         Unicode signature (cf. Unicode Standard) in the order of
5703         LITTLE endian at the head.  Assigned the coding-system (Lisp
5704         symbol) `utf-16-le' by default.
5705
5706    o coding-category-ccl
5707
5708         The category for a coding system of which encoder/decoder is
5709         written in CCL programs.  The default value is nil, i.e., no
5710         coding system is assigned.
5711
5712    o coding-category-binary
5713
5714         The category for a coding system not categorized in any of the
5715         above.  Assigned the coding-system (Lisp symbol)
5716         `no-conversion' by default.
5717
5718    Each of them is a Lisp symbol whose value is an actual
5719    `coding-system' (this is also a Lisp symbol) assigned by a user.
5720    What Emacs actually does is to detect a category of coding system.
5721    Then, it uses the `coding-system' assigned to that category.  If
5722    Emacs can't narrow the result down to a single category, it selects
5723    the category of the highest priority.  Priorities of categories are
5724    also specified by a user in a Lisp variable `coding-category-list'.
5725
5726 */
5727
5728 #define EOL_SEEN_NONE   0
5729 #define EOL_SEEN_LF     1
5730 #define EOL_SEEN_CR     2
5731 #define EOL_SEEN_CRLF   4
5732
5733 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5734    SOURCE is encoded.  If CATEGORY is one of
5735    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5736    two-byte, else they are encoded by one-byte.
5737
5738    Return one of EOL_SEEN_XXX.  */
5739
5740 #define MAX_EOL_CHECK_COUNT 3
5741
5742 static int
5743 detect_eol (source, src_bytes, category)
5744      const unsigned char *source;
5745      EMACS_INT src_bytes;
5746      enum coding_category category;
5747 {
5748   const unsigned char *src = source, *src_end = src + src_bytes;
5749   unsigned char c;
5750   int total  = 0;
5751   int eol_seen = EOL_SEEN_NONE;
5752
5753   if ((1 << category) & CATEGORY_MASK_UTF_16)
5754     {
5755       int msb, lsb;
5756
5757       msb = category == (coding_category_utf_16_le
5758                          | coding_category_utf_16_le_nosig);
5759       lsb = 1 - msb;
5760
5761       while (src + 1 < src_end)
5762         {
5763           c = src[lsb];
5764           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5765             {
5766               int this_eol;
5767
5768               if (c == '\n')
5769                 this_eol = EOL_SEEN_LF;
5770               else if (src + 3 >= src_end
5771                        || src[msb + 2] != 0
5772                        || src[lsb + 2] != '\n')
5773                 this_eol = EOL_SEEN_CR;
5774               else
5775                 this_eol = EOL_SEEN_CRLF;
5776
5777               if (eol_seen == EOL_SEEN_NONE)
5778                 /* This is the first end-of-line.  */
5779                 eol_seen = this_eol;
5780               else if (eol_seen != this_eol)
5781                 {
5782                   /* The found type is different from what found before.  */
5783                   eol_seen = EOL_SEEN_LF;
5784                   break;
5785                 }
5786               if (++total == MAX_EOL_CHECK_COUNT)
5787                 break;
5788             }
5789           src += 2;
5790         }
5791     }
5792   else
5793     {
5794       while (src < src_end)
5795         {
5796           c = *src++;
5797           if (c == '\n' || c == '\r')
5798             {
5799               int this_eol;
5800
5801               if (c == '\n')
5802                 this_eol = EOL_SEEN_LF;
5803               else if (src >= src_end || *src != '\n')
5804                 this_eol = EOL_SEEN_CR;
5805               else
5806                 this_eol = EOL_SEEN_CRLF, src++;
5807
5808               if (eol_seen == EOL_SEEN_NONE)
5809                 /* This is the first end-of-line.  */
5810                 eol_seen = this_eol;
5811               else if (eol_seen != this_eol)
5812                 {
5813                   /* The found type is different from what found before.  */
5814                   eol_seen = EOL_SEEN_LF;
5815                   break;
5816                 }
5817               if (++total == MAX_EOL_CHECK_COUNT)
5818                 break;
5819             }
5820         }
5821     }
5822   return eol_seen;
5823 }
5824
5825
5826 static Lisp_Object
5827 adjust_coding_eol_type (coding, eol_seen)
5828      struct coding_system *coding;
5829      int eol_seen;
5830 {
5831   Lisp_Object eol_type;
5832
5833   eol_type = CODING_ID_EOL_TYPE (coding->id);
5834   if (eol_seen & EOL_SEEN_LF)
5835     {
5836       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5837       eol_type = Qunix;
5838     }
5839   else if (eol_seen & EOL_SEEN_CRLF)
5840     {
5841       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5842       eol_type = Qdos;
5843     }
5844   else if (eol_seen & EOL_SEEN_CR)
5845     {
5846       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5847       eol_type = Qmac;
5848     }
5849   return eol_type;
5850 }
5851
5852 /* Detect how a text specified in CODING is encoded.  If a coding
5853    system is detected, update fields of CODING by the detected coding
5854    system.  */
5855
5856 void
5857 detect_coding (coding)
5858      struct coding_system *coding;
5859 {
5860   const unsigned char *src, *src_end;
5861
5862   coding->consumed = coding->consumed_char = 0;
5863   coding->produced = coding->produced_char = 0;
5864   coding_set_source (coding);
5865
5866   src_end = coding->source + coding->src_bytes;
5867   coding->head_ascii = 0;
5868
5869   /* If we have not yet decided the text encoding type, detect it
5870      now.  */
5871   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5872     {
5873       int c, i;
5874       struct coding_detection_info detect_info;
5875       int null_byte_found = 0, eight_bit_found = 0;
5876
5877       detect_info.checked = detect_info.found = detect_info.rejected = 0;
5878       for (src = coding->source; src < src_end; src++)
5879         {
5880           c = *src;
5881           if (c & 0x80)
5882             {
5883               eight_bit_found = 1;
5884               if (null_byte_found)
5885                 break;
5886             }
5887           else if (c < 0x20)
5888             {
5889               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5890                   && ! inhibit_iso_escape_detection
5891                   && ! detect_info.checked)
5892                 {
5893                   if (detect_coding_iso_2022 (coding, &detect_info))
5894                     {
5895                       /* We have scanned the whole data.  */
5896                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5897                         {
5898                           /* We didn't find an 8-bit code.  We may
5899                              have found a null-byte, but it's very
5900                              rare that a binary file confirm to
5901                              ISO-2022.  */
5902                           src = src_end;
5903                           coding->head_ascii = src - coding->source;
5904                         }
5905                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
5906                       break;
5907                     }
5908                 }
5909               else if (! c)
5910                 {
5911                   null_byte_found = 1;
5912                   if (eight_bit_found)
5913                     break;
5914                 }
5915               if (! eight_bit_found)
5916                 coding->head_ascii++;
5917             }
5918           else if (! eight_bit_found)
5919             coding->head_ascii++;
5920         }
5921
5922       if (null_byte_found || eight_bit_found
5923           || coding->head_ascii < coding->src_bytes
5924           || detect_info.found)
5925         {
5926           enum coding_category category;
5927           struct coding_system *this;
5928
5929           if (coding->head_ascii == coding->src_bytes)
5930             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
5931             for (i = 0; i < coding_category_raw_text; i++)
5932               {
5933                 category = coding_priorities[i];
5934                 this = coding_categories + category;
5935                 if (detect_info.found & (1 << category))
5936                   break;
5937               }
5938           else
5939             {
5940               if (null_byte_found)
5941                 {
5942                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
5943                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
5944                 }
5945               for (i = 0; i < coding_category_raw_text; i++)
5946                 {
5947                   category = coding_priorities[i];
5948                   this = coding_categories + category;
5949                   if (this->id < 0)
5950                     {
5951                       /* No coding system of this category is defined.  */
5952                       detect_info.rejected |= (1 << category);
5953                     }
5954                   else if (category >= coding_category_raw_text)
5955                     continue;
5956                   else if (detect_info.checked & (1 << category))
5957                     {
5958                       if (detect_info.found & (1 << category))
5959                         break;
5960                     }
5961                   else if ((*(this->detector)) (coding, &detect_info)
5962                            && detect_info.found & (1 << category))
5963                     {
5964                       if (category == coding_category_utf_16_auto)
5965                         {
5966                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5967                             category = coding_category_utf_16_le;
5968                           else
5969                             category = coding_category_utf_16_be;
5970                         }
5971                       break;
5972                     }
5973                 }
5974             }
5975
5976           if (i < coding_category_raw_text)
5977             setup_coding_system (CODING_ID_NAME (this->id), coding);
5978           else if (null_byte_found)
5979             setup_coding_system (Qno_conversion, coding);
5980           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
5981                    == CATEGORY_MASK_ANY)
5982             setup_coding_system (Qraw_text, coding);
5983           else if (detect_info.rejected)
5984             for (i = 0; i < coding_category_raw_text; i++)
5985               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5986                 {
5987                   this = coding_categories + coding_priorities[i];
5988                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5989                   break;
5990                 }
5991         }
5992     }
5993   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5994            == coding_category_utf_8_auto)
5995     {
5996       Lisp_Object coding_systems;
5997       struct coding_detection_info detect_info;
5998
5999       coding_systems
6000         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6001       detect_info.found = detect_info.rejected = 0;
6002       coding->head_ascii = 0;
6003       if (CONSP (coding_systems)
6004           && detect_coding_utf_8 (coding, &detect_info))
6005         {
6006           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6007             setup_coding_system (XCAR (coding_systems), coding);
6008           else
6009             setup_coding_system (XCDR (coding_systems), coding);
6010         }
6011     }
6012   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6013            == coding_category_utf_16_auto)
6014     {
6015       Lisp_Object coding_systems;
6016       struct coding_detection_info detect_info;
6017
6018       coding_systems
6019         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6020       detect_info.found = detect_info.rejected = 0;
6021       coding->head_ascii = 0;
6022       if (CONSP (coding_systems)
6023           && detect_coding_utf_16 (coding, &detect_info))
6024         {
6025           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6026             setup_coding_system (XCAR (coding_systems), coding);
6027           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6028             setup_coding_system (XCDR (coding_systems), coding);
6029         }
6030     }
6031 }
6032
6033
6034 static void
6035 decode_eol (coding)
6036      struct coding_system *coding;
6037 {
6038   Lisp_Object eol_type;
6039   unsigned char *p, *pbeg, *pend;
6040
6041   eol_type = CODING_ID_EOL_TYPE (coding->id);
6042   if (EQ (eol_type, Qunix))
6043     return;
6044
6045   if (NILP (coding->dst_object))
6046     pbeg = coding->destination;
6047   else
6048     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6049   pend = pbeg + coding->produced;
6050
6051   if (VECTORP (eol_type))
6052     {
6053       int eol_seen = EOL_SEEN_NONE;
6054
6055       for (p = pbeg; p < pend; p++)
6056         {
6057           if (*p == '\n')
6058             eol_seen |= EOL_SEEN_LF;
6059           else if (*p == '\r')
6060             {
6061               if (p + 1 < pend && *(p + 1) == '\n')
6062                 {
6063                   eol_seen |= EOL_SEEN_CRLF;
6064                   p++;
6065                 }
6066               else
6067                 eol_seen |= EOL_SEEN_CR;
6068             }
6069         }
6070       if (eol_seen != EOL_SEEN_NONE
6071           && eol_seen != EOL_SEEN_LF
6072           && eol_seen != EOL_SEEN_CRLF
6073           && eol_seen != EOL_SEEN_CR)
6074         eol_seen = EOL_SEEN_LF;
6075       if (eol_seen != EOL_SEEN_NONE)
6076         eol_type = adjust_coding_eol_type (coding, eol_seen);
6077     }
6078
6079   if (EQ (eol_type, Qmac))
6080     {
6081       for (p = pbeg; p < pend; p++)
6082         if (*p == '\r')
6083           *p = '\n';
6084     }
6085   else if (EQ (eol_type, Qdos))
6086     {
6087       int n = 0;
6088
6089       if (NILP (coding->dst_object))
6090         {
6091           /* Start deleting '\r' from the tail to minimize the memory
6092              movement.  */
6093           for (p = pend - 2; p >= pbeg; p--)
6094             if (*p == '\r')
6095               {
6096                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6097                 n++;
6098               }
6099         }
6100       else
6101         {
6102           int pos_byte = coding->dst_pos_byte;
6103           int pos = coding->dst_pos;
6104           int pos_end = pos + coding->produced_char - 1;
6105
6106           while (pos < pos_end)
6107             {
6108               p = BYTE_POS_ADDR (pos_byte);
6109               if (*p == '\r' && p[1] == '\n')
6110                 {
6111                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6112                   n++;
6113                   pos_end--;
6114                 }
6115               pos++;
6116               if (coding->dst_multibyte)
6117                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6118               else
6119                 pos_byte++;
6120             }
6121         }
6122       coding->produced -= n;
6123       coding->produced_char -= n;
6124     }
6125 }
6126
6127
6128 /* Return a translation table (or list of them) from coding system
6129    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6130    decoding (ENCODEP is zero). */
6131
6132 static Lisp_Object
6133 get_translation_table (attrs, encodep, max_lookup)
6134      Lisp_Object attrs;
6135      int encodep, *max_lookup;
6136 {
6137   Lisp_Object standard, translation_table;
6138   Lisp_Object val;
6139
6140   if (encodep)
6141     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6142       standard = Vstandard_translation_table_for_encode;
6143   else
6144     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6145       standard = Vstandard_translation_table_for_decode;
6146   if (NILP (translation_table))
6147     translation_table = standard;
6148   else
6149     {
6150       if (SYMBOLP (translation_table))
6151         translation_table = Fget (translation_table, Qtranslation_table);
6152       else if (CONSP (translation_table))
6153         {
6154           translation_table = Fcopy_sequence (translation_table);
6155           for (val = translation_table; CONSP (val); val = XCDR (val))
6156             if (SYMBOLP (XCAR (val)))
6157               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6158         }
6159       if (CHAR_TABLE_P (standard))
6160         {
6161           if (CONSP (translation_table))
6162             translation_table = nconc2 (translation_table,
6163                                         Fcons (standard, Qnil));
6164           else
6165             translation_table = Fcons (translation_table,
6166                                        Fcons (standard, Qnil));
6167         }
6168     }
6169
6170   if (max_lookup)
6171     {
6172       *max_lookup = 1;
6173       if (CHAR_TABLE_P (translation_table)
6174           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6175         {
6176           val = XCHAR_TABLE (translation_table)->extras[1];
6177           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6178             *max_lookup = XFASTINT (val);
6179         }
6180       else if (CONSP (translation_table))
6181         {
6182           Lisp_Object tail, val;
6183
6184           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6185             if (CHAR_TABLE_P (XCAR (tail))
6186                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6187               {
6188                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6189                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6190                   *max_lookup = XFASTINT (val);
6191               }
6192         }
6193     }
6194   return translation_table;
6195 }
6196
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables.  C and TRANS must be lvalues; both are assigned.

   TRANS is first set to Qnil.  Whenever a table maps C to a
   character, C is replaced by that character and TRANS stays Qnil.
   For a single char-table, any other entry is left in TRANS.  For a
   list, the tables are tried in order with the current value of C,
   and the walk stops at the first entry that is neither nil nor a
   character, which is left in TRANS.

   The arguments are parenthesized, and the loop variable has a name
   unlikely to clash with one used in an argument.  The arguments are
   still evaluated more than once, so they must have no side
   effects.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    (trans) = Qnil;                                                     \
    if (CHAR_TABLE_P (table))                                           \
      {                                                                 \
        (trans) = CHAR_TABLE_REF ((table), (c));                        \
        if (CHARACTERP (trans))                                         \
          (c) = XFASTINT (trans), (trans) = Qnil;                       \
      }                                                                 \
    else if (CONSP (table))                                             \
      {                                                                 \
        Lisp_Object lookup_tail_;                                       \
                                                                        \
        for (lookup_tail_ = (table); CONSP (lookup_tail_);              \
             lookup_tail_ = XCDR (lookup_tail_))                        \
          if (CHAR_TABLE_P (XCAR (lookup_tail_)))                       \
            {                                                           \
              (trans) = CHAR_TABLE_REF (XCAR (lookup_tail_), (c));      \
              if (CHARACTERP (trans))                                   \
                (c) = XFASTINT (trans), (trans) = Qnil;                 \
              else if (! NILP (trans))                                  \
                break;                                                  \
            }                                                           \
      }                                                                 \
  } while (0)

6221
6222
6223 static Lisp_Object
6224 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
6225      Lisp_Object val;
6226      int *buf, *buf_end;
6227      int last_block;
6228      int *from_nchars, *to_nchars;
6229 {
6230   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
6231      [TO-CHAR ...].  */
6232   if (CONSP (val))
6233     {
6234       Lisp_Object from, tail;
6235       int i, len;
6236
6237       for (tail = val; CONSP (tail); tail = XCDR (tail))
6238         {
6239           val = XCAR (tail);
6240           from = XCAR (val);
6241           len = ASIZE (from);
6242           for (i = 0; i < len; i++)
6243             {
6244               if (buf + i == buf_end)
6245                 {
6246                   if (! last_block)
6247                     return Qt;
6248                   break;
6249                 }
6250               if (XINT (AREF (from, i)) != buf[i])
6251                 break;
6252             }
6253           if (i == len)
6254             {
6255               val = XCDR (val);
6256               *from_nchars = len;
6257               break;
6258             }
6259         }
6260       if (! CONSP (tail))
6261         return Qnil;
6262     }
6263   if (VECTORP (val))
6264     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6265   else
6266     *buf = XINT (val);
6267   return val;
6268 }
6269
6270
/* Write decoded text into the destination area of CODING, starting at
   coding->destination + coding->produced.

   When coding->chars_at_source is zero, the characters are taken from
   coding->charbuf (coding->charbuf_used elements): each non-negative
   element is looked up in TRANSLATION_TABLE and written to the
   destination, while a negative element -N starts an annotation datum
   of N elements, which is skipped.  LAST_BLOCK is forwarded to
   get_translation.  Otherwise the coding->consumed bytes at
   coding->source are copied, going through ONE_MORE_BYTE or
   EMIT_ONE_BYTE when the source and the destination differ in
   multibyteness.

   The produced byte and character counts are added to
   coding->produced and coding->produced_char.  The return value is
   the number of trailing charbuf elements left unprocessed (0 when
   chars_at_source is nonzero).  */
6271 static int
6272 produce_chars (coding, translation_table, last_block)
6273      struct coding_system *coding;
6274      Lisp_Object translation_table;
6275      int last_block;
6276 {
6277   unsigned char *dst = coding->destination + coding->produced;
6278   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6279   EMACS_INT produced;
6280   EMACS_INT produced_chars = 0;
6281   int carryover = 0;
6282
6283   if (! coding->chars_at_source)
6284     {
6285       /* Source characters are in coding->charbuf.  */
6286       int *buf = coding->charbuf;
6287       int *buf_end = buf + coding->charbuf_used;
6288
           /* When source and destination are the same object, the
              destination is limited to the part of the source that
              has already been consumed.  */
6289       if (EQ (coding->src_object, coding->dst_object))
6290         {
6291           coding_set_source (coding);
6292           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6293         }
6294
6295       while (buf < buf_end)
6296         {
6297           int c = *buf, i;
6298
6299           if (c >= 0)
6300             {
6301               int from_nchars = 1, to_nchars = 1;
6302               Lisp_Object trans = Qnil;
6303
6304               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6305               if (! NILP (trans))
6306                 {
6307                   trans = get_translation (trans, buf, buf_end, last_block,
6308                                            &from_nchars, &to_nchars);
                       /* Qt means charbuf ended in the middle of a
                          multi-character match and this is not the
                          last block; leave the rest as carryover.  */
6309                   if (EQ (trans, Qt))
6310                     break;
6311                   c = *buf;
6312                 }
6313
6314               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6315                 {
6316                   dst = alloc_destination (coding,
6317                                            buf_end - buf
6318                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6319                                            dst);
6320                   if (EQ (coding->src_object, coding->dst_object))
6321                     {
6322                       coding_set_source (coding);
6323                       dst_end = ((unsigned char *) coding->source) + coding->consumed;
6324                     }
6325                   else
6326                     dst_end = coding->destination + coding->dst_bytes;
6327                 }
6328
                   /* The first output character is C (already stored
                      in *BUF by get_translation when a translation
                      applied); the following ones come from the
                      vector TRANS.  */
6329               for (i = 0; i < to_nchars; i++)
6330                 {
6331                   if (i > 0)
6332                     c = XINT (AREF (trans, i));
6333                   if (coding->dst_multibyte
6334                       || ! CHAR_BYTE8_P (c))
6335                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6336                   else
6337                     *dst++ = CHAR_TO_BYTE8 (c);
6338                 }
6339               produced_chars += to_nchars;
                   /* Replace the FROM_NCHARS consumed elements by the
                      number of characters produced followed by
                      zeros; produce_annotation later sums these
                      non-negative elements to track positions.  */
6340               *buf++ = to_nchars;
6341               while (--from_nchars > 0)
6342                 *buf++ = 0;
6343             }
6344           else
6345             /* This is an annotation datum.  (-C) is the length.  */
6346             buf += -c;
6347         }
6348       carryover = buf_end - buf;
6349     }
6350   else
6351     {
6352       /* Source characters are at coding->source.  */
6353       const unsigned char *src = coding->source;
6354       const unsigned char *src_end = src + coding->consumed;
6355
6356       if (EQ (coding->dst_object, coding->src_object))
6357         dst_end = (unsigned char *) src;
6358       if (coding->src_multibyte != coding->dst_multibyte)
6359         {
6360           if (coding->src_multibyte)
6361             {
6362               int multibytep = 1;
6363               EMACS_INT consumed_chars = 0;
6364
6365               while (1)
6366                 {
6367                   const unsigned char *src_base = src;
6368                   int c;
6369
                       /* NOTE(review): the loop has no explicit exit;
                          presumably ONE_MORE_BYTE jumps to
                          no_more_source when SRC reaches SRC_END --
                          confirm against the macro definition.  */
6370                   ONE_MORE_BYTE (c);
6371                   if (dst == dst_end)
6372                     {
6373                       if (EQ (coding->src_object, coding->dst_object))
6374                         dst_end = (unsigned char *) src;
6375                       if (dst == dst_end)
6376                         {
6377                           EMACS_INT offset = src - coding->source;
6378
6379                           dst = alloc_destination (coding, src_end - src + 1,
6380                                                    dst);
6381                           dst_end = coding->destination + coding->dst_bytes;
                               /* Re-derive SRC from its saved offset
                                  after coding_set_source refreshes
                                  coding->source.  */
6382                           coding_set_source (coding);
6383                           src = coding->source + offset;
                               /* NOTE(review): SRC_END was initialised
                                  from coding->consumed but is
                                  recomputed from coding->src_bytes
                                  here -- confirm the two are meant to
                                  agree.  */
6384                           src_end = coding->source + coding->src_bytes;
6385                           if (EQ (coding->src_object, coding->dst_object))
6386                             dst_end = (unsigned char *) src;
6387                         }
6388                     }
6389                   *dst++ = c;
6390                   produced_chars++;
6391                 }
6392             no_more_source:
6393               ;
6394             }
6395           else
6396             while (src < src_end)
6397               {
6398                 int multibytep = 1;
6399                 int c = *src++;
6400
                     /* Make sure at least two destination bytes are
                        available before calling EMIT_ONE_BYTE.  */
6401                 if (dst >= dst_end - 1)
6402                   {
6403                     if (EQ (coding->src_object, coding->dst_object))
6404                       dst_end = (unsigned char *) src;
6405                     if (dst >= dst_end - 1)
6406                       {
6407                         EMACS_INT offset = src - coding->source;
6408                         EMACS_INT more_bytes;
6409
6410                         if (EQ (coding->src_object, coding->dst_object))
6411                           more_bytes = ((src_end - src) / 2) + 2;
6412                         else
6413                           more_bytes = src_end - src + 2;
6414                         dst = alloc_destination (coding, more_bytes, dst);
6415                         dst_end = coding->destination + coding->dst_bytes;
6416                         coding_set_source (coding);
6417                         src = coding->source + offset;
                             /* NOTE(review): same coding->src_bytes
                                vs. coding->consumed question as in
                                the branch above.  */
6418                         src_end = coding->source + coding->src_bytes;
6419                         if (EQ (coding->src_object, coding->dst_object))
6420                           dst_end = (unsigned char *) src;
6421                       }
6422                   }
6423                 EMIT_ONE_BYTE (c);
6424               }
6425         }
6426       else
6427         {
               /* Same multibyteness: copy the bytes verbatim, after
                  growing the destination if the source is larger.  */
6428           if (!EQ (coding->src_object, coding->dst_object))
6429             {
6430               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6431
6432               if (require > 0)
6433                 {
6434                   EMACS_INT offset = src - coding->source;
6435
6436                   dst = alloc_destination (coding, require, dst);
6437                   coding_set_source (coding);
6438                   src = coding->source + offset;
6439                   src_end = coding->source + coding->src_bytes;
6440                 }
6441             }
6442           produced_chars = coding->consumed_char;
6443           while (src < src_end)
6444             *dst++ = *src++;
6445         }
6446     }
6447
       /* Bytes written by this call only.  */
6448   produced = dst - (coding->destination + coding->produced);
6449   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6450     insert_from_gap (produced_chars, produced);
6451   coding->produced += produced;
6452   coding->produced_char += produced_chars;
6453   return carryover;
6454 }
6455
6456 /* Compose text in CODING->object according to the annotation data at
6457    CHARBUF.  CHARBUF is an array:
6458      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
6459  */
6460
6461 static INLINE void
6462 produce_composition (coding, charbuf, pos)
6463      struct coding_system *coding;
6464      int *charbuf;
6465      EMACS_INT pos;
6466 {
6467   int len;
6468   EMACS_INT to;
6469   enum composition_method method;
6470   Lisp_Object components;
6471
6472   len = -charbuf[0];
6473   to = pos + charbuf[2];
6474   if (to <= pos)
6475     return;
6476   method = (enum composition_method) (charbuf[3]);
6477
6478   if (method == COMPOSITION_RELATIVE)
6479     components = Qnil;
6480   else if (method >= COMPOSITION_WITH_RULE
6481            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6482     {
6483       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6484       int i;
6485
6486       len -= 4;
6487       charbuf += 4;
6488       for (i = 0; i < len; i++)
6489         {
6490           args[i] = make_number (charbuf[i]);
6491           if (charbuf[i] < 0)
6492             return;
6493         }
6494       components = (method == COMPOSITION_WITH_ALTCHARS
6495                     ? Fstring (len, args) : Fvector (len, args));
6496     }
6497   else
6498     return;
6499   compose_text (pos, to, components, Qnil, coding->dst_object);
6500 }
6501
6502
6503 /* Put `charset' property on text in CODING->object according to
6504    the annotation data at CHARBUF.  CHARBUF is an array:
6505      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6506  */
6507
6508 static INLINE void
6509 produce_charset (coding, charbuf, pos)
6510      struct coding_system *coding;
6511      int *charbuf;
6512      EMACS_INT pos;
6513 {
6514   EMACS_INT from = pos - charbuf[2];
6515   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6516
6517   Fput_text_property (make_number (from), make_number (pos),
6518                       Qcharset, CHARSET_NAME (charset),
6519                       coding->dst_object);
6520 }
6521
6522
/* Initial (and maximum) number of elements tried for the work area
   coding->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work area CODING->charbuf with alloca and record its
   element count in CODING->charbuf_size.  The size tried starts at
   CHARBUF_SIZE and is halved after each failed attempt while it is
   still larger than 1024.

   Because the memory comes from alloca, it belongs to the frame of
   the function that uses this macro.  Note the hidden control flow:
   if no memory could be obtained, the macro records
   CODING_RESULT_INSUFFICIENT_MEM and executes `return
   CODING->result;' in the calling function.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        record_conversion_result ((coding),                             \
                                  CODING_RESULT_INSUFFICIENT_MEM);      \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
6544
6545
6546 static void
6547 produce_annotation (coding, pos)
6548      struct coding_system *coding;
6549      EMACS_INT pos;
6550 {
6551   int *charbuf = coding->charbuf;
6552   int *charbuf_end = charbuf + coding->charbuf_used;
6553
6554   if (NILP (coding->dst_object))
6555     return;
6556
6557   while (charbuf < charbuf_end)
6558     {
6559       if (*charbuf >= 0)
6560         pos += *charbuf++;
6561       else
6562         {
6563           int len = -*charbuf;
6564           switch (charbuf[1])
6565             {
6566             case CODING_ANNOTATE_COMPOSITION_MASK:
6567               produce_composition (coding, charbuf, pos);
6568               break;
6569             case CODING_ANNOTATE_CHARSET_MASK:
6570               produce_charset (coding, charbuf, pos);
6571               break;
6572             default:
6573               abort ();
6574             }
6575           charbuf += len;
6576         }
6577     }
6578 }
6579
6580 /* Decode the data at CODING->src_object into CODING->dst_object.
6581    CODING->src_object is a buffer, a string, or nil.
6582    CODING->dst_object is a buffer.
6583
6584    If CODING->src_object is a buffer, it must be the current buffer.
6585    In this case, if CODING->src_pos is positive, it is a position of
6586    the source text in the buffer, otherwise, the source text is in the
6587    gap area of the buffer, and CODING->src_pos specifies the offset of
6588    the text from GPT (which must be the same as PT).  If this is the
6589    same buffer as CODING->dst_object, CODING->src_pos must be
6590    negative.
6591
6592    If CODING->src_object is a string, CODING->src_pos is an index to
6593    that string.
6594
6595    If CODING->src_object is nil, CODING->source must already point to
6596    the non-relocatable memory area.  In this case, CODING->src_pos is
6597    an offset from CODING->source.
6598
6599    The decoded data is inserted at the current point of the buffer
6600    CODING->dst_object.
6601 */
6602
/* The return value is CODING->result as left by the conversion.  On
   return CODING->consumed, CODING->produced and the corresponding
   character counts describe what was processed, and
   CODING->carryover_bytes tells how many undecoded trailing bytes were
   saved in CODING->carryover.  */
6603 static int
6604 decode_coding (coding)
6605      struct coding_system *coding;
6606 {
6607   Lisp_Object attrs;
6608   Lisp_Object undo_list;
6609   Lisp_Object translation_table;
6610   int carryover;
6611   int i;
6612
       /* If the gap lies inside the source text, move it to the start
          of that text.  */
6613   if (BUFFERP (coding->src_object)
6614       && coding->src_pos > 0
6615       && coding->src_pos < GPT
6616       && coding->src_pos + coding->src_chars > GPT)
6617     move_gap_both (coding->src_pos, coding->src_pos_byte);
6618
6619   undo_list = Qt;
6620   if (BUFFERP (coding->dst_object))
6621     {
6622       if (current_buffer != XBUFFER (coding->dst_object))
6623         set_buffer_internal (XBUFFER (coding->dst_object));
6624       if (GPT != PT)
6625         move_gap_both (PT, PT_BYTE);
           /* Turn off undo recording while decoding; the saved list is
              put back at the end and one record_insert is made for
              the whole produced text.  */
6626       undo_list = current_buffer->undo_list;
6627       current_buffer->undo_list = Qt;
6628     }
6629
6630   coding->consumed = coding->consumed_char = 0;
6631   coding->produced = coding->produced_char = 0;
6632   coding->chars_at_source = 0;
6633   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6634   coding->errors = 0;
6635
       /* This macro returns coding->result from this function if the
          work area cannot be allocated.  */
6636   ALLOC_CONVERSION_WORK_AREA (coding);
6637
6638   attrs = CODING_ID_ATTRS (coding->id);
6639   translation_table = get_translation_table (attrs, 0, NULL);
6640
6641   carryover = 0;
       /* Each pass: let the decoder fill coding->charbuf after the
          CARRYOVER elements kept from the previous pass, write the
          characters to the destination, apply annotations, and move
          the elements produce_chars left unprocessed to the head of
          charbuf.  */
6642   do
6643     {
6644       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6645
6646       coding_set_source (coding);
6647       coding->annotated = 0;
6648       coding->charbuf_used = carryover;
6649       (*(coding->decoder)) (coding);
6650       coding_set_destination (coding);
6651       carryover = produce_chars (coding, translation_table, 0);
6652       if (coding->annotated)
6653         produce_annotation (coding, pos);
6654       for (i = 0; i < carryover; i++)
6655         coding->charbuf[i]
6656           = coding->charbuf[coding->charbuf_used - carryover + i];
6657     }
6658   while (coding->consumed < coding->src_bytes
6659          && (coding->result == CODING_RESULT_SUCCESS
6660              || coding->result == CODING_RESULT_INVALID_SRC));
6661
       /* Flush the characters still held back, this time telling
          produce_chars that no more input follows.  */
6662   if (carryover > 0)
6663     {
6664       coding_set_destination (coding);
6665       coding->charbuf_used = carryover;
6666       produce_chars (coding, translation_table, 1);
6667     }
6668
       /* Deal with source bytes the decoder did not consume.  */
6669   coding->carryover_bytes = 0;
6670   if (coding->consumed < coding->src_bytes)
6671     {
6672       int nbytes = coding->src_bytes - coding->consumed;
6673       const unsigned char *src;
6674
6675       coding_set_source (coding);
6676       coding_set_destination (coding);
6677       src = coding->source + coding->consumed;
6678
6679       if (coding->mode & CODING_MODE_LAST_BLOCK)
6680         {
6681           /* Flush out unprocessed data as binary chars.  We are sure
6682              that the number of data is less than the size of
6683              coding->charbuf.  */
6684           coding->charbuf_used = 0;
6685           coding->chars_at_source = 0;
6686
6687           while (nbytes-- > 0)
6688             {
6689               int c = *src++;
6690
                   /* Bytes with the high bit set are converted with
                      BYTE8_TO_CHAR; other bytes are stored as is.  */
6691               if (c & 0x80)
6692                 c = BYTE8_TO_CHAR (c);
6693               coding->charbuf[coding->charbuf_used++] = c;
6694             }
               /* No translation table is applied to these chars.  */
6695           produce_chars (coding, Qnil, 1);
6696         }
6697       else
6698         {
6699           /* Record unprocessed bytes in coding->carryover.  We are
6700              sure that the number of data is less than the size of
6701              coding->carryover.  */
6702           unsigned char *p = coding->carryover;
6703
6704           coding->carryover_bytes = nbytes;
6705           while (nbytes-- > 0)
6706             *p++ = *src++;
6707         }
6708       coding->consumed = coding->src_bytes;
6709     }
6710
6711   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6712     decode_eol (coding);
6713   if (BUFFERP (coding->dst_object))
6714     {
6715       current_buffer->undo_list = undo_list;
6716       record_insert (coding->dst_pos, coding->produced_char);
6717     }
6718   return coding->result;
6719 }
6720
6721
6722 /* Extract an annotation datum from a composition starting at POS and
6723    ending before LIMIT of CODING->src_object (buffer or string), store
6724    the data in BUF, set *STOP to a starting position of the next
6725    composition (if any) or to LIMIT, and return the address of the
6726    next element of BUF.
6727
6728    If such an annotation is not found, set *STOP to a starting
6729    position of a composition after POS (if any) or to LIMIT, and
6730    return BUF.  */
6731
6732 static INLINE int *
6733 handle_composition_annotation (pos, limit, coding, buf, stop)
6734      EMACS_INT pos, limit;
6735      struct coding_system *coding;
6736      int *buf;
6737      EMACS_INT *stop;
6738 {
6739   EMACS_INT start, end;
6740   Lisp_Object prop;
6741
6742   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6743       || end > limit)
6744     *stop = limit;
6745   else if (start > pos)
6746     *stop = start;
6747   else
6748     {
6749       if (start == pos)
6750         {
6751           /* We found a composition.  Store the corresponding
6752              annotation data in BUF.  */
6753           int *head = buf;
6754           enum composition_method method = COMPOSITION_METHOD (prop);
6755           int nchars = COMPOSITION_LENGTH (prop);
6756
6757           ADD_COMPOSITION_DATA (buf, nchars, method);
6758           if (method != COMPOSITION_RELATIVE)
6759             {
6760               Lisp_Object components;
6761               int len, i, i_byte;
6762
6763               components = COMPOSITION_COMPONENTS (prop);
6764               if (VECTORP (components))
6765                 {
6766                   len = XVECTOR (components)->size;
6767                   for (i = 0; i < len; i++)
6768                     *buf++ = XINT (AREF (components, i));
6769                 }
6770               else if (STRINGP (components))
6771                 {
6772                   len = SCHARS (components);
6773                   i = i_byte = 0;
6774                   while (i < len)
6775                     {
6776                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6777                       buf++;
6778                     }
6779                 }
6780               else if (INTEGERP (components))
6781                 {
6782                   len = 1;
6783                   *buf++ = XINT (components);
6784                 }
6785               else if (CONSP (components))
6786                 {
6787                   for (len = 0; CONSP (components);
6788                        len++, components = XCDR (components))
6789                     *buf++ = XINT (XCAR (components));
6790                 }
6791               else
6792                 abort ();
6793               *head -= len;
6794             }
6795         }
6796
6797       if (find_composition (end, limit, &start, &end, &prop,
6798                             coding->src_object)
6799           && end <= limit)
6800         *stop = start;
6801       else
6802         *stop = limit;
6803     }
6804   return buf;
6805 }
6806
6807
6808 /* Extract an annotation datum from a text property `charset' at POS of
6809    CODING->src_object (buffer of string), store the data in BUF, set
6810    *STOP to the position where the value of `charset' property changes
6811    (limiting by LIMIT), and return the address of the next element of
6812    BUF.
6813
6814    If the property value is nil, set *STOP to the position where the
6815    property value is non-nil (limiting by LIMIT), and return BUF.  */
6816
6817 static INLINE int *
6818 handle_charset_annotation (pos, limit, coding, buf, stop)
6819      EMACS_INT pos, limit;
6820      struct coding_system *coding;
6821      int *buf;
6822      EMACS_INT *stop;
6823 {
6824   Lisp_Object val, next;
6825   int id;
6826
6827   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6828   if (! NILP (val) && CHARSETP (val))
6829     id = XINT (CHARSET_SYMBOL_ID (val));
6830   else
6831     id = -1;
6832   ADD_CHARSET_DATA (buf, 0, id);
6833   next = Fnext_single_property_change (make_number (pos), Qcharset,
6834                                        coding->src_object,
6835                                        make_number (limit));
6836   *stop = XINT (next);
6837   return buf;
6838 }
6839
6840
/* Fill CODING->charbuf with characters read from the source text of
   CODING, starting after the CODING->consumed bytes already handled.
   Each character goes through end-of-line conversion according to the
   EOL type of CODING and is then looked up in TRANSLATION_TABLE;
   `charset' annotation data are inserted where CODING asks for them.
   MAX_LOOKUP is the number of characters (including the current one)
   that may be examined for a multi-character translation.

   On return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated and CODING->chars_at_source is
   cleared.  */
6841 static void
6842 consume_chars (coding, translation_table, max_lookup)
6843      struct coding_system *coding;
6844      Lisp_Object translation_table;
6845      int max_lookup;
6846 {
6847   int *buf = coding->charbuf;
6848   int *buf_end = coding->charbuf + coding->charbuf_size;
6849   const unsigned char *src = coding->source + coding->consumed;
6850   const unsigned char *src_end = coding->source + coding->src_bytes;
6851   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6852   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6853   int multibytep = coding->src_multibyte;
6854   Lisp_Object eol_type;
6855   int c;
6856   EMACS_INT stop, stop_composition, stop_charset;
6857   int *lookup_buf = NULL;
6858
       /* LOOKUP_BUF is only needed (and only allocated) when there is
          a translation table.  */
6859   if (! NILP (translation_table))
6860     lookup_buf = alloca (sizeof (int) * max_lookup);
6861
       /* A vector EOL type is handled like Qunix here, i.e. no
          end-of-line conversion is done.  */
6862   eol_type = CODING_ID_EOL_TYPE (coding->id);
6863   if (VECTORP (eol_type))
6864     eol_type = Qunix;
6865
6866   /* Note: composition handling is not yet implemented.  */
6867   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6868
       /* STOP is the next position at which annotations have to be
          looked at; without a source object there are none.  */
6869   if (NILP (coding->src_object))
6870     stop = stop_composition = stop_charset = end_pos;
6871   else
6872     {
6873       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6874         stop = stop_composition = pos;
6875       else
6876         stop = stop_composition = end_pos;
6877       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6878         stop = stop_charset = pos;
6879       else
6880         stop_charset = end_pos;
6881     }
6882
6883   /* Compensate for CRLF and conversion.  */
6884   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6885   while (buf < buf_end)
6886     {
6887       Lisp_Object trans;
6888
6889       if (pos == stop)
6890         {
6891           if (pos == end_pos)
6892             break;
6893           if (pos == stop_composition)
6894             buf = handle_composition_annotation (pos, end_pos, coding,
6895                                                  buf, &stop_composition);
6896           if (pos == stop_charset)
6897             buf = handle_charset_annotation (pos, end_pos, coding,
6898                                              buf, &stop_charset);
6899           stop = (stop_composition < stop_charset
6900                   ? stop_composition : stop_charset);
6901         }
6902
           /* Fetch the next character C and advance SRC and POS.  For
              a unibyte source POS advances by bytes.  */
6903       if (! multibytep)
6904         {
6905           EMACS_INT bytes;
6906
6907           if (coding->encoder == encode_coding_raw_text)
6908             c = *src++, pos++;
6909           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6910             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
6911           else
6912             c = BYTE8_TO_CHAR (*src), src++, pos++;
6913         }
6914       else
6915         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
6916       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6917         c = '\n';
           /* End-of-line conversion: for Qdos a '\r' is stored in
              front of the '\n'; for any other non-unix type the '\n'
              itself becomes '\r'.  */
6918       if (! EQ (eol_type, Qunix))
6919         {
6920           if (c == '\n')
6921             {
6922               if (EQ (eol_type, Qdos))
6923                 *buf++ = '\r';
6924               else
6925                 c = '\r';
6926             }
6927         }
6928
6929       trans = Qnil;
6930       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6931       if (NILP (trans))
6932         *buf++ = c;
6933       else
6934         {
6935           int from_nchars = 1, to_nchars = 1;
6936           int *lookup_buf_end;
6937           const unsigned char *p = src;
6938           int i;
6939
               /* Collect C and up to MAX_LOOKUP - 1 following
                  characters, without consuming them, so that
                  get_translation can try multi-character rules.  */
6940           lookup_buf[0] = c;
6941           for (i = 1; i < max_lookup && p < src_end; i++)
6942             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6943           lookup_buf_end = lookup_buf + i;
6944           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6945                                    &from_nchars, &to_nchars);
               /* NOTE(review): C has already been read (SRC and POS
                  were advanced, and a '\r' may have been stored for
                  Qdos), so leaving the loop here counts C as consumed
                  without storing it in charbuf.  get_translation is
                  called with LAST_BLOCK = 1 and thus should not
                  return Qt; confirm that the TO_NCHARS overflow case
                  cannot occur in practice either.  */
6946           if (EQ (trans, Qt)
6947               || buf + to_nchars > buf_end)
6948             break;
               /* get_translation stored the first output character in
                  LOOKUP_BUF[0]; the remaining ones are in TRANS.  */
6949           *buf++ = *lookup_buf;
6950           for (i = 1; i < to_nchars; i++)
6951             *buf++ = XINT (AREF (trans, i));
               /* Consume the other source characters that took part
                  in the match.  */
6952           for (i = 1; i < from_nchars; i++, pos++)
6953             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6954         }
6955     }
6956
6957   coding->consumed = src - coding->source;
6958   coding->consumed_char = pos - coding->src_pos;
6959   coding->charbuf_used = buf - coding->charbuf;
6960   coding->chars_at_source = 0;
6961 }
6962
6963
6964 /* Encode the text at CODING->src_object into CODING->dst_object.
6965    CODING->src_object is a buffer or a string.
6966    CODING->dst_object is a buffer or nil.
6967
6968    If CODING->src_object is a buffer, it must be the current buffer.
6969    In this case, if CODING->src_pos is positive, it is a position of
6970    the source text in the buffer, otherwise. the source text is in the
6971    gap area of the buffer, and coding->src_pos specifies the offset of
6972    the text from GPT (which must be the same as PT).  If this is the
6973    same buffer as CODING->dst_object, CODING->src_pos must be
6974    negative and CODING should not have `pre-write-conversion'.
6975
6976    If CODING->src_object is a string, CODING should not have
6977    `pre-write-conversion'.
6978
6979    If CODING->dst_object is a buffer, the encoded data is inserted at
6980    the current point of that buffer.
6981
6982    If CODING->dst_object is nil, the encoded data is placed at the
6983    memory area specified by CODING->destination.  */
6984
6985 static int
6986 encode_coding (coding)
6987      struct coding_system *coding;
6988 {
6989   Lisp_Object attrs;
6990   Lisp_Object translation_table;
6991   int max_lookup;
6992
6993   attrs = CODING_ID_ATTRS (coding->id);
6994   if (coding->encoder == encode_coding_raw_text)
6995     translation_table = Qnil, max_lookup = 0;
6996   else
6997     translation_table = get_translation_table (attrs, 1, &max_lookup);
6998
6999   if (BUFFERP (coding->dst_object))
7000     {
7001       set_buffer_internal (XBUFFER (coding->dst_object));
7002       coding->dst_multibyte
7003         = ! NILP (current_buffer->enable_multibyte_characters);
7004     }
7005
7006   coding->consumed = coding->consumed_char = 0;
7007   coding->produced = coding->produced_char = 0;
7008   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7009   coding->errors = 0;
7010
7011   ALLOC_CONVERSION_WORK_AREA (coding);
7012
7013   do {
7014     coding_set_source (coding);
7015     consume_chars (coding, translation_table, max_lookup);
7016     coding_set_destination (coding);
7017     (*(coding->encoder)) (coding);
7018   } while (coding->consumed_char < coding->src_chars);
7019
7020   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7021     insert_from_gap (coding->produced_char, coding->produced);
7022
7023   return (coding->result);
7024 }
7025
7026
7027 /* Name (or base name) of work buffers for code conversion.  */
7028 static Lisp_Object Vcode_conversion_workbuf_name;
7029
7030 /* A working buffer used by the top level conversion.  Once it is
7031    created, it is never destroyed (it is recreated on demand if it
7032    is found dead).  It has the name Vcode_conversion_workbuf_name.
7033    The other working buffers are killed after the use is finished,
7034    and their names are derived from Vcode_conversion_workbuf_name.  */
7035 static Lisp_Object Vcode_conversion_reused_workbuf;
7036
7037 /* Nonzero while Vcode_conversion_reused_workbuf is in use.  It is
        incremented by every call of make_conversion_work_buffer and
        reset to 0 by code_conversion_restore when the reused buffer
        is released.  */
7038 static int reused_workbuf_in_use;
7039
7040
7041 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7042    multibyteness of returning buffer.  */
7043
7044 static Lisp_Object
7045 make_conversion_work_buffer (multibyte)
7046      int multibyte;
7047 {
7048   Lisp_Object name, workbuf;
7049   struct buffer *current;
7050
7051   if (reused_workbuf_in_use++)
7052     {
7053       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7054       workbuf = Fget_buffer_create (name);
7055     }
7056   else
7057     {
7058       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7059         Vcode_conversion_reused_workbuf
7060           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7061       workbuf = Vcode_conversion_reused_workbuf;
7062     }
7063   current = current_buffer;
7064   set_buffer_internal (XBUFFER (workbuf));
7065   /* We can't allow modification hooks to run in the work buffer.  For
7066      instance, directory_files_internal assumes that file decoding
7067      doesn't compile new regexps.  */
7068   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7069   Ferase_buffer ();
7070   current_buffer->undo_list = Qt;
7071   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7072   set_buffer_internal (current);
7073   return workbuf;
7074 }
7075
7076
7077 static Lisp_Object
7078 code_conversion_restore (arg)
7079      Lisp_Object arg;
7080 {
7081   Lisp_Object current, workbuf;
7082   struct gcpro gcpro1;
7083
7084   GCPRO1 (arg);
7085   current = XCAR (arg);
7086   workbuf = XCDR (arg);
7087   if (! NILP (workbuf))
7088     {
7089       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7090         reused_workbuf_in_use = 0;
7091       else if (! NILP (Fbuffer_live_p (workbuf)))
7092         Fkill_buffer (workbuf);
7093     }
7094   set_buffer_internal (XBUFFER (current));
7095   UNGCPRO;
7096   return Qnil;
7097 }
7098
7099 Lisp_Object
7100 code_conversion_save (with_work_buf, multibyte)
7101      int with_work_buf, multibyte;
7102 {
7103   Lisp_Object workbuf = Qnil;
7104
7105   if (with_work_buf)
7106     workbuf = make_conversion_work_buffer (multibyte);
7107   record_unwind_protect (code_conversion_restore,
7108                          Fcons (Fcurrent_buffer (), workbuf));
7109   return workbuf;
7110 }
7111
7112 int
7113 decode_coding_gap (coding, chars, bytes)
7114      struct coding_system *coding;
7115      EMACS_INT chars, bytes;
7116 {
7117   int count = specpdl_ptr - specpdl;
7118   Lisp_Object attrs;
7119
7120   code_conversion_save (0, 0);
7121
7122   coding->src_object = Fcurrent_buffer ();
7123   coding->src_chars = chars;
7124   coding->src_bytes = bytes;
7125   coding->src_pos = -chars;
7126   coding->src_pos_byte = -bytes;
7127   coding->src_multibyte = chars < bytes;
7128   coding->dst_object = coding->src_object;
7129   coding->dst_pos = PT;
7130   coding->dst_pos_byte = PT_BYTE;
7131   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7132
7133   if (CODING_REQUIRE_DETECTION (coding))
7134     detect_coding (coding);
7135
7136   coding->mode |= CODING_MODE_LAST_BLOCK;
7137   current_buffer->text->inhibit_shrinking = 1;
7138   decode_coding (coding);
7139   current_buffer->text->inhibit_shrinking = 0;
7140
7141   attrs = CODING_ID_ATTRS (coding->id);
7142   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7143     {
7144       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7145       Lisp_Object val;
7146
7147       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7148       val = call1 (CODING_ATTR_POST_READ (attrs),
7149                    make_number (coding->produced_char));
7150       CHECK_NATNUM (val);
7151       coding->produced_char += Z - prev_Z;
7152       coding->produced += Z_BYTE - prev_Z_BYTE;
7153     }
7154
7155   unbind_to (count, Qnil);
7156   return coding->result;
7157 }
7158
7159 int
7160 encode_coding_gap (coding, chars, bytes)
7161      struct coding_system *coding;
7162      EMACS_INT chars, bytes;
7163 {
7164   int count = specpdl_ptr - specpdl;
7165
7166   code_conversion_save (0, 0);
7167
7168   coding->src_object = Fcurrent_buffer ();
7169   coding->src_chars = chars;
7170   coding->src_bytes = bytes;
7171   coding->src_pos = -chars;
7172   coding->src_pos_byte = -bytes;
7173   coding->src_multibyte = chars < bytes;
7174   coding->dst_object = coding->src_object;
7175   coding->dst_pos = PT;
7176   coding->dst_pos_byte = PT_BYTE;
7177
7178   encode_coding (coding);
7179
7180   unbind_to (count, Qnil);
7181   return coding->result;
7182 }
7183
7184
7185 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7186    SRC_OBJECT into DST_OBJECT by coding context CODING.
7187
7188    SRC_OBJECT is a buffer, a string, or Qnil.
7189
7190    If it is a buffer, the text is at point of the buffer.  FROM and TO
7191    are positions in the buffer.
7192
7193    If it is a string, the text is at the beginning of the string.
7194    FROM and TO are indices to the string.
7195
7196    If it is nil, the text is at coding->source.  FROM and TO are
7197    indices to coding->source.
7198
7199    DST_OBJECT is a buffer, Qt, or Qnil.
7200
7201    If it is a buffer, the decoded text is inserted at point of the
7202    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7203    is deleted.
7204
7205    If it is Qt, a string is made from the decoded text, and
7206    set in CODING->dst_object.
7207
7208    If it is Qnil, the decoded text is stored at CODING->destination.
7209    The caller must allocate CODING->dst_bytes bytes at
7210    CODING->destination by xmalloc.  If the decoded text is longer than
7211    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7212  */
7213
7214 void
7215 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7216                       dst_object)
7217      struct coding_system *coding;
7218      Lisp_Object src_object;
7219      EMACS_INT from, from_byte, to, to_byte;
7220      Lisp_Object dst_object;
7221 {
7222   int count = specpdl_ptr - specpdl;
7223   unsigned char *destination;
7224   EMACS_INT dst_bytes;
7225   EMACS_INT chars = to - from;
7226   EMACS_INT bytes = to_byte - from_byte;
7227   Lisp_Object attrs;
7228   int saved_pt = -1, saved_pt_byte;
7229   int need_marker_adjustment = 0;
7230   Lisp_Object old_deactivate_mark;
7231
7232   old_deactivate_mark = Vdeactivate_mark;
7233
7234   if (NILP (dst_object))
7235     {
7236       destination = coding->destination;
7237       dst_bytes = coding->dst_bytes;
7238     }
7239
7240   coding->src_object = src_object;
7241   coding->src_chars = chars;
7242   coding->src_bytes = bytes;
7243   coding->src_multibyte = chars < bytes;
7244
7245   if (STRINGP (src_object))
7246     {
7247       coding->src_pos = from;
7248       coding->src_pos_byte = from_byte;
7249     }
7250   else if (BUFFERP (src_object))
7251     {
7252       set_buffer_internal (XBUFFER (src_object));
7253       if (from != GPT)
7254         move_gap_both (from, from_byte);
7255       if (EQ (src_object, dst_object))
7256         {
7257           struct Lisp_Marker *tail;
7258
7259           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7260             {
7261               tail->need_adjustment
7262                 = tail->charpos == (tail->insertion_type ? from : to);
7263               need_marker_adjustment |= tail->need_adjustment;
7264             }
7265           saved_pt = PT, saved_pt_byte = PT_BYTE;
7266           TEMP_SET_PT_BOTH (from, from_byte);
7267           current_buffer->text->inhibit_shrinking = 1;
7268           del_range_both (from, from_byte, to, to_byte, 1);
7269           coding->src_pos = -chars;
7270           coding->src_pos_byte = -bytes;
7271         }
7272       else
7273         {
7274           coding->src_pos = from;
7275           coding->src_pos_byte = from_byte;
7276         }
7277     }
7278
7279   if (CODING_REQUIRE_DETECTION (coding))
7280     detect_coding (coding);
7281   attrs = CODING_ID_ATTRS (coding->id);
7282
7283   if (EQ (dst_object, Qt)
7284       || (! NILP (CODING_ATTR_POST_READ (attrs))
7285           && NILP (dst_object)))
7286     {
7287       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7288       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7289       coding->dst_pos = BEG;
7290       coding->dst_pos_byte = BEG_BYTE;
7291     }
7292   else if (BUFFERP (dst_object))
7293     {
7294       code_conversion_save (0, 0);
7295       coding->dst_object = dst_object;
7296       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7297       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7298       coding->dst_multibyte
7299         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7300     }
7301   else
7302     {
7303       code_conversion_save (0, 0);
7304       coding->dst_object = Qnil;
7305       /* Most callers presume this will return a multibyte result, and they
7306          won't use `binary' or `raw-text' anyway, so let's not worry about
7307          CODING_FOR_UNIBYTE.  */
7308       coding->dst_multibyte = 1;
7309     }
7310
7311   decode_coding (coding);
7312
7313   if (BUFFERP (coding->dst_object))
7314     set_buffer_internal (XBUFFER (coding->dst_object));
7315
7316   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7317     {
7318       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7319       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7320       Lisp_Object val;
7321
7322       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7323       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7324               old_deactivate_mark);
7325       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7326                         make_number (coding->produced_char));
7327       UNGCPRO;
7328       CHECK_NATNUM (val);
7329       coding->produced_char += Z - prev_Z;
7330       coding->produced += Z_BYTE - prev_Z_BYTE;
7331     }
7332
7333   if (EQ (dst_object, Qt))
7334     {
7335       coding->dst_object = Fbuffer_string ();
7336     }
7337   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7338     {
7339       set_buffer_internal (XBUFFER (coding->dst_object));
7340       if (dst_bytes < coding->produced)
7341         {
7342           destination = xrealloc (destination, coding->produced);
7343           if (! destination)
7344             {
7345               record_conversion_result (coding,
7346                                         CODING_RESULT_INSUFFICIENT_DST);
7347               unbind_to (count, Qnil);
7348               return;
7349             }
7350           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7351             move_gap_both (BEGV, BEGV_BYTE);
7352           bcopy (BEGV_ADDR, destination, coding->produced);
7353           coding->destination = destination;
7354         }
7355     }
7356
7357   if (saved_pt >= 0)
7358     {
7359       /* This is the case of:
7360          (BUFFERP (src_object) && EQ (src_object, dst_object))
7361          As we have moved PT while replacing the original buffer
7362          contents, we must recover it now.  */
7363       set_buffer_internal (XBUFFER (src_object));
7364       current_buffer->text->inhibit_shrinking = 0;
7365       if (saved_pt < from)
7366         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7367       else if (saved_pt < from + chars)
7368         TEMP_SET_PT_BOTH (from, from_byte);
7369       else if (! NILP (current_buffer->enable_multibyte_characters))
7370         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7371                           saved_pt_byte + (coding->produced - bytes));
7372       else
7373         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7374                           saved_pt_byte + (coding->produced - bytes));
7375
7376       if (need_marker_adjustment)
7377         {
7378           struct Lisp_Marker *tail;
7379
7380           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7381             if (tail->need_adjustment)
7382               {
7383                 tail->need_adjustment = 0;
7384                 if (tail->insertion_type)
7385                   {
7386                     tail->bytepos = from_byte;
7387                     tail->charpos = from;
7388                   }
7389                 else
7390                   {
7391                     tail->bytepos = from_byte + coding->produced;
7392                     tail->charpos
7393                       = (NILP (current_buffer->enable_multibyte_characters)
7394                          ? tail->bytepos : from + coding->produced_char);
7395                   }
7396               }
7397         }
7398     }
7399
7400   Vdeactivate_mark = old_deactivate_mark;
7401   unbind_to (count, coding->dst_object);
7402 }
7403
7404
7405 void
7406 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7407                       dst_object)
7408      struct coding_system *coding;
7409      Lisp_Object src_object;
7410      EMACS_INT from, from_byte, to, to_byte;
7411      Lisp_Object dst_object;
7412 {
7413   int count = specpdl_ptr - specpdl;
7414   EMACS_INT chars = to - from;
7415   EMACS_INT bytes = to_byte - from_byte;
7416   Lisp_Object attrs;
7417   int saved_pt = -1, saved_pt_byte;
7418   int need_marker_adjustment = 0;
7419   int kill_src_buffer = 0;
7420   Lisp_Object old_deactivate_mark;
7421
7422   old_deactivate_mark = Vdeactivate_mark;
7423
7424   coding->src_object = src_object;
7425   coding->src_chars = chars;
7426   coding->src_bytes = bytes;
7427   coding->src_multibyte = chars < bytes;
7428
7429   attrs = CODING_ID_ATTRS (coding->id);
7430
7431   if (EQ (src_object, dst_object))
7432     {
7433       struct Lisp_Marker *tail;
7434
7435       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7436         {
7437           tail->need_adjustment
7438             = tail->charpos == (tail->insertion_type ? from : to);
7439           need_marker_adjustment |= tail->need_adjustment;
7440         }
7441     }
7442
7443   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7444     {
7445       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7446       set_buffer_internal (XBUFFER (coding->src_object));
7447       if (STRINGP (src_object))
7448         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7449       else if (BUFFERP (src_object))
7450         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7451       else
7452         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7453
7454       if (EQ (src_object, dst_object))
7455         {
7456           set_buffer_internal (XBUFFER (src_object));
7457           saved_pt = PT, saved_pt_byte = PT_BYTE;
7458           del_range_both (from, from_byte, to, to_byte, 1);
7459           set_buffer_internal (XBUFFER (coding->src_object));
7460         }
7461
7462       {
7463         Lisp_Object args[3];
7464         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7465
7466         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7467                 old_deactivate_mark);
7468         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7469         args[1] = make_number (BEG);
7470         args[2] = make_number (Z);
7471         safe_call (3, args);
7472         UNGCPRO;
7473       }
7474       if (XBUFFER (coding->src_object) != current_buffer)
7475         kill_src_buffer = 1;
7476       coding->src_object = Fcurrent_buffer ();
7477       if (BEG != GPT)
7478         move_gap_both (BEG, BEG_BYTE);
7479       coding->src_chars = Z - BEG;
7480       coding->src_bytes = Z_BYTE - BEG_BYTE;
7481       coding->src_pos = BEG;
7482       coding->src_pos_byte = BEG_BYTE;
7483       coding->src_multibyte = Z < Z_BYTE;
7484     }
7485   else if (STRINGP (src_object))
7486     {
7487       code_conversion_save (0, 0);
7488       coding->src_pos = from;
7489       coding->src_pos_byte = from_byte;
7490     }
7491   else if (BUFFERP (src_object))
7492     {
7493       code_conversion_save (0, 0);
7494       set_buffer_internal (XBUFFER (src_object));
7495       if (EQ (src_object, dst_object))
7496         {
7497           saved_pt = PT, saved_pt_byte = PT_BYTE;
7498           coding->src_object = del_range_1 (from, to, 1, 1);
7499           coding->src_pos = 0;
7500           coding->src_pos_byte = 0;
7501         }
7502       else
7503         {
7504           if (from < GPT && to >= GPT)
7505             move_gap_both (from, from_byte);
7506           coding->src_pos = from;
7507           coding->src_pos_byte = from_byte;
7508         }
7509     }
7510   else
7511     code_conversion_save (0, 0);
7512
7513   if (BUFFERP (dst_object))
7514     {
7515       coding->dst_object = dst_object;
7516       if (EQ (src_object, dst_object))
7517         {
7518           coding->dst_pos = from;
7519           coding->dst_pos_byte = from_byte;
7520         }
7521       else
7522         {
7523           struct buffer *current = current_buffer;
7524
7525           set_buffer_temp (XBUFFER (dst_object));
7526           coding->dst_pos = PT;
7527           coding->dst_pos_byte = PT_BYTE;
7528           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7529           set_buffer_temp (current);
7530         }
7531       coding->dst_multibyte
7532         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7533     }
7534   else if (EQ (dst_object, Qt))
7535     {
7536       coding->dst_object = Qnil;
7537       coding->dst_bytes = coding->src_chars;
7538       if (coding->dst_bytes == 0)
7539         coding->dst_bytes = 1;
7540       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7541       coding->dst_multibyte = 0;
7542     }
7543   else
7544     {
7545       coding->dst_object = Qnil;
7546       coding->dst_multibyte = 0;
7547     }
7548
7549   encode_coding (coding);
7550
7551   if (EQ (dst_object, Qt))
7552     {
7553       if (BUFFERP (coding->dst_object))
7554         coding->dst_object = Fbuffer_string ();
7555       else
7556         {
7557           coding->dst_object
7558             = make_unibyte_string ((char *) coding->destination,
7559                                    coding->produced);
7560           xfree (coding->destination);
7561         }
7562     }
7563
7564   if (saved_pt >= 0)
7565     {
7566       /* This is the case of:
7567          (BUFFERP (src_object) && EQ (src_object, dst_object))
7568          As we have moved PT while replacing the original buffer
7569          contents, we must recover it now.  */
7570       set_buffer_internal (XBUFFER (src_object));
7571       if (saved_pt < from)
7572         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7573       else if (saved_pt < from + chars)
7574         TEMP_SET_PT_BOTH (from, from_byte);
7575       else if (! NILP (current_buffer->enable_multibyte_characters))
7576         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7577                           saved_pt_byte + (coding->produced - bytes));
7578       else
7579         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7580                           saved_pt_byte + (coding->produced - bytes));
7581
7582       if (need_marker_adjustment)
7583         {
7584           struct Lisp_Marker *tail;
7585
7586           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7587             if (tail->need_adjustment)
7588               {
7589                 tail->need_adjustment = 0;
7590                 if (tail->insertion_type)
7591                   {
7592                     tail->bytepos = from_byte;
7593                     tail->charpos = from;
7594                   }
7595                 else
7596                   {
7597                     tail->bytepos = from_byte + coding->produced;
7598                     tail->charpos
7599                       = (NILP (current_buffer->enable_multibyte_characters)
7600                          ? tail->bytepos : from + coding->produced_char);
7601                   }
7602               }
7603         }
7604     }
7605
7606   if (kill_src_buffer)
7607     Fkill_buffer (coding->src_object);
7608
7609   Vdeactivate_mark = old_deactivate_mark;
7610   unbind_to (count, Qnil);
7611 }
7612
7613
7614 Lisp_Object
7615 preferred_coding_system ()
7616 {
7617   int id = coding_categories[coding_priorities[0]].id;
7618
7619   return CODING_ID_NAME (id);
7620 }
7621
7622 \f
7623 #ifdef emacs
/*** 11. Emacs Lisp library functions ***/
7625
7626 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
7627        doc: /* Return t if OBJECT is nil or a coding-system.
7628 See the documentation of `define-coding-system' for information
7629 about coding-system objects.  */)
7630      (object)
7631      Lisp_Object object;
7632 {
7633   if (NILP (object)
7634       || CODING_SYSTEM_ID (object) >= 0)
7635     return Qt;
7636   if (! SYMBOLP (object)
7637       || NILP (Fget (object, Qcoding_system_define_form)))
7638     return Qnil;
7639   return Qt;
7640 }
7641
7642 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7643        Sread_non_nil_coding_system, 1, 1, 0,
7644        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7645      (prompt)
7646      Lisp_Object prompt;
7647 {
7648   Lisp_Object val;
7649   do
7650     {
7651       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7652                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
7653     }
7654   while (SCHARS (val) == 0);
7655   return (Fintern (val, Qnil));
7656 }
7657
7658 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7659        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7660 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7661 Ignores case when completing coding systems (all Emacs coding systems
7662 are lower-case).  */)
7663      (prompt, default_coding_system)
7664      Lisp_Object prompt, default_coding_system;
7665 {
7666   Lisp_Object val;
7667   int count = SPECPDL_INDEX ();
7668
7669   if (SYMBOLP (default_coding_system))
7670     default_coding_system = SYMBOL_NAME (default_coding_system);
7671   specbind (Qcompletion_ignore_case, Qt);
7672   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7673                           Qt, Qnil, Qcoding_system_history,
7674                           default_coding_system, Qnil);
7675   unbind_to (count, Qnil);
7676   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
7677 }
7678
7679 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7680        1, 1, 0,
7681        doc: /* Check validity of CODING-SYSTEM.
7682 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7683 It is valid if it is nil or a symbol defined as a coding system by the
7684 function `define-coding-system'.  */)
7685   (coding_system)
7686      Lisp_Object coding_system;
7687 {
7688   Lisp_Object define_form;
7689
7690   define_form = Fget (coding_system, Qcoding_system_define_form);
7691   if (! NILP (define_form))
7692     {
7693       Fput (coding_system, Qcoding_system_define_form, Qnil);
7694       safe_eval (define_form);
7695     }
7696   if (!NILP (Fcoding_system_p (coding_system)))
7697     return coding_system;
7698   xsignal1 (Qcoding_system_error, coding_system);
7699 }
7700
7701 \f
/* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
   HIGHEST is nonzero, return the coding system of the highest
   priority among the detected coding systems.  Otherwise return a
   list of detected coding systems sorted by their priorities.  If
   MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
   multibyte form but contain only ASCII and eight-bit chars.
   Otherwise, the bytes are raw bytes.

   CODING-SYSTEM controls the detection as below:

   If it is nil, detect both text-format and eol-format.  If the
   text-format part of CODING-SYSTEM is already specified
   (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
   part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
   detect only text-format.  */
7717
7718 Lisp_Object
7719 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7720                       coding_system)
7721      const unsigned char *src;
7722      EMACS_INT src_chars, src_bytes;
7723      int highest;
7724      int multibytep;
7725      Lisp_Object coding_system;
7726 {
7727   const unsigned char *src_end = src + src_bytes;
7728   Lisp_Object attrs, eol_type;
7729   Lisp_Object val = Qnil;
7730   struct coding_system coding;
7731   int id;
7732   struct coding_detection_info detect_info;
7733   enum coding_category base_category;
7734   int null_byte_found = 0, eight_bit_found = 0;
7735
7736   if (NILP (coding_system))
7737     coding_system = Qundecided;
7738   setup_coding_system (coding_system, &coding);
7739   attrs = CODING_ID_ATTRS (coding.id);
7740   eol_type = CODING_ID_EOL_TYPE (coding.id);
7741   coding_system = CODING_ATTR_BASE_NAME (attrs);
7742
7743   coding.source = src;
7744   coding.src_chars = src_chars;
7745   coding.src_bytes = src_bytes;
7746   coding.src_multibyte = multibytep;
7747   coding.consumed = 0;
7748   coding.mode |= CODING_MODE_LAST_BLOCK;
7749   coding.head_ascii = 0;
7750
7751   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7752
7753   /* At first, detect text-format if necessary.  */
7754   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7755   if (base_category == coding_category_undecided)
7756     {
7757       enum coding_category category;
7758       struct coding_system *this;
7759       int c, i;
7760
7761       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7762       for (; src < src_end; src++)
7763         {
7764           c = *src;
7765           if (c & 0x80)
7766             {
7767               eight_bit_found = 1;
7768               if (null_byte_found)
7769                 break;
7770             }
7771           else if (c < 0x20)
7772             {
7773               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7774                   && ! inhibit_iso_escape_detection
7775                   && ! detect_info.checked)
7776                 {
7777                   if (detect_coding_iso_2022 (&coding, &detect_info))
7778                     {
7779                       /* We have scanned the whole data.  */
7780                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7781                         {
                          /* We didn't find an 8-bit code.  We may
                             have found a null-byte, but it's very
                             rare that a binary file conforms to
                             ISO-2022.  */
7786                           src = src_end;
7787                           coding.head_ascii = src - coding.source;
7788                         }
7789                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
7790                       break;
7791                     }
7792                 }
7793               else if (! c)
7794                 {
7795                   null_byte_found = 1;
7796                   if (eight_bit_found)
7797                     break;
7798                 }
7799               if (! eight_bit_found)
7800                 coding.head_ascii++;
7801             }
7802           else if (! eight_bit_found)
7803             coding.head_ascii++;
7804         }
7805
7806       if (null_byte_found || eight_bit_found
7807           || coding.head_ascii < coding.src_bytes
7808           || detect_info.found)
7809         {
7810           if (coding.head_ascii == coding.src_bytes)
7811             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
7812             for (i = 0; i < coding_category_raw_text; i++)
7813               {
7814                 category = coding_priorities[i];
7815                 this = coding_categories + category;
7816                 if (detect_info.found & (1 << category))
7817                   break;
7818               }
7819           else
7820             {
7821               if (null_byte_found)
7822                 {
7823                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
7824                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
7825                 }
7826               for (i = 0; i < coding_category_raw_text; i++)
7827                 {
7828                   category = coding_priorities[i];
7829                   this = coding_categories + category;
7830
7831                   if (this->id < 0)
7832                     {
7833                       /* No coding system of this category is defined.  */
7834                       detect_info.rejected |= (1 << category);
7835                     }
7836                   else if (category >= coding_category_raw_text)
7837                     continue;
7838                   else if (detect_info.checked & (1 << category))
7839                     {
7840                       if (highest
7841                           && (detect_info.found & (1 << category)))
7842                         break;
7843                     }
7844                   else if ((*(this->detector)) (&coding, &detect_info)
7845                            && highest
7846                            && (detect_info.found & (1 << category)))
7847                     {
7848                       if (category == coding_category_utf_16_auto)
7849                         {
7850                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7851                             category = coding_category_utf_16_le;
7852                           else
7853                             category = coding_category_utf_16_be;
7854                         }
7855                       break;
7856                     }
7857                 }
7858             }
7859         }
7860
7861       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
7862         {
7863           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7864           id = coding_categories[coding_category_raw_text].id;
7865           val = Fcons (make_number (id), Qnil);
7866         }
7867       else if (! detect_info.rejected && ! detect_info.found)
7868         {
7869           detect_info.found = CATEGORY_MASK_ANY;
7870           id = coding_categories[coding_category_undecided].id;
7871           val = Fcons (make_number (id), Qnil);
7872         }
7873       else if (highest)
7874         {
7875           if (detect_info.found)
7876             {
7877               detect_info.found = 1 << category;
7878               val = Fcons (make_number (this->id), Qnil);
7879             }
7880           else
7881             for (i = 0; i < coding_category_raw_text; i++)
7882               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7883                 {
7884                   detect_info.found = 1 << coding_priorities[i];
7885                   id = coding_categories[coding_priorities[i]].id;
7886                   val = Fcons (make_number (id), Qnil);
7887                   break;
7888                 }
7889         }
7890       else
7891         {
7892           int mask = detect_info.rejected | detect_info.found;
7893           int found = 0;
7894
7895           for (i = coding_category_raw_text - 1; i >= 0; i--)
7896             {
7897               category = coding_priorities[i];
7898               if (! (mask & (1 << category)))
7899                 {
7900                   found |= 1 << category;
7901                   id = coding_categories[category].id;
7902                   if (id >= 0)
7903                     val = Fcons (make_number (id), val);
7904                 }
7905             }
7906           for (i = coding_category_raw_text - 1; i >= 0; i--)
7907             {
7908               category = coding_priorities[i];
7909               if (detect_info.found & (1 << category))
7910                 {
7911                   id = coding_categories[category].id;
7912                   val = Fcons (make_number (id), val);
7913                 }
7914             }
7915           detect_info.found |= found;
7916         }
7917     }
7918   else if (base_category == coding_category_utf_8_auto)
7919     {
7920       if (detect_coding_utf_8 (&coding, &detect_info))
7921         {
7922           struct coding_system *this;
7923
7924           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
7925             this = coding_categories + coding_category_utf_8_sig;
7926           else
7927             this = coding_categories + coding_category_utf_8_nosig;
7928           val = Fcons (make_number (this->id), Qnil);
7929         }
7930     }
7931   else if (base_category == coding_category_utf_16_auto)
7932     {
7933       if (detect_coding_utf_16 (&coding, &detect_info))
7934         {
7935           struct coding_system *this;
7936
7937           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7938             this = coding_categories + coding_category_utf_16_le;
7939           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7940             this = coding_categories + coding_category_utf_16_be;
7941           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7942             this = coding_categories + coding_category_utf_16_be_nosig;
7943           else
7944             this = coding_categories + coding_category_utf_16_le_nosig;
7945           val = Fcons (make_number (this->id), Qnil);
7946         }
7947     }
7948   else
7949     {
7950       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7951       val = Fcons (make_number (coding.id), Qnil);
7952     }
7953
7954   /* Then, detect eol-format if necessary.  */
7955   {
7956     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
7957     Lisp_Object tail;
7958
7959     if (VECTORP (eol_type))
7960       {
7961         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7962           {
7963             if (null_byte_found)
7964               normal_eol = EOL_SEEN_LF;
7965             else
7966               normal_eol = detect_eol (coding.source, src_bytes,
7967                                        coding_category_raw_text);
7968           }
7969         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7970                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7971           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7972                                       coding_category_utf_16_be);
7973         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7974                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7975           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7976                                       coding_category_utf_16_le);
7977       }
7978     else
7979       {
7980         if (EQ (eol_type, Qunix))
7981           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7982         else if (EQ (eol_type, Qdos))
7983           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7984         else
7985           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7986       }
7987
7988     for (tail = val; CONSP (tail); tail = XCDR (tail))
7989       {
7990         enum coding_category category;
7991         int this_eol;
7992
7993         id = XINT (XCAR (tail));
7994         attrs = CODING_ID_ATTRS (id);
7995         category = XINT (CODING_ATTR_CATEGORY (attrs));
7996         eol_type = CODING_ID_EOL_TYPE (id);
7997         if (VECTORP (eol_type))
7998           {
7999             if (category == coding_category_utf_16_be
8000                 || category == coding_category_utf_16_be_nosig)
8001               this_eol = utf_16_be_eol;
8002             else if (category == coding_category_utf_16_le
8003                      || category == coding_category_utf_16_le_nosig)
8004               this_eol = utf_16_le_eol;
8005             else
8006               this_eol = normal_eol;
8007
8008             if (this_eol == EOL_SEEN_LF)
8009               XSETCAR (tail, AREF (eol_type, 0));
8010             else if (this_eol == EOL_SEEN_CRLF)
8011               XSETCAR (tail, AREF (eol_type, 1));
8012             else if (this_eol == EOL_SEEN_CR)
8013               XSETCAR (tail, AREF (eol_type, 2));
8014             else
8015               XSETCAR (tail, CODING_ID_NAME (id));
8016           }
8017         else
8018           XSETCAR (tail, CODING_ID_NAME (id));
8019       }
8020   }
8021
8022   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8023 }
8024
8025
8026 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8027        2, 3, 0,
8028        doc: /* Detect coding system of the text in the region between START and END.
8029 Return a list of possible coding systems ordered by priority.
8030
8031 If only ASCII characters are found (except for such ISO-2022 control
8032 characters as ESC), it returns a list of single element `undecided'
8033 or its subsidiary coding system according to a detected end-of-line
8034 format.
8035
8036 If optional argument HIGHEST is non-nil, return the coding system of
8037 highest priority.  */)
8038      (start, end, highest)
8039      Lisp_Object start, end, highest;
8040 {
8041   int from, to;
8042   int from_byte, to_byte;
8043
8044   CHECK_NUMBER_COERCE_MARKER (start);
8045   CHECK_NUMBER_COERCE_MARKER (end);
8046
8047   validate_region (&start, &end);
8048   from = XINT (start), to = XINT (end);
8049   from_byte = CHAR_TO_BYTE (from);
8050   to_byte = CHAR_TO_BYTE (to);
8051
8052   if (from < GPT && to >= GPT)
8053     move_gap_both (to, to_byte);
8054
8055   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8056                                to - from, to_byte - from_byte,
8057                                !NILP (highest),
8058                                !NILP (current_buffer
8059                                       ->enable_multibyte_characters),
8060                                Qnil);
8061 }
8062
8063 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8064        1, 2, 0,
8065        doc: /* Detect coding system of the text in STRING.
8066 Return a list of possible coding systems ordered by priority.
8067
8068 If only ASCII characters are found (except for such ISO-2022 control
8069 characters as ESC), it returns a list of single element `undecided'
8070 or its subsidiary coding system according to a detected end-of-line
8071 format.
8072
8073 If optional argument HIGHEST is non-nil, return the coding system of
8074 highest priority.  */)
8075      (string, highest)
8076      Lisp_Object string, highest;
8077 {
8078   CHECK_STRING (string);
8079
8080   return detect_coding_system (SDATA (string),
8081                                SCHARS (string), SBYTES (string),
8082                                !NILP (highest), STRING_MULTIBYTE (string),
8083                                Qnil);
8084 }
8085
8086
8087 static INLINE int
8088 char_encodable_p (c, attrs)
8089      int c;
8090      Lisp_Object attrs;
8091 {
8092   Lisp_Object tail;
8093   struct charset *charset;
8094   Lisp_Object translation_table;
8095
8096   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8097   if (! NILP (translation_table))
8098     c = translate_char (translation_table, c);
8099   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8100        CONSP (tail); tail = XCDR (tail))
8101     {
8102       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8103       if (CHAR_CHARSET_P (c, charset))
8104         break;
8105     }
8106   return (! NILP (tail));
8107 }
8108
8109
/* Return a list of coding systems that safely encode the text between
   START and END.  START may also be a string, in which case the text
   of that string is examined and END is not used.  If EXCLUDE is
   non-nil, it is a list of coding systems not to check.  The returned
   list doesn't contain any such coding systems.  In any case, if the
   text contains only ASCII or is unibyte, return t.  */

DEFUN ("find-coding-systems-region-internal",
       Ffind_coding_systems_region_internal,
       Sfind_coding_systems_region_internal, 2, 3, 0,
       doc: /* Internal use only.  */)
     (start, end, exclude)
     Lisp_Object start, end, exclude;
{
  Lisp_Object coding_attrs_list, safe_codings;
  EMACS_INT start_byte, end_byte;
  const unsigned char *p, *pbeg, *pend;
  int c;
  Lisp_Object tail, elt;

  if (STRINGP (start))
    {
      /* A unibyte string, or a multibyte string whose character and
         byte counts agree (i.e. all ASCII), needs no checking.  */
      if (!STRING_MULTIBYTE (start)
          || SCHARS (start) == SBYTES (start))
        return Qt;
      start_byte = 0;
      end_byte = SBYTES (start);
    }
  else
    {
      CHECK_NUMBER_COERCE_MARKER (start);
      CHECK_NUMBER_COERCE_MARKER (end);
      if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
        args_out_of_range (start, end);
      if (NILP (current_buffer->enable_multibyte_characters))
        return Qt;
      start_byte = CHAR_TO_BYTE (XINT (start));
      end_byte = CHAR_TO_BYTE (XINT (end));
      /* Same number of characters and bytes: the region is all ASCII.  */
      if (XINT (end) - XINT (start) == end_byte - start_byte)
        return Qt;

      /* If the gap is strictly inside the region, move it to whichever
         end of the region is nearer.  */
      if (XINT (start) < GPT && XINT (end) > GPT)
        {
          if ((GPT - XINT (start)) < (XINT (end) - GPT))
            move_gap_both (XINT (start), start_byte);
          else
            move_gap_both (XINT (end), end_byte);
        }
    }

  /* Collect the attribute vectors of the candidate coding systems:
     every member of Vcoding_system_list that is not in EXCLUDE, that
     is its own base coding system, and whose type is not `undecided'.
     The translation table returned by get_translation_table is stored
     into each attribute vector so that char_encodable_p can use it.  */
  coding_attrs_list = Qnil;
  for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
    if (NILP (exclude)
        || NILP (Fmemq (XCAR (tail), exclude)))
      {
        Lisp_Object attrs;

        attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
        if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
            && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
          {
            ASET (attrs, coding_attr_trans_tbl,
                  get_translation_table (attrs, 1, NULL));
            coding_attrs_list = Fcons (attrs, coding_attrs_list);
          }
      }

  if (STRINGP (start))
    p = pbeg = SDATA (start);
  else
    p = pbeg = BYTE_POS_ADDR (start_byte);
  pend = p + (end_byte - start_byte);

  /* Trim the ASCII bytes at both ends of the text.  */
  while (p < pend && ASCII_BYTE_P (*p)) p++;
  while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;

  while (p < pend)
    {
      if (ASCII_BYTE_P (*p))
        p++;
      else
        {
          c = STRING_CHAR_ADVANCE (p);

          /* Drop every candidate that cannot encode C.  TAIL is only
             advanced when the current cell is kept or is the last
             one; a rejected cell that has a successor is overwritten
             with the successor's contents and examined again, and a
             rejected last cell has its car set to nil.  */
          charset_map_loaded = 0;
          for (tail = coding_attrs_list; CONSP (tail);)
            {
              elt = XCAR (tail);
              if (NILP (elt))
                tail = XCDR (tail);
              else if (char_encodable_p (c, elt))
                tail = XCDR (tail);
              else if (CONSP (XCDR (tail)))
                {
                  XSETCAR (tail, XCAR (XCDR (tail)));
                  XSETCDR (tail, XCDR (XCDR (tail)));
                }
              else
                {
                  XSETCAR (tail, Qnil);
                  tail = XCDR (tail);
                }
            }
          /* If the flag was set during the checks above, recompute
             the text pointers from their saved offsets.
             NOTE(review): presumably loading a charset map can
             relocate the text -- confirm.  */
          if (charset_map_loaded)
            {
              EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;

              if (STRINGP (start))
                pbeg = SDATA (start);
              else
                pbeg = BYTE_POS_ADDR (start_byte);
              p = pbeg + p_offset;
              pend = pbeg + pend_offset;
            }
        }
    }

  /* `raw-text' and `no-conversion' are always part of the result; the
     base names of the surviving candidates are pushed in front.  */
  safe_codings = list2 (Qraw_text, Qno_conversion);
  for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
    if (! NILP (XCAR (tail)))
      safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);

  return safe_codings;
}
8233
8234
DEFUN ("unencodable-char-position", Funencodable_char_position,
       Sunencodable_char_position, 3, 5, 0,
       doc: /*
Return position of first un-encodable character in a region.
START and END specify the region and CODING-SYSTEM specifies the
encoding to check.  Return nil if CODING-SYSTEM does encode the region.

If optional 4th argument COUNT is non-nil, it specifies at most how
many un-encodable characters to search.  In this case, the value is a
list of positions.

If optional 5th argument STRING is non-nil, it is a string to search
for un-encodable characters.  In that case, START and END are indexes
to the string.  */)
     (start, end, coding_system, count, string)
     Lisp_Object start, end, coding_system, count, string;
{
  int n;
  struct coding_system coding;
  Lisp_Object attrs, charset_list, translation_table;
  Lisp_Object positions;
  int from, to;
  const unsigned char *p, *stop, *pend;
  int ascii_compatible;

  setup_coding_system (Fcheck_coding_system (coding_system), &coding);
  attrs = CODING_ID_ATTRS (coding.id);
  /* A coding system of type `raw-text' is never reported as failing.  */
  if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
    return Qnil;
  ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
  charset_list = CODING_ATTR_CHARSET_LIST (attrs);
  translation_table = get_translation_table (attrs, 1, NULL);

  if (NILP (string))
    {
      /* Search the current buffer.  */
      validate_region (&start, &end);
      from = XINT (start);
      to = XINT (end);
      /* Nothing to report for a unibyte buffer, or for an all-ASCII
         region (character count equals byte count) when the coding
         system is ASCII compatible.  */
      if (NILP (current_buffer->enable_multibyte_characters)
          || (ascii_compatible
              && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
        return Qnil;
      p = CHAR_POS_ADDR (from);
      pend = CHAR_POS_ADDR (to);
      /* If the region extends across the gap, scan up to the gap
         first; the loop below continues after it.  */
      if (from < GPT && to >= GPT)
        stop = GPT_ADDR;
      else
        stop = pend;
    }
  else
    {
      /* Search STRING; START and END are character indices into it.  */
      CHECK_STRING (string);
      CHECK_NATNUM (start);
      CHECK_NATNUM (end);
      from = XINT (start);
      to = XINT (end);
      if (from > to
          || to > SCHARS (string))
        args_out_of_range_3 (string, start, end);
      if (! STRING_MULTIBYTE (string))
        return Qnil;
      p = SDATA (string) + string_char_to_byte (string, from);
      stop = pend = SDATA (string) + string_char_to_byte (string, to);
      if (ascii_compatible && (to - from) == (pend - p))
        return Qnil;
    }

  /* N is the number of positions still to be collected.
     NOTE(review): a COUNT of 0 makes N go negative on the first hit,
     so the `n == 0' test below never fires and every un-encodable
     position is collected -- confirm that this is intended.  */
  if (NILP (count))
    n = 1;
  else
    {
      CHECK_NATNUM (count);
      n = XINT (count);
    }

  positions = Qnil;
  while (1)
    {
      int c;

      /* Skip ASCII bytes quickly; FROM tracks the character position
         of P.  */
      if (ascii_compatible)
        while (p < stop && ASCII_BYTE_P (*p))
          p++, from++;
      if (p >= stop)
        {
          if (p >= pend)
            break;
          /* Reached the gap: resume just after it and scan up to the
             end of the region.  */
          stop = pend;
          p = GAP_END_ADDR;
        }

      c = STRING_CHAR_ADVANCE (p);
      /* C is un-encodable unless it is an ASCII character under an
         ASCII-compatible coding system, or its translation is found
         in one of the charsets of CHARSET_LIST.  */
      if (! (ASCII_CHAR_P (c) && ascii_compatible)
          && ! char_charset (translate_char (translation_table, c),
                             charset_list, NULL))
        {
          positions = Fcons (make_number (from), positions);
          n--;
          if (n == 0)
            break;
        }

      from++;
    }

  /* POSITIONS was built most-recent-first.  Without COUNT return its
     first element instead of a list; otherwise return the list in
     increasing order.  */
  return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
}
8342
8343
8344 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8345        Scheck_coding_systems_region, 3, 3, 0,
8346        doc: /* Check if the region is encodable by coding systems.
8347
8348 START and END are buffer positions specifying the region.
8349 CODING-SYSTEM-LIST is a list of coding systems to check.
8350
8351 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8352 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8353 whole region, POS0, POS1, ... are buffer positions where non-encodable
8354 characters are found.
8355
8356 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8357 value is nil.
8358
8359 START may be a string.  In that case, check if the string is
8360 encodable, and the value contains indices to the string instead of
8361 buffer positions.  END is ignored.  */)
8362      (start, end, coding_system_list)
8363      Lisp_Object start, end, coding_system_list;
8364 {
8365   Lisp_Object list;
8366   EMACS_INT start_byte, end_byte;
8367   int pos;
8368   const unsigned char *p, *pbeg, *pend;
8369   int c;
8370   Lisp_Object tail, elt, attrs;
8371
8372   if (STRINGP (start))
8373     {
8374       if (!STRING_MULTIBYTE (start)
8375           && SCHARS (start) != SBYTES (start))
8376         return Qnil;
8377       start_byte = 0;
8378       end_byte = SBYTES (start);
8379       pos = 0;
8380     }
8381   else
8382     {
8383       CHECK_NUMBER_COERCE_MARKER (start);
8384       CHECK_NUMBER_COERCE_MARKER (end);
8385       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8386         args_out_of_range (start, end);
8387       if (NILP (current_buffer->enable_multibyte_characters))
8388         return Qnil;
8389       start_byte = CHAR_TO_BYTE (XINT (start));
8390       end_byte = CHAR_TO_BYTE (XINT (end));
8391       if (XINT (end) - XINT (start) == end_byte - start_byte)
8392         return Qt;
8393
8394       if (XINT (start) < GPT && XINT (end) > GPT)
8395         {
8396           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8397             move_gap_both (XINT (start), start_byte);
8398           else
8399             move_gap_both (XINT (end), end_byte);
8400         }
8401       pos = XINT (start);
8402     }
8403
8404   list = Qnil;
8405   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8406     {
8407       elt = XCAR (tail);
8408       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8409       ASET (attrs, coding_attr_trans_tbl,
8410             get_translation_table (attrs, 1, NULL));
8411       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8412     }
8413
8414   if (STRINGP (start))
8415     p = pbeg = SDATA (start);
8416   else
8417     p = pbeg = BYTE_POS_ADDR (start_byte);
8418   pend = p + (end_byte - start_byte);
8419
8420   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8421   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8422
8423   while (p < pend)
8424     {
8425       if (ASCII_BYTE_P (*p))
8426         p++;
8427       else
8428         {
8429           c = STRING_CHAR_ADVANCE (p);
8430
8431           charset_map_loaded = 0;
8432           for (tail = list; CONSP (tail); tail = XCDR (tail))
8433             {
8434               elt = XCDR (XCAR (tail));
8435               if (! char_encodable_p (c, XCAR (elt)))
8436                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8437             }
8438           if (charset_map_loaded)
8439             {
8440               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8441
8442               if (STRINGP (start))
8443                 pbeg = SDATA (start);
8444               else
8445                 pbeg = BYTE_POS_ADDR (start_byte);
8446               p = pbeg + p_offset;
8447               pend = pbeg + pend_offset;
8448             }
8449         }
8450       pos++;
8451     }
8452
8453   tail = list;
8454   list = Qnil;
8455   for (; CONSP (tail); tail = XCDR (tail))
8456     {
8457       elt = XCAR (tail);
8458       if (CONSP (XCDR (XCDR (elt))))
8459         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8460                       list);
8461     }
8462
8463   return list;
8464 }
8465
8466
8467 Lisp_Object
8468 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8469      Lisp_Object start, end, coding_system, dst_object;
8470      int encodep, norecord;
8471 {
8472   struct coding_system coding;
8473   EMACS_INT from, from_byte, to, to_byte;
8474   Lisp_Object src_object;
8475
8476   CHECK_NUMBER_COERCE_MARKER (start);
8477   CHECK_NUMBER_COERCE_MARKER (end);
8478   if (NILP (coding_system))
8479     coding_system = Qno_conversion;
8480   else
8481     CHECK_CODING_SYSTEM (coding_system);
8482   src_object = Fcurrent_buffer ();
8483   if (NILP (dst_object))
8484     dst_object = src_object;
8485   else if (! EQ (dst_object, Qt))
8486     CHECK_BUFFER (dst_object);
8487
8488   validate_region (&start, &end);
8489   from = XFASTINT (start);
8490   from_byte = CHAR_TO_BYTE (from);
8491   to = XFASTINT (end);
8492   to_byte = CHAR_TO_BYTE (to);
8493
8494   setup_coding_system (coding_system, &coding);
8495   coding.mode |= CODING_MODE_LAST_BLOCK;
8496
8497   if (encodep)
8498     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8499                           dst_object);
8500   else
8501     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8502                           dst_object);
8503   if (! norecord)
8504     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8505
8506   return (BUFFERP (dst_object)
8507           ? make_number (coding.produced_char)
8508           : coding.dst_object);
8509 }
8510
8511
DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
       3, 4, "r\nzCoding system: ",
       doc: /* Decode the current region from the specified coding system.
When called from a program, takes four arguments:
        START, END, CODING-SYSTEM, and DESTINATION.
START and END are buffer positions.

Optional 4th argument DESTINATION specifies where the decoded text goes.
If nil, the region between START and END is replaced by the decoded text.
If buffer, the decoded text is inserted in that buffer after point (point
does not move).
In those cases, the length of the decoded text is returned.
If DESTINATION is t, the decoded text is returned.

This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)  */)
     (start, end, coding_system, destination)
     Lisp_Object start, end, coding_system, destination;
{
  /* ENCODEP = 0 selects decoding; NORECORD = 0 lets
     code_convert_region update Vlast_coding_system_used.  */
  return code_convert_region (start, end, coding_system, destination, 0, 0);
}
8534
DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
       3, 4, "r\nzCoding system: ",
       doc: /* Encode the current region by specified coding system.
When called from a program, takes four arguments:
        START, END, CODING-SYSTEM and DESTINATION.
START and END are buffer positions.

Optional 4th argument DESTINATION specifies where the encoded text goes.
If nil, the region between START and END is replaced by the encoded text.
If buffer, the encoded text is inserted in that buffer after point (point
does not move).
In those cases, the length of the encoded text is returned.
If DESTINATION is t, the encoded text is returned.

This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
not fully specified.)  */)
  (start, end, coding_system, destination)
     Lisp_Object start, end, coding_system, destination;
{
  /* ENCODEP = 1 selects encoding; NORECORD = 0 lets
     code_convert_region update Vlast_coding_system_used.  */
  return code_convert_region (start, end, coding_system, destination, 1, 0);
}
8557
8558 Lisp_Object
8559 code_convert_string (string, coding_system, dst_object,
8560                      encodep, nocopy, norecord)
8561      Lisp_Object string, coding_system, dst_object;
8562      int encodep, nocopy, norecord;
8563 {
8564   struct coding_system coding;
8565   EMACS_INT chars, bytes;
8566
8567   CHECK_STRING (string);
8568   if (NILP (coding_system))
8569     {
8570       if (! norecord)
8571         Vlast_coding_system_used = Qno_conversion;
8572       if (NILP (dst_object))
8573         return (nocopy ? Fcopy_sequence (string) : string);
8574     }
8575
8576   if (NILP (coding_system))
8577     coding_system = Qno_conversion;
8578   else
8579     CHECK_CODING_SYSTEM (coding_system);
8580   if (NILP (dst_object))
8581     dst_object = Qt;
8582   else if (! EQ (dst_object, Qt))
8583     CHECK_BUFFER (dst_object);
8584
8585   setup_coding_system (coding_system, &coding);
8586   coding.mode |= CODING_MODE_LAST_BLOCK;
8587   chars = SCHARS (string);
8588   bytes = SBYTES (string);
8589   if (encodep)
8590     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8591   else
8592     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8593   if (! norecord)
8594     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8595
8596   return (BUFFERP (dst_object)
8597           ? make_number (coding.produced_char)
8598           : coding.dst_object);
8599 }
8600
8601
8602 /* Encode or decode STRING according to CODING_SYSTEM.
8603    Do not set Vlast_coding_system_used.
8604
8605    This function is called only from macros DECODE_FILE and
8606    ENCODE_FILE, thus we ignore character composition.  */
8607
8608 Lisp_Object
8609 code_convert_string_norecord (string, coding_system, encodep)
8610      Lisp_Object string, coding_system;
8611      int encodep;
8612 {
8613   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
8614 }
8615
8616
8617 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8618        2, 4, 0,
8619        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8620
8621 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8622 if the decoding operation is trivial.
8623
8624 Optional fourth arg BUFFER non-nil means that the decoded text is
8625 inserted in that buffer after point (point does not move).  In this
8626 case, the return value is the length of the decoded text.
8627
8628 This function sets `last-coding-system-used' to the precise coding system
8629 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8630 not fully specified.)  */)
8631   (string, coding_system, nocopy, buffer)
8632      Lisp_Object string, coding_system, nocopy, buffer;
8633 {
8634   return code_convert_string (string, coding_system, buffer,
8635                               0, ! NILP (nocopy), 0);
8636 }
8637
8638 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8639        2, 4, 0,
8640        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8641
8642 Optional third arg NOCOPY non-nil means it is OK to return STRING
8643 itself if the encoding operation is trivial.
8644
8645 Optional fourth arg BUFFER non-nil means that the encoded text is
8646 inserted in that buffer after point (point does not move).  In this
8647 case, the return value is the length of the encoded text.
8648
8649 This function sets `last-coding-system-used' to the precise coding system
8650 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8651 not fully specified.)  */)
8652      (string, coding_system, nocopy, buffer)
8653      Lisp_Object string, coding_system, nocopy, buffer;
8654 {
8655   return code_convert_string (string, coding_system, buffer,
8656                               1, ! NILP (nocopy), 1);
8657 }
8658
8659 \f
8660 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
8661        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8662 Return the corresponding character.  */)
8663      (code)
8664      Lisp_Object code;
8665 {
8666   Lisp_Object spec, attrs, val;
8667   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8668   int c;
8669
8670   CHECK_NATNUM (code);
8671   c = XFASTINT (code);
8672   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8673   attrs = AREF (spec, 0);
8674
8675   if (ASCII_BYTE_P (c)
8676       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8677     return code;
8678
8679   val = CODING_ATTR_CHARSET_LIST (attrs);
8680   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8681   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8682   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
8683
8684   if (c <= 0x7F)
8685     charset = charset_roman;
8686   else if (c >= 0xA0 && c < 0xDF)
8687     {
8688       charset = charset_kana;
8689       c -= 0x80;
8690     }
8691   else
8692     {
8693       int s1 = c >> 8, s2 = c & 0xFF;
8694
8695       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8696           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8697         error ("Invalid code: %d", code);
8698       SJIS_TO_JIS (c);
8699       charset = charset_kanji;
8700     }
8701   c = DECODE_CHAR (charset, c);
8702   if (c < 0)
8703     error ("Invalid code: %d", code);
8704   return make_number (c);
8705 }
8706
8707
8708 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8709        doc: /* Encode a Japanese character CH to shift_jis encoding.
8710 Return the corresponding code in SJIS.  */)
8711      (ch)
8712     Lisp_Object ch;
8713 {
8714   Lisp_Object spec, attrs, charset_list;
8715   int c;
8716   struct charset *charset;
8717   unsigned code;
8718
8719   CHECK_CHARACTER (ch);
8720   c = XFASTINT (ch);
8721   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8722   attrs = AREF (spec, 0);
8723
8724   if (ASCII_CHAR_P (c)
8725       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8726     return ch;
8727
8728   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8729   charset = char_charset (c, charset_list, &code);
8730   if (code == CHARSET_INVALID_CODE (charset))
8731     error ("Can't encode by shift_jis encoding: %d", c);
8732   JIS_TO_SJIS (code);
8733
8734   return make_number (code);
8735 }
8736
8737 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8738        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8739 Return the corresponding character.  */)
8740      (code)
8741      Lisp_Object code;
8742 {
8743   Lisp_Object spec, attrs, val;
8744   struct charset *charset_roman, *charset_big5, *charset;
8745   int c;
8746
8747   CHECK_NATNUM (code);
8748   c = XFASTINT (code);
8749   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8750   attrs = AREF (spec, 0);
8751
8752   if (ASCII_BYTE_P (c)
8753       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8754     return code;
8755
8756   val = CODING_ATTR_CHARSET_LIST (attrs);
8757   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8758   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8759
8760   if (c <= 0x7F)
8761     charset = charset_roman;
8762   else
8763     {
8764       int b1 = c >> 8, b2 = c & 0x7F;
8765       if (b1 < 0xA1 || b1 > 0xFE
8766           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8767         error ("Invalid code: %d", code);
8768       charset = charset_big5;
8769     }
8770   c = DECODE_CHAR (charset, (unsigned )c);
8771   if (c < 0)
8772     error ("Invalid code: %d", code);
8773   return make_number (c);
8774 }
8775
8776 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8777        doc: /* Encode the Big5 character CH to BIG5 coding system.
8778 Return the corresponding character code in Big5.  */)
8779      (ch)
8780      Lisp_Object ch;
8781 {
8782   Lisp_Object spec, attrs, charset_list;
8783   struct charset *charset;
8784   int c;
8785   unsigned code;
8786
8787   CHECK_CHARACTER (ch);
8788   c = XFASTINT (ch);
8789   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8790   attrs = AREF (spec, 0);
8791   if (ASCII_CHAR_P (c)
8792       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8793     return ch;
8794
8795   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8796   charset = char_charset (c, charset_list, &code);
8797   if (code == CHARSET_INVALID_CODE (charset))
8798     error ("Can't encode by Big5 encoding: %d", c);
8799
8800   return make_number (code);
8801 }
8802
8803 \f
8804 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
8805        Sset_terminal_coding_system_internal, 1, 2, 0,
8806        doc: /* Internal use only.  */)
8807      (coding_system, terminal)
8808      Lisp_Object coding_system;
8809      Lisp_Object terminal;
8810 {
8811   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8812   CHECK_SYMBOL (coding_system);
8813   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
8814   /* We had better not send unsafe characters to terminal.  */
8815   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
8816   /* Characer composition should be disabled.  */
8817   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8818   terminal_coding->src_multibyte = 1;
8819   terminal_coding->dst_multibyte = 0;
8820   return Qnil;
8821 }
8822
8823 DEFUN ("set-safe-terminal-coding-system-internal",
8824        Fset_safe_terminal_coding_system_internal,
8825        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8826        doc: /* Internal use only.  */)
8827      (coding_system)
8828      Lisp_Object coding_system;
8829 {
8830   CHECK_SYMBOL (coding_system);
8831   setup_coding_system (Fcheck_coding_system (coding_system),
8832                        &safe_terminal_coding);
8833   /* Characer composition should be disabled.  */
8834   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8835   safe_terminal_coding.src_multibyte = 1;
8836   safe_terminal_coding.dst_multibyte = 0;
8837   return Qnil;
8838 }
8839
8840 DEFUN ("terminal-coding-system", Fterminal_coding_system,
8841        Sterminal_coding_system, 0, 1, 0,
8842        doc: /* Return coding system specified for terminal output on the given terminal.
8843 TERMINAL may be a terminal id, a frame, or nil for the selected
8844 frame's terminal device.  */)
8845      (terminal)
8846      Lisp_Object terminal;
8847 {
8848   struct coding_system *terminal_coding
8849     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8850   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
8851
8852   /* For backward compatibility, return nil if it is `undecided'. */
8853   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
8854 }
8855
8856 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
8857        Sset_keyboard_coding_system_internal, 1, 2, 0,
8858        doc: /* Internal use only.  */)
8859      (coding_system, terminal)
8860      Lisp_Object coding_system;
8861      Lisp_Object terminal;
8862 {
8863   struct terminal *t = get_terminal (terminal, 1);
8864   CHECK_SYMBOL (coding_system);
8865   setup_coding_system (Fcheck_coding_system (coding_system),
8866                        TERMINAL_KEYBOARD_CODING (t));
8867   /* Characer composition should be disabled.  */
8868   TERMINAL_KEYBOARD_CODING (t)->common_flags
8869     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8870   return Qnil;
8871 }
8872
8873 DEFUN ("keyboard-coding-system",
8874        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
8875        doc: /* Return coding system specified for decoding keyboard input.  */)
8876      (terminal)
8877      Lisp_Object terminal;
8878 {
8879   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8880                          (get_terminal (terminal, 1))->id);
8881 }
8882
8883 \f
8884 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8885        Sfind_operation_coding_system,  1, MANY, 0,
8886        doc: /* Choose a coding system for an operation based on the target name.
8887 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8888 DECODING-SYSTEM is the coding system to use for decoding
8889 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8890 for encoding (in case OPERATION does encoding).
8891
8892 The first argument OPERATION specifies an I/O primitive:
8893   For file I/O, `insert-file-contents' or `write-region'.
8894   For process I/O, `call-process', `call-process-region', or `start-process'.
8895   For network I/O, `open-network-stream'.
8896
8897 The remaining arguments should be the same arguments that were passed
8898 to the primitive.  Depending on which primitive, one of those arguments
8899 is selected as the TARGET.  For example, if OPERATION does file I/O,
8900 whichever argument specifies the file name is TARGET.
8901
8902 TARGET has a meaning which depends on OPERATION:
8903   For file I/O, TARGET is a file name (except for the special case below).
8904   For process I/O, TARGET is a process name.
8905   For network I/O, TARGET is a service name or a port number.
8906
8907 This function looks up what is specified for TARGET in
8908 `file-coding-system-alist', `process-coding-system-alist',
8909 or `network-coding-system-alist' depending on OPERATION.
8910 They may specify a coding system, a cons of coding systems,
8911 or a function symbol to call.
8912 In the last case, we call the function with one argument,
8913 which is a list of all the arguments given to this function.
8914 If the function can't decide a coding system, it can return
8915 `undecided' so that the normal code-detection is performed.
8916
8917 If OPERATION is `insert-file-contents', the argument corresponding to
8918 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
8919 file name to look up, and BUFFER is a buffer that contains the file's
8920 contents (not yet decoded).  If `file-coding-system-alist' specifies a
8921 function to call for FILENAME, that function should examine the
8922 contents of BUFFER instead of reading the file.
8923
8924 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
8925      (nargs, args)
8926      int nargs;
8927      Lisp_Object *args;
8928 {
8929   Lisp_Object operation, target_idx, target, val;
8930   register Lisp_Object chain;
8931
8932   if (nargs < 2)
8933     error ("Too few arguments");
8934   operation = args[0];
8935   if (!SYMBOLP (operation)
8936       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8937     error ("Invalid first argument");
8938   if (nargs < 1 + XINT (target_idx))
8939     error ("Too few arguments for operation: %s",
8940            SDATA (SYMBOL_NAME (operation)));
8941   target = args[XINT (target_idx) + 1];
8942   if (!(STRINGP (target)
8943         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8944             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
8945         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8946     error ("Invalid %dth argument", XINT (target_idx) + 1);
8947   if (CONSP (target))
8948     target = XCAR (target);
8949
8950   chain = ((EQ (operation, Qinsert_file_contents)
8951             || EQ (operation, Qwrite_region))
8952            ? Vfile_coding_system_alist
8953            : (EQ (operation, Qopen_network_stream)
8954               ? Vnetwork_coding_system_alist
8955               : Vprocess_coding_system_alist));
8956   if (NILP (chain))
8957     return Qnil;
8958
8959   for (; CONSP (chain); chain = XCDR (chain))
8960     {
8961       Lisp_Object elt;
8962
8963       elt = XCAR (chain);
8964       if (CONSP (elt)
8965           && ((STRINGP (target)
8966                && STRINGP (XCAR (elt))
8967                && fast_string_match (XCAR (elt), target) >= 0)
8968               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8969         {
8970           val = XCDR (elt);
8971           /* Here, if VAL is both a valid coding system and a valid
8972              function symbol, we return VAL as a coding system.  */
8973           if (CONSP (val))
8974             return val;
8975           if (! SYMBOLP (val))
8976             return Qnil;
8977           if (! NILP (Fcoding_system_p (val)))
8978             return Fcons (val, val);
8979           if (! NILP (Ffboundp (val)))
8980             {
8981               /* We use call1 rather than safe_call1
8982                  so as to get bug reports about functions called here
8983                  which don't handle the current interface.  */
8984               val = call1 (val, Flist (nargs, args));
8985               if (CONSP (val))
8986                 return val;
8987               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8988                 return Fcons (val, val);
8989             }
8990           return Qnil;
8991         }
8992     }
8993   return Qnil;
8994 }
8995
8996 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8997        Sset_coding_system_priority, 0, MANY, 0,
8998        doc: /* Assign higher priority to the coding systems given as arguments.
8999 If multiple coding systems belong to the same category,
9000 all but the first one are ignored.
9001
9002 usage: (set-coding-system-priority &rest coding-systems)  */)
9003      (nargs, args)
9004      int nargs;
9005      Lisp_Object *args;
9006 {
9007   int i, j;
9008   int changed[coding_category_max];
9009   enum coding_category priorities[coding_category_max];
9010
9011   bzero (changed, sizeof changed);
9012
9013   for (i = j = 0; i < nargs; i++)
9014     {
9015       enum coding_category category;
9016       Lisp_Object spec, attrs;
9017
9018       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9019       attrs = AREF (spec, 0);
9020       category = XINT (CODING_ATTR_CATEGORY (attrs));
9021       if (changed[category])
9022         /* Ignore this coding system because a coding system of the
9023            same category already had a higher priority.  */
9024         continue;
9025       changed[category] = 1;
9026       priorities[j++] = category;
9027       if (coding_categories[category].id >= 0
9028           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9029         setup_coding_system (args[i], &coding_categories[category]);
9030       Fset (AREF (Vcoding_category_table, category), args[i]);
9031     }
9032
9033   /* Now we have decided top J priorities.  Reflect the order of the
9034      original priorities to the remaining priorities.  */
9035
9036   for (i = j, j = 0; i < coding_category_max; i++, j++)
9037     {
9038       while (j < coding_category_max
9039              && changed[coding_priorities[j]])
9040         j++;
9041       if (j == coding_category_max)
9042         abort ();
9043       priorities[i] = coding_priorities[j];
9044     }
9045
9046   bcopy (priorities, coding_priorities, sizeof priorities);
9047
9048   /* Update `coding-category-list'.  */
9049   Vcoding_category_list = Qnil;
9050   for (i = coding_category_max - 1; i >= 0; i--)
9051     Vcoding_category_list
9052       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9053                Vcoding_category_list);
9054
9055   return Qnil;
9056 }
9057
9058 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9059        Scoding_system_priority_list, 0, 1, 0,
9060        doc: /* Return a list of coding systems ordered by their priorities.
9061 HIGHESTP non-nil means just return the highest priority one.  */)
9062      (highestp)
9063      Lisp_Object highestp;
9064 {
9065   int i;
9066   Lisp_Object val;
9067
9068   for (i = 0, val = Qnil; i < coding_category_max; i++)
9069     {
9070       enum coding_category category = coding_priorities[i];
9071       int id = coding_categories[category].id;
9072       Lisp_Object attrs;
9073
9074       if (id < 0)
9075         continue;
9076       attrs = CODING_ID_ATTRS (id);
9077       if (! NILP (highestp))
9078         return CODING_ATTR_BASE_NAME (attrs);
9079       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9080     }
9081   return Fnreverse (val);
9082 }
9083
9084 static char *suffixes[] = { "-unix", "-dos", "-mac" };
9085
9086 static Lisp_Object
9087 make_subsidiaries (base)
9088      Lisp_Object base;
9089 {
9090   Lisp_Object subsidiaries;
9091   int base_name_len = SBYTES (SYMBOL_NAME (base));
9092   char *buf = (char *) alloca (base_name_len + 6);
9093   int i;
9094
9095   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9096   subsidiaries = Fmake_vector (make_number (3), Qnil);
9097   for (i = 0; i < 3; i++)
9098     {
9099       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9100       ASET (subsidiaries, i, intern (buf));
9101     }
9102   return subsidiaries;
9103 }
9104
9105
9106 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9107        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9108        doc: /* For internal use only.
9109 usage: (define-coding-system-internal ...)  */)
9110      (nargs, args)
9111      int nargs;
9112      Lisp_Object *args;
9113 {
9114   Lisp_Object name;
9115   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9116   Lisp_Object attrs;            /* Vector of attributes.  */
9117   Lisp_Object eol_type;
9118   Lisp_Object aliases;
9119   Lisp_Object coding_type, charset_list, safe_charsets;
9120   enum coding_category category;
9121   Lisp_Object tail, val;
9122   int max_charset_id = 0;
9123   int i;
9124
9125   if (nargs < coding_arg_max)
9126     goto short_args;
9127
9128   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9129
9130   name = args[coding_arg_name];
9131   CHECK_SYMBOL (name);
9132   CODING_ATTR_BASE_NAME (attrs) = name;
9133
9134   val = args[coding_arg_mnemonic];
9135   if (! STRINGP (val))
9136     CHECK_CHARACTER (val);
9137   CODING_ATTR_MNEMONIC (attrs) = val;
9138
9139   coding_type = args[coding_arg_coding_type];
9140   CHECK_SYMBOL (coding_type);
9141   CODING_ATTR_TYPE (attrs) = coding_type;
9142
9143   charset_list = args[coding_arg_charset_list];
9144   if (SYMBOLP (charset_list))
9145     {
9146       if (EQ (charset_list, Qiso_2022))
9147         {
9148           if (! EQ (coding_type, Qiso_2022))
9149             error ("Invalid charset-list");
9150           charset_list = Viso_2022_charset_list;
9151         }
9152       else if (EQ (charset_list, Qemacs_mule))
9153         {
9154           if (! EQ (coding_type, Qemacs_mule))
9155             error ("Invalid charset-list");
9156           charset_list = Vemacs_mule_charset_list;
9157         }
9158       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9159         if (max_charset_id < XFASTINT (XCAR (tail)))
9160           max_charset_id = XFASTINT (XCAR (tail));
9161     }
9162   else
9163     {
9164       charset_list = Fcopy_sequence (charset_list);
9165       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9166         {
9167           struct charset *charset;
9168
9169           val = XCAR (tail);
9170           CHECK_CHARSET_GET_CHARSET (val, charset);
9171           if (EQ (coding_type, Qiso_2022)
9172               ? CHARSET_ISO_FINAL (charset) < 0
9173               : EQ (coding_type, Qemacs_mule)
9174               ? CHARSET_EMACS_MULE_ID (charset) < 0
9175               : 0)
9176             error ("Can't handle charset `%s'",
9177                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9178
9179           XSETCAR (tail, make_number (charset->id));
9180           if (max_charset_id < charset->id)
9181             max_charset_id = charset->id;
9182         }
9183     }
9184   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9185
9186   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
9187                                 make_number (255));
9188   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9189     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9190   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9191
9192   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9193
9194   val = args[coding_arg_decode_translation_table];
9195   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9196     CHECK_SYMBOL (val);
9197   CODING_ATTR_DECODE_TBL (attrs) = val;
9198
9199   val = args[coding_arg_encode_translation_table];
9200   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9201     CHECK_SYMBOL (val);
9202   CODING_ATTR_ENCODE_TBL (attrs) = val;
9203
9204   val = args[coding_arg_post_read_conversion];
9205   CHECK_SYMBOL (val);
9206   CODING_ATTR_POST_READ (attrs) = val;
9207
9208   val = args[coding_arg_pre_write_conversion];
9209   CHECK_SYMBOL (val);
9210   CODING_ATTR_PRE_WRITE (attrs) = val;
9211
9212   val = args[coding_arg_default_char];
9213   if (NILP (val))
9214     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9215   else
9216     {
9217       CHECK_CHARACTER (val);
9218       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9219     }
9220
9221   val = args[coding_arg_for_unibyte];
9222   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9223
9224   val = args[coding_arg_plist];
9225   CHECK_LIST (val);
9226   CODING_ATTR_PLIST (attrs) = val;
9227
9228   if (EQ (coding_type, Qcharset))
9229     {
9230       /* Generate a lisp vector of 256 elements.  Each element is nil,
9231          integer, or a list of charset IDs.
9232
9233          If Nth element is nil, the byte code N is invalid in this
9234          coding system.
9235
9236          If Nth element is a number NUM, N is the first byte of a
9237          charset whose ID is NUM.
9238
9239          If Nth element is a list of charset IDs, N is the first byte
9240          of one of them.  The list is sorted by dimensions of the
9241          charsets.  A charset of smaller dimension comes firtst. */
9242       val = Fmake_vector (make_number (256), Qnil);
9243
9244       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9245         {
9246           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9247           int dim = CHARSET_DIMENSION (charset);
9248           int idx = (dim - 1) * 4;
9249
9250           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9251             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9252
9253           for (i = charset->code_space[idx];
9254                i <= charset->code_space[idx + 1]; i++)
9255             {
9256               Lisp_Object tmp, tmp2;
9257               int dim2;
9258
9259               tmp = AREF (val, i);
9260               if (NILP (tmp))
9261                 tmp = XCAR (tail);
9262               else if (NUMBERP (tmp))
9263                 {
9264                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9265                   if (dim < dim2)
9266                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9267                   else
9268                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9269                 }
9270               else
9271                 {
9272                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9273                     {
9274                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9275                       if (dim < dim2)
9276                         break;
9277                     }
9278                   if (NILP (tmp2))
9279                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9280                   else
9281                     {
9282                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9283                       XSETCAR (tmp2, XCAR (tail));
9284                     }
9285                 }
9286               ASET (val, i, tmp);
9287             }
9288         }
9289       ASET (attrs, coding_attr_charset_valids, val);
9290       category = coding_category_charset;
9291     }
9292   else if (EQ (coding_type, Qccl))
9293     {
9294       Lisp_Object valids;
9295
9296       if (nargs < coding_arg_ccl_max)
9297         goto short_args;
9298
9299       val = args[coding_arg_ccl_decoder];
9300       CHECK_CCL_PROGRAM (val);
9301       if (VECTORP (val))
9302         val = Fcopy_sequence (val);
9303       ASET (attrs, coding_attr_ccl_decoder, val);
9304
9305       val = args[coding_arg_ccl_encoder];
9306       CHECK_CCL_PROGRAM (val);
9307       if (VECTORP (val))
9308         val = Fcopy_sequence (val);
9309       ASET (attrs, coding_attr_ccl_encoder, val);
9310
9311       val = args[coding_arg_ccl_valids];
9312       valids = Fmake_string (make_number (256), make_number (0));
9313       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9314         {
9315           int from, to;
9316
9317           val = Fcar (tail);
9318           if (INTEGERP (val))
9319             {
9320               from = to = XINT (val);
9321               if (from < 0 || from > 255)
9322                 args_out_of_range_3 (val, make_number (0), make_number (255));
9323             }
9324           else
9325             {
9326               CHECK_CONS (val);
9327               CHECK_NATNUM_CAR (val);
9328               CHECK_NATNUM_CDR (val);
9329               from = XINT (XCAR (val));
9330               if (from > 255)
9331                 args_out_of_range_3 (XCAR (val),
9332                                      make_number (0), make_number (255));
9333               to = XINT (XCDR (val));
9334               if (to < from || to > 255)
9335                 args_out_of_range_3 (XCDR (val),
9336                                      XCAR (val), make_number (255));
9337             }
9338           for (i = from; i <= to; i++)
9339             SSET (valids, i, 1);
9340         }
9341       ASET (attrs, coding_attr_ccl_valids, valids);
9342
9343       category = coding_category_ccl;
9344     }
9345   else if (EQ (coding_type, Qutf_16))
9346     {
9347       Lisp_Object bom, endian;
9348
9349       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9350
9351       if (nargs < coding_arg_utf16_max)
9352         goto short_args;
9353
9354       bom = args[coding_arg_utf16_bom];
9355       if (! NILP (bom) && ! EQ (bom, Qt))
9356         {
9357           CHECK_CONS (bom);
9358           val = XCAR (bom);
9359           CHECK_CODING_SYSTEM (val);
9360           val = XCDR (bom);
9361           CHECK_CODING_SYSTEM (val);
9362         }
9363       ASET (attrs, coding_attr_utf_bom, bom);
9364
9365       endian = args[coding_arg_utf16_endian];
9366       CHECK_SYMBOL (endian);
9367       if (NILP (endian))
9368         endian = Qbig;
9369       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9370         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9371       ASET (attrs, coding_attr_utf_16_endian, endian);
9372
9373       category = (CONSP (bom)
9374                   ? coding_category_utf_16_auto
9375                   : NILP (bom)
9376                   ? (EQ (endian, Qbig)
9377                      ? coding_category_utf_16_be_nosig
9378                      : coding_category_utf_16_le_nosig)
9379                   : (EQ (endian, Qbig)
9380                      ? coding_category_utf_16_be
9381                      : coding_category_utf_16_le));
9382     }
9383   else if (EQ (coding_type, Qiso_2022))
9384     {
9385       Lisp_Object initial, reg_usage, request, flags;
9386       int i;
9387
9388       if (nargs < coding_arg_iso2022_max)
9389         goto short_args;
9390
9391       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9392       CHECK_VECTOR (initial);
9393       for (i = 0; i < 4; i++)
9394         {
9395           val = Faref (initial, make_number (i));
9396           if (! NILP (val))
9397             {
9398               struct charset *charset;
9399
9400               CHECK_CHARSET_GET_CHARSET (val, charset);
9401               ASET (initial, i, make_number (CHARSET_ID (charset)));
9402               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9403                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9404             }
9405           else
9406             ASET (initial, i, make_number (-1));
9407         }
9408
9409       reg_usage = args[coding_arg_iso2022_reg_usage];
9410       CHECK_CONS (reg_usage);
9411       CHECK_NUMBER_CAR (reg_usage);
9412       CHECK_NUMBER_CDR (reg_usage);
9413
9414       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9415       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9416         {
9417           int id;
9418           Lisp_Object tmp;
9419
9420           val = Fcar (tail);
9421           CHECK_CONS (val);
9422           tmp = XCAR (val);
9423           CHECK_CHARSET_GET_ID (tmp, id);
9424           CHECK_NATNUM_CDR (val);
9425           if (XINT (XCDR (val)) >= 4)
9426             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9427           XSETCAR (val, make_number (id));
9428         }
9429
9430       flags = args[coding_arg_iso2022_flags];
9431       CHECK_NATNUM (flags);
9432       i = XINT (flags);
9433       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9434         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9435
9436       ASET (attrs, coding_attr_iso_initial, initial);
9437       ASET (attrs, coding_attr_iso_usage, reg_usage);
9438       ASET (attrs, coding_attr_iso_request, request);
9439       ASET (attrs, coding_attr_iso_flags, flags);
9440       setup_iso_safe_charsets (attrs);
9441
9442       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9443         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9444                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9445                     ? coding_category_iso_7_else
9446                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9447                     ? coding_category_iso_7
9448                     : coding_category_iso_7_tight);
9449       else
9450         {
9451           int id = XINT (AREF (initial, 1));
9452
9453           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9454                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9455                        || id < 0)
9456                       ? coding_category_iso_8_else
9457                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9458                       ? coding_category_iso_8_1
9459                       : coding_category_iso_8_2);
9460         }
9461       if (category != coding_category_iso_8_1
9462           && category != coding_category_iso_8_2)
9463         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9464     }
9465   else if (EQ (coding_type, Qemacs_mule))
9466     {
9467       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9468         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9469       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9470       category = coding_category_emacs_mule;
9471     }
9472   else if (EQ (coding_type, Qshift_jis))
9473     {
9474
9475       struct charset *charset;
9476
9477       if (XINT (Flength (charset_list)) != 3
9478           && XINT (Flength (charset_list)) != 4)
9479         error ("There should be three or four charsets");
9480
9481       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9482       if (CHARSET_DIMENSION (charset) != 1)
9483         error ("Dimension of charset %s is not one",
9484                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9485       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9486         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9487
9488       charset_list = XCDR (charset_list);
9489       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9490       if (CHARSET_DIMENSION (charset) != 1)
9491         error ("Dimension of charset %s is not one",
9492                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9493
9494       charset_list = XCDR (charset_list);
9495       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9496       if (CHARSET_DIMENSION (charset) != 2)
9497         error ("Dimension of charset %s is not two",
9498                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9499
9500       charset_list = XCDR (charset_list);
9501       if (! NILP (charset_list))
9502         {
9503           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9504           if (CHARSET_DIMENSION (charset) != 2)
9505             error ("Dimension of charset %s is not two",
9506                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9507         }
9508
9509       category = coding_category_sjis;
9510       Vsjis_coding_system = name;
9511     }
9512   else if (EQ (coding_type, Qbig5))
9513     {
9514       struct charset *charset;
9515
9516       if (XINT (Flength (charset_list)) != 2)
9517         error ("There should be just two charsets");
9518
9519       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9520       if (CHARSET_DIMENSION (charset) != 1)
9521         error ("Dimension of charset %s is not one",
9522                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9523       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9524         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9525
9526       charset_list = XCDR (charset_list);
9527       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9528       if (CHARSET_DIMENSION (charset) != 2)
9529         error ("Dimension of charset %s is not two",
9530                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9531
9532       category = coding_category_big5;
9533       Vbig5_coding_system = name;
9534     }
9535   else if (EQ (coding_type, Qraw_text))
9536     {
9537       category = coding_category_raw_text;
9538       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9539     }
9540   else if (EQ (coding_type, Qutf_8))
9541     {
9542       Lisp_Object bom;
9543
9544       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9545
9546       if (nargs < coding_arg_utf8_max)
9547         goto short_args;
9548
9549       bom = args[coding_arg_utf8_bom];
9550       if (! NILP (bom) && ! EQ (bom, Qt))
9551         {
9552           CHECK_CONS (bom);
9553           val = XCAR (bom);
9554           CHECK_CODING_SYSTEM (val);
9555           val = XCDR (bom);
9556           CHECK_CODING_SYSTEM (val);
9557         }
9558       ASET (attrs, coding_attr_utf_bom, bom);
9559
9560       category = (CONSP (bom) ? coding_category_utf_8_auto
9561                   : NILP (bom) ? coding_category_utf_8_nosig
9562                   : coding_category_utf_8_sig);
9563     }
9564   else if (EQ (coding_type, Qundecided))
9565     category = coding_category_undecided;
9566   else
9567     error ("Invalid coding system type: %s",
9568            SDATA (SYMBOL_NAME (coding_type)));
9569
9570   CODING_ATTR_CATEGORY (attrs) = make_number (category);
9571   CODING_ATTR_PLIST (attrs)
9572     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9573                                 CODING_ATTR_PLIST (attrs)));
9574   CODING_ATTR_PLIST (attrs)
9575     = Fcons (QCascii_compatible_p,
9576              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9577                     CODING_ATTR_PLIST (attrs)));
9578
9579   eol_type = args[coding_arg_eol_type];
9580   if (! NILP (eol_type)
9581       && ! EQ (eol_type, Qunix)
9582       && ! EQ (eol_type, Qdos)
9583       && ! EQ (eol_type, Qmac))
9584     error ("Invalid eol-type");
9585
9586   aliases = Fcons (name, Qnil);
9587
9588   if (NILP (eol_type))
9589     {
9590       eol_type = make_subsidiaries (name);
9591       for (i = 0; i < 3; i++)
9592         {
9593           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9594
9595           this_name = AREF (eol_type, i);
9596           this_aliases = Fcons (this_name, Qnil);
9597           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9598           this_spec = Fmake_vector (make_number (3), attrs);
9599           ASET (this_spec, 1, this_aliases);
9600           ASET (this_spec, 2, this_eol_type);
9601           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9602           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
9603           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9604           if (NILP (val))
9605             Vcoding_system_alist
9606               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9607                        Vcoding_system_alist);
9608         }
9609     }
9610
9611   spec_vec = Fmake_vector (make_number (3), attrs);
9612   ASET (spec_vec, 1, aliases);
9613   ASET (spec_vec, 2, eol_type);
9614
9615   Fputhash (name, spec_vec, Vcoding_system_hash_table);
9616   Vcoding_system_list = Fcons (name, Vcoding_system_list);
9617   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9618   if (NILP (val))
9619     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9620                                   Vcoding_system_alist);
9621
9622   {
9623     int id = coding_categories[category].id;
9624
9625     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9626       setup_coding_system (name, &coding_categories[category]);
9627   }
9628
9629   return Qnil;
9630
9631  short_args:
9632   return Fsignal (Qwrong_number_of_arguments,
9633                   Fcons (intern ("define-coding-system-internal"),
9634                          make_number (nargs)));
9635 }
9636
9637
9638 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9639        3, 3, 0,
9640        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
9641   (coding_system, prop, val)
9642      Lisp_Object coding_system, prop, val;
9643 {
9644   Lisp_Object spec, attrs;
9645
9646   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9647   attrs = AREF (spec, 0);
9648   if (EQ (prop, QCmnemonic))
9649     {
9650       if (! STRINGP (val))
9651         CHECK_CHARACTER (val);
9652       CODING_ATTR_MNEMONIC (attrs) = val;
9653     }
9654   else if (EQ (prop, QCdefault_char))
9655     {
9656       if (NILP (val))
9657         val = make_number (' ');
9658       else
9659         CHECK_CHARACTER (val);
9660       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9661     }
9662   else if (EQ (prop, QCdecode_translation_table))
9663     {
9664       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9665         CHECK_SYMBOL (val);
9666       CODING_ATTR_DECODE_TBL (attrs) = val;
9667     }
9668   else if (EQ (prop, QCencode_translation_table))
9669     {
9670       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9671         CHECK_SYMBOL (val);
9672       CODING_ATTR_ENCODE_TBL (attrs) = val;
9673     }
9674   else if (EQ (prop, QCpost_read_conversion))
9675     {
9676       CHECK_SYMBOL (val);
9677       CODING_ATTR_POST_READ (attrs) = val;
9678     }
9679   else if (EQ (prop, QCpre_write_conversion))
9680     {
9681       CHECK_SYMBOL (val);
9682       CODING_ATTR_PRE_WRITE (attrs) = val;
9683     }
9684   else if (EQ (prop, QCascii_compatible_p))
9685     {
9686       CODING_ATTR_ASCII_COMPAT (attrs) = val;
9687     }
9688
9689   CODING_ATTR_PLIST (attrs)
9690     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9691   return val;
9692 }
9693
9694
9695 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9696        Sdefine_coding_system_alias, 2, 2, 0,
9697        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
9698      (alias, coding_system)
9699      Lisp_Object alias, coding_system;
9700 {
9701   Lisp_Object spec, aliases, eol_type, val;
9702
9703   CHECK_SYMBOL (alias);
9704   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9705   aliases = AREF (spec, 1);
9706   /* ALIASES should be a list of length more than zero, and the first
9707      element is a base coding system.  Append ALIAS at the tail of the
9708      list.  */
9709   while (!NILP (XCDR (aliases)))
9710     aliases = XCDR (aliases);
9711   XSETCDR (aliases, Fcons (alias, Qnil));
9712
9713   eol_type = AREF (spec, 2);
9714   if (VECTORP (eol_type))
9715     {
9716       Lisp_Object subsidiaries;
9717       int i;
9718
9719       subsidiaries = make_subsidiaries (alias);
9720       for (i = 0; i < 3; i++)
9721         Fdefine_coding_system_alias (AREF (subsidiaries, i),
9722                                      AREF (eol_type, i));
9723     }
9724
9725   Fputhash (alias, spec, Vcoding_system_hash_table);
9726   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
9727   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9728   if (NILP (val))
9729     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9730                                   Vcoding_system_alist);
9731
9732   return Qnil;
9733 }
9734
9735 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9736        1, 1, 0,
9737        doc: /* Return the base of CODING-SYSTEM.
9738 Any alias or subsidiary coding system is not a base coding system.  */)
9739   (coding_system)
9740      Lisp_Object coding_system;
9741 {
9742   Lisp_Object spec, attrs;
9743
9744   if (NILP (coding_system))
9745     return (Qno_conversion);
9746   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9747   attrs = AREF (spec, 0);
9748   return CODING_ATTR_BASE_NAME (attrs);
9749 }
9750
9751 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9752        1, 1, 0,
9753        doc: "Return the property list of CODING-SYSTEM.")
9754      (coding_system)
9755      Lisp_Object coding_system;
9756 {
9757   Lisp_Object spec, attrs;
9758
9759   if (NILP (coding_system))
9760     coding_system = Qno_conversion;
9761   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9762   attrs = AREF (spec, 0);
9763   return CODING_ATTR_PLIST (attrs);
9764 }
9765
9766
9767 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9768        1, 1, 0,
9769        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
9770      (coding_system)
9771      Lisp_Object coding_system;
9772 {
9773   Lisp_Object spec;
9774
9775   if (NILP (coding_system))
9776     coding_system = Qno_conversion;
9777   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9778   return AREF (spec, 1);
9779 }
9780
9781 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9782        Scoding_system_eol_type, 1, 1, 0,
9783        doc: /* Return eol-type of CODING-SYSTEM.
9784 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
9785
9786 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9787 and CR respectively.
9788
9789 A vector value indicates that a format of end-of-line should be
9790 detected automatically.  Nth element of the vector is the subsidiary
9791 coding system whose eol-type is N.  */)
9792      (coding_system)
9793      Lisp_Object coding_system;
9794 {
9795   Lisp_Object spec, eol_type;
9796   int n;
9797
9798   if (NILP (coding_system))
9799     coding_system = Qno_conversion;
9800   if (! CODING_SYSTEM_P (coding_system))
9801     return Qnil;
9802   spec = CODING_SYSTEM_SPEC (coding_system);
9803   eol_type = AREF (spec, 2);
9804   if (VECTORP (eol_type))
9805     return Fcopy_sequence (eol_type);
9806   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9807   return make_number (n);
9808 }
9809
9810 #endif /* emacs */
9811
9812 \f
/*** 12. Postamble ***/
9814
9815 void
9816 init_coding_once ()
9817 {
9818   int i;
9819
9820   for (i = 0; i < coding_category_max; i++)
9821     {
9822       coding_categories[i].id = -1;
9823       coding_priorities[i] = i;
9824     }
9825
9826   /* ISO2022 specific initialize routine.  */
9827   for (i = 0; i < 0x20; i++)
9828     iso_code_class[i] = ISO_control_0;
9829   for (i = 0x21; i < 0x7F; i++)
9830     iso_code_class[i] = ISO_graphic_plane_0;
9831   for (i = 0x80; i < 0xA0; i++)
9832     iso_code_class[i] = ISO_control_1;
9833   for (i = 0xA1; i < 0xFF; i++)
9834     iso_code_class[i] = ISO_graphic_plane_1;
9835   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9836   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9837   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9838   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9839   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9840   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9841   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9842   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9843   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9844
9845   for (i = 0; i < 256; i++)
9846     {
9847       emacs_mule_bytes[i] = 1;
9848     }
9849   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9850   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9851   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9852   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9853 }
9854
9855 #ifdef emacs
9856
9857 void
9858 syms_of_coding ()
9859 {
9860   staticpro (&Vcoding_system_hash_table);
9861   {
9862     Lisp_Object args[2];
9863     args[0] = QCtest;
9864     args[1] = Qeq;
9865     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9866   }
9867
9868   staticpro (&Vsjis_coding_system);
9869   Vsjis_coding_system = Qnil;
9870
9871   staticpro (&Vbig5_coding_system);
9872   Vbig5_coding_system = Qnil;
9873
9874   staticpro (&Vcode_conversion_reused_workbuf);
9875   Vcode_conversion_reused_workbuf = Qnil;
9876
9877   staticpro (&Vcode_conversion_workbuf_name);
9878   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9879
9880   reused_workbuf_in_use = 0;
9881
9882   DEFSYM (Qcharset, "charset");
9883   DEFSYM (Qtarget_idx, "target-idx");
9884   DEFSYM (Qcoding_system_history, "coding-system-history");
9885   Fset (Qcoding_system_history, Qnil);
9886
9887   /* Target FILENAME is the first argument.  */
9888   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9889   /* Target FILENAME is the third argument.  */
9890   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9891
9892   DEFSYM (Qcall_process, "call-process");
9893   /* Target PROGRAM is the first argument.  */
9894   Fput (Qcall_process, Qtarget_idx, make_number (0));
9895
9896   DEFSYM (Qcall_process_region, "call-process-region");
9897   /* Target PROGRAM is the third argument.  */
9898   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9899
9900   DEFSYM (Qstart_process, "start-process");
9901   /* Target PROGRAM is the third argument.  */
9902   Fput (Qstart_process, Qtarget_idx, make_number (2));
9903
9904   DEFSYM (Qopen_network_stream, "open-network-stream");
9905   /* Target SERVICE is the fourth argument.  */
9906   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9907
9908   DEFSYM (Qcoding_system, "coding-system");
9909   DEFSYM (Qcoding_aliases, "coding-aliases");
9910
9911   DEFSYM (Qeol_type, "eol-type");
9912   DEFSYM (Qunix, "unix");
9913   DEFSYM (Qdos, "dos");
9914
9915   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9916   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9917   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9918   DEFSYM (Qdefault_char, "default-char");
9919   DEFSYM (Qundecided, "undecided");
9920   DEFSYM (Qno_conversion, "no-conversion");
9921   DEFSYM (Qraw_text, "raw-text");
9922
9923   DEFSYM (Qiso_2022, "iso-2022");
9924
9925   DEFSYM (Qutf_8, "utf-8");
9926   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9927
9928   DEFSYM (Qutf_16, "utf-16");
9929   DEFSYM (Qbig, "big");
9930   DEFSYM (Qlittle, "little");
9931
9932   DEFSYM (Qshift_jis, "shift-jis");
9933   DEFSYM (Qbig5, "big5");
9934
9935   DEFSYM (Qcoding_system_p, "coding-system-p");
9936
9937   DEFSYM (Qcoding_system_error, "coding-system-error");
9938   Fput (Qcoding_system_error, Qerror_conditions,
9939         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9940   Fput (Qcoding_system_error, Qerror_message,
9941         build_string ("Invalid coding system"));
9942
9943   /* Intern this now in case it isn't already done.
9944      Setting this variable twice is harmless.
9945      But don't staticpro it here--that is done in alloc.c.  */
9946   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9947
9948   DEFSYM (Qtranslation_table, "translation-table");
9949   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9950   DEFSYM (Qtranslation_table_id, "translation-table-id");
9951   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9952   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9953
9954   DEFSYM (Qvalid_codes, "valid-codes");
9955
9956   DEFSYM (Qemacs_mule, "emacs-mule");
9957
9958   DEFSYM (QCcategory, ":category");
9959   DEFSYM (QCmnemonic, ":mnemonic");
9960   DEFSYM (QCdefault_char, ":default-char");
9961   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9962   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9963   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9964   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9965   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
9966
9967   Vcoding_category_table
9968     = Fmake_vector (make_number (coding_category_max), Qnil);
9969   staticpro (&Vcoding_category_table);
9970   /* Followings are target of code detection.  */
9971   ASET (Vcoding_category_table, coding_category_iso_7,
9972         intern ("coding-category-iso-7"));
9973   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9974         intern ("coding-category-iso-7-tight"));
9975   ASET (Vcoding_category_table, coding_category_iso_8_1,
9976         intern ("coding-category-iso-8-1"));
9977   ASET (Vcoding_category_table, coding_category_iso_8_2,
9978         intern ("coding-category-iso-8-2"));
9979   ASET (Vcoding_category_table, coding_category_iso_7_else,
9980         intern ("coding-category-iso-7-else"));
9981   ASET (Vcoding_category_table, coding_category_iso_8_else,
9982         intern ("coding-category-iso-8-else"));
9983   ASET (Vcoding_category_table, coding_category_utf_8_auto,
9984         intern ("coding-category-utf-8-auto"));
9985   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
9986         intern ("coding-category-utf-8"));
9987   ASET (Vcoding_category_table, coding_category_utf_8_sig,
9988         intern ("coding-category-utf-8-sig"));
9989   ASET (Vcoding_category_table, coding_category_utf_16_be,
9990         intern ("coding-category-utf-16-be"));
9991   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9992         intern ("coding-category-utf-16-auto"));
9993   ASET (Vcoding_category_table, coding_category_utf_16_le,
9994         intern ("coding-category-utf-16-le"));
9995   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9996         intern ("coding-category-utf-16-be-nosig"));
9997   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9998         intern ("coding-category-utf-16-le-nosig"));
9999   ASET (Vcoding_category_table, coding_category_charset,
10000         intern ("coding-category-charset"));
10001   ASET (Vcoding_category_table, coding_category_sjis,
10002         intern ("coding-category-sjis"));
10003   ASET (Vcoding_category_table, coding_category_big5,
10004         intern ("coding-category-big5"));
10005   ASET (Vcoding_category_table, coding_category_ccl,
10006         intern ("coding-category-ccl"));
10007   ASET (Vcoding_category_table, coding_category_emacs_mule,
10008         intern ("coding-category-emacs-mule"));
10009   /* Followings are NOT target of code detection.  */
10010   ASET (Vcoding_category_table, coding_category_raw_text,
10011         intern ("coding-category-raw-text"));
10012   ASET (Vcoding_category_table, coding_category_undecided,
10013         intern ("coding-category-undecided"));
10014
10015   DEFSYM (Qinsufficient_source, "insufficient-source");
10016   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10017   DEFSYM (Qinvalid_source, "invalid-source");
10018   DEFSYM (Qinterrupted, "interrupted");
10019   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10020   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10021
10022   defsubr (&Scoding_system_p);
10023   defsubr (&Sread_coding_system);
10024   defsubr (&Sread_non_nil_coding_system);
10025   defsubr (&Scheck_coding_system);
10026   defsubr (&Sdetect_coding_region);
10027   defsubr (&Sdetect_coding_string);
10028   defsubr (&Sfind_coding_systems_region_internal);
10029   defsubr (&Sunencodable_char_position);
10030   defsubr (&Scheck_coding_systems_region);
10031   defsubr (&Sdecode_coding_region);
10032   defsubr (&Sencode_coding_region);
10033   defsubr (&Sdecode_coding_string);
10034   defsubr (&Sencode_coding_string);
10035   defsubr (&Sdecode_sjis_char);
10036   defsubr (&Sencode_sjis_char);
10037   defsubr (&Sdecode_big5_char);
10038   defsubr (&Sencode_big5_char);
10039   defsubr (&Sset_terminal_coding_system_internal);
10040   defsubr (&Sset_safe_terminal_coding_system_internal);
10041   defsubr (&Sterminal_coding_system);
10042   defsubr (&Sset_keyboard_coding_system_internal);
10043   defsubr (&Skeyboard_coding_system);
10044   defsubr (&Sfind_operation_coding_system);
10045   defsubr (&Sset_coding_system_priority);
10046   defsubr (&Sdefine_coding_system_internal);
10047   defsubr (&Sdefine_coding_system_alias);
10048   defsubr (&Scoding_system_put);
10049   defsubr (&Scoding_system_base);
10050   defsubr (&Scoding_system_plist);
10051   defsubr (&Scoding_system_aliases);
10052   defsubr (&Scoding_system_eol_type);
10053   defsubr (&Scoding_system_priority_list);
10054
10055   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10056                doc: /* List of coding systems.
10057
10058 Do not alter the value of this variable manually.  This variable should be
10059 updated by the functions `define-coding-system' and
10060 `define-coding-system-alias'.  */);
10061   Vcoding_system_list = Qnil;
10062
10063   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10064                doc: /* Alist of coding system names.
10065 Each element is one element list of coding system name.
10066 This variable is given to `completing-read' as COLLECTION argument.
10067
10068 Do not alter the value of this variable manually.  This variable should be
10069 updated by the functions `make-coding-system' and
10070 `define-coding-system-alias'.  */);
10071   Vcoding_system_alist = Qnil;
10072
10073   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10074                doc: /* List of coding-categories (symbols) ordered by priority.
10075
10076 On detecting a coding system, Emacs tries code detection algorithms
10077 associated with each coding-category one by one in this order.  When
10078 one algorithm agrees with a byte sequence of source text, the coding
10079 system bound to the corresponding coding-category is selected.
10080
10081 Don't modify this variable directly, but use `set-coding-priority'.  */);
10082   {
10083     int i;
10084
10085     Vcoding_category_list = Qnil;
10086     for (i = coding_category_max - 1; i >= 0; i--)
10087       Vcoding_category_list
10088         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10089                  Vcoding_category_list);
10090   }
10091
10092   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10093                doc: /* Specify the coding system for read operations.
10094 It is useful to bind this variable with `let', but do not set it globally.
10095 If the value is a coding system, it is used for decoding on read operation.
10096 If not, an appropriate element is used from one of the coding system alists.
10097 There are three such tables: `file-coding-system-alist',
10098 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10099   Vcoding_system_for_read = Qnil;
10100
10101   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10102                doc: /* Specify the coding system for write operations.
10103 Programs bind this variable with `let', but you should not set it globally.
10104 If the value is a coding system, it is used for encoding of output,
10105 when writing it to a file and when sending it to a file or subprocess.
10106
10107 If this does not specify a coding system, an appropriate element
10108 is used from one of the coding system alists.
10109 There are three such tables: `file-coding-system-alist',
10110 `process-coding-system-alist', and `network-coding-system-alist'.
10111 For output to files, if the above procedure does not specify a coding system,
10112 the value of `buffer-file-coding-system' is used.  */);
10113   Vcoding_system_for_write = Qnil;
10114
10115   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10116                doc: /*
10117 Coding system used in the latest file or process I/O.  */);
10118   Vlast_coding_system_used = Qnil;
10119
10120   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10121                doc: /*
10122 Error status of the last code conversion.
10123
10124 When an error was detected in the last code conversion, this variable
10125 is set to one of the following symbols.
10126   `insufficient-source'
10127   `inconsistent-eol'
10128   `invalid-source'
10129   `interrupted'
10130   `insufficient-memory'
10131 When no error was detected, the value doesn't change.  So, to check
10132 the error status of a code conversion by this variable, you must
10133 explicitly set this variable to nil before performing code
10134 conversion.  */);
10135   Vlast_code_conversion_error = Qnil;
10136
10137   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10138                doc: /*
10139 *Non-nil means always inhibit code conversion of end-of-line format.
10140 See info node `Coding Systems' and info node `Text and Binary' concerning
10141 such conversion.  */);
10142   inhibit_eol_conversion = 0;
10143
10144   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10145                doc: /*
10146 Non-nil means process buffer inherits coding system of process output.
10147 Bind it to t if the process output is to be treated as if it were a file
10148 read from some filesystem.  */);
10149   inherit_process_coding_system = 0;
10150
10151   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10152                doc: /*
10153 Alist to decide a coding system to use for a file I/O operation.
10154 The format is ((PATTERN . VAL) ...),
10155 where PATTERN is a regular expression matching a file name,
10156 VAL is a coding system, a cons of coding systems, or a function symbol.
10157 If VAL is a coding system, it is used for both decoding and encoding
10158 the file contents.
10159 If VAL is a cons of coding systems, the car part is used for decoding,
10160 and the cdr part is used for encoding.
10161 If VAL is a function symbol, the function must return a coding system
10162 or a cons of coding systems which are used as above.  The function is
10163 called with an argument that is a list of the arguments with which
10164 `find-operation-coding-system' was called.  If the function can't decide
10165 a coding system, it can return `undecided' so that the normal
10166 code-detection is performed.
10167
10168 See also the function `find-operation-coding-system'
10169 and the variable `auto-coding-alist'.  */);
10170   Vfile_coding_system_alist = Qnil;
10171
10172   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10173                doc: /*
10174 Alist to decide a coding system to use for a process I/O operation.
10175 The format is ((PATTERN . VAL) ...),
10176 where PATTERN is a regular expression matching a program name,
10177 VAL is a coding system, a cons of coding systems, or a function symbol.
10178 If VAL is a coding system, it is used for both decoding what received
10179 from the program and encoding what sent to the program.
10180 If VAL is a cons of coding systems, the car part is used for decoding,
10181 and the cdr part is used for encoding.
10182 If VAL is a function symbol, the function must return a coding system
10183 or a cons of coding systems which are used as above.
10184
10185 See also the function `find-operation-coding-system'.  */);
10186   Vprocess_coding_system_alist = Qnil;
10187
10188   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10189                doc: /*
10190 Alist to decide a coding system to use for a network I/O operation.
10191 The format is ((PATTERN . VAL) ...),
10192 where PATTERN is a regular expression matching a network service name
10193 or is a port number to connect to,
10194 VAL is a coding system, a cons of coding systems, or a function symbol.
10195 If VAL is a coding system, it is used for both decoding what received
10196 from the network stream and encoding what sent to the network stream.
10197 If VAL is a cons of coding systems, the car part is used for decoding,
10198 and the cdr part is used for encoding.
10199 If VAL is a function symbol, the function must return a coding system
10200 or a cons of coding systems which are used as above.
10201
10202 See also the function `find-operation-coding-system'.  */);
10203   Vnetwork_coding_system_alist = Qnil;
10204
10205   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10206                doc: /* Coding system to use with system messages.
10207 Also used for decoding keyboard input on X Window system.  */);
10208   Vlocale_coding_system = Qnil;
10209
10210   /* The eol mnemonics are reset in startup.el system-dependently.  */
10211   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10212                doc: /*
10213 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10214   eol_mnemonic_unix = build_string (":");
10215
10216   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10217                doc: /*
10218 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10219   eol_mnemonic_dos = build_string ("\\");
10220
10221   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10222                doc: /*
10223 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10224   eol_mnemonic_mac = build_string ("/");
10225
10226   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10227                doc: /*
10228 *String displayed in mode line when end-of-line format is not yet determined.  */);
10229   eol_mnemonic_undecided = build_string (":");
10230
10231   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10232                doc: /*
10233 *Non-nil enables character translation while encoding and decoding.  */);
10234   Venable_character_translation = Qt;
10235
10236   DEFVAR_LISP ("standard-translation-table-for-decode",
10237                &Vstandard_translation_table_for_decode,
10238                doc: /* Table for translating characters while decoding.  */);
10239   Vstandard_translation_table_for_decode = Qnil;
10240
10241   DEFVAR_LISP ("standard-translation-table-for-encode",
10242                &Vstandard_translation_table_for_encode,
10243                doc: /* Table for translating characters while encoding.  */);
10244   Vstandard_translation_table_for_encode = Qnil;
10245
10246   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10247                doc: /* Alist of charsets vs revision numbers.
10248 While encoding, if a charset (car part of an element) is found,
10249 designate it with the escape sequence identifying revision (cdr part
10250 of the element).  */);
10251   Vcharset_revision_table = Qnil;
10252
10253   DEFVAR_LISP ("default-process-coding-system",
10254                &Vdefault_process_coding_system,
10255                doc: /* Cons of coding systems used for process I/O by default.
10256 The car part is used for decoding a process output,
10257 the cdr part is used for encoding a text to be sent to a process.  */);
10258   Vdefault_process_coding_system = Qnil;
10259
10260   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10261                doc: /*
10262 Table of extra Latin codes in the range 128..159 (inclusive).
10263 This is a vector of length 256.
10264 If Nth element is non-nil, the existence of code N in a file
10265 \(or output of subprocess) doesn't prevent it to be detected as
10266 a coding system of ISO 2022 variant which has a flag
10267 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10268 or reading output of a subprocess.
10269 Only 128th through 159th elements have a meaning.  */);
10270   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10271
10272   DEFVAR_LISP ("select-safe-coding-system-function",
10273                &Vselect_safe_coding_system_function,
10274                doc: /*
10275 Function to call to select safe coding system for encoding a text.
10276
10277 If set, this function is called to force a user to select a proper
10278 coding system which can encode the text in the case that a default
10279 coding system used in each operation can't encode the text.  The
10280 function should take care that the buffer is not modified while
10281 the coding system is being selected.
10282
10283 The default value is `select-safe-coding-system' (which see).  */);
10284   Vselect_safe_coding_system_function = Qnil;
10285
10286   DEFVAR_BOOL ("coding-system-require-warning",
10287                &coding_system_require_warning,
10288                doc: /* Internal use only.
10289 If non-nil, on writing a file, `select-safe-coding-system-function' is
10290 called even if `coding-system-for-write' is non-nil.  The command
10291 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10292   coding_system_require_warning = 0;
10293
10294
10295   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10296                &inhibit_iso_escape_detection,
10297                doc: /*
10298 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
10299
10300 By default, on reading a file, Emacs tries to detect how the text is
10301 encoded.  This code detection is sensitive to escape sequences.  If
10302 the sequence is valid as ISO2022, the code is determined as one of
10303 the ISO2022 encodings, and the file is decoded by the corresponding
10304 coding system (e.g. `iso-2022-7bit').
10305
10306 However, there may be a case that you want to read escape sequences in
10307 a file as is.  In such a case, you can set this variable to non-nil.
10308 Then, as the code detection ignores any escape sequences, no file is
10309 detected as encoded in some ISO2022 encoding.  The result is that all
10310 escape sequences become visible in a buffer.
10311
10312 The default value is nil, and it is strongly recommended not to change
10313 it.  That is because many Emacs Lisp source files that contain
10314 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10315 in Emacs's distribution, and they won't be decoded correctly on
10316 reading if you suppress escape sequence detection.
10317
10318 The other way to read escape sequences in a file without decoding is
10319 to explicitly specify some coding system that doesn't use ISO2022's
10320 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10321   inhibit_iso_escape_detection = 0;
10322
10323   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10324                doc: /* Char table for translating self-inserting characters.
10325 This is applied to the result of input methods, not their input.
10326 See also `keyboard-translate-table'.  */);
10327     Vtranslation_table_for_input = Qnil;
10328
10329   {
10330     Lisp_Object args[coding_arg_max];
10331     Lisp_Object plist[16];
10332     int i;
10333
10334     for (i = 0; i < coding_arg_max; i++)
10335       args[i] = Qnil;
10336
10337     plist[0] = intern (":name");
10338     plist[1] = args[coding_arg_name] = Qno_conversion;
10339     plist[2] = intern (":mnemonic");
10340     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10341     plist[4] = intern (":coding-type");
10342     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10343     plist[6] = intern (":ascii-compatible-p");
10344     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10345     plist[8] = intern (":default-char");
10346     plist[9] = args[coding_arg_default_char] = make_number (0);
10347     plist[10] = intern (":for-unibyte");
10348     plist[11] = args[coding_arg_for_unibyte] = Qt;
10349     plist[12] = intern (":docstring");
10350     plist[13] = build_string ("Do no conversion.\n\
10351 \n\
10352 When you visit a file with this coding, the file is read into a\n\
10353 unibyte buffer as is, thus each byte of a file is treated as a\n\
10354 character.");
10355     plist[14] = intern (":eol-type");
10356     plist[15] = args[coding_arg_eol_type] = Qunix;
10357     args[coding_arg_plist] = Flist (16, plist);
10358     Fdefine_coding_system_internal (coding_arg_max, args);
10359
10360     plist[1] = args[coding_arg_name] = Qundecided;
10361     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10362     plist[5] = args[coding_arg_coding_type] = Qundecided;
10363     /* This is already set.
10364        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10365     plist[8] = intern (":charset-list");
10366     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10367     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10368     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10369     plist[15] = args[coding_arg_eol_type] = Qnil;
10370     args[coding_arg_plist] = Flist (16, plist);
10371     Fdefine_coding_system_internal (coding_arg_max, args);
10372   }
10373
10374   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10375
10376   {
10377     int i;
10378
10379     for (i = 0; i < coding_category_max; i++)
10380       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10381   }
10382 #if defined (MSDOS) || defined (WINDOWSNT)
10383   system_eol_type = Qdos;
10384 #else
10385   system_eol_type = Qunix;
10386 #endif
10387   staticpro (&system_eol_type);
10388 }
10389
10390 char *
10391 emacs_strerror (error_number)
10392      int error_number;
10393 {
10394   char *str;
10395
10396   synchronize_system_messages_locale ();
10397   str = strerror (error_number);
10398
10399   if (! NILP (Vlocale_coding_system))
10400     {
10401       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10402                                                       Vlocale_coding_system,
10403                                                       0);
10404       str = (char *) SDATA (dec);
10405     }
10406
10407   return str;
10408 }
10409
10410 #endif /* emacs */
10411
10412 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10413    (do not change this comment) */