src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from coding system symbol to attributes vector is done by looking up
  63   Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on the operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we set up a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;        /* Mask bits to add to DETECT_INFO->found.  */
 166   ...;
 167
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the source is exhausted,
 171          jump to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (__C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX; reject the category.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source was exhausted without finding an invalid byte.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size.
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source text, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf >= charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode C and store the result at CHARBUF.  */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends with only part of the bytes of a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that the source and destination areas
 258   overlap, which means that we can produce encoded text until it
 259   reaches the head of the not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = coding->charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274   /* Stop while DST still has room for one full iteration's output.  */
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* Record how many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292
 293 #include "lisp.h"
 294 #include "buffer.h"
 295 #include "character.h"
 296 #include "charset.h"
 297 #include "ccl.h"
 298 #include "composite.h"
 299 #include "coding.h"
 300 #include "window.h"
 301 #include "frame.h"
 302 #include "termhooks.h"
 303
 304 Lisp_Object Vcoding_system_hash_table;
 305
 306 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 307 Lisp_Object Qunix, Qdos;
 308 extern Lisp_Object Qmac;        /* frame.c */
 309 Lisp_Object Qbuffer_file_coding_system;
 310 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 311 Lisp_Object Qdefault_char;
 312 Lisp_Object Qno_conversion, Qundecided;
 313 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 314 Lisp_Object Qbig, Qlittle;
 315 Lisp_Object Qcoding_system_history;
 316 Lisp_Object Qvalid_codes;
 317 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char; /* sic: "defalut" */
 318 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 319 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 320 Lisp_Object QCascii_compatible_p;
 321
 322 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 323 Lisp_Object Qcall_process, Qcall_process_region;
 324 Lisp_Object Qstart_process, Qopen_network_stream;
 325 Lisp_Object Qtarget_idx;
 326
 327 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 328 Lisp_Object Qinterrupted, Qinsufficient_memory;
 329
 330 extern Lisp_Object Qcompletion_ignore_case;
 331
 332 /* If a symbol has this property, evaluate the value to define the
 333    symbol as a coding system.  */
 334 static Lisp_Object Qcoding_system_define_form;
 335
 336 int coding_system_require_warning;
 337
 338 Lisp_Object Vselect_safe_coding_system_function;
 339
 340 /* Mnemonic string for each format of end-of-line.  */
 341 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 342 /* Mnemonic string to indicate format of end-of-line is not yet
 343    decided.  */
 344 Lisp_Object eol_mnemonic_undecided;
 345
 346 /* Format of end-of-line decided by the system.  This is Qunix on
 347    Unix and Mac, Qdos on DOS/Windows.
 348    This has an effect only for external encoding (i.e. for output to
 349    file and process), not for in-buffer or Lisp string encoding.  */
 350 static Lisp_Object system_eol_type;
 351
 352 #ifdef emacs
 353
 354 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 355
 356 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 357
 358 /* Coding system emacs-mule and raw-text are for converting only
 359    end-of-line format.  */
 360 Lisp_Object Qemacs_mule, Qraw_text;
 361 Lisp_Object Qutf_8_emacs;
 362
 363 /* Coding-systems are passed between Emacs Lisp programs and C internal
 364    routines by the following three variables.  */
 365 /* Coding-system for reading files and receiving data from process.  */
 366 Lisp_Object Vcoding_system_for_read;
 367 /* Coding-system for writing files and sending data to process.  */
 368 Lisp_Object Vcoding_system_for_write;
 369 /* Coding-system actually used in the latest I/O.  */
 370 Lisp_Object Vlast_coding_system_used;
 371 /* Set to non-nil when an error is detected during code conversion.  */
 372 Lisp_Object Vlast_code_conversion_error;
 373 /* A vector of length 256 which contains information about special
 374    Latin codes (especially for dealing with Microsoft codes).  */
 375 Lisp_Object Vlatin_extra_code_table;
 376
 377 /* Flag to inhibit code conversion of end-of-line format.  */
 378 int inhibit_eol_conversion;
 379
 380 /* Flag to inhibit ISO2022 escape sequence detection.  */
 381 int inhibit_iso_escape_detection;
 382
 383 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 384 int inherit_process_coding_system;
 385
 386 /* Coding system to be used to encode text for terminal display when
 387    terminal coding system is nil.  */
 388 struct coding_system safe_terminal_coding;
 389
 390 Lisp_Object Vfile_coding_system_alist;
 391 Lisp_Object Vprocess_coding_system_alist;
 392 Lisp_Object Vnetwork_coding_system_alist;
 393
 394 Lisp_Object Vlocale_coding_system;
 395
 396 #endif /* emacs */
 397
 398 /* Flag to tell if we look up a translation table on character code
 399    conversion.  */
 400 Lisp_Object Venable_character_translation;
 401 /* Standard translation table to look up on decoding (reading).  */
 402 Lisp_Object Vstandard_translation_table_for_decode;
 403 /* Standard translation table to look up on encoding (writing).  */
 404 Lisp_Object Vstandard_translation_table_for_encode;
 405
 406 Lisp_Object Qtranslation_table;
 407 Lisp_Object Qtranslation_table_id;
 408 Lisp_Object Qtranslation_table_for_decode;
 409 Lisp_Object Qtranslation_table_for_encode;
 410
 411 /* Alist of charsets vs revision number.  */
 412 static Lisp_Object Vcharset_revision_table;
 413
 414 /* Default coding systems used for process I/O.  */
 415 Lisp_Object Vdefault_process_coding_system;
 416
 417 /* Char table for translating Quail and self-inserting input.  */
 418 Lisp_Object Vtranslation_table_for_input;
 419
 420 /* Two special coding systems.  */
 421 Lisp_Object Vsjis_coding_system;
 422 Lisp_Object Vbig5_coding_system;
 423
 424 /* ISO2022 section */
 425 /* Integer value of element REG of CODING's iso-initial attribute.  */
 426 #define CODING_ISO_INITIAL(coding, reg)                 \
 427   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 428                      coding_attr_iso_initial),          \
 429                reg)))
 430
 431 /* safe_charsets[CHARSET_ID] of CODING, or -1 if CHARSET_ID is too big.  */
 432 #define CODING_ISO_REQUEST(coding, charset_id)  \
 433   (((charset_id) <= (coding)->max_charset_id    \
 434     ? (coding)->safe_charsets[charset_id]       \
 435     : -1))
 436
 437 /* Accessors for the iso_2022 part of CODING's `spec' member.  */
 438 #define CODING_ISO_FLAGS(coding)        \
 439   ((coding)->spec.iso_2022.flags)
 440 #define CODING_ISO_DESIGNATION(coding, reg)     \
 441   ((coding)->spec.iso_2022.current_designation[reg])
 442 #define CODING_ISO_INVOCATION(coding, plane)    \
 443   ((coding)->spec.iso_2022.current_invocation[plane])
 444 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 445   ((coding)->spec.iso_2022.single_shifting)
 446 #define CODING_ISO_BOL(coding)  \
 447   ((coding)->spec.iso_2022.bol)
 448 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 449   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 450
 451 /* Control characters of ISO2022.  */
 452                         /* code */      /* function */
 453 #define ISO_CODE_LF     0x0A            /* line-feed */
 454 #define ISO_CODE_CR     0x0D            /* carriage-return */
 455 #define ISO_CODE_SO     0x0E            /* shift-out */
 456 #define ISO_CODE_SI     0x0F            /* shift-in */
 457 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 458 #define ISO_CODE_ESC    0x1B            /* escape */
 459 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 460 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 461 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 462
 463 /* Every (1-byte) code of ISO2022 is classified into one of the
 464    following classes.  */
 465 enum iso_code_class_type
 466   {
 467     ISO_control_0,              /* Control codes in the range
 468                                    0x00..0x1F and 0x7F, except for the
 469                                    following 4 codes.  */
 470     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 471     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 472     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 473     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 474     ISO_control_1,              /* Control codes in the range
 475                                    0x80..0x9F, except for the
 476                                    following 3 codes.  */
 477     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 478     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 479     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 480     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 481     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 482     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 483     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 484   };
 485
 486 /** Each CODING_ISO_FLAG_XXX macro defines a flag bit of the
 487     `iso-flags' attribute of an iso2022 coding system.  */
 488
 489 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 490    instead of the correct short-form sequence (e.g. ESC $ A).  */
 491 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 492
 493 /* If set, reset graphic planes and registers at end-of-line to the
 494    initial state.  */
 495 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 496
 497 /* If set, reset graphic planes and registers before any control
 498    characters to the initial state.  */
 499 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 500
 501 /* If set, encode by 7-bit environment.  */
 502 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 503
 504 /* If set, use locking-shift function.  */
 505 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 506
 507 /* If set, use single-shift function.  Overwrite
 508    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 509 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 510
 511 /* If set, use designation escape sequence.  */
 512 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 513
 514 /* If set, produce revision number sequence.  */
 515 #define CODING_ISO_FLAG_REVISION        0x0080
 516
 517 /* If set, produce ISO6429's direction specifying sequence.  */
 518 #define CODING_ISO_FLAG_DIRECTION       0x0100
 519
 520 /* If set, assume designation states are reset at beginning of line on
 521    output.  */
 522 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 523
 524 /* If set, designation sequence should be placed at beginning of line
 525    on output.  */
 526 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 527
 528 /* If set, do not encode unsafe characters on output.  */
 529 #define CODING_ISO_FLAG_SAFE            0x0800
 530
 531 /* If set, extra latin codes (128..159) are accepted as a valid code
 532    on input.  */
 533 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 534
 535 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 536
 537 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 538
 539 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 540
 541 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 542
 543 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 544
 545 /* A character to be produced on output if encoding of the original
 546    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 547 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 548
 549 /* UTF-8 section: accessor for the `spec.utf_8_bom' member of CODING.  */
 550 #define CODING_UTF_8_BOM(coding)        \
 551   ((coding)->spec.utf_8_bom)
 552
 553 /* UTF-16 section: accessors for members of CODING's `spec.utf_16'.  */
 554 #define CODING_UTF_16_BOM(coding)       \
 555   ((coding)->spec.utf_16.bom)
 556
 557 #define CODING_UTF_16_ENDIAN(coding)    \
 558   ((coding)->spec.utf_16.endian)
 559
 560 #define CODING_UTF_16_SURROGATE(coding) \
 561   ((coding)->spec.utf_16.surrogate)
 562
 563
 564 /* CCL section: accessors for the CCL entries of CODING's attributes.  */
 565 #define CODING_CCL_DECODER(coding)      \
 566   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 567 #define CODING_CCL_ENCODER(coding)      \
 568   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 569 #define CODING_CCL_VALIDS(coding)                                          \
 570   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 571
 572 /* Index for each coding category in `coding_categories'.  The
 573    CATEGORY_MASK_XXX flag bits are defined as 1 << these values.  */
 574 enum coding_category
 575   {
 576     coding_category_iso_7,
 577     coding_category_iso_7_tight,
 578     coding_category_iso_8_1,
 579     coding_category_iso_8_2,
 580     coding_category_iso_7_else,
 581     coding_category_iso_8_else,
 582     coding_category_utf_8_auto,
 583     coding_category_utf_8_nosig,
 584     coding_category_utf_8_sig,
 585     coding_category_utf_16_auto,
 586     coding_category_utf_16_be,
 587     coding_category_utf_16_le,
 588     coding_category_utf_16_be_nosig,
 589     coding_category_utf_16_le_nosig,
 590     coding_category_charset,
 591     coding_category_sjis,
 592     coding_category_big5,
 593     coding_category_ccl,
 594     coding_category_emacs_mule,
 595     /* All above are targets of code detection.  */
 596     coding_category_raw_text,
 597     coding_category_undecided,
 598     coding_category_max
 599   };
 600
 601 /* Definitions of flag bits used in detect_coding_XXXX.  */
 602 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 603 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 604 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 605 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 606 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 607 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 608 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 609 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 610 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 611 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 612 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 613 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 614 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 615 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 616 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 617 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 618 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 619 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 620 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 621 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 622
 623 /* This value is returned if detect_coding_mask () finds nothing other
 624    than ASCII characters.  CATEGORY_MASK_RAW_TEXT is not included.  */
 625 #define CATEGORY_MASK_ANY               \
 626   (CATEGORY_MASK_ISO_7                  \
 627    | CATEGORY_MASK_ISO_7_TIGHT          \
 628    | CATEGORY_MASK_ISO_8_1              \
 629    | CATEGORY_MASK_ISO_8_2              \
 630    | CATEGORY_MASK_ISO_7_ELSE           \
 631    | CATEGORY_MASK_ISO_8_ELSE           \
 632    | CATEGORY_MASK_UTF_8_AUTO           \
 633    | CATEGORY_MASK_UTF_8_NOSIG          \
 634    | CATEGORY_MASK_UTF_8_SIG            \
 635    | CATEGORY_MASK_UTF_16_AUTO          \
 636    | CATEGORY_MASK_UTF_16_BE            \
 637    | CATEGORY_MASK_UTF_16_LE            \
 638    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 639    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 640    | CATEGORY_MASK_CHARSET              \
 641    | CATEGORY_MASK_SJIS                 \
 642    | CATEGORY_MASK_BIG5                 \
 643    | CATEGORY_MASK_CCL                  \
 644    | CATEGORY_MASK_EMACS_MULE)
 645
 646
 647 #define CATEGORY_MASK_ISO_7BIT \
 648   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 649
 650 #define CATEGORY_MASK_ISO_8BIT \
 651   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 652
 653 #define CATEGORY_MASK_ISO_ELSE \
 654   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 655
 656 #define CATEGORY_MASK_ISO_ESCAPE        \
 657   (CATEGORY_MASK_ISO_7                  \
 658    | CATEGORY_MASK_ISO_7_TIGHT          \
 659    | CATEGORY_MASK_ISO_7_ELSE           \
 660    | CATEGORY_MASK_ISO_8_ELSE)
 661
 662 #define CATEGORY_MASK_ISO       \
 663   (  CATEGORY_MASK_ISO_7BIT     \
 664      | CATEGORY_MASK_ISO_8BIT   \
 665      | CATEGORY_MASK_ISO_ELSE)
 666
 667 #define CATEGORY_MASK_UTF_16            \
 668   (CATEGORY_MASK_UTF_16_AUTO            \
 669    | CATEGORY_MASK_UTF_16_BE            \
 670    | CATEGORY_MASK_UTF_16_LE            \
 671    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 672    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 673
 674 #define CATEGORY_MASK_UTF_8     \
 675   (CATEGORY_MASK_UTF_8_AUTO     \
 676    | CATEGORY_MASK_UTF_8_NOSIG  \
 677    | CATEGORY_MASK_UTF_8_SIG)
 678
 679 /* List of symbols `coding-category-xxx' ordered by priority.  This
 680    variable is exposed to Emacs Lisp.  */
 681 static Lisp_Object Vcoding_category_list;
 682
 683 /* Table of coding categories (Lisp symbols).  This variable is for
 684    internal use only.  */
 685 static Lisp_Object Vcoding_category_table;
 686
 687 /* Table of coding-categories ordered by priority.  */
 688 static enum coding_category coding_priorities[coding_category_max];
 689
 690 /* Nth element is a coding context for the coding system bound to the
 691    Nth coding category.  */
 692 static struct coding_system coding_categories[coding_category_max];
 693
 694 /*** Commonly used macros and functions ***/
 695 /* Note: min and max evaluate one of their arguments twice.  */
 696 #ifndef min
 697 #define min(a, b) ((a) < (b) ? (a) : (b))
 698 #endif
 699 #ifndef max
 700 #define max(a, b) ((a) > (b) ? (a) : (b))
 701 #endif
 702 /* Set ATTRS from CODING's id, then CHARSET_LIST from ATTRS.  */
 703 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 704   do {                                                  \
 705     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 706     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 707   } while (0)
 708
 709
 710 /* Get one byte from SRC into C and advance SRC.  At SRC_END, jump to
 711    `no_more_source', first recording CODING_RESULT_INSUFFICIENT_SRC if
 712    SRC_BASE < SRC.  If multibytep is nonzero and the byte has its high
 713    bit set: a lead byte 0xC0/0xC1 is merged with the next byte into one
 714    value; otherwise C is set to the negated character code and
 715    CODING_RESULT_INVALID_SRC is recorded.  The caller must declare:
 716         src, src_base, src_end, multibytep, coding, consumed_chars */
 717
 718 #define ONE_MORE_BYTE(c)                                \
 719   do {                                                  \
 720     if (src == src_end)                                 \
 721       {                                                 \
 722         if (src_base < src)                             \
 723           record_conversion_result                      \
 724             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 725         goto no_more_source;                            \
 726       }                                                 \
 727     c = *src++;                                         \
 728     if (multibytep && (c & 0x80))                       \
 729       {                                                 \
 730         if ((c & 0xFE) == 0xC0)                         \
 731           c = ((c & 1) << 6) | *src++;                  \
 732         else                                            \
 733           {                                             \
 734             src--;                                      \
 735             c = - string_char (src, &src, NULL);        \
 736             record_conversion_result                    \
 737               (coding, CODING_RESULT_INVALID_SRC);      \
 738           }                                             \
 739       }                                                 \
 740     consumed_chars++;                                   \
 741   } while (0)
 742
 743 /* Like ONE_MORE_BYTE, but do not check whether SRC reached SRC_END.  */
 744 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 745   do {                                                  \
 746     c = *src++;                                         \
 747     if (multibytep && (c & 0x80))                       \
 748       {                                                 \
 749         if ((c & 0xFE) == 0xC0)                         \
 750           c = ((c & 1) << 6) | *src++;                  \
 751         else                                            \
 752           {                                             \
 753             src--;                                      \
 754             c = - string_char (src, &src, NULL);        \
 755             record_conversion_result                    \
 756               (coding, CODING_RESULT_INVALID_SRC);      \
 757           }                                             \
 758       }                                                 \
 759     consumed_chars++;                                   \
 760   } while (0)
 761
 762
 763 /* Store a byte C in the place pointed by DST and increment DST to the
 764    next free point, and increment PRODUCED_CHARS.  The caller should
 765    ensure that C is 0..127, and declare and set the variables `dst'
 766    and `produced_chars' appropriately in advance.
 767 */
 768
 769
 770 #define EMIT_ONE_ASCII_BYTE(c)  \
 771   do {                          \
 772     produced_chars++;           \
 773     *dst++ = (c);               \
 774   } while (0)
 775
 776
 777 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 778
 779 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 780   do {                                  \
 781     produced_chars += 2;                \
 782     *dst++ = (c1), *dst++ = (c2);       \
 783   } while (0)
 784
 785
 786 /* Store a byte C in the place pointed by DST and increment DST to the
 787    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 788    nonzero, store in an appropriate multibyte from.  The caller should
 789    declare and set the variables `dst' and `multibytep' appropriately
 790    in advance.  */
 791
 792 #define EMIT_ONE_BYTE(c)                \
 793   do {                                  \
 794     produced_chars++;                   \
 795     if (multibytep)                     \
 796       {                                 \
 797         int ch = (c);                   \
 798         if (ch >= 0x80)                 \
 799           ch = BYTE8_TO_CHAR (ch);      \
 800         CHAR_STRING_ADVANCE (ch, dst);  \
 801       }                                 \
 802     else                                \
 803       *dst++ = (c);                     \
 804   } while (0)
 805
 806
 807 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 808
 809 #define EMIT_TWO_BYTES(c1, c2)          \
 810   do {                                  \
 811     produced_chars += 2;                \
 812     if (multibytep)                     \
 813       {                                 \
 814         int ch;                         \
 815                                         \
 816         ch = (c1);                      \
 817         if (ch >= 0x80)                 \
 818           ch = BYTE8_TO_CHAR (ch);      \
 819         CHAR_STRING_ADVANCE (ch, dst);  \
 820         ch = (c2);                      \
 821         if (ch >= 0x80)                 \
 822           ch = BYTE8_TO_CHAR (ch);      \
 823         CHAR_STRING_ADVANCE (ch, dst);  \
 824       }                                 \
 825     else                                \
 826       {                                 \
 827         *dst++ = (c1);                  \
 828         *dst++ = (c2);                  \
 829       }                                 \
 830   } while (0)
 831
 832
 833 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 834   do {                                  \
 835     EMIT_ONE_BYTE (c1);                 \
 836     EMIT_TWO_BYTES (c2, c3);            \
 837   } while (0)
 838
 839
 840 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 841   do {                                          \
 842     EMIT_TWO_BYTES (c1, c2);                    \
 843     EMIT_TWO_BYTES (c3, c4);                    \
 844   } while (0)
 845
 846
/* Prototypes for static functions.  */

/* Bookkeeping of the status of a conversion.  */
static void record_conversion_result P_ ((struct coding_system *coding,
                                          enum coding_result_code result));

/* Per-format handlers.  Each format provides a detect_coding_XXX, a
   decode_coding_XXX and an encode_coding_XXX function (raw-text has
   no detector declared here).  */
static int detect_coding_utf_8 P_ ((struct coding_system *,
                                    struct coding_detection_info *info));
static void decode_coding_utf_8 P_ ((struct coding_system *));
static int encode_coding_utf_8 P_ ((struct coding_system *));

static int detect_coding_utf_16 P_ ((struct coding_system *,
                                     struct coding_detection_info *info));
static void decode_coding_utf_16 P_ ((struct coding_system *));
static int encode_coding_utf_16 P_ ((struct coding_system *));

static int detect_coding_iso_2022 P_ ((struct coding_system *,
                                       struct coding_detection_info *info));
static void decode_coding_iso_2022 P_ ((struct coding_system *));
static int encode_coding_iso_2022 P_ ((struct coding_system *));

static int detect_coding_emacs_mule P_ ((struct coding_system *,
                                         struct coding_detection_info *info));
static void decode_coding_emacs_mule P_ ((struct coding_system *));
static int encode_coding_emacs_mule P_ ((struct coding_system *));

static int detect_coding_sjis P_ ((struct coding_system *,
                                   struct coding_detection_info *info));
static void decode_coding_sjis P_ ((struct coding_system *));
static int encode_coding_sjis P_ ((struct coding_system *));

static int detect_coding_big5 P_ ((struct coding_system *,
                                   struct coding_detection_info *info));
static void decode_coding_big5 P_ ((struct coding_system *));
static int encode_coding_big5 P_ ((struct coding_system *));

static int detect_coding_ccl P_ ((struct coding_system *,
                                  struct coding_detection_info *info));
static void decode_coding_ccl P_ ((struct coding_system *));
static int encode_coding_ccl P_ ((struct coding_system *));

static void decode_coding_raw_text P_ ((struct coding_system *));
static int encode_coding_raw_text P_ ((struct coding_system *));

/* Source/destination pointer maintenance and destination growth.  */
static void coding_set_source P_ ((struct coding_system *));
static void coding_set_destination P_ ((struct coding_system *));
static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
static void coding_alloc_by_making_gap P_ ((struct coding_system *,
                                            EMACS_INT, EMACS_INT));
static unsigned char *alloc_destination P_ ((struct coding_system *,
                                             EMACS_INT, unsigned char *));
/* Helpers shared by the handlers and the generic conversion driver.  */
static void setup_iso_safe_charsets P_ ((Lisp_Object));
static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
                                                     int *, int *,
                                                     unsigned char *));
static int detect_eol P_ ((const unsigned char *,
                           EMACS_INT, enum coding_category));
static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
static void decode_eol P_ ((struct coding_system *));
static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
                                        int, int *, int *));
static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
static INLINE void produce_composition P_ ((struct coding_system *, int *,
                                            EMACS_INT));
static INLINE void produce_charset P_ ((struct coding_system *, int *,
                                        EMACS_INT));
static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
static int decode_coding P_ ((struct coding_system *));
static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
                                                      struct coding_system *,
                                                      int *, EMACS_INT *));
static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
                                                  struct coding_system *,
                                                  int *, EMACS_INT *));
static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
static int encode_coding P_ ((struct coding_system *));
static Lisp_Object make_conversion_work_buffer P_ ((int));
static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
static INLINE int char_encodable_p P_ ((int, Lisp_Object));
static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 925
 926 static void
 927 record_conversion_result (struct coding_system *coding,
 928                           enum coding_result_code result)
 929 {
 930   coding->result = result;
 931   switch (result)
 932     {
 933     case CODING_RESULT_INSUFFICIENT_SRC:
 934       Vlast_code_conversion_error = Qinsufficient_source;
 935       break;
 936     case CODING_RESULT_INCONSISTENT_EOL:
 937       Vlast_code_conversion_error = Qinconsistent_eol;
 938       break;
 939     case CODING_RESULT_INVALID_SRC:
 940       Vlast_code_conversion_error = Qinvalid_source;
 941       break;
 942     case CODING_RESULT_INTERRUPT:
 943       Vlast_code_conversion_error = Qinterrupted;
 944       break;
 945     case CODING_RESULT_INSUFFICIENT_MEM:
 946       Vlast_code_conversion_error = Qinsufficient_memory;
 947       break;
 948     default:
 949       Vlast_code_conversion_error = intern ("Unknown error");
 950     }
 951 }
 952
/* Set C to the result of DECODE_CHAR (CHARSET, CODE).

   charset_map_loaded is cleared before the call and tested after it.
   If it became nonzero, CODING->source is recomputed with
   coding_set_source, and the caller's pointers SRC, SRC_BASE and
   SRC_END are shifted by the distance CODING->source moved, so they
   keep pointing at the same source positions.
   NOTE(review): presumably loading a charset map can relocate the
   source text -- confirm against DECODE_CHAR.

   SRC, SRC_BASE and SRC_END must be modifiable lvalues.  */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    c = DECODE_CHAR (charset, code);                                         \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *orig = coding->source;                          \
        EMACS_INT offset;                                                    \
                                                                             \
        coding_set_source (coding);                                          \
        offset = coding->source - orig;                                      \
        src += offset;                                                       \
        src_base += offset;                                                  \
        src_end += offset;                                                   \
      }                                                                      \
  } while (0)
 969
 970
 971 /* If there are at least BYTES length of room at dst, allocate memory
 972    for coding->destination and update dst and dst_end.  We don't have
 973    to take care of coding->source which will be relocated.  It is
 974    handled by calling coding_set_source in encode_coding.  */
 975
 976 #define ASSURE_DESTINATION(bytes)                               \
 977   do {                                                          \
 978     if (dst + (bytes) >= dst_end)                               \
 979       {                                                         \
 980         int more_bytes = charbuf_end - charbuf + (bytes);       \
 981                                                                 \
 982         dst = alloc_destination (coding, more_bytes, dst);      \
 983         dst_end = coding->destination + coding->dst_bytes;      \
 984       }                                                         \
 985   } while (0)
 986
 987
 988 /* Store multibyte form of the character C in P, and advance P to the
 989    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
 990    never calls MAYBE_UNIFY_CHAR.  */
 991
 992 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
 993   do {                                          \
 994     if ((c) <= MAX_1_BYTE_CHAR)                 \
 995       *(p)++ = (c);                             \
 996     else if ((c) <= MAX_2_BYTE_CHAR)            \
 997       *(p)++ = (0xC0 | ((c) >> 6)),             \
 998         *(p)++ = (0x80 | ((c) & 0x3F));         \
 999     else if ((c) <= MAX_3_BYTE_CHAR)            \
1000       *(p)++ = (0xE0 | ((c) >> 12)),            \
1001         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1002         *(p)++ = (0x80 | ((c) & 0x3F));         \
1003     else if ((c) <= MAX_4_BYTE_CHAR)            \
1004       *(p)++ = (0xF0 | (c >> 18)),              \
1005         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1006         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1007         *(p)++ = (0x80 | (c & 0x3F));           \
1008     else if ((c) <= MAX_5_BYTE_CHAR)            \
1009       *(p)++ = 0xF8,                            \
1010         *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
1011         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1012         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1013         *(p)++ = (0x80 | (c & 0x3F));           \
1014     else                                        \
1015       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1016   } while (0)
1017
1018
/* Return the character code of character whose multibyte form is at
   P, and advance P to the end of the multibyte form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length of the form is taken from the bits of the first byte:
   bit 0x80 clear means one byte; otherwise bits 0x20, 0x10 and 0x08
   are tested in turn and the first clear one selects a 2-, 3- or
   4-byte form, else 5 bytes are consumed.  No validity check is made
   on the trailing bytes.

   In the 2-byte case, a lead byte below 0xC2 has 0x3FFF80 OR'ed into
   the result; this mirrors CHAR_STRING_ADVANCE_NO_UNIFY, which
   subtracts 0x3FFF80 before calling BYTE8_STRING.  In the 5-byte
   case the lead byte itself contributes no bits.

   P is evaluated several times and modified; it must be a pointer
   lvalue without side effects.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  (!((p)[0] & 0x80)                                             \
   ? *(p)++                                                     \
   : ! ((p)[0] & 0x20)                                          \
   ? ((p) += 2,                                                 \
      ((((p)[-2] & 0x1F) << 6)                                  \
       | ((p)[-1] & 0x3F)                                       \
       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
   : ! ((p)[0] & 0x10)                                          \
   ? ((p) += 3,                                                 \
      ((((p)[-3] & 0x0F) << 12)                                 \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ! ((p)[0] & 0x08)                                          \
   ? ((p) += 4,                                                 \
      ((((p)[-4] & 0xF) << 18)                                  \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p) += 5,                                                 \
      ((((p)[-4] & 0x3F) << 18)                                 \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F))))
1047
1048
1049 static void
1050 coding_set_source (coding)
1051      struct coding_system *coding;
1052 {
1053   if (BUFFERP (coding->src_object))
1054     {
1055       struct buffer *buf = XBUFFER (coding->src_object);
1056
1057       if (coding->src_pos < 0)
1058         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1059       else
1060         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1061     }
1062   else if (STRINGP (coding->src_object))
1063     {
1064       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1065     }
1066   else
1067     /* Otherwise, the source is C string and is never relocated
1068        automatically.  Thus we don't have to update anything.  */
1069     ;
1070 }
1071
1072 static void
1073 coding_set_destination (coding)
1074      struct coding_system *coding;
1075 {
1076   if (BUFFERP (coding->dst_object))
1077     {
1078       if (coding->src_pos < 0)
1079         {
1080           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1081           coding->dst_bytes = (GAP_END_ADDR
1082                                - (coding->src_bytes - coding->consumed)
1083                                - coding->destination);
1084         }
1085       else
1086         {
1087           /* We are sure that coding->dst_pos_byte is before the gap
1088              of the buffer. */
1089           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1090                                  + coding->dst_pos_byte - BEG_BYTE);
1091           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1092                                - coding->destination);
1093         }
1094     }
1095   else
1096     /* Otherwise, the destination is C string and is never relocated
1097        automatically.  Thus we don't have to update anything.  */
1098     ;
1099 }
1100
1101
1102 static void
1103 coding_alloc_by_realloc (coding, bytes)
1104      struct coding_system *coding;
1105      EMACS_INT bytes;
1106 {
1107   coding->destination = (unsigned char *) xrealloc (coding->destination,
1108                                                     coding->dst_bytes + bytes);
1109   coding->dst_bytes += bytes;
1110 }
1111
1112 static void
1113 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1114      struct coding_system *coding;
1115      EMACS_INT gap_head_used, bytes;
1116 {
1117   if (EQ (coding->src_object, coding->dst_object))
1118     {
1119       /* The gap may contain the produced data at the head and not-yet
1120          consumed data at the tail.  To preserve those data, we at
1121          first make the gap size to zero, then increase the gap
1122          size.  */
1123       EMACS_INT add = GAP_SIZE;
1124
1125       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1126       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1127       make_gap (bytes);
1128       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1129       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1130     }
1131   else
1132     {
1133       Lisp_Object this_buffer;
1134
1135       this_buffer = Fcurrent_buffer ();
1136       set_buffer_internal (XBUFFER (coding->dst_object));
1137       make_gap (bytes);
1138       set_buffer_internal (XBUFFER (this_buffer));
1139     }
1140 }
1141
1142
1143 static unsigned char *
1144 alloc_destination (coding, nbytes, dst)
1145      struct coding_system *coding;
1146      EMACS_INT nbytes;
1147      unsigned char *dst;
1148 {
1149   EMACS_INT offset = dst - coding->destination;
1150
1151   if (BUFFERP (coding->dst_object))
1152     {
1153       struct buffer *buf = XBUFFER (coding->dst_object);
1154
1155       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1156     }
1157   else
1158     coding_alloc_by_realloc (coding, nbytes);
1159   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1160   coding_set_destination (coding);
1161   dst = coding->destination + offset;
1162   return dst;
1163 }
1164
1165 /** Macros for annotations.  */
1166
/* Maximum length of annotation data (sum of annotations for
   composition and charset).  The first 4 matches the four elements
   written by ADD_COMPOSITION_DATA and the last 4 the four elements
   written by ADD_CHARSET_DATA.
   NOTE(review): the middle term (MAX_COMPOSITION_COMPONENTS * 2 - 1)
   is presumably the largest number of component/rule elements of a
   composition -- confirm against the composition code.  */
#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1170
/* Annotation data is stored in the array coding->charbuf in this
   format:
     [ -LENGTH ANNOTATION_MASK NCHARS ... ]
   LENGTH is the number of elements in the annotation.
   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
   NCHARS is the number of characters in the text annotated.

   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... METHOD [ COMPOSITION-COMPONENTS ... ]
   METHOD is one of enum composition_method.
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.

   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
   follows.

   ADD_ANNOTATION_DATA stores the three common leading elements
   (-LEN, MASK, NCHARS) at BUF, advances BUF past them, and sets
   coding->annotated.  BUF must be a modifiable pointer lvalue and
   `coding' must be in scope.  The macro expands to a single statement
   without a trailing semicolon, so it can be used safely as the body
   of an `if' that has an `else'.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1197
/* Store a composition annotation at BUF and advance BUF past it: the
   common header written by ADD_ANNOTATION_DATA (with length 4 and
   CODING_ANNOTATE_COMPOSITION_MASK) followed by METHOD.  BUF must be
   a modifiable pointer lvalue.  */

#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)
1203
1204
/* Store a charset annotation at BUF and advance BUF past it: the
   common header written by ADD_ANNOTATION_DATA (with length 4 and
   CODING_ANNOTATE_CHARSET_MASK) followed by the charset ID.  BUF must
   be a modifiable pointer lvalue.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1210
1211 \f
1212 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1213
1214
1215
1216 \f
1217 /*** 3. UTF-8 ***/
1218
1219 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1220    Check if a text is encoded in UTF-8.  If it is, return 1, else
1221    return 0.  */
1222
/* Classification of a single byte C by its high bits.  C is expected
   to be a non-negative byte value.  */
/* 0xxxxxxx: a complete one-byte sequence.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* 10xxxxxx: a trailing byte of a multi-byte sequence.  */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
/* 110xxxxx: leading byte of a two-byte sequence.  */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* 1110xxxx: leading byte of a three-byte sequence.  */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* 11110xxx: leading byte of a four-byte sequence.  */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* 111110xx: leading byte of a five-byte sequence.  */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)

/* The byte order mark as a character code, and the three bytes of
   its UTF-8 form (0xEF 0xBB 0xBF decodes to 0xFEFF).  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1234
/* Scan the source of CODING, starting after its leading ASCII part
   (CODING->head_ascii bytes), and decide whether it can be UTF-8.

   Return 0 and mark CATEGORY_MASK_UTF_8 as rejected in DETECT_INFO
   if a byte that cannot continue or start a sequence is found, or if
   the source ends inside a sequence while CODING_MODE_LAST_BLOCK is
   set.  Otherwise return 1 once the source is exhausted; in that
   case DETECT_INFO->found gets the SIG and NOSIG categories when the
   source starts with the three BOM bytes, and otherwise the SIG
   category is rejected and NOSIG is marked found only if at least
   one multi-byte sequence was seen.

   ONE_MORE_BYTE jumps to `no_more_source' when the source runs out,
   which is the only way to leave the loop successfully.  */
static int
detect_coding_utf_8 (coding, detect_info)
     struct coding_system *coding;
     struct coding_detection_info *detect_info;
{
  const unsigned char *src = coding->source, *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  int bom_found = 0;
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_UTF_8;
  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  while (1)
    {
      int c, c1, c2, c3, c4;

      /* Remember where this sequence starts, so that a sequence cut
         off by the end of the source can be recognized below.  */
      src_base = src;
      ONE_MORE_BYTE (c);
      if (c < 0 || UTF_8_1_OCTET_P (c))
        continue;
      ONE_MORE_BYTE (c1);
      if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
        break;
      if (UTF_8_2_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c2);
      if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
        break;
      if (UTF_8_3_OCTET_LEADING_P (c))
        {
          found = 1;
          /* A BOM only counts at the very beginning of the source.  */
          if (src_base == coding->source
              && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
            bom_found = 1;
          continue;
        }
      ONE_MORE_BYTE (c3);
      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
        break;
      if (UTF_8_4_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c4);
      if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
        break;
      if (UTF_8_5_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      break;
    }
  /* The loop was left by `break': an invalid byte was found.  */
  detect_info->rejected |= CATEGORY_MASK_UTF_8;
  return 0;

 no_more_source:
  /* The source ended in the middle of a sequence; that is an error
     only if no further block will follow.  */
  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8;
      return 0;
    }
  if (bom_found)
    {
      /* The first character 0xFEFF doesn't necessarily mean a BOM.  */
      detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
    }
  else
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
      if (found)
        detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
    }
  return 1;
}
1318
1319
1320 static void
1321 decode_coding_utf_8 (coding)
1322      struct coding_system *coding;
1323 {
1324   const unsigned char *src = coding->source + coding->consumed;
1325   const unsigned char *src_end = coding->source + coding->src_bytes;
1326   const unsigned char *src_base;
1327   int *charbuf = coding->charbuf + coding->charbuf_used;
1328   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1329   int consumed_chars = 0, consumed_chars_base;
1330   int multibytep = coding->src_multibyte;
1331   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1332   Lisp_Object attr, charset_list;
1333   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1334   int byte_after_cr = -1;
1335
1336   CODING_GET_INFO (coding, attr, charset_list);
1337
1338   if (bom != utf_without_bom)
1339     {
1340       int c1, c2, c3;
1341
1342       src_base = src;
1343       ONE_MORE_BYTE (c1);
1344       if (! UTF_8_3_OCTET_LEADING_P (c1))
1345         src = src_base;
1346       else
1347         {
1348           ONE_MORE_BYTE (c2);
1349           if (! UTF_8_EXTRA_OCTET_P (c2))
1350             src = src_base;
1351           else
1352             {
1353               ONE_MORE_BYTE (c3);
1354               if (! UTF_8_EXTRA_OCTET_P (c3))
1355                 src = src_base;
1356               else
1357                 {
1358                   if ((c1 != UTF_8_BOM_1)
1359                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1360                     src = src_base;
1361                   else
1362                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1363                 }
1364             }
1365         }
1366     }
1367   CODING_UTF_8_BOM (coding) = utf_without_bom;
1368
1369
1370
1371   while (1)
1372     {
1373       int c, c1, c2, c3, c4, c5;
1374
1375       src_base = src;
1376       consumed_chars_base = consumed_chars;
1377
1378       if (charbuf >= charbuf_end)
1379         break;
1380
1381       if (byte_after_cr >= 0)
1382         c1 = byte_after_cr, byte_after_cr = -1;
1383       else
1384         ONE_MORE_BYTE (c1);
1385       if (c1 < 0)
1386         {
1387           c = - c1;
1388         }
1389       else if (UTF_8_1_OCTET_P(c1))
1390         {
1391           if (eol_crlf && c1 == '\r')
1392             ONE_MORE_BYTE (byte_after_cr);
1393           c = c1;
1394         }
1395       else
1396         {
1397           ONE_MORE_BYTE (c2);
1398           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1399             goto invalid_code;
1400           if (UTF_8_2_OCTET_LEADING_P (c1))
1401             {
1402               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1403               /* Reject overlong sequences here and below.  Encoders
1404                  producing them are incorrect, they can be misleading,
1405                  and they mess up read/write invariance.  */
1406               if (c < 128)
1407                 goto invalid_code;
1408             }
1409           else
1410             {
1411               ONE_MORE_BYTE (c3);
1412               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1413                 goto invalid_code;
1414               if (UTF_8_3_OCTET_LEADING_P (c1))
1415                 {
1416                   c = (((c1 & 0xF) << 12)
1417                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1418                   if (c < 0x800
1419                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1420                     goto invalid_code;
1421                 }
1422               else
1423                 {
1424                   ONE_MORE_BYTE (c4);
1425                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1426                     goto invalid_code;
1427                   if (UTF_8_4_OCTET_LEADING_P (c1))
1428                     {
1429                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1430                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1431                     if (c < 0x10000)
1432                       goto invalid_code;
1433                     }
1434                   else
1435                     {
1436                       ONE_MORE_BYTE (c5);
1437                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1438                         goto invalid_code;
1439                       if (UTF_8_5_OCTET_LEADING_P (c1))
1440                         {
1441                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1442                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1443                                | (c5 & 0x3F));
1444                           if ((c > MAX_CHAR) || (c < 0x200000))
1445                             goto invalid_code;
1446                         }
1447                       else
1448                         goto invalid_code;
1449                     }
1450                 }
1451             }
1452         }
1453
1454       *charbuf++ = c;
1455       continue;
1456
1457     invalid_code:
1458       src = src_base;
1459       consumed_chars = consumed_chars_base;
1460       ONE_MORE_BYTE (c);
1461       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1462       coding->errors++;
1463     }
1464
1465  no_more_source:
1466   coding->consumed_char += consumed_chars_base;
1467   coding->consumed = src_base - coding->source;
1468   coding->charbuf_used = charbuf - coding->charbuf;
1469 }
1470
1471
1472 static int
1473 encode_coding_utf_8 (coding)
1474      struct coding_system *coding;
1475 {
1476   int multibytep = coding->dst_multibyte;
1477   int *charbuf = coding->charbuf;
1478   int *charbuf_end = charbuf + coding->charbuf_used;
1479   unsigned char *dst = coding->destination + coding->produced;
1480   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1481   int produced_chars = 0;
1482   int c;
1483
1484   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1485     {
1486       ASSURE_DESTINATION (3);
1487       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1488       CODING_UTF_8_BOM (coding) = utf_without_bom;
1489     }
1490
1491   if (multibytep)
1492     {
1493       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1494
1495       while (charbuf < charbuf_end)
1496         {
1497           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1498
1499           ASSURE_DESTINATION (safe_room);
1500           c = *charbuf++;
1501           if (CHAR_BYTE8_P (c))
1502             {
1503               c = CHAR_TO_BYTE8 (c);
1504               EMIT_ONE_BYTE (c);
1505             }
1506           else
1507             {
1508               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1509               for (p = str; p < pend; p++)
1510                 EMIT_ONE_BYTE (*p);
1511             }
1512         }
1513     }
1514   else
1515     {
1516       int safe_room = MAX_MULTIBYTE_LENGTH;
1517
1518       while (charbuf < charbuf_end)
1519         {
1520           ASSURE_DESTINATION (safe_room);
1521           c = *charbuf++;
1522           if (CHAR_BYTE8_P (c))
1523             *dst++ = CHAR_TO_BYTE8 (c);
1524           else
1525             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1526           produced_chars++;
1527         }
1528     }
1529   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1530   coding->produced_char += produced_chars;
1531   coding->produced = dst - coding->destination;
1532   return 0;
1533 }
1534
1535
1536 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1537    Check if a text is encoded in one of UTF-16 based coding systems.
1538    If it is, return 1, else return 0.  */
1539
/* Nonzero if the 16-bit unit VAL is in the range 0xD800..0xDBFF
   (the first unit of a surrogate pair).  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit unit VAL is in the range 0xDC00..0xDFFF
   (the second unit of a surrogate pair).  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  VAL is
   evaluated more than once.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF)         \
   || UTF_16_LOW_SURROGATE_P (val))
1550
1551
1552 static int
1553 detect_coding_utf_16 (coding, detect_info)
1554      struct coding_system *coding;
1555      struct coding_detection_info *detect_info;
1556 {
1557   const unsigned char *src = coding->source, *src_base = src;
1558   const unsigned char *src_end = coding->source + coding->src_bytes;
1559   int multibytep = coding->src_multibyte;
1560   int consumed_chars = 0;
1561   int c1, c2;
1562
1563   detect_info->checked |= CATEGORY_MASK_UTF_16;
1564   if (coding->mode & CODING_MODE_LAST_BLOCK
1565       && (coding->src_chars & 1))
1566     {
1567       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1568       return 0;
1569     }
1570
1571   ONE_MORE_BYTE (c1);
1572   ONE_MORE_BYTE (c2);
1573   if ((c1 == 0xFF) && (c2 == 0xFE))
1574     {
1575       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1576                              | CATEGORY_MASK_UTF_16_AUTO);
1577       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1578                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1579                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1580     }
1581   else if ((c1 == 0xFE) && (c2 == 0xFF))
1582     {
1583       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1584                              | CATEGORY_MASK_UTF_16_AUTO);
1585       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1586                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1587                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1588     }
1589   else
1590     {
1591       /* We check the dispersion of Eth and Oth bytes where E is even and
1592          O is odd.  If both are high, we assume binary data.*/
1593       unsigned char e[256], o[256];
1594       unsigned e_num = 1, o_num = 1;
1595
1596       memset (e, 0, 256);
1597       memset (o, 0, 256);
1598       e[c1] = 1;
1599       o[c2] = 1;
1600
1601       detect_info->rejected
1602         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1603
1604       while (1)
1605         {
1606           ONE_MORE_BYTE (c1);
1607           ONE_MORE_BYTE (c2);
1608           if (! e[c1])
1609             {
1610               e[c1] = 1;
1611               e_num++;
1612               if (e_num >= 128)
1613                 break;
1614             }
1615           if (! o[c2])
1616             {
1617               o[c1] = 1;
1618               o_num++;
1619               if (o_num >= 128)
1620                 break;
1621             }
1622         }
1623       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1624       return 0;
1625     }
1626
1627  no_more_source:
1628   return 1;
1629 }
1630
/* Decode UTF-16 source bytes of CODING into CODING->charbuf.

   Reading starts at CODING->source + CODING->consumed and decoded
   characters are appended at CODING->charbuf + CODING->charbuf_used.
   On exit CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated; the bytes of a code unit that
   could not be read completely are left unconsumed.  A high surrogate
   whose partner has not been seen yet is kept in
   CODING_UTF_16_SURROGATE (CODING), from which SURROGATE is
   initialized on entry.

   NOTE(review): ONE_MORE_BYTE is defined outside this function; from
   its use here it fetches the next source byte into its argument and
   jumps to `no_more_source' when none is left -- confirm against the
   macro.  */
1631 static void
1632 decode_coding_utf_16 (coding)
1633      struct coding_system *coding;
1634 {
1635   const unsigned char *src = coding->source + coding->consumed;
1636   const unsigned char *src_end = coding->source + coding->src_bytes;
1637   const unsigned char *src_base;
1638   int *charbuf = coding->charbuf + coding->charbuf_used;
1639   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1640   int consumed_chars = 0, consumed_chars_base = 0;
1641   int multibytep = coding->src_multibyte;
1642   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1643   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1644   int surrogate = CODING_UTF_16_SURROGATE (coding);
1645   Lisp_Object attr, charset_list;
1646   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Bytes read ahead after a CR; -1 means "none pending".  */
1647   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1648
1649   CODING_GET_INFO (coding, attr, charset_list);
1650
       /* A BOM is expected: compare the first two bytes with the BOM of
          the configured byte order.  C is assembled most significant
          byte first, so a little-endian BOM (FF FE) reads as 0xFFFE.  */
1651   if (bom == utf_with_bom)
1652     {
1653       int c, c1, c2;
1654
1655       src_base = src;
1656       ONE_MORE_BYTE (c1);
1657       ONE_MORE_BYTE (c2);
1658       c = (c1 << 8) | c2;
1659
1660       if (endian == utf_16_big_endian
1661           ? c != 0xFEFF : c != 0xFFFE)
1662         {
1663           /* The first two bytes are not BOM.  Treat them as bytes
1664              for a normal character.  */
1665           src = src_base;
1666           coding->errors++;
1667         }
           /* The BOM is looked for only once per CODING.  */
1668       CODING_UTF_16_BOM (coding) = utf_without_bom;
1669     }
1670   else if (bom == utf_detect_bom)
1671     {
1672       /* We have already tried to detect BOM and failed in
1673          detect_coding.  */
1674       CODING_UTF_16_BOM (coding) = utf_without_bom;
1675     }
1676
1677   while (1)
1678     {
1679       int c, c1, c2;
1680
           /* Remember where this code unit starts so that an incomplete
              unit can be left unconsumed at `no_more_source'.  */
1681       src_base = src;
1682       consumed_chars_base = consumed_chars;
1683
           /* Continue only while at least three free elements remain;
              the unpaired-surrogate path below stores up to three.  */
1684       if (charbuf + 2 >= charbuf_end)
1685         break;
1686
           /* Use the bytes read ahead after a CR (see below) before
              reading new source bytes.  */
1687       if (byte_after_cr1 >= 0)
1688         c1 = byte_after_cr1, byte_after_cr1 = -1;
1689       else
1690         ONE_MORE_BYTE (c1);
           /* A negative value is stored negated as an element of its own
              (presumably a character that ONE_MORE_BYTE decoded from a
              multibyte source -- confirm against the macro).  */
1691       if (c1 < 0)
1692         {
1693           *charbuf++ = -c1;
1694           continue;
1695         }
1696       if (byte_after_cr2 >= 0)
1697         c2 = byte_after_cr2, byte_after_cr2 = -1;
1698       else
1699         ONE_MORE_BYTE (c2);
           /* Likewise for the second byte; C1 is then kept as a lone
              byte, converted by BYTE8_TO_CHAR unless it is ASCII.  */
1700       if (c2 < 0)
1701         {
1702           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1703           *charbuf++ = -c2;
1704           continue;
1705         }
           /* Assemble the 16-bit code unit in the configured byte order.  */
1706       c = (endian == utf_16_big_endian
1707            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1708
           /* Nonzero SURROGATE: a high surrogate is pending from an
              earlier code unit.  */
1709       if (surrogate)
1710         {
1711           if (! UTF_16_LOW_SURROGATE_P (c))
1712             {
                   /* Not followed by a low surrogate: store the two bytes
                      of the pending surrogate, in source byte order, as
                      separate elements and count an error.  */
1713               if (endian == utf_16_big_endian)
1714                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1715               else
1716                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1717               *charbuf++ = c1;
1718               *charbuf++ = c2;
1719               coding->errors++;
                   /* The current unit either becomes the new pending high
                      surrogate or is stored as it is.  */
1720               if (UTF_16_HIGH_SURROGATE_P (c))
1721                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1722               else
1723                 *charbuf++ = c;
1724             }
1725           else
1726             {
                   /* Combine the pair into a character at or above
                      0x10000 and clear the pending state.  */
1727               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1728               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1729               *charbuf++ = 0x10000 + c;
1730             }
1731         }
1732       else
1733         {
1734           if (UTF_16_HIGH_SURROGATE_P (c))
1735             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1736           else
1737             {
                   /* With DOS end-of-line type, read the two bytes after
                      a CR ahead before storing the CR; they are used by
                      the next iteration.  If the source ends here,
                      control leaves through `no_more_source' with the CR
                      still unconsumed (presumably so that a CR LF pair is
                      never split between two calls -- confirm).  */
1738               if (eol_crlf && c == '\r')
1739                 {
1740                   ONE_MORE_BYTE (byte_after_cr1);
1741                   ONE_MORE_BYTE (byte_after_cr2);
1742                 }
1743               *charbuf++ = c;
1744             }
1745         }
1746     }
1747
       /* Commit only what was consumed up to the start of the last,
          possibly incomplete, code unit.  */
1748  no_more_source:
1749   coding->consumed_char += consumed_chars_base;
1750   coding->consumed = src_base - coding->source;
1751   coding->charbuf_used = charbuf - coding->charbuf;
1752 }
1753
1754 static int
1755 encode_coding_utf_16 (coding)
1756      struct coding_system *coding;
1757 {
1758   int multibytep = coding->dst_multibyte;
1759   int *charbuf = coding->charbuf;
1760   int *charbuf_end = charbuf + coding->charbuf_used;
1761   unsigned char *dst = coding->destination + coding->produced;
1762   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1763   int safe_room = 8;
1764   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1765   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1766   int produced_chars = 0;
1767   Lisp_Object attrs, charset_list;
1768   int c;
1769
1770   CODING_GET_INFO (coding, attrs, charset_list);
1771
1772   if (bom != utf_without_bom)
1773     {
1774       ASSURE_DESTINATION (safe_room);
1775       if (big_endian)
1776         EMIT_TWO_BYTES (0xFE, 0xFF);
1777       else
1778         EMIT_TWO_BYTES (0xFF, 0xFE);
1779       CODING_UTF_16_BOM (coding) = utf_without_bom;
1780     }
1781
1782   while (charbuf < charbuf_end)
1783     {
1784       ASSURE_DESTINATION (safe_room);
1785       c = *charbuf++;
1786       if (c >= MAX_UNICODE_CHAR)
1787         c = coding->default_char;
1788
1789       if (c < 0x10000)
1790         {
1791           if (big_endian)
1792             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1793           else
1794             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1795         }
1796       else
1797         {
1798           int c1, c2;
1799
1800           c -= 0x10000;
1801           c1 = (c >> 10) + 0xD800;
1802           c2 = (c & 0x3FF) + 0xDC00;
1803           if (big_endian)
1804             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1805           else
1806             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1807         }
1808     }
1809   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1810   coding->produced = dst - coding->destination;
1811   coding->produced_char += produced_chars;
1812   return 0;
1813 }
1814
1815 \f
1816 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1817
1818 /* Emacs' internal format for representation of multiple character
1819    sets is a kind of multi-byte encoding, i.e. characters are
1820    represented by variable-length sequences of one-byte codes.
1821
1822    ASCII characters and control characters (e.g. `tab', `newline') are
1823    represented by one-byte sequences which are their ASCII codes, in
1824    the range 0x00 through 0x7F.
1825
1826    8-bit characters of the range 0x80..0x9F are represented by
1827    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1828    code + 0x20).
1829
1830    8-bit characters of the range 0xA0..0xFF are represented by
1831    one-byte sequences which are their 8-bit code.
1832
1833    The other characters are represented by a sequence of `base
1834    leading-code', optional `extended leading-code', and one or two
1835    `position-code's.  The length of the sequence is determined by the
1836    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1837    whereas extended leading-code and position-code take the range 0xA0
1838    through 0xFF.  See `charset.h' for more details about leading-code
1839    and position-code.
1840
1841    --- CODE RANGE of Emacs' internal format ---
1842    character set        range
1843    -------------        -----
1844    ascii                0x00..0x7F
1845    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1846    eight-bit-graphic    0xA0..0xBF
1847    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1848    ---------------------------------------------
1849
1850    As this is the internal character representation, the format is
1851    usually not used externally (i.e. in a file or in data sent to a
1852    process).  But, it is possible to have a text externally in this
1853    format (i.e. by encoding by the coding system `emacs-mule').
1854
1855    In that case, a sequence of one-byte codes has a slightly different
1856    form.
1857
1858    At first, all characters in eight-bit-control are represented by
1859    one-byte sequences which are their 8-bit code.
1860
1861    Next, character composition data are represented by the byte
1862    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1863    where,
1864         METHOD is 0xF0 plus one of composition method (enum
1865         composition_method),
1866
1867         BYTES is 0xA0 plus a byte length of this composition data,
1868
1869         CHARS is 0x20 plus a number of characters composed by this
1870         data,
1871
1872         COMPONENTs are characters in multibyte form or composition
1873         rules encoded by two bytes of ASCII codes.
1874
1875    In addition, for backward compatibility, the following formats are
1876    also recognized as composition data on decoding.
1877
1878    0x80 MSEQ ...
1879    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1880
1881    Here,
1882         MSEQ is a multibyte form, but in one of these special formats:
1883           ASCII: 0xA0 ASCII_CODE+0x80,
1884           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1885         RULE is a one byte code of the range 0xA0..0xF0 that
1886         represents a composition rule.
1887   */
1888
/* Byte length of an emacs-mule sequence, indexed by its leading code.
   emacs_mule_char handles the values 1 through 4 and aborts on any
   other value; detect_coding_emacs_mule uses the value minus one as
   the number of bytes that must follow the leading code.  */
1889 char emacs_mule_bytes[256];
1890
1891 int
1892 emacs_mule_char (coding, src, nbytes, nchars, id)
1893      struct coding_system *coding;
1894      const unsigned char *src;
1895      int *nbytes, *nchars, *id;
1896 {
1897   const unsigned char *src_end = coding->source + coding->src_bytes;
1898   const unsigned char *src_base = src;
1899   int multibytep = coding->src_multibyte;
1900   struct charset *charset;
1901   unsigned code;
1902   int c;
1903   int consumed_chars = 0;
1904
1905   ONE_MORE_BYTE (c);
1906   if (c < 0)
1907     {
1908       c = -c;
1909       charset = emacs_mule_charset[0];
1910     }
1911   else
1912     {
1913       if (c >= 0xA0)
1914         {
1915           /* Old style component character of a composition.  */
1916           if (c == 0xA0)
1917             {
1918               ONE_MORE_BYTE (c);
1919               c -= 0x80;
1920             }
1921           else
1922             c -= 0x20;
1923         }
1924
1925       switch (emacs_mule_bytes[c])
1926         {
1927         case 2:
1928           if (! (charset = emacs_mule_charset[c]))
1929             goto invalid_code;
1930           ONE_MORE_BYTE (c);
1931           if (c < 0xA0)
1932             goto invalid_code;
1933           code = c & 0x7F;
1934           break;
1935
1936         case 3:
1937           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1938               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1939             {
1940               ONE_MORE_BYTE (c);
1941               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1942                 goto invalid_code;
1943               ONE_MORE_BYTE (c);
1944               if (c < 0xA0)
1945                 goto invalid_code;
1946               code = c & 0x7F;
1947             }
1948           else
1949             {
1950               if (! (charset = emacs_mule_charset[c]))
1951                 goto invalid_code;
1952               ONE_MORE_BYTE (c);
1953               if (c < 0xA0)
1954                 goto invalid_code;
1955               code = (c & 0x7F) << 8;
1956               ONE_MORE_BYTE (c);
1957               if (c < 0xA0)
1958                 goto invalid_code;
1959               code |= c & 0x7F;
1960             }
1961           break;
1962
1963         case 4:
1964           ONE_MORE_BYTE (c);
1965           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1966             goto invalid_code;
1967           ONE_MORE_BYTE (c);
1968           if (c < 0xA0)
1969             goto invalid_code;
1970           code = (c & 0x7F) << 8;
1971           ONE_MORE_BYTE (c);
1972           if (c < 0xA0)
1973             goto invalid_code;
1974           code |= c & 0x7F;
1975           break;
1976
1977         case 1:
1978           code = c;
1979           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1980                                      ? charset_ascii : charset_eight_bit);
1981           break;
1982
1983         default:
1984           abort ();
1985         }
1986       c = DECODE_CHAR (charset, code);
1987       if (c < 0)
1988         goto invalid_code;
1989     }
1990   *nbytes = src - src_base;
1991   *nchars = consumed_chars;
1992   if (id)
1993     *id = charset->id;
1994   return c;
1995
1996  no_more_source:
1997   return -2;
1998
1999  invalid_code:
2000   return -1;
2001 }
2002
2003
2004 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2005    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
2006    else return 0.  */
2007
2008 static int
2009 detect_coding_emacs_mule (coding, detect_info)
2010      struct coding_system *coding;
2011      struct coding_detection_info *detect_info;
2012 {
2013   const unsigned char *src = coding->source, *src_base;
2014   const unsigned char *src_end = coding->source + coding->src_bytes;
2015   int multibytep = coding->src_multibyte;
2016   int consumed_chars = 0;
2017   int c;
2018   int found = 0;
2019
2020   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
2021   /* A coding system of this category is always ASCII compatible.  */
2022   src += coding->head_ascii;
2023
2024   while (1)
2025     {
2026       src_base = src;
2027       ONE_MORE_BYTE (c);
2028       if (c < 0)
2029         continue;
2030       if (c == 0x80)
2031         {
2032           /* Perhaps the start of composite character.  We simple skip
2033              it because analyzing it is too heavy for detecting.  But,
2034              at least, we check that the composite character
2035              constitutes of more than 4 bytes.  */
2036           const unsigned char *src_base;
2037
2038         repeat:
2039           src_base = src;
2040           do
2041             {
2042               ONE_MORE_BYTE (c);
2043             }
2044           while (c >= 0xA0);
2045
2046           if (src - src_base <= 4)
2047             break;
2048           found = CATEGORY_MASK_EMACS_MULE;
2049           if (c == 0x80)
2050             goto repeat;
2051         }
2052
2053       if (c < 0x80)
2054         {
2055           if (c < 0x20
2056               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2057             break;
2058         }
2059       else
2060         {
2061           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2062
2063           while (more_bytes > 0)
2064             {
2065               ONE_MORE_BYTE (c);
2066               if (c < 0xA0)
2067                 {
2068                   src--;        /* Unread the last byte.  */
2069                   break;
2070                 }
2071               more_bytes--;
2072             }
2073           if (more_bytes != 0)
2074             break;
2075           found = CATEGORY_MASK_EMACS_MULE;
2076         }
2077     }
2078   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2079   return 0;
2080
2081  no_more_source:
2082   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2083     {
2084       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2085       return 0;
2086     }
2087   detect_info->found |= found;
2088   return 1;
2089 }
2090
2091
2092 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2093
2094 /* Decode a character represented as a component of composition
2095    sequence of Emacs 20/21 style at SRC.  Store that character in
2096    *BUF, increment BUF, advance SRC to the head of the next character
2097    (or an encoded composition rule) and update CONSUMED_CHARS.  If SRC
2098    is at SRC_END, or emacs_mule_char returns -2, do nothing at all.
2099    If emacs_mule_char returns -1, jump to the label `invalid_code'.  */
/* The expansion uses the variables `coding', `src', `src_end' and
   `consumed_chars' of the enclosing function and declares its own
   local `c'.  */
2100
2101 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
2102   do                                                            \
2103     {                                                           \
2104       int c;                                                    \
2105       int nbytes, nchars;                                       \
2106                                                                 \
2107       if (src == src_end)                                       \
2108         break;                                                  \
2109       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
2110       if (c < 0)                                                \
2111         {                                                       \
2112           if (c == -2)                                          \
2113             break;                                              \
2114           goto invalid_code;                                    \
2115         }                                                       \
2116       *buf++ = c;                                               \
2117       src += nbytes;                                            \
2118       consumed_chars += nchars;                                 \
2119     }                                                           \
2120   while (0)
2121
2122
2123 /* Decode a composition rule represented as a component of composition
2124    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
2125    and increment BUF.  If SRC is at SRC_END, or the byte read is not in
2126    the range 0xA0..0xF0, jump to the label `invalid_code'.  */
/* The byte minus 0xA0 is GREF * 9 + NREF, with both GREF and NREF in
   0..8; the two are passed to COMPOSITION_ENCODE_RULE.  */
2127
2128 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
2129   do {                                                  \
2130     int c, gref, nref;                                  \
2131                                                         \
2132     if (src >= src_end)                                 \
2133       goto invalid_code;                                \
2134     ONE_MORE_BYTE_NO_CHECK (c);                         \
2135     c -= 0xA0;                                          \
2136     if (c < 0 || c >= 81)                               \
2137       goto invalid_code;                                \
2138                                                         \
2139     gref = c / 9, nref = c % 9;                         \
2140     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2141   } while (0)
2142
2143
2144 /* Decode a composition rule represented as a component of composition
2145    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
2146    and increment BUF.  If fewer than two bytes remain before SRC_END, or
2147    either byte is not in 0x20..0x70, jump to the label `invalid_code'.  */
/* The first byte minus 0x20 is GREF and the second byte minus 0x20 is
   NREF; both are passed to COMPOSITION_ENCODE_RULE.  */
2148
2149 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
2150   do {                                                  \
2151     int gref, nref;                                     \
2152                                                         \
2153     if (src + 1>= src_end)                              \
2154       goto invalid_code;                                \
2155     ONE_MORE_BYTE_NO_CHECK (gref);                      \
2156     gref -= 0x20;                                       \
2157     ONE_MORE_BYTE_NO_CHECK (nref);                      \
2158     nref -= 0x20;                                       \
2159     if (gref < 0 || gref >= 81                          \
2160         || nref < 0 || nref >= 81)                      \
2161       goto invalid_code;                                \
2162     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2163   } while (0)
2164
2165
/* Decode the header of an Emacs 21 style composition whose METHOD
   byte is already in C, and, unless the method is
   COMPOSITION_RELATIVE, its components too.

   The BYTES and CHARS bytes are read with ONE_MORE_BYTE; a negative
   value, or a BYTES value below 3, leads to `invalid_code'.
   ADD_COMPOSITION_DATA is then called with CHARS and the method.  For
   the other methods, components are decoded into CHARBUF until
   CONSUMED_CHARS reaches CONSUMED_CHARS_BASE + BYTES: every second
   component is a rule (DECODE_EMACS_MULE_COMPOSITION_RULE_21) unless
   the method is COMPOSITION_WITH_ALTCHARS, all others are characters.
   Finally the element at the old CHARBUF position is decreased by the
   number of components decoded (presumably it is a negated length
   stored by ADD_COMPOSITION_DATA -- confirm against that macro).

   The expansion uses `charbuf', `consumed_chars', `consumed_chars_base'
   and the label `invalid_code' of the enclosing function.

   NOTE(review): the test after the loop repeats the loop condition,
   so it can never be true at that point.
   NOTE(review): DECODE_EMACS_MULE_COMPOSITION_CHAR changes nothing
   when SRC has reached SRC_END; with COMPOSITION_WITH_ALTCHARS the
   loop then has no other exit -- confirm that this cannot happen.  */
2166 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
2167   do {                                                                  \
2168     /* Emacs 21 style format.  The first three bytes at SRC are         \
2169        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
2170        the byte length of this composition information, CHARS is the    \
2171        number of characters composed by this composition.  */           \
2172     enum composition_method method = c - 0xF2;                          \
2173     int *charbuf_base = charbuf;                                        \
2174     int consumed_chars_limit;                                           \
2175     int nbytes, nchars;                                                 \
2176                                                                         \
2177     ONE_MORE_BYTE (c);                                                  \
2178     if (c < 0)                                                          \
2179       goto invalid_code;                                                \
2180     nbytes = c - 0xA0;                                                  \
2181     if (nbytes < 3)                                                     \
2182       goto invalid_code;                                                \
2183     ONE_MORE_BYTE (c);                                                  \
2184     if (c < 0)                                                          \
2185       goto invalid_code;                                                \
2186     nchars = c - 0xA0;                                                  \
2187     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2188     consumed_chars_limit = consumed_chars_base + nbytes;                \
2189     if (method != COMPOSITION_RELATIVE)                                 \
2190       {                                                                 \
2191         int i = 0;                                                      \
2192         while (consumed_chars < consumed_chars_limit)                   \
2193           {                                                             \
2194             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
2195               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
2196             else                                                        \
2197               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
2198             i++;                                                        \
2199           }                                                             \
2200         if (consumed_chars < consumed_chars_limit)                      \
2201           goto invalid_code;                                            \
2202         charbuf_base[0] -= i;                                           \
2203       }                                                                 \
2204   } while (0)
2205
2206
/* Decode an Emacs 20 style relative composition.

   SRC is first moved back to SRC_BASE and the 0x80 byte is read again
   and discarded.  Component characters are then decoded into a local
   array for as long as the byte at SRC is 0xA0 or above, up to
   MAX_COMPOSITION_COMPONENTS of them.  Fewer than two iterations
   lead to `invalid_code'.  Otherwise ADD_COMPOSITION_DATA is called
   with the count and COMPOSITION_RELATIVE, and the components are
   copied to CHARBUF.  The argument C is used only as scratch space.

   NOTE(review): the loop reads *SRC without comparing SRC with
   SRC_END first, and I is incremented even when
   DECODE_EMACS_MULE_COMPOSITION_CHAR stores nothing -- confirm that
   the callers rule both cases out.  */
2207 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
2208   do {                                                                  \
2209     /* Emacs 20 style format for relative composition.  */              \
2210     /* Store multibyte form of characters to be composed.  */           \
2211     enum composition_method method = COMPOSITION_RELATIVE;              \
2212     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
2213     int *buf = components;                                              \
2214     int i, j;                                                           \
2215                                                                         \
2216     src = src_base;                                                     \
2217     ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
2218     for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++)    \
2219       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
2220     if (i < 2)                                                          \
2221       goto invalid_code;                                                \
2222     ADD_COMPOSITION_DATA (charbuf, i, method);                          \
2223     for (j = 0; j < i; j++)                                             \
2224       *charbuf++ = components[j];                                       \
2225   } while (0)
2226
2227
/* Decode an Emacs 20 style rule-base composition.

   One character is decoded first, then up to
   MAX_COMPOSITION_COMPONENTS - 1 pairs of a rule
   (DECODE_EMACS_MULE_COMPOSITION_RULE_20) and a character, for as
   long as the byte at SRC is 0xA0 or above.  All of them are stored,
   interleaved, in a local array.  If no pair was decoded, or an even
   number of components was stored, control goes to `invalid_code'; if
   CHARBUF lacks room, control goes to `no_more_source'.

   Otherwise ADD_COMPOSITION_DATA is called with the number of
   characters and COMPOSITION_WITH_RULE, the interleaved components
   are copied to CHARBUF, the element at the old CHARBUF position is
   decreased by the number of components copied (presumably it is a
   negated length stored by ADD_COMPOSITION_DATA -- confirm), and
   finally the characters alone (the even-indexed components) are
   copied once more.  The argument C is not used.

   NOTE(review): the loop reads *SRC without comparing SRC with
   SRC_END first -- confirm that the callers rule this out.  */
2228 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
2229   do {                                                          \
2230     /* Emacs 20 style format for rule-base composition.  */     \
2231     /* Store multibyte form of characters to be composed.  */   \
2232     enum composition_method method = COMPOSITION_WITH_RULE;     \
2233     int *charbuf_base = charbuf;                                \
2234     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
2235     int *buf = components;                                      \
2236     int i, j;                                                   \
2237                                                                 \
2238     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
2239     for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2240       {                                                         \
2241         if (*src < 0xA0)                                        \
2242           break;                                                \
2243         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
2244         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
2245       }                                                         \
2246     if (i <= 1 || (buf - components) % 2 == 0)                  \
2247       goto invalid_code;                                        \
2248     if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
2249       goto no_more_source;                                      \
2250     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
2251     i = i * 2 - 1;                                              \
2252     for (j = 0; j < i; j++)                                     \
2253       *charbuf++ = components[j];                               \
2254     charbuf_base[0] -= i;                                       \
2255     for (j = 0; j < i; j += 2)                                  \
2256       *charbuf++ = components[j];                               \
2257   } while (0)
2258
2259
/* Decode emacs-mule source bytes of CODING into CODING->charbuf.

   Reading starts at CODING->source + CODING->consumed and decoded
   characters are appended at CODING->charbuf + CODING->charbuf_used.
   On exit CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated; a sequence that could not be read
   completely is left unconsumed.  A byte sequence that cannot be
   decoded contributes its first byte as a single character (converted
   by BYTE8_TO_CHAR unless it is ASCII) and is counted in
   CODING->errors.

   NOTE(review): ONE_MORE_BYTE is defined outside this function; from
   its use here it fetches the next source byte into its argument and
   jumps to `no_more_source' when none is left -- confirm against the
   macro.  */
2260 static void
2261 decode_coding_emacs_mule (coding)
2262      struct coding_system *coding;
2263 {
2264   const unsigned char *src = coding->source + coding->consumed;
2265   const unsigned char *src_end = coding->source + coding->src_bytes;
2266   const unsigned char *src_base;
2267   int *charbuf = coding->charbuf + coding->charbuf_used;
       /* MAX_ANNOTATION_LENGTH elements are held back at the end of the
          buffer (presumably room for the annotations added below --
          confirm).  */
2268   int *charbuf_end
2269     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2270   int consumed_chars = 0, consumed_chars_base;
2271   int multibytep = coding->src_multibyte;
2272   Lisp_Object attrs, charset_list;
       /* LAST_ID is the charset of the current run of characters decoded
          by emacs_mule_char and LAST_OFFSET the character offset at
          which that run began.  */
2273   int char_offset = coding->produced_char;
2274   int last_offset = char_offset;
2275   int last_id = charset_ascii;
2276   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte read ahead after a CR; -1 means "none pending".  */
2277   int byte_after_cr = -1;
2278
2279   CODING_GET_INFO (coding, attrs, charset_list);
2280
2281   while (1)
2282     {
2283       int c;
2284
           /* Remember where this sequence starts, both for rewinding on
              an invalid sequence and for leaving an incomplete one
              unconsumed.  */
2285       src_base = src;
2286       consumed_chars_base = consumed_chars;
2287
2288       if (charbuf >= charbuf_end)
2289         break;
2290
2291       if (byte_after_cr >= 0)
2292         c = byte_after_cr, byte_after_cr = -1;
2293       else
2294         ONE_MORE_BYTE (c);
2295       if (c < 0)
2296         {
               /* A negative value is stored negated as a character of its
                  own.  */
2297           *charbuf++ = -c;
2298           char_offset++;
2299         }
2300       else if (c < 0x80)
2301         {
               /* With DOS end-of-line type, read the byte after a CR
                  ahead before storing the CR; it is used by the next
                  iteration.  If the source ends here, control leaves
                  through `no_more_source' with the CR unconsumed.  */
2302           if (eol_crlf && c == '\r')
2303             ONE_MORE_BYTE (byte_after_cr);
2304           *charbuf++ = c;
2305           char_offset++;
2306         }
2307       else if (c == 0x80)
2308         {
               /* 0x80 introduces composition data; the next byte selects
                  the format.  */
2309           ONE_MORE_BYTE (c);
2310           if (c < 0)
2311             goto invalid_code;
2312           if (c - 0xF2 >= COMPOSITION_RELATIVE
2313               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2314             DECODE_EMACS_MULE_21_COMPOSITION (c);
2315           else if (c < 0xC0)
2316             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2317           else if (c == 0xFF)
2318             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2319           else
2320             goto invalid_code;
2321         }
2322       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2323         {
2324           int nbytes, nchars;
2325           int id;
2326
               /* C is a leading code: go back to the start of the
                  sequence and let emacs_mule_char decode all of it.  */
2327           src = src_base;
2328           consumed_chars = consumed_chars_base;
2329           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2330           if (c < 0)
2331             {
                   /* -2: the sequence is incomplete; stop here and leave
                      it unconsumed.  */
2332               if (c == -2)
2333                 break;
2334               goto invalid_code;
2335             }
               /* The charset changed: annotate the run that just ended,
                  unless it was ASCII, and start a new run.  */
2336           if (last_id != id)
2337             {
2338               if (last_id != charset_ascii)
2339                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2340               last_id = id;
2341               last_offset = char_offset;
2342             }
2343           *charbuf++ = c;
2344           src += nbytes;
2345           consumed_chars += nchars;
2346           char_offset++;
2347         }
2348       else
2349         goto invalid_code;
2350       continue;
2351
           /* Go back to the start of the offending sequence, take just
              its first byte as a character and count an error.  */
2352     invalid_code:
2353       src = src_base;
2354       consumed_chars = consumed_chars_base;
2355       ONE_MORE_BYTE (c);
2356       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2357       char_offset++;
2358       coding->errors++;
2359     }
2360
       /* Annotate the last run, then commit only what was consumed up to
          the start of the last, possibly incomplete, sequence.  */
2361  no_more_source:
2362   if (last_id != charset_ascii)
2363     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2364   coding->consumed_char += consumed_chars_base;
2365   coding->consumed = src_base - coding->source;
2366   coding->charbuf_used = charbuf - coding->charbuf;
2367 }
2368
2369
2370 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2371   do {                                          \
2372     if (id < 0xA0)                              \
2373       codes[0] = id, codes[1] = 0;              \
2374     else if (id < 0xE0)                         \
2375       codes[0] = 0x9A, codes[1] = id;           \
2376     else if (id < 0xF0)                         \
2377       codes[0] = 0x9B, codes[1] = id;           \
2378     else if (id < 0xF5)                         \
2379       codes[0] = 0x9C, codes[1] = id;           \
2380     else                                        \
2381       codes[0] = 0x9D, codes[1] = id;           \
2382   } while (0);
2383
2384
2385 static int
2386 encode_coding_emacs_mule (coding)
2387      struct coding_system *coding;
2388 {
2389   int multibytep = coding->dst_multibyte;
2390   int *charbuf = coding->charbuf;
2391   int *charbuf_end = charbuf + coding->charbuf_used;
2392   unsigned char *dst = coding->destination + coding->produced;
2393   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2394   int safe_room = 8;
2395   int produced_chars = 0;
2396   Lisp_Object attrs, charset_list;
2397   int c;
2398   int preferred_charset_id = -1;
2399
2400   CODING_GET_INFO (coding, attrs, charset_list);
2401   if (! EQ (charset_list, Vemacs_mule_charset_list))
2402     {
2403       CODING_ATTR_CHARSET_LIST (attrs)
2404         = charset_list = Vemacs_mule_charset_list;
2405     }
2406
2407   while (charbuf < charbuf_end)
2408     {
2409       ASSURE_DESTINATION (safe_room);
2410       c = *charbuf++;
2411
2412       if (c < 0)
2413         {
2414           /* Handle an annotation.  */
2415           switch (*charbuf)
2416             {
2417             case CODING_ANNOTATE_COMPOSITION_MASK:
2418               /* Not yet implemented.  */
2419               break;
2420             case CODING_ANNOTATE_CHARSET_MASK:
2421               preferred_charset_id = charbuf[3];
2422               if (preferred_charset_id >= 0
2423                   && NILP (Fmemq (make_number (preferred_charset_id),
2424                                   charset_list)))
2425                 preferred_charset_id = -1;
2426               break;
2427             default:
2428               abort ();
2429             }
2430           charbuf += -c - 1;
2431           continue;
2432         }
2433
2434       if (ASCII_CHAR_P (c))
2435         EMIT_ONE_ASCII_BYTE (c);
2436       else if (CHAR_BYTE8_P (c))
2437         {
2438           c = CHAR_TO_BYTE8 (c);
2439           EMIT_ONE_BYTE (c);
2440         }
2441       else
2442         {
2443           struct charset *charset;
2444           unsigned code;
2445           int dimension;
2446           int emacs_mule_id;
2447           unsigned char leading_codes[2];
2448
2449           if (preferred_charset_id >= 0)
2450             {
2451               charset = CHARSET_FROM_ID (preferred_charset_id);
2452               if (! CHAR_CHARSET_P (c, charset))
2453                 charset = char_charset (c, charset_list, NULL);
2454             }
2455           else
2456             charset = char_charset (c, charset_list, &code);
2457           if (! charset)
2458             {
2459               c = coding->default_char;
2460               if (ASCII_CHAR_P (c))
2461                 {
2462                   EMIT_ONE_ASCII_BYTE (c);
2463                   continue;
2464                 }
2465               charset = char_charset (c, charset_list, &code);
2466             }
2467           dimension = CHARSET_DIMENSION (charset);
2468           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2469           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2470           EMIT_ONE_BYTE (leading_codes[0]);
2471           if (leading_codes[1])
2472             EMIT_ONE_BYTE (leading_codes[1]);
2473           if (dimension == 1)
2474             EMIT_ONE_BYTE (code | 0x80);
2475           else
2476             {
2477               code |= 0x8080;
2478               EMIT_ONE_BYTE (code >> 8);
2479               EMIT_ONE_BYTE (code & 0xFF);
2480             }
2481         }
2482     }
2483   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2484   coding->produced_char += produced_chars;
2485   coding->produced = dst - coding->destination;
2486   return 0;
2487 }
2488
2489 \f
2490 /*** 7. ISO2022 handlers ***/
2491
2492 /* The following note describes the coding system ISO2022 briefly.
2493    Since the intention of this note is to help understand the
2494    functions in this file, some parts are NOT ACCURATE or are OVERLY
2495    SIMPLIFIED.  For thorough understanding, please refer to the
2496    original document of ISO2022.  This is equivalent to the standard
2497    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2498
2499    ISO2022 provides many mechanisms to encode several character sets
2500    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2501    is encoded using bytes less than 128.  This may make the encoded
2502    text a little bit longer, but the text passes more easily through
2503    several types of gateway, some of which strip off the MSB (Most
2504    Significant Bit).
2505
2506    There are two kinds of character sets: control character sets and
2507    graphic character sets.  The former contain control characters such
2508    as `newline' and `escape' to provide control functions (control
2509    functions are also provided by escape sequences).  The latter
2510    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2511    two control character sets and many graphic character sets.
2512
2513    Graphic character sets are classified into one of the following
2514    four classes, according to the number of bytes (DIMENSION) and
2515    number of characters in one dimension (CHARS) of the set:
2516    - DIMENSION1_CHARS94
2517    - DIMENSION1_CHARS96
2518    - DIMENSION2_CHARS94
2519    - DIMENSION2_CHARS96
2520
2521    In addition, each character set is assigned an identification tag,
2522    unique for each set, called the "final character" (denoted as <F>
2523    hereafter).  The <F> of each character set is decided by ECMA(*)
2524    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2525    (0x30..0x3F are for private use only).
2526
2527    Note (*): ECMA = European Computer Manufacturers Association
2528
2529    Here are examples of graphic character sets [NAME(<F>)]:
2530         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2531         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2532         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2533         o DIMENSION2_CHARS96 -- none for the moment
2534
2535    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2536         C0 [0x00..0x1F] -- control character plane 0
2537         GL [0x20..0x7F] -- graphic character plane 0
2538         C1 [0x80..0x9F] -- control character plane 1
2539         GR [0xA0..0xFF] -- graphic character plane 1
2540
2541    A control character set is directly designated and invoked to C0 or
2542    C1 by an escape sequence.  The most common case is that:
2543    - ISO646's  control character set is designated/invoked to C0, and
2544    - ISO6429's control character set is designated/invoked to C1,
2545    and usually these designations/invocations are omitted in encoded
2546    text.  In a 7-bit environment, only C0 can be used, and a control
2547    character for C1 is encoded by an appropriate escape sequence to
2548    fit into the environment.  All control characters for C1 are
2549    defined to have corresponding escape sequences.
2550
2551    A graphic character set is at first designated to one of four
2552    graphic registers (G0 through G3), then these graphic registers are
2553    invoked to GL or GR.  These designations and invocations can be
2554    done independently.  The most common case is that G0 is invoked to
2555    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2556    these invocations and designations are omitted in encoded text.
2557    In a 7-bit environment, only GL can be used.
2558
2559    When a graphic character set of CHARS94 is invoked to GL, codes
2560    0x20 and 0x7F of the GL area work as control characters SPACE and
2561    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2562    be used.
2563
2564    There are two ways of invocation: locking-shift and single-shift.
2565    With locking-shift, the invocation lasts until the next different
2566    invocation, whereas with single-shift, the invocation affects the
2567    following character only and doesn't affect the locking-shift
2568    state.  Invocations are done by the following control characters or
2569    escape sequences:
2570
2571    ----------------------------------------------------------------------
2572    abbrev  function                  cntrl escape seq   description
2573    ----------------------------------------------------------------------
2574    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2575    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2576    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2577    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2578    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2579    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2580    LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
2581    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2582    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2583    ----------------------------------------------------------------------
2584    (*) These are not used by any known coding system.
2585
2586    Control characters for these functions are defined by macros
2587    ISO_CODE_XXX in `coding.h'.
2588
2589    Designations are done by the following escape sequences:
2590    ----------------------------------------------------------------------
2591    escape sequence      description
2592    ----------------------------------------------------------------------
2593    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2594    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2595    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2596    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2597    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2598    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2599    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2600    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2601    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2602    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2603    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2604    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2605    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2606    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2607    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2608    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2609    ----------------------------------------------------------------------
2610
2611    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2612    of dimension 1, chars 94, and final character <F>, etc...
2613
2614    Note (*): Although these designations are not allowed in ISO2022,
2615    Emacs accepts them on decoding, and produces them on encoding
2616    CHARS96 character sets in a coding system which is characterized as
2617    7-bit environment, non-locking-shift, and non-single-shift.
2618
2619    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2620    '(' must be omitted.  We refer to this as "short-form" hereafter.
2621
2622    Now you may notice that there are a lot of ways of encoding the
2623    same multilingual text in ISO2022.  Actually, there exist many
2624    coding systems such as Compound Text (used in X11's inter client
2625    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2626    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2627    localized platforms), and all of these are variants of ISO2022.
2628
2629    In addition to the above, Emacs handles two more kinds of escape
2630    sequences: ISO6429's direction specification and Emacs' private
2631    sequence for specifying character composition.
2632
2633    ISO6429's direction specification takes the following form:
2634         o CSI ']'      -- end of the current direction
2635         o CSI '0' ']'  -- end of the current direction
2636         o CSI '1' ']'  -- start of left-to-right text
2637         o CSI '2' ']'  -- start of right-to-left text
2638    The control character CSI (0x9B: control sequence introducer) is
2639    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2640
2641    Character composition specification takes the following form:
2642         o ESC '0' -- start relative composition
2643         o ESC '1' -- end composition
2644         o ESC '2' -- start rule-base composition (*)
2645         o ESC '3' -- start relative composition with alternate chars  (**)
2646         o ESC '4' -- start rule-base composition with alternate chars  (**)
2647   Since these are not standard escape sequences of any ISO standard,
2648   the use of them with these meanings is restricted to Emacs only.
2649
2650   (*) This form is used only in Emacs 20.7 and older versions,
2651   but newer versions can safely decode it.
2652   (**) This form is used only in Emacs 21.1 and newer versions,
2653   and older versions can't decode it.
2654
2655   Here's a list of example usages of these composition escape
2656   sequences (categorized by `enum composition_method').
2657
2658   COMPOSITION_RELATIVE:
2659         ESC 0 CHAR [ CHAR ] ESC 1
2660   COMPOSITION_WITH_RULE:
2661         ESC 2 CHAR [ RULE CHAR ] ESC 1
2662   COMPOSITION_WITH_ALTCHARS:
2663         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2664   COMPOSITION_WITH_RULE_ALTCHARS:
2665         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2666
     /* Table of 256 `enum iso_code_class_type' values, presumably one per
        possible byte value.  NOTE(review): it is neither filled nor read
        in this part of the file; see its users for what each class
        means.  */
2667 enum iso_code_class_type iso_code_class[256];
2668
2669 #define SAFE_CHARSET_P(coding, id)      \
2670   ((id) <= (coding)->max_charset_id     \
2671    && (coding)->safe_charsets[id] >= 0)
2672
2673
     /* Non-zero if CODING_ISO_INITIAL for register 1 of the coding system
        stored in coding_categories[CATEGORY] is not negative.
        NOTE(review): presumably this means a charset is initially
        designated to G1, so that shift-out is meaningful; confirm against
        the definition of CODING_ISO_INITIAL.  */
2674 #define SHIFT_OUT_OK(category)  \
2675   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2676
     /* Compute the safe-charsets table of the ISO-2022 coding system
        whose attribute vector is ATTRS and store it in the
        coding_attr_safe_charsets slot of ATTRS.

        The table is a string indexed by charset id, one byte per id up to
        the largest id in the charset list.  The byte is the number of the
        graphic register to use for that charset, or 255 when no register
        was assigned.  Nothing is done if the slot already holds a string
        (after the full-support adjustment below).  */
2677 static void
2678 setup_iso_safe_charsets (attrs)
2679      Lisp_Object attrs;
2680 {
2681   Lisp_Object charset_list, safe_charsets;
2682   Lisp_Object request;
2683   Lisp_Object reg_usage;
2684   Lisp_Object tail;
2685   int reg94, reg96;
2686   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2687   int max_charset_id;
2688
2689   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
       /* A full-support coding system always uses Viso_2022_charset_list.
          When the list has to be replaced, clear the cached table so that
          it is rebuilt below.  */
2690   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2691       && ! EQ (charset_list, Viso_2022_charset_list))
2692     {
2693       CODING_ATTR_CHARSET_LIST (attrs)
2694         = charset_list = Viso_2022_charset_list;
2695       ASET (attrs, coding_attr_safe_charsets, Qnil);
2696     }
2697
       /* The table has been computed already.  */
2698   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2699     return;
2700
       /* The table needs one byte for every id up to the largest one.  */
2701   max_charset_id = 0;
2702   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2703     {
2704       int id = XINT (XCAR (tail));
2705       if (max_charset_id < id)
2706         max_charset_id = id;
2707     }
2708
       /* Start with every entry set to 255.  */
2709   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2710                                 make_number (255));
2711   request = AREF (attrs, coding_attr_iso_request);
       /* REG_USAGE is a cons whose car and cdr are read as REG94 and
          REG96, the registers used below for charsets without an
          explicit request.  */
2712   reg_usage = AREF (attrs, coding_attr_iso_usage);
2713   reg94 = XINT (XCAR (reg_usage));
2714   reg96 = XINT (XCDR (reg_usage));
2715
2716   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2717     {
2718       Lisp_Object id;
2719       Lisp_Object reg;
2720       struct charset *charset;
2721
2722       id = XCAR (tail);
2723       charset = CHARSET_FROM_ID (XINT (id));
           /* An entry for ID in the REQUEST alist takes precedence.
              Otherwise use REG96 or REG94, depending on the charset's
              iso_chars_96 flag, provided that register number is below
              4.  */
2724       reg = Fcdr (Fassq (id, request));
2725       if (! NILP (reg))
2726         SSET (safe_charsets, XINT (id), XINT (reg));
2727       else if (charset->iso_chars_96)
2728         {
2729           if (reg96 < 4)
2730             SSET (safe_charsets, XINT (id), reg96);
2731         }
2732       else
2733         {
2734           if (reg94 < 4)
2735             SSET (safe_charsets, XINT (id), reg94);
2736         }
2737     }
2738   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2739 }
2740
2741
2742 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2743    Check if a text is encoded in one of ISO-2022 based coding systems.
2744    If it is, return 1, else return 0.

        The bytes of CODING->source after the first CODING->head_ascii
        ones are scanned while two masks of ISO categories, `found' and
        `rejected', are accumulated.  When the source is exhausted,
        DETECT_INFO->rejected gains `rejected', DETECT_INFO->found gains
        the found categories that were not rejected, and 1 is returned.
        If all ISO categories get rejected before that, CATEGORY_MASK_ISO
        is added to DETECT_INFO->rejected and 0 is returned.  In both
        cases CATEGORY_MASK_ISO is added to DETECT_INFO->checked.  */
2745
2746 static int
2747 detect_coding_iso_2022 (coding, detect_info)
2748      struct coding_system *coding;
2749      struct coding_detection_info *detect_info;
2750 {
2751   const unsigned char *src = coding->source, *src_base = src;
2752   const unsigned char *src_end = coding->source + coding->src_bytes;
2753   int multibytep = coding->src_multibyte;
2754   int single_shifting = 0;
2755   int id;
2756   int c, c1;
2757   int consumed_chars = 0;
2758   int i;
2759   int rejected = 0;
2760   int found = 0;
2761   int composition_count = -1;
2762
2763   detect_info->checked |= CATEGORY_MASK_ISO;
2764
       /* Make the safe-charsets table of every defined ISO category
          available through its coding_system structure.  */
2765   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2766     {
2767       struct coding_system *this = &(coding_categories[i]);
2768       Lisp_Object attrs, val;
2769
2770       if (this->id < 0)
2771         continue;
2772       attrs = CODING_ID_ATTRS (this->id);
           /* NOTE(review): setup_iso_safe_charsets compares the charset
              list, not the safe-charsets attribute, with
              Viso_2022_charset_list, so this EQ test looks as if it can
              never succeed; confirm which attribute was meant.  */
2773       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2774           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2775         setup_iso_safe_charsets (attrs);
2776       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2777       this->max_charset_id = SCHARS (val) - 1;
2778       this->safe_charsets = (char *) SDATA (val);
2779     }
2780
2781   /* A coding system of this category is always ASCII compatible.  */
2782   src += coding->head_ascii;
2783
2784   while (rejected != CATEGORY_MASK_ISO)
2785     {
2786       src_base = src;
2787       ONE_MORE_BYTE (c);
2788       switch (c)
2789         {
2790         case ISO_CODE_ESC:
2791           if (inhibit_iso_escape_detection)
2792             break;
2793           single_shifting = 0;
2794           ONE_MORE_BYTE (c);
2795           if (c >= '(' && c <= '/')
2796             {
2797               /* Designation sequence for a charset of dimension 1.  */
2798               ONE_MORE_BYTE (c1);
2799               if (c1 < ' ' || c1 >= 0x80
2800                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2801                 /* Invalid designation sequence.  Just ignore.  */
2802                 break;
2803             }
2804           else if (c == '$')
2805             {
2806               /* Designation sequence for a charset of dimension 2.  */
2807               ONE_MORE_BYTE (c);
2808               if (c >= '@' && c <= 'B')
2809                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2810                 id = iso_charset_table[1][0][c];
2811               else if (c >= '(' && c <= '/')
2812                 {
2813                   ONE_MORE_BYTE (c1);
2814                   if (c1 < ' ' || c1 >= 0x80
2815                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2816                     /* Invalid designation sequence.  Just ignore.  */
2817                     break;
2818                 }
2819               else
2820                 /* Invalid designation sequence.  Just ignore it.  */
2821                 break;
2822             }
2823           else if (c == 'N' || c == 'O')
2824             {
2825               /* ESC <Fe> for SS2 or SS3.  */
2826               single_shifting = 1;
2827               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2828               break;
2829             }
2830           else if (c == '1')
2831             {
2832               /* End of composition.  */
2833               if (composition_count < 0
2834                   || composition_count > MAX_COMPOSITION_COMPONENTS)
2835                 /* Invalid */
2836                 break;
2837               composition_count = -1;
2838               found |= CATEGORY_MASK_ISO;
                   /* This is not a designation, so ID holds no charset
                      for the checks that follow the if-chain; stop
                      here.  */
                   break;
2839             }
2840           else if (c >= '0' && c <= '4')
2841             {
2842               /* ESC <Fp> for start/end composition.  */
2843               composition_count = 0;
2844               break;
2845             }
2846           else
2847             {
2848               /* Invalid escape sequence.  Just ignore it.  */
2849               break;
2850             }
2851
2852           /* We found a valid designation sequence for CHARSET.  */
2853           rejected |= CATEGORY_MASK_ISO_8BIT;
2854           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2855                               id))
2856             found |= CATEGORY_MASK_ISO_7;
2857           else
2858             rejected |= CATEGORY_MASK_ISO_7;
2859           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2860                               id))
2861             found |= CATEGORY_MASK_ISO_7_TIGHT;
2862           else
2863             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2864           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2865                               id))
2866             found |= CATEGORY_MASK_ISO_7_ELSE;
2867           else
2868             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2869           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2870                               id))
2871             found |= CATEGORY_MASK_ISO_8_ELSE;
2872           else
2873             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2874           break;
2875
2876         case ISO_CODE_SO:
2877         case ISO_CODE_SI:
2878           /* Locking shift out/in.  */
2879           if (inhibit_iso_escape_detection)
2880             break;
2881           single_shifting = 0;
2882           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2883           break;
2884
2885         case ISO_CODE_CSI:
2886           /* Control sequence introducer.  */
2887           single_shifting = 0;
2888           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2889           found |= CATEGORY_MASK_ISO_8_ELSE;
2890           goto check_extra_latin;
2891
2892         case ISO_CODE_SS2:
2893         case ISO_CODE_SS3:
2894           /* Single shift.   */
2895           if (inhibit_iso_escape_detection)
2896             break;
2897           single_shifting = 0;
2898           rejected |= CATEGORY_MASK_ISO_7BIT;
2899           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2900               & CODING_ISO_FLAG_SINGLE_SHIFT)
2901             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2902           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2903               & CODING_ISO_FLAG_SINGLE_SHIFT)
2904             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2905           if (single_shifting)
2906             break;
2907           goto check_extra_latin;
2908
2909         default:
2910           if (c < 0)
2911             continue;
2912           if (c < 0x80)
2913             {
2914               if (composition_count >= 0)
2915                 composition_count++;
2916               single_shifting = 0;
2917               break;
2918             }
2919           if (c >= 0xA0)
2920             {
2921               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2922               found |= CATEGORY_MASK_ISO_8_1;
2923               /* Check the length of succeeding codes of the range
2924                  0xA0..0xFF.  If the byte length is even, we include
2925                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2926                  only when we are not single shifting.  */
2927               if (! single_shifting
2928                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2929                 {
2930                   int i = 1;
2931                   while (src < src_end)
2932                     {
                           const unsigned char *src_before = src;
                           int consumed_before = consumed_chars;

2933                       ONE_MORE_BYTE (c);
2934                       if (c < 0xA0)
                             {
                               /* This byte ends the run but is not part
                                  of it.  Put it back so that the outer
                                  loop dispatches it; otherwise an ESC,
                                  SO, SI, SS2, SS3 or CSI that directly
                                  follows 8-bit text would be lost.  */
                               src = src_before;
                               consumed_chars = consumed_before;
2935                           break;
                             }
2936                       i++;
2937                     }
2938
                       /* An odd run that was ended by another byte (not
                          by the end of the source) cannot consist of
                          two-byte codes only.  */
2939                   if (i & 1 && src < src_end)
2940                     {
2941                       rejected |= CATEGORY_MASK_ISO_8_2;
2942                       if (composition_count >= 0)
2943                         composition_count += i;
2944                     }
2945                   else
2946                     {
2947                       found |= CATEGORY_MASK_ISO_8_2;
2948                       if (composition_count >= 0)
2949                         composition_count += i / 2;
2950                     }
2951                 }
2952               break;
2953             }
2954         check_extra_latin:
               /* C is in the range 0x80..0x9F here.  It is acceptable
                  only if Vlatin_extra_code_table has a non-nil entry
                  for it.  */
2955           single_shifting = 0;
2956           if (! VECTORP (Vlatin_extra_code_table)
2957               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2958             {
2959               rejected = CATEGORY_MASK_ISO;
2960               break;
2961             }
2962           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2963               & CODING_ISO_FLAG_LATIN_EXTRA)
2964             found |= CATEGORY_MASK_ISO_8_1;
2965           else
2966             rejected |= CATEGORY_MASK_ISO_8_1;
2967           rejected |= CATEGORY_MASK_ISO_8_2;
2968         }
2969     }
       /* Every ISO category has been rejected.  */
2970   detect_info->rejected |= CATEGORY_MASK_ISO;
2971   return 0;
2972
2973  no_more_source:
2974   detect_info->rejected |= rejected;
2975   detect_info->found |= (found & ~rejected);
2976   return 1;
2977 }
2978
2979
2980 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
2981    escape sequence should be kept.

        REG is the graphic register being designated; DIM, CHARS_96 and
        FINAL are passed to ISO_CHARSET_TABLE to look up the charset id.
        CHARS_96 must be an lvalue because the macro may assign to it,
        and a variable named `coding' must be in scope.

        If FINAL is below '0' or not below 128, if the lookup yields a
        negative id, or if the charset is not safe for CODING
        (SAFE_CHARSET_P), the designation of REG is set to -2, CHARS_96
        is set to -1 and nothing else is done.

        Otherwise charset_jisx0201_roman is replaced by charset_ascii
        when CODING_ISO_FLAG_USE_ROMAN is set, and charset_jisx0208_1978
        by charset_jisx0208 when CODING_ISO_FLAG_USE_OLDJIS is set,
        before the id is stored as the designation of REG.  */
2982 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2983   do {                                                                  \
2984     int id, prev;                                                       \
2985                                                                         \
2986     if (final < '0' || final >= 128                                     \
2987         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2988         || !SAFE_CHARSET_P (coding, id))                                \
2989       {                                                                 \
2990         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2991         chars_96 = -1;                                                  \
2992         break;                                                          \
2993       }                                                                 \
2994     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2995     if (id == charset_jisx0201_roman)                                   \
2996       {                                                                 \
2997         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2998           id = charset_ascii;                                           \
2999       }                                                                 \
3000     else if (id == charset_jisx0208_1978)                               \
3001       {                                                                 \
3002         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3003           id = charset_jisx0208;                                        \
3004       }                                                                 \
3005     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3006     /* If there was an invalid designation to REG previously, and this  \
3007        designation is ASCII to REG, we should keep this designation     \
3008        sequence.  */                                                    \
3009     if (prev == -2 && id == charset_ascii)                              \
3010       chars_96 = -1;                                                    \
3011   } while (0)
3012
3013
     /* Finish the composition being decoded, if any.

        Nothing is done when composition_state is COMPOSING_NO.
        Otherwise, jump to no_more_source if charbuf has no room for
        component_idx more elements; else reset composition_state to
        COMPOSING_NO and copy the collected components into charbuf.

        For COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS, all
        component_idx entries of `components' are copied and char_offset
        advances by component_idx.  For the other methods only the
        entries at even indices are copied (presumably the odd ones are
        composition rules) and char_offset advances by
        component_idx / 2 + 1.

        The variables composition_state, method, components,
        component_idx, charbuf, charbuf_end and char_offset and the label
        no_more_source must be in scope.  */
3014 #define MAYBE_FINISH_COMPOSITION()                              \
3015   do {                                                          \
3016     int i;                                                      \
3017     if (composition_state == COMPOSING_NO)                      \
3018       break;                                                    \
3019     /* It is assured that we have enough room for producing     \
3020        characters stored in the table `components'.  */         \
3021     if (charbuf + component_idx > charbuf_end)                  \
3022       goto no_more_source;                                      \
3023     composition_state = COMPOSING_NO;                           \
3024     if (method == COMPOSITION_RELATIVE                          \
3025         || method == COMPOSITION_WITH_ALTCHARS)                 \
3026       {                                                         \
3027         for (i = 0; i < component_idx; i++)                     \
3028           *charbuf++ = components[i];                           \
3029         char_offset += component_idx;                           \
3030       }                                                         \
3031     else                                                        \
3032       {                                                         \
3033         for (i = 0; i < component_idx; i += 2)                  \
3034           *charbuf++ = components[i];                           \
3035         char_offset += (component_idx / 2) + 1;                 \
3036       }                                                         \
3037   } while (0)
3038
3039
3040 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3041    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3042    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3043    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3044    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

        C1 is the byte that followed ESC.  If C1 is '0' while
        composition_state is COMPOSING_COMPONENT_RULE, only record the
        number of components read so far in component_len and switch to
        COMPOSING_CHAR.

        Otherwise finish any pending composition, require room for
        MAX_COMPOSITION_COMPONENTS elements in charbuf (else jump to
        no_more_source), and search the rest of the source for the
        terminating ESC 1.  If it is missing, jump to invalid_code when
        this is the last block; else record
        CODING_RESULT_INSUFFICIENT_SRC and jump to no_more_source.  If it
        is present, set `method' and composition_state from C1 and reset
        component_idx and component_len.

        A failed search leaves P at SRC_END - 1, or at SRC_END when no
        source byte is left at all (the loop body never runs), hence the
        `>=' in the test that follows the loop.
3045   */
3046
3047 #define DECODE_COMPOSITION_START(c1)                                    \
3048   do {                                                                  \
3049     if (c1 == '0'                                                       \
3050         && composition_state == COMPOSING_COMPONENT_RULE)               \
3051       {                                                                 \
3052         component_len = component_idx;                                  \
3053         composition_state = COMPOSING_CHAR;                             \
3054       }                                                                 \
3055     else                                                                \
3056       {                                                                 \
3057         const unsigned char *p;                                         \
3058                                                                         \
3059         MAYBE_FINISH_COMPOSITION ();                                    \
3060         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
3061           goto no_more_source;                                          \
3062         for (p = src; p < src_end - 1; p++)                             \
3063           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
3064             break;                                                      \
3065         if (p >= src_end - 1)                                           \
3066           {                                                             \
3067             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
3068               goto invalid_code;                                        \
3069             /* The current composition doesn't end in the current       \
3070                source.  */                                              \
3071             record_conversion_result                                    \
3072               (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
3073             goto no_more_source;                                        \
3074           }                                                             \
3075                                                                         \
3076         /* This is surely the start of a composition.  */               \
3077         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
3078                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
3079                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
3080                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
3081         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
3082                              : COMPOSING_COMPONENT_CHAR);               \
3083         component_idx = component_len = 0;                              \
3084       }                                                                 \
3085   } while (0)
3086
3087
3088 /* Handle composition end sequence ESC 1: write the characters (and
3089    rules) collected in `components' to CHARBUF and stop composing.  */
3090 #define DECODE_COMPOSITION_END()                                        \
3091   do {                                                                  \
3092     int nchars = (component_len > 0 ? component_idx - component_len     \
3093                   : method == COMPOSITION_RELATIVE ? component_idx      \
3094                   : (component_idx + 1) / 2);                           \
3095     int i;                                                              \
3096     int *saved_charbuf = charbuf;                                       \
3097                                                                         \
3098     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
3099     if (method != COMPOSITION_RELATIVE)  /* components go first */      \
3100       {                                                                 \
3101         if (component_len == 0)                                         \
3102           for (i = 0; i < component_idx; i++)                           \
3103             *charbuf++ = components[i];                                 \
3104         else                                                            \
3105           for (i = 0; i < component_len; i++)                           \
3106             *charbuf++ = components[i];                                 \
3107         *saved_charbuf = saved_charbuf - charbuf; /* -(ints so far) */  \
3108       }                                                                 \
3109     if (method == COMPOSITION_WITH_RULE)  /* chars at even indices */   \
3110       for (i = 0; i < component_idx; i += 2, char_offset++)             \
3111         *charbuf++ = components[i];                                     \
3112     else                      /* chars from component_len onward */     \
3113       for (i = component_len; i < component_idx; i++, char_offset++)    \
3114         *charbuf++ = components[i];                                     \
3115     coding->annotated = 1;                                              \
3116     composition_state = COMPOSING_NO;                                   \
3117   } while (0)
3118
3119
3120 /* Decode a composition rule from the byte C1 (and maybe one more byte
3121    read from SRC into C2, which the expansion site must declare) and
3122    store the encoded rule back in C1, or 0 if C1 - 32 is 93 or more.  */
3123
3124 #define DECODE_COMPOSITION_RULE(c1)                                     \
3125   do {                                                                  \
3126     (c1) -= 32;                                                         \
3127     if (c1 < 81)                /* old format (before ver.21) */        \
3128       {                                                                 \
3129         int gref = (c1) / 9;                                            \
3130         int nref = (c1) % 9;                                            \
3131         if (gref == 4) gref = 10;                                       \
3132         if (nref == 4) nref = 10;                                       \
3133         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
3134       }                                                                 \
3135     else if (c1 < 93)           /* new format (after ver.21) */         \
3136       {                                                                 \
3137         ONE_MORE_BYTE (c2);                                             \
3138         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
3139       }                                                                 \
3140     else                                                                \
3141       c1 = 0;                                                           \
3142   } while (0)
3143
3144
3145 /* Decode ISO2022 source bytes of CODING into coding->charbuf.  See the
3146    above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3147 static void
3148 decode_coding_iso_2022 (coding)
3149      struct coding_system *coding;
3150 {
3151   const unsigned char *src = coding->source + coding->consumed;
3152   const unsigned char *src_end = coding->source + coding->src_bytes;
3153   const unsigned char *src_base;
3154   int *charbuf = coding->charbuf + coding->charbuf_used;
3155   int *charbuf_end
3156     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
3157   int consumed_chars = 0, consumed_chars_base;
3158   int multibytep = coding->src_multibyte;
3159   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3160   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3161   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3162   int charset_id_2, charset_id_3;
3163   struct charset *charset;
3164   int c;
3165   /* For handling composition sequence.  */
3166 #define COMPOSING_NO                    0
3167 #define COMPOSING_CHAR                  1
3168 #define COMPOSING_RULE                  2
3169 #define COMPOSING_COMPONENT_CHAR        3
3170 #define COMPOSING_COMPONENT_RULE        4
3171
3172   int composition_state = COMPOSING_NO;
3173   enum composition_method method;
3174   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3175   int component_idx;
3176   int component_len;
3177   Lisp_Object attrs, charset_list;
3178   int char_offset = coding->produced_char;
3179   int last_offset = char_offset;
3180   int last_id = charset_ascii;
3181   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3182   int byte_after_cr = -1;
3183
3184   CODING_GET_INFO (coding, attrs, charset_list);
3185   setup_iso_safe_charsets (attrs);
3186   /* Charset list may have been changed.  */
3187   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3188   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3189
3190   while (1)
3191     {
3192       int c1, c2;
3193
3194       src_base = src;
3195       consumed_chars_base = consumed_chars;
3196
3197       if (charbuf >= charbuf_end)
3198         break;
3199
3200       if (byte_after_cr >= 0)
3201         c1 = byte_after_cr, byte_after_cr = -1;
3202       else
3203         ONE_MORE_BYTE (c1);
3204       if (c1 < 0)
3205         goto invalid_code;
3206
3207       /* We produce at most one character.  */
3208       switch (iso_code_class [c1])
3209         {
3210         case ISO_0x20_or_0x7F:
3211           if (composition_state != COMPOSING_NO)
3212             {
3213               if (composition_state == COMPOSING_RULE
3214                   || composition_state == COMPOSING_COMPONENT_RULE)
3215                 {
3216                   if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3217                     {
3218                       DECODE_COMPOSITION_RULE (c1);
3219                       components[component_idx++] = c1;
3220                       composition_state--;
3221                       continue;
3222                     }
3223                   /* Too long composition.  */
3224                   MAYBE_FINISH_COMPOSITION ();
3225                 }
3226             }
3227           if (charset_id_0 < 0
3228               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3229             /* This is SPACE or DEL.  */
3230             charset = CHARSET_FROM_ID (charset_ascii);
3231           else
3232             charset = CHARSET_FROM_ID (charset_id_0);
3233           break;
3234
3235         case ISO_graphic_plane_0:
3236           if (composition_state != COMPOSING_NO)
3237             {
3238               if (composition_state == COMPOSING_RULE
3239                   || composition_state == COMPOSING_COMPONENT_RULE)
3240                 {
3241                   if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3242                     {
3243                       DECODE_COMPOSITION_RULE (c1);
3244                       components[component_idx++] = c1;
3245                       composition_state--;
3246                       continue;
3247                     }
3248                   MAYBE_FINISH_COMPOSITION ();
3249                 }
3250             }
3251           if (charset_id_0 < 0)
3252             charset = CHARSET_FROM_ID (charset_ascii);
3253           else
3254             charset = CHARSET_FROM_ID (charset_id_0);
3255           break;
3256
3257         case ISO_0xA0_or_0xFF:
3258           if (charset_id_1 < 0
3259               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3260               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3261             goto invalid_code;
3262           /* This is a graphic character, we fall down ... */
3263
3264         case ISO_graphic_plane_1:
3265           if (charset_id_1 < 0)
3266             goto invalid_code;
3267           charset = CHARSET_FROM_ID (charset_id_1);
3268           break;
3269
3270         case ISO_control_0:
3271           if (eol_crlf && c1 == '\r')
3272             ONE_MORE_BYTE (byte_after_cr);
3273           MAYBE_FINISH_COMPOSITION ();
3274           charset = CHARSET_FROM_ID (charset_ascii);
3275           break;
3276
3277         case ISO_control_1:
3278           MAYBE_FINISH_COMPOSITION ();
3279           goto invalid_code;
3280
3281         case ISO_shift_out:
3282           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3283               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3284             goto invalid_code;
3285           CODING_ISO_INVOCATION (coding, 0) = 1;
3286           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3287           continue;
3288
3289         case ISO_shift_in:
3290           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3291             goto invalid_code;
3292           CODING_ISO_INVOCATION (coding, 0) = 0;
3293           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3294           continue;
3295
3296         case ISO_single_shift_2_7:
3297         case ISO_single_shift_2:
3298           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3299             goto invalid_code;
3300           /* SS2 is handled as an escape sequence of ESC 'N' */
3301           c1 = 'N';
3302           goto label_escape_sequence;
3303
3304         case ISO_single_shift_3:
3305           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3306             goto invalid_code;
3307           /* SS3 is handled as an escape sequence of ESC 'O' */
3308           c1 = 'O';
3309           goto label_escape_sequence;
3310
3311         case ISO_control_sequence_introducer:
3312           /* CSI is handled as an escape sequence of ESC '[' ...  */
3313           c1 = '[';
3314           goto label_escape_sequence;
3315
3316         case ISO_escape:
3317           ONE_MORE_BYTE (c1);
3318         label_escape_sequence:
3319           /* Escape sequences handled here are invocation,
3320              designation, direction specification, and character
3321              composition specification.  */
3322           switch (c1)
3323             {
3324             case '&':           /* revision of following character set */
3325               ONE_MORE_BYTE (c1);
3326               if (!(c1 >= '@' && c1 <= '~'))
3327                 goto invalid_code;
3328               ONE_MORE_BYTE (c1);
3329               if (c1 != ISO_CODE_ESC)
3330                 goto invalid_code;
3331               ONE_MORE_BYTE (c1);
3332               goto label_escape_sequence;
3333
3334             case '$':           /* designation of 2-byte character set */
3335               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3336                 goto invalid_code;
3337               {
3338                 int reg, chars96;
3339
3340                 ONE_MORE_BYTE (c1);
3341                 if (c1 >= '@' && c1 <= 'B')
3342                   {     /* designation of JISX0208.1978, GB2312.1980,
3343                            or JISX0208.1980 */
3344                     reg = 0, chars96 = 0;
3345                   }
3346                 else if (c1 >= 0x28 && c1 <= 0x2B)
3347                   { /* designation of DIMENSION2_CHARS94 character set */
3348                     reg = c1 - 0x28, chars96 = 0;
3349                     ONE_MORE_BYTE (c1);
3350                   }
3351                 else if (c1 >= 0x2C && c1 <= 0x2F)
3352                   { /* designation of DIMENSION2_CHARS96 character set */
3353                     reg = c1 - 0x2C, chars96 = 1;
3354                     ONE_MORE_BYTE (c1);
3355                   }
3356                 else
3357                   goto invalid_code;
3358                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3359                 /* We must update these variables now.  */
3360                 if (reg == 0)
3361                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3362                 else if (reg == 1)
3363                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3364                 if (chars96 < 0)
3365                   goto invalid_code;
3366               }
3367               continue;
3368
3369             case 'n':           /* invocation of locking-shift-2 */
3370               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3371                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3372                 goto invalid_code;
3373               CODING_ISO_INVOCATION (coding, 0) = 2;
3374               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3375               continue;
3376
3377             case 'o':           /* invocation of locking-shift-3 */
3378               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3379                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3380                 goto invalid_code;
3381               CODING_ISO_INVOCATION (coding, 0) = 3;
3382               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3383               continue;
3384
3385             case 'N':           /* invocation of single-shift-2 */
3386               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3387                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3388                 goto invalid_code;
3389               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3390               if (charset_id_2 < 0)
3391                 charset = CHARSET_FROM_ID (charset_ascii);
3392               else
3393                 charset = CHARSET_FROM_ID (charset_id_2);
3394               ONE_MORE_BYTE (c1);
3395               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3396                 goto invalid_code;
3397               break;
3398
3399             case 'O':           /* invocation of single-shift-3 */
3400               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3401                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3402                 goto invalid_code;
3403               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3404               if (charset_id_3 < 0)
3405                 charset = CHARSET_FROM_ID (charset_ascii);
3406               else
3407                 charset = CHARSET_FROM_ID (charset_id_3);
3408               ONE_MORE_BYTE (c1);
3409               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3410                 goto invalid_code;
3411               break;
3412
3413             case '0': case '2': case '3': case '4': /* start composition */
3414               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3415                 goto invalid_code;
3416               DECODE_COMPOSITION_START (c1);
3417               continue;
3418
3419             case '1':           /* end composition */
3420               if (composition_state == COMPOSING_NO)
3421                 goto invalid_code;
3422               DECODE_COMPOSITION_END ();
3423               continue;
3424
3425             case '[':           /* specification of direction */
3426               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3427                 goto invalid_code;
3428               /* For the moment, nested direction is not supported.
3429                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3430                  left-to-right, and nonzero means right-to-left.  */
3431               ONE_MORE_BYTE (c1);
3432               switch (c1)
3433                 {
3434                 case ']':       /* end of the current direction */
3435                   coding->mode &= ~CODING_MODE_DIRECTION;
3436                   /* NOTE(review): falls through (needs a 2nd `]') -- confirm.  */
3437                 case '0':       /* end of the current direction */
3438                 case '1':       /* start of left-to-right direction */
3439                   ONE_MORE_BYTE (c1);
3440                   if (c1 == ']')
3441                     coding->mode &= ~CODING_MODE_DIRECTION;
3442                   else
3443                     goto invalid_code;
3444                   break;
3445
3446                 case '2':       /* start of right-to-left direction */
3447                   ONE_MORE_BYTE (c1);
3448                   if (c1 == ']')
3449                     coding->mode |= CODING_MODE_DIRECTION;
3450                   else
3451                     goto invalid_code;
3452                   break;
3453
3454                 default:
3455                   goto invalid_code;
3456                 }
3457               continue;
3458
3459             case '%':
3460               ONE_MORE_BYTE (c1);
3461               if (c1 == '/')
3462                 {
3463                   /* CTEXT extended segment:
3464                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3465                      We keep these bytes as is for the moment.
3466                      They may be decoded by post-read-conversion.  */
3467                   int dim, M, L;
3468                   int size;
3469
3470                   ONE_MORE_BYTE (dim);
3471                   ONE_MORE_BYTE (M);
3472                   ONE_MORE_BYTE (L);
3473                   size = ((M - 128) * 128) + (L - 128);
3474                   if (charbuf + 8 + size > charbuf_end)
3475                     goto break_loop;
3476                   *charbuf++ = ISO_CODE_ESC;
3477                   *charbuf++ = '%';
3478                   *charbuf++ = '/';
3479                   *charbuf++ = dim;
3480                   *charbuf++ = BYTE8_TO_CHAR (M);
3481                   *charbuf++ = BYTE8_TO_CHAR (L);
3482                   while (size-- > 0)
3483                     {
3484                       ONE_MORE_BYTE (c1);
3485                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3486                     }
3487                 }
3488               else if (c1 == 'G')
3489                 {
3490                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3491                      ESC % G --UTF-8-BYTES-- ESC % @
3492                      We keep these bytes as is for the moment.
3493                      They may be decoded by post-read-conversion.  */
3494                   int *p = charbuf;
3495
3496                   if (p + 6 > charbuf_end)
3497                     goto break_loop;
3498                   *p++ = ISO_CODE_ESC;
3499                   *p++ = '%';
3500                   *p++ = 'G';
3501                   while (p < charbuf_end)
3502                     {
3503                       ONE_MORE_BYTE (c1);
3504                       if (c1 == ISO_CODE_ESC
3505                           && src + 1 < src_end
3506                           && src[0] == '%'
3507                           && src[1] == '@')
3508                         {
3509                           src += 2;
3510                           break;
3511                         }
3512                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3513                     }
3514                   if (p + 3 > charbuf_end)
3515                     goto break_loop;
3516                   *p++ = ISO_CODE_ESC;
3517                   *p++ = '%';
3518                   *p++ = '@';
3519                   charbuf = p;
3520                 }
3521               else
3522                 goto invalid_code;
3523               continue;
3524               break;
3525
3526             default:
3527               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3528                 goto invalid_code;
3529               {
3530                 int reg, chars96;
3531
3532                 if (c1 >= 0x28 && c1 <= 0x2B)
3533                   { /* designation of DIMENSION1_CHARS94 character set */
3534                     reg = c1 - 0x28, chars96 = 0;
3535                     ONE_MORE_BYTE (c1);
3536                   }
3537                 else if (c1 >= 0x2C && c1 <= 0x2F)
3538                   { /* designation of DIMENSION1_CHARS96 character set */
3539                     reg = c1 - 0x2C, chars96 = 1;
3540                     ONE_MORE_BYTE (c1);
3541                   }
3542                 else
3543                   goto invalid_code;
3544                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3545                 /* We must update these variables now.  */
3546                 if (reg == 0)
3547                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3548                 else if (reg == 1)
3549                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3550                 if (chars96 < 0)
3551                   goto invalid_code;
3552               }
3553               continue;
3554             }
3555         }
3556
3557       if (charset->id != charset_ascii
3558           && last_id != charset->id)
3559         {
3560           if (last_id != charset_ascii)
3561             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3562           last_id = charset->id;
3563           last_offset = char_offset;
3564         }
3565
3566       /* Now we know CHARSET and 1st position code C1 of a character.
3567          Produce a decoded character while getting 2nd position code
3568          C2 if necessary.  */
3569       c1 &= 0x7F;
3570       if (CHARSET_DIMENSION (charset) > 1)
3571         {
3572           ONE_MORE_BYTE (c2);
3573           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3574             /* C2 is not in a valid range.  */
3575             goto invalid_code;
3576           c1 = (c1 << 8) | (c2 & 0x7F);
3577           if (CHARSET_DIMENSION (charset) > 2)
3578             {
3579               ONE_MORE_BYTE (c2);
3580               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3581                 /* C2 is not in a valid range.  */
3582                 goto invalid_code;
3583               c1 = (c1 << 8) | (c2 & 0x7F);
3584             }
3585         }
3586
3587       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3588       if (c < 0)
3589         {
3590           MAYBE_FINISH_COMPOSITION ();
3591           for (; src_base < src; src_base++, char_offset++)
3592             {
3593               if (ASCII_BYTE_P (*src_base))
3594                 *charbuf++ = *src_base;
3595               else
3596                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3597             }
3598         }
3599       else if (composition_state == COMPOSING_NO)
3600         {
3601           *charbuf++ = c;
3602           char_offset++;
3603         }
3604       else
3605         {
3606           if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3607             {
3608               components[component_idx++] = c;
3609               if (method == COMPOSITION_WITH_RULE
3610                   || (method == COMPOSITION_WITH_RULE_ALTCHARS
3611                       && composition_state == COMPOSING_COMPONENT_CHAR))
3612                 composition_state++;
3613             }
3614           else
3615             {
3616               MAYBE_FINISH_COMPOSITION ();
3617               *charbuf++ = c;
3618               char_offset++;
3619             }
3620         }
3621       continue;
3622
3623     invalid_code:
3624       MAYBE_FINISH_COMPOSITION ();
3625       src = src_base;
3626       consumed_chars = consumed_chars_base;
3627       ONE_MORE_BYTE (c);
3628       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3629       char_offset++;
3630       coding->errors++;
3631       continue;
3632
3633     break_loop:
3634       break;
3635     }
3636
3637  no_more_source:
3638   if (last_id != charset_ascii)
3639     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3640   coding->consumed_char += consumed_chars_base;
3641   coding->consumed = src_base - coding->source;
3642   coding->charbuf_used = charbuf - coding->charbuf;
3643 }
3644
3645
3646 /* ISO2022 encoding stuff.  */
3647
3648 /*
3649    For encoding, it is not enough to say just "ISO2022"; we have to
3650    specify more details.  In Emacs, each ISO2022-variant coding
3651    system has the following specifications:
3652         1. Initial designation to G0 thru G3.
3653         2. Allows short-form designation?
3654         3. ASCII should be designated to G0 before control characters?
3655         4. ASCII should be designated to G0 at end of line?
3656         5. 7-bit environment or 8-bit environment?
3657         6. Use locking-shift?
3658         7. Use Single-shift?
3659    And the following two are only for Japanese:
3660         8. Use ASCII in place of JIS0201-1976-Roman?
3661         9. Use JISX0208-1983 in place of JISX0208-1978?
3662    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3663    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3664    details.
3665 */
3666
3667 /* Produce codes (escape sequence) for designating CHARSET to graphic
3668    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3669    '@', 'A', or 'B' and the coding system CODING allows, produce
3670    designation sequence of short-form.  Finally record CHARSET as the
3671    charset designated to REG in CODING.  */
3672 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3673   do {                                                                  \
3674     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3675     char *intermediate_char_94 = "()*+";                                \
3676     char *intermediate_char_96 = ",-./";                                \
3677     int revision = -1;                                                  \
3678     int c;                                                              \
3679                                                                         \
3680     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3681       revision = CHARSET_ISO_REVISION (charset);                        \
3682                                                                         \
3683     if (revision >= 0)  /* announce revision: ESC & <'@' + rev> */      \
3684       {                                                                 \
3685         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3686         EMIT_ONE_BYTE ('@' + revision);                                 \
3687       }                                                                 \
3688     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3689     if (CHARSET_DIMENSION (charset) == 1)                               \
3690       {                                                                 \
3691         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3692           c = intermediate_char_94[reg];                                \
3693         else                                                            \
3694           c = intermediate_char_96[reg];                                \
3695         EMIT_ONE_ASCII_BYTE (c);                                        \
3696       }                                                                 \
3697     else                                                                \
3698       {                                                                 \
3699         EMIT_ONE_ASCII_BYTE ('$');                                      \
3700         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3701           {  /* short form omits this intermediate byte */              \
3702             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3703                 || reg != 0                                             \
3704                 || final_char < '@' || final_char > 'B')                \
3705               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3706           }                                                             \
3707         else                                                            \
3708           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3709       }                                                                 \
3710     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3711                                                                         \
3712     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3713   } while (0)
3714
3715
3716 /* The following two macros produce codes (control character or escape
3717    sequence) for ISO2022 single-shift functions (single-shift-2 and
3718    single-shift-3).  The escape sequence is used for 7-bit coding
3719    systems.  Both macros refer to `coding' of the expansion site.  */
3720 #define ENCODE_SINGLE_SHIFT_2                                           \
3721   do {                                                                  \
3722     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3723       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3724     else                                                                \
3725       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3726     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3727   } while (0)
3728
3729 /* Single-shift-3: ESC 'O' if 7-bit, else the byte ISO_CODE_SS3.  */
3730 #define ENCODE_SINGLE_SHIFT_3                                           \
3731   do {                                                                  \
3732     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3733       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3734     else                                                                \
3735       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3736     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3737   } while (0)
3738
3739
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each of them
   also records in CODING the number of the graphic register (0..3)
   that is now invoked to graphic plane 0.  */

/* Shift-in: emit ISO_CODE_SI; graphic register 0 is invoked.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


/* Shift-out: emit ISO_CODE_SO; graphic register 1 is invoked.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


/* Locking-shift-2: emit ESC 'n'; graphic register 2 is invoked.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3: emit ESC 'o'; graphic register 3 is invoked.
   The final byte must be 'o': ESC 'n' is locking-shift-2, and emitting
   it here would make a reader invoke graphic register 2 while CODING
   records register 3.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3770
3771
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: it is replaced by the JISX0201-Roman
   charset when it is ASCII and CODING has CODING_ISO_FLAG_USE_ROMAN.
   C1 is parenthesized at every use so that an expression argument
   such as `a | b' is masked as a whole.  The macro reads and updates
   `coding', `dst' and `produced_chars' of the expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single-shift was just produced; it covers exactly this     \
           one character.  */                                           \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        /* CHARSET is on graphic plane 0: 7-bit code.  */               \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        /* CHARSET is on graphic plane 1: code with the 8th bit set.  */\
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3814
3815
/* Produce the two bytes of a DIMENSION2 character of CHARSET whose
   position-codes are C1 and C2, producing beforehand whatever
   designation and invocation codes are still needed.

   CHARSET must be an lvalue: it is replaced by the JISX0208-1978
   charset when it is JISX0208 and CODING has
   CODING_ISO_FLAG_USE_OLDJIS.  The macro reads and updates `coding',
   `dst' and `produced_chars' of the expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (! CODING_ISO_SINGLE_SHIFTING (coding)                           \
        && id != CODING_ISO_INVOKED_CHARSET (coding, 0)                 \
        && id != CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                 \
        /* CHARSET is on neither graphic plane yet.  Designate and/or   \
           invoke it, then go round the loop again to produce the       \
           character itself.  */                                        \
        dst = encode_invocation_designation (charset, coding, dst,      \
                                             &produced_chars);          \
        continue;                                                       \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* The pending single-shift covers exactly this character.  */  \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                  \
    else                                                                \
      EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                        \
    break;                                                              \
  } while (1)
3858
3859
/* Produce codes for character C of CHARSET.  C is converted to its
   code in CHARSET with ENCODE_CHAR, then handed to the dimension-1
   macro, or, for any other dimension, to the dimension-2 macro with
   the code split into its high part (CODE >> 8) and low byte.  */

#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset),(c));                                \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
3869
3870
3871 /* Produce designation and invocation codes at a place pointed by DST
3872    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3873    Return new DST.  */
3874
3875 unsigned char *
3876 encode_invocation_designation (charset, coding, dst, p_nchars)
3877      struct charset *charset;
3878      struct coding_system *coding;
3879      unsigned char *dst;
3880      int *p_nchars;
3881 {
3882   int multibytep = coding->dst_multibyte;
3883   int produced_chars = *p_nchars;
3884   int reg;                      /* graphic register number */
3885   int id = CHARSET_ID (charset);
3886
3887   /* At first, check designations.  */
3888   for (reg = 0; reg < 4; reg++)
3889     if (id == CODING_ISO_DESIGNATION (coding, reg))
3890       break;
3891
3892   if (reg >= 4)
3893     {
3894       /* CHARSET is not yet designated to any graphic registers.  */
3895       /* At first check the requested designation.  */
3896       reg = CODING_ISO_REQUEST (coding, id);
3897       if (reg < 0)
3898         /* Since CHARSET requests no special designation, designate it
3899            to graphic register 0.  */
3900         reg = 0;
3901
3902       ENCODE_DESIGNATION (charset, reg, coding);
3903     }
3904
3905   if (CODING_ISO_INVOCATION (coding, 0) != reg
3906       && CODING_ISO_INVOCATION (coding, 1) != reg)
3907     {
3908       /* Since the graphic register REG is not invoked to any graphic
3909          planes, invoke it to graphic plane 0.  */
3910       switch (reg)
3911         {
3912         case 0:                 /* graphic register 0 */
3913           ENCODE_SHIFT_IN;
3914           break;
3915
3916         case 1:                 /* graphic register 1 */
3917           ENCODE_SHIFT_OUT;
3918           break;
3919
3920         case 2:                 /* graphic register 2 */
3921           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3922             ENCODE_SINGLE_SHIFT_2;
3923           else
3924             ENCODE_LOCKING_SHIFT_2;
3925           break;
3926
3927         case 3:                 /* graphic register 3 */
3928           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3929             ENCODE_SINGLE_SHIFT_3;
3930           else
3931             ENCODE_LOCKING_SHIFT_3;
3932           break;
3933         }
3934     }
3935
3936   *p_nchars = produced_chars;
3937   return dst;
3938 }
3939
/* The following three macros produce codes for indicating direction
   of text.  */

/* Produce a control sequence introducer: the two ASCII bytes ESC '['
   when CODING has CODING_ISO_FLAG_SEVEN_BITS, the single byte
   ISO_CODE_CSI otherwise.  The flag is tested with `&', as everywhere
   else in this file; comparing the whole flag word with `==' would
   select the 7-bit form only when no other flag is set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
3949
3950
/* Produce the right-to-left direction code: a control sequence
   introducer followed by the ASCII bytes '2' and ']'.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used without an argument list.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
3956
3957
/* Produce the left-to-right direction code: a control sequence
   introducer followed by the ASCII bytes '0' and ']'.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used without an argument list.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
3963
3964
/* Bring the graphic planes and registers of CODING back to their
   initial state: produce shift-in unless graphic register 0 is
   already invoked to graphic plane 0, then, for every graphic
   register that has an initial charset (CODING_ISO_INITIAL >= 0)
   different from its current designation, produce the designation
   of that initial charset.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *charset;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        int initial_id = CODING_ISO_INITIAL (coding, reg);              \
                                                                        \
        /* Nothing to do for a register without an initial charset,     \
           or one that still holds it.  */                              \
        if (initial_id < 0                                              \
            || CODING_ISO_DESIGNATION (coding, reg) == initial_id)      \
          continue;                                                     \
        charset = CHARSET_FROM_ID (initial_id);                         \
        ENCODE_DESIGNATION (charset, reg, coding);                      \
      }                                                                 \
  } while (0)
3983
3984
3985 /* Produce designation sequences of charsets in the line started from
3986    SRC to a place pointed by DST, and return updated DST.
3987
3988    If the current block ends before any end-of-line, we may fail to
3989    find all the necessary designations.  */
3990
3991 static unsigned char *
3992 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3993      struct coding_system *coding;
3994      int *charbuf, *charbuf_end;
3995      unsigned char *dst;
3996 {
3997   struct charset *charset;
3998   /* Table of charsets to be designated to each graphic register.  */
3999   int r[4];
4000   int c, found = 0, reg;
4001   int produced_chars = 0;
4002   int multibytep = coding->dst_multibyte;
4003   Lisp_Object attrs;
4004   Lisp_Object charset_list;
4005
4006   attrs = CODING_ID_ATTRS (coding->id);
4007   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4008   if (EQ (charset_list, Qiso_2022))
4009     charset_list = Viso_2022_charset_list;
4010
4011   for (reg = 0; reg < 4; reg++)
4012     r[reg] = -1;
4013
4014   while (found < 4)
4015     {
4016       int id;
4017
4018       c = *charbuf++;
4019       if (c == '\n')
4020         break;
4021       charset = char_charset (c, charset_list, NULL);
4022       id = CHARSET_ID (charset);
4023       reg = CODING_ISO_REQUEST (coding, id);
4024       if (reg >= 0 && r[reg] < 0)
4025         {
4026           found++;
4027           r[reg] = id;
4028         }
4029     }
4030
4031   if (found)
4032     {
4033       for (reg = 0; reg < 4; reg++)
4034         if (r[reg] >= 0
4035             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4036           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4037     }
4038
4039   return dst;
4040 }
4041
4042 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4043
4044 static int
4045 encode_coding_iso_2022 (coding)
4046      struct coding_system *coding;
4047 {
4048   int multibytep = coding->dst_multibyte;
4049   int *charbuf = coding->charbuf;
4050   int *charbuf_end = charbuf + coding->charbuf_used;
4051   unsigned char *dst = coding->destination + coding->produced;
4052   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4053   int safe_room = 16;
4054   int bol_designation
4055     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4056        && CODING_ISO_BOL (coding));
4057   int produced_chars = 0;
4058   Lisp_Object attrs, eol_type, charset_list;
4059   int ascii_compatible;
4060   int c;
4061   int preferred_charset_id = -1;
4062
4063   CODING_GET_INFO (coding, attrs, charset_list);
4064   eol_type = CODING_ID_EOL_TYPE (coding->id);
4065   if (VECTORP (eol_type))
4066     eol_type = Qunix;
4067
4068   setup_iso_safe_charsets (attrs);
4069   /* Charset list may have been changed.  */
4070   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4071   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
4072
4073   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4074
4075   while (charbuf < charbuf_end)
4076     {
4077       ASSURE_DESTINATION (safe_room);
4078
4079       if (bol_designation)
4080         {
4081           unsigned char *dst_prev = dst;
4082
4083           /* We have to produce designation sequences if any now.  */
4084           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4085           bol_designation = 0;
4086           /* We are sure that designation sequences are all ASCII bytes.  */
4087           produced_chars += dst - dst_prev;
4088         }
4089
4090       c = *charbuf++;
4091
4092       if (c < 0)
4093         {
4094           /* Handle an annotation.  */
4095           switch (*charbuf)
4096             {
4097             case CODING_ANNOTATE_COMPOSITION_MASK:
4098               /* Not yet implemented.  */
4099               break;
4100             case CODING_ANNOTATE_CHARSET_MASK:
4101               preferred_charset_id = charbuf[2];
4102               if (preferred_charset_id >= 0
4103                   && NILP (Fmemq (make_number (preferred_charset_id),
4104                                   charset_list)))
4105                 preferred_charset_id = -1;
4106               break;
4107             default:
4108               abort ();
4109             }
4110           charbuf += -c - 1;
4111           continue;
4112         }
4113
4114       /* Now encode the character C.  */
4115       if (c < 0x20 || c == 0x7F)
4116         {
4117           if (c == '\n'
4118               || (c == '\r' && EQ (eol_type, Qmac)))
4119             {
4120               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4121                 ENCODE_RESET_PLANE_AND_REGISTER ();
4122               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4123                 {
4124                   int i;
4125
4126                   for (i = 0; i < 4; i++)
4127                     CODING_ISO_DESIGNATION (coding, i)
4128                       = CODING_ISO_INITIAL (coding, i);
4129                 }
4130               bol_designation
4131                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4132             }
4133           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4134             ENCODE_RESET_PLANE_AND_REGISTER ();
4135           EMIT_ONE_ASCII_BYTE (c);
4136         }
4137       else if (ASCII_CHAR_P (c))
4138         {
4139           if (ascii_compatible)
4140             EMIT_ONE_ASCII_BYTE (c);
4141           else
4142             {
4143               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4144               ENCODE_ISO_CHARACTER (charset, c);
4145             }
4146         }
4147       else if (CHAR_BYTE8_P (c))
4148         {
4149           c = CHAR_TO_BYTE8 (c);
4150           EMIT_ONE_BYTE (c);
4151         }
4152       else
4153         {
4154           struct charset *charset;
4155
4156           if (preferred_charset_id >= 0)
4157             {
4158               charset = CHARSET_FROM_ID (preferred_charset_id);
4159               if (! CHAR_CHARSET_P (c, charset))
4160                 charset = char_charset (c, charset_list, NULL);
4161             }
4162           else
4163             charset = char_charset (c, charset_list, NULL);
4164           if (!charset)
4165             {
4166               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4167                 {
4168                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4169                   charset = CHARSET_FROM_ID (charset_ascii);
4170                 }
4171               else
4172                 {
4173                   c = coding->default_char;
4174                   charset = char_charset (c, charset_list, NULL);
4175                 }
4176             }
4177           ENCODE_ISO_CHARACTER (charset, c);
4178         }
4179     }
4180
4181   if (coding->mode & CODING_MODE_LAST_BLOCK
4182       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4183     {
4184       ASSURE_DESTINATION (safe_room);
4185       ENCODE_RESET_PLANE_AND_REGISTER ();
4186     }
4187   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4188   CODING_ISO_BOL (coding) = bol_designation;
4189   coding->produced_char += produced_chars;
4190   coding->produced = dst - coding->destination;
4191   return 0;
4192 }
4193
4194 \f
4195 /*** 8,9. SJIS and BIG5 handlers ***/
4196
4197 /* Although SJIS and BIG5 are not ISO's coding system, they are used
4198    quite widely.  So, for the moment, Emacs supports them in the bare
4199    C code.  But, in the future, they may be supported only by CCL.  */
4200
4201 /* SJIS is a coding system encoding three character sets: ASCII, right
4202    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4203    as is.  A character of charset katakana-jisx0201 is encoded by
4204    "position-code + 0x80".  A character of charset japanese-jisx0208
4205    is encoded in 2-byte but two position-codes are divided and shifted
4206    so that it fit in the range below.
4207
4208    --- CODE RANGE of SJIS ---
4209    (character set)      (range)
4210    ASCII                0x00 .. 0x7F
4211    KATAKANA-JISX0201    0xA0 .. 0xDF
4212    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4213             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4214    -------------------------------
4215
4216 */
4217
4218 /* BIG5 is a coding system encoding two character sets: ASCII and
4219    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4220    character set and is encoded in two-byte.
4221
4222    --- CODE RANGE of BIG5 ---
4223    (character set)      (range)
4224    ASCII                0x00 .. 0x7F
4225    Big5 (1st byte)      0xA1 .. 0xFE
4226         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4227    --------------------------
4228
4229   */
4230
4231 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4232    Check if a text is encoded in SJIS.  If it is, return
4233    CATEGORY_MASK_SJIS, else return 0.  */
4234
4235 static int
4236 detect_coding_sjis (coding, detect_info)
4237      struct coding_system *coding;
4238      struct coding_detection_info *detect_info;
4239 {
4240   const unsigned char *src = coding->source, *src_base;
4241   const unsigned char *src_end = coding->source + coding->src_bytes;
4242   int multibytep = coding->src_multibyte;
4243   int consumed_chars = 0;
4244   int found = 0;
4245   int c;
4246
4247   detect_info->checked |= CATEGORY_MASK_SJIS;
4248   /* A coding system of this category is always ASCII compatible.  */
4249   src += coding->head_ascii;
4250
4251   while (1)
4252     {
4253       src_base = src;
4254       ONE_MORE_BYTE (c);
4255       if (c < 0x80)
4256         continue;
4257       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4258         {
4259           ONE_MORE_BYTE (c);
4260           if (c < 0x40 || c == 0x7F || c > 0xFC)
4261             break;
4262           found = CATEGORY_MASK_SJIS;
4263         }
4264       else if (c >= 0xA0 && c < 0xE0)
4265         found = CATEGORY_MASK_SJIS;
4266       else
4267         break;
4268     }
4269   detect_info->rejected |= CATEGORY_MASK_SJIS;
4270   return 0;
4271
4272  no_more_source:
4273   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4274     {
4275       detect_info->rejected |= CATEGORY_MASK_SJIS;
4276       return 0;
4277     }
4278   detect_info->found |= found;
4279   return 1;
4280 }
4281
4282 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4283    Check if a text is encoded in BIG5.  If it is, return
4284    CATEGORY_MASK_BIG5, else return 0.  */
4285
4286 static int
4287 detect_coding_big5 (coding, detect_info)
4288      struct coding_system *coding;
4289      struct coding_detection_info *detect_info;
4290 {
4291   const unsigned char *src = coding->source, *src_base;
4292   const unsigned char *src_end = coding->source + coding->src_bytes;
4293   int multibytep = coding->src_multibyte;
4294   int consumed_chars = 0;
4295   int found = 0;
4296   int c;
4297
4298   detect_info->checked |= CATEGORY_MASK_BIG5;
4299   /* A coding system of this category is always ASCII compatible.  */
4300   src += coding->head_ascii;
4301
4302   while (1)
4303     {
4304       src_base = src;
4305       ONE_MORE_BYTE (c);
4306       if (c < 0x80)
4307         continue;
4308       if (c >= 0xA1)
4309         {
4310           ONE_MORE_BYTE (c);
4311           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4312             return 0;
4313           found = CATEGORY_MASK_BIG5;
4314         }
4315       else
4316         break;
4317     }
4318   detect_info->rejected |= CATEGORY_MASK_BIG5;
4319   return 0;
4320
4321  no_more_source:
4322   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4323     {
4324       detect_info->rejected |= CATEGORY_MASK_BIG5;
4325       return 0;
4326     }
4327   detect_info->found |= found;
4328   return 1;
4329 }
4330
4331 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4332    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4333
4334 static void
4335 decode_coding_sjis (coding)
4336      struct coding_system *coding;
4337 {
4338   const unsigned char *src = coding->source + coding->consumed;
4339   const unsigned char *src_end = coding->source + coding->src_bytes;
4340   const unsigned char *src_base;
4341   int *charbuf = coding->charbuf + coding->charbuf_used;
4342   int *charbuf_end
4343     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4344   int consumed_chars = 0, consumed_chars_base;
4345   int multibytep = coding->src_multibyte;
4346   struct charset *charset_roman, *charset_kanji, *charset_kana;
4347   struct charset *charset_kanji2;
4348   Lisp_Object attrs, charset_list, val;
4349   int char_offset = coding->produced_char;
4350   int last_offset = char_offset;
4351   int last_id = charset_ascii;
4352   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4353   int byte_after_cr = -1;
4354
4355   CODING_GET_INFO (coding, attrs, charset_list);
4356
4357   val = charset_list;
4358   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4359   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4360   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4361   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4362
4363   while (1)
4364     {
4365       int c, c1;
4366       struct charset *charset;
4367
4368       src_base = src;
4369       consumed_chars_base = consumed_chars;
4370
4371       if (charbuf >= charbuf_end)
4372         break;
4373
4374       if (byte_after_cr >= 0)
4375         c = byte_after_cr, byte_after_cr = -1;
4376       else
4377         ONE_MORE_BYTE (c);
4378       if (c < 0)
4379         goto invalid_code;
4380       if (c < 0x80)
4381         {
4382           if (eol_crlf && c == '\r')
4383             ONE_MORE_BYTE (byte_after_cr);
4384           charset = charset_roman;
4385         }
4386       else if (c == 0x80 || c == 0xA0)
4387         goto invalid_code;
4388       else if (c >= 0xA1 && c <= 0xDF)
4389         {
4390           /* SJIS -> JISX0201-Kana */
4391           c &= 0x7F;
4392           charset = charset_kana;
4393         }
4394       else if (c <= 0xEF)
4395         {
4396           /* SJIS -> JISX0208 */
4397           ONE_MORE_BYTE (c1);
4398           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4399             goto invalid_code;
4400           c = (c << 8) | c1;
4401           SJIS_TO_JIS (c);
4402           charset = charset_kanji;
4403         }
4404       else if (c <= 0xFC && charset_kanji2)
4405         {
4406           /* SJIS -> JISX0213-2 */
4407           ONE_MORE_BYTE (c1);
4408           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4409             goto invalid_code;
4410           c = (c << 8) | c1;
4411           SJIS_TO_JIS2 (c);
4412           charset = charset_kanji2;
4413         }
4414       else
4415         goto invalid_code;
4416       if (charset->id != charset_ascii
4417           && last_id != charset->id)
4418         {
4419           if (last_id != charset_ascii)
4420             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4421           last_id = charset->id;
4422           last_offset = char_offset;
4423         }
4424       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4425       *charbuf++ = c;
4426       char_offset++;
4427       continue;
4428
4429     invalid_code:
4430       src = src_base;
4431       consumed_chars = consumed_chars_base;
4432       ONE_MORE_BYTE (c);
4433       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4434       char_offset++;
4435       coding->errors++;
4436     }
4437
4438  no_more_source:
4439   if (last_id != charset_ascii)
4440     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4441   coding->consumed_char += consumed_chars_base;
4442   coding->consumed = src_base - coding->source;
4443   coding->charbuf_used = charbuf - coding->charbuf;
4444 }
4445
4446 static void
4447 decode_coding_big5 (coding)
4448      struct coding_system *coding;
4449 {
4450   const unsigned char *src = coding->source + coding->consumed;
4451   const unsigned char *src_end = coding->source + coding->src_bytes;
4452   const unsigned char *src_base;
4453   int *charbuf = coding->charbuf + coding->charbuf_used;
4454   int *charbuf_end
4455     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4456   int consumed_chars = 0, consumed_chars_base;
4457   int multibytep = coding->src_multibyte;
4458   struct charset *charset_roman, *charset_big5;
4459   Lisp_Object attrs, charset_list, val;
4460   int char_offset = coding->produced_char;
4461   int last_offset = char_offset;
4462   int last_id = charset_ascii;
4463   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4464   int byte_after_cr = -1;
4465
4466   CODING_GET_INFO (coding, attrs, charset_list);
4467   val = charset_list;
4468   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4469   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4470
4471   while (1)
4472     {
4473       int c, c1;
4474       struct charset *charset;
4475
4476       src_base = src;
4477       consumed_chars_base = consumed_chars;
4478
4479       if (charbuf >= charbuf_end)
4480         break;
4481
4482       if (byte_after_cr >= 0)
4483         c = byte_after_cr, byte_after_cr = -1;
4484       else
4485         ONE_MORE_BYTE (c);
4486
4487       if (c < 0)
4488         goto invalid_code;
4489       if (c < 0x80)
4490         {
4491           if (eol_crlf && c == '\r')
4492             ONE_MORE_BYTE (byte_after_cr);
4493           charset = charset_roman;
4494         }
4495       else
4496         {
4497           /* BIG5 -> Big5 */
4498           if (c < 0xA1 || c > 0xFE)
4499             goto invalid_code;
4500           ONE_MORE_BYTE (c1);
4501           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4502             goto invalid_code;
4503           c = c << 8 | c1;
4504           charset = charset_big5;
4505         }
4506       if (charset->id != charset_ascii
4507           && last_id != charset->id)
4508         {
4509           if (last_id != charset_ascii)
4510             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4511           last_id = charset->id;
4512           last_offset = char_offset;
4513         }
4514       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4515       *charbuf++ = c;
4516       char_offset++;
4517       continue;
4518
4519     invalid_code:
4520       src = src_base;
4521       consumed_chars = consumed_chars_base;
4522       ONE_MORE_BYTE (c);
4523       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4524       char_offset++;
4525       coding->errors++;
4526     }
4527
4528  no_more_source:
4529   if (last_id != charset_ascii)
4530     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4531   coding->consumed_char += consumed_chars_base;
4532   coding->consumed = src_base - coding->source;
4533   coding->charbuf_used = charbuf - coding->charbuf;
4534 }
4535
4536 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4537    This function can encode charsets `ascii', `katakana-jisx0201',
4538    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4539    are sure that all these charsets are registered as official charset
4540    (i.e. do not have extended leading-codes).  Characters of other
4541    charsets are produced without any encoding.  If SJIS_P is 1, encode
4542    SJIS text, else encode BIG5 text.  */
4543
4544 static int
4545 encode_coding_sjis (coding)
4546      struct coding_system *coding;
4547 {
4548   int multibytep = coding->dst_multibyte;
4549   int *charbuf = coding->charbuf;
4550   int *charbuf_end = charbuf + coding->charbuf_used;
4551   unsigned char *dst = coding->destination + coding->produced;
4552   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4553   int safe_room = 4;
4554   int produced_chars = 0;
4555   Lisp_Object attrs, charset_list, val;
4556   int ascii_compatible;
4557   struct charset *charset_roman, *charset_kanji, *charset_kana;
4558   struct charset *charset_kanji2;
4559   int c;
4560
4561   CODING_GET_INFO (coding, attrs, charset_list);
4562   val = charset_list;
4563   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4564   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4565   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4566   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4567
4568   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4569
4570   while (charbuf < charbuf_end)
4571     {
4572       ASSURE_DESTINATION (safe_room);
4573       c = *charbuf++;
4574       /* Now encode the character C.  */
4575       if (ASCII_CHAR_P (c) && ascii_compatible)
4576         EMIT_ONE_ASCII_BYTE (c);
4577       else if (CHAR_BYTE8_P (c))
4578         {
4579           c = CHAR_TO_BYTE8 (c);
4580           EMIT_ONE_BYTE (c);
4581         }
4582       else
4583         {
4584           unsigned code;
4585           struct charset *charset = char_charset (c, charset_list, &code);
4586
4587           if (!charset)
4588             {
4589               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4590                 {
4591                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4592                   charset = CHARSET_FROM_ID (charset_ascii);
4593                 }
4594               else
4595                 {
4596                   c = coding->default_char;
4597                   charset = char_charset (c, charset_list, &code);
4598                 }
4599             }
4600           if (code == CHARSET_INVALID_CODE (charset))
4601             abort ();
4602           if (charset == charset_kanji)
4603             {
4604               int c1, c2;
4605               JIS_TO_SJIS (code);
4606               c1 = code >> 8, c2 = code & 0xFF;
4607               EMIT_TWO_BYTES (c1, c2);
4608             }
4609           else if (charset == charset_kana)
4610             EMIT_ONE_BYTE (code | 0x80);
4611           else if (charset_kanji2 && charset == charset_kanji2)
4612             {
4613               int c1, c2;
4614
4615               c1 = code >> 8;
4616               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4617                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4618                 {
4619                   JIS_TO_SJIS2 (code);
4620                   c1 = code >> 8, c2 = code & 0xFF;
4621                   EMIT_TWO_BYTES (c1, c2);
4622                 }
4623               else
4624                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4625             }
4626           else
4627             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4628         }
4629     }
4630   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4631   coding->produced_char += produced_chars;
4632   coding->produced = dst - coding->destination;
4633   return 0;
4634 }
4635
4636 static int
4637 encode_coding_big5 (coding)
4638      struct coding_system *coding;
4639 {
4640   int multibytep = coding->dst_multibyte;
4641   int *charbuf = coding->charbuf;
4642   int *charbuf_end = charbuf + coding->charbuf_used;
4643   unsigned char *dst = coding->destination + coding->produced;
4644   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4645   int safe_room = 4;
4646   int produced_chars = 0;
4647   Lisp_Object attrs, charset_list, val;
4648   int ascii_compatible;
4649   struct charset *charset_roman, *charset_big5;
4650   int c;
4651
4652   CODING_GET_INFO (coding, attrs, charset_list);
4653   val = charset_list;
4654   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4655   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4656   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4657
4658   while (charbuf < charbuf_end)
4659     {
4660       ASSURE_DESTINATION (safe_room);
4661       c = *charbuf++;
4662       /* Now encode the character C.  */
4663       if (ASCII_CHAR_P (c) && ascii_compatible)
4664         EMIT_ONE_ASCII_BYTE (c);
4665       else if (CHAR_BYTE8_P (c))
4666         {
4667           c = CHAR_TO_BYTE8 (c);
4668           EMIT_ONE_BYTE (c);
4669         }
4670       else
4671         {
4672           unsigned code;
4673           struct charset *charset = char_charset (c, charset_list, &code);
4674
4675           if (! charset)
4676             {
4677               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4678                 {
4679                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4680                   charset = CHARSET_FROM_ID (charset_ascii);
4681                 }
4682               else
4683                 {
4684                   c = coding->default_char;
4685                   charset = char_charset (c, charset_list, &code);
4686                 }
4687             }
4688           if (code == CHARSET_INVALID_CODE (charset))
4689             abort ();
4690           if (charset == charset_big5)
4691             {
4692               int c1, c2;
4693
4694               c1 = code >> 8, c2 = code & 0xFF;
4695               EMIT_TWO_BYTES (c1, c2);
4696             }
4697           else
4698             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4699         }
4700     }
4701   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4702   coding->produced_char += produced_chars;
4703   coding->produced = dst - coding->destination;
4704   return 0;
4705 }
4706
4707 \f
/*** 9. CCL handlers ***/
4709
4710 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4711    Check if a text is encoded in a coding system of which
4712    encoder/decoder are written in CCL program.  If it is, return
4713    CATEGORY_MASK_CCL, else return 0.  */
4714
4715 static int
4716 detect_coding_ccl (coding, detect_info)
4717      struct coding_system *coding;
4718      struct coding_detection_info *detect_info;
4719 {
4720   const unsigned char *src = coding->source, *src_base;
4721   const unsigned char *src_end = coding->source + coding->src_bytes;
4722   int multibytep = coding->src_multibyte;
4723   int consumed_chars = 0;
4724   int found = 0;
4725   unsigned char *valids;
4726   int head_ascii = coding->head_ascii;
4727   Lisp_Object attrs;
4728
4729   detect_info->checked |= CATEGORY_MASK_CCL;
4730
4731   coding = &coding_categories[coding_category_ccl];
4732   valids = CODING_CCL_VALIDS (coding);
4733   attrs = CODING_ID_ATTRS (coding->id);
4734   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4735     src += head_ascii;
4736
4737   while (1)
4738     {
4739       int c;
4740
4741       src_base = src;
4742       ONE_MORE_BYTE (c);
4743       if (c < 0 || ! valids[c])
4744         break;
4745       if ((valids[c] > 1))
4746         found = CATEGORY_MASK_CCL;
4747     }
4748   detect_info->rejected |= CATEGORY_MASK_CCL;
4749   return 0;
4750
4751  no_more_source:
4752   detect_info->found |= found;
4753   return 1;
4754 }
4755
4756 static void
4757 decode_coding_ccl (coding)
4758      struct coding_system *coding;
4759 {
4760   const unsigned char *src = coding->source + coding->consumed;
4761   const unsigned char *src_end = coding->source + coding->src_bytes;
4762   int *charbuf = coding->charbuf + coding->charbuf_used;
4763   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4764   int consumed_chars = 0;
4765   int multibytep = coding->src_multibyte;
4766   struct ccl_program ccl;
4767   int source_charbuf[1024];
4768   int source_byteidx[1024];
4769   Lisp_Object attrs, charset_list;
4770
4771   CODING_GET_INFO (coding, attrs, charset_list);
4772   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4773
4774   while (src < src_end)
4775     {
4776       const unsigned char *p = src;
4777       int *source, *source_end;
4778       int i = 0;
4779
4780       if (multibytep)
4781         while (i < 1024 && p < src_end)
4782           {
4783             source_byteidx[i] = p - src;
4784             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4785           }
4786       else
4787         while (i < 1024 && p < src_end)
4788           source_charbuf[i++] = *p++;
4789
4790       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4791         ccl.last_block = 1;
4792
4793       source = source_charbuf;
4794       source_end = source + i;
4795       while (source < source_end)
4796         {
4797           ccl_driver (&ccl, source, charbuf,
4798                       source_end - source, charbuf_end - charbuf,
4799                       charset_list);
4800           source += ccl.consumed;
4801           charbuf += ccl.produced;
4802           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4803             break;
4804         }
4805       if (source < source_end)
4806         src += source_byteidx[source - source_charbuf];
4807       else
4808         src = p;
4809       consumed_chars += source - source_charbuf;
4810
4811       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4812           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4813         break;
4814     }
4815
4816   switch (ccl.status)
4817     {
4818     case CCL_STAT_SUSPEND_BY_SRC:
4819       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4820       break;
4821     case CCL_STAT_SUSPEND_BY_DST:
4822       break;
4823     case CCL_STAT_QUIT:
4824     case CCL_STAT_INVALID_CMD:
4825       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4826       break;
4827     default:
4828       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4829       break;
4830     }
4831   coding->consumed_char += consumed_chars;
4832   coding->consumed = src - coding->source;
4833   coding->charbuf_used = charbuf - coding->charbuf;
4834 }
4835
4836 static int
4837 encode_coding_ccl (coding)
4838      struct coding_system *coding;
4839 {
4840   struct ccl_program ccl;
4841   int multibytep = coding->dst_multibyte;
4842   int *charbuf = coding->charbuf;
4843   int *charbuf_end = charbuf + coding->charbuf_used;
4844   unsigned char *dst = coding->destination + coding->produced;
4845   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4846   int destination_charbuf[1024];
4847   int i, produced_chars = 0;
4848   Lisp_Object attrs, charset_list;
4849
4850   CODING_GET_INFO (coding, attrs, charset_list);
4851   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4852
4853   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4854   ccl.dst_multibyte = coding->dst_multibyte;
4855
4856   while (charbuf < charbuf_end)
4857     {
4858       ccl_driver (&ccl, charbuf, destination_charbuf,
4859                   charbuf_end - charbuf, 1024, charset_list);
4860       if (multibytep)
4861         {
4862           ASSURE_DESTINATION (ccl.produced * 2);
4863           for (i = 0; i < ccl.produced; i++)
4864             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4865         }
4866       else
4867         {
4868           ASSURE_DESTINATION (ccl.produced);
4869           for (i = 0; i < ccl.produced; i++)
4870             *dst++ = destination_charbuf[i] & 0xFF;
4871           produced_chars += ccl.produced;
4872         }
4873       charbuf += ccl.consumed;
4874       if (ccl.status == CCL_STAT_QUIT
4875           || ccl.status == CCL_STAT_INVALID_CMD)
4876         break;
4877     }
4878
4879   switch (ccl.status)
4880     {
4881     case CCL_STAT_SUSPEND_BY_SRC:
4882       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4883       break;
4884     case CCL_STAT_SUSPEND_BY_DST:
4885       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4886       break;
4887     case CCL_STAT_QUIT:
4888     case CCL_STAT_INVALID_CMD:
4889       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4890       break;
4891     default:
4892       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4893       break;
4894     }
4895
4896   coding->produced_char += produced_chars;
4897   coding->produced = dst - coding->destination;
4898   return 0;
4899 }
4900
4901
4902 \f
4903 /*** 10, 11. no-conversion handlers ***/
4904
4905 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4906
4907 static void
4908 decode_coding_raw_text (coding)
4909      struct coding_system *coding;
4910 {
4911   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4912
4913   coding->chars_at_source = 1;
4914   coding->consumed_char = coding->src_chars;
4915   coding->consumed = coding->src_bytes;
4916   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4917     {
4918       coding->consumed_char--;
4919       coding->consumed--;
4920       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4921     }
4922   else
4923     record_conversion_result (coding, CODING_RESULT_SUCCESS);
4924 }
4925
4926 static int
4927 encode_coding_raw_text (coding)
4928      struct coding_system *coding;
4929 {
4930   int multibytep = coding->dst_multibyte;
4931   int *charbuf = coding->charbuf;
4932   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4933   unsigned char *dst = coding->destination + coding->produced;
4934   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4935   int produced_chars = 0;
4936   int c;
4937
4938   if (multibytep)
4939     {
4940       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4941
4942       if (coding->src_multibyte)
4943         while (charbuf < charbuf_end)
4944           {
4945             ASSURE_DESTINATION (safe_room);
4946             c = *charbuf++;
4947             if (ASCII_CHAR_P (c))
4948               EMIT_ONE_ASCII_BYTE (c);
4949             else if (CHAR_BYTE8_P (c))
4950               {
4951                 c = CHAR_TO_BYTE8 (c);
4952                 EMIT_ONE_BYTE (c);
4953               }
4954             else
4955               {
4956                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4957
4958                 CHAR_STRING_ADVANCE (c, p1);
4959                 while (p0 < p1)
4960                   {
4961                     EMIT_ONE_BYTE (*p0);
4962                     p0++;
4963                   }
4964               }
4965           }
4966       else
4967         while (charbuf < charbuf_end)
4968           {
4969             ASSURE_DESTINATION (safe_room);
4970             c = *charbuf++;
4971             EMIT_ONE_BYTE (c);
4972           }
4973     }
4974   else
4975     {
4976       if (coding->src_multibyte)
4977         {
4978           int safe_room = MAX_MULTIBYTE_LENGTH;
4979
4980           while (charbuf < charbuf_end)
4981             {
4982               ASSURE_DESTINATION (safe_room);
4983               c = *charbuf++;
4984               if (ASCII_CHAR_P (c))
4985                 *dst++ = c;
4986               else if (CHAR_BYTE8_P (c))
4987                 *dst++ = CHAR_TO_BYTE8 (c);
4988               else
4989                 CHAR_STRING_ADVANCE (c, dst);
4990             }
4991         }
4992       else
4993         {
4994           ASSURE_DESTINATION (charbuf_end - charbuf);
4995           while (charbuf < charbuf_end && dst < dst_end)
4996             *dst++ = *charbuf++;
4997         }
4998       produced_chars = dst - (coding->destination + coding->produced);
4999     }
5000   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5001   coding->produced_char += produced_chars;
5002   coding->produced = dst - coding->destination;
5003   return 0;
5004 }
5005
5006 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5007    Check if a text is encoded in a charset-based coding system.  If it
5008    is, return 1, else return 0.  */
5009
5010 static int
5011 detect_coding_charset (coding, detect_info)
5012      struct coding_system *coding;
5013      struct coding_detection_info *detect_info;
5014 {
5015   const unsigned char *src = coding->source, *src_base;
5016   const unsigned char *src_end = coding->source + coding->src_bytes;
5017   int multibytep = coding->src_multibyte;
5018   int consumed_chars = 0;
5019   Lisp_Object attrs, valids, name;
5020   int found = 0;
5021   int head_ascii = coding->head_ascii;
5022   int check_latin_extra = 0;
5023
5024   detect_info->checked |= CATEGORY_MASK_CHARSET;
5025
5026   coding = &coding_categories[coding_category_charset];
5027   attrs = CODING_ID_ATTRS (coding->id);
5028   valids = AREF (attrs, coding_attr_charset_valids);
5029   name = CODING_ID_NAME (coding->id);
5030   if (VECTORP (Vlatin_extra_code_table)
5031       && strcmp ((char *) SDATA (SYMBOL_NAME (name)), "iso-8859-"))
5032     check_latin_extra = 1;
5033   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5034     src += head_ascii;
5035
5036   while (1)
5037     {
5038       int c;
5039       Lisp_Object val;
5040       struct charset *charset;
5041       int dim, idx;
5042
5043       src_base = src;
5044       ONE_MORE_BYTE (c);
5045       if (c < 0)
5046         continue;
5047       val = AREF (valids, c);
5048       if (NILP (val))
5049         break;
5050       if (c >= 0x80)
5051         {
5052           if (c < 0xA0
5053               && check_latin_extra
5054               && NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
5055             break;
5056           found = CATEGORY_MASK_CHARSET;
5057         }
5058       if (INTEGERP (val))
5059         {
5060           charset = CHARSET_FROM_ID (XFASTINT (val));
5061           dim = CHARSET_DIMENSION (charset);
5062           for (idx = 1; idx < dim; idx++)
5063             {
5064               if (src == src_end)
5065                 goto too_short;
5066               ONE_MORE_BYTE (c);
5067               if (c < charset->code_space[(dim - 1 - idx) * 2]
5068                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5069                 break;
5070             }
5071           if (idx < dim)
5072             break;
5073         }
5074       else
5075         {
5076           idx = 1;
5077           for (; CONSP (val); val = XCDR (val))
5078             {
5079               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5080               dim = CHARSET_DIMENSION (charset);
5081               while (idx < dim)
5082                 {
5083                   if (src == src_end)
5084                     goto too_short;
5085                   ONE_MORE_BYTE (c);
5086                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5087                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5088                     break;
5089                   idx++;
5090                 }
5091               if (idx == dim)
5092                 {
5093                   val = Qnil;
5094                   break;
5095                 }
5096             }
5097           if (CONSP (val))
5098             break;
5099         }
5100     }
5101  too_short:
5102   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5103   return 0;
5104
5105  no_more_source:
5106   detect_info->found |= found;
5107   return 1;
5108 }
5109
5110 static void
5111 decode_coding_charset (coding)
5112      struct coding_system *coding;
5113 {
5114   const unsigned char *src = coding->source + coding->consumed;
5115   const unsigned char *src_end = coding->source + coding->src_bytes;
5116   const unsigned char *src_base;
5117   int *charbuf = coding->charbuf + coding->charbuf_used;
5118   int *charbuf_end
5119     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
5120   int consumed_chars = 0, consumed_chars_base;
5121   int multibytep = coding->src_multibyte;
5122   Lisp_Object attrs, charset_list, valids;
5123   int char_offset = coding->produced_char;
5124   int last_offset = char_offset;
5125   int last_id = charset_ascii;
5126   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5127   int byte_after_cr = -1;
5128
5129   CODING_GET_INFO (coding, attrs, charset_list);
5130   valids = AREF (attrs, coding_attr_charset_valids);
5131
5132   while (1)
5133     {
5134       int c;
5135       Lisp_Object val;
5136       struct charset *charset;
5137       int dim;
5138       int len = 1;
5139       unsigned code;
5140
5141       src_base = src;
5142       consumed_chars_base = consumed_chars;
5143
5144       if (charbuf >= charbuf_end)
5145         break;
5146
5147       if (byte_after_cr >= 0)
5148         {
5149           c = byte_after_cr;
5150           byte_after_cr = -1;
5151         }
5152       else
5153         {
5154           ONE_MORE_BYTE (c);
5155           if (eol_crlf && c == '\r')
5156             ONE_MORE_BYTE (byte_after_cr);
5157         }
5158       if (c < 0)
5159         goto invalid_code;
5160       code = c;
5161
5162       val = AREF (valids, c);
5163       if (NILP (val))
5164         goto invalid_code;
5165       if (INTEGERP (val))
5166         {
5167           charset = CHARSET_FROM_ID (XFASTINT (val));
5168           dim = CHARSET_DIMENSION (charset);
5169           while (len < dim)
5170             {
5171               ONE_MORE_BYTE (c);
5172               code = (code << 8) | c;
5173               len++;
5174             }
5175           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5176                               charset, code, c);
5177         }
5178       else
5179         {
5180           /* VAL is a list of charset IDs.  It is assured that the
5181              list is sorted by charset dimensions (smaller one
5182              comes first).  */
5183           while (CONSP (val))
5184             {
5185               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5186               dim = CHARSET_DIMENSION (charset);
5187               while (len < dim)
5188                 {
5189                   ONE_MORE_BYTE (c);
5190                   code = (code << 8) | c;
5191                   len++;
5192                 }
5193               CODING_DECODE_CHAR (coding, src, src_base,
5194                                   src_end, charset, code, c);
5195               if (c >= 0)
5196                 break;
5197               val = XCDR (val);
5198             }
5199         }
5200       if (c < 0)
5201         goto invalid_code;
5202       if (charset->id != charset_ascii
5203           && last_id != charset->id)
5204         {
5205           if (last_id != charset_ascii)
5206             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5207           last_id = charset->id;
5208           last_offset = char_offset;
5209         }
5210
5211       *charbuf++ = c;
5212       char_offset++;
5213       continue;
5214
5215     invalid_code:
5216       src = src_base;
5217       consumed_chars = consumed_chars_base;
5218       ONE_MORE_BYTE (c);
5219       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5220       char_offset++;
5221       coding->errors++;
5222     }
5223
5224  no_more_source:
5225   if (last_id != charset_ascii)
5226     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5227   coding->consumed_char += consumed_chars_base;
5228   coding->consumed = src_base - coding->source;
5229   coding->charbuf_used = charbuf - coding->charbuf;
5230 }
5231
5232 static int
5233 encode_coding_charset (coding)
5234      struct coding_system *coding;
5235 {
5236   int multibytep = coding->dst_multibyte;
5237   int *charbuf = coding->charbuf;
5238   int *charbuf_end = charbuf + coding->charbuf_used;
5239   unsigned char *dst = coding->destination + coding->produced;
5240   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5241   int safe_room = MAX_MULTIBYTE_LENGTH;
5242   int produced_chars = 0;
5243   Lisp_Object attrs, charset_list;
5244   int ascii_compatible;
5245   int c;
5246
5247   CODING_GET_INFO (coding, attrs, charset_list);
5248   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5249
5250   while (charbuf < charbuf_end)
5251     {
5252       struct charset *charset;
5253       unsigned code;
5254
5255       ASSURE_DESTINATION (safe_room);
5256       c = *charbuf++;
5257       if (ascii_compatible && ASCII_CHAR_P (c))
5258         EMIT_ONE_ASCII_BYTE (c);
5259       else if (CHAR_BYTE8_P (c))
5260         {
5261           c = CHAR_TO_BYTE8 (c);
5262           EMIT_ONE_BYTE (c);
5263         }
5264       else
5265         {
5266           charset = char_charset (c, charset_list, &code);
5267           if (charset)
5268             {
5269               if (CHARSET_DIMENSION (charset) == 1)
5270                 EMIT_ONE_BYTE (code);
5271               else if (CHARSET_DIMENSION (charset) == 2)
5272                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5273               else if (CHARSET_DIMENSION (charset) == 3)
5274                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5275               else
5276                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5277                                  (code >> 8) & 0xFF, code & 0xFF);
5278             }
5279           else
5280             {
5281               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5282                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5283               else
5284                 c = coding->default_char;
5285               EMIT_ONE_BYTE (c);
5286             }
5287         }
5288     }
5289
5290   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5291   coding->produced_char += produced_chars;
5292   coding->produced = dst - coding->destination;
5293   return 0;
5294 }
5295
5296 \f
/*** 10. C library functions ***/
5298
5299 /* Setup coding context CODING from information about CODING_SYSTEM.
5300    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5301    CODING_SYSTEM is invalid, signal an error.  */
5302
5303 void
5304 setup_coding_system (coding_system, coding)
5305      Lisp_Object coding_system;
5306      struct coding_system *coding;
5307 {
5308   Lisp_Object attrs;
5309   Lisp_Object eol_type;
5310   Lisp_Object coding_type;
5311   Lisp_Object val;
5312
5313   if (NILP (coding_system))
5314     coding_system = Qundecided;
5315
5316   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5317
5318   attrs = CODING_ID_ATTRS (coding->id);
5319   eol_type = CODING_ID_EOL_TYPE (coding->id);
5320
5321   coding->mode = 0;
5322   coding->head_ascii = -1;
5323   if (VECTORP (eol_type))
5324     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5325                             | CODING_REQUIRE_DETECTION_MASK);
5326   else if (! EQ (eol_type, Qunix))
5327     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5328                             | CODING_REQUIRE_ENCODING_MASK);
5329   else
5330     coding->common_flags = 0;
5331   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5332     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5333   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5334     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5335   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5336     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5337
5338   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5339   coding->max_charset_id = SCHARS (val) - 1;
5340   coding->safe_charsets = (char *) SDATA (val);
5341   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5342
5343   coding_type = CODING_ATTR_TYPE (attrs);
5344   if (EQ (coding_type, Qundecided))
5345     {
5346       coding->detector = NULL;
5347       coding->decoder = decode_coding_raw_text;
5348       coding->encoder = encode_coding_raw_text;
5349       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5350     }
5351   else if (EQ (coding_type, Qiso_2022))
5352     {
5353       int i;
5354       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5355
5356       /* Invoke graphic register 0 to plane 0.  */
5357       CODING_ISO_INVOCATION (coding, 0) = 0;
5358       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5359       CODING_ISO_INVOCATION (coding, 1)
5360         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5361       /* Setup the initial status of designation.  */
5362       for (i = 0; i < 4; i++)
5363         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5364       /* Not single shifting initially.  */
5365       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5366       /* Beginning of buffer should also be regarded as bol. */
5367       CODING_ISO_BOL (coding) = 1;
5368       coding->detector = detect_coding_iso_2022;
5369       coding->decoder = decode_coding_iso_2022;
5370       coding->encoder = encode_coding_iso_2022;
5371       if (flags & CODING_ISO_FLAG_SAFE)
5372         coding->mode |= CODING_MODE_SAFE_ENCODING;
5373       coding->common_flags
5374         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5375             | CODING_REQUIRE_FLUSHING_MASK);
5376       if (flags & CODING_ISO_FLAG_COMPOSITION)
5377         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5378       if (flags & CODING_ISO_FLAG_DESIGNATION)
5379         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5380       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5381         {
5382           setup_iso_safe_charsets (attrs);
5383           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5384           coding->max_charset_id = SCHARS (val) - 1;
5385           coding->safe_charsets = (char *) SDATA (val);
5386         }
5387       CODING_ISO_FLAGS (coding) = flags;
5388     }
5389   else if (EQ (coding_type, Qcharset))
5390     {
5391       coding->detector = detect_coding_charset;
5392       coding->decoder = decode_coding_charset;
5393       coding->encoder = encode_coding_charset;
5394       coding->common_flags
5395         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5396     }
5397   else if (EQ (coding_type, Qutf_8))
5398     {
5399       val = AREF (attrs, coding_attr_utf_bom);
5400       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5401                                    : EQ (val, Qt) ? utf_with_bom
5402                                    : utf_without_bom);
5403       coding->detector = detect_coding_utf_8;
5404       coding->decoder = decode_coding_utf_8;
5405       coding->encoder = encode_coding_utf_8;
5406       coding->common_flags
5407         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5408       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5409         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5410     }
5411   else if (EQ (coding_type, Qutf_16))
5412     {
5413       val = AREF (attrs, coding_attr_utf_bom);
5414       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5415                                     : EQ (val, Qt) ? utf_with_bom
5416                                     : utf_without_bom);
5417       val = AREF (attrs, coding_attr_utf_16_endian);
5418       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5419                                        : utf_16_little_endian);
5420       CODING_UTF_16_SURROGATE (coding) = 0;
5421       coding->detector = detect_coding_utf_16;
5422       coding->decoder = decode_coding_utf_16;
5423       coding->encoder = encode_coding_utf_16;
5424       coding->common_flags
5425         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5426       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5427         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5428     }
5429   else if (EQ (coding_type, Qccl))
5430     {
5431       coding->detector = detect_coding_ccl;
5432       coding->decoder = decode_coding_ccl;
5433       coding->encoder = encode_coding_ccl;
5434       coding->common_flags
5435         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5436             | CODING_REQUIRE_FLUSHING_MASK);
5437     }
5438   else if (EQ (coding_type, Qemacs_mule))
5439     {
5440       coding->detector = detect_coding_emacs_mule;
5441       coding->decoder = decode_coding_emacs_mule;
5442       coding->encoder = encode_coding_emacs_mule;
5443       coding->common_flags
5444         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5445       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5446           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5447         {
5448           Lisp_Object tail, safe_charsets;
5449           int max_charset_id = 0;
5450
5451           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5452                tail = XCDR (tail))
5453             if (max_charset_id < XFASTINT (XCAR (tail)))
5454               max_charset_id = XFASTINT (XCAR (tail));
5455           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5456                                         make_number (255));
5457           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5458                tail = XCDR (tail))
5459             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5460           coding->max_charset_id = max_charset_id;
5461           coding->safe_charsets = (char *) SDATA (safe_charsets);
5462         }
5463     }
5464   else if (EQ (coding_type, Qshift_jis))
5465     {
5466       coding->detector = detect_coding_sjis;
5467       coding->decoder = decode_coding_sjis;
5468       coding->encoder = encode_coding_sjis;
5469       coding->common_flags
5470         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5471     }
5472   else if (EQ (coding_type, Qbig5))
5473     {
5474       coding->detector = detect_coding_big5;
5475       coding->decoder = decode_coding_big5;
5476       coding->encoder = encode_coding_big5;
5477       coding->common_flags
5478         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5479     }
5480   else                          /* EQ (coding_type, Qraw_text) */
5481     {
5482       coding->detector = NULL;
5483       coding->decoder = decode_coding_raw_text;
5484       coding->encoder = encode_coding_raw_text;
5485       if (! EQ (eol_type, Qunix))
5486         {
5487           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5488           if (! VECTORP (eol_type))
5489             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5490         }
5491
5492     }
5493
5494   return;
5495 }
5496
5497 /* Return a list of charsets supported by CODING.  */
5498
5499 Lisp_Object
5500 coding_charset_list (coding)
5501      struct coding_system *coding;
5502 {
5503   Lisp_Object attrs, charset_list;
5504
5505   CODING_GET_INFO (coding, attrs, charset_list);
5506   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5507     {
5508       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5509
5510       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5511         charset_list = Viso_2022_charset_list;
5512     }
5513   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5514     {
5515       charset_list = Vemacs_mule_charset_list;
5516     }
5517   return charset_list;
5518 }
5519
5520
5521 /* Return raw-text or one of its subsidiaries that has the same
5522    eol_type as CODING-SYSTEM.  */
5523
5524 Lisp_Object
5525 raw_text_coding_system (coding_system)
5526      Lisp_Object coding_system;
5527 {
5528   Lisp_Object spec, attrs;
5529   Lisp_Object eol_type, raw_text_eol_type;
5530
5531   if (NILP (coding_system))
5532     return Qraw_text;
5533   spec = CODING_SYSTEM_SPEC (coding_system);
5534   attrs = AREF (spec, 0);
5535
5536   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5537     return coding_system;
5538
5539   eol_type = AREF (spec, 2);
5540   if (VECTORP (eol_type))
5541     return Qraw_text;
5542   spec = CODING_SYSTEM_SPEC (Qraw_text);
5543   raw_text_eol_type = AREF (spec, 2);
5544   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5545           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5546           : AREF (raw_text_eol_type, 2));
5547 }
5548
5549
5550 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5551    does, return one of the subsidiary that has the same eol-spec as
5552    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
5553    inherit end-of-line format from the system's setting
5554    (system_eol_type).  */
5555
5556 Lisp_Object
5557 coding_inherit_eol_type (coding_system, parent)
5558      Lisp_Object coding_system, parent;
5559 {
5560   Lisp_Object spec, eol_type;
5561
5562   if (NILP (coding_system))
5563     coding_system = Qraw_text;
5564   spec = CODING_SYSTEM_SPEC (coding_system);
5565   eol_type = AREF (spec, 2);
5566   if (VECTORP (eol_type))
5567     {
5568       Lisp_Object parent_eol_type;
5569
5570       if (! NILP (parent))
5571         {
5572           Lisp_Object parent_spec;
5573
5574           parent_spec = CODING_SYSTEM_SPEC (parent);
5575           parent_eol_type = AREF (parent_spec, 2);
5576         }
5577       else
5578         parent_eol_type = system_eol_type;
5579       if (EQ (parent_eol_type, Qunix))
5580         coding_system = AREF (eol_type, 0);
5581       else if (EQ (parent_eol_type, Qdos))
5582         coding_system = AREF (eol_type, 1);
5583       else if (EQ (parent_eol_type, Qmac))
5584         coding_system = AREF (eol_type, 2);
5585     }
5586   return coding_system;
5587 }
5588
/* Emacs has a mechanism to automatically detect the coding system of
   a text when it is encoded in a format such as Emacs' internal
   format, ISO2022, SJIS, or BIG5.  However, some coding systems
   cannot be distinguished from each other accurately because they
   use the same range of codes.  So coding systems are first grouped
   into the following categories:
5594
5595    o coding-category-emacs-mule
5596
5597         The category for a coding system which has the same code range
5598         as Emacs' internal format.  Assigned the coding-system (Lisp
5599         symbol) `emacs-mule' by default.
5600
5601    o coding-category-sjis
5602
5603         The category for a coding system which has the same code range
5604         as SJIS.  Assigned the coding-system (Lisp
5605         symbol) `japanese-shift-jis' by default.
5606
5607    o coding-category-iso-7
5608
5609         The category for a coding system which has the same code range
5610         as ISO2022 of 7-bit environment.  This doesn't use any locking
5611         shift and single shift functions.  This can encode/decode all
5612         charsets.  Assigned the coding-system (Lisp symbol)
5613         `iso-2022-7bit' by default.
5614
5615    o coding-category-iso-7-tight
5616
5617         Same as coding-category-iso-7 except that this can
5618         encode/decode only the specified charsets.
5619
5620    o coding-category-iso-8-1
5621
5622         The category for a coding system which has the same code range
5623         as ISO2022 of 8-bit environment and graphic plane 1 used only
5624         for DIMENSION1 charset.  This doesn't use any locking shift
5625         and single shift functions.  Assigned the coding-system (Lisp
5626         symbol) `iso-latin-1' by default.
5627
5628    o coding-category-iso-8-2
5629
5630         The category for a coding system which has the same code range
5631         as ISO2022 of 8-bit environment and graphic plane 1 used only
5632         for DIMENSION2 charset.  This doesn't use any locking shift
5633         and single shift functions.  Assigned the coding-system (Lisp
5634         symbol) `japanese-iso-8bit' by default.
5635
5636    o coding-category-iso-7-else
5637
5638         The category for a coding system which has the same code range
        as ISO2022 of 7-bit environment but uses locking shift or
5640         single shift functions.  Assigned the coding-system (Lisp
5641         symbol) `iso-2022-7bit-lock' by default.
5642
5643    o coding-category-iso-8-else
5644
5645         The category for a coding system which has the same code range
        as ISO2022 of 8-bit environment but uses locking shift or
5647         single shift functions.  Assigned the coding-system (Lisp
5648         symbol) `iso-2022-8bit-ss2' by default.
5649
5650    o coding-category-big5
5651
5652         The category for a coding system which has the same code range
5653         as BIG5.  Assigned the coding-system (Lisp symbol)
5654         `cn-big5' by default.
5655
5656    o coding-category-utf-8
5657
5658         The category for a coding system which has the same code range
5659         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5660         symbol) `utf-8' by default.
5661
5662    o coding-category-utf-16-be
5663
5664         The category for a coding system in which a text has an
5665         Unicode signature (cf. Unicode Standard) in the order of BIG
5666         endian at the head.  Assigned the coding-system (Lisp symbol)
5667         `utf-16-be' by default.
5668
5669    o coding-category-utf-16-le
5670
5671         The category for a coding system in which a text has an
5672         Unicode signature (cf. Unicode Standard) in the order of
5673         LITTLE endian at the head.  Assigned the coding-system (Lisp
5674         symbol) `utf-16-le' by default.
5675
5676    o coding-category-ccl
5677
5678         The category for a coding system of which encoder/decoder is
5679         written in CCL programs.  The default value is nil, i.e., no
5680         coding system is assigned.
5681
5682    o coding-category-binary
5683
5684         The category for a coding system not categorized in any of the
5685         above.  Assigned the coding-system (Lisp symbol)
5686         `no-conversion' by default.
5687
   Each category is a Lisp symbol whose value is the actual
   `coding-system' (also a Lisp symbol) assigned to it by a user.
   What Emacs actually detects is the category of a coding system;
   it then uses the `coding-system' assigned to that category.  If
   Emacs can't narrow the candidates down to a single category, it
   selects the category of the highest priority.  The priorities of
   the categories are also specified by a user, in the Lisp variable
   `coding-category-list'.
5695
5696 */
5697
/* Bit flags recording which end-of-line conventions have been seen
   in a text.  The non-zero values are distinct bits so that they can
   be OR-ed together (decode_eol does this) and tested with `&'
   (adjust_coding_eol_type does this).  */
#define EOL_SEEN_NONE   0
#define EOL_SEEN_LF     1
#define EOL_SEEN_CR     2
#define EOL_SEEN_CRLF   4
5702
5703 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5704    SOURCE is encoded.  If CATEGORY is one of
5705    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5706    two-byte, else they are encoded by one-byte.
5707
5708    Return one of EOL_SEEN_XXX.  */
5709
5710 #define MAX_EOL_CHECK_COUNT 3
5711
5712 static int
5713 detect_eol (source, src_bytes, category)
5714      const unsigned char *source;
5715      EMACS_INT src_bytes;
5716      enum coding_category category;
5717 {
5718   const unsigned char *src = source, *src_end = src + src_bytes;
5719   unsigned char c;
5720   int total  = 0;
5721   int eol_seen = EOL_SEEN_NONE;
5722
5723   if ((1 << category) & CATEGORY_MASK_UTF_16)
5724     {
5725       int msb, lsb;
5726
5727       msb = category == (coding_category_utf_16_le
5728                          | coding_category_utf_16_le_nosig);
5729       lsb = 1 - msb;
5730
5731       while (src + 1 < src_end)
5732         {
5733           c = src[lsb];
5734           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5735             {
5736               int this_eol;
5737
5738               if (c == '\n')
5739                 this_eol = EOL_SEEN_LF;
5740               else if (src + 3 >= src_end
5741                        || src[msb + 2] != 0
5742                        || src[lsb + 2] != '\n')
5743                 this_eol = EOL_SEEN_CR;
5744               else
5745                 this_eol = EOL_SEEN_CRLF;
5746
5747               if (eol_seen == EOL_SEEN_NONE)
5748                 /* This is the first end-of-line.  */
5749                 eol_seen = this_eol;
5750               else if (eol_seen != this_eol)
5751                 {
5752                   /* The found type is different from what found before.  */
5753                   eol_seen = EOL_SEEN_LF;
5754                   break;
5755                 }
5756               if (++total == MAX_EOL_CHECK_COUNT)
5757                 break;
5758             }
5759           src += 2;
5760         }
5761     }
5762   else
5763     {
5764       while (src < src_end)
5765         {
5766           c = *src++;
5767           if (c == '\n' || c == '\r')
5768             {
5769               int this_eol;
5770
5771               if (c == '\n')
5772                 this_eol = EOL_SEEN_LF;
5773               else if (src >= src_end || *src != '\n')
5774                 this_eol = EOL_SEEN_CR;
5775               else
5776                 this_eol = EOL_SEEN_CRLF, src++;
5777
5778               if (eol_seen == EOL_SEEN_NONE)
5779                 /* This is the first end-of-line.  */
5780                 eol_seen = this_eol;
5781               else if (eol_seen != this_eol)
5782                 {
5783                   /* The found type is different from what found before.  */
5784                   eol_seen = EOL_SEEN_LF;
5785                   break;
5786                 }
5787               if (++total == MAX_EOL_CHECK_COUNT)
5788                 break;
5789             }
5790         }
5791     }
5792   return eol_seen;
5793 }
5794
5795
5796 static Lisp_Object
5797 adjust_coding_eol_type (coding, eol_seen)
5798      struct coding_system *coding;
5799      int eol_seen;
5800 {
5801   Lisp_Object eol_type;
5802
5803   eol_type = CODING_ID_EOL_TYPE (coding->id);
5804   if (eol_seen & EOL_SEEN_LF)
5805     {
5806       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5807       eol_type = Qunix;
5808     }
5809   else if (eol_seen & EOL_SEEN_CRLF)
5810     {
5811       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5812       eol_type = Qdos;
5813     }
5814   else if (eol_seen & EOL_SEEN_CR)
5815     {
5816       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5817       eol_type = Qmac;
5818     }
5819   return eol_type;
5820 }
5821
5822 /* Detect how a text specified in CODING is encoded.  If a coding
5823    system is detected, update fields of CODING by the detected coding
5824    system.  */
5825
5826 void
5827 detect_coding (coding)
5828      struct coding_system *coding;
5829 {
5830   const unsigned char *src, *src_end;
5831
5832   coding->consumed = coding->consumed_char = 0;
5833   coding->produced = coding->produced_char = 0;
5834   coding_set_source (coding);
5835
5836   src_end = coding->source + coding->src_bytes;
5837   coding->head_ascii = 0;
5838
5839   /* If we have not yet decided the text encoding type, detect it
5840      now.  */
5841   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5842     {
5843       int c, i;
5844       struct coding_detection_info detect_info;
5845       int null_byte_found = 0, eight_bit_found = 0;
5846
5847       detect_info.checked = detect_info.found = detect_info.rejected = 0;
5848       for (src = coding->source; src < src_end; src++)
5849         {
5850           c = *src;
5851           if (c & 0x80)
5852             {
5853               eight_bit_found = 1;
5854               if (null_byte_found)
5855                 break;
5856             }
5857           else if (c < 0x20)
5858             {
5859               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5860                   && ! inhibit_iso_escape_detection
5861                   && ! detect_info.checked)
5862                 {
5863                   if (detect_coding_iso_2022 (coding, &detect_info))
5864                     {
5865                       /* We have scanned the whole data.  */
5866                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5867                         {
5868                           /* We didn't find an 8-bit code.  We may
5869                              have found a null-byte, but it's very
5870                              rare that a binary file confirm to
5871                              ISO-2022.  */
5872                           src = src_end;
5873                           coding->head_ascii = src - coding->source;
5874                         }
5875                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
5876                       break;
5877                     }
5878                 }
5879               else if (! c)
5880                 {
5881                   null_byte_found = 1;
5882                   if (eight_bit_found)
5883                     break;
5884                 }
5885               if (! eight_bit_found)
5886                 coding->head_ascii++;
5887             }
5888           else if (! eight_bit_found)
5889             coding->head_ascii++;
5890         }
5891
5892       if (null_byte_found || eight_bit_found
5893           || coding->head_ascii < coding->src_bytes
5894           || detect_info.found)
5895         {
5896           enum coding_category category;
5897           struct coding_system *this;
5898
5899           if (coding->head_ascii == coding->src_bytes)
5900             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
5901             for (i = 0; i < coding_category_raw_text; i++)
5902               {
5903                 category = coding_priorities[i];
5904                 this = coding_categories + category;
5905                 if (detect_info.found & (1 << category))
5906                   break;
5907               }
5908           else
5909             {
5910               if (null_byte_found)
5911                 {
5912                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
5913                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
5914                 }
5915               for (i = 0; i < coding_category_raw_text; i++)
5916                 {
5917                   category = coding_priorities[i];
5918                   this = coding_categories + category;
5919                   if (this->id < 0)
5920                     {
5921                       /* No coding system of this category is defined.  */
5922                       detect_info.rejected |= (1 << category);
5923                     }
5924                   else if (category >= coding_category_raw_text)
5925                     continue;
5926                   else if (detect_info.checked & (1 << category))
5927                     {
5928                       if (detect_info.found & (1 << category))
5929                         break;
5930                     }
5931                   else if ((*(this->detector)) (coding, &detect_info)
5932                            && detect_info.found & (1 << category))
5933                     {
5934                       if (category == coding_category_utf_16_auto)
5935                         {
5936                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5937                             category = coding_category_utf_16_le;
5938                           else
5939                             category = coding_category_utf_16_be;
5940                         }
5941                       break;
5942                     }
5943                 }
5944             }
5945
5946           if (i < coding_category_raw_text)
5947             setup_coding_system (CODING_ID_NAME (this->id), coding);
5948           else if (null_byte_found)
5949             setup_coding_system (Qno_conversion, coding);
5950           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
5951                    == CATEGORY_MASK_ANY)
5952             setup_coding_system (Qraw_text, coding);
5953           else if (detect_info.rejected)
5954             for (i = 0; i < coding_category_raw_text; i++)
5955               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5956                 {
5957                   this = coding_categories + coding_priorities[i];
5958                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5959                   break;
5960                 }
5961         }
5962     }
5963   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5964            == coding_category_utf_8_auto)
5965     {
5966       Lisp_Object coding_systems;
5967       struct coding_detection_info detect_info;
5968
5969       coding_systems
5970         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
5971       detect_info.found = detect_info.rejected = 0;
5972       coding->head_ascii = 0;
5973       if (CONSP (coding_systems)
5974           && detect_coding_utf_8 (coding, &detect_info))
5975         {
5976           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
5977             setup_coding_system (XCAR (coding_systems), coding);
5978           else
5979             setup_coding_system (XCDR (coding_systems), coding);
5980         }
5981     }
5982   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5983            == coding_category_utf_16_auto)
5984     {
5985       Lisp_Object coding_systems;
5986       struct coding_detection_info detect_info;
5987
5988       coding_systems
5989         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
5990       detect_info.found = detect_info.rejected = 0;
5991       coding->head_ascii = 0;
5992       if (CONSP (coding_systems)
5993           && detect_coding_utf_16 (coding, &detect_info))
5994         {
5995           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5996             setup_coding_system (XCAR (coding_systems), coding);
5997           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5998             setup_coding_system (XCDR (coding_systems), coding);
5999         }
6000     }
6001 }
6002
6003
6004 static void
6005 decode_eol (coding)
6006      struct coding_system *coding;
6007 {
6008   Lisp_Object eol_type;
6009   unsigned char *p, *pbeg, *pend;
6010
6011   eol_type = CODING_ID_EOL_TYPE (coding->id);
6012   if (EQ (eol_type, Qunix))
6013     return;
6014
6015   if (NILP (coding->dst_object))
6016     pbeg = coding->destination;
6017   else
6018     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6019   pend = pbeg + coding->produced;
6020
6021   if (VECTORP (eol_type))
6022     {
6023       int eol_seen = EOL_SEEN_NONE;
6024
6025       for (p = pbeg; p < pend; p++)
6026         {
6027           if (*p == '\n')
6028             eol_seen |= EOL_SEEN_LF;
6029           else if (*p == '\r')
6030             {
6031               if (p + 1 < pend && *(p + 1) == '\n')
6032                 {
6033                   eol_seen |= EOL_SEEN_CRLF;
6034                   p++;
6035                 }
6036               else
6037                 eol_seen |= EOL_SEEN_CR;
6038             }
6039         }
6040       if (eol_seen != EOL_SEEN_NONE
6041           && eol_seen != EOL_SEEN_LF
6042           && eol_seen != EOL_SEEN_CRLF
6043           && eol_seen != EOL_SEEN_CR)
6044         eol_seen = EOL_SEEN_LF;
6045       if (eol_seen != EOL_SEEN_NONE)
6046         eol_type = adjust_coding_eol_type (coding, eol_seen);
6047     }
6048
6049   if (EQ (eol_type, Qmac))
6050     {
6051       for (p = pbeg; p < pend; p++)
6052         if (*p == '\r')
6053           *p = '\n';
6054     }
6055   else if (EQ (eol_type, Qdos))
6056     {
6057       int n = 0;
6058
6059       if (NILP (coding->dst_object))
6060         {
6061           /* Start deleting '\r' from the tail to minimize the memory
6062              movement.  */
6063           for (p = pend - 2; p >= pbeg; p--)
6064             if (*p == '\r')
6065               {
6066                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6067                 n++;
6068               }
6069         }
6070       else
6071         {
6072           int pos_byte = coding->dst_pos_byte;
6073           int pos = coding->dst_pos;
6074           int pos_end = pos + coding->produced_char - 1;
6075
6076           while (pos < pos_end)
6077             {
6078               p = BYTE_POS_ADDR (pos_byte);
6079               if (*p == '\r' && p[1] == '\n')
6080                 {
6081                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6082                   n++;
6083                   pos_end--;
6084                 }
6085               pos++;
6086               if (coding->dst_multibyte)
6087                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6088               else
6089                 pos_byte++;
6090             }
6091         }
6092       coding->produced -= n;
6093       coding->produced_char -= n;
6094     }
6095 }
6096
6097
6098 /* Return a translation table (or list of them) from coding system
6099    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6100    decoding (ENCODEP is zero). */
6101
6102 static Lisp_Object
6103 get_translation_table (attrs, encodep, max_lookup)
6104      Lisp_Object attrs;
6105      int encodep, *max_lookup;
6106 {
6107   Lisp_Object standard, translation_table;
6108   Lisp_Object val;
6109
6110   if (encodep)
6111     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6112       standard = Vstandard_translation_table_for_encode;
6113   else
6114     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6115       standard = Vstandard_translation_table_for_decode;
6116   if (NILP (translation_table))
6117     translation_table = standard;
6118   else
6119     {
6120       if (SYMBOLP (translation_table))
6121         translation_table = Fget (translation_table, Qtranslation_table);
6122       else if (CONSP (translation_table))
6123         {
6124           translation_table = Fcopy_sequence (translation_table);
6125           for (val = translation_table; CONSP (val); val = XCDR (val))
6126             if (SYMBOLP (XCAR (val)))
6127               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6128         }
6129       if (CHAR_TABLE_P (standard))
6130         {
6131           if (CONSP (translation_table))
6132             translation_table = nconc2 (translation_table,
6133                                         Fcons (standard, Qnil));
6134           else
6135             translation_table = Fcons (translation_table,
6136                                        Fcons (standard, Qnil));
6137         }
6138     }
6139
6140   if (max_lookup)
6141     {
6142       *max_lookup = 1;
6143       if (CHAR_TABLE_P (translation_table)
6144           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6145         {
6146           val = XCHAR_TABLE (translation_table)->extras[1];
6147           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6148             *max_lookup = XFASTINT (val);
6149         }
6150       else if (CONSP (translation_table))
6151         {
6152           Lisp_Object tail, val;
6153
6154           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6155             if (CHAR_TABLE_P (XCAR (tail))
6156                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6157               {
6158                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6159                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6160                   *max_lookup = XFASTINT (val);
6161               }
6162         }
6163     }
6164   return translation_table;
6165 }
6166
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables, and leave the outcome in C and TRANS.

   TRANS is first set to nil.  With a single char-table, the entry
   for C is fetched: if it is a character, it is stored in C and
   TRANS stays nil; otherwise TRANS holds the entry (possibly nil).
   With a list, the char-table elements are applied in order, each
   character result replacing C before the next lookup, and the scan
   stops at the first entry that is neither a character nor nil,
   which is left in TRANS.

   C and TRANS must be lvalues, and all three arguments are
   evaluated more than once.  The expansion declares a local
   variable named `tail'.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
  do {                                                          \
    trans = Qnil;                                               \
    if (CHAR_TABLE_P (table))                                   \
      {                                                         \
        trans = CHAR_TABLE_REF (table, c);                      \
        if (CHARACTERP (trans))                                 \
          c = XFASTINT (trans), trans = Qnil;                   \
      }                                                         \
    else if (CONSP (table))                                     \
      {                                                         \
        Lisp_Object tail;                                       \
                                                                \
        for (tail = table; CONSP (tail); tail = XCDR (tail))    \
          if (CHAR_TABLE_P (XCAR (tail)))                       \
            {                                                   \
              trans = CHAR_TABLE_REF (XCAR (tail), c);          \
              if (CHARACTERP (trans))                           \
                c = XFASTINT (trans), trans = Qnil;             \
              else if (! NILP (trans))                          \
                break;                                          \
            }                                                   \
      }                                                         \
  } while (0)
6191
6192
6193 static Lisp_Object
6194 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
6195      Lisp_Object val;
6196      int *buf, *buf_end;
6197      int last_block;
6198      int *from_nchars, *to_nchars;
6199 {
6200   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
6201      [TO-CHAR ...].  */
6202   if (CONSP (val))
6203     {
6204       Lisp_Object from, tail;
6205       int i, len;
6206
6207       for (tail = val; CONSP (tail); tail = XCDR (tail))
6208         {
6209           val = XCAR (tail);
6210           from = XCAR (val);
6211           len = ASIZE (from);
6212           for (i = 0; i < len; i++)
6213             {
6214               if (buf + i == buf_end)
6215                 {
6216                   if (! last_block)
6217                     return Qt;
6218                   break;
6219                 }
6220               if (XINT (AREF (from, i)) != buf[i])
6221                 break;
6222             }
6223           if (i == len)
6224             {
6225               val = XCDR (val);
6226               *from_nchars = len;
6227               break;
6228             }
6229         }
6230       if (! CONSP (tail))
6231         return Qnil;
6232     }
6233   if (VECTORP (val))
6234     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6235   else
6236     *buf = XINT (val);
6237   return val;
6238 }
6239
6240
     /* Write decoded characters to the destination, starting at
        CODING->destination + CODING->produced.

        If CODING->chars_at_source is zero, the characters are the
        CODING->charbuf_used elements of CODING->charbuf.  Each
        non-negative element is looked up in TRANSLATION_TABLE and written
        out; a negative element -N starts an annotation datum of N
        elements, which is skipped.  Each processed source slot is
        overwritten: the first slot of a source sequence receives the
        number of characters produced for it and the remaining slots of
        that sequence receive 0.  If get_translation reports (by returning
        Qt) that it needs characters beyond the end of charbuf, which it
        only does when LAST_BLOCK is zero, processing stops there.

        Otherwise the bytes at CODING->source are copied to the
        destination: through ONE_MORE_BYTE or EMIT_ONE_BYTE (both defined
        elsewhere) when CODING->src_multibyte and CODING->dst_multibyte
        differ, and verbatim when they are equal.

        Update CODING->produced and CODING->produced_char, call
        insert_from_gap if the destination is a buffer and at least one
        character was produced, and return the number of charbuf elements
        left unprocessed (always 0 in the chars_at_source case).  */
6241 static int
6242 produce_chars (coding, translation_table, last_block)
6243      struct coding_system *coding;
6244      Lisp_Object translation_table;
6245      int last_block;
6246 {
6247   unsigned char *dst = coding->destination + coding->produced;
6248   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6249   EMACS_INT produced;
6250   EMACS_INT produced_chars = 0;
6251   int carryover = 0;
6252
6253   if (! coding->chars_at_source)
6254     {
6255       /* Source characters are in coding->charbuf.  */
6256       int *buf = coding->charbuf;
6257       int *buf_end = buf + coding->charbuf_used;
6258
           /* Source and destination are the same object: the destination
              limit is the end of the already consumed source bytes.  */
6259       if (EQ (coding->src_object, coding->dst_object))
6260         {
6261           coding_set_source (coding);
6262           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6263         }
6264
6265       while (buf < buf_end)
6266         {
6267           int c = *buf, i;
6268
6269           if (c >= 0)
6270             {
6271               int from_nchars = 1, to_nchars = 1;
6272               Lisp_Object trans = Qnil;
6273
6274               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6275               if (! NILP (trans))
6276                 {
6277                   trans = get_translation (trans, buf, buf_end, last_block,
6278                                            &from_nchars, &to_nchars);
                       /* Qt: the translation needs more characters than
                          are available; leave the rest as carryover.  */
6279                   if (EQ (trans, Qt))
6280                     break;
                       /* get_translation stored the first translated
                          character in *BUF (unchanged if nothing matched).  */
6281                   c = *buf;
6282                 }
6283
                   /* Make sure there is room for TO_NCHARS characters.  */
6284               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6285                 {
6286                   dst = alloc_destination (coding,
6287                                            buf_end - buf
6288                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6289                                            dst);
6290                   if (EQ (coding->src_object, coding->dst_object))
6291                     {
6292                       coding_set_source (coding);
6293                       dst_end = ((unsigned char *) coding->source) + coding->consumed;
6294                     }
6295                   else
6296                     dst_end = coding->destination + coding->dst_bytes;
6297                 }
6298
6299               for (i = 0; i < to_nchars; i++)
6300                 {
                       /* Characters after the first come from the TO
                          vector returned by get_translation.  */
6301                   if (i > 0)
6302                     c = XINT (AREF (trans, i));
6303                   if (coding->dst_multibyte
6304                       || ! CHAR_BYTE8_P (c))
6305                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6306                   else
6307                     *dst++ = CHAR_TO_BYTE8 (c);
6308                 }
6309               produced_chars += to_nchars;
                   /* Overwrite the consumed slots with the number of
                      characters produced (first slot) and zeros (the
                      rest); produce_annotation adds these values up to
                      track the destination position.  */
6310               *buf++ = to_nchars;
6311               while (--from_nchars > 0)
6312                 *buf++ = 0;
6313             }
6314           else
6315             /* This is an annotation datum.  (-C) is the length.  */
6316             buf += -c;
6317         }
6318       carryover = buf_end - buf;
6319     }
6320   else
6321     {
6322       /* Source characters are at coding->source.  */
6323       const unsigned char *src = coding->source;
6324       const unsigned char *src_end = src + coding->consumed;
           /* NOTE(review): SRC_END is derived from coding->consumed here
              but recomputed from coding->src_bytes after each
              reallocation below; presumably the two are equal in this
              mode -- confirm.  */
6325
6326       if (EQ (coding->dst_object, coding->src_object))
6327         dst_end = (unsigned char *) src;
6328       if (coding->src_multibyte != coding->dst_multibyte)
6329         {
6330           if (coding->src_multibyte)
6331             {
6332               int multibytep = 1;
6333               EMACS_INT consumed_chars;
6334
6335               while (1)
6336                 {
6337                   const unsigned char *src_base = src;
6338                   int c;
6339
                       /* ONE_MORE_BYTE is defined elsewhere; presumably it
                          jumps to no_more_source once SRC reaches SRC_END,
                          which is the only way out of this loop --
                          confirm.  */
6340                   ONE_MORE_BYTE (c);
6341                   if (dst == dst_end)
6342                     {
6343                       if (EQ (coding->src_object, coding->dst_object))
6344                         dst_end = (unsigned char *) src;
6345                       if (dst == dst_end)
6346                         {
                               /* Remember SRC as an offset so that it can
                                  be recomputed after coding_set_source.  */
6347                           EMACS_INT offset = src - coding->source;
6348
6349                           dst = alloc_destination (coding, src_end - src + 1,
6350                                                    dst);
6351                           dst_end = coding->destination + coding->dst_bytes;
6352                           coding_set_source (coding);
6353                           src = coding->source + offset;
6354                           src_end = coding->source + coding->src_bytes;
6355                           if (EQ (coding->src_object, coding->dst_object))
6356                             dst_end = (unsigned char *) src;
6357                         }
6358                     }
6359                   *dst++ = c;
6360                   produced_chars++;
6361                 }
6362             no_more_source:
6363               ;
6364             }
6365           else
6366             while (src < src_end)
6367               {
6368                 int multibytep = 1;
6369                 int c = *src++;
6370
                     /* Keep at least two free bytes in front of DST_END
                        before calling EMIT_ONE_BYTE.  */
6371                 if (dst >= dst_end - 1)
6372                   {
6373                     if (EQ (coding->src_object, coding->dst_object))
6374                       dst_end = (unsigned char *) src;
6375                     if (dst >= dst_end - 1)
6376                       {
6377                         EMACS_INT offset = src - coding->source;
6378                         EMACS_INT more_bytes;
6379
6380                         if (EQ (coding->src_object, coding->dst_object))
6381                           more_bytes = ((src_end - src) / 2) + 2;
6382                         else
6383                           more_bytes = src_end - src + 2;
6384                         dst = alloc_destination (coding, more_bytes, dst);
6385                         dst_end = coding->destination + coding->dst_bytes;
6386                         coding_set_source (coding);
6387                         src = coding->source + offset;
6388                         src_end = coding->source + coding->src_bytes;
6389                         if (EQ (coding->src_object, coding->dst_object))
6390                           dst_end = (unsigned char *) src;
6391                       }
6392                   }
6393                 EMIT_ONE_BYTE (c);
6394               }
6395         }
6396       else
6397         {
               /* Same multibyteness on both sides: plain byte copy.  */
6398           if (!EQ (coding->src_object, coding->dst_object))
6399             {
6400               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6401
6402               if (require > 0)
6403                 {
6404                   EMACS_INT offset = src - coding->source;
6405
6406                   dst = alloc_destination (coding, require, dst);
6407                   coding_set_source (coding);
6408                   src = coding->source + offset;
6409                   src_end = coding->source + coding->src_bytes;
6410                 }
6411             }
6412           produced_chars = coding->consumed_char;
6413           while (src < src_end)
6414             *dst++ = *src++;
6415         }
6416     }
6417
       /* Number of bytes written by this call.  */
6418   produced = dst - (coding->destination + coding->produced);
6419   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6420     insert_from_gap (produced_chars, produced);
6421   coding->produced += produced;
6422   coding->produced_char += produced_chars;
6423   return carryover;
6424 }
6425
6426 /* Compose text in CODING->dst_object according to the annotation data
6427    at CHARBUF.  As read by the code below, CHARBUF is an array:
6428      [ -LENGTH ANNOTATION_MASK NCHARS METHOD [ COMPONENTS... ] ]
        The composed text is the NCHARS characters starting at POS.
        Nothing is done if NCHARS is not positive, if METHOD is neither
        COMPOSITION_RELATIVE nor in the range COMPOSITION_WITH_RULE ..
        COMPOSITION_WITH_RULE_ALTCHARS, or if a component is negative.
6429  */
6430
6431 static INLINE void
6432 produce_composition (coding, charbuf, pos)
6433      struct coding_system *coding;
6434      int *charbuf;
6435      EMACS_INT pos;
6436 {
6437   int len;
6438   EMACS_INT to;
6439   enum composition_method method;
6440   Lisp_Object components;
6441
6442   len = -charbuf[0];
6443   to = pos + charbuf[2];
6444   if (to <= pos)
6445     return;
6446   method = (enum composition_method) (charbuf[3]);
6447
6448   if (method == COMPOSITION_RELATIVE)
6449     components = Qnil;
6450   else if (method >= COMPOSITION_WITH_RULE
6451            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6452     {
6453       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6454       int i;
6455
           /* Skip the four header elements; what remains are the LEN
              components.  NOTE(review): LEN is not checked against the
              size of ARGS here; presumably the decoders never produce
              more components than fit -- confirm.  */
6456       len -= 4;
6457       charbuf += 4;
6458       for (i = 0; i < len; i++)
6459         {
6460           args[i] = make_number (charbuf[i]);
6461           if (charbuf[i] < 0)
6462             return;
6463         }
           /* COMPOSITION_WITH_ALTCHARS gets its components as a string,
              the other methods as a vector.  */
6464       components = (method == COMPOSITION_WITH_ALTCHARS
6465                     ? Fstring (len, args) : Fvector (len, args));
6466     }
6467   else
6468     return;
6469   compose_text (pos, to, components, Qnil, coding->dst_object);
6470 }
6471
6472
6473 /* Put `charset' property on text in CODING->dst_object according to
6474    the annotation data at CHARBUF.  CHARBUF is an array:
6475      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
        The property value is the name of the charset whose ID is
        CHARSET-ID, and it is put on the NCHARS characters that end at POS.
6476  */
6477
6478 static INLINE void
6479 produce_charset (coding, charbuf, pos)
6480      struct coding_system *coding;
6481      int *charbuf;
6482      EMACS_INT pos;
6483 {
6484   EMACS_INT from = pos - charbuf[2];
6485   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6486
6487   Fput_text_property (make_number (from), make_number (pos),
6488                       Qcharset, CHARSET_NAME (charset),
6489                       coding->dst_object);
6490 }
6491
6492
     /* Number of ints requested first for CODING->charbuf by
        ALLOC_CONVERSION_WORK_AREA.  */
6493 #define CHARBUF_SIZE 0x4000
6494
     /* Allocate the work area CODING->charbuf with alloca.  Start by
        requesting CHARBUF_SIZE ints and halve the request each time
        alloca yields NULL, giving up once the size is no longer above
        1024.  On success set CODING->charbuf_size to the number of ints
        allocated.  On failure record CODING_RESULT_INSUFFICIENT_MEM and
        execute `return coding->result' in the function that uses the
        macro.  Because the memory comes from alloca, it belongs to the
        stack frame of the function in which the macro is expanded.  */
6495 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
6496   do {                                                                  \
6497     int size = CHARBUF_SIZE;;                                           \
6498                                                                         \
6499     coding->charbuf = NULL;                                             \
6500     while (size > 1024)                                                 \
6501       {                                                                 \
6502         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
6503         if (coding->charbuf)                                            \
6504           break;                                                        \
6505         size >>= 1;                                                     \
6506       }                                                                 \
6507     if (! coding->charbuf)                                              \
6508       {                                                                 \
6509         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
6510         return coding->result;                                          \
6511       }                                                                 \
6512     coding->charbuf_size = size;                                        \
6513   } while (0)
6514
6515
     /* Process the annotation data in CODING->charbuf (CODING->charbuf_used
        elements).  POS is the position in CODING->dst_object that
        corresponds to the first element.  A non-negative element is added
        to POS; a negative element -LEN starts an annotation datum of LEN
        elements whose second element selects the handler.  Do nothing if
        CODING->dst_object is nil; abort on an unknown annotation kind.  */
6516 static void
6517 produce_annotation (coding, pos)
6518      struct coding_system *coding;
6519      EMACS_INT pos;
6520 {
6521   int *charbuf = coding->charbuf;
6522   int *charbuf_end = charbuf + coding->charbuf_used;
6523
6524   if (NILP (coding->dst_object))
6525     return;
6526
6527   while (charbuf < charbuf_end)
6528     {
           /* produce_chars left in each character slot the number of
              characters it produced for that slot.  */
6529       if (*charbuf >= 0)
6530         pos += *charbuf++;
6531       else
6532         {
6533           int len = -*charbuf;
6534           switch (charbuf[1])
6535             {
6536             case CODING_ANNOTATE_COMPOSITION_MASK:
6537               produce_composition (coding, charbuf, pos);
6538               break;
6539             case CODING_ANNOTATE_CHARSET_MASK:
6540               produce_charset (coding, charbuf, pos);
6541               break;
6542             default:
6543               abort ();
6544             }
               /* Step over the whole annotation datum.  */
6545           charbuf += len;
6546         }
6547     }
6548 }
6549
6550 /* Decode the data at CODING->src_object into CODING->dst_object.
6551    CODING->src_object is a buffer, a string, or nil.
6552    CODING->dst_object is a buffer.
6553
6554    If CODING->src_object is a buffer, it must be the current buffer.
6555    In this case, if CODING->src_pos is positive, it is a position of
6556    the source text in the buffer, otherwise, the source text is in the
6557    gap area of the buffer, and CODING->src_pos specifies the offset of
6558    the text from GPT (which must be the same as PT).  If this is the
6559    same buffer as CODING->dst_object, CODING->src_pos must be
6560    negative.
6561
6562    If CODING->src_object is a string, CODING->src_pos is an index to
6563    that string.
6564
6565    If CODING->src_object is nil, CODING->source must already point to
6566    the non-relocatable memory area.  In this case, CODING->src_pos is
6567    an offset from CODING->source.
6568
6569    The decoded data is inserted at the current point of the buffer
6570    CODING->dst_object.

        Return CODING->result.  Note that ALLOC_CONVERSION_WORK_AREA may
        return from this function early if the work area cannot be
        allocated.
6571 */
6572
6573 static int
6574 decode_coding (coding)
6575      struct coding_system *coding;
6576 {
6577   Lisp_Object attrs;
6578   Lisp_Object undo_list;
6579   Lisp_Object translation_table;
6580   int carryover;
6581   int i;
6582
       /* The source text in the buffer straddles the gap; move the gap
          to the start of the source text.  */
6583   if (BUFFERP (coding->src_object)
6584       && coding->src_pos > 0
6585       && coding->src_pos < GPT
6586       && coding->src_pos + coding->src_chars > GPT)
6587     move_gap_both (coding->src_pos, coding->src_pos_byte);
6588
       /* While decoding into a buffer, replace its undo list by Qt; the
          saved list is put back at the end, followed by a single
          record_insert for all the decoded text.  */
6589   undo_list = Qt;
6590   if (BUFFERP (coding->dst_object))
6591     {
6592       if (current_buffer != XBUFFER (coding->dst_object))
6593         set_buffer_internal (XBUFFER (coding->dst_object));
6594       if (GPT != PT)
6595         move_gap_both (PT, PT_BYTE);
6596       undo_list = current_buffer->undo_list;
6597       current_buffer->undo_list = Qt;
6598     }
6599
6600   coding->consumed = coding->consumed_char = 0;
6601   coding->produced = coding->produced_char = 0;
6602   coding->chars_at_source = 0;
6603   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6604   coding->errors = 0;
6605
6606   ALLOC_CONVERSION_WORK_AREA (coding);
6607
6608   attrs = CODING_ID_ATTRS (coding->id);
6609   translation_table = get_translation_table (attrs, 0, NULL);
6610
       /* Decode block by block: the decoder is run with charbuf_used set
          to the carryover count, produce_chars writes the result out,
          and the elements it left unprocessed (CARRYOVER) are moved to
          the front of charbuf for the next round.  */
6611   carryover = 0;
6612   do
6613     {
6614       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6615
6616       coding_set_source (coding);
6617       coding->annotated = 0;
6618       coding->charbuf_used = carryover;
6619       (*(coding->decoder)) (coding);
6620       coding_set_destination (coding);
6621       carryover = produce_chars (coding, translation_table, 0);
6622       if (coding->annotated)
6623         produce_annotation (coding, pos);
6624       for (i = 0; i < carryover; i++)
6625         coding->charbuf[i]
6626           = coding->charbuf[coding->charbuf_used - carryover + i];
6627     }
6628   while (coding->consumed < coding->src_bytes
6629          && (coding->result == CODING_RESULT_SUCCESS
6630              || coding->result == CODING_RESULT_INVALID_SRC));
6631
       /* Characters are still held back at the front of charbuf; write
          them out now with LAST_BLOCK set.  */
6632   if (carryover > 0)
6633     {
6634       coding_set_destination (coding);
6635       coding->charbuf_used = carryover;
6636       produce_chars (coding, translation_table, 1);
6637     }
6638
       /* Deal with source bytes the decoder did not consume.  */
6639   coding->carryover_bytes = 0;
6640   if (coding->consumed < coding->src_bytes)
6641     {
6642       int nbytes = coding->src_bytes - coding->consumed;
6643       const unsigned char *src;
6644
6645       coding_set_source (coding);
6646       coding_set_destination (coding);
6647       src = coding->source + coding->consumed;
6648
6649       if (coding->mode & CODING_MODE_LAST_BLOCK)
6650         {
6651           /* Flush out unprocessed data as binary chars.  We are sure
6652              that the number of data is less than the size of
6653              coding->charbuf.  */
6654           coding->charbuf_used = 0;
6655           while (nbytes-- > 0)
6656             {
6657               int c = *src++;
6658
6659               if (c & 0x80)
6660                 c = BYTE8_TO_CHAR (c);
6661               coding->charbuf[coding->charbuf_used++] = c;
6662             }
6663           produce_chars (coding, Qnil, 1);
6664         }
6665       else
6666         {
6667           /* Record unprocessed bytes in coding->carryover.  We are
6668              sure that the number of data is less than the size of
6669              coding->carryover.  */
6670           unsigned char *p = coding->carryover;
6671
6672           coding->carryover_bytes = nbytes;
6673           while (nbytes-- > 0)
6674             *p++ = *src++;
6675         }
6676       coding->consumed = coding->src_bytes;
6677     }
6678
6679   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6680     decode_eol (coding);
6681   if (BUFFERP (coding->dst_object))
6682     {
6683       current_buffer->undo_list = undo_list;
6684       record_insert (coding->dst_pos, coding->produced_char);
6685     }
6686   return coding->result;
6687 }
6688
6689
6690 /* Extract an annotation datum from a composition starting at POS and
6691    ending before LIMIT of CODING->src_object (buffer or string), store
6692    the data in BUF, set *STOP to a starting position of the next
6693    composition (if any) or to LIMIT, and return the address of the
6694    next element of BUF.
6695
6696    If such an annotation is not found, set *STOP to a starting
6697    position of a composition after POS (if any) or to LIMIT, and
6698    return BUF.  */
6699
6700 static INLINE int *
6701 handle_composition_annotation (pos, limit, coding, buf, stop)
6702      EMACS_INT pos, limit;
6703      struct coding_system *coding;
6704      int *buf;
6705      EMACS_INT *stop;
6706 {
6707   EMACS_INT start, end;
6708   Lisp_Object prop;
6709
       /* Nothing found, or the composition found ends after LIMIT.  */
6710   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6711       || end > limit)
6712     *stop = limit;
       /* The composition found starts later; come back when we get there.  */
6713   else if (start > pos)
6714     *stop = start;
6715   else
6716     {
6717       if (start == pos)
6718         {
6719           /* We found a composition.  Store the corresponding
6720              annotation data in BUF.  */
6721           int *head = buf;
6722           enum composition_method method = COMPOSITION_METHOD (prop);
6723           int nchars = COMPOSITION_LENGTH (prop);
6724
6725           ADD_COMPOSITION_DATA (buf, nchars, method);
6726           if (method != COMPOSITION_RELATIVE)
6727             {
6728               Lisp_Object components;
6729               int len, i, i_byte;
6730
                   /* Append the components, whatever their container
                      type, and count them in LEN.  */
6731               components = COMPOSITION_COMPONENTS (prop);
6732               if (VECTORP (components))
6733                 {
6734                   len = XVECTOR (components)->size;
6735                   for (i = 0; i < len; i++)
6736                     *buf++ = XINT (AREF (components, i));
6737                 }
6738               else if (STRINGP (components))
6739                 {
6740                   len = SCHARS (components);
6741                   i = i_byte = 0;
6742                   while (i < len)
6743                     {
6744                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6745                       buf++;
6746                     }
6747                 }
6748               else if (INTEGERP (components))
6749                 {
6750                   len = 1;
6751                   *buf++ = XINT (components);
6752                 }
6753               else if (CONSP (components))
6754                 {
6755                   for (len = 0; CONSP (components);
6756                        len++, components = XCDR (components))
6757                     *buf++ = XINT (XCAR (components));
6758                 }
6759               else
6760                 abort ();
                   /* HEAD is the first element written for this datum.
                      NOTE(review): this assumes ADD_COMPOSITION_DATA
                      stored the negated datum length there, so that
                      subtracting LEN accounts for the appended
                      components -- confirm against the macro.  */
6761               *head -= len;
6762             }
6763         }
6764
           /* Look for the next composition, starting from END, to decide
              where to stop next.  */
6765       if (find_composition (end, limit, &start, &end, &prop,
6766                             coding->src_object)
6767           && end <= limit)
6768         *stop = start;
6769       else
6770         *stop = limit;
6771     }
6772   return buf;
6773 }
6774
6775
6776 /* Extract an annotation datum from a text property `charset' at POS of
6777    CODING->src_object (buffer or string), store the data in BUF, set
6778    *STOP to the position where the value of `charset' property changes
6779    (limiting by LIMIT), and return the address of the next element of
6780    BUF.
6781
6782    If the property value is nil, set *STOP to the position where the
6783    property value is non-nil (limiting by LIMIT), and return BUF.

        NOTE(review): the code below calls ADD_CHARSET_DATA
        unconditionally, passing the charset ID -1 when the property value
        is nil or is not a charset.  Whether BUF is really returned
        unchanged in that case depends on that macro, which is defined
        elsewhere -- confirm.  */
6784
6785 static INLINE int *
6786 handle_charset_annotation (pos, limit, coding, buf, stop)
6787      EMACS_INT pos, limit;
6788      struct coding_system *coding;
6789      int *buf;
6790      EMACS_INT *stop;
6791 {
6792   Lisp_Object val, next;
6793   int id;
6794
6795   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6796   if (! NILP (val) && CHARSETP (val))
6797     id = XINT (CHARSET_SYMBOL_ID (val));
6798   else
6799     id = -1;
6800   ADD_CHARSET_DATA (buf, 0, id);
6801   next = Fnext_single_property_change (make_number (pos), Qcharset,
6802                                        coding->src_object,
6803                                        make_number (limit));
6804   *stop = XINT (next);
6805   return buf;
6806 }
6807
6808
     /* Fill CODING->charbuf with characters read from CODING->source,
        starting at byte offset CODING->consumed.  Along the way:
          - map `\r' to `\n' when CODING_MODE_SELECTIVE_DISPLAY is set;
          - for eol type `dos' store a `\r' before each `\n', and for any
            other non-unix eol type replace `\n' by `\r';
          - store charset annotation data where requested by
            CODING->common_flags (composition annotation is switched off
            at the top of this function);
          - translate characters through TRANSLATION_TABLE if it is
            non-nil.  MAX_LOOKUP is the number of characters (including
            the current one) made available to get_translation.

        On return CODING->consumed, CODING->consumed_char and
        CODING->charbuf_used are updated and CODING->chars_at_source
        is 0.  */
6809 static void
6810 consume_chars (coding, translation_table, max_lookup)
6811      struct coding_system *coding;
6812      Lisp_Object translation_table;
6813      int max_lookup;
6814 {
6815   int *buf = coding->charbuf;
6816   int *buf_end = coding->charbuf + coding->charbuf_size;
6817   const unsigned char *src = coding->source + coding->consumed;
6818   const unsigned char *src_end = coding->source + coding->src_bytes;
6819   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6820   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6821   int multibytep = coding->src_multibyte;
6822   Lisp_Object eol_type;
6823   int c;
6824   EMACS_INT stop, stop_composition, stop_charset;
6825   int *lookup_buf = NULL;
6826
6827   if (! NILP (translation_table))
6828     lookup_buf = alloca (sizeof (int) * max_lookup);
6829
       /* An eol type that is a vector is handled like `unix' here.  */
6830   eol_type = CODING_ID_EOL_TYPE (coding->id);
6831   if (VECTORP (eol_type))
6832     eol_type = Qunix;
6833
6834   /* Note: composition handling is not yet implemented.  */
6835   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6836
       /* STOP is the next position at which annotations must be looked
          at; without a source object there is nothing to annotate.  */
6837   if (NILP (coding->src_object))
6838     stop = stop_composition = stop_charset = end_pos;
6839   else
6840     {
6841       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6842         stop = stop_composition = pos;
6843       else
6844         stop = stop_composition = end_pos;
6845       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6846         stop = stop_charset = pos;
6847       else
6848         stop_charset = end_pos;
6849     }
6850
6851   /* Compensate for CRLF and conversion.  */
6852   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6853   while (buf < buf_end)
6854     {
6855       Lisp_Object trans;
6856
6857       if (pos == stop)
6858         {
6859           if (pos == end_pos)
6860             break;
6861           if (pos == stop_composition)
6862             buf = handle_composition_annotation (pos, end_pos, coding,
6863                                                  buf, &stop_composition);
6864           if (pos == stop_charset)
6865             buf = handle_charset_annotation (pos, end_pos, coding,
6866                                              buf, &stop_charset);
6867           stop = (stop_composition < stop_charset
6868                   ? stop_composition : stop_charset);
6869         }
6870
           /* Fetch the next source character into C.  */
6871       if (! multibytep)
6872         {
6873           EMACS_INT bytes;
6874
6875           if (coding->encoder == encode_coding_raw_text)
6876             c = *src++, pos++;
6877           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6878             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
6879           else
6880             c = BYTE8_TO_CHAR (*src), src++, pos++;
6881         }
6882       else
6883         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
6884       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6885         c = '\n';
6886       if (! EQ (eol_type, Qunix))
6887         {
6888           if (c == '\n')
6889             {
6890               if (EQ (eol_type, Qdos))
6891                 *buf++ = '\r';
6892               else
6893                 c = '\r';
6894             }
6895         }
6896
6897       trans = Qnil;
6898       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6899       if (NILP (trans))
6900         *buf++ = c;
6901       else
6902         {
6903           int from_nchars = 1, to_nchars = 1;
6904           int *lookup_buf_end;
6905           const unsigned char *p = src;
6906           int i;
6907
               /* Collect C and up to MAX_LOOKUP - 1 following source
                  characters so that get_translation can match
                  multi-character FROM sequences.  SRC itself is not
                  advanced here.  */
6908           lookup_buf[0] = c;
6909           for (i = 1; i < max_lookup && p < src_end; i++)
6910             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6911           lookup_buf_end = lookup_buf + i;
6912           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6913                                    &from_nchars, &to_nchars);
               /* Stop if get_translation returned Qt or the translated
                  characters do not fit into what is left of charbuf.  */
6914           if (EQ (trans, Qt)
6915               || buf + to_nchars > buf_end)
6916             break;
               /* get_translation stored the first target character in
                  lookup_buf[0]; the others come from the TO vector.  */
6917           *buf++ = *lookup_buf;
6918           for (i = 1; i < to_nchars; i++)
6919             *buf++ = XINT (AREF (trans, i));
               /* Skip the source characters of the matched FROM sequence
                  beyond the first one, which was already consumed.  */
6920           for (i = 1; i < from_nchars; i++, pos++)
6921             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6922         }
6923     }
6924
6925   coding->consumed = src - coding->source;
6926   coding->consumed_char = pos - coding->src_pos;
6927   coding->charbuf_used = buf - coding->charbuf;
6928   coding->chars_at_source = 0;
6929 }
6930
6931
6932 /* Encode the text at CODING->src_object into CODING->dst_object.
6933    CODING->src_object is a buffer or a string.
6934    CODING->dst_object is a buffer or nil.
6935
6936    If CODING->src_object is a buffer, it must be the current buffer.
6937    In this case, if CODING->src_pos is positive, it is a position of
6938    the source text in the buffer, otherwise, the source text is in the
6939    gap area of the buffer, and coding->src_pos specifies the offset of
6940    the text from GPT (which must be the same as PT).  If this is the
6941    same buffer as CODING->dst_object, CODING->src_pos must be
6942    negative and CODING should not have `pre-write-conversion'.
6943
6944    If CODING->src_object is a string, CODING should not have
6945    `pre-write-conversion'.
6946
6947    If CODING->dst_object is a buffer, the encoded data is inserted at
6948    the current point of that buffer.
6949
6950    If CODING->dst_object is nil, the encoded data is placed at the
6951    memory area specified by CODING->destination.

        Return CODING->result.  Note that ALLOC_CONVERSION_WORK_AREA may
        return from this function early if the work area cannot be
        allocated.  */
6952
6953 static int
6954 encode_coding (coding)
6955      struct coding_system *coding;
6956 {
6957   Lisp_Object attrs;
6958   Lisp_Object translation_table;
6959   int max_lookup;
6960
       /* No translation table is used with the raw-text encoder.  */
6961   attrs = CODING_ID_ATTRS (coding->id);
6962   if (coding->encoder == encode_coding_raw_text)
6963     translation_table = Qnil, max_lookup = 0;
6964   else
6965     translation_table = get_translation_table (attrs, 1, &max_lookup);
6966
6967   if (BUFFERP (coding->dst_object))
6968     {
6969       set_buffer_internal (XBUFFER (coding->dst_object));
6970       coding->dst_multibyte
6971         = ! NILP (current_buffer->enable_multibyte_characters);
6972     }
6973
6974   coding->consumed = coding->consumed_char = 0;
6975   coding->produced = coding->produced_char = 0;
6976   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6977   coding->errors = 0;
6978
6979   ALLOC_CONVERSION_WORK_AREA (coding);
6980
       /* Encode block by block: consume_chars fills charbuf from the
          source, then the encoder is run, until all source characters
          have been consumed.  */
6981   do {
6982     coding_set_source (coding);
6983     consume_chars (coding, translation_table, max_lookup);
6984     coding_set_destination (coding);
6985     (*(coding->encoder)) (coding);
6986   } while (coding->consumed_char < coding->src_chars);
6987
6988   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
6989     insert_from_gap (coding->produced_char, coding->produced);
6990
6991   return (coding->result);
6992 }
6993
6994
6995 /* Name (or base name) of work buffer for code conversion.  */
6996 static Lisp_Object Vcode_conversion_workbuf_name;
6997
6998 /* A working buffer used by the top level conversion.  Once it is
6999    created, it is never destroyed.  It has the name
7000    Vcode_conversion_workbuf_name.  The other working buffers are
7001    destroyed after the use is finished, and their names are modified
7002    versions of Vcode_conversion_workbuf_name.  */
7003 static Lisp_Object Vcode_conversion_reused_workbuf;
7004
7005 /* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
        make_conversion_work_buffer increments this on every call and
        code_conversion_restore resets it to 0 when the reused buffer is
        released, so the value may be greater than 1.  */
7006 static int reused_workbuf_in_use;
7007
7008
7009 /* Return a working buffer of code conversion.  MULTIBYTE specifies the
7010    multibyteness of the returned buffer.

        If the reusable work buffer is not in use, return
        Vcode_conversion_reused_workbuf, creating it first if it is not a
        live buffer.  Otherwise create a new buffer whose name is generated
        from Vcode_conversion_workbuf_name.  In both cases the in-use
        counter is incremented, the buffer is erased and its undo list is
        set to t.  The current buffer is the same on return as on entry.  */
7011
7012 static Lisp_Object
7013 make_conversion_work_buffer (multibyte)
7014      int multibyte;
7015 {
7016   Lisp_Object name, workbuf;
7017   struct buffer *current;
7018
       /* The post-increment tests the old value: nonzero means the
          reusable buffer is already taken.  */
7019   if (reused_workbuf_in_use++)
7020     {
7021       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7022       workbuf = Fget_buffer_create (name);
7023     }
7024   else
7025     {
7026       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7027         Vcode_conversion_reused_workbuf
7028           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7029       workbuf = Vcode_conversion_reused_workbuf;
7030     }
       /* Prepare the work buffer, then switch back.  */
7031   current = current_buffer;
7032   set_buffer_internal (XBUFFER (workbuf));
7033   Ferase_buffer ();
7034   current_buffer->undo_list = Qt;
7035   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7036   set_buffer_internal (current);
7037   return workbuf;
7038 }
7039
7040
     /* Unwind function recorded by code_conversion_save.  ARG is a cons
        (BUFFER . WORKBUF).  If WORKBUF is non-nil, release it: mark the
        reusable work buffer as free again, or kill any other work buffer
        that is still live.  Then make BUFFER the current buffer.  Return
        Qnil.  */
7041 static Lisp_Object
7042 code_conversion_restore (arg)
7043      Lisp_Object arg;
7044 {
7045   Lisp_Object current, workbuf;
7046   struct gcpro gcpro1;
7047
7048   GCPRO1 (arg);
7049   current = XCAR (arg);
7050   workbuf = XCDR (arg);
7051   if (! NILP (workbuf))
7052     {
7053       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7054         reused_workbuf_in_use = 0;
7055       else if (! NILP (Fbuffer_live_p (workbuf)))
7056         Fkill_buffer (workbuf);
7057     }
7058   set_buffer_internal (XBUFFER (current));
7059   UNGCPRO;
7060   return Qnil;
7061 }
7062
     /* Record an unwind-protect entry that calls code_conversion_restore
        with the buffer that is current now.  If WITH_WORK_BUF is nonzero,
        first obtain a conversion work buffer of multibyteness MULTIBYTE
        from make_conversion_work_buffer; it is passed to
        code_conversion_restore in the same entry.  Return the work
        buffer, or Qnil if none was requested.  */
7063 Lisp_Object
7064 code_conversion_save (with_work_buf, multibyte)
7065      int with_work_buf, multibyte;
7066 {
7067   Lisp_Object workbuf = Qnil;
7068
7069   if (with_work_buf)
7070     workbuf = make_conversion_work_buffer (multibyte);
7071   record_unwind_protect (code_conversion_restore,
7072                          Fcons (Fcurrent_buffer (), workbuf));
7073   return workbuf;
7074 }
7075
     /* Decode with CODING source text of CHARS characters and BYTES bytes
        belonging to the current buffer, which is used as both source and
        destination.  CODING->src_pos is set to -CHARS (a non-positive
        src_pos tells decode_coding that the source text is in the gap
        area) and the decoded text goes to point.  The coding system is
        detected first if CODING requires it, and CODING_MODE_LAST_BLOCK
        is always set.

        If the coding system has a post-read conversion function, call it
        with the number of decoded characters while point is temporarily
        at the start of the decoded text; its value must be a
        non-negative integer.  CODING->produced_char and CODING->produced
        are then adjusted by the change in Z and Z_BYTE.

        Return CODING->result.  */
7076 int
7077 decode_coding_gap (coding, chars, bytes)
7078      struct coding_system *coding;
7079      EMACS_INT chars, bytes;
7080 {
7081   int count = specpdl_ptr - specpdl;
7082   Lisp_Object attrs;
7083
       /* Have the current buffer restored by unbind_to below; no work
          buffer is needed.  */
7084   code_conversion_save (0, 0);
7085
7086   coding->src_object = Fcurrent_buffer ();
7087   coding->src_chars = chars;
7088   coding->src_bytes = bytes;
7089   coding->src_pos = -chars;
7090   coding->src_pos_byte = -bytes;
       /* Fewer characters than bytes: the source is treated as multibyte.  */
7091   coding->src_multibyte = chars < bytes;
7092   coding->dst_object = coding->src_object;
7093   coding->dst_pos = PT;
7094   coding->dst_pos_byte = PT_BYTE;
7095   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7096
7097   if (CODING_REQUIRE_DETECTION (coding))
7098     detect_coding (coding);
7099
7100   coding->mode |= CODING_MODE_LAST_BLOCK;
       /* inhibit_shrinking is set only for the duration of the decoding.  */
7101   current_buffer->text->inhibit_shrinking = 1;
7102   decode_coding (coding);
7103   current_buffer->text->inhibit_shrinking = 0;
7104
7105   attrs = CODING_ID_ATTRS (coding->id);
7106   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7107     {
7108       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7109       Lisp_Object val;
7110
7111       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7112       val = call1 (CODING_ATTR_POST_READ (attrs),
7113                    make_number (coding->produced_char));
7114       CHECK_NATNUM (val);
           /* Account for text the function added to or removed from the
              buffer.  */
7115       coding->produced_char += Z - prev_Z;
7116       coding->produced += Z_BYTE - prev_Z_BYTE;
7117     }
7118
7119   unbind_to (count, Qnil);
7120   return coding->result;
7121 }
7122
7123 int
7124 encode_coding_gap (coding, chars, bytes)
7125      struct coding_system *coding;
7126      EMACS_INT chars, bytes;
7127 {
7128   int count = specpdl_ptr - specpdl;
7129
7130   code_conversion_save (0, 0);
7131
7132   coding->src_object = Fcurrent_buffer ();
7133   coding->src_chars = chars;
7134   coding->src_bytes = bytes;
7135   coding->src_pos = -chars;
7136   coding->src_pos_byte = -bytes;
7137   coding->src_multibyte = chars < bytes;
7138   coding->dst_object = coding->src_object;
7139   coding->dst_pos = PT;
7140   coding->dst_pos_byte = PT_BYTE;
7141
7142   encode_coding (coding);
7143
7144   unbind_to (count, Qnil);
7145   return coding->result;
7146 }
7147
7148
7149 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7150    SRC_OBJECT into DST_OBJECT by coding context CODING.
7151
7152    SRC_OBJECT is a buffer, a string, or Qnil.
7153
7154    If it is a buffer, the text is at point of the buffer.  FROM and TO
7155    are positions in the buffer.
7156
7157    If it is a string, the text is at the beginning of the string.
7158    FROM and TO are indices to the string.
7159
7160    If it is nil, the text is at coding->source.  FROM and TO are
7161    indices to coding->source.
7162
7163    DST_OBJECT is a buffer, Qt, or Qnil.
7164
7165    If it is a buffer, the decoded text is inserted at point of the
7166    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7167    is deleted.
7168
7169    If it is Qt, a string is made from the decoded text, and
7170    set in CODING->dst_object.
7171
7172    If it is Qnil, the decoded text is stored at CODING->destination.
7173    The caller must allocate CODING->dst_bytes bytes at
7174    CODING->destination by xmalloc.  If the decoded text is longer than
7175    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7176  */
7177
7178 void
7179 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7180                       dst_object)
7181      struct coding_system *coding;
7182      Lisp_Object src_object;
7183      EMACS_INT from, from_byte, to, to_byte;
7184      Lisp_Object dst_object;
7185 {
7186   int count = specpdl_ptr - specpdl;
7187   unsigned char *destination;
7188   EMACS_INT dst_bytes;
7189   EMACS_INT chars = to - from;
7190   EMACS_INT bytes = to_byte - from_byte;
7191   Lisp_Object attrs;
7192   int saved_pt = -1, saved_pt_byte;
7193   int need_marker_adjustment = 0;
7194   Lisp_Object old_deactivate_mark;
7195
7196   old_deactivate_mark = Vdeactivate_mark;
7197
7198   if (NILP (dst_object))
7199     {
7200       destination = coding->destination;
7201       dst_bytes = coding->dst_bytes;
7202     }
7203
7204   coding->src_object = src_object;
7205   coding->src_chars = chars;
7206   coding->src_bytes = bytes;
7207   coding->src_multibyte = chars < bytes;
7208
7209   if (STRINGP (src_object))
7210     {
7211       coding->src_pos = from;
7212       coding->src_pos_byte = from_byte;
7213     }
7214   else if (BUFFERP (src_object))
7215     {
7216       set_buffer_internal (XBUFFER (src_object));
7217       if (from != GPT)
7218         move_gap_both (from, from_byte);
7219       if (EQ (src_object, dst_object))
7220         {
7221           struct Lisp_Marker *tail;
7222
7223           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7224             {
7225               tail->need_adjustment
7226                 = tail->charpos == (tail->insertion_type ? from : to);
7227               need_marker_adjustment |= tail->need_adjustment;
7228             }
7229           saved_pt = PT, saved_pt_byte = PT_BYTE;
7230           TEMP_SET_PT_BOTH (from, from_byte);
7231           current_buffer->text->inhibit_shrinking = 1;
7232           del_range_both (from, from_byte, to, to_byte, 1);
7233           coding->src_pos = -chars;
7234           coding->src_pos_byte = -bytes;
7235         }
7236       else
7237         {
7238           coding->src_pos = from;
7239           coding->src_pos_byte = from_byte;
7240         }
7241     }
7242
7243   if (CODING_REQUIRE_DETECTION (coding))
7244     detect_coding (coding);
7245   attrs = CODING_ID_ATTRS (coding->id);
7246
7247   if (EQ (dst_object, Qt)
7248       || (! NILP (CODING_ATTR_POST_READ (attrs))
7249           && NILP (dst_object)))
7250     {
7251       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7252       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7253       coding->dst_pos = BEG;
7254       coding->dst_pos_byte = BEG_BYTE;
7255     }
7256   else if (BUFFERP (dst_object))
7257     {
7258       code_conversion_save (0, 0);
7259       coding->dst_object = dst_object;
7260       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7261       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7262       coding->dst_multibyte
7263         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7264     }
7265   else
7266     {
7267       code_conversion_save (0, 0);
7268       coding->dst_object = Qnil;
7269       /* Most callers presume this will return a multibyte result, and they
7270          won't use `binary' or `raw-text' anyway, so let's not worry about
7271          CODING_FOR_UNIBYTE.  */
7272       coding->dst_multibyte = 1;
7273     }
7274
7275   decode_coding (coding);
7276
7277   if (BUFFERP (coding->dst_object))
7278     set_buffer_internal (XBUFFER (coding->dst_object));
7279
7280   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7281     {
7282       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7283       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7284       Lisp_Object val;
7285
7286       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7287       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7288               old_deactivate_mark);
7289       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7290                         make_number (coding->produced_char));
7291       UNGCPRO;
7292       CHECK_NATNUM (val);
7293       coding->produced_char += Z - prev_Z;
7294       coding->produced += Z_BYTE - prev_Z_BYTE;
7295     }
7296
7297   if (EQ (dst_object, Qt))
7298     {
7299       coding->dst_object = Fbuffer_string ();
7300     }
7301   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7302     {
7303       set_buffer_internal (XBUFFER (coding->dst_object));
7304       if (dst_bytes < coding->produced)
7305         {
7306           destination = xrealloc (destination, coding->produced);
7307           if (! destination)
7308             {
7309               record_conversion_result (coding,
7310                                         CODING_RESULT_INSUFFICIENT_DST);
7311               unbind_to (count, Qnil);
7312               return;
7313             }
7314           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7315             move_gap_both (BEGV, BEGV_BYTE);
7316           bcopy (BEGV_ADDR, destination, coding->produced);
7317           coding->destination = destination;
7318         }
7319     }
7320
7321   if (saved_pt >= 0)
7322     {
7323       /* This is the case of:
7324          (BUFFERP (src_object) && EQ (src_object, dst_object))
7325          As we have moved PT while replacing the original buffer
7326          contents, we must recover it now.  */
7327       set_buffer_internal (XBUFFER (src_object));
7328       current_buffer->text->inhibit_shrinking = 0;
7329       if (saved_pt < from)
7330         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7331       else if (saved_pt < from + chars)
7332         TEMP_SET_PT_BOTH (from, from_byte);
7333       else if (! NILP (current_buffer->enable_multibyte_characters))
7334         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7335                           saved_pt_byte + (coding->produced - bytes));
7336       else
7337         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7338                           saved_pt_byte + (coding->produced - bytes));
7339
7340       if (need_marker_adjustment)
7341         {
7342           struct Lisp_Marker *tail;
7343
7344           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7345             if (tail->need_adjustment)
7346               {
7347                 tail->need_adjustment = 0;
7348                 if (tail->insertion_type)
7349                   {
7350                     tail->bytepos = from_byte;
7351                     tail->charpos = from;
7352                   }
7353                 else
7354                   {
7355                     tail->bytepos = from_byte + coding->produced;
7356                     tail->charpos
7357                       = (NILP (current_buffer->enable_multibyte_characters)
7358                          ? tail->bytepos : from + coding->produced_char);
7359                   }
7360               }
7361         }
7362     }
7363
7364   Vdeactivate_mark = old_deactivate_mark;
7365   unbind_to (count, coding->dst_object);
7366 }
7367
7368
7369 void
7370 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7371                       dst_object)
7372      struct coding_system *coding;
7373      Lisp_Object src_object;
7374      EMACS_INT from, from_byte, to, to_byte;
7375      Lisp_Object dst_object;
7376 {
7377   int count = specpdl_ptr - specpdl;
7378   EMACS_INT chars = to - from;
7379   EMACS_INT bytes = to_byte - from_byte;
7380   Lisp_Object attrs;
7381   int saved_pt = -1, saved_pt_byte;
7382   int need_marker_adjustment = 0;
7383   int kill_src_buffer = 0;
7384   Lisp_Object old_deactivate_mark;
7385
7386   old_deactivate_mark = Vdeactivate_mark;
7387
7388   coding->src_object = src_object;
7389   coding->src_chars = chars;
7390   coding->src_bytes = bytes;
7391   coding->src_multibyte = chars < bytes;
7392
7393   attrs = CODING_ID_ATTRS (coding->id);
7394
7395   if (EQ (src_object, dst_object))
7396     {
7397       struct Lisp_Marker *tail;
7398
7399       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7400         {
7401           tail->need_adjustment
7402             = tail->charpos == (tail->insertion_type ? from : to);
7403           need_marker_adjustment |= tail->need_adjustment;
7404         }
7405     }
7406
7407   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7408     {
7409       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7410       set_buffer_internal (XBUFFER (coding->src_object));
7411       if (STRINGP (src_object))
7412         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7413       else if (BUFFERP (src_object))
7414         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7415       else
7416         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7417
7418       if (EQ (src_object, dst_object))
7419         {
7420           set_buffer_internal (XBUFFER (src_object));
7421           saved_pt = PT, saved_pt_byte = PT_BYTE;
7422           del_range_both (from, from_byte, to, to_byte, 1);
7423           set_buffer_internal (XBUFFER (coding->src_object));
7424         }
7425
7426       {
7427         Lisp_Object args[3];
7428         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7429
7430         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7431                 old_deactivate_mark);
7432         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7433         args[1] = make_number (BEG);
7434         args[2] = make_number (Z);
7435         safe_call (3, args);
7436         UNGCPRO;
7437       }
7438       if (XBUFFER (coding->src_object) != current_buffer)
7439         kill_src_buffer = 1;
7440       coding->src_object = Fcurrent_buffer ();
7441       if (BEG != GPT)
7442         move_gap_both (BEG, BEG_BYTE);
7443       coding->src_chars = Z - BEG;
7444       coding->src_bytes = Z_BYTE - BEG_BYTE;
7445       coding->src_pos = BEG;
7446       coding->src_pos_byte = BEG_BYTE;
7447       coding->src_multibyte = Z < Z_BYTE;
7448     }
7449   else if (STRINGP (src_object))
7450     {
7451       code_conversion_save (0, 0);
7452       coding->src_pos = from;
7453       coding->src_pos_byte = from_byte;
7454     }
7455   else if (BUFFERP (src_object))
7456     {
7457       code_conversion_save (0, 0);
7458       set_buffer_internal (XBUFFER (src_object));
7459       if (EQ (src_object, dst_object))
7460         {
7461           saved_pt = PT, saved_pt_byte = PT_BYTE;
7462           coding->src_object = del_range_1 (from, to, 1, 1);
7463           coding->src_pos = 0;
7464           coding->src_pos_byte = 0;
7465         }
7466       else
7467         {
7468           if (from < GPT && to >= GPT)
7469             move_gap_both (from, from_byte);
7470           coding->src_pos = from;
7471           coding->src_pos_byte = from_byte;
7472         }
7473     }
7474   else
7475     code_conversion_save (0, 0);
7476
7477   if (BUFFERP (dst_object))
7478     {
7479       coding->dst_object = dst_object;
7480       if (EQ (src_object, dst_object))
7481         {
7482           coding->dst_pos = from;
7483           coding->dst_pos_byte = from_byte;
7484         }
7485       else
7486         {
7487           struct buffer *current = current_buffer;
7488
7489           set_buffer_temp (XBUFFER (dst_object));
7490           coding->dst_pos = PT;
7491           coding->dst_pos_byte = PT_BYTE;
7492           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7493           set_buffer_temp (current);
7494         }
7495       coding->dst_multibyte
7496         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7497     }
7498   else if (EQ (dst_object, Qt))
7499     {
7500       coding->dst_object = Qnil;
7501       coding->dst_bytes = coding->src_chars;
7502       if (coding->dst_bytes == 0)
7503         coding->dst_bytes = 1;
7504       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7505       coding->dst_multibyte = 0;
7506     }
7507   else
7508     {
7509       coding->dst_object = Qnil;
7510       coding->dst_multibyte = 0;
7511     }
7512
7513   encode_coding (coding);
7514
7515   if (EQ (dst_object, Qt))
7516     {
7517       if (BUFFERP (coding->dst_object))
7518         coding->dst_object = Fbuffer_string ();
7519       else
7520         {
7521           coding->dst_object
7522             = make_unibyte_string ((char *) coding->destination,
7523                                    coding->produced);
7524           xfree (coding->destination);
7525         }
7526     }
7527
7528   if (saved_pt >= 0)
7529     {
7530       /* This is the case of:
7531          (BUFFERP (src_object) && EQ (src_object, dst_object))
7532          As we have moved PT while replacing the original buffer
7533          contents, we must recover it now.  */
7534       set_buffer_internal (XBUFFER (src_object));
7535       if (saved_pt < from)
7536         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7537       else if (saved_pt < from + chars)
7538         TEMP_SET_PT_BOTH (from, from_byte);
7539       else if (! NILP (current_buffer->enable_multibyte_characters))
7540         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7541                           saved_pt_byte + (coding->produced - bytes));
7542       else
7543         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7544                           saved_pt_byte + (coding->produced - bytes));
7545
7546       if (need_marker_adjustment)
7547         {
7548           struct Lisp_Marker *tail;
7549
7550           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7551             if (tail->need_adjustment)
7552               {
7553                 tail->need_adjustment = 0;
7554                 if (tail->insertion_type)
7555                   {
7556                     tail->bytepos = from_byte;
7557                     tail->charpos = from;
7558                   }
7559                 else
7560                   {
7561                     tail->bytepos = from_byte + coding->produced;
7562                     tail->charpos
7563                       = (NILP (current_buffer->enable_multibyte_characters)
7564                          ? tail->bytepos : from + coding->produced_char);
7565                   }
7566               }
7567         }
7568     }
7569
7570   if (kill_src_buffer)
7571     Fkill_buffer (coding->src_object);
7572
7573   Vdeactivate_mark = old_deactivate_mark;
7574   unbind_to (count, Qnil);
7575 }
7576
7577
7578 Lisp_Object
7579 preferred_coding_system ()
7580 {
7581   int id = coding_categories[coding_priorities[0]].id;
7582
7583   return CODING_ID_NAME (id);
7584 }
7585
7586 \f
7587 #ifdef emacs
/*** 11. Emacs Lisp library functions ***/
7589
7590 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
7591        doc: /* Return t if OBJECT is nil or a coding-system.
7592 See the documentation of `define-coding-system' for information
7593 about coding-system objects.  */)
7594      (object)
7595      Lisp_Object object;
7596 {
7597   if (NILP (object)
7598       || CODING_SYSTEM_ID (object) >= 0)
7599     return Qt;
7600   if (! SYMBOLP (object)
7601       || NILP (Fget (object, Qcoding_system_define_form)))
7602     return Qnil;
7603   return Qt;
7604 }
7605
7606 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7607        Sread_non_nil_coding_system, 1, 1, 0,
7608        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7609      (prompt)
7610      Lisp_Object prompt;
7611 {
7612   Lisp_Object val;
7613   do
7614     {
7615       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7616                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
7617     }
7618   while (SCHARS (val) == 0);
7619   return (Fintern (val, Qnil));
7620 }
7621
7622 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7623        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7624 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7625 Ignores case when completing coding systems (all Emacs coding systems
7626 are lower-case).  */)
7627      (prompt, default_coding_system)
7628      Lisp_Object prompt, default_coding_system;
7629 {
7630   Lisp_Object val;
7631   int count = SPECPDL_INDEX ();
7632
7633   if (SYMBOLP (default_coding_system))
7634     default_coding_system = SYMBOL_NAME (default_coding_system);
7635   specbind (Qcompletion_ignore_case, Qt);
7636   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7637                           Qt, Qnil, Qcoding_system_history,
7638                           default_coding_system, Qnil);
7639   unbind_to (count, Qnil);
7640   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
7641 }
7642
7643 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7644        1, 1, 0,
7645        doc: /* Check validity of CODING-SYSTEM.
7646 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7647 It is valid if it is nil or a symbol defined as a coding system by the
7648 function `define-coding-system'.  */)
7649   (coding_system)
7650      Lisp_Object coding_system;
7651 {
7652   Lisp_Object define_form;
7653
7654   define_form = Fget (coding_system, Qcoding_system_define_form);
7655   if (! NILP (define_form))
7656     {
7657       Fput (coding_system, Qcoding_system_define_form, Qnil);
7658       safe_eval (define_form);
7659     }
7660   if (!NILP (Fcoding_system_p (coding_system)))
7661     return coding_system;
7662   xsignal1 (Qcoding_system_error, coding_system);
7663 }
7664
7665 \f
7666 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7667    HIGHEST is nonzero, return the coding system of the highest
7668    priority among the detected coding systems.  Otherwize return a
7669    list of detected coding systems sorted by their priorities.  If
7670    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7671    multibyte form but contains only ASCII and eight-bit chars.
7672    Otherwise, the bytes are raw bytes.
7673
7674    CODING-SYSTEM controls the detection as below:
7675
7676    If it is nil, detect both text-format and eol-format.  If the
7677    text-format part of CODING-SYSTEM is already specified
7678    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7679    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7680    detect only text-format.  */
7681
7682 Lisp_Object
7683 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7684                       coding_system)
7685      const unsigned char *src;
7686      EMACS_INT src_chars, src_bytes;
7687      int highest;
7688      int multibytep;
7689      Lisp_Object coding_system;
7690 {
7691   const unsigned char *src_end = src + src_bytes;
7692   Lisp_Object attrs, eol_type;
7693   Lisp_Object val;
7694   struct coding_system coding;
7695   int id;
7696   struct coding_detection_info detect_info;
7697   enum coding_category base_category;
7698   int null_byte_found = 0, eight_bit_found = 0;
7699
7700   if (NILP (coding_system))
7701     coding_system = Qundecided;
7702   setup_coding_system (coding_system, &coding);
7703   attrs = CODING_ID_ATTRS (coding.id);
7704   eol_type = CODING_ID_EOL_TYPE (coding.id);
7705   coding_system = CODING_ATTR_BASE_NAME (attrs);
7706
7707   coding.source = src;
7708   coding.src_chars = src_chars;
7709   coding.src_bytes = src_bytes;
7710   coding.src_multibyte = multibytep;
7711   coding.consumed = 0;
7712   coding.mode |= CODING_MODE_LAST_BLOCK;
7713   coding.head_ascii = 0;
7714
7715   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7716
7717   /* At first, detect text-format if necessary.  */
7718   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7719   if (base_category == coding_category_undecided)
7720     {
7721       enum coding_category category;
7722       struct coding_system *this;
7723       int c, i;
7724
7725       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7726       for (; src < src_end; src++)
7727         {
7728           c = *src;
7729           if (c & 0x80)
7730             {
7731               eight_bit_found = 1;
7732               if (null_byte_found)
7733                 break;
7734             }
7735           else if (c < 0x20)
7736             {
7737               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7738                   && ! inhibit_iso_escape_detection
7739                   && ! detect_info.checked)
7740                 {
7741                   if (detect_coding_iso_2022 (&coding, &detect_info))
7742                     {
7743                       /* We have scanned the whole data.  */
7744                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7745                         {
7746                           /* We didn't find an 8-bit code.  We may
7747                              have found a null-byte, but it's very
7748                              rare that a binary file confirm to
7749                              ISO-2022.  */
7750                           src = src_end;
7751                           coding.head_ascii = src - coding.source;
7752                         }
7753                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
7754                       break;
7755                     }
7756                 }
7757               else if (! c)
7758                 {
7759                   null_byte_found = 1;
7760                   if (eight_bit_found)
7761                     break;
7762                 }
7763               if (! eight_bit_found)
7764                 coding.head_ascii++;
7765             }
7766           else if (! eight_bit_found)
7767             coding.head_ascii++;
7768         }
7769
7770       if (null_byte_found || eight_bit_found
7771           || coding.head_ascii < coding.src_bytes
7772           || detect_info.found)
7773         {
7774           if (coding.head_ascii == coding.src_bytes)
7775             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
7776             for (i = 0; i < coding_category_raw_text; i++)
7777               {
7778                 category = coding_priorities[i];
7779                 this = coding_categories + category;
7780                 if (detect_info.found & (1 << category))
7781                   break;
7782               }
7783           else
7784             {
7785               if (null_byte_found)
7786                 {
7787                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
7788                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
7789                 }
7790               for (i = 0; i < coding_category_raw_text; i++)
7791                 {
7792                   category = coding_priorities[i];
7793                   this = coding_categories + category;
7794
7795                   if (this->id < 0)
7796                     {
7797                       /* No coding system of this category is defined.  */
7798                       detect_info.rejected |= (1 << category);
7799                     }
7800                   else if (category >= coding_category_raw_text)
7801                     continue;
7802                   else if (detect_info.checked & (1 << category))
7803                     {
7804                       if (highest
7805                           && (detect_info.found & (1 << category)))
7806                         break;
7807                     }
7808                   else if ((*(this->detector)) (&coding, &detect_info)
7809                            && highest
7810                            && (detect_info.found & (1 << category)))
7811                     {
7812                       if (category == coding_category_utf_16_auto)
7813                         {
7814                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7815                             category = coding_category_utf_16_le;
7816                           else
7817                             category = coding_category_utf_16_be;
7818                         }
7819                       break;
7820                     }
7821                 }
7822             }
7823         }
7824
7825       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
7826         {
7827           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7828           id = coding_categories[coding_category_raw_text].id;
7829           val = Fcons (make_number (id), Qnil);
7830         }
7831       else if (! detect_info.rejected && ! detect_info.found)
7832         {
7833           detect_info.found = CATEGORY_MASK_ANY;
7834           id = coding_categories[coding_category_undecided].id;
7835           val = Fcons (make_number (id), Qnil);
7836         }
7837       else if (highest)
7838         {
7839           if (detect_info.found)
7840             {
7841               detect_info.found = 1 << category;
7842               val = Fcons (make_number (this->id), Qnil);
7843             }
7844           else
7845             for (i = 0; i < coding_category_raw_text; i++)
7846               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7847                 {
7848                   detect_info.found = 1 << coding_priorities[i];
7849                   id = coding_categories[coding_priorities[i]].id;
7850                   val = Fcons (make_number (id), Qnil);
7851                   break;
7852                 }
7853         }
7854       else
7855         {
7856           int mask = detect_info.rejected | detect_info.found;
7857           int found = 0;
7858           val = Qnil;
7859
7860           for (i = coding_category_raw_text - 1; i >= 0; i--)
7861             {
7862               category = coding_priorities[i];
7863               if (! (mask & (1 << category)))
7864                 {
7865                   found |= 1 << category;
7866                   id = coding_categories[category].id;
7867                   if (id >= 0)
7868                     val = Fcons (make_number (id), val);
7869                 }
7870             }
7871           for (i = coding_category_raw_text - 1; i >= 0; i--)
7872             {
7873               category = coding_priorities[i];
7874               if (detect_info.found & (1 << category))
7875                 {
7876                   id = coding_categories[category].id;
7877                   val = Fcons (make_number (id), val);
7878                 }
7879             }
7880           detect_info.found |= found;
7881         }
7882     }
7883   else if (base_category == coding_category_utf_8_auto)
7884     {
7885       if (detect_coding_utf_8 (&coding, &detect_info))
7886         {
7887           struct coding_system *this;
7888
7889           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
7890             this = coding_categories + coding_category_utf_8_sig;
7891           else
7892             this = coding_categories + coding_category_utf_8_nosig;
7893           val = Fcons (make_number (this->id), Qnil);
7894         }
7895     }
7896   else if (base_category == coding_category_utf_16_auto)
7897     {
7898       if (detect_coding_utf_16 (&coding, &detect_info))
7899         {
7900           struct coding_system *this;
7901
7902           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7903             this = coding_categories + coding_category_utf_16_le;
7904           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7905             this = coding_categories + coding_category_utf_16_be;
7906           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7907             this = coding_categories + coding_category_utf_16_be_nosig;
7908           else
7909             this = coding_categories + coding_category_utf_16_le_nosig;
7910           val = Fcons (make_number (this->id), Qnil);
7911         }
7912     }
7913   else
7914     {
7915       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7916       val = Fcons (make_number (coding.id), Qnil);
7917     }
7918
7919   /* Then, detect eol-format if necessary.  */
7920   {
7921     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7922     Lisp_Object tail;
7923
7924     if (VECTORP (eol_type))
7925       {
7926         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7927           {
7928             if (null_byte_found)
7929               normal_eol = EOL_SEEN_LF;
7930             else
7931               normal_eol = detect_eol (coding.source, src_bytes,
7932                                        coding_category_raw_text);
7933           }
7934         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7935                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7936           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7937                                       coding_category_utf_16_be);
7938         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7939                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7940           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7941                                       coding_category_utf_16_le);
7942       }
7943     else
7944       {
7945         if (EQ (eol_type, Qunix))
7946           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7947         else if (EQ (eol_type, Qdos))
7948           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7949         else
7950           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7951       }
7952
7953     for (tail = val; CONSP (tail); tail = XCDR (tail))
7954       {
7955         enum coding_category category;
7956         int this_eol;
7957
7958         id = XINT (XCAR (tail));
7959         attrs = CODING_ID_ATTRS (id);
7960         category = XINT (CODING_ATTR_CATEGORY (attrs));
7961         eol_type = CODING_ID_EOL_TYPE (id);
7962         if (VECTORP (eol_type))
7963           {
7964             if (category == coding_category_utf_16_be
7965                 || category == coding_category_utf_16_be_nosig)
7966               this_eol = utf_16_be_eol;
7967             else if (category == coding_category_utf_16_le
7968                      || category == coding_category_utf_16_le_nosig)
7969               this_eol = utf_16_le_eol;
7970             else
7971               this_eol = normal_eol;
7972
7973             if (this_eol == EOL_SEEN_LF)
7974               XSETCAR (tail, AREF (eol_type, 0));
7975             else if (this_eol == EOL_SEEN_CRLF)
7976               XSETCAR (tail, AREF (eol_type, 1));
7977             else if (this_eol == EOL_SEEN_CR)
7978               XSETCAR (tail, AREF (eol_type, 2));
7979             else
7980               XSETCAR (tail, CODING_ID_NAME (id));
7981           }
7982         else
7983           XSETCAR (tail, CODING_ID_NAME (id));
7984       }
7985   }
7986
7987   return (highest ? XCAR (val) : val);
7988 }
7989
7990
7991 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7992        2, 3, 0,
7993        doc: /* Detect coding system of the text in the region between START and END.
7994 Return a list of possible coding systems ordered by priority.
7995
7996 If only ASCII characters are found (except for such ISO-2022 control
7997 characters as ESC), it returns a list of single element `undecided'
7998 or its subsidiary coding system according to a detected end-of-line
7999 format.
8000
8001 If optional argument HIGHEST is non-nil, return the coding system of
8002 highest priority.  */)
8003      (start, end, highest)
8004      Lisp_Object start, end, highest;
8005 {
8006   int from, to;
8007   int from_byte, to_byte;
8008
8009   CHECK_NUMBER_COERCE_MARKER (start);
8010   CHECK_NUMBER_COERCE_MARKER (end);
8011
8012   validate_region (&start, &end);
8013   from = XINT (start), to = XINT (end);
8014   from_byte = CHAR_TO_BYTE (from);
8015   to_byte = CHAR_TO_BYTE (to);
8016
8017   if (from < GPT && to >= GPT)
8018     move_gap_both (to, to_byte);
8019
8020   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8021                                to - from, to_byte - from_byte,
8022                                !NILP (highest),
8023                                !NILP (current_buffer
8024                                       ->enable_multibyte_characters),
8025                                Qnil);
8026 }
8027
8028 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8029        1, 2, 0,
8030        doc: /* Detect coding system of the text in STRING.
8031 Return a list of possible coding systems ordered by priority.
8032
8033 If only ASCII characters are found (except for such ISO-2022 control
8034 characters as ESC), it returns a list of single element `undecided'
8035 or its subsidiary coding system according to a detected end-of-line
8036 format.
8037
8038 If optional argument HIGHEST is non-nil, return the coding system of
8039 highest priority.  */)
8040      (string, highest)
8041      Lisp_Object string, highest;
8042 {
8043   CHECK_STRING (string);
8044
8045   return detect_coding_system (SDATA (string),
8046                                SCHARS (string), SBYTES (string),
8047                                !NILP (highest), STRING_MULTIBYTE (string),
8048                                Qnil);
8049 }
8050
8051
8052 static INLINE int
8053 char_encodable_p (c, attrs)
8054      int c;
8055      Lisp_Object attrs;
8056 {
8057   Lisp_Object tail;
8058   struct charset *charset;
8059   Lisp_Object translation_table;
8060
8061   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8062   if (! NILP (translation_table))
8063     c = translate_char (translation_table, c);
8064   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8065        CONSP (tail); tail = XCDR (tail))
8066     {
8067       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8068       if (CHAR_CHARSET_P (c, charset))
8069         break;
8070     }
8071   return (! NILP (tail));
8072 }
8073
8074
8075 /* Return a list of coding systems that safely encode the text between
8076    START and END.  If EXCLUDE is non-nil, it is a list of coding
8077    systems not to check.  The returned list doesn't contain any such
8078    coding systems.  In any case, if the text contains only ASCII or is
8079    unibyte, return t.  */
8080
8081 DEFUN ("find-coding-systems-region-internal",
8082        Ffind_coding_systems_region_internal,
8083        Sfind_coding_systems_region_internal, 2, 3, 0,
8084        doc: /* Internal use only.  */)
8085      (start, end, exclude)
8086      Lisp_Object start, end, exclude;
8087 {
8088   Lisp_Object coding_attrs_list, safe_codings;
8089   EMACS_INT start_byte, end_byte;
8090   const unsigned char *p, *pbeg, *pend;
8091   int c;
8092   Lisp_Object tail, elt;
8093
8094   if (STRINGP (start))
8095     {
8096       if (!STRING_MULTIBYTE (start)
8097           || SCHARS (start) == SBYTES (start))
8098         return Qt;
8099       start_byte = 0;
8100       end_byte = SBYTES (start);
8101     }
8102   else
8103     {
8104       CHECK_NUMBER_COERCE_MARKER (start);
8105       CHECK_NUMBER_COERCE_MARKER (end);
8106       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8107         args_out_of_range (start, end);
8108       if (NILP (current_buffer->enable_multibyte_characters))
8109         return Qt;
8110       start_byte = CHAR_TO_BYTE (XINT (start));
8111       end_byte = CHAR_TO_BYTE (XINT (end));
8112       if (XINT (end) - XINT (start) == end_byte - start_byte)
8113         return Qt;
8114
8115       if (XINT (start) < GPT && XINT (end) > GPT)
8116         {
8117           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8118             move_gap_both (XINT (start), start_byte);
8119           else
8120             move_gap_both (XINT (end), end_byte);
8121         }
8122     }
8123
8124   coding_attrs_list = Qnil;
8125   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8126     if (NILP (exclude)
8127         || NILP (Fmemq (XCAR (tail), exclude)))
8128       {
8129         Lisp_Object attrs;
8130
8131         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8132         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8133             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8134           {
8135             ASET (attrs, coding_attr_trans_tbl,
8136                   get_translation_table (attrs, 1, NULL));
8137             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8138           }
8139       }
8140
8141   if (STRINGP (start))
8142     p = pbeg = SDATA (start);
8143   else
8144     p = pbeg = BYTE_POS_ADDR (start_byte);
8145   pend = p + (end_byte - start_byte);
8146
8147   while (p < pend && ASCII_BYTE_P (*p)) p++;
8148   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8149
8150   while (p < pend)
8151     {
8152       if (ASCII_BYTE_P (*p))
8153         p++;
8154       else
8155         {
8156           c = STRING_CHAR_ADVANCE (p);
8157
8158           charset_map_loaded = 0;
8159           for (tail = coding_attrs_list; CONSP (tail);)
8160             {
8161               elt = XCAR (tail);
8162               if (NILP (elt))
8163                 tail = XCDR (tail);
8164               else if (char_encodable_p (c, elt))
8165                 tail = XCDR (tail);
8166               else if (CONSP (XCDR (tail)))
8167                 {
8168                   XSETCAR (tail, XCAR (XCDR (tail)));
8169                   XSETCDR (tail, XCDR (XCDR (tail)));
8170                 }
8171               else
8172                 {
8173                   XSETCAR (tail, Qnil);
8174                   tail = XCDR (tail);
8175                 }
8176             }
8177           if (charset_map_loaded)
8178             {
8179               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8180
8181               if (STRINGP (start))
8182                 pbeg = SDATA (start);
8183               else
8184                 pbeg = BYTE_POS_ADDR (start_byte);
8185               p = pbeg + p_offset;
8186               pend = pbeg + pend_offset;
8187             }
8188         }
8189     }
8190
8191   safe_codings = list2 (Qraw_text, Qno_conversion);
8192   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8193     if (! NILP (XCAR (tail)))
8194       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8195
8196   return safe_codings;
8197 }
8198
8199
8200 DEFUN ("unencodable-char-position", Funencodable_char_position,
8201        Sunencodable_char_position, 3, 5, 0,
8202        doc: /*
8203 Return position of first un-encodable character in a region.
8204 START and END specify the region and CODING-SYSTEM specifies the
8205 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8206
8207 If optional 4th argument COUNT is non-nil, it specifies at most how
8208 many un-encodable characters to search.  In this case, the value is a
8209 list of positions.
8210
8211 If optional 5th argument STRING is non-nil, it is a string to search
8212 for un-encodable characters.  In that case, START and END are indexes
8213 to the string.  */)
8214      (start, end, coding_system, count, string)
8215      Lisp_Object start, end, coding_system, count, string;
8216 {
8217   int n;
8218   struct coding_system coding;
8219   Lisp_Object attrs, charset_list, translation_table;
8220   Lisp_Object positions;
8221   int from, to;
8222   const unsigned char *p, *stop, *pend;
8223   int ascii_compatible;
8224
8225   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8226   attrs = CODING_ID_ATTRS (coding.id);
8227   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8228     return Qnil;
8229   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8230   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8231   translation_table = get_translation_table (attrs, 1, NULL);
8232
8233   if (NILP (string))
8234     {
8235       validate_region (&start, &end);
8236       from = XINT (start);
8237       to = XINT (end);
8238       if (NILP (current_buffer->enable_multibyte_characters)
8239           || (ascii_compatible
8240               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8241         return Qnil;
8242       p = CHAR_POS_ADDR (from);
8243       pend = CHAR_POS_ADDR (to);
8244       if (from < GPT && to >= GPT)
8245         stop = GPT_ADDR;
8246       else
8247         stop = pend;
8248     }
8249   else
8250     {
8251       CHECK_STRING (string);
8252       CHECK_NATNUM (start);
8253       CHECK_NATNUM (end);
8254       from = XINT (start);
8255       to = XINT (end);
8256       if (from > to
8257           || to > SCHARS (string))
8258         args_out_of_range_3 (string, start, end);
8259       if (! STRING_MULTIBYTE (string))
8260         return Qnil;
8261       p = SDATA (string) + string_char_to_byte (string, from);
8262       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8263       if (ascii_compatible && (to - from) == (pend - p))
8264         return Qnil;
8265     }
8266
8267   if (NILP (count))
8268     n = 1;
8269   else
8270     {
8271       CHECK_NATNUM (count);
8272       n = XINT (count);
8273     }
8274
8275   positions = Qnil;
8276   while (1)
8277     {
8278       int c;
8279
8280       if (ascii_compatible)
8281         while (p < stop && ASCII_BYTE_P (*p))
8282           p++, from++;
8283       if (p >= stop)
8284         {
8285           if (p >= pend)
8286             break;
8287           stop = pend;
8288           p = GAP_END_ADDR;
8289         }
8290
8291       c = STRING_CHAR_ADVANCE (p);
8292       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8293           && ! char_charset (translate_char (translation_table, c),
8294                              charset_list, NULL))
8295         {
8296           positions = Fcons (make_number (from), positions);
8297           n--;
8298           if (n == 0)
8299             break;
8300         }
8301
8302       from++;
8303     }
8304
8305   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8306 }
8307
8308
8309 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8310        Scheck_coding_systems_region, 3, 3, 0,
8311        doc: /* Check if the region is encodable by coding systems.
8312
8313 START and END are buffer positions specifying the region.
8314 CODING-SYSTEM-LIST is a list of coding systems to check.
8315
8316 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8317 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8318 whole region, POS0, POS1, ... are buffer positions where non-encodable
8319 characters are found.
8320
8321 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8322 value is nil.
8323
8324 START may be a string.  In that case, check if the string is
8325 encodable, and the value contains indices to the string instead of
8326 buffer positions.  END is ignored.  */)
8327      (start, end, coding_system_list)
8328      Lisp_Object start, end, coding_system_list;
8329 {
8330   Lisp_Object list;
8331   EMACS_INT start_byte, end_byte;
8332   int pos;
8333   const unsigned char *p, *pbeg, *pend;
8334   int c;
8335   Lisp_Object tail, elt, attrs;
8336
8337   if (STRINGP (start))
8338     {
8339       if (!STRING_MULTIBYTE (start)
8340           && SCHARS (start) != SBYTES (start))
8341         return Qnil;
8342       start_byte = 0;
8343       end_byte = SBYTES (start);
8344       pos = 0;
8345     }
8346   else
8347     {
8348       CHECK_NUMBER_COERCE_MARKER (start);
8349       CHECK_NUMBER_COERCE_MARKER (end);
8350       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8351         args_out_of_range (start, end);
8352       if (NILP (current_buffer->enable_multibyte_characters))
8353         return Qnil;
8354       start_byte = CHAR_TO_BYTE (XINT (start));
8355       end_byte = CHAR_TO_BYTE (XINT (end));
8356       if (XINT (end) - XINT (start) == end_byte - start_byte)
8357         return Qt;
8358
8359       if (XINT (start) < GPT && XINT (end) > GPT)
8360         {
8361           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8362             move_gap_both (XINT (start), start_byte);
8363           else
8364             move_gap_both (XINT (end), end_byte);
8365         }
8366       pos = XINT (start);
8367     }
8368
8369   list = Qnil;
8370   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8371     {
8372       elt = XCAR (tail);
8373       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8374       ASET (attrs, coding_attr_trans_tbl,
8375             get_translation_table (attrs, 1, NULL));
8376       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8377     }
8378
8379   if (STRINGP (start))
8380     p = pbeg = SDATA (start);
8381   else
8382     p = pbeg = BYTE_POS_ADDR (start_byte);
8383   pend = p + (end_byte - start_byte);
8384
8385   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8386   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8387
8388   while (p < pend)
8389     {
8390       if (ASCII_BYTE_P (*p))
8391         p++;
8392       else
8393         {
8394           c = STRING_CHAR_ADVANCE (p);
8395
8396           charset_map_loaded = 0;
8397           for (tail = list; CONSP (tail); tail = XCDR (tail))
8398             {
8399               elt = XCDR (XCAR (tail));
8400               if (! char_encodable_p (c, XCAR (elt)))
8401                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8402             }
8403           if (charset_map_loaded)
8404             {
8405               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8406
8407               if (STRINGP (start))
8408                 pbeg = SDATA (start);
8409               else
8410                 pbeg = BYTE_POS_ADDR (start_byte);
8411               p = pbeg + p_offset;
8412               pend = pbeg + pend_offset;
8413             }
8414         }
8415       pos++;
8416     }
8417
8418   tail = list;
8419   list = Qnil;
8420   for (; CONSP (tail); tail = XCDR (tail))
8421     {
8422       elt = XCAR (tail);
8423       if (CONSP (XCDR (XCDR (elt))))
8424         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8425                       list);
8426     }
8427
8428   return list;
8429 }
8430
8431
8432 Lisp_Object
8433 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8434      Lisp_Object start, end, coding_system, dst_object;
8435      int encodep, norecord;
8436 {
8437   struct coding_system coding;
8438   EMACS_INT from, from_byte, to, to_byte;
8439   Lisp_Object src_object;
8440
8441   CHECK_NUMBER_COERCE_MARKER (start);
8442   CHECK_NUMBER_COERCE_MARKER (end);
8443   if (NILP (coding_system))
8444     coding_system = Qno_conversion;
8445   else
8446     CHECK_CODING_SYSTEM (coding_system);
8447   src_object = Fcurrent_buffer ();
8448   if (NILP (dst_object))
8449     dst_object = src_object;
8450   else if (! EQ (dst_object, Qt))
8451     CHECK_BUFFER (dst_object);
8452
8453   validate_region (&start, &end);
8454   from = XFASTINT (start);
8455   from_byte = CHAR_TO_BYTE (from);
8456   to = XFASTINT (end);
8457   to_byte = CHAR_TO_BYTE (to);
8458
8459   setup_coding_system (coding_system, &coding);
8460   coding.mode |= CODING_MODE_LAST_BLOCK;
8461
8462   if (encodep)
8463     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8464                           dst_object);
8465   else
8466     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8467                           dst_object);
8468   if (! norecord)
8469     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8470
8471   return (BUFFERP (dst_object)
8472           ? make_number (coding.produced_char)
8473           : coding.dst_object);
8474 }
8475
8476
8477 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8478        3, 4, "r\nzCoding system: ",
8479        doc: /* Decode the current region from the specified coding system.
8480 When called from a program, takes four arguments:
8481         START, END, CODING-SYSTEM, and DESTINATION.
8482 START and END are buffer positions.
8483
8484 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8485 If nil, the region between START and END is replaced by the decoded text.
8486 If buffer, the decoded text is inserted in that buffer after point (point
8487 does not move).
8488 In those cases, the length of the decoded text is returned.
8489 If DESTINATION is t, the decoded text is returned.
8490
8491 This function sets `last-coding-system-used' to the precise coding system
8492 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8493 not fully specified.)  */)
8494      (start, end, coding_system, destination)
8495      Lisp_Object start, end, coding_system, destination;
8496 {
8497   return code_convert_region (start, end, coding_system, destination, 0, 0);
8498 }
8499
8500 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8501        3, 4, "r\nzCoding system: ",
8502        doc: /* Encode the current region by specified coding system.
8503 When called from a program, takes four arguments:
8504         START, END, CODING-SYSTEM and DESTINATION.
8505 START and END are buffer positions.
8506
8507 Optional 4th arguments DESTINATION specifies where the encoded text goes.
8508 If nil, the region between START and END is replace by the encoded text.
8509 If buffer, the encoded text is inserted in that buffer after point (point
8510 does not move).
8511 In those cases, the length of the encoded text is returned.
8512 If DESTINATION is t, the encoded text is returned.
8513
8514 This function sets `last-coding-system-used' to the precise coding system
8515 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8516 not fully specified.)  */)
8517   (start, end, coding_system, destination)
8518      Lisp_Object start, end, coding_system, destination;
8519 {
8520   return code_convert_region (start, end, coding_system, destination, 1, 0);
8521 }
8522
8523 Lisp_Object
8524 code_convert_string (string, coding_system, dst_object,
8525                      encodep, nocopy, norecord)
8526      Lisp_Object string, coding_system, dst_object;
8527      int encodep, nocopy, norecord;
8528 {
8529   struct coding_system coding;
8530   EMACS_INT chars, bytes;
8531
8532   CHECK_STRING (string);
8533   if (NILP (coding_system))
8534     {
8535       if (! norecord)
8536         Vlast_coding_system_used = Qno_conversion;
8537       if (NILP (dst_object))
8538         return (nocopy ? Fcopy_sequence (string) : string);
8539     }
8540
8541   if (NILP (coding_system))
8542     coding_system = Qno_conversion;
8543   else
8544     CHECK_CODING_SYSTEM (coding_system);
8545   if (NILP (dst_object))
8546     dst_object = Qt;
8547   else if (! EQ (dst_object, Qt))
8548     CHECK_BUFFER (dst_object);
8549
8550   setup_coding_system (coding_system, &coding);
8551   coding.mode |= CODING_MODE_LAST_BLOCK;
8552   chars = SCHARS (string);
8553   bytes = SBYTES (string);
8554   if (encodep)
8555     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8556   else
8557     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8558   if (! norecord)
8559     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8560
8561   return (BUFFERP (dst_object)
8562           ? make_number (coding.produced_char)
8563           : coding.dst_object);
8564 }
8565
8566
8567 /* Encode or decode STRING according to CODING_SYSTEM.
8568    Do not set Vlast_coding_system_used.
8569
8570    This function is called only from macros DECODE_FILE and
8571    ENCODE_FILE, thus we ignore character composition.  */
8572
8573 Lisp_Object
8574 code_convert_string_norecord (string, coding_system, encodep)
8575      Lisp_Object string, coding_system;
8576      int encodep;
8577 {
8578   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
8579 }
8580
8581
8582 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8583        2, 4, 0,
8584        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8585
8586 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8587 if the decoding operation is trivial.
8588
8589 Optional fourth arg BUFFER non-nil means that the decoded text is
8590 inserted in that buffer after point (point does not move).  In this
8591 case, the return value is the length of the decoded text.
8592
8593 This function sets `last-coding-system-used' to the precise coding system
8594 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8595 not fully specified.)  */)
8596   (string, coding_system, nocopy, buffer)
8597      Lisp_Object string, coding_system, nocopy, buffer;
8598 {
8599   return code_convert_string (string, coding_system, buffer,
8600                               0, ! NILP (nocopy), 0);
8601 }
8602
8603 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8604        2, 4, 0,
8605        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8606
8607 Optional third arg NOCOPY non-nil means it is OK to return STRING
8608 itself if the encoding operation is trivial.
8609
8610 Optional fourth arg BUFFER non-nil means that the encoded text is
8611 inserted in that buffer after point (point does not move).  In this
8612 case, the return value is the length of the encoded text.
8613
8614 This function sets `last-coding-system-used' to the precise coding system
8615 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8616 not fully specified.)  */)
8617      (string, coding_system, nocopy, buffer)
8618      Lisp_Object string, coding_system, nocopy, buffer;
8619 {
8620   return code_convert_string (string, coding_system, buffer,
8621                               1, ! NILP (nocopy), 1);
8622 }
8623
8624 \f
8625 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
8626        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8627 Return the corresponding character.  */)
8628      (code)
8629      Lisp_Object code;
8630 {
8631   Lisp_Object spec, attrs, val;
8632   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8633   int c;
8634
8635   CHECK_NATNUM (code);
8636   c = XFASTINT (code);
8637   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8638   attrs = AREF (spec, 0);
8639
8640   if (ASCII_BYTE_P (c)
8641       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8642     return code;
8643
8644   val = CODING_ATTR_CHARSET_LIST (attrs);
8645   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8646   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8647   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
8648
8649   if (c <= 0x7F)
8650     charset = charset_roman;
8651   else if (c >= 0xA0 && c < 0xDF)
8652     {
8653       charset = charset_kana;
8654       c -= 0x80;
8655     }
8656   else
8657     {
8658       int s1 = c >> 8, s2 = c & 0xFF;
8659
8660       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8661           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8662         error ("Invalid code: %d", code);
8663       SJIS_TO_JIS (c);
8664       charset = charset_kanji;
8665     }
8666   c = DECODE_CHAR (charset, c);
8667   if (c < 0)
8668     error ("Invalid code: %d", code);
8669   return make_number (c);
8670 }
8671
8672
8673 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8674        doc: /* Encode a Japanese character CH to shift_jis encoding.
8675 Return the corresponding code in SJIS.  */)
8676      (ch)
8677     Lisp_Object ch;
8678 {
8679   Lisp_Object spec, attrs, charset_list;
8680   int c;
8681   struct charset *charset;
8682   unsigned code;
8683
8684   CHECK_CHARACTER (ch);
8685   c = XFASTINT (ch);
8686   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8687   attrs = AREF (spec, 0);
8688
8689   if (ASCII_CHAR_P (c)
8690       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8691     return ch;
8692
8693   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8694   charset = char_charset (c, charset_list, &code);
8695   if (code == CHARSET_INVALID_CODE (charset))
8696     error ("Can't encode by shift_jis encoding: %d", c);
8697   JIS_TO_SJIS (code);
8698
8699   return make_number (code);
8700 }
8701
8702 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8703        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8704 Return the corresponding character.  */)
8705      (code)
8706      Lisp_Object code;
8707 {
8708   Lisp_Object spec, attrs, val;
8709   struct charset *charset_roman, *charset_big5, *charset;
8710   int c;
8711
8712   CHECK_NATNUM (code);
8713   c = XFASTINT (code);
8714   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8715   attrs = AREF (spec, 0);
8716
8717   if (ASCII_BYTE_P (c)
8718       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8719     return code;
8720
8721   val = CODING_ATTR_CHARSET_LIST (attrs);
8722   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8723   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8724
8725   if (c <= 0x7F)
8726     charset = charset_roman;
8727   else
8728     {
8729       int b1 = c >> 8, b2 = c & 0x7F;
8730       if (b1 < 0xA1 || b1 > 0xFE
8731           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8732         error ("Invalid code: %d", code);
8733       charset = charset_big5;
8734     }
8735   c = DECODE_CHAR (charset, (unsigned )c);
8736   if (c < 0)
8737     error ("Invalid code: %d", code);
8738   return make_number (c);
8739 }
8740
8741 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8742        doc: /* Encode the Big5 character CH to BIG5 coding system.
8743 Return the corresponding character code in Big5.  */)
8744      (ch)
8745      Lisp_Object ch;
8746 {
8747   Lisp_Object spec, attrs, charset_list;
8748   struct charset *charset;
8749   int c;
8750   unsigned code;
8751
8752   CHECK_CHARACTER (ch);
8753   c = XFASTINT (ch);
8754   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8755   attrs = AREF (spec, 0);
8756   if (ASCII_CHAR_P (c)
8757       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8758     return ch;
8759
8760   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8761   charset = char_charset (c, charset_list, &code);
8762   if (code == CHARSET_INVALID_CODE (charset))
8763     error ("Can't encode by Big5 encoding: %d", c);
8764
8765   return make_number (code);
8766 }
8767
8768 \f
8769 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
8770        Sset_terminal_coding_system_internal, 1, 2, 0,
8771        doc: /* Internal use only.  */)
8772      (coding_system, terminal)
8773      Lisp_Object coding_system;
8774      Lisp_Object terminal;
8775 {
8776   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8777   CHECK_SYMBOL (coding_system);
8778   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
8779   /* We had better not send unsafe characters to terminal.  */
8780   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
8781   /* Characer composition should be disabled.  */
8782   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8783   terminal_coding->src_multibyte = 1;
8784   terminal_coding->dst_multibyte = 0;
8785   return Qnil;
8786 }
8787
8788 DEFUN ("set-safe-terminal-coding-system-internal",
8789        Fset_safe_terminal_coding_system_internal,
8790        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8791        doc: /* Internal use only.  */)
8792      (coding_system)
8793      Lisp_Object coding_system;
8794 {
8795   CHECK_SYMBOL (coding_system);
8796   setup_coding_system (Fcheck_coding_system (coding_system),
8797                        &safe_terminal_coding);
8798   /* Characer composition should be disabled.  */
8799   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8800   safe_terminal_coding.src_multibyte = 1;
8801   safe_terminal_coding.dst_multibyte = 0;
8802   return Qnil;
8803 }
8804
8805 DEFUN ("terminal-coding-system", Fterminal_coding_system,
8806        Sterminal_coding_system, 0, 1, 0,
8807        doc: /* Return coding system specified for terminal output on the given terminal.
8808 TERMINAL may be a terminal id, a frame, or nil for the selected
8809 frame's terminal device.  */)
8810      (terminal)
8811      Lisp_Object terminal;
8812 {
8813   struct coding_system *terminal_coding
8814     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8815   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
8816
8817   /* For backward compatibility, return nil if it is `undecided'. */
8818   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
8819 }
8820
8821 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
8822        Sset_keyboard_coding_system_internal, 1, 2, 0,
8823        doc: /* Internal use only.  */)
8824      (coding_system, terminal)
8825      Lisp_Object coding_system;
8826      Lisp_Object terminal;
8827 {
8828   struct terminal *t = get_terminal (terminal, 1);
8829   CHECK_SYMBOL (coding_system);
8830   setup_coding_system (Fcheck_coding_system (coding_system),
8831                        TERMINAL_KEYBOARD_CODING (t));
8832   /* Characer composition should be disabled.  */
8833   TERMINAL_KEYBOARD_CODING (t)->common_flags
8834     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8835   return Qnil;
8836 }
8837
8838 DEFUN ("keyboard-coding-system",
8839        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
8840        doc: /* Return coding system specified for decoding keyboard input.  */)
8841      (terminal)
8842      Lisp_Object terminal;
8843 {
8844   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8845                          (get_terminal (terminal, 1))->id);
8846 }
8847
8848 \f
8849 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8850        Sfind_operation_coding_system,  1, MANY, 0,
8851        doc: /* Choose a coding system for an operation based on the target name.
8852 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8853 DECODING-SYSTEM is the coding system to use for decoding
8854 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8855 for encoding (in case OPERATION does encoding).
8856
8857 The first argument OPERATION specifies an I/O primitive:
8858   For file I/O, `insert-file-contents' or `write-region'.
8859   For process I/O, `call-process', `call-process-region', or `start-process'.
8860   For network I/O, `open-network-stream'.
8861
8862 The remaining arguments should be the same arguments that were passed
8863 to the primitive.  Depending on which primitive, one of those arguments
8864 is selected as the TARGET.  For example, if OPERATION does file I/O,
8865 whichever argument specifies the file name is TARGET.
8866
8867 TARGET has a meaning which depends on OPERATION:
8868   For file I/O, TARGET is a file name (except for the special case below).
8869   For process I/O, TARGET is a process name.
8870   For network I/O, TARGET is a service name or a port number.
8871
8872 This function looks up what is specified for TARGET in
8873 `file-coding-system-alist', `process-coding-system-alist',
8874 or `network-coding-system-alist' depending on OPERATION.
8875 They may specify a coding system, a cons of coding systems,
8876 or a function symbol to call.
8877 In the last case, we call the function with one argument,
8878 which is a list of all the arguments given to this function.
8879 If the function can't decide a coding system, it can return
8880 `undecided' so that the normal code-detection is performed.
8881
8882 If OPERATION is `insert-file-contents', the argument corresponding to
8883 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
8884 file name to look up, and BUFFER is a buffer that contains the file's
8885 contents (not yet decoded).  If `file-coding-system-alist' specifies a
8886 function to call for FILENAME, that function should examine the
8887 contents of BUFFER instead of reading the file.
8888
8889 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
8890      (nargs, args)
8891      int nargs;
8892      Lisp_Object *args;
8893 {
8894   Lisp_Object operation, target_idx, target, val;
8895   register Lisp_Object chain;
8896
8897   if (nargs < 2)
8898     error ("Too few arguments");
8899   operation = args[0];
8900   if (!SYMBOLP (operation)
8901       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8902     error ("Invalid first argument");
8903   if (nargs < 1 + XINT (target_idx))
8904     error ("Too few arguments for operation: %s",
8905            SDATA (SYMBOL_NAME (operation)));
8906   target = args[XINT (target_idx) + 1];
8907   if (!(STRINGP (target)
8908         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8909             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
8910         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8911     error ("Invalid %dth argument", XINT (target_idx) + 1);
8912   if (CONSP (target))
8913     target = XCAR (target);
8914
8915   chain = ((EQ (operation, Qinsert_file_contents)
8916             || EQ (operation, Qwrite_region))
8917            ? Vfile_coding_system_alist
8918            : (EQ (operation, Qopen_network_stream)
8919               ? Vnetwork_coding_system_alist
8920               : Vprocess_coding_system_alist));
8921   if (NILP (chain))
8922     return Qnil;
8923
8924   for (; CONSP (chain); chain = XCDR (chain))
8925     {
8926       Lisp_Object elt;
8927
8928       elt = XCAR (chain);
8929       if (CONSP (elt)
8930           && ((STRINGP (target)
8931                && STRINGP (XCAR (elt))
8932                && fast_string_match (XCAR (elt), target) >= 0)
8933               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8934         {
8935           val = XCDR (elt);
8936           /* Here, if VAL is both a valid coding system and a valid
8937              function symbol, we return VAL as a coding system.  */
8938           if (CONSP (val))
8939             return val;
8940           if (! SYMBOLP (val))
8941             return Qnil;
8942           if (! NILP (Fcoding_system_p (val)))
8943             return Fcons (val, val);
8944           if (! NILP (Ffboundp (val)))
8945             {
8946               /* We use call1 rather than safe_call1
8947                  so as to get bug reports about functions called here
8948                  which don't handle the current interface.  */
8949               val = call1 (val, Flist (nargs, args));
8950               if (CONSP (val))
8951                 return val;
8952               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8953                 return Fcons (val, val);
8954             }
8955           return Qnil;
8956         }
8957     }
8958   return Qnil;
8959 }
8960
8961 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8962        Sset_coding_system_priority, 0, MANY, 0,
8963        doc: /* Assign higher priority to the coding systems given as arguments.
8964 If multiple coding systems belong to the same category,
8965 all but the first one are ignored.
8966
8967 usage: (set-coding-system-priority &rest coding-systems)  */)
8968      (nargs, args)
8969      int nargs;
8970      Lisp_Object *args;
8971 {
8972   int i, j;
8973   int changed[coding_category_max];
8974   enum coding_category priorities[coding_category_max];
8975
8976   bzero (changed, sizeof changed);
8977
8978   for (i = j = 0; i < nargs; i++)
8979     {
8980       enum coding_category category;
8981       Lisp_Object spec, attrs;
8982
8983       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8984       attrs = AREF (spec, 0);
8985       category = XINT (CODING_ATTR_CATEGORY (attrs));
8986       if (changed[category])
8987         /* Ignore this coding system because a coding system of the
8988            same category already had a higher priority.  */
8989         continue;
8990       changed[category] = 1;
8991       priorities[j++] = category;
8992       if (coding_categories[category].id >= 0
8993           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8994         setup_coding_system (args[i], &coding_categories[category]);
8995       Fset (AREF (Vcoding_category_table, category), args[i]);
8996     }
8997
8998   /* Now we have decided top J priorities.  Reflect the order of the
8999      original priorities to the remaining priorities.  */
9000
9001   for (i = j, j = 0; i < coding_category_max; i++, j++)
9002     {
9003       while (j < coding_category_max
9004              && changed[coding_priorities[j]])
9005         j++;
9006       if (j == coding_category_max)
9007         abort ();
9008       priorities[i] = coding_priorities[j];
9009     }
9010
9011   bcopy (priorities, coding_priorities, sizeof priorities);
9012
9013   /* Update `coding-category-list'.  */
9014   Vcoding_category_list = Qnil;
9015   for (i = coding_category_max - 1; i >= 0; i--)
9016     Vcoding_category_list
9017       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9018                Vcoding_category_list);
9019
9020   return Qnil;
9021 }
9022
9023 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9024        Scoding_system_priority_list, 0, 1, 0,
9025        doc: /* Return a list of coding systems ordered by their priorities.
9026 HIGHESTP non-nil means just return the highest priority one.  */)
9027      (highestp)
9028      Lisp_Object highestp;
9029 {
9030   int i;
9031   Lisp_Object val;
9032
9033   for (i = 0, val = Qnil; i < coding_category_max; i++)
9034     {
9035       enum coding_category category = coding_priorities[i];
9036       int id = coding_categories[category].id;
9037       Lisp_Object attrs;
9038
9039       if (id < 0)
9040         continue;
9041       attrs = CODING_ID_ATTRS (id);
9042       if (! NILP (highestp))
9043         return CODING_ATTR_BASE_NAME (attrs);
9044       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9045     }
9046   return Fnreverse (val);
9047 }
9048
9049 static char *suffixes[] = { "-unix", "-dos", "-mac" };
9050
9051 static Lisp_Object
9052 make_subsidiaries (base)
9053      Lisp_Object base;
9054 {
9055   Lisp_Object subsidiaries;
9056   int base_name_len = SBYTES (SYMBOL_NAME (base));
9057   char *buf = (char *) alloca (base_name_len + 6);
9058   int i;
9059
9060   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9061   subsidiaries = Fmake_vector (make_number (3), Qnil);
9062   for (i = 0; i < 3; i++)
9063     {
9064       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9065       ASET (subsidiaries, i, intern (buf));
9066     }
9067   return subsidiaries;
9068 }
9069
9070
9071 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9072        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9073        doc: /* For internal use only.
9074 usage: (define-coding-system-internal ...)  */)
9075      (nargs, args)
9076      int nargs;
9077      Lisp_Object *args;
9078 {
9079   Lisp_Object name;
9080   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9081   Lisp_Object attrs;            /* Vector of attributes.  */
9082   Lisp_Object eol_type;
9083   Lisp_Object aliases;
9084   Lisp_Object coding_type, charset_list, safe_charsets;
9085   enum coding_category category;
9086   Lisp_Object tail, val;
9087   int max_charset_id = 0;
9088   int i;
9089
9090   if (nargs < coding_arg_max)
9091     goto short_args;
9092
9093   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9094
9095   name = args[coding_arg_name];
9096   CHECK_SYMBOL (name);
9097   CODING_ATTR_BASE_NAME (attrs) = name;
9098
9099   val = args[coding_arg_mnemonic];
9100   if (! STRINGP (val))
9101     CHECK_CHARACTER (val);
9102   CODING_ATTR_MNEMONIC (attrs) = val;
9103
9104   coding_type = args[coding_arg_coding_type];
9105   CHECK_SYMBOL (coding_type);
9106   CODING_ATTR_TYPE (attrs) = coding_type;
9107
9108   charset_list = args[coding_arg_charset_list];
9109   if (SYMBOLP (charset_list))
9110     {
9111       if (EQ (charset_list, Qiso_2022))
9112         {
9113           if (! EQ (coding_type, Qiso_2022))
9114             error ("Invalid charset-list");
9115           charset_list = Viso_2022_charset_list;
9116         }
9117       else if (EQ (charset_list, Qemacs_mule))
9118         {
9119           if (! EQ (coding_type, Qemacs_mule))
9120             error ("Invalid charset-list");
9121           charset_list = Vemacs_mule_charset_list;
9122         }
9123       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9124         if (max_charset_id < XFASTINT (XCAR (tail)))
9125           max_charset_id = XFASTINT (XCAR (tail));
9126     }
9127   else
9128     {
9129       charset_list = Fcopy_sequence (charset_list);
9130       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9131         {
9132           struct charset *charset;
9133
9134           val = XCAR (tail);
9135           CHECK_CHARSET_GET_CHARSET (val, charset);
9136           if (EQ (coding_type, Qiso_2022)
9137               ? CHARSET_ISO_FINAL (charset) < 0
9138               : EQ (coding_type, Qemacs_mule)
9139               ? CHARSET_EMACS_MULE_ID (charset) < 0
9140               : 0)
9141             error ("Can't handle charset `%s'",
9142                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9143
9144           XSETCAR (tail, make_number (charset->id));
9145           if (max_charset_id < charset->id)
9146             max_charset_id = charset->id;
9147         }
9148     }
9149   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9150
9151   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
9152                                 make_number (255));
9153   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9154     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9155   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9156
9157   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9158
9159   val = args[coding_arg_decode_translation_table];
9160   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9161     CHECK_SYMBOL (val);
9162   CODING_ATTR_DECODE_TBL (attrs) = val;
9163
9164   val = args[coding_arg_encode_translation_table];
9165   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9166     CHECK_SYMBOL (val);
9167   CODING_ATTR_ENCODE_TBL (attrs) = val;
9168
9169   val = args[coding_arg_post_read_conversion];
9170   CHECK_SYMBOL (val);
9171   CODING_ATTR_POST_READ (attrs) = val;
9172
9173   val = args[coding_arg_pre_write_conversion];
9174   CHECK_SYMBOL (val);
9175   CODING_ATTR_PRE_WRITE (attrs) = val;
9176
9177   val = args[coding_arg_default_char];
9178   if (NILP (val))
9179     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9180   else
9181     {
9182       CHECK_CHARACTER (val);
9183       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9184     }
9185
9186   val = args[coding_arg_for_unibyte];
9187   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9188
9189   val = args[coding_arg_plist];
9190   CHECK_LIST (val);
9191   CODING_ATTR_PLIST (attrs) = val;
9192
9193   if (EQ (coding_type, Qcharset))
9194     {
9195       /* Generate a lisp vector of 256 elements.  Each element is nil,
9196          integer, or a list of charset IDs.
9197
9198          If Nth element is nil, the byte code N is invalid in this
9199          coding system.
9200
9201          If Nth element is a number NUM, N is the first byte of a
9202          charset whose ID is NUM.
9203
9204          If Nth element is a list of charset IDs, N is the first byte
9205          of one of them.  The list is sorted by dimensions of the
9206          charsets.  A charset of smaller dimension comes firtst. */
9207       val = Fmake_vector (make_number (256), Qnil);
9208
9209       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9210         {
9211           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9212           int dim = CHARSET_DIMENSION (charset);
9213           int idx = (dim - 1) * 4;
9214
9215           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9216             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9217
9218           for (i = charset->code_space[idx];
9219                i <= charset->code_space[idx + 1]; i++)
9220             {
9221               Lisp_Object tmp, tmp2;
9222               int dim2;
9223
9224               tmp = AREF (val, i);
9225               if (NILP (tmp))
9226                 tmp = XCAR (tail);
9227               else if (NUMBERP (tmp))
9228                 {
9229                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9230                   if (dim < dim2)
9231                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9232                   else
9233                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9234                 }
9235               else
9236                 {
9237                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9238                     {
9239                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9240                       if (dim < dim2)
9241                         break;
9242                     }
9243                   if (NILP (tmp2))
9244                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9245                   else
9246                     {
9247                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9248                       XSETCAR (tmp2, XCAR (tail));
9249                     }
9250                 }
9251               ASET (val, i, tmp);
9252             }
9253         }
9254       ASET (attrs, coding_attr_charset_valids, val);
9255       category = coding_category_charset;
9256     }
9257   else if (EQ (coding_type, Qccl))
9258     {
9259       Lisp_Object valids;
9260
9261       if (nargs < coding_arg_ccl_max)
9262         goto short_args;
9263
9264       val = args[coding_arg_ccl_decoder];
9265       CHECK_CCL_PROGRAM (val);
9266       if (VECTORP (val))
9267         val = Fcopy_sequence (val);
9268       ASET (attrs, coding_attr_ccl_decoder, val);
9269
9270       val = args[coding_arg_ccl_encoder];
9271       CHECK_CCL_PROGRAM (val);
9272       if (VECTORP (val))
9273         val = Fcopy_sequence (val);
9274       ASET (attrs, coding_attr_ccl_encoder, val);
9275
9276       val = args[coding_arg_ccl_valids];
9277       valids = Fmake_string (make_number (256), make_number (0));
9278       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9279         {
9280           int from, to;
9281
9282           val = Fcar (tail);
9283           if (INTEGERP (val))
9284             {
9285               from = to = XINT (val);
9286               if (from < 0 || from > 255)
9287                 args_out_of_range_3 (val, make_number (0), make_number (255));
9288             }
9289           else
9290             {
9291               CHECK_CONS (val);
9292               CHECK_NATNUM_CAR (val);
9293               CHECK_NATNUM_CDR (val);
9294               from = XINT (XCAR (val));
9295               if (from > 255)
9296                 args_out_of_range_3 (XCAR (val),
9297                                      make_number (0), make_number (255));
9298               to = XINT (XCDR (val));
9299               if (to < from || to > 255)
9300                 args_out_of_range_3 (XCDR (val),
9301                                      XCAR (val), make_number (255));
9302             }
9303           for (i = from; i <= to; i++)
9304             SSET (valids, i, 1);
9305         }
9306       ASET (attrs, coding_attr_ccl_valids, valids);
9307
9308       category = coding_category_ccl;
9309     }
9310   else if (EQ (coding_type, Qutf_16))
9311     {
9312       Lisp_Object bom, endian;
9313
9314       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9315
9316       if (nargs < coding_arg_utf16_max)
9317         goto short_args;
9318
9319       bom = args[coding_arg_utf16_bom];
9320       if (! NILP (bom) && ! EQ (bom, Qt))
9321         {
9322           CHECK_CONS (bom);
9323           val = XCAR (bom);
9324           CHECK_CODING_SYSTEM (val);
9325           val = XCDR (bom);
9326           CHECK_CODING_SYSTEM (val);
9327         }
9328       ASET (attrs, coding_attr_utf_bom, bom);
9329
9330       endian = args[coding_arg_utf16_endian];
9331       CHECK_SYMBOL (endian);
9332       if (NILP (endian))
9333         endian = Qbig;
9334       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9335         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9336       ASET (attrs, coding_attr_utf_16_endian, endian);
9337
9338       category = (CONSP (bom)
9339                   ? coding_category_utf_16_auto
9340                   : NILP (bom)
9341                   ? (EQ (endian, Qbig)
9342                      ? coding_category_utf_16_be_nosig
9343                      : coding_category_utf_16_le_nosig)
9344                   : (EQ (endian, Qbig)
9345                      ? coding_category_utf_16_be
9346                      : coding_category_utf_16_le));
9347     }
9348   else if (EQ (coding_type, Qiso_2022))
9349     {
9350       Lisp_Object initial, reg_usage, request, flags;
9351       int i;
9352
9353       if (nargs < coding_arg_iso2022_max)
9354         goto short_args;
9355
9356       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9357       CHECK_VECTOR (initial);
9358       for (i = 0; i < 4; i++)
9359         {
9360           val = Faref (initial, make_number (i));
9361           if (! NILP (val))
9362             {
9363               struct charset *charset;
9364
9365               CHECK_CHARSET_GET_CHARSET (val, charset);
9366               ASET (initial, i, make_number (CHARSET_ID (charset)));
9367               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9368                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9369             }
9370           else
9371             ASET (initial, i, make_number (-1));
9372         }
9373
9374       reg_usage = args[coding_arg_iso2022_reg_usage];
9375       CHECK_CONS (reg_usage);
9376       CHECK_NUMBER_CAR (reg_usage);
9377       CHECK_NUMBER_CDR (reg_usage);
9378
9379       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9380       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9381         {
9382           int id;
9383           Lisp_Object tmp;
9384
9385           val = Fcar (tail);
9386           CHECK_CONS (val);
9387           tmp = XCAR (val);
9388           CHECK_CHARSET_GET_ID (tmp, id);
9389           CHECK_NATNUM_CDR (val);
9390           if (XINT (XCDR (val)) >= 4)
9391             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9392           XSETCAR (val, make_number (id));
9393         }
9394
9395       flags = args[coding_arg_iso2022_flags];
9396       CHECK_NATNUM (flags);
9397       i = XINT (flags);
9398       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9399         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9400
9401       ASET (attrs, coding_attr_iso_initial, initial);
9402       ASET (attrs, coding_attr_iso_usage, reg_usage);
9403       ASET (attrs, coding_attr_iso_request, request);
9404       ASET (attrs, coding_attr_iso_flags, flags);
9405       setup_iso_safe_charsets (attrs);
9406
9407       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9408         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9409                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9410                     ? coding_category_iso_7_else
9411                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9412                     ? coding_category_iso_7
9413                     : coding_category_iso_7_tight);
9414       else
9415         {
9416           int id = XINT (AREF (initial, 1));
9417
9418           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9419                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9420                        || id < 0)
9421                       ? coding_category_iso_8_else
9422                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9423                       ? coding_category_iso_8_1
9424                       : coding_category_iso_8_2);
9425         }
9426       if (category != coding_category_iso_8_1
9427           && category != coding_category_iso_8_2)
9428         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9429     }
9430   else if (EQ (coding_type, Qemacs_mule))
9431     {
9432       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9433         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9434       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9435       category = coding_category_emacs_mule;
9436     }
9437   else if (EQ (coding_type, Qshift_jis))
9438     {
9439
9440       struct charset *charset;
9441
9442       if (XINT (Flength (charset_list)) != 3
9443           && XINT (Flength (charset_list)) != 4)
9444         error ("There should be three or four charsets");
9445
9446       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9447       if (CHARSET_DIMENSION (charset) != 1)
9448         error ("Dimension of charset %s is not one",
9449                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9450       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9451         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9452
9453       charset_list = XCDR (charset_list);
9454       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9455       if (CHARSET_DIMENSION (charset) != 1)
9456         error ("Dimension of charset %s is not one",
9457                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9458
9459       charset_list = XCDR (charset_list);
9460       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9461       if (CHARSET_DIMENSION (charset) != 2)
9462         error ("Dimension of charset %s is not two",
9463                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9464
9465       charset_list = XCDR (charset_list);
9466       if (! NILP (charset_list))
9467         {
9468           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9469           if (CHARSET_DIMENSION (charset) != 2)
9470             error ("Dimension of charset %s is not two",
9471                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9472         }
9473
9474       category = coding_category_sjis;
9475       Vsjis_coding_system = name;
9476     }
9477   else if (EQ (coding_type, Qbig5))
9478     {
9479       struct charset *charset;
9480
9481       if (XINT (Flength (charset_list)) != 2)
9482         error ("There should be just two charsets");
9483
9484       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9485       if (CHARSET_DIMENSION (charset) != 1)
9486         error ("Dimension of charset %s is not one",
9487                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9488       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9489         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9490
9491       charset_list = XCDR (charset_list);
9492       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9493       if (CHARSET_DIMENSION (charset) != 2)
9494         error ("Dimension of charset %s is not two",
9495                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9496
9497       category = coding_category_big5;
9498       Vbig5_coding_system = name;
9499     }
9500   else if (EQ (coding_type, Qraw_text))
9501     {
9502       category = coding_category_raw_text;
9503       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9504     }
9505   else if (EQ (coding_type, Qutf_8))
9506     {
9507       Lisp_Object bom;
9508
9509       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9510
9511       if (nargs < coding_arg_utf8_max)
9512         goto short_args;
9513
9514       bom = args[coding_arg_utf8_bom];
9515       if (! NILP (bom) && ! EQ (bom, Qt))
9516         {
9517           CHECK_CONS (bom);
9518           val = XCAR (bom);
9519           CHECK_CODING_SYSTEM (val);
9520           val = XCDR (bom);
9521           CHECK_CODING_SYSTEM (val);
9522         }
9523       ASET (attrs, coding_attr_utf_bom, bom);
9524
9525       category = (CONSP (bom) ? coding_category_utf_8_auto
9526                   : NILP (bom) ? coding_category_utf_8_nosig
9527                   : coding_category_utf_8_sig);
9528     }
9529   else if (EQ (coding_type, Qundecided))
9530     category = coding_category_undecided;
9531   else
9532     error ("Invalid coding system type: %s",
9533            SDATA (SYMBOL_NAME (coding_type)));
9534
9535   CODING_ATTR_CATEGORY (attrs) = make_number (category);
9536   CODING_ATTR_PLIST (attrs)
9537     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9538                                 CODING_ATTR_PLIST (attrs)));
9539   CODING_ATTR_PLIST (attrs)
9540     = Fcons (QCascii_compatible_p,
9541              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9542                     CODING_ATTR_PLIST (attrs)));
9543
9544   eol_type = args[coding_arg_eol_type];
9545   if (! NILP (eol_type)
9546       && ! EQ (eol_type, Qunix)
9547       && ! EQ (eol_type, Qdos)
9548       && ! EQ (eol_type, Qmac))
9549     error ("Invalid eol-type");
9550
9551   aliases = Fcons (name, Qnil);
9552
9553   if (NILP (eol_type))
9554     {
9555       eol_type = make_subsidiaries (name);
9556       for (i = 0; i < 3; i++)
9557         {
9558           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9559
9560           this_name = AREF (eol_type, i);
9561           this_aliases = Fcons (this_name, Qnil);
9562           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9563           this_spec = Fmake_vector (make_number (3), attrs);
9564           ASET (this_spec, 1, this_aliases);
9565           ASET (this_spec, 2, this_eol_type);
9566           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9567           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
9568           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9569           if (NILP (val))
9570             Vcoding_system_alist
9571               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9572                        Vcoding_system_alist);
9573         }
9574     }
9575
9576   spec_vec = Fmake_vector (make_number (3), attrs);
9577   ASET (spec_vec, 1, aliases);
9578   ASET (spec_vec, 2, eol_type);
9579
9580   Fputhash (name, spec_vec, Vcoding_system_hash_table);
9581   Vcoding_system_list = Fcons (name, Vcoding_system_list);
9582   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9583   if (NILP (val))
9584     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9585                                   Vcoding_system_alist);
9586
9587   {
9588     int id = coding_categories[category].id;
9589
9590     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9591       setup_coding_system (name, &coding_categories[category]);
9592   }
9593
9594   return Qnil;
9595
9596  short_args:
9597   return Fsignal (Qwrong_number_of_arguments,
9598                   Fcons (intern ("define-coding-system-internal"),
9599                          make_number (nargs)));
9600 }
9601
9602
9603 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9604        3, 3, 0,
9605        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
9606   (coding_system, prop, val)
9607      Lisp_Object coding_system, prop, val;
9608 {
9609   Lisp_Object spec, attrs;
9610
9611   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9612   attrs = AREF (spec, 0);
9613   if (EQ (prop, QCmnemonic))
9614     {
9615       if (! STRINGP (val))
9616         CHECK_CHARACTER (val);
9617       CODING_ATTR_MNEMONIC (attrs) = val;
9618     }
9619   else if (EQ (prop, QCdefalut_char))
9620     {
9621       if (NILP (val))
9622         val = make_number (' ');
9623       else
9624         CHECK_CHARACTER (val);
9625       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9626     }
9627   else if (EQ (prop, QCdecode_translation_table))
9628     {
9629       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9630         CHECK_SYMBOL (val);
9631       CODING_ATTR_DECODE_TBL (attrs) = val;
9632     }
9633   else if (EQ (prop, QCencode_translation_table))
9634     {
9635       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9636         CHECK_SYMBOL (val);
9637       CODING_ATTR_ENCODE_TBL (attrs) = val;
9638     }
9639   else if (EQ (prop, QCpost_read_conversion))
9640     {
9641       CHECK_SYMBOL (val);
9642       CODING_ATTR_POST_READ (attrs) = val;
9643     }
9644   else if (EQ (prop, QCpre_write_conversion))
9645     {
9646       CHECK_SYMBOL (val);
9647       CODING_ATTR_PRE_WRITE (attrs) = val;
9648     }
9649   else if (EQ (prop, QCascii_compatible_p))
9650     {
9651       CODING_ATTR_ASCII_COMPAT (attrs) = val;
9652     }
9653
9654   CODING_ATTR_PLIST (attrs)
9655     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9656   return val;
9657 }
9658
9659
9660 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9661        Sdefine_coding_system_alias, 2, 2, 0,
9662        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
9663      (alias, coding_system)
9664      Lisp_Object alias, coding_system;
9665 {
9666   Lisp_Object spec, aliases, eol_type, val;
9667
       /* ALIAS must be a symbol.  CHECK_CODING_SYSTEM_GET_SPEC is defined
          elsewhere; presumably it rejects an invalid CODING_SYSTEM and
          otherwise stores its spec vector in SPEC (confirm).  As read
          below, slot 1 of the spec is the alias list and slot 2 is the
          eol-type.  */
9668   CHECK_SYMBOL (alias);
9669   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9670   aliases = AREF (spec, 1);
9671   /* ALIASES should be a list of length more than zero, and the first
9672      element is a base coding system.  Append ALIAS at the tail of the
9673      list.  */
9674   while (!NILP (XCDR (aliases)))
9675     aliases = XCDR (aliases);
       /* The last cons is modified in place, so the list held in SPEC
          grows without SPEC itself being stored into.
          NOTE(review): ALIAS is appended unconditionally; defining the
          same alias twice would leave it in the list twice -- confirm
          whether that is intended.  */
9676   XSETCDR (aliases, Fcons (alias, Qnil));
9677
9678   eol_type = AREF (spec, 2);
9679   if (VECTORP (eol_type))
9680     {
9681       Lisp_Object subsidiaries;
9682       int i;
9683
           /* A vector eol-type holds three coding systems (indices 0..2).
              Build the matching three symbols for ALIAS with
              make_subsidiaries and make the I-th one an alias of the
              I-th element of EOL_TYPE by calling ourselves.  */
9684       subsidiaries = make_subsidiaries (alias);
9685       for (i = 0; i < 3; i++)
9686         Fdefine_coding_system_alias (AREF (subsidiaries, i),
9687                                      AREF (eol_type, i));
9688     }
9689
       /* Register ALIAS under the very same spec object as CODING_SYSTEM,
          then publish its name: always pushed onto Vcoding_system_list,
          but added to Vcoding_system_alist only when no entry with that
          name string exists yet.  */
9690   Fputhash (alias, spec, Vcoding_system_hash_table);
9691   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
9692   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9693   if (NILP (val))
9694     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9695                                   Vcoding_system_alist);
9696
9697   return Qnil;
9698 }
9699
9700 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9701        1, 1, 0,
9702        doc: /* Return the base of CODING-SYSTEM.
9703 Any alias or subsidiary coding system is not a base coding system.  */)
9704   (coding_system)
9705      Lisp_Object coding_system;
9706 {
9707   Lisp_Object spec, attrs;
9708
       /* nil is answered with `no-conversion' directly, before any
          lookup or validation takes place.  */
9709   if (NILP (coding_system))
9710     return (Qno_conversion);
       /* Slot 0 of the spec is the attribute vector; the base name is
          read from it via CODING_ATTR_BASE_NAME.  */
9711   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9712   attrs = AREF (spec, 0);
9713   return CODING_ATTR_BASE_NAME (attrs);
9714 }
9715
9716 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9717        1, 1, 0,
9718        doc: /* Return the property list of CODING-SYSTEM.  */)
9719      (coding_system)
9720      Lisp_Object coding_system;
9721 {
9722   Lisp_Object spec, attrs;
9723
       /* A nil argument is treated as `no-conversion'.  */
9724   if (NILP (coding_system))
9725     coding_system = Qno_conversion;
       /* Slot 0 of the spec is the attribute vector.  The plist kept
          there is returned as is; no copy is made in this function.  */
9726   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9727   attrs = AREF (spec, 0);
9728   return CODING_ATTR_PLIST (attrs);
9729 }
9730
9731
9732 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9733        1, 1, 0,
9734        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
9735      (coding_system)
9736      Lisp_Object coding_system;
9737 {
9738   Lisp_Object spec;
9739
       /* A nil argument is treated as `no-conversion'.  */
9740   if (NILP (coding_system))
9741     coding_system = Qno_conversion;
9742   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
       /* Slot 1 of the spec is the alias list.  It is returned without
          copying, i.e. this is the same list that
          `define-coding-system-alias' extends in place.  */
9743   return AREF (spec, 1);
9744 }
9745
9746 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9747        Scoding_system_eol_type, 1, 1, 0,
9748        doc: /* Return eol-type of CODING-SYSTEM.
9749 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
9750
9751 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9752 and CR respectively.
9753
9754 A vector value indicates that a format of end-of-line should be
9755 detected automatically.  Nth element of the vector is the subsidiary
9756 coding system whose eol-type is N.  */)
9757      (coding_system)
9758      Lisp_Object coding_system;
9759 {
9760   Lisp_Object spec, eol_type;
9761   int n;
9762
       /* A nil argument is treated as `no-conversion'.  */
9763   if (NILP (coding_system))
9764     coding_system = Qno_conversion;
       /* Unlike the sibling accessors, which go through
          CHECK_CODING_SYSTEM_GET_SPEC, an argument that is not a coding
          system is answered with nil here.  */
9765   if (! CODING_SYSTEM_P (coding_system))
9766     return Qnil;
9767   spec = CODING_SYSTEM_SPEC (coding_system);
       /* Slot 2 of the spec is the eol-type.  A vector is handed back
          as a copy rather than as the stored object itself.  */
9768   eol_type = AREF (spec, 2);
9769   if (VECTORP (eol_type))
9770     return Fcopy_sequence (eol_type);
       /* Qunix -> 0, Qdos -> 1, any other value -> 2.  */
9771   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9772   return make_number (n);
9773 }
9774
9775 #endif /* emacs */
9776
9777 \f
9778 /*** 12. Postamble ***/
9779
9780 void
9781 init_coding_once ()
9782 {
9783   int i;
9784
       /* Fill in the static lookup tables of this file.  No Lisp objects
          are touched here.  */

       /* Reset the id of every coding category to -1 and start with the
          priorities in plain index order.  */
9785   for (i = 0; i < coding_category_max; i++)
9786     {
9787       coding_categories[i].id = -1;
9788       coding_priorities[i] = i;
9789     }
9790
9791   /* ISO 2022 specific initialization.  Classify all byte values
          0x00..0xFF.  The four range loops leave out 0x20, 0x7F, 0xA0 and
          0xFF, which receive their own classes right after, and the
          single-byte assignments that follow then override the range
          defaults for the individual codes listed.  Keep this order.  */
9792   for (i = 0; i < 0x20; i++)
9793     iso_code_class[i] = ISO_control_0;
9794   for (i = 0x21; i < 0x7F; i++)
9795     iso_code_class[i] = ISO_graphic_plane_0;
9796   for (i = 0x80; i < 0xA0; i++)
9797     iso_code_class[i] = ISO_control_1;
9798   for (i = 0xA1; i < 0xFF; i++)
9799     iso_code_class[i] = ISO_graphic_plane_1;
9800   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9801   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9802   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9803   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9804   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9805   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9806   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9807   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9808   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9809
       /* Every byte defaults to 1 in emacs_mule_bytes; only the four
          private leading codes get 3 or 4.  Presumably the value is the
          byte length of the emacs-mule sequence introduced by that
          byte -- confirm against the users of the table.  */
9810   for (i = 0; i < 256; i++)
9811     {
9812       emacs_mule_bytes[i] = 1;
9813     }
9814   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9815   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9816   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9817   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9818 }
9819
9820 #ifdef emacs
9821
9822 void
9823 syms_of_coding ()
9824 {
       /* Set up everything this file exposes to Lisp, in this order:
          GC-protect and initialise the file's Lisp_Object globals, intern
          the symbols, fill the coding-category table, register the
          primitives, define the Lisp variables with their defaults, and
          finally define the `no-conversion' and `undecided' coding
          systems.  Later steps use objects created by earlier ones, so
          the statement order matters.  */

       /* The table of coding systems compares its keys with `eq'.  */
9825   staticpro (&Vcoding_system_hash_table);
9826   {
9827     Lisp_Object args[2];
9828     args[0] = QCtest;
9829     args[1] = Qeq;
9830     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9831   }
9832
9833   staticpro (&Vsjis_coding_system);
9834   Vsjis_coding_system = Qnil;
9835
9836   staticpro (&Vbig5_coding_system);
9837   Vbig5_coding_system = Qnil;
9838
9839   staticpro (&Vcode_conversion_reused_workbuf);
9840   Vcode_conversion_reused_workbuf = Qnil;
9841
9842   staticpro (&Vcode_conversion_workbuf_name);
9843   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9844
9845   reused_workbuf_in_use = 0;
9846
9847   DEFSYM (Qcharset, "charset");
9848   DEFSYM (Qtarget_idx, "target-idx");
9849   DEFSYM (Qcoding_system_history, "coding-system-history");
9850   Fset (Qcoding_system_history, Qnil);
9851
       /* Each I/O primitive's symbol gets a `target-idx' property: the
          zero-based position of the argument named in the comment.  */
9852   /* Target FILENAME is the first argument.  */
9853   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9854   /* Target FILENAME is the third argument.  */
9855   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9856
9857   DEFSYM (Qcall_process, "call-process");
9858   /* Target PROGRAM is the first argument.  */
9859   Fput (Qcall_process, Qtarget_idx, make_number (0));
9860
9861   DEFSYM (Qcall_process_region, "call-process-region");
9862   /* Target PROGRAM is the third argument.  */
9863   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9864
9865   DEFSYM (Qstart_process, "start-process");
9866   /* Target PROGRAM is the third argument.  */
9867   Fput (Qstart_process, Qtarget_idx, make_number (2));
9868
9869   DEFSYM (Qopen_network_stream, "open-network-stream");
9870   /* Target SERVICE is the fourth argument.  */
9871   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9872
9873   DEFSYM (Qcoding_system, "coding-system");
9874   DEFSYM (Qcoding_aliases, "coding-aliases");
9875
9876   DEFSYM (Qeol_type, "eol-type");
9877   DEFSYM (Qunix, "unix");
9878   DEFSYM (Qdos, "dos");
9879
9880   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9881   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9882   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9883   DEFSYM (Qdefault_char, "default-char");
9884   DEFSYM (Qundecided, "undecided");
9885   DEFSYM (Qno_conversion, "no-conversion");
9886   DEFSYM (Qraw_text, "raw-text");
9887
9888   DEFSYM (Qiso_2022, "iso-2022");
9889
9890   DEFSYM (Qutf_8, "utf-8");
9891   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9892
9893   DEFSYM (Qutf_16, "utf-16");
9894   DEFSYM (Qbig, "big");
9895   DEFSYM (Qlittle, "little");
9896
9897   DEFSYM (Qshift_jis, "shift-jis");
9898   DEFSYM (Qbig5, "big5");
9899
9900   DEFSYM (Qcoding_system_p, "coding-system-p");
9901
       /* `coding-system-error' is an error symbol: its condition list is
          (coding-system-error error) and its message is the string
          below.  */
9902   DEFSYM (Qcoding_system_error, "coding-system-error");
9903   Fput (Qcoding_system_error, Qerror_conditions,
9904         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9905   Fput (Qcoding_system_error, Qerror_message,
9906         build_string ("Invalid coding system"));
9907
9908   /* Intern this now in case it isn't already done.
9909      Setting this variable twice is harmless.
9910      But don't staticpro it here--that is done in alloc.c.  */
9911   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9912
9913   DEFSYM (Qtranslation_table, "translation-table");
9914   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9915   DEFSYM (Qtranslation_table_id, "translation-table-id");
9916   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9917   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9918
9919   DEFSYM (Qvalid_codes, "valid-codes");
9920
9921   DEFSYM (Qemacs_mule, "emacs-mule");
9922
       /* Keyword symbols for coding system properties.
          NOTE(review): the C identifier QCdefalut_char is spelled
          "defalut" (sic) here and where `coding-system-put' tests it;
          the Lisp name ":default-char" is correct.  Renaming it means
          changing its declaration and every use together.  */
9923   DEFSYM (QCcategory, ":category");
9924   DEFSYM (QCmnemonic, ":mnemonic");
9925   DEFSYM (QCdefalut_char, ":default-char");
9926   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9927   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9928   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9929   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9930   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
9931
       /* Vcoding_category_table maps each coding_category_* index to its
          `coding-category-...' symbol.  */
9932   Vcoding_category_table
9933     = Fmake_vector (make_number (coding_category_max), Qnil);
9934   staticpro (&Vcoding_category_table);
9935   /* The following categories are targets of code detection.  */
9936   ASET (Vcoding_category_table, coding_category_iso_7,
9937         intern ("coding-category-iso-7"));
9938   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9939         intern ("coding-category-iso-7-tight"));
9940   ASET (Vcoding_category_table, coding_category_iso_8_1,
9941         intern ("coding-category-iso-8-1"));
9942   ASET (Vcoding_category_table, coding_category_iso_8_2,
9943         intern ("coding-category-iso-8-2"));
9944   ASET (Vcoding_category_table, coding_category_iso_7_else,
9945         intern ("coding-category-iso-7-else"));
9946   ASET (Vcoding_category_table, coding_category_iso_8_else,
9947         intern ("coding-category-iso-8-else"));
9948   ASET (Vcoding_category_table, coding_category_utf_8_auto,
9949         intern ("coding-category-utf-8-auto"));
9950   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
9951         intern ("coding-category-utf-8"));
9952   ASET (Vcoding_category_table, coding_category_utf_8_sig,
9953         intern ("coding-category-utf-8-sig"));
9954   ASET (Vcoding_category_table, coding_category_utf_16_be,
9955         intern ("coding-category-utf-16-be"));
9956   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9957         intern ("coding-category-utf-16-auto"));
9958   ASET (Vcoding_category_table, coding_category_utf_16_le,
9959         intern ("coding-category-utf-16-le"));
9960   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9961         intern ("coding-category-utf-16-be-nosig"));
9962   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9963         intern ("coding-category-utf-16-le-nosig"));
9964   ASET (Vcoding_category_table, coding_category_charset,
9965         intern ("coding-category-charset"));
9966   ASET (Vcoding_category_table, coding_category_sjis,
9967         intern ("coding-category-sjis"));
9968   ASET (Vcoding_category_table, coding_category_big5,
9969         intern ("coding-category-big5"));
9970   ASET (Vcoding_category_table, coding_category_ccl,
9971         intern ("coding-category-ccl"));
9972   ASET (Vcoding_category_table, coding_category_emacs_mule,
9973         intern ("coding-category-emacs-mule"));
9974   /* The following categories are NOT targets of code detection.  */
9975   ASET (Vcoding_category_table, coding_category_raw_text,
9976         intern ("coding-category-raw-text"));
9977   ASET (Vcoding_category_table, coding_category_undecided,
9978         intern ("coding-category-undecided"));
9979
       /* These five symbols are the values documented for
          `last-code-conversion-error' further down.  */
9980   DEFSYM (Qinsufficient_source, "insufficient-source");
9981   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9982   DEFSYM (Qinvalid_source, "invalid-source");
9983   DEFSYM (Qinterrupted, "interrupted");
9984   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9985   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
9986
       /* Register the primitives defined in this file.  */
9987   defsubr (&Scoding_system_p);
9988   defsubr (&Sread_coding_system);
9989   defsubr (&Sread_non_nil_coding_system);
9990   defsubr (&Scheck_coding_system);
9991   defsubr (&Sdetect_coding_region);
9992   defsubr (&Sdetect_coding_string);
9993   defsubr (&Sfind_coding_systems_region_internal);
9994   defsubr (&Sunencodable_char_position);
9995   defsubr (&Scheck_coding_systems_region);
9996   defsubr (&Sdecode_coding_region);
9997   defsubr (&Sencode_coding_region);
9998   defsubr (&Sdecode_coding_string);
9999   defsubr (&Sencode_coding_string);
10000   defsubr (&Sdecode_sjis_char);
10001   defsubr (&Sencode_sjis_char);
10002   defsubr (&Sdecode_big5_char);
10003   defsubr (&Sencode_big5_char);
10004   defsubr (&Sset_terminal_coding_system_internal);
10005   defsubr (&Sset_safe_terminal_coding_system_internal);
10006   defsubr (&Sterminal_coding_system);
10007   defsubr (&Sset_keyboard_coding_system_internal);
10008   defsubr (&Skeyboard_coding_system);
10009   defsubr (&Sfind_operation_coding_system);
10010   defsubr (&Sset_coding_system_priority);
10011   defsubr (&Sdefine_coding_system_internal);
10012   defsubr (&Sdefine_coding_system_alias);
10013   defsubr (&Scoding_system_put);
10014   defsubr (&Scoding_system_base);
10015   defsubr (&Scoding_system_plist);
10016   defsubr (&Scoding_system_aliases);
10017   defsubr (&Scoding_system_eol_type);
10018   defsubr (&Scoding_system_priority_list);
10019
        /* Lisp variables.  Each one is given its initial value right
           after it is defined.  */
10020   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10021                doc: /* List of coding systems.
10022
10023 Do not alter the value of this variable manually.  This variable should be
10024 updated by the functions `define-coding-system' and
10025 `define-coding-system-alias'.  */);
10026   Vcoding_system_list = Qnil;
10027
10028   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10029                doc: /* Alist of coding system names.
10030 Each element is one element list of coding system name.
10031 This variable is given to `completing-read' as COLLECTION argument.
10032
10033 Do not alter the value of this variable manually.  This variable should be
10034 updated by the functions `make-coding-system' and
10035 `define-coding-system-alias'.  */);
10036   Vcoding_system_alist = Qnil;
10037
10038   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10039                doc: /* List of coding-categories (symbols) ordered by priority.
10040
10041 On detecting a coding system, Emacs tries code detection algorithms
10042 associated with each coding-category one by one in this order.  When
10043 one algorithm agrees with a byte sequence of source text, the coding
10044 system bound to the corresponding coding-category is selected.
10045
10046 Don't modify this variable directly, but use `set-coding-priority'.  */);
10047   {
10048     int i;
10049
          /* Consing from the last index down to 0 leaves the list in the
             same order as Vcoding_category_table.  */
10050     Vcoding_category_list = Qnil;
10051     for (i = coding_category_max - 1; i >= 0; i--)
10052       Vcoding_category_list
10053         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10054                  Vcoding_category_list);
10055   }
10056
10057   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10058                doc: /* Specify the coding system for read operations.
10059 It is useful to bind this variable with `let', but do not set it globally.
10060 If the value is a coding system, it is used for decoding on read operation.
10061 If not, an appropriate element is used from one of the coding system alists.
10062 There are three such tables: `file-coding-system-alist',
10063 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10064   Vcoding_system_for_read = Qnil;
10065
10066   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10067                doc: /* Specify the coding system for write operations.
10068 Programs bind this variable with `let', but you should not set it globally.
10069 If the value is a coding system, it is used for encoding of output,
10070 when writing it to a file and when sending it to a file or subprocess.
10071
10072 If this does not specify a coding system, an appropriate element
10073 is used from one of the coding system alists.
10074 There are three such tables: `file-coding-system-alist',
10075 `process-coding-system-alist', and `network-coding-system-alist'.
10076 For output to files, if the above procedure does not specify a coding system,
10077 the value of `buffer-file-coding-system' is used.  */);
10078   Vcoding_system_for_write = Qnil;
10079
10080   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10081                doc: /*
10082 Coding system used in the latest file or process I/O.  */);
10083   Vlast_coding_system_used = Qnil;
10084
10085   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10086                doc: /*
10087 Error status of the last code conversion.
10088
10089 When an error was detected in the last code conversion, this variable
10090 is set to one of the following symbols.
10091   `insufficient-source'
10092   `inconsistent-eol'
10093   `invalid-source'
10094   `interrupted'
10095   `insufficient-memory'
10096 When no error was detected, the value doesn't change.  So, to check
10097 the error status of a code conversion by this variable, you must
10098 explicitly set this variable to nil before performing code
10099 conversion.  */);
10100   Vlast_code_conversion_error = Qnil;
10101
10102   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10103                doc: /*
10104 *Non-nil means always inhibit code conversion of end-of-line format.
10105 See info node `Coding Systems' and info node `Text and Binary' concerning
10106 such conversion.  */);
10107   inhibit_eol_conversion = 0;
10108
10109   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10110                doc: /*
10111 Non-nil means process buffer inherits coding system of process output.
10112 Bind it to t if the process output is to be treated as if it were a file
10113 read from some filesystem.  */);
10114   inherit_process_coding_system = 0;
10115
10116   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10117                doc: /*
10118 Alist to decide a coding system to use for a file I/O operation.
10119 The format is ((PATTERN . VAL) ...),
10120 where PATTERN is a regular expression matching a file name,
10121 VAL is a coding system, a cons of coding systems, or a function symbol.
10122 If VAL is a coding system, it is used for both decoding and encoding
10123 the file contents.
10124 If VAL is a cons of coding systems, the car part is used for decoding,
10125 and the cdr part is used for encoding.
10126 If VAL is a function symbol, the function must return a coding system
10127 or a cons of coding systems which are used as above.  The function is
10128 called with an argument that is a list of the arguments with which
10129 `find-operation-coding-system' was called.  If the function can't decide
10130 a coding system, it can return `undecided' so that the normal
10131 code-detection is performed.
10132
10133 See also the function `find-operation-coding-system'
10134 and the variable `auto-coding-alist'.  */);
10135   Vfile_coding_system_alist = Qnil;
10136
10137   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10138                doc: /*
10139 Alist to decide a coding system to use for a process I/O operation.
10140 The format is ((PATTERN . VAL) ...),
10141 where PATTERN is a regular expression matching a program name,
10142 VAL is a coding system, a cons of coding systems, or a function symbol.
10143 If VAL is a coding system, it is used for both decoding what received
10144 from the program and encoding what sent to the program.
10145 If VAL is a cons of coding systems, the car part is used for decoding,
10146 and the cdr part is used for encoding.
10147 If VAL is a function symbol, the function must return a coding system
10148 or a cons of coding systems which are used as above.
10149
10150 See also the function `find-operation-coding-system'.  */);
10151   Vprocess_coding_system_alist = Qnil;
10152
10153   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10154                doc: /*
10155 Alist to decide a coding system to use for a network I/O operation.
10156 The format is ((PATTERN . VAL) ...),
10157 where PATTERN is a regular expression matching a network service name
10158 or is a port number to connect to,
10159 VAL is a coding system, a cons of coding systems, or a function symbol.
10160 If VAL is a coding system, it is used for both decoding what received
10161 from the network stream and encoding what sent to the network stream.
10162 If VAL is a cons of coding systems, the car part is used for decoding,
10163 and the cdr part is used for encoding.
10164 If VAL is a function symbol, the function must return a coding system
10165 or a cons of coding systems which are used as above.
10166
10167 See also the function `find-operation-coding-system'.  */);
10168   Vnetwork_coding_system_alist = Qnil;
10169
10170   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10171                doc: /* Coding system to use with system messages.
10172 Also used for decoding keyboard input on X Window system.  */);
10173   Vlocale_coding_system = Qnil;
10174
10175   /* The eol mnemonics are reset in startup.el system-dependently.  */
10176   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10177                doc: /*
10178 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10179   eol_mnemonic_unix = build_string (":");
10180
10181   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10182                doc: /*
10183 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10184   eol_mnemonic_dos = build_string ("\\");
10185
10186   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10187                doc: /*
10188 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10189   eol_mnemonic_mac = build_string ("/");
10190
10191   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10192                doc: /*
10193 *String displayed in mode line when end-of-line format is not yet determined.  */);
10194   eol_mnemonic_undecided = build_string (":");
10195
10196   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10197                doc: /*
10198 *Non-nil enables character translation while encoding and decoding.  */);
10199   Venable_character_translation = Qt;
10200
10201   DEFVAR_LISP ("standard-translation-table-for-decode",
10202                &Vstandard_translation_table_for_decode,
10203                doc: /* Table for translating characters while decoding.  */);
10204   Vstandard_translation_table_for_decode = Qnil;
10205
10206   DEFVAR_LISP ("standard-translation-table-for-encode",
10207                &Vstandard_translation_table_for_encode,
10208                doc: /* Table for translating characters while encoding.  */);
10209   Vstandard_translation_table_for_encode = Qnil;
10210
10211   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10212                doc: /* Alist of charsets vs revision numbers.
10213 While encoding, if a charset (car part of an element) is found,
10214 designate it with the escape sequence identifying revision (cdr part
10215 of the element).  */);
10216   Vcharset_revision_table = Qnil;
10217
10218   DEFVAR_LISP ("default-process-coding-system",
10219                &Vdefault_process_coding_system,
10220                doc: /* Cons of coding systems used for process I/O by default.
10221 The car part is used for decoding a process output,
10222 the cdr part is used for encoding a text to be sent to a process.  */);
10223   Vdefault_process_coding_system = Qnil;
10224
10225   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10226                doc: /*
10227 Table of extra Latin codes in the range 128..159 (inclusive).
10228 This is a vector of length 256.
10229 If Nth element is non-nil, the existence of code N in a file
10230 \(or output of subprocess) doesn't prevent it to be detected as
10231 a coding system of ISO 2022 variant which has a flag
10232 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10233 or reading output of a subprocess.
10234 Only 128th through 159th elements have a meaning.  */);
10235   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10236
10237   DEFVAR_LISP ("select-safe-coding-system-function",
10238                &Vselect_safe_coding_system_function,
10239                doc: /*
10240 Function to call to select safe coding system for encoding a text.
10241
10242 If set, this function is called to force a user to select a proper
10243 coding system which can encode the text in the case that a default
10244 coding system used in each operation can't encode the text.  The
10245 function should take care that the buffer is not modified while
10246 the coding system is being selected.
10247
10248 The default value is `select-safe-coding-system' (which see).  */);
        /* The C-level initial value is nil.  NOTE(review): the doc string
           above names `select-safe-coding-system' as the default, so
           that value is presumably installed from Lisp -- confirm.  */
10249   Vselect_safe_coding_system_function = Qnil;
10250
10251   DEFVAR_BOOL ("coding-system-require-warning",
10252                &coding_system_require_warning,
10253                doc: /* Internal use only.
10254 If non-nil, on writing a file, `select-safe-coding-system-function' is
10255 called even if `coding-system-for-write' is non-nil.  The command
10256 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10257   coding_system_require_warning = 0;
10258
10259
10260   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10261                &inhibit_iso_escape_detection,
10262                doc: /*
10263 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
10264
10265 By default, on reading a file, Emacs tries to detect how the text is
10266 encoded.  This code detection is sensitive to escape sequences.  If
10267 the sequence is valid as ISO2022, the code is determined as one of
10268 the ISO2022 encodings, and the file is decoded by the corresponding
10269 coding system (e.g. `iso-2022-7bit').
10270
10271 However, there may be a case that you want to read escape sequences in
10272 a file as is.  In such a case, you can set this variable to non-nil.
10273 Then, as the code detection ignores any escape sequences, no file is
10274 detected as encoded in some ISO2022 encoding.  The result is that all
10275 escape sequences become visible in a buffer.
10276
10277 The default value is nil, and it is strongly recommended not to change
10278 it.  That is because many Emacs Lisp source files that contain
10279 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10280 in Emacs's distribution, and they won't be decoded correctly on
10281 reading if you suppress escape sequence detection.
10282
10283 The other way to read escape sequences in a file without decoding is
10284 to explicitly specify some coding system that doesn't use ISO2022's
10285 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10286   inhibit_iso_escape_detection = 0;
10287
10288   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10289                doc: /* Char table for translating self-inserting characters.
10290 This is applied to the result of input methods, not their input.
10291 See also `keyboard-translate-table'.  */);
10292     Vtranslation_table_for_input = Qnil;
10293
        /* Define the two built-in coding systems.  ARGS is the positional
           argument vector passed to Fdefine_coding_system_internal, and
           PLIST holds eight keyword/value pairs that become its
           coding_arg_plist entry.  Most values are stored into both
           arrays by the chained assignments below.  */
10294   {
10295     Lisp_Object args[coding_arg_max];
10296     Lisp_Object plist[16];
10297     int i;
10298
10299     for (i = 0; i < coding_arg_max; i++)
10300       args[i] = Qnil;
10301
10302     plist[0] = intern (":name");
10303     plist[1] = args[coding_arg_name] = Qno_conversion;
10304     plist[2] = intern (":mnemonic");
10305     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10306     plist[4] = intern (":coding-type");
10307     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10308     plist[6] = intern (":ascii-compatible-p");
10309     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10310     plist[8] = intern (":default-char");
10311     plist[9] = args[coding_arg_default_char] = make_number (0);
10312     plist[10] = intern (":for-unibyte");
10313     plist[11] = args[coding_arg_for_unibyte] = Qt;
10314     plist[12] = intern (":docstring");
10315     plist[13] = build_string ("Do no conversion.\n\
10316 \n\
10317 When you visit a file with this coding, the file is read into a\n\
10318 unibyte buffer as is, thus each byte of a file is treated as a\n\
10319 character.");
10320     plist[14] = intern (":eol-type");
10321     plist[15] = args[coding_arg_eol_type] = Qunix;
10322     args[coding_arg_plist] = Flist (16, plist);
10323     Fdefine_coding_system_internal (coding_arg_max, args);
10324
          /* Reuse both arrays for `undecided'; only the entries that
             differ are overwritten.  Plist slots 8/9 change from
             :default-char to :charset-list, while
             args[coding_arg_default_char] keeps the value stored
             above.  */
10325     plist[1] = args[coding_arg_name] = Qundecided;
10326     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10327     plist[5] = args[coding_arg_coding_type] = Qundecided;
10328     /* This is already set.
10329        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10330     plist[8] = intern (":charset-list");
10331     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10332     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10333     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10334     plist[15] = args[coding_arg_eol_type] = Qnil;
10335     args[coding_arg_plist] = Flist (16, plist);
10336     Fdefine_coding_system_internal (coding_arg_max, args);
10337   }
10338
        /* This must follow the definition of `no-conversion' above.  */
10339   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10340
10341   {
10342     int i;
10343
          /* Give every coding-category symbol the initial value
             `no-conversion'.  */
10344     for (i = 0; i < coding_category_max; i++)
10345       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10346   }
        /* `dos' on MSDOS and WINDOWSNT builds, `unix' everywhere else.  */
10347 #if defined (MSDOS) || defined (WINDOWSNT)
10348   system_eol_type = Qdos;
10349 #else
10350   system_eol_type = Qunix;
10351 #endif
10352   staticpro (&system_eol_type);
10353 }
10354
10355 char *
10356 emacs_strerror (error_number)
10357      int error_number;
10358 {
10359   char *str;
10360
        /* Return the message strerror gives for ERROR_NUMBER, passed
           through Vlocale_coding_system when that variable is non-nil.
           synchronize_system_messages_locale is called first, presumably
           so that strerror answers in the locale chosen for system
           messages (confirm against its definition).  */
10361   synchronize_system_messages_locale ();
10362   str = strerror (error_number);
10363
10364   if (! NILP (Vlocale_coding_system))
10365     {
          /* The final argument 0 presumably selects decoding rather than
             encoding, as the name DEC suggests -- confirm.
             NOTE(review): the pointer returned below refers to the data
             of DEC, a Lisp string that nothing visible here keeps
             referenced after return.  Callers presumably have to consume
             it before a garbage collection can run -- confirm.  */
10366       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10367                                                       Vlocale_coding_system,
10368                                                       0);
10369       str = (char *) SDATA (dec);
10370     }
10371
        /* In either case the caller does not own the returned storage.  */
10372   return str;
10373 }
10374
10375 #endif /* emacs */
10376
10377 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10378    (do not change this comment) */