src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from coding system symbol to attributes vector is done by looking up
  63   Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, they can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on the operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we setup a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;
 166   ...;
 167
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the source is exhausted,
 171          jump to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173       /* Non-conforming byte: reject.  Strongly suggestive byte: record.  */
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (__C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source was exhausted without finding an invalid byte.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size;
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source text, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf >= charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it.  */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that source area and destination area are
 258   overlapped, which means that we can produce an encoded text until it
 259   reaches the head of the not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274   /* Stop early enough that one iteration cannot write past DST_END.  */
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292
 293 #include "lisp.h"
 294 #include "buffer.h"
 295 #include "character.h"
 296 #include "charset.h"
 297 #include "ccl.h"
 298 #include "composite.h"
 299 #include "coding.h"
 300 #include "window.h"
 301 #include "frame.h"
 302 #include "termhooks.h"
 303
 304 Lisp_Object Vcoding_system_hash_table;
 305
 306 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 307 Lisp_Object Qunix, Qdos;
 308 extern Lisp_Object Qmac;        /* defined in frame.c */
 309 Lisp_Object Qbuffer_file_coding_system;
 310 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 311 Lisp_Object Qdefault_char;
 312 Lisp_Object Qno_conversion, Qundecided;
 313 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 314 Lisp_Object Qbig, Qlittle;
 315 Lisp_Object Qcoding_system_history;
 316 Lisp_Object Qvalid_codes;
 317 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 318 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 319 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 320 Lisp_Object QCascii_compatible_p;
 321
 322 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 323 Lisp_Object Qcall_process, Qcall_process_region;
 324 Lisp_Object Qstart_process, Qopen_network_stream;
 325 Lisp_Object Qtarget_idx;
 326
 327 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 328 Lisp_Object Qinterrupted, Qinsufficient_memory;
 329
 330 extern Lisp_Object Qcompletion_ignore_case;
 331
 332 /* If a symbol has this property, evaluate the value to define the
 333    symbol as a coding system.  */
 334 static Lisp_Object Qcoding_system_define_form;
 335
 336 int coding_system_require_warning;
 337
 338 Lisp_Object Vselect_safe_coding_system_function;
 339
 340 /* Mnemonic strings, one for each format of end-of-line.  */
 341 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 342 /* Mnemonic string to indicate that the format of end-of-line is not
 343    yet decided.  */
 344 Lisp_Object eol_mnemonic_undecided;
 345
 346 /* Format of end-of-line determined by the system.  This is Qunix on
 347    Unix and Mac, Qdos on DOS/Windows.
 348    This has an effect only for external encoding (i.e. for output to
 349    file and process), not for in-buffer or Lisp string encoding.  */
 350 static Lisp_Object system_eol_type;
 351
 352 #ifdef emacs
 353
 354 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 355
 356 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 357
 358 /* Coding system emacs-mule and raw-text are for converting only
 359    end-of-line format.  */
 360 Lisp_Object Qemacs_mule, Qraw_text;
 361 Lisp_Object Qutf_8_emacs;
 362
 363 /* Coding-systems are handed between Emacs Lisp programs and C internal
 364    routines by the following three variables.  */
 365 /* Coding-system for reading files and receiving data from process.  */
 366 Lisp_Object Vcoding_system_for_read;
 367 /* Coding-system for writing files and sending data to process.  */
 368 Lisp_Object Vcoding_system_for_write;
 369 /* Coding-system actually used in the latest I/O.  */
 370 Lisp_Object Vlast_coding_system_used;
 371 /* Set to non-nil when an error is detected during code conversion.  */
 372 Lisp_Object Vlast_code_conversion_error;
 373 /* A vector of length 256 which contains information about special
 374    Latin codes (especially for dealing with Microsoft codes).  */
 375 Lisp_Object Vlatin_extra_code_table;
 376
 377 /* Flag to inhibit code conversion of end-of-line format.  */
 378 int inhibit_eol_conversion;
 379
 380 /* Flag to inhibit ISO2022 escape sequence detection.  */
 381 int inhibit_iso_escape_detection;
 382
 383 /* Flag to inhibit detection of binary files through null bytes.  */
 384 int inhibit_null_byte_detection;
 385
 386 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 387 int inherit_process_coding_system;
 388
 389 /* Coding system to be used to encode text for terminal display when
 390    terminal coding system is nil.  */
 391 struct coding_system safe_terminal_coding;
 392
 393 Lisp_Object Vfile_coding_system_alist;
 394 Lisp_Object Vprocess_coding_system_alist;
 395 Lisp_Object Vnetwork_coding_system_alist;
 396
 397 Lisp_Object Vlocale_coding_system;
 398
 399 #endif /* emacs */
 400
 401 /* Flag to tell whether we look up a translation table on character
 402    code conversion.  */
 403 Lisp_Object Venable_character_translation;
 404 /* Standard translation table to look up on decoding (reading).  */
 405 Lisp_Object Vstandard_translation_table_for_decode;
 406 /* Standard translation table to look up on encoding (writing).  */
 407 Lisp_Object Vstandard_translation_table_for_encode;
 408
 409 Lisp_Object Qtranslation_table;
 410 Lisp_Object Qtranslation_table_id;
 411 Lisp_Object Qtranslation_table_for_decode;
 412 Lisp_Object Qtranslation_table_for_encode;
 413
 414 /* Alist of charsets vs. revision numbers.  */
 415 static Lisp_Object Vcharset_revision_table;
 416
 417 /* Default coding systems used for process I/O.  */
 418 Lisp_Object Vdefault_process_coding_system;
 419
 420 /* Char table for translating Quail and self-inserting input.  */
 421 Lisp_Object Vtranslation_table_for_input;
 422
 423 /* Two special coding systems.  */
 424 Lisp_Object Vsjis_coding_system;
 425 Lisp_Object Vbig5_coding_system;
 426
 427 /* ISO2022 section: accessors for CODING's attributes and its spec.iso_2022 member.  */
 428
 429 #define CODING_ISO_INITIAL(coding, reg)                 \
 430   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 431                      coding_attr_iso_initial),          \
 432                reg)))
 433
 434
 435 #define CODING_ISO_REQUEST(coding, charset_id)          \
 436   (((charset_id) <= (coding)->max_charset_id            \
 437     ? ((coding)->safe_charsets[charset_id] != 255       \
 438        ? (coding)->safe_charsets[charset_id]            \
 439        : -1)                                            \
 440     : -1))
 441
 442
 443 #define CODING_ISO_FLAGS(coding)        \
 444   ((coding)->spec.iso_2022.flags)
 445 #define CODING_ISO_DESIGNATION(coding, reg)     \
 446   ((coding)->spec.iso_2022.current_designation[reg])
 447 #define CODING_ISO_INVOCATION(coding, plane)    \
 448   ((coding)->spec.iso_2022.current_invocation[plane])
 449 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 450   ((coding)->spec.iso_2022.single_shifting)
 451 #define CODING_ISO_BOL(coding)  \
 452   ((coding)->spec.iso_2022.bol)
 453 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 454   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 455 #define CODING_ISO_CMP_STATUS(coding)   \
 456   (&(coding)->spec.iso_2022.cmp_status)
 457 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 458   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 459 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 460   ((coding)->spec.iso_2022.embedded_utf_8)
 461
 462 /* Control characters of ISO2022.  */
 463                         /* code */      /* function */
 464 #define ISO_CODE_LF     0x0A            /* line-feed */
 465 #define ISO_CODE_CR     0x0D            /* carriage-return */
 466 #define ISO_CODE_SO     0x0E            /* shift-out */
 467 #define ISO_CODE_SI     0x0F            /* shift-in */
 468 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 469 #define ISO_CODE_ESC    0x1B            /* escape */
 470 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 471 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 472 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 473
 474 /* Every 1-byte code of ISO2022 is classified into one of the
 475    following classes.  */
 476 enum iso_code_class_type
 477   {
 478     ISO_control_0,              /* Control codes in the range
 479                                    0x00..0x1F and 0x7F, except for the
 480                                    following 4 codes.  */
 481     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 482     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 483     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 484     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 485     ISO_control_1,              /* Control codes in the range
 486                                    0x80..0x9F, except for the
 487                                    following 3 codes.  */
 488     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 489     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 490     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 491     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 492     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 493     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 494     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 495   };
 496
 497 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
 498     `iso-flags' attribute of an iso2022 coding system.  */
 499
 500 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 501    instead of the correct short-form sequence (e.g. ESC $ A).  */
 502 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 503
 504 /* If set, reset graphic planes and registers at end-of-line to the
 505    initial state.  */
 506 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 507
 508 /* If set, reset graphic planes and registers before any control
 509    characters to the initial state.  */
 510 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 511
 512 /* If set, encode by 7-bit environment.  */
 513 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 514
 515 /* If set, use locking-shift function.  */
 516 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 517
 518 /* If set, use single-shift function.  Overwrite
 519    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 520 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 521
 522 /* If set, use designation escape sequence.  */
 523 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 524
 525 /* If set, produce revision number sequence.  */
 526 #define CODING_ISO_FLAG_REVISION        0x0080
 527
 528 /* If set, produce ISO6429's direction specifying sequence.  */
 529 #define CODING_ISO_FLAG_DIRECTION       0x0100
 530
 531 /* If set, assume designation states are reset at beginning of line on
 532    output.  */
 533 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 534
 535 /* If set, designation sequence should be placed at beginning of line
 536    on output.  */
 537 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 538
 539 /* If set, do not encode unsafe characters on output.  */
 540 #define CODING_ISO_FLAG_SAFE            0x0800
 541
 542 /* If set, extra latin codes (128..159) are accepted as a valid code
 543    on input.  */
 544 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 545
 546 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 547
 548 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 549
 550 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 551
 552 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 553
 554 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 555
 556 /* A character to be produced on output if encoding of the original
 557    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 558 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 559
 560 /* UTF-8 section: accessor for CODING->spec.utf_8_bom.  */
 561 #define CODING_UTF_8_BOM(coding)        \
 562   ((coding)->spec.utf_8_bom)
 563
 564 /* UTF-16 section: accessors for the members of CODING->spec.utf_16.  */
 565 #define CODING_UTF_16_BOM(coding)       \
 566   ((coding)->spec.utf_16.bom)
 567
 568 #define CODING_UTF_16_ENDIAN(coding)    \
 569   ((coding)->spec.utf_16.endian)
 570
 571 #define CODING_UTF_16_SURROGATE(coding) \
 572   ((coding)->spec.utf_16.surrogate)
 573
 574
 575 /* CCL section: accessors for the coding_attr_ccl_* slots of CODING's attributes.  */
 576 #define CODING_CCL_DECODER(coding)      \
 577   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 578 #define CODING_CCL_ENCODER(coding)      \
 579   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 580 #define CODING_CCL_VALIDS(coding)                                          \
 581   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 582
 583 /* Index for each coding category in `coding_categories'; also the bit position of its CATEGORY_MASK_XXX.  */
 584
 585 enum coding_category
 586   {
 587     coding_category_iso_7,
 588     coding_category_iso_7_tight,
 589     coding_category_iso_8_1,
 590     coding_category_iso_8_2,
 591     coding_category_iso_7_else,
 592     coding_category_iso_8_else,
 593     coding_category_utf_8_auto,
 594     coding_category_utf_8_nosig,
 595     coding_category_utf_8_sig,
 596     coding_category_utf_16_auto,
 597     coding_category_utf_16_be,
 598     coding_category_utf_16_le,
 599     coding_category_utf_16_be_nosig,
 600     coding_category_utf_16_le_nosig,
 601     coding_category_charset,
 602     coding_category_sjis,
 603     coding_category_big5,
 604     coding_category_ccl,
 605     coding_category_emacs_mule,
 606     /* All of the above are targets of code detection.  */
 607     coding_category_raw_text,
 608     coding_category_undecided,
 609     coding_category_max
 610   };
 611
 612 /* Definitions of flag bits used in detect_coding_XXXX.  */
 613 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 614 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 615 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 616 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 617 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 618 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 619 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 620 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 621 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 622 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 623 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 624 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 625 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 626 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 627 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 628 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 629 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 630 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 631 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 632 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 633
 634 /* This value is returned if detect_coding_mask () finds nothing other
 635    than ASCII characters.  */
 636 #define CATEGORY_MASK_ANY               \
 637   (CATEGORY_MASK_ISO_7                  \
 638    | CATEGORY_MASK_ISO_7_TIGHT          \
 639    | CATEGORY_MASK_ISO_8_1              \
 640    | CATEGORY_MASK_ISO_8_2              \
 641    | CATEGORY_MASK_ISO_7_ELSE           \
 642    | CATEGORY_MASK_ISO_8_ELSE           \
 643    | CATEGORY_MASK_UTF_8_AUTO           \
 644    | CATEGORY_MASK_UTF_8_NOSIG          \
 645    | CATEGORY_MASK_UTF_8_SIG            \
 646    | CATEGORY_MASK_UTF_16_AUTO          \
 647    | CATEGORY_MASK_UTF_16_BE            \
 648    | CATEGORY_MASK_UTF_16_LE            \
 649    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 650    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 651    | CATEGORY_MASK_CHARSET              \
 652    | CATEGORY_MASK_SJIS                 \
 653    | CATEGORY_MASK_BIG5                 \
 654    | CATEGORY_MASK_CCL                  \
 655    | CATEGORY_MASK_EMACS_MULE)
 656
 657
 658 #define CATEGORY_MASK_ISO_7BIT \
 659   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 660
 661 #define CATEGORY_MASK_ISO_8BIT \
 662   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 663
 664 #define CATEGORY_MASK_ISO_ELSE \
 665   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 666
 667 #define CATEGORY_MASK_ISO_ESCAPE        \
 668   (CATEGORY_MASK_ISO_7                  \
 669    | CATEGORY_MASK_ISO_7_TIGHT          \
 670    | CATEGORY_MASK_ISO_7_ELSE           \
 671    | CATEGORY_MASK_ISO_8_ELSE)
 672
 673 #define CATEGORY_MASK_ISO       \
 674   (  CATEGORY_MASK_ISO_7BIT     \
 675      | CATEGORY_MASK_ISO_8BIT   \
 676      | CATEGORY_MASK_ISO_ELSE)
 677
 678 #define CATEGORY_MASK_UTF_16            \
 679   (CATEGORY_MASK_UTF_16_AUTO            \
 680    | CATEGORY_MASK_UTF_16_BE            \
 681    | CATEGORY_MASK_UTF_16_LE            \
 682    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 683    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 684
 685 #define CATEGORY_MASK_UTF_8     \
 686   (CATEGORY_MASK_UTF_8_AUTO     \
 687    | CATEGORY_MASK_UTF_8_NOSIG  \
 688    | CATEGORY_MASK_UTF_8_SIG)
 689
 690 /* List of symbols `coding-category-xxx' ordered by priority.  This
 691    variable is exposed to Emacs Lisp.  */
 692 static Lisp_Object Vcoding_category_list;
 693
 694 /* Table of coding categories (Lisp symbols).  This variable is for
 695    internal use only.  */
 696 static Lisp_Object Vcoding_category_table;
 697
 698 /* Table of coding-categories ordered by priority.  */
 699 static enum coding_category coding_priorities[coding_category_max];
 700
 701 /* Nth element is a coding context for the coding system bound to the
 702    Nth coding category.  */
 703 static struct coding_system coding_categories[coding_category_max];
 704
 705 /*** Commonly used macros and functions ***/
 706
 707 #ifndef min
 708 #define min(a, b) ((a) < (b) ? (a) : (b))
 709 #endif
 710 #ifndef max
 711 #define max(a, b) ((a) > (b) ? (a) : (b))
 712 #endif
 713
 714 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 715   do {                                                  \
 716     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 717     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 718   } while (0)
 719
 720
 721 /* Safely get one byte from the source text pointed by SRC which ends
 722    at SRC_END, and set C to that byte.  If there are not enough bytes
 723    in the source, it jumps to `no_more_source'.  If multibytep is
 724    nonzero, and a multibyte character is found at SRC, set C to the
 725    negative value of the character code.  The caller should declare
 726    and set these variables appropriately in advance:
 727         src, src_base, src_end, multibytep, coding, consumed_chars */
 728
 729 #define ONE_MORE_BYTE(c)                                \
 730   do {                                                  \
 731     if (src == src_end)                                 \
 732       {                                                 \
 733         if (src_base < src)                             \
 734           record_conversion_result                      \
 735             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 736         goto no_more_source;                            \
 737       }                                                 \
 738     c = *src++;                                         \
 739     if (multibytep && (c & 0x80))                       \
 740       {                                                 \
 741         if ((c & 0xFE) == 0xC0)                         \
 742           c = ((c & 1) << 6) | *src++;                  \
 743         else                                            \
 744           {                                             \
 745             src--;                                      \
 746             c = - string_char (src, &src, NULL);        \
 747             record_conversion_result                    \
 748               (coding, CODING_RESULT_INVALID_SRC);      \
 749           }                                             \
 750       }                                                 \
 751     consumed_chars++;                                   \
 752   } while (0)
 753
 754 /* Safely get two bytes from the source text pointed by SRC which ends
 755    at SRC_END, and set C1 and C2 to those bytes while skipping the
 756    heading multibyte characters.  If there are not enough bytes in the
 757    source, it jumps to `no_more_source'.  If multibytep is nonzero and
 758    a multibyte character is found for C2, set C2 to -1 (not to the
 759    character code).  The caller should declare and set these
 760    variables appropriately in advance:
 761         src, src_end, multibytep
 762    It is intended that this macro is used in detect_coding_utf_16.  */
 763
 764 #define TWO_MORE_BYTES(c1, c2)                          \
 765   do {                                                  \
 766     do {                                                \
 767       if (src == src_end)                               \
 768         goto no_more_source;                            \
 769       c1 = *src++;                                      \
 770       if (multibytep && (c1 & 0x80))                    \
 771         {                                               \
 772           if ((c1 & 0xFE) == 0xC0)                      \
 773             c1 = ((c1 & 1) << 6) | *src++;              \
 774           else                                          \
 775             {                                           \
 776               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 777               c1 = -1;                                  \
 778             }                                           \
 779         }                                               \
 780     } while (c1 < 0);                                   \
 781     if (src == src_end)                                 \
 782       goto no_more_source;                              \
 783     c2 = *src++;                                        \
 784     if (multibytep && (c2 & 0x80))                      \
 785       {                                                 \
 786         if ((c2 & 0xFE) == 0xC0)                        \
 787           c2 = ((c2 & 1) << 6) | *src++;                \
 788         else                                            \
 789           c2 = -1;                                      \
 790       }                                                 \
 791   } while (0)
 792
 793
     /* Get one byte from the source text pointed by SRC and store it in C,
        without checking whether SRC has reached the end of the source.
        If multibytep is nonzero and the byte has its high bit set: a
        byte 0xC0 or 0xC1 is combined with the byte that follows it into
        a single value; otherwise C is set to the negated result of
        string_char called on the sequence at SRC (which is handed &src,
        presumably to advance SRC past the character) and
        CODING_RESULT_INVALID_SRC is recorded.  CONSUMED_CHARS is
        incremented in every case.  The caller should declare and set
        these variables appropriately in advance:
             src, multibytep, coding, consumed_chars  */
 794 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 795   do {                                                  \
 796     c = *src++;                                         \
 797     if (multibytep && (c & 0x80))                       \
 798       {                                                 \
 799         if ((c & 0xFE) == 0xC0)                         \
 800           c = ((c & 1) << 6) | *src++;                  \
 801         else                                            \
 802           {                                             \
 803             src--;                                      \
 804             c = - string_char (src, &src, NULL);        \
 805             record_conversion_result                    \
 806               (coding, CODING_RESULT_INVALID_SRC);      \
 807           }                                             \
 808       }                                                 \
 809     consumed_chars++;                                   \
 810   } while (0)
 811
 812
 813 /* Store a byte C in the place pointed by DST and increment DST to the
 814    next free point, and increment PRODUCED_CHARS.  The caller should
 815    ensure that C is 0..127, and declare and set the variables `dst'
 816    and `produced_chars' appropriately in advance.  The byte is stored
 817    as is; no multibyte conversion is attempted.  */
 818
 819
 820 #define EMIT_ONE_ASCII_BYTE(c)  \
 821   do {                          \
 822     produced_chars++;           \
 823     *dst++ = (c);               \
 824   } while (0)
 825
 826
 827 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2.  C1 is
        stored first, and PRODUCED_CHARS is increased by two.  */
 828
 829 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 830   do {                                  \
 831     produced_chars += 2;                \
 832     *dst++ = (c1), *dst++ = (c2);       \
 833   } while (0)
 834
 835
 836 /* Store a byte C in the place pointed by DST and increment DST to the
 837    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 838    nonzero, store in an appropriate multibyte form: a value of 0x80
 839    or more is first converted with BYTE8_TO_CHAR, and the result is
 840    written with CHAR_STRING_ADVANCE.  The caller should declare and
        set the variables `dst', `multibytep' and `produced_chars'
        appropriately in advance.  */
 841
 842 #define EMIT_ONE_BYTE(c)                \
 843   do {                                  \
 844     produced_chars++;                   \
 845     if (multibytep)                     \
 846       {                                 \
 847         int ch = (c);                   \
 848         if (ch >= 0x80)                 \
 849           ch = BYTE8_TO_CHAR (ch);      \
 850         CHAR_STRING_ADVANCE (ch, dst);  \
 851       }                                 \
 852     else                                \
 853       *dst++ = (c);                     \
 854   } while (0)
 855
 856
 857 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  C1 is emitted
        first, and PRODUCED_CHARS is increased by two.  */
 858
 859 #define EMIT_TWO_BYTES(c1, c2)          \
 860   do {                                  \
 861     produced_chars += 2;                \
 862     if (multibytep)                     \
 863       {                                 \
 864         int ch;                         \
 865                                         \
 866         ch = (c1);                      \
 867         if (ch >= 0x80)                 \
 868           ch = BYTE8_TO_CHAR (ch);      \
 869         CHAR_STRING_ADVANCE (ch, dst);  \
 870         ch = (c2);                      \
 871         if (ch >= 0x80)                 \
 872           ch = BYTE8_TO_CHAR (ch);      \
 873         CHAR_STRING_ADVANCE (ch, dst);  \
 874       }                                 \
 875     else                                \
 876       {                                 \
 877         *dst++ = (c1);                  \
 878         *dst++ = (c2);                  \
 879       }                                 \
 880   } while (0)
 881
 882
     /* Emit the three bytes C1, C2 and C3 in that order, by expanding
        EMIT_ONE_BYTE for C1 and EMIT_TWO_BYTES for C2 and C3.  The same
        caller-side variables as for those macros are required.  */
 883 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 884   do {                                  \
 885     EMIT_ONE_BYTE (c1);                 \
 886     EMIT_TWO_BYTES (c2, c3);            \
 887   } while (0)
 888
 889
     /* Emit the four bytes C1, C2, C3 and C4 in that order, by expanding
        EMIT_TWO_BYTES twice.  The same caller-side variables as for
        EMIT_TWO_BYTES are required.  */
 890 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 891   do {                                          \
 892     EMIT_TWO_BYTES (c1, c2);                    \
 893     EMIT_TWO_BYTES (c3, c4);                    \
 894   } while (0)
 895
 896
 897 /* Prototypes for static functions.  The first groups are the
        detect/decode/encode entry points of each coding-system family
        (UTF-8, UTF-16, ISO-2022, emacs-mule, Shift-JIS, BIG5, CCL) and
        the raw-text decode/encode pair; the remaining declarations are
        helpers for source/destination bookkeeping, EOL handling,
        translation tables, annotations and the conversion drivers.  */
 898 static void record_conversion_result P_ ((struct coding_system *coding,
 899                                           enum coding_result_code result));
 900 static int detect_coding_utf_8 P_ ((struct coding_system *,
 901                                     struct coding_detection_info *info));
 902 static void decode_coding_utf_8 P_ ((struct coding_system *));
 903 static int encode_coding_utf_8 P_ ((struct coding_system *));
 904
 905 static int detect_coding_utf_16 P_ ((struct coding_system *,
 906                                      struct coding_detection_info *info));
 907 static void decode_coding_utf_16 P_ ((struct coding_system *));
 908 static int encode_coding_utf_16 P_ ((struct coding_system *));
 909
 910 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 911                                        struct coding_detection_info *info));
 912 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 913 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 914
 915 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 916                                          struct coding_detection_info *info));
 917 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 918 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 919
 920 static int detect_coding_sjis P_ ((struct coding_system *,
 921                                    struct coding_detection_info *info));
 922 static void decode_coding_sjis P_ ((struct coding_system *));
 923 static int encode_coding_sjis P_ ((struct coding_system *));
 924
 925 static int detect_coding_big5 P_ ((struct coding_system *,
 926                                    struct coding_detection_info *info));
 927 static void decode_coding_big5 P_ ((struct coding_system *));
 928 static int encode_coding_big5 P_ ((struct coding_system *));
 929
 930 static int detect_coding_ccl P_ ((struct coding_system *,
 931                                   struct coding_detection_info *info));
 932 static void decode_coding_ccl P_ ((struct coding_system *));
 933 static int encode_coding_ccl P_ ((struct coding_system *));
 934
 935 static void decode_coding_raw_text P_ ((struct coding_system *));
 936 static int encode_coding_raw_text P_ ((struct coding_system *));
 937
 938 static void coding_set_source P_ ((struct coding_system *));
 939 static void coding_set_destination P_ ((struct coding_system *));
 940 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 941 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 942                                             EMACS_INT, EMACS_INT));
 943 static unsigned char *alloc_destination P_ ((struct coding_system *,
 944                                              EMACS_INT, unsigned char *));
 945 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 946 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 947                                                      int *, int *,
 948                                                      unsigned char *));
 949 static int detect_eol P_ ((const unsigned char *,
 950                            EMACS_INT, enum coding_category));
 951 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 952 static void decode_eol P_ ((struct coding_system *));
 953 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 954 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *));
 955 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 956 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 957                                         EMACS_INT));
 958 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 959 static int decode_coding P_ ((struct coding_system *));
 960 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 961                                                       struct coding_system *,
 962                                                       int *, EMACS_INT *));
 963 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 964                                                   struct coding_system *,
 965                                                   int *, EMACS_INT *));
 966 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 967 static int encode_coding P_ ((struct coding_system *));
 968 static Lisp_Object make_conversion_work_buffer P_ ((int));
 969 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 970 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 971 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 972
     /* Record RESULT as the outcome of a conversion: store it in
        CODING->result and, for every code except CODING_RESULT_SUCCESS,
        set Vlast_code_conversion_error to the matching symbol.  On
        success Vlast_code_conversion_error is left untouched.  A code
        not listed below stores the symbol interned from "Unknown
        error".  */
 973 static void
 974 record_conversion_result (struct coding_system *coding,
 975                           enum coding_result_code result)
 976 {
 977   coding->result = result;
 978   switch (result)
 979     {
 980     case CODING_RESULT_INSUFFICIENT_SRC:
 981       Vlast_code_conversion_error = Qinsufficient_source;
 982       break;
 983     case CODING_RESULT_INCONSISTENT_EOL:
 984       Vlast_code_conversion_error = Qinconsistent_eol;
 985       break;
 986     case CODING_RESULT_INVALID_SRC:
 987       Vlast_code_conversion_error = Qinvalid_source;
 988       break;
 989     case CODING_RESULT_INTERRUPT:
 990       Vlast_code_conversion_error = Qinterrupted;
 991       break;
 992     case CODING_RESULT_INSUFFICIENT_MEM:
 993       Vlast_code_conversion_error = Qinsufficient_memory;
 994       break;
 995     case CODING_RESULT_SUCCESS:
           /* Nothing to report.  */
 996       break;
 997     default:
 998       Vlast_code_conversion_error = intern ("Unknown error");
 999     }
1000 }
1001
     /* Decode the code point CODE of CHARSET with DECODE_CHAR and store
        the result in C.  charset_map_loaded is cleared beforehand; if it
        is nonzero afterwards, CODING->source is recomputed with
        coding_set_source and SRC, SRC_BASE and SRC_END are shifted by
        the distance the source moved.  (Presumably loading a charset
        map can relocate the source text -- confirm against
        DECODE_CHAR.)  SRC, SRC_BASE and SRC_END must be modifiable
        lvalues.  */
1002 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
1003   do {                                                                       \
1004     charset_map_loaded = 0;                                                  \
1005     c = DECODE_CHAR (charset, code);                                         \
1006     if (charset_map_loaded)                                                  \
1007       {                                                                      \
1008         const unsigned char *orig = coding->source;                          \
1009         EMACS_INT offset;                                                    \
1010                                                                              \
1011         coding_set_source (coding);                                          \
1012         offset = coding->source - orig;                                      \
1013         src += offset;                                                       \
1014         src_base += offset;                                                  \
1015         src_end += offset;                                                   \
1016       }                                                                      \
1017   } while (0)
1018
1019
1020 /* If there are not more than BYTES bytes of room left at dst (that
1021    is, dst + BYTES reaches dst_end), allocate memory for
1022    coding->destination and update dst and dst_end.  The amount asked
1023    for is BYTES plus the number of elements still unread in charbuf.
        We don't have to take care of coding->source which will be
        relocated.  It is handled by calling coding_set_source in
        encode_coding.  The caller should declare and set these
        variables appropriately in advance:
             coding, dst, dst_end, charbuf, charbuf_end  */
1024
1025 #define ASSURE_DESTINATION(bytes)                               \
1026   do {                                                          \
1027     if (dst + (bytes) >= dst_end)                               \
1028       {                                                         \
1029         int more_bytes = charbuf_end - charbuf + (bytes);       \
1030                                                                 \
1031         dst = alloc_destination (coding, more_bytes, dst);      \
1032         dst_end = coding->destination + coding->dst_bytes;      \
1033       }                                                         \
1034   } while (0)
1035
1036
1037 /* Store multibyte form of the character C in P, and advance P to the
1038    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
1039    never calls MAYBE_UNIFY_CHAR.
        The form is 1 to 5 bytes long depending on which MAX_n_BYTE_CHAR
        limit C fits under; a C above MAX_5_BYTE_CHAR is written with
        BYTE8_STRING after subtracting 0x3FFF80.  C and P are evaluated
        several times, so both must be free of side effects; every use
        of C is parenthesized so that an expression argument is encoded
        correctly.  */
1040
1041 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
1042   do {                                          \
1043     if ((c) <= MAX_1_BYTE_CHAR)                 \
1044       *(p)++ = (c);                             \
1045     else if ((c) <= MAX_2_BYTE_CHAR)            \
1046       *(p)++ = (0xC0 | ((c) >> 6)),             \
1047         *(p)++ = (0x80 | ((c) & 0x3F));         \
1048     else if ((c) <= MAX_3_BYTE_CHAR)            \
1049       *(p)++ = (0xE0 | ((c) >> 12)),            \
1050         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1051         *(p)++ = (0x80 | ((c) & 0x3F));         \
1052     else if ((c) <= MAX_4_BYTE_CHAR)            \
1053       *(p)++ = (0xF0 | ((c) >> 18)),            \
1054         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)), \
1055         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1056         *(p)++ = (0x80 | ((c) & 0x3F));         \
1057     else if ((c) <= MAX_5_BYTE_CHAR)            \
1058       *(p)++ = 0xF8,                            \
1059         *(p)++ = (0x80 | (((c) >> 18) & 0x0F)), \
1060         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)), \
1061         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1062         *(p)++ = (0x80 | ((c) & 0x3F));         \
1063     else                                        \
1064       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1065   } while (0)
1066
1067
1068 /* Return the character code of character whose multibyte form is at
1069    P, and advance P to the end of the multibyte form.  This is like
1070    STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.
        The length (1 to 5 bytes) is chosen from the high bits of the
        first byte alone; the following bytes are not validated.  A
        2-byte form whose first byte is below 0xC2 has 0x3FFF80 OR'ed
        into the result.  In the 5-byte form the first byte contributes
        no bits to the result.  P is evaluated several times, so it must
        be an lvalue free of side effects.  */
1071
1072 #define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
1073   (!((p)[0] & 0x80)                                             \
1074    ? *(p)++                                                     \
1075    : ! ((p)[0] & 0x20)                                          \
1076    ? ((p) += 2,                                                 \
1077       ((((p)[-2] & 0x1F) << 6)                                  \
1078        | ((p)[-1] & 0x3F)                                       \
1079        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
1080    : ! ((p)[0] & 0x10)                                          \
1081    ? ((p) += 3,                                                 \
1082       ((((p)[-3] & 0x0F) << 12)                                 \
1083        | (((p)[-2] & 0x3F) << 6)                                \
1084        | ((p)[-1] & 0x3F)))                                     \
1085    : ! ((p)[0] & 0x08)                                          \
1086    ? ((p) += 4,                                                 \
1087       ((((p)[-4] & 0xF) << 18)                                  \
1088        | (((p)[-3] & 0x3F) << 12)                               \
1089        | (((p)[-2] & 0x3F) << 6)                                \
1090        | ((p)[-1] & 0x3F)))                                     \
1091    : ((p) += 5,                                                 \
1092       ((((p)[-4] & 0x3F) << 18)                                 \
1093        | (((p)[-3] & 0x3F) << 12)                               \
1094        | (((p)[-2] & 0x3F) << 6)                                \
1095        | ((p)[-1] & 0x3F))))
1096
1097
     /* Recompute CODING->source from CODING->src_object.
        For a buffer source the address is BUF_GAP_END_ADDR plus
        src_pos_byte when src_pos is negative, and the buffer address of
        byte position src_pos_byte otherwise.  For a string source it is
        the string data plus src_pos_byte.  Any other source is left
        untouched.  */
1098 static void
1099 coding_set_source (coding)
1100      struct coding_system *coding;
1101 {
1102   if (BUFFERP (coding->src_object))
1103     {
1104       struct buffer *buf = XBUFFER (coding->src_object);
1105
1106       if (coding->src_pos < 0)
             /* NOTE(review): a negative src_pos presumably means the
                source is addressed relative to the gap end; confirm
                against the callers that set src_pos.  */
1107         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1108       else
1109         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1110     }
1111   else if (STRINGP (coding->src_object))
1112     {
1113       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1114     }
1115   else
1116     /* Otherwise, the source is C string and is never relocated
1117        automatically.  Thus we don't have to update anything.  */
1118     ;
1119 }
1120
     /* Recompute CODING->destination and CODING->dst_bytes when the
        destination is a buffer; do nothing otherwise.
        When CODING->src_pos is negative, the destination starts at
        dst_pos_byte counted from BEG_ADDR and the room ends short of
        GAP_END_ADDR by the number of source bytes not yet consumed
        (src_bytes - consumed).  Otherwise the destination is
        dst_pos_byte within the destination buffer and the room extends
        to that buffer's gap end.  */
1121 static void
1122 coding_set_destination (coding)
1123      struct coding_system *coding;
1124 {
1125   if (BUFFERP (coding->dst_object))
1126     {
1127       if (coding->src_pos < 0)
1128         {
               /* NOTE(review): BEG_ADDR and GAP_END_ADDR take no buffer
                  argument, so this branch presumably relies on the
                  destination being the current buffer -- confirm.  */
1129           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1130           coding->dst_bytes = (GAP_END_ADDR
1131                                - (coding->src_bytes - coding->consumed)
1132                                - coding->destination);
1133         }
1134       else
1135         {
1136           /* We are sure that coding->dst_pos_byte is before the gap
1137              of the buffer. */
1138           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1139                                  + coding->dst_pos_byte - BEG_BYTE);
1140           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1141                                - coding->destination);
1142         }
1143     }
1144   else
1145     /* Otherwise, the destination is C string and is never relocated
1146        automatically.  Thus we don't have to update anything.  */
1147     ;
1148 }
1149
1150
     /* Enlarge the destination of CODING by BYTES bytes with xrealloc,
        and add BYTES to CODING->dst_bytes.  CODING->destination is
        replaced by the pointer xrealloc returns, so pointers into the
        old block must be recomputed by the caller.  */
1151 static void
1152 coding_alloc_by_realloc (coding, bytes)
1153      struct coding_system *coding;
1154      EMACS_INT bytes;
1155 {
1156   coding->destination = (unsigned char *) xrealloc (coding->destination,
1157                                                     coding->dst_bytes + bytes);
1158   coding->dst_bytes += bytes;
1159 }
1160
     /* Make room for BYTES more bytes in the destination buffer of
        CODING by calling make_gap.  GAP_HEAD_USED is the number of
        bytes at the head of the gap that already hold produced data.
        If the source and the destination are the same object, the gap
        bookkeeping is adjusted around the make_gap call and restored
        afterwards (see the comment below); otherwise the destination
        buffer is made current just for the call.  The statement order
        in the first branch matters: every adjustment made before
        make_gap is undone after it.  */
1161 static void
1162 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1163      struct coding_system *coding;
1164      EMACS_INT gap_head_used, bytes;
1165 {
1166   if (EQ (coding->src_object, coding->dst_object))
1167     {
1168       /* The gap may contain the produced data at the head and not-yet
1169          consumed data at the tail.  To preserve those data, we at
1170          first make the gap size to zero, then increase the gap
1171          size.  */
1172       EMACS_INT add = GAP_SIZE;
1173
1174       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1175       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1176       make_gap (bytes);
1177       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1178       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1179     }
1180   else
1181     {
1182       Lisp_Object this_buffer;
1183
           /* make_gap takes no buffer argument, so switch to the
              destination buffer for the call and switch back after.  */
1184       this_buffer = Fcurrent_buffer ();
1185       set_buffer_internal (XBUFFER (coding->dst_object));
1186       make_gap (bytes);
1187       set_buffer_internal (XBUFFER (this_buffer));
1188     }
1189 }
1190
1191
     /* Make room for NBYTES more bytes in the destination of CODING.
        DST is the current write position inside the destination.  A
        buffer destination is grown with coding_alloc_by_making_gap
        (passing the distance of DST from the buffer's gap start as the
        used head of the gap); any other destination is grown with
        coding_alloc_by_realloc.  CODING->destination and
        CODING->dst_bytes are then refreshed with
        coding_set_destination.  Return the write position that has the
        same offset from the (possibly moved) start of the destination
        as DST had.  */
1192 static unsigned char *
1193 alloc_destination (coding, nbytes, dst)
1194      struct coding_system *coding;
1195      EMACS_INT nbytes;
1196      unsigned char *dst;
1197 {
       /* Remember DST as an offset, because the destination may move.  */
1198   EMACS_INT offset = dst - coding->destination;
1199
1200   if (BUFFERP (coding->dst_object))
1201     {
1202       struct buffer *buf = XBUFFER (coding->dst_object);
1203
1204       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1205     }
1206   else
1207     coding_alloc_by_realloc (coding, nbytes);
1208   coding_set_destination (coding);
1209   dst = coding->destination + offset;
1210   return dst;
1211 }
1212
1213 /** Macros for annotations.  */
1214
1215 /* An annotation data is stored in the array coding->charbuf in this
1216    format:
1217      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1218    LENGTH is the number of elements in the annotation.
1219    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1220    NCHARS is the number of characters in the text annotated.
1221
1222    The format of the following elements depend on ANNOTATION_MASK.
1223
1224    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1225    follow:
1226      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1227
1228    NBYTES is the number of bytes specified in the header part of
1229    old-style emacs-mule encoding, or 0 for the other kind of
1230    composition.
1231
1232    METHOD is one of enum composition_method.
1233
1234    Optional COMPOSITION-COMPONENTS are characters and composition
1235    rules.
1236
1237    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1238    follows.
1239
1240    If ANNOTATION_MASK is 0, this annotation is just a placeholder used
1241    to recover from an invalid annotation, and should be skipped by
1242    produce_annotation.  */
1243
1244 /* Maximum length of the header of annotation data.  */
1245 #define MAX_ANNOTATION_LENGTH 5
1246
     /* Store the three leading elements of an annotation at BUF, namely
        -LEN, MASK and NCHARS in that order, advancing BUF past them,
        and set coding->annotated to 1.  BUF must be a modifiable
        pointer lvalue and `coding' must be in scope at the point of
        use.  The expansion is a single statement without a trailing
        semicolon, so the macro can be used as the body of an `if' that
        has an `else'.  */
1247 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1248   do {                                                  \
1249     *(buf)++ = -(len);                                  \
1250     *(buf)++ = (mask);                                  \
1251     *(buf)++ = (nchars);                                \
1252     coding->annotated = 1;                              \
1253   } while (0)
1254
     /* Store a composition annotation header at BUF and advance BUF past
        it: the three elements written by ADD_ANNOTATION_DATA (with
        length 5 and CODING_ANNOTATE_COMPOSITION_MASK), followed by
        NBYTES and METHOD.  BUF must be a modifiable pointer lvalue; it
        is parenthesized at every use so that an lvalue expression such
        as `*pp' is handled the same way ADD_ANNOTATION_DATA handles
        it.  */
1255 #define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
1256   do {                                                                      \
1257     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1258     *(buf)++ = (nbytes);                                                    \
1259     *(buf)++ = (method);                                                    \
1260   } while (0)
1261
1262
     /* Store a charset annotation at BUF and advance BUF past it: the
        three elements written by ADD_ANNOTATION_DATA (with length 4 and
        CODING_ANNOTATE_CHARSET_MASK), followed by the charset ID.  BUF
        must be a modifiable pointer lvalue; it is parenthesized at
        every use so that an lvalue expression such as `*pp' is handled
        the same way ADD_ANNOTATION_DATA handles it.  */
1263 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1264   do {                                                                  \
1265     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1266     *(buf)++ = (id);                                                    \
1267   } while (0)
1268
1269 \f
1270 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1271
1272
1273
1274 \f
1275 /*** 3. UTF-8 ***/
1276
1277 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1278    Check if a text is encoded in UTF-8.  If it is, return 1, else
1279    return 0.  (This describes detect_coding_utf_8, defined after the
        macros below.)  */
1280
     /* Classify a byte C by its high bits: a single-octet character
        (below 0x80; note that this also holds for a negative C), a
        trailing octet (10xxxxxx), or the leading octet of a 2-, 3-, 4-
        or 5-octet sequence (110xxxxx, 1110xxxx, 11110xxx, 111110xx).  */
1281 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1282 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1283 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1284 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1285 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1286 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1287
     /* The byte order mark U+FEFF, and the three octets EF BB BF that
        encode it in UTF-8.  */
1288 #define UTF_BOM 0xFEFF
1289 #define UTF_8_BOM_1 0xEF
1290 #define UTF_8_BOM_2 0xBB
1291 #define UTF_8_BOM_3 0xBF
1292
     /* Check whether the source of CODING, starting after its leading
        ASCII part (CODING->head_ascii bytes), is structurally valid
        UTF-8: every leading octet must be followed by the right number
        of trailing octets, for sequences of up to five octets.
        DETECT_INFO->checked always gets CATEGORY_MASK_UTF_8.
        On a malformed sequence, or on a sequence truncated by the end
        of the source when CODING_MODE_LAST_BLOCK is set, add
        CATEGORY_MASK_UTF_8 to DETECT_INFO->rejected and return 0.
        Otherwise return 1 after updating DETECT_INFO: if the text starts
        with the octets EF BB BF, both the SIG and NOSIG categories are
        marked found; if not, the SIG category is rejected and the NOSIG
        category is marked found only when at least one multi-octet
        sequence was seen.  */
1293 static int
1294 detect_coding_utf_8 (coding, detect_info)
1295      struct coding_system *coding;
1296      struct coding_detection_info *detect_info;
1297 {
1298   const unsigned char *src = coding->source, *src_base;
1299   const unsigned char *src_end = coding->source + coding->src_bytes;
1300   int multibytep = coding->src_multibyte;
1301   int consumed_chars = 0;
1302   int bom_found = 0;
1303   int found = 0;
1304
1305   detect_info->checked |= CATEGORY_MASK_UTF_8;
1306   /* A coding system of this category is always ASCII compatible.  */
1307   src += coding->head_ascii;
1308
1309   while (1)
1310     {
1311       int c, c1, c2, c3, c4;
1312
           /* SRC_BASE marks the start of the sequence being examined;
              ONE_MORE_BYTE jumps to `no_more_source' at the end of the
              source.  A negative value (a multibyte character in a
              multibyte source) is skipped as a first octet but rejects
              the text as a trailing octet.  */
1313       src_base = src;
1314       ONE_MORE_BYTE (c);
1315       if (c < 0 || UTF_8_1_OCTET_P (c))
1316         continue;
1317       ONE_MORE_BYTE (c1);
1318       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1319         break;
1320       if (UTF_8_2_OCTET_LEADING_P (c))
1321         {
1322           found = 1;
1323           continue;
1324         }
1325       ONE_MORE_BYTE (c2);
1326       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1327         break;
1328       if (UTF_8_3_OCTET_LEADING_P (c))
1329         {
1330           found = 1;
               /* Only a sequence at the very start of the source can be
                  a signature.  */
1331           if (src_base == coding->source
1332               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1333             bom_found = 1;
1334           continue;
1335         }
1336       ONE_MORE_BYTE (c3);
1337       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1338         break;
1339       if (UTF_8_4_OCTET_LEADING_P (c))
1340         {
1341           found = 1;
1342           continue;
1343         }
1344       ONE_MORE_BYTE (c4);
1345       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1346         break;
1347       if (UTF_8_5_OCTET_LEADING_P (c))
1348         {
1349           found = 1;
1350           continue;
1351         }
1352       break;
1353     }
       /* The loop is left with `break' only on a malformed sequence.  */
1354   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1355   return 0;
1356
1357  no_more_source:
       /* SRC_BASE < SRC means the source ended inside a sequence.  */
1358   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1359     {
1360       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1361       return 0;
1362     }
1363   if (bom_found)
1364     {
1365       /* The first character 0xFEFF doesn't necessarily mean a BOM.  */
1366       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1367     }
1368   else
1369     {
1370       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1371       if (found)
1372         detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1373     }
1374   return 1;
1375 }
1376
1377
     /* Decode UTF-8 text from the source of CODING, starting at offset
        CODING->consumed, into CODING->charbuf, starting at index
        CODING->charbuf_used, until the charbuf is full or the source is
        exhausted.

        Unless the coding system's BOM setting is utf_without_bom, a
        leading EF BB BF is skipped; the setting is then changed to
        utf_without_bom so that the probe is made only once.

        Sequences of up to five octets are accepted.  Overlong forms,
        surrogates (0xD800..0xDFFF) and five-octet values above MAX_CHAR
        are invalid.  For an invalid sequence only its first byte is
        consumed: it is stored as is when ASCII, or as
        BYTE8_TO_CHAR (byte) otherwise, and CODING->errors is
        incremented.

        When the EOL type is `dos' (and EOL conversion is not
        inhibited), the byte after a CR is fetched ahead into
        BYTE_AFTER_CR, so that a CR that is the last byte of the source
        is left unconsumed.

        On return CODING->consumed, CODING->consumed_char and
        CODING->charbuf_used describe the progress made; a sequence cut
        off by the end of the source is not consumed.  */
1378 static void
1379 decode_coding_utf_8 (coding)
1380      struct coding_system *coding;
1381 {
1382   const unsigned char *src = coding->source + coding->consumed;
1383   const unsigned char *src_end = coding->source + coding->src_bytes;
1384   const unsigned char *src_base;
1385   int *charbuf = coding->charbuf + coding->charbuf_used;
1386   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1387   int consumed_chars = 0, consumed_chars_base = 0;
1388   int multibytep = coding->src_multibyte;
1389   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1390   Lisp_Object attr, charset_list;
1391   int eol_crlf =
1392     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1393   int byte_after_cr = -1;
1394
1395   CODING_GET_INFO (coding, attr, charset_list);
1396
1397   if (bom != utf_without_bom)
1398     {
1399       int c1, c2, c3;
1400
           /* Probe for the signature; on any mismatch rewind SRC so that
              the bytes are decoded as ordinary text.
              NOTE(review): the rewind does not restore consumed_chars,
              which ONE_MORE_BYTE has already incremented -- confirm
              whether consumed_char is meant to include those bytes.  */
1401       src_base = src;
1402       ONE_MORE_BYTE (c1);
1403       if (! UTF_8_3_OCTET_LEADING_P (c1))
1404         src = src_base;
1405       else
1406         {
1407           ONE_MORE_BYTE (c2);
1408           if (! UTF_8_EXTRA_OCTET_P (c2))
1409             src = src_base;
1410           else
1411             {
1412               ONE_MORE_BYTE (c3);
1413               if (! UTF_8_EXTRA_OCTET_P (c3))
1414                 src = src_base;
1415               else
1416                 {
1417                   if ((c1 != UTF_8_BOM_1)
1418                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1419                     src = src_base;
1420                   else
1421                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1422                 }
1423             }
1424         }
1425     }
       /* Done unconditionally, which makes the assignment inside the
          probe above redundant.  */
1426   CODING_UTF_8_BOM (coding) = utf_without_bom;
1427
1428
1429
1430   while (1)
1431     {
1432       int c, c1, c2, c3, c4, c5;
1433
           /* Everything before SRC_BASE has been fully decoded; the
              exit code below reports progress up to this point only.  */
1434       src_base = src;
1435       consumed_chars_base = consumed_chars;
1436
1437       if (charbuf >= charbuf_end)
1438         {
               /* Give back the byte that was fetched ahead after a CR.  */
1439           if (byte_after_cr >= 0)
1440             src_base--;
1441           break;
1442         }
1443
1444       if (byte_after_cr >= 0)
1445         c1 = byte_after_cr, byte_after_cr = -1;
1446       else
1447         ONE_MORE_BYTE (c1);
1448       if (c1 < 0)
1449         {
               /* ONE_MORE_BYTE returned a negated character code.  */
1450           c = - c1;
1451         }
1452       else if (UTF_8_1_OCTET_P(c1))
1453         {
1454           if (eol_crlf && c1 == '\r')
1455             ONE_MORE_BYTE (byte_after_cr);
1456           c = c1;
1457         }
1458       else
1459         {
1460           ONE_MORE_BYTE (c2);
1461           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1462             goto invalid_code;
1463           if (UTF_8_2_OCTET_LEADING_P (c1))
1464             {
1465               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1466               /* Reject overlong sequences here and below.  Encoders
1467                  producing them are incorrect, they can be misleading,
1468                  and they mess up read/write invariance.  */
1469               if (c < 128)
1470                 goto invalid_code;
1471             }
1472           else
1473             {
1474               ONE_MORE_BYTE (c3);
1475               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1476                 goto invalid_code;
1477               if (UTF_8_3_OCTET_LEADING_P (c1))
1478                 {
1479                   c = (((c1 & 0xF) << 12)
1480                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1481                   if (c < 0x800
1482                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1483                     goto invalid_code;
1484                 }
1485               else
1486                 {
1487                   ONE_MORE_BYTE (c4);
1488                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1489                     goto invalid_code;
1490                   if (UTF_8_4_OCTET_LEADING_P (c1))
1491                     {
1492                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1493                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1494                     if (c < 0x10000)
1495                       goto invalid_code;
1496                     }
1497                   else
1498                     {
1499                       ONE_MORE_BYTE (c5);
1500                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1501                         goto invalid_code;
1502                       if (UTF_8_5_OCTET_LEADING_P (c1))
1503                         {
1504                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1505                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1506                                | (c5 & 0x3F));
1507                           if ((c > MAX_CHAR) || (c < 0x200000))
1508                             goto invalid_code;
1509                         }
1510                       else
1511                         goto invalid_code;
1512                     }
1513                 }
1514             }
1515         }
1516
1517       *charbuf++ = c;
1518       continue;
1519
1520     invalid_code:
           /* Go back to the start of the bad sequence and consume just
              its first byte.  */
1521       src = src_base;
1522       consumed_chars = consumed_chars_base;
1523       ONE_MORE_BYTE (c);
1524       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1525       coding->errors++;
1526     }
1527
1528  no_more_source:
1529   coding->consumed_char += consumed_chars_base;
1530   coding->consumed = src_base - coding->source;
1531   coding->charbuf_used = charbuf - coding->charbuf;
1532 }
1533
1534
1535 static int
1536 encode_coding_utf_8 (coding)
1537      struct coding_system *coding;
1538 {
1539   int multibytep = coding->dst_multibyte;
1540   int *charbuf = coding->charbuf;
1541   int *charbuf_end = charbuf + coding->charbuf_used;
1542   unsigned char *dst = coding->destination + coding->produced;
1543   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1544   int produced_chars = 0;
1545   int c;
1546
1547   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1548     {
1549       ASSURE_DESTINATION (3);
1550       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1551       CODING_UTF_8_BOM (coding) = utf_without_bom;
1552     }
1553
1554   if (multibytep)
1555     {
1556       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1557
1558       while (charbuf < charbuf_end)
1559         {
1560           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1561
1562           ASSURE_DESTINATION (safe_room);
1563           c = *charbuf++;
1564           if (CHAR_BYTE8_P (c))
1565             {
1566               c = CHAR_TO_BYTE8 (c);
1567               EMIT_ONE_BYTE (c);
1568             }
1569           else
1570             {
1571               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1572               for (p = str; p < pend; p++)
1573                 EMIT_ONE_BYTE (*p);
1574             }
1575         }
1576     }
1577   else
1578     {
1579       int safe_room = MAX_MULTIBYTE_LENGTH;
1580
1581       while (charbuf < charbuf_end)
1582         {
1583           ASSURE_DESTINATION (safe_room);
1584           c = *charbuf++;
1585           if (CHAR_BYTE8_P (c))
1586             *dst++ = CHAR_TO_BYTE8 (c);
1587           else
1588             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1589           produced_chars++;
1590         }
1591     }
1592   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1593   coding->produced_char += produced_chars;
1594   coding->produced = dst - coding->destination;
1595   return 0;
1596 }
1597
1598
1599 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1600    Check if a text is encoded in one of UTF-16 based coding systems.
1601    If it is, return 1, else return 0.  */
1602
/* Nonzero if the 16-bit code unit VAL is a high (leading) surrogate,
   i.e. lies in the range 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit code unit VAL is a low (trailing) surrogate,
   i.e. lies in the range 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  Note that
   VAL is evaluated more than once.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF)         \
   || UTF_16_LOW_SURROGATE_P (val))
1613
1614
1615 static int
1616 detect_coding_utf_16 (coding, detect_info)
1617      struct coding_system *coding;
1618      struct coding_detection_info *detect_info;
1619 {
1620   const unsigned char *src = coding->source, *src_base = src;
1621   const unsigned char *src_end = coding->source + coding->src_bytes;
1622   int multibytep = coding->src_multibyte;
1623   int consumed_chars = 0;
1624   int c1, c2;
1625
1626   detect_info->checked |= CATEGORY_MASK_UTF_16;
1627   if (coding->mode & CODING_MODE_LAST_BLOCK
1628       && (coding->src_chars & 1))
1629     {
1630       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1631       return 0;
1632     }
1633
1634   TWO_MORE_BYTES (c1, c2);
1635   if ((c1 == 0xFF) && (c2 == 0xFE))
1636     {
1637       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1638                              | CATEGORY_MASK_UTF_16_AUTO);
1639       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1640                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1641                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1642     }
1643   else if ((c1 == 0xFE) && (c2 == 0xFF))
1644     {
1645       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1646                              | CATEGORY_MASK_UTF_16_AUTO);
1647       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1648                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1649                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1650     }
1651   else if (c2 < 0)
1652     {
1653       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1654       return 0;
1655     }
1656   else
1657     {
1658       /* We check the dispersion of Eth and Oth bytes where E is even and
1659          O is odd.  If both are high, we assume binary data.*/
1660       unsigned char e[256], o[256];
1661       unsigned e_num = 1, o_num = 1;
1662
1663       memset (e, 0, 256);
1664       memset (o, 0, 256);
1665       e[c1] = 1;
1666       o[c2] = 1;
1667
1668       detect_info->rejected
1669         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1670
1671       while (1)
1672         {
1673           TWO_MORE_BYTES (c1, c2);
1674           if (c2 < 0)
1675             break;
1676           if (! e[c1])
1677             {
1678               e[c1] = 1;
1679               e_num++;
1680               if (e_num >= 128 && o_num >= 128)
1681                 break;
1682             }
1683           if (! o[c2])
1684             {
1685               o[c2] = 1;
1686               o_num++;
1687               if (e_num >= 128 && o_num >= 128)
1688                 break;
1689             }
1690         }
1691       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1692       return 0;
1693     }
1694
1695  no_more_source:
1696   return 1;
1697 }
1698
1699 static void
1700 decode_coding_utf_16 (coding)
1701      struct coding_system *coding;
1702 {
1703   const unsigned char *src = coding->source + coding->consumed;
1704   const unsigned char *src_end = coding->source + coding->src_bytes;
1705   const unsigned char *src_base;
1706   int *charbuf = coding->charbuf + coding->charbuf_used;
1707   /* We may produces at most 3 chars in one loop.  */
1708   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1709   int consumed_chars = 0, consumed_chars_base = 0;
1710   int multibytep = coding->src_multibyte;
1711   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1712   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1713   int surrogate = CODING_UTF_16_SURROGATE (coding);
1714   Lisp_Object attr, charset_list;
1715   int eol_crlf =
1716     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1717   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1718
1719   CODING_GET_INFO (coding, attr, charset_list);
1720
1721   if (bom == utf_with_bom)
1722     {
1723       int c, c1, c2;
1724
1725       src_base = src;
1726       ONE_MORE_BYTE (c1);
1727       ONE_MORE_BYTE (c2);
1728       c = (c1 << 8) | c2;
1729
1730       if (endian == utf_16_big_endian
1731           ? c != 0xFEFF : c != 0xFFFE)
1732         {
1733           /* The first two bytes are not BOM.  Treat them as bytes
1734              for a normal character.  */
1735           src = src_base;
1736           coding->errors++;
1737         }
1738       CODING_UTF_16_BOM (coding) = utf_without_bom;
1739     }
1740   else if (bom == utf_detect_bom)
1741     {
1742       /* We have already tried to detect BOM and failed in
1743          detect_coding.  */
1744       CODING_UTF_16_BOM (coding) = utf_without_bom;
1745     }
1746
1747   while (1)
1748     {
1749       int c, c1, c2;
1750
1751       src_base = src;
1752       consumed_chars_base = consumed_chars;
1753
1754       if (charbuf >= charbuf_end)
1755         {
1756           if (byte_after_cr1 >= 0)
1757             src_base -= 2;
1758           break;
1759         }
1760
1761       if (byte_after_cr1 >= 0)
1762         c1 = byte_after_cr1, byte_after_cr1 = -1;
1763       else
1764         ONE_MORE_BYTE (c1);
1765       if (c1 < 0)
1766         {
1767           *charbuf++ = -c1;
1768           continue;
1769         }
1770       if (byte_after_cr2 >= 0)
1771         c2 = byte_after_cr2, byte_after_cr2 = -1;
1772       else
1773         ONE_MORE_BYTE (c2);
1774       if (c2 < 0)
1775         {
1776           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1777           *charbuf++ = -c2;
1778           continue;
1779         }
1780       c = (endian == utf_16_big_endian
1781            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1782
1783       if (surrogate)
1784         {
1785           if (! UTF_16_LOW_SURROGATE_P (c))
1786             {
1787               if (endian == utf_16_big_endian)
1788                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1789               else
1790                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1791               *charbuf++ = c1;
1792               *charbuf++ = c2;
1793               coding->errors++;
1794               if (UTF_16_HIGH_SURROGATE_P (c))
1795                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1796               else
1797                 *charbuf++ = c;
1798             }
1799           else
1800             {
1801               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1802               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1803               *charbuf++ = 0x10000 + c;
1804             }
1805         }
1806       else
1807         {
1808           if (UTF_16_HIGH_SURROGATE_P (c))
1809             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1810           else
1811             {
1812               if (eol_crlf && c == '\r')
1813                 {
1814                   ONE_MORE_BYTE (byte_after_cr1);
1815                   ONE_MORE_BYTE (byte_after_cr2);
1816                 }
1817               *charbuf++ = c;
1818             }
1819         }
1820     }
1821
1822  no_more_source:
1823   coding->consumed_char += consumed_chars_base;
1824   coding->consumed = src_base - coding->source;
1825   coding->charbuf_used = charbuf - coding->charbuf;
1826 }
1827
1828 static int
1829 encode_coding_utf_16 (coding)
1830      struct coding_system *coding;
1831 {
1832   int multibytep = coding->dst_multibyte;
1833   int *charbuf = coding->charbuf;
1834   int *charbuf_end = charbuf + coding->charbuf_used;
1835   unsigned char *dst = coding->destination + coding->produced;
1836   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1837   int safe_room = 8;
1838   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1839   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1840   int produced_chars = 0;
1841   Lisp_Object attrs, charset_list;
1842   int c;
1843
1844   CODING_GET_INFO (coding, attrs, charset_list);
1845
1846   if (bom != utf_without_bom)
1847     {
1848       ASSURE_DESTINATION (safe_room);
1849       if (big_endian)
1850         EMIT_TWO_BYTES (0xFE, 0xFF);
1851       else
1852         EMIT_TWO_BYTES (0xFF, 0xFE);
1853       CODING_UTF_16_BOM (coding) = utf_without_bom;
1854     }
1855
1856   while (charbuf < charbuf_end)
1857     {
1858       ASSURE_DESTINATION (safe_room);
1859       c = *charbuf++;
1860       if (c >= MAX_UNICODE_CHAR)
1861         c = coding->default_char;
1862
1863       if (c < 0x10000)
1864         {
1865           if (big_endian)
1866             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1867           else
1868             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1869         }
1870       else
1871         {
1872           int c1, c2;
1873
1874           c -= 0x10000;
1875           c1 = (c >> 10) + 0xD800;
1876           c2 = (c & 0x3FF) + 0xDC00;
1877           if (big_endian)
1878             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1879           else
1880             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1881         }
1882     }
1883   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1884   coding->produced = dst - coding->destination;
1885   coding->produced_char += produced_chars;
1886   return 0;
1887 }
1888
1889 \f
1890 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1891
1892 /* Emacs' internal format for representation of multiple character
1893    sets is a kind of multi-byte encoding, i.e. characters are
1894    represented by variable-length sequences of one-byte codes.
1895
1896    ASCII characters and control characters (e.g. `tab', `newline') are
1897    represented by one-byte sequences which are their ASCII codes, in
1898    the range 0x00 through 0x7F.
1899
1900    8-bit characters of the range 0x80..0x9F are represented by
1901    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1902    code + 0x20).
1903
1904    8-bit characters of the range 0xA0..0xFF are represented by
1905    one-byte sequences which are their 8-bit code.
1906
1907    The other characters are represented by a sequence of `base
1908    leading-code', optional `extended leading-code', and one or two
1909    `position-code's.  The length of the sequence is determined by the
1910    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1911    whereas extended leading-code and position-code take the range 0xA0
1912    through 0xFF.  See `charset.h' for more details about leading-code
1913    and position-code.
1914
1915    --- CODE RANGE of Emacs' internal format ---
1916    character set        range
1917    -------------        -----
1918    ascii                0x00..0x7F
1919    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1920    eight-bit-graphic    0xA0..0xFF
1921    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1922    ---------------------------------------------
1923
1924    As this is the internal character representation, the format is
1925    usually not used externally (i.e. in a file or in a data sent to a
1926    process).  But, it is possible to have a text externally in this
1927    format (i.e. by encoding by the coding system `emacs-mule').
1928
1929    In that case, a sequence of one-byte codes has a slightly different
1930    form.
1931
1932    At first, all characters in eight-bit-control are represented by
1933    one-byte sequences which are their 8-bit code.
1934
1935    Next, character composition data are represented by the byte
1936    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1937    where,
1938         METHOD is 0xF2 plus one of composition method (enum
1939         composition_method),
1940
1941         BYTES is 0xA0 plus a byte length of this composition data,
1942
1943         CHARS is 0xA0 plus a number of characters composed by this
1944         data,
1945
1946         COMPONENTs are characters of multibyte form or composition
1947         rules encoded by two-byte of ASCII codes.
1948
1949    In addition, for backward compatibility, the following formats are
1950    also recognized as composition data on decoding.
1951
1952    0x80 MSEQ ...
1953    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1954
1955    Here,
1956         MSEQ is a multibyte form but in these special format:
1957           ASCII: 0xA0 ASCII_CODE+0x80,
1958           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1959         RULE is a one byte code of the range 0xA0..0xF0 that
1960         represents a composition rule.
1961   */
1962
/* Table indexed by a byte value.  The emacs-mule routines in this
   file read an entry as the total number of bytes in the sequence
   introduced by that byte: detect_coding_emacs_mule expects (entry -
   1) following bytes, and emacs_mule_char handles the values 1
   through 4 and aborts on anything else.  The table is filled in
   outside this part of the file.  */
char emacs_mule_bytes[256];
1964
1965
1966 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1967    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1968    else return 0.  */
1969
1970 static int
1971 detect_coding_emacs_mule (coding, detect_info)
1972      struct coding_system *coding;
1973      struct coding_detection_info *detect_info;
1974 {
1975   const unsigned char *src = coding->source, *src_base;
1976   const unsigned char *src_end = coding->source + coding->src_bytes;
1977   int multibytep = coding->src_multibyte;
1978   int consumed_chars = 0;
1979   int c;
1980   int found = 0;
1981
1982   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1983   /* A coding system of this category is always ASCII compatible.  */
1984   src += coding->head_ascii;
1985
1986   while (1)
1987     {
1988       src_base = src;
1989       ONE_MORE_BYTE (c);
1990       if (c < 0)
1991         continue;
1992       if (c == 0x80)
1993         {
1994           /* Perhaps the start of composite character.  We simply skip
1995              it because analyzing it is too heavy for detecting.  But,
1996              at least, we check that the composite character
1997              constitutes of more than 4 bytes.  */
1998           const unsigned char *src_base;
1999
2000         repeat:
2001           src_base = src;
2002           do
2003             {
2004               ONE_MORE_BYTE (c);
2005             }
2006           while (c >= 0xA0);
2007
2008           if (src - src_base <= 4)
2009             break;
2010           found = CATEGORY_MASK_EMACS_MULE;
2011           if (c == 0x80)
2012             goto repeat;
2013         }
2014
2015       if (c < 0x80)
2016         {
2017           if (c < 0x20
2018               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2019             break;
2020         }
2021       else
2022         {
2023           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2024
2025           while (more_bytes > 0)
2026             {
2027               ONE_MORE_BYTE (c);
2028               if (c < 0xA0)
2029                 {
2030                   src--;        /* Unread the last byte.  */
2031                   break;
2032                 }
2033               more_bytes--;
2034             }
2035           if (more_bytes != 0)
2036             break;
2037           found = CATEGORY_MASK_EMACS_MULE;
2038         }
2039     }
2040   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2041   return 0;
2042
2043  no_more_source:
2044   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2045     {
2046       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2047       return 0;
2048     }
2049   detect_info->found |= found;
2050   return 1;
2051 }
2052
2053
/* Parse emacs-mule multibyte sequence at SRC and return the decoded
   character.  If CMP_STATUS indicates that we must expect MSEQ or
   RULE described above, decode it and return the negative value of
   the decoded character or rule.  If an invalid byte is found, return
   -1.  If SRC is too short, return -2.

   On success, store the number of bytes consumed in *NBYTES and the
   count kept in consumed_chars in *NCHARS; if ID is non-null, store
   the id of the character's charset in *ID.  On the -1 and -2
   returns none of the output parameters is written.

   NOTE(review): ONE_MORE_BYTE is defined outside this function; it
   jumps to `no_more_source' when the input is exhausted and
   presumably refers to src, src_base, src_end, multibytep and
   consumed_chars by name.  */

int
emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
     struct coding_system *coding;
     const unsigned char *src;
     int *nbytes, *nchars, *id;
     struct composition_status *cmp_status;
{
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base = src;
  int multibytep = coding->src_multibyte;
  struct charset *charset;
  unsigned code;
  int c;
  int consumed_chars = 0;
  /* Set to 1 when the sequence was read in the old-form MSEQ
     encoding; the result is then returned negated.  */
  int mseq_found = 0;

  ONE_MORE_BYTE (c);
  if (c < 0)
    {
      /* A negative value from ONE_MORE_BYTE is taken as the character
         -c, with the charset in slot 0 of emacs_mule_charset.  */
      c = -c;
      charset = emacs_mule_charset[0];
    }
  else
    {
      if (c >= 0xA0)
        {
          /* A byte of 0xA0 or above can start a sequence only inside
             an old-form composition.  */
          if (cmp_status->state != COMPOSING_NO
              && cmp_status->old_form)
            {
              if (cmp_status->state == COMPOSING_CHAR)
                {
                  /* MSEQ: either 0xA0 followed by ASCII_CODE+0x80, or
                     LEADING_CODE+0x20 followed by the usual bytes.  */
                  if (c == 0xA0)
                    {
                      ONE_MORE_BYTE (c);
                      c -= 0x80;
                      if (c < 0)
                        goto invalid_code;
                    }
                  else
                    c -= 0x20;
                  mseq_found = 1;
                }
              else
                {
                  /* A rule is expected here; hand the byte back
                     negated and undecoded.  */
                  *nbytes = src - src_base;
                  *nchars = consumed_chars;
                  return -c;
                }
            }
          else
            goto invalid_code;
        }

      /* Dispatch on the total length of the sequence led by C.  */
      switch (emacs_mule_bytes[c])
        {
        case 2:
          /* Leading code plus one position code.  */
          if (! (charset = emacs_mule_charset[c]))
            goto invalid_code;
          ONE_MORE_BYTE (c);
          if (c < 0xA0)
            goto invalid_code;
          code = c & 0x7F;
          break;

        case 3:
          if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
              || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
            {
              /* Private leading code, then the byte that selects the
                 charset, then one position code.  */
              ONE_MORE_BYTE (c);
              if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
                goto invalid_code;
              ONE_MORE_BYTE (c);
              if (c < 0xA0)
                goto invalid_code;
              code = c & 0x7F;
            }
          else
            {
              /* Leading code plus two position codes.  */
              if (! (charset = emacs_mule_charset[c]))
                goto invalid_code;
              ONE_MORE_BYTE (c);
              if (c < 0xA0)
                goto invalid_code;
              code = (c & 0x7F) << 8;
              ONE_MORE_BYTE (c);
              if (c < 0xA0)
                goto invalid_code;
              code |= c & 0x7F;
            }
          break;

        case 4:
          /* Leading code, then the byte that selects the charset,
             then two position codes.  */
          ONE_MORE_BYTE (c);
          if (c < 0 || ! (charset = emacs_mule_charset[c]))
            goto invalid_code;
          ONE_MORE_BYTE (c);
          if (c < 0xA0)
            goto invalid_code;
          code = (c & 0x7F) << 8;
          ONE_MORE_BYTE (c);
          if (c < 0xA0)
            goto invalid_code;
          code |= c & 0x7F;
          break;

        case 1:
          /* A single byte: ASCII, or else an eight-bit byte.  */
          code = c;
          charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
                                     ? charset_ascii : charset_eight_bit);
          break;

        default:
          abort ();
        }
      c = DECODE_CHAR (charset, code);
      if (c < 0)
        goto invalid_code;
    }
  *nbytes = src - src_base;
  *nchars = consumed_chars;
  if (id)
    *id = charset->id;
  return (mseq_found ? -c : c);

 no_more_source:
  return -2;

 invalid_code:
  return -1;
}
2190
2191
2192 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2193
2194 /* Handle these composition sequence ('|': the end of header elements,
2195    BYTES and CHARS >= 0xA0):
2196
2197    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2198    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2199    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2200
2201    and these old form:
2202
2203    (4) relative composition: 0x80 | MSEQ ... MSEQ
2204    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2205
2206    When the starter 0x80 and the following header elements are found,
2207    this annotation header is produced.
2208
2209         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2210
2211    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2212    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2213
2214    Then, upon reading the following elements, these codes are produced
2215    until the composition end is found:
2216
2217    (1) CHAR ... CHAR
2218    (2) ALT ... ALT CHAR ... CHAR
2219    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2220    (4) CHAR ... CHAR
2221    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2222
2223    When the composition end is found, LENGTH and NCHARS in the
2224    annotation header is updated as below:
2225
2226    (1) LENGTH: unchanged, NCHARS: unchanged
2227    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2228    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2229    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2230    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2231
2232    If an error is found while composing, the annotation header is
2233    changed to the original composition header (plus filler -1s) as
2234    below:
2235
2236    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2237    (5)          [ 0x80 0xFF -1 -1 -1 ]
2238
2239    and the sequence [ -2 DECODED-RULE ] is changed to the original
2240    byte sequence as below:
2241         o the original byte sequence is B: [ B -1 ]
2242         o the original byte sequence is B1 B2: [ B1 B2 ]
2243
2244    Most of the routines are implemented by macros because many
2245    variables and labels in the caller decode_coding_emacs_mule must be
2246    accessible, and they are usually called just once (thus doesn't
2247    increase the size of compiled object).  */
2248
/* Decode a composition rule represented by C as a component of
   composition sequence of Emacs 20 style.  Set RULE to the decoded
   rule.

   C is modified in place: 0xA0 is subtracted from it, and the result
   must lie in 0..80 or control jumps to the caller's `invalid_code'
   label.  The value is split into GREF (quotient by 9) and NREF
   (remainder by 9), a value of 4 in either is replaced by 10, and
   the pair is packed with COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    c -= 0xA0;                                          \
    if (c < 0 || c >= 81)                               \
      goto invalid_code;                                \
    gref = c / 9, nref = c % 9;                         \
    if (gref == 4) gref = 10;                           \
    if (nref == 4) nref = 10;                           \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2265
2266
/* Decode a composition rule represented by C and the following byte
   at SRC as a component of composition sequence of Emacs 21 style.
   Set RULE to the decoded rule.

   GREF is C - 0x20 and NREF is the next byte (read with
   ONE_MORE_BYTE, which overwrites C) minus 0x20.  Each must lie in
   0..80 or control jumps to the caller's `invalid_code' label.  The
   pair is packed with COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    gref = c - 0x20;                                    \
    if (gref < 0 || gref >= 81)                         \
      goto invalid_code;                                \
    ONE_MORE_BYTE (c);                                  \
    nref = c - 0x20;                                    \
    if (nref < 0 || nref >= 81)                         \
      goto invalid_code;                                \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2284
2285
/* Start of Emacs 21 style format.  On entry the caller's variable C
   holds the method byte (0xF2 + METHOD), which has already been read.
   The next two bytes at SRC are (0xA0 + BYTES) and (0xA0 + CHARS),
   where BYTES is the byte length of this composition information and
   CHARS is the number of characters composed by this composition.

   Jump to the caller's `invalid_code' label if BYTES is less than 3,
   if it is not 4 for a relative composition, or if CHARS is not in
   1..MAX_COMPOSITION_COMPONENTS-1.  Otherwise record the composition
   in CMP_STATUS and add the composition data to CHARBUF with
   ADD_COMPOSITION_DATA.  This macro refers to the caller's variables
   c, charbuf and cmp_status by name.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4))  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
      goto invalid_code;                                                \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    if (method == COMPOSITION_RELATIVE)                                 \
      cmp_status->state = COMPOSING_CHAR;                               \
    else                                                                \
      cmp_status->state = COMPOSING_COMPONENT_CHAR;                     \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
2318
2319
/* Start of Emacs 20 style format for relative composition.  Mark
   CMP_STATUS as an old-form relative composition that expects a
   character next, with no characters or components counted yet, and
   add composition data with zero NCHARS and NBYTES to CHARBUF.  This
   macro refers to the caller's variables charbuf and cmp_status by
   name.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_RELATIVE;                  \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->nchars = cmp_status->ncomps = 0;                \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2331
2332
/* Start of Emacs 20 style format for rule-base composition.  Same as
   DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION except that the method
   recorded in CMP_STATUS is COMPOSITION_WITH_RULE.  This macro refers
   to the caller's variables charbuf and cmp_status by name.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  do {                                                          \
    cmp_status->old_form = 1;                                   \
    cmp_status->method = COMPOSITION_WITH_RULE;                 \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->nchars = cmp_status->ncomps = 0;                \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2344
2345
/* Read the byte that follows a composition starter and begin the
   matching kind of composition:

     0xF2 + (COMPOSITION_RELATIVE .. COMPOSITION_WITH_RULE_ALTCHARS)
                  Emacs 21 style header
     0xA0..0xBF   Emacs 20 style relative composition; the byte is a
                  composition component, so SRC is moved back in
                  order to read it again
     0xFF         Emacs 20 style rule-base composition

   Any other byte, or a negative value from ONE_MORE_BYTE, jumps to
   the caller's `invalid_code' label.  This macro refers to the
   caller's variables c and src by name.  */

#define DECODE_EMACS_MULE_COMPOSITION_START()           \
  do {                                                  \
    const unsigned char *current_src = src;             \
                                                        \
    ONE_MORE_BYTE (c);                                  \
    if (c < 0)                                          \
      goto invalid_code;                                \
    if (c - 0xF2 >= COMPOSITION_RELATIVE                \
        && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
      DECODE_EMACS_MULE_21_COMPOSITION ();              \
    else if (c < 0xA0)                                  \
      goto invalid_code;                                \
    else if (c < 0xC0)                                  \
      {                                                 \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
        /* Re-read C as a composition component.  */    \
        src = current_src;                              \
      }                                                 \
    else if (c == 0xFF)                                 \
      DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();     \
    else                                                \
      goto invalid_code;                                \
  } while (0)
2369
/* Close the composition that has just been decoded completely.

   The expanding scope must provide `charbuf' and `cmp_status'.  The
   data collected for the composition is taken to start at
   CHARBUF - cmp_status->length.  For an old-form composition the
   number of characters collected (cmp_status->nchars) is stored in
   slot 2 of that data.  For a new-form composition whose method is
   above COMPOSITION_RELATIVE, slot 0 is recomputed as
   (slot 2) - cmp_status->length.  In every case the state goes back
   to COMPOSING_NO.

   NOTE(review): the meaning of slots 0 and 2 is fixed by
   ADD_COMPOSITION_DATA, which is outside this view -- confirm before
   relying on this description.  */
2370 #define EMACS_MULE_COMPOSITION_END()                            \
2371   do {                                                          \
2372     int idx = - cmp_status->length;                             \
2373                                                                 \
2374     if (cmp_status->old_form)                                   \
2375       charbuf[idx + 2] = cmp_status->nchars;                    \
2376     else if (cmp_status->method > COMPOSITION_RELATIVE)         \
2377       charbuf[idx] = charbuf[idx + 2] - cmp_status->length;     \
2378     cmp_status->state = COMPOSING_NO;                           \
2379   } while (0)
2380
2381
/* Terminate the composition described by CMP_STATUS although its
   byte sequence was not completed, patching the data that has
   already been stored in front of CHARBUF.  That data starts at
   CHARBUF - CMP_STATUS->length.

   An old-form composition that has collected at least one character
   is kept: the character count is written into slot 2 of the data.
   If its method is COMPOSITION_WITH_RULE and the state is
   COMPOSING_CHAR, the two slots just before CHARBUF are rewritten as
   a raw-byte character (BYTE8_TO_CHAR of the stored value plus 0xA0)
   followed by -1, and 1 is returned; otherwise 0 is returned.

   In all other cases the start of the data is overwritten with
   raw-byte characters: first 0x80, then either 0xFF followed by the
   values -3 and 0 (COMPOSITION_WITH_RULE, return value 1), or
   0xF2 + method and two bytes rebuilt from the stored values plus
   0xA0, followed by -1 (return value 4).

   CMP_STATUS->state is reset to COMPOSING_NO in every case.  The
   macro EMACS_MULE_MAYBE_FINISH_COMPOSITION adds the return value to
   the decoder's `char_offset'.  */
2382 static int
2383 emacs_mule_finish_composition (charbuf, cmp_status)
2384      int *charbuf;
2385      struct composition_status *cmp_status;
2386 {
2387   int idx = - cmp_status->length;
2388   int new_chars;
2389
2390   if (cmp_status->old_form && cmp_status->nchars > 0)
2391     {
2392       charbuf[idx + 2] = cmp_status->nchars;
2393       new_chars = 0;
2394       if (cmp_status->method == COMPOSITION_WITH_RULE
2395           && cmp_status->state == COMPOSING_CHAR)
2396         {
2397           /* The last rule was invalid.  */
2398           int rule = charbuf[-1] + 0xA0;
2399
2400           charbuf[-2] = BYTE8_TO_CHAR (rule);
2401           charbuf[-1] = -1;
2402           new_chars = 1;
2403         }
2404     }
2405   else
2406     {
2407       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2408
2409       if (cmp_status->method == COMPOSITION_WITH_RULE)
2410         {
2411           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2412           charbuf[idx++] = -3;
2413           charbuf[idx++] = 0;
2414           new_chars = 1;
2415         }
2416       else
2417         {
               /* Fetch both stored values first: the stores below
                  overwrite the slots they are read from.  */
2418           int nchars = charbuf[idx + 1] + 0xA0;
2419           int nbytes = charbuf[idx + 2] + 0xA0;
2420
2421           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2422           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2423           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2424           charbuf[idx++] = -1;
2425           new_chars = 4;
2426         }
2427     }
2428   cmp_status->state = COMPOSING_NO;
2429   return new_chars;
2430 }
2431
/* If a composition is in progress (cmp_status->state is not
   COMPOSING_NO), terminate it with emacs_mule_finish_composition and
   add the count that function returns to `char_offset'.  The
   expanding scope must provide `cmp_status', `charbuf' and
   `char_offset'.  */
2432 #define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
2433   do {                                                                    \
2434     if (cmp_status->state != COMPOSING_NO)                                \
2435       char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
2436   } while (0)
2437
2438
/* Decode the emacs-mule encoded bytes of CODING's source, starting at
   offset CODING->consumed, into CODING->charbuf, starting at index
   CODING->charbuf_used.

   Each loop iteration reads one byte (or, through emacs_mule_char,
   one multibyte sequence) and stores the result according to the
   composition state kept in CODING->spec.emacs_mule.cmp_status:
   outside a composition the character is stored directly and a
   charset annotation is added whenever the charset changes; inside a
   composition the value is stored as a component character or as a
   rule (a pair -2, RULE).  An invalid sequence is not dropped: its
   first byte is stored as an ASCII or raw-byte character and
   CODING->errors is incremented.

   A composition that is still open when the function stops is either
   terminated (CODING_MODE_LAST_BLOCK is set) or removed from CHARBUF
   and saved in cmp_status->carryover, from where the next call
   restores it.

   On return CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated to the last position at which a
   loop iteration started.  */
2439 static void
2440 decode_coding_emacs_mule (coding)
2441      struct coding_system *coding;
2442 {
2443   const unsigned char *src = coding->source + coding->consumed;
2444   const unsigned char *src_end = coding->source + coding->src_bytes;
2445   const unsigned char *src_base;
2446   int *charbuf = coding->charbuf + coding->charbuf_used;
2447   /* We may produce two annotations (charset and composition) in one
2448      loop and one more charset annotation at the end.  */
2449   int *charbuf_end
2450     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2451   int consumed_chars = 0, consumed_chars_base;
2452   int multibytep = coding->src_multibyte;
2453   Lisp_Object attrs, charset_list;
2454   int char_offset = coding->produced_char;
2455   int last_offset = char_offset;
2456   int last_id = charset_ascii;
2457   int eol_crlf =
2458     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2459   int byte_after_cr = -1;
2460   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2461
2462   CODING_GET_INFO (coding, attrs, charset_list);
2463
       /* Resume a composition that the previous call left unfinished:
          put the data saved in cmp_status->carryover back at the head
          of CHARBUF.  */
2464   if (cmp_status->state != COMPOSING_NO)
2465     {
2466       int i;
2467
2468       for (i = 0; i < cmp_status->length; i++)
2469         *charbuf++ = cmp_status->carryover[i];
2470       coding->annotated = 1;
2471     }
2472
2473   while (1)
2474     {
2475       int c, id;
2476
2477       src_base = src;
2478       consumed_chars_base = consumed_chars;
2479
           /* Stop when the part of CHARBUF below the reserved room is
              full.  A byte that was read ahead after a CR is given
              back by moving SRC_BASE one byte backward.  */
2480       if (charbuf >= charbuf_end)
2481         {
2482           if (byte_after_cr >= 0)
2483             src_base--;
2484           break;
2485         }
2486
2487       if (byte_after_cr >= 0)
2488         c = byte_after_cr, byte_after_cr = -1;
2489       else
2490         ONE_MORE_BYTE (c);
2491
           /* A negative C and the byte 0x80 both terminate a composition
              in progress.  A negative C is then stored as the character
              -C, while 0x80 starts a new composition.  */
2492       if (c < 0 || c == 0x80)
2493         {
2494           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2495           if (c < 0)
2496             {
2497               *charbuf++ = -c;
2498               char_offset++;
2499             }
2500           else
2501             DECODE_EMACS_MULE_COMPOSITION_START ();
2502           continue;
2503         }
2504
2505       if (c < 0x80)
2506         {
2507           if (eol_crlf && c == '\r')
2508             ONE_MORE_BYTE (byte_after_cr);
2509           id = charset_ascii;
2510           if (cmp_status->state != COMPOSING_NO)
2511             {
2512               if (cmp_status->old_form)
2513                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2514               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2515                 cmp_status->ncomps--;
2516             }
2517         }
2518       else
2519         {
2520           int nchars, nbytes;
2521
2522           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2523                                cmp_status);
2524           if (c < 0)
2525             {
2526               if (c == -1)
2527                 goto invalid_code;
2528               if (c == -2)
2529                 break;
2530             }
2531           src = src_base + nbytes;
2532           consumed_chars = consumed_chars_base + nchars;
2533           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2534             cmp_status->ncomps -= nchars;
2535         }
2536
2537       /* Now if C >= 0, we found a normally encoded character, if C <
2538          0, we found an old-style composition component character or
2539          rule.  */
2540
2541       if (cmp_status->state == COMPOSING_NO)
2542         {
2543           if (last_id != id)
2544             {
2545               if (last_id != charset_ascii)
2546                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2547                                   last_id);
2548               last_id = id;
2549               last_offset = char_offset;
2550             }
2551           *charbuf++ = c;
2552           char_offset++;
2553         }
2554       else if (cmp_status->state == COMPOSING_CHAR)
2555         {
2556           if (cmp_status->old_form)
2557             {
2558               if (c >= 0)
2559                 {
2560                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2561                   *charbuf++ = c;
2562                   char_offset++;
2563                 }
2564               else
2565                 {
2566                   *charbuf++ = -c;
2567                   cmp_status->nchars++;
2568                   cmp_status->length++;
2569                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2570                     EMACS_MULE_COMPOSITION_END ();
2571                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2572                     cmp_status->state = COMPOSING_RULE;
2573                 }
2574             }
2575           else
2576             {
2577               *charbuf++ = c;
2578               cmp_status->length++;
2579               cmp_status->nchars--;
2580               if (cmp_status->nchars == 0)
2581                 EMACS_MULE_COMPOSITION_END ();
2582             }
2583         }
2584       else if (cmp_status->state == COMPOSING_RULE)
2585         {
2586           int rule;
2587
2588           if (c >= 0)
2589             {
2590               EMACS_MULE_COMPOSITION_END ();
2591               *charbuf++ = c;
2592               char_offset++;
2593             }
2594           else
2595             {
2596               c = -c;
2597               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2598               if (rule < 0)
2599                 goto invalid_code;
2600               *charbuf++ = -2;
2601               *charbuf++ = rule;
2602               cmp_status->length += 2;
2603               cmp_status->state = COMPOSING_CHAR;
2604             }
2605         }
2606       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2607         {
2608           *charbuf++ = c;
2609           cmp_status->length++;
2610           if (cmp_status->ncomps == 0)
2611             cmp_status->state = COMPOSING_CHAR;
2612           else if (cmp_status->ncomps > 0)
2613             {
2614               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2615                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2616             }
2617           else
2618             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2619         }
2620       else                      /* COMPOSING_COMPONENT_RULE */
2621         {
2622           int rule;
2623
2624           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2625           if (rule < 0)
2626             goto invalid_code;
2627           *charbuf++ = -2;
2628           *charbuf++ = rule;
2629           cmp_status->length += 2;
2630           cmp_status->ncomps--;
2631           if (cmp_status->ncomps > 0)
2632             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2633           else
2634             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2635         }
2636       continue;
2637
           /* NOTE(review): no `goto retry' is visible in this function;
              the label may be a target for macros defined elsewhere, or
              it may be unused -- confirm.  */
2638     retry:
2639       src = src_base;
2640       consumed_chars = consumed_chars_base;
2641       continue;
2642
           /* Recover from an invalid sequence: terminate any composition,
              go back to the start of the sequence, read just its first
              byte and store it as is (ASCII) or as a raw-byte character,
              and count one error.  */
2643     invalid_code:
2644       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2645       src = src_base;
2646       consumed_chars = consumed_chars_base;
2647       ONE_MORE_BYTE (c);
2648       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2649       char_offset++;
2650       coding->errors++;
2651     }
2652
      /* NOTE(review): presumably the jump target of ONE_MORE_BYTE when the
         source is exhausted (that macro is defined outside this view);
         the loop above also gets here through `break'.  */
2653  no_more_source:
2654   if (cmp_status->state != COMPOSING_NO)
2655     {
2656       if (coding->mode & CODING_MODE_LAST_BLOCK)
2657         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2658       else
2659         {
2660           int i;
2661
               /* More input may follow: take the unfinished composition
                  data back out of CHARBUF and keep it for the next
                  call.  */
2662           charbuf -= cmp_status->length;
2663           for (i = 0; i < cmp_status->length; i++)
2664             cmp_status->carryover[i] = charbuf[i];
2665         }
2666     }
2667   if (last_id != charset_ascii)
2668     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2669   coding->consumed_char += consumed_chars_base;
2670   coding->consumed = src_base - coding->source;
2671   coding->charbuf_used = charbuf - coding->charbuf;
2672 }
2673
2674
/* Store in CODES, an array of at least two bytes, the leading code(s)
   used by emacs-mule for the charset whose emacs-mule id is ID.

   An id below 0xA0 is its own, single leading code; CODES[1] is then
   set to 0, which the encoder takes as "no second leading code".
   Any other id is preceded by a prefix byte selected by its range:
       0xA0..0xDF -> 0x9A
       0xE0..0xEF -> 0x9B
       0xF0..0xF4 -> 0x9C
       0xF5..     -> 0x9D
   and the id itself is stored in CODES[1].

   ID is evaluated exactly once.  The definition deliberately does not
   end with a semicolon, so that an invocation followed by `;' is a
   single statement and can be used in an unbraced `if'/`else'.  */
#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    int emacs_mule_id_ = (id);                          \
                                                        \
    if (emacs_mule_id_ < 0xA0)                          \
      (codes)[0] = emacs_mule_id_, (codes)[1] = 0;      \
    else if (emacs_mule_id_ < 0xE0)                     \
      (codes)[0] = 0x9A, (codes)[1] = emacs_mule_id_;   \
    else if (emacs_mule_id_ < 0xF0)                     \
      (codes)[0] = 0x9B, (codes)[1] = emacs_mule_id_;   \
    else if (emacs_mule_id_ < 0xF5)                     \
      (codes)[0] = 0x9C, (codes)[1] = emacs_mule_id_;   \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = emacs_mule_id_;   \
  } while (0)
2688
2689
/* Encode the characters in CODING->charbuf (CODING->charbuf_used
   elements) into emacs-mule byte sequences appended to
   CODING->destination at offset CODING->produced.

   The charset list of the coding system is first forced to
   Vemacs_mule_charset_list.  Then, for each element of the buffer:
   - a negative value introduces an annotation.  A charset annotation
     selects a preferred charset (ignored unless it is a member of the
     charset list); a composition annotation is skipped because
     encoding of compositions is not implemented; any other
     annotation aborts;
   - an ASCII character is emitted as is;
   - a raw-byte character is emitted as the byte it stands for;
   - any other character is looked up in the preferred charset and
     then in the charset list, falling back to CODING->default_char,
     and emitted as the leading code(s) of the charset followed by one
     or two code bytes with the high bit set.

   The result is recorded as CODING_RESULT_SUCCESS,
   CODING->produced_char and CODING->produced are updated, and 0 is
   returned.  */
2690 static int
2691 encode_coding_emacs_mule (coding)
2692      struct coding_system *coding;
2693 {
2694   int multibytep = coding->dst_multibyte;
2695   int *charbuf = coding->charbuf;
2696   int *charbuf_end = charbuf + coding->charbuf_used;
2697   unsigned char *dst = coding->destination + coding->produced;
2698   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2699   int safe_room = 8;
2700   int produced_chars = 0;
2701   Lisp_Object attrs, charset_list;
2702   int c;
2703   int preferred_charset_id = -1;
2704
2705   CODING_GET_INFO (coding, attrs, charset_list);
2706   if (! EQ (charset_list, Vemacs_mule_charset_list))
2707     {
2708       CODING_ATTR_CHARSET_LIST (attrs)
2709         = charset_list = Vemacs_mule_charset_list;
2710     }
2711
2712   while (charbuf < charbuf_end)
2713     {
2714       ASSURE_DESTINATION (safe_room);
2715       c = *charbuf++;
2716
2717       if (c < 0)
2718         {
2719           /* Handle an annotation.  */
2720           switch (*charbuf)
2721             {
2722             case CODING_ANNOTATE_COMPOSITION_MASK:
2723               /* Not yet implemented.  */
2724               break;
2725             case CODING_ANNOTATE_CHARSET_MASK:
2726               preferred_charset_id = charbuf[3];
2727               if (preferred_charset_id >= 0
2728                   && NILP (Fmemq (make_number (preferred_charset_id),
2729                                   charset_list)))
2730                 preferred_charset_id = -1;
2731               break;
2732             default:
2733               abort ();
2734             }
               /* Skip the rest of the annotation; its first element, C,
                  has already been consumed.  */
2735           charbuf += -c - 1;
2736           continue;
2737         }
2738
2739       if (ASCII_CHAR_P (c))
2740         EMIT_ONE_ASCII_BYTE (c);
2741       else if (CHAR_BYTE8_P (c))
2742         {
2743           c = CHAR_TO_BYTE8 (c);
2744           EMIT_ONE_BYTE (c);
2745         }
2746       else
2747         {
2748           struct charset *charset;
2749           unsigned code;
2750           int dimension;
2751           int emacs_mule_id;
2752           unsigned char leading_codes[2];
2753
2754           if (preferred_charset_id >= 0)
2755             {
2756               charset = CHARSET_FROM_ID (preferred_charset_id);
2757               if (CHAR_CHARSET_P (c, charset))
2758                 code = ENCODE_CHAR (charset, c);
2759               else
2760                 charset = char_charset (c, charset_list, &code);
2761             }
2762           else
2763             charset = char_charset (c, charset_list, &code);
2764           if (! charset)
2765             {
2766               c = coding->default_char;
2767               if (ASCII_CHAR_P (c))
2768                 {
2769                   EMIT_ONE_ASCII_BYTE (c);
2770                   continue;
2771                 }
                   /* NOTE(review): the result of this second lookup is
                      used below without a null check; verify that
                      default_char is always covered by the charset
                      list.  */
2772               charset = char_charset (c, charset_list, &code);
2773             }
2774           dimension = CHARSET_DIMENSION (charset);
2775           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2776           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2777           EMIT_ONE_BYTE (leading_codes[0]);
2778           if (leading_codes[1])
2779             EMIT_ONE_BYTE (leading_codes[1]);
2780           if (dimension == 1)
2781             EMIT_ONE_BYTE (code | 0x80);
2782           else
2783             {
2784               code |= 0x8080;
2785               EMIT_ONE_BYTE (code >> 8);
2786               EMIT_ONE_BYTE (code & 0xFF);
2787             }
2788         }
2789     }
2790   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2791   coding->produced_char += produced_chars;
2792   coding->produced = dst - coding->destination;
2793   return 0;
2794 }
2795
2796 \f
2797 /*** 7. ISO2022 handlers ***/
2798
2799 /* The following note describes the coding system ISO2022 briefly.
2800    Since the intention of this note is to help understand the
2801    functions in this file, some parts are NOT ACCURATE or are OVERLY
2802    SIMPLIFIED.  For thorough understanding, please refer to the
2803    original document of ISO2022.  This is equivalent to the standard
2804    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2805
2806    ISO2022 provides many mechanisms to encode several character sets
2807    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2808    is encoded using bytes less than 128.  This may make the encoded
2809    text a little bit longer, but the text passes more easily through
2810    several types of gateway, some of which strip off the MSB (Most
2811    Significant Bit).
2812
2813    There are two kinds of character sets: control character sets and
2814    graphic character sets.  The former contain control characters such
2815    as `newline' and `escape' to provide control functions (control
2816    functions are also provided by escape sequences).  The latter
2817    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2818    two control character sets and many graphic character sets.
2819
2820    Graphic character sets are classified into one of the following
2821    four classes, according to the number of bytes (DIMENSION) and
2822    number of characters in one dimension (CHARS) of the set:
2823    - DIMENSION1_CHARS94
2824    - DIMENSION1_CHARS96
2825    - DIMENSION2_CHARS94
2826    - DIMENSION2_CHARS96
2827
2828    In addition, each character set is assigned an identification tag,
2829    unique for each set, called the "final character" (denoted as <F>
2830    hereafter).  The <F> of each character set is decided by ECMA(*)
2831    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2832    (0x30..0x3F are for private use only).
2833
2834    Note (*): ECMA = European Computer Manufacturers Association
2835
2836    Here are examples of graphic character sets [NAME(<F>)]:
2837         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2838         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2839         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2840         o DIMENSION2_CHARS96 -- none for the moment
2841
2842    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2843         C0 [0x00..0x1F] -- control character plane 0
2844         GL [0x20..0x7F] -- graphic character plane 0
2845         C1 [0x80..0x9F] -- control character plane 1
2846         GR [0xA0..0xFF] -- graphic character plane 1
2847
2848    A control character set is directly designated and invoked to C0 or
2849    C1 by an escape sequence.  The most common case is that:
2850    - ISO646's  control character set is designated/invoked to C0, and
2851    - ISO6429's control character set is designated/invoked to C1,
2852    and usually these designations/invocations are omitted in encoded
2853    text.  In a 7-bit environment, only C0 can be used, and a control
2854    character for C1 is encoded by an appropriate escape sequence to
2855    fit into the environment.  All control characters for C1 are
2856    defined to have corresponding escape sequences.
2857
2858    A graphic character set is at first designated to one of four
2859    graphic registers (G0 through G3), then these graphic registers are
2860    invoked to GL or GR.  These designations and invocations can be
2861    done independently.  The most common case is that G0 is invoked to
2862    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2863    these invocations and designations are omitted in encoded text.
2864    In a 7-bit environment, only GL can be used.
2865
2866    When a graphic character set of CHARS94 is invoked to GL, codes
2867    0x20 and 0x7F of the GL area work as control characters SPACE and
2868    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2869    be used.
2870
2871    There are two ways of invocation: locking-shift and single-shift.
2872    With locking-shift, the invocation lasts until the next different
2873    invocation, whereas with single-shift, the invocation affects the
2874    following character only and doesn't affect the locking-shift
2875    state.  Invocations are done by the following control characters or
2876    escape sequences:
2877
2878    ----------------------------------------------------------------------
2879    abbrev  function                  cntrl escape seq   description
2880    ----------------------------------------------------------------------
2881    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2882    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2883    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2884    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2885    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2886    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2887    LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
2888    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2889    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2890    ----------------------------------------------------------------------
2891    (*) These are not used by any known coding system.
2892
2893    Control characters for these functions are defined by macros
2894    ISO_CODE_XXX in `coding.h'.
2895
2896    Designations are done by the following escape sequences:
2897    ----------------------------------------------------------------------
2898    escape sequence      description
2899    ----------------------------------------------------------------------
2900    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2901    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2902    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2903    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2904    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2905    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2906    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2907    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2908    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2909    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2910    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2911    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2912    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2913    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2914    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2915    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2916    ----------------------------------------------------------------------
2917
2918    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2919    of dimension 1, chars 94, and final character <F>, etc...
2920
2921    Note (*): Although these designations are not allowed in ISO2022,
2922    Emacs accepts them on decoding, and produces them on encoding
2923    CHARS96 character sets in a coding system which is characterized as
2924    7-bit environment, non-locking-shift, and non-single-shift.
2925
2926    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2927    '(' must be omitted.  We refer to this as "short-form" hereafter.
2928
2929    Now you may notice that there are a lot of ways of encoding the
2930    same multilingual text in ISO2022.  Actually, there exist many
2931    coding systems such as Compound Text (used in X11's inter client
2932    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2933    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2934    localized platforms), and all of these are variants of ISO2022.
2935
2936    In addition to the above, Emacs handles two more kinds of escape
2937    sequences: ISO6429's direction specification and Emacs' private
2938    sequence for specifying character composition.
2939
2940    ISO6429's direction specification takes the following form:
2941         o CSI ']'      -- end of the current direction
2942         o CSI '0' ']'  -- end of the current direction
2943         o CSI '1' ']'  -- start of left-to-right text
2944         o CSI '2' ']'  -- start of right-to-left text
2945    The control character CSI (0x9B: control sequence introducer) is
2946    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2947
2948    Character composition specification takes the following form:
2949         o ESC '0' -- start relative composition
2950         o ESC '1' -- end composition
2951         o ESC '2' -- start rule-base composition (*)
2952         o ESC '3' -- start relative composition with alternate chars  (**)
2953         o ESC '4' -- start rule-base composition with alternate chars  (**)
2954   Since these are not standard escape sequences of any ISO standard,
2955   the use of them with these meanings is restricted to Emacs only.
2956
2957   (*) This form is used only in Emacs 20.7 and older versions,
2958   but newer versions can safely decode it.
2959   (**) This form is used only in Emacs 21.1 and newer versions,
2960   and older versions can't decode it.
2961
2962   Here's a list of example usages of these composition escape
2963   sequences (categorized by `enum composition_method').
2964
2965   COMPOSITION_RELATIVE:
2966         ESC 0 CHAR [ CHAR ] ESC 1
2967   COMPOSITION_WITH_RULE:
2968         ESC 2 CHAR [ RULE CHAR ] ESC 1
2969   COMPOSITION_WITH_ALTCHARS:
2970         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2971   COMPOSITION_WITH_RULE_ALTCHARS:
2972         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2973
/* Table of 256 `enum iso_code_class_type' values.
   NOTE(review): presumably indexed by byte value and giving the ISO
   2022 code class of that byte; the enum and the code that fills the
   table are outside this view -- confirm.  */
2974 enum iso_code_class_type iso_code_class[256];
2975
/* Non-zero if the charset whose id is ID is safe for CODING, i.e. ID
   lies within CODING's safe_charsets table (0 <= ID <=
   max_charset_id) and the entry for ID is not 255.
   setup_iso_safe_charsets fills the table with 255 and then stores a
   register number for every charset the coding system handles, so
   255 marks a charset that it does not handle.

   A negative ID, which the charset tables use for "no such charset",
   is never safe; it is rejected before the table is indexed.  ID is
   evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id)      \
  ((id) >= 0                            \
   && (id) <= (coding)->max_charset_id  \
   && (coding)->safe_charsets[id] != 255)
2979
2980
/* Non-zero if the coding system registered for CATEGORY in
   coding_categories has a non-negative CODING_ISO_INITIAL value for
   register 1.
   NOTE(review): presumably this means a charset is initially
   designated to G1, which is what makes a shift-out meaningful;
   CODING_ISO_INITIAL is defined outside this view -- confirm.  */
2981 #define SHIFT_OUT_OK(category)  \
2982   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2983
/* Build the safe-charsets table of the ISO-2022 coding system whose
   attribute vector is ATTRS and store it in the
   coding_attr_safe_charsets slot of ATTRS.

   The table is a string with one byte for every charset id from 0 to
   the largest id found in the charset list.  A byte holds the
   register number chosen for that charset, or 255 if none was
   chosen:
   - a charset that has an entry in the coding_attr_iso_request alist
     gets the value of that entry;
   - otherwise a charset with iso_chars_96 set gets the cdr of
     coding_attr_iso_usage and any other charset gets its car, but
     only if that value is below 4.

   If CODING_ISO_FLAG_FULL_SUPPORT is set in the ISO flags and the
   charset list is not Viso_2022_charset_list, the list is first
   replaced by Viso_2022_charset_list and the existing table is
   discarded.  If a table (a string) is already present after that
   step, nothing else is done.  */
2984 static void
2985 setup_iso_safe_charsets (attrs)
2986      Lisp_Object attrs;
2987 {
2988   Lisp_Object charset_list, safe_charsets;
2989   Lisp_Object request;
2990   Lisp_Object reg_usage;
2991   Lisp_Object tail;
2992   int reg94, reg96;
2993   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2994   int max_charset_id;
2995
2996   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2997   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2998       && ! EQ (charset_list, Viso_2022_charset_list))
2999     {
3000       CODING_ATTR_CHARSET_LIST (attrs)
3001         = charset_list = Viso_2022_charset_list;
3002       ASET (attrs, coding_attr_safe_charsets, Qnil);
3003     }
3004
       /* A table built earlier is still valid; keep it.  */
3005   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3006     return;
3007
3008   max_charset_id = 0;
3009   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3010     {
3011       int id = XINT (XCAR (tail));
3012       if (max_charset_id < id)
3013         max_charset_id = id;
3014     }
3015
       /* Start with every charset marked as not safe (255).  */
3016   safe_charsets = make_uninit_string (max_charset_id + 1);
3017   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
3018   request = AREF (attrs, coding_attr_iso_request);
3019   reg_usage = AREF (attrs, coding_attr_iso_usage);
3020   reg94 = XINT (XCAR (reg_usage));
3021   reg96 = XINT (XCDR (reg_usage));
3022
3023   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3024     {
3025       Lisp_Object id;
3026       Lisp_Object reg;
3027       struct charset *charset;
3028
3029       id = XCAR (tail);
3030       charset = CHARSET_FROM_ID (XINT (id));
3031       reg = Fcdr (Fassq (id, request));
3032       if (! NILP (reg))
3033         SSET (safe_charsets, XINT (id), XINT (reg));
3034       else if (charset->iso_chars_96)
3035         {
3036           if (reg96 < 4)
3037             SSET (safe_charsets, XINT (id), reg96);
3038         }
3039       else
3040         {
3041           if (reg94 < 4)
3042             SSET (safe_charsets, XINT (id), reg94);
3043         }
3044     }
3045   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3046 }
3047
3048
3049 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3050    Check if a text is encoded in one of ISO-2022 based coding systems.
3051    If it is, return 1, else return 0.  */
3052
3053 static int
3054 detect_coding_iso_2022 (coding, detect_info)
3055      struct coding_system *coding;
3056      struct coding_detection_info *detect_info;
3057 {
3058   const unsigned char *src = coding->source, *src_base = src;
3059   const unsigned char *src_end = coding->source + coding->src_bytes;
3060   int multibytep = coding->src_multibyte;
3061   int single_shifting = 0;
3062   int id;
3063   int c, c1;
3064   int consumed_chars = 0;
3065   int i;
3066   int rejected = 0;
3067   int found = 0;
3068   int composition_count = -1;
3069
3070   detect_info->checked |= CATEGORY_MASK_ISO;
3071
3072   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3073     {
3074       struct coding_system *this = &(coding_categories[i]);
3075       Lisp_Object attrs, val;
3076
3077       if (this->id < 0)
3078         continue;
3079       attrs = CODING_ID_ATTRS (this->id);
3080       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3081           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3082         setup_iso_safe_charsets (attrs);
3083       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3084       this->max_charset_id = SCHARS (val) - 1;
3085       this->safe_charsets = SDATA (val);
3086     }
3087
3088   /* A coding system of this category is always ASCII compatible.  */
3089   src += coding->head_ascii;
3090
3091   while (rejected != CATEGORY_MASK_ISO)
3092     {
3093       src_base = src;
3094       ONE_MORE_BYTE (c);
3095       switch (c)
3096         {
3097         case ISO_CODE_ESC:
3098           if (inhibit_iso_escape_detection)
3099             break;
3100           single_shifting = 0;
3101           ONE_MORE_BYTE (c);
3102           if (c >= '(' && c <= '/')
3103             {
3104               /* Designation sequence for a charset of dimension 1.  */
3105               ONE_MORE_BYTE (c1);
3106               if (c1 < ' ' || c1 >= 0x80
3107                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3108                 /* Invalid designation sequence.  Just ignore.  */
3109                 break;
3110             }
3111           else if (c == '$')
3112             {
3113               /* Designation sequence for a charset of dimension 2.  */
3114               ONE_MORE_BYTE (c);
3115               if (c >= '@' && c <= 'B')
3116                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3117                 id = iso_charset_table[1][0][c];
3118               else if (c >= '(' && c <= '/')
3119                 {
3120                   ONE_MORE_BYTE (c1);
3121                   if (c1 < ' ' || c1 >= 0x80
3122                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3123                     /* Invalid designation sequence.  Just ignore.  */
3124                     break;
3125                 }
3126               else
3127                 /* Invalid designation sequence.  Just ignore it.  */
3128                 break;
3129             }
3130           else if (c == 'N' || c == 'O')
3131             {
3132               /* ESC <Fe> for SS2 or SS3.  */
3133               single_shifting = 1;
3134               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3135               break;
3136             }
3137           else if (c == '1')
3138             {
3139               /* End of composition.  */
3140               if (composition_count < 0
3141                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3142                 /* Invalid */
3143                 break;
3144               composition_count = -1;
3145               found |= CATEGORY_MASK_ISO;
3146             }
3147           else if (c >= '0' && c <= '4')
3148             {
3149               /* ESC <Fp> for start/end composition.  */
3150               composition_count = 0;
3151               break;
3152             }
3153           else
3154             {
3155               /* Invalid escape sequence.  Just ignore it.  */
3156               break;
3157             }
3158
3159           /* We found a valid designation sequence for CHARSET.  */
3160           rejected |= CATEGORY_MASK_ISO_8BIT;
3161           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3162                               id))
3163             found |= CATEGORY_MASK_ISO_7;
3164           else
3165             rejected |= CATEGORY_MASK_ISO_7;
3166           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3167                               id))
3168             found |= CATEGORY_MASK_ISO_7_TIGHT;
3169           else
3170             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3171           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3172                               id))
3173             found |= CATEGORY_MASK_ISO_7_ELSE;
3174           else
3175             rejected |= CATEGORY_MASK_ISO_7_ELSE;
3176           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3177                               id))
3178             found |= CATEGORY_MASK_ISO_8_ELSE;
3179           else
3180             rejected |= CATEGORY_MASK_ISO_8_ELSE;
3181           break;
3182
3183         case ISO_CODE_SO:
3184         case ISO_CODE_SI:
3185           /* Locking shift out/in.  */
3186           if (inhibit_iso_escape_detection)
3187             break;
3188           single_shifting = 0;
3189           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3190           break;
3191
3192         case ISO_CODE_CSI:
3193           /* Control sequence introducer.  */
3194           single_shifting = 0;
3195           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3196           found |= CATEGORY_MASK_ISO_8_ELSE;
3197           goto check_extra_latin;
3198
3199         case ISO_CODE_SS2:
3200         case ISO_CODE_SS3:
3201           /* Single shift.   */
3202           if (inhibit_iso_escape_detection)
3203             break;
3204           single_shifting = 0;
3205           rejected |= CATEGORY_MASK_ISO_7BIT;
3206           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3207               & CODING_ISO_FLAG_SINGLE_SHIFT)
3208             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
3209           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3210               & CODING_ISO_FLAG_SINGLE_SHIFT)
3211             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3212           if (single_shifting)
3213             break;
3214           goto check_extra_latin;
3215
3216         default:
3217           if (c < 0)
3218             continue;
3219           if (c < 0x80)
3220             {
3221               if (composition_count >= 0)
3222                 composition_count++;
3223               single_shifting = 0;
3224               break;
3225             }
3226           if (c >= 0xA0)
3227             {
3228               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3229               found |= CATEGORY_MASK_ISO_8_1;
3230               /* Check the length of succeeding codes of the range
3231                  0xA0..0FF.  If the byte length is even, we include
3232                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3233                  only when we are not single shifting.  */
3234               if (! single_shifting
3235                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3236                 {
3237                   int i = 1;
3238                   while (src < src_end)
3239                     {
3240                       ONE_MORE_BYTE (c);
3241                       if (c < 0xA0)
3242                         break;
3243                       i++;
3244                     }
3245
3246                   if (i & 1 && src < src_end)
3247                     {
3248                       rejected |= CATEGORY_MASK_ISO_8_2;
3249                       if (composition_count >= 0)
3250                         composition_count += i;
3251                     }
3252                   else
3253                     {
3254                       found |= CATEGORY_MASK_ISO_8_2;
3255                       if (composition_count >= 0)
3256                         composition_count += i / 2;
3257                     }
3258                 }
3259               break;
3260             }
3261         check_extra_latin:
3262           single_shifting = 0;
3263           if (! VECTORP (Vlatin_extra_code_table)
3264               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3265             {
3266               rejected = CATEGORY_MASK_ISO;
3267               break;
3268             }
3269           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3270               & CODING_ISO_FLAG_LATIN_EXTRA)
3271             found |= CATEGORY_MASK_ISO_8_1;
3272           else
3273             rejected |= CATEGORY_MASK_ISO_8_1;
3274           rejected |= CATEGORY_MASK_ISO_8_2;
3275         }
3276     }
3277   detect_info->rejected |= CATEGORY_MASK_ISO;
3278   return 0;
3279
3280  no_more_source:
3281   detect_info->rejected |= rejected;
3282   detect_info->found |= (found & ~rejected);
3283   return 1;
3284 }
3285
3286
3287 /* Designate the charset given by DIM, CHARS_96 and FINAL to register REG
3288    of `coding'.  Set CHARS_96 to -1 if the escape sequence should be kept.  */
3289 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3290   do {                                                                  \
3291     int id, prev;       /* new charset id; old designation of REG */    \
3292     /* FINAL outside '0'..127, unknown or unsafe charset: invalid.  */  \
3293     if (final < '0' || final >= 128                                     \
3294         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
3295         || !SAFE_CHARSET_P (coding, id))                                \
3296       {                                                                 \
3297         CODING_ISO_DESIGNATION (coding, reg) = -2;  /* mark invalid */  \
3298         chars_96 = -1;                                                  \
3299         break;                  /* leave the do-while block */          \
3300       }                                                                 \
3301     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
3302     if (id == charset_jisx0201_roman)                                   \
3303       {                                                                 \
3304         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
3305           id = charset_ascii;   /* designate ASCII instead */           \
3306       }                                                                 \
3307     else if (id == charset_jisx0208_1978)                               \
3308       {                                                                 \
3309         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3310           id = charset_jisx0208;  /* designate JISX0208 instead */      \
3311       }                                                                 \
3312     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3313     /* If there was an invalid designation to REG previously, and this  \
3314        designation is ASCII to REG, we should keep this designation     \
3315        sequence.  */                                                    \
3316     if (prev == -2 && id == charset_ascii)                              \
3317       chars_96 = -1;                                                    \
3318   } while (0)
3319
3320
3321 /* Handle these composition sequences (ALT: alternate char):
3322
3323    (1) relative composition: ESC 0 CHAR ... ESC 1
3324    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3325    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3326    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3327
3328    When the start sequence (ESC 0/2/3/4) is found, this annotation
3329    header is produced.
3330
3331         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3332
3333    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3334    produced until the end sequence (ESC 1) is found:
3335
3336    (1) CHAR ... CHAR
3337    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3338    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3339    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3340
3341    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3342    annotation header are updated as below:
3343
3344    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3345    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3346    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3347    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3348
3349    If an error is found while composing, the annotation header is
3350    changed to:
3351
3352         [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3353
3354    and the sequence [ -2 DECODED-RULE ] is changed to the original
3355    byte sequence as below:
3356         o the original byte sequence is B: [ B -1 ]
3357         o the original byte sequence is B1 B2: [ B1 B2 ]
3358    and the sequence [ -1 -1 ] is changed to the original byte
3359    sequence:
3360         [ ESC '0' ]
3361 */
3362
3363 /* Decode a composition rule from `c1' (a byte the expanding scope has
3364    already read) and maybe one more source byte.  Set RULE to the
3365    encoded rule and NBYTES to its byte length.  An invalid rule leaves
3366    RULE negative; NBYTES is not set when `c1' is below 32.  */
3367
3368 #define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
3369   do {                                                                  \
3370     rule = c1 - 32;             /* rule bytes are offset by 32 */       \
3371     if (rule < 0)                                                       \
3372       break;                    /* invalid; NBYTES stays unset */       \
3373     if (rule < 81)              /* old format (before ver.21) */        \
3374       {                                                                 \
3375         int gref = (rule) / 9;  /* one byte packs GREF * 9 + NREF */    \
3376         int nref = (rule) % 9;                                          \
3377         if (gref == 4) gref = 10;                                       \
3378         if (nref == 4) nref = 10;                                       \
3379         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
3380         nbytes = 1;                                                     \
3381       }                                                                 \
3382     else                        /* new format (after ver.21) */         \
3383       {                                                                 \
3384         int c;                  /* second byte; hides any outer `c' */  \
3385                                                                         \
3386         ONE_MORE_BYTE (c);                                              \
3387         rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32);             \
3388         if (rule >= 0)                                                  \
3389           rule += 0x100;   /* to distinguish it from the old format */  \
3390         nbytes = 2;                                                     \
3391       }                                                                 \
3392   } while (0)
3393 /* Turn encoded RULE back into its source byte(s) at charbuf[idx].  */
3394 #define ENCODE_COMPOSITION_RULE(rule)                           \
3395   do {                                                          \
3396     int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3397     /* Uses `charbuf', `idx', `new_chars' of the caller.  */    \
3398     if (rule < 0x100)           /* old format */                \
3399       {                                                         \
3400         if (gref == 10) gref = 4;                               \
3401         if (nref == 10) nref = 4;                               \
3402         charbuf[idx] = 32 + gref * 9 + nref;                    \
3403         charbuf[idx + 1] = -1;  /* no second byte */            \
3404         new_chars++;            /* one byte restored */         \
3405       }                                                         \
3406     else                                /* new format */        \
3407       {                                                         \
3408         charbuf[idx] = 32 + 81 + gref;                          \
3409         charbuf[idx + 1] = 32 + nref;                           \
3410         new_chars += 2;         /* two bytes restored */        \
3411       }                                                         \
3412   } while (0)
3413
3414 /* Abandon the composition in progress as invalid: turn its annotation
3415    back into plain codes and return the resulting character count.  */
3416 static int finish_composition P_ ((int *, struct composition_status *));
3417
3418 static int
3419 finish_composition (charbuf, cmp_status)
3420      int *charbuf;
3421      struct composition_status *cmp_status;
3422 {
3423   /* ENCODE_COMPOSITION_RULE uses `charbuf', `idx', `new_chars' by name.  */
3424   int idx = - cmp_status->length;
3425   int *header = charbuf + idx;
3426   int new_chars;
3427
3428   /* Put back the ESC sequence that started the composition.  */
3429   header[0] = ISO_CODE_ESC;
3430   if (cmp_status->method == COMPOSITION_RELATIVE)
3431     header[1] = '0';
3432   else if (cmp_status->method == COMPOSITION_WITH_RULE)
3433     header[1] = '2';
3434   else if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)
3435     header[1] = '3';
3436   else                          /* COMPOSITION_WITH_RULE_ALTCHARS */
3437     header[1] = '4';
3438   header[2] = -2, header[3] = 0, header[4] = -1;
3439   idx += 5;                     /* first slot after the old header */
3440   new_chars = cmp_status->nchars;
3441   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3442     for (; idx < 0; idx++)
3443       switch (charbuf[idx])
3444         {
3445         case -2:                /* [ -2 RULE ]: restore the rule bytes */
3446           ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3447           idx++;
3448           break;
3449         case -1:                /* [ -1 -1 ]: restore ESC '0' */
3450           charbuf[idx] = ISO_CODE_ESC, charbuf[++idx] = '0';
3451           new_chars += 2;
3452           break;
3453         }
3454   cmp_status->state = COMPOSING_NO;
3455   return new_chars;
3456 }
3457
3458 /* If characters are under composition, finish it as invalid.  */
3459 #define MAYBE_FINISH_COMPOSITION()                              \
3460   do {          /* uses cmp_status, charbuf, char_offset */     \
3461     if (cmp_status->state != COMPOSING_NO)                      \
3462       char_offset += finish_composition (charbuf, cmp_status);  \
3463   } while (0)
3464
3465 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3466
3467    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3468    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3469    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3470    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3471
3472    Produce this annotation header now (an ESC 0 ending the component
3473    part of ESC 3 / ESC 4 produces only the separator [ -1 -1 ]):
3474    [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3475 */
3476
3477 #define DECODE_COMPOSITION_START(c1)                                       \
3478   do {                                                                     \
3479     if (c1 == '0'                                                          \
3480         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
3481              && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
3482             || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
3483                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3484       {                         /* ESC 0 closes the ALT part */            \
3485         *charbuf++ = -1;        /* separator [ -1 -1 ] */                  \
3486         *charbuf++= -1;                                                    \
3487         cmp_status->state = COMPOSING_CHAR;                                \
3488         cmp_status->length += 2;                                           \
3489       }                                                                    \
3490     else                                                                   \
3491       {                         /* a new composition starts */             \
3492         MAYBE_FINISH_COMPOSITION ();  /* abandon a pending one */          \
3493         cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE             \
3494                               : c1 == '2' ? COMPOSITION_WITH_RULE          \
3495                               : c1 == '3' ? COMPOSITION_WITH_ALTCHARS      \
3496                               : COMPOSITION_WITH_RULE_ALTCHARS);           \
3497         cmp_status->state                                                  \
3498           = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);       \
3499         ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
3500         cmp_status->length = MAX_ANNOTATION_LENGTH;  /* header slots */    \
3501         cmp_status->nchars = cmp_status->ncomps = 0;                       \
3502         coding->annotated = 1;                                             \
3503       }                                                                    \
3504   } while (0)
3505
3506
3507 /* Handle composition end sequence ESC 1.  An ill-formed composition is
3508    abandoned with a jump to `invalid_code'; otherwise fix up the header.  */
3509 #define DECODE_COMPOSITION_END()                                        \
3510   do {                                                                  \
3511     if (cmp_status->nchars == 0                                         \
3512         || ((cmp_status->state == COMPOSING_CHAR)                       \
3513             == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
3514       {                         /* no char, or ESC 1 at a bad point */  \
3515         MAYBE_FINISH_COMPOSITION ();                                    \
3516         goto invalid_code;      /* label of the expanding function */   \
3517       }                                                                 \
3518     if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
3519       charbuf[- cmp_status->length] -= cmp_status->ncomps + 2;          \
3520     else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
3521       charbuf[- cmp_status->length] -= cmp_status->ncomps * 3;          \
3522     charbuf[- cmp_status->length + 2] = cmp_status->nchars; /* NCHARS */ \
3523     char_offset += cmp_status->nchars;                                  \
3524     cmp_status->state = COMPOSING_NO;   /* composition is complete */   \
3525   } while (0)
3526
3527 /* Store a composition rule RULE in charbuf as [ -2 RULE ], and update
3528    cmp_status (state-- undoes the state++ of STORE_COMPOSITION_CHAR).  */
3529 #define STORE_COMPOSITION_RULE(rule)    \
3530   do {                                  \
3531     *charbuf++ = -2;  /* rule marker */ \
3532     *charbuf++ = rule;                  \
3533     cmp_status->length += 2;            \
3534     cmp_status->state--;                \
3535   } while (0)
3536
3537 /* Store a composed char or a component char C in charbuf, and update
3538    cmp_status: count C in nchars or ncomps, and step the state forward
3539    when a rule is to follow C (presumably to the matching rule state).  */
3540 #define STORE_COMPOSITION_CHAR(c)                                       \
3541   do {                                                                  \
3542     *charbuf++ = (c);                                                   \
3543     cmp_status->length++;                                               \
3544     if (cmp_status->state == COMPOSING_CHAR)                            \
3545       cmp_status->nchars++;             /* a composed char */           \
3546     else                                                                \
3547       cmp_status->ncomps++;             /* a component (ALT) char */    \
3548     if (cmp_status->method == COMPOSITION_WITH_RULE                     \
3549         || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS        \
3550             && cmp_status->state == COMPOSING_COMPONENT_CHAR))          \
3551       cmp_status->state++;                                              \
3552   } while (0)
3553
3554
3555 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3556
3557 static void
3558 decode_coding_iso_2022 (coding)
3559      struct coding_system *coding;
3560 {
3561   const unsigned char *src = coding->source + coding->consumed;
3562   const unsigned char *src_end = coding->source + coding->src_bytes;
3563   const unsigned char *src_base;
3564   int *charbuf = coding->charbuf + coding->charbuf_used;
3565   /* We may produce two annocations (charset and composition) in one
3566      loop and one more charset annocation at the end.  */
3567   int *charbuf_end
3568     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3569   int consumed_chars = 0, consumed_chars_base;
3570   int multibytep = coding->src_multibyte;
3571   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3572   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3573   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3574   int charset_id_2, charset_id_3;
3575   struct charset *charset;
3576   int c;
3577   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3578   Lisp_Object attrs, charset_list;
3579   int char_offset = coding->produced_char;
3580   int last_offset = char_offset;
3581   int last_id = charset_ascii;
3582   int eol_crlf =
3583     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3584   int byte_after_cr = -1;
3585   int i;
3586
3587   CODING_GET_INFO (coding, attrs, charset_list);
3588   setup_iso_safe_charsets (attrs);
3589   /* Charset list may have been changed.  */
3590   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3591   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3592
3593   if (cmp_status->state != COMPOSING_NO)
3594     {
3595       for (i = 0; i < cmp_status->length; i++)
3596         *charbuf++ = cmp_status->carryover[i];
3597       coding->annotated = 1;
3598     }
3599
3600   while (1)
3601     {
3602       int c1, c2;
3603
3604       src_base = src;
3605       consumed_chars_base = consumed_chars;
3606
3607       if (charbuf >= charbuf_end)
3608         {
3609           if (byte_after_cr >= 0)
3610             src_base--;
3611           break;
3612         }
3613
3614       if (byte_after_cr >= 0)
3615         c1 = byte_after_cr, byte_after_cr = -1;
3616       else
3617         ONE_MORE_BYTE (c1);
3618       if (c1 < 0)
3619         goto invalid_code;
3620
3621       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3622         {
3623           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3624           char_offset++;
3625           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3626           continue;
3627         }
3628
3629       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3630         {
3631           if (c1 == ISO_CODE_ESC)
3632             {
3633               if (src + 1 >= src_end)
3634                 goto no_more_source;
3635               *charbuf++ = ISO_CODE_ESC;
3636               char_offset++;
3637               if (src[0] == '%' && src[1] == '@')
3638                 {
3639                   src += 2;
3640                   consumed_chars += 2;
3641                   char_offset += 2;
3642                   /* We are sure charbuf can contain two more chars. */
3643                   *charbuf++ = '%';
3644                   *charbuf++ = '@';
3645                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3646                 }
3647             }
3648           else
3649             {
3650               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3651               char_offset++;
3652             }
3653           continue;
3654         }
3655
3656       if ((cmp_status->state == COMPOSING_RULE
3657            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3658           && c1 != ISO_CODE_ESC)
3659         {
3660           int rule, nbytes;
3661
3662           DECODE_COMPOSITION_RULE (rule, nbytes);
3663           if (rule < 0)
3664             goto invalid_code;
3665           STORE_COMPOSITION_RULE (rule);
3666           continue;
3667         }
3668
3669       /* We produce at most one character.  */
3670       switch (iso_code_class [c1])
3671         {
3672         case ISO_0x20_or_0x7F:
3673           if (charset_id_0 < 0
3674               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3675             /* This is SPACE or DEL.  */
3676             charset = CHARSET_FROM_ID (charset_ascii);
3677           else
3678             charset = CHARSET_FROM_ID (charset_id_0);
3679           break;
3680
3681         case ISO_graphic_plane_0:
3682           if (charset_id_0 < 0)
3683             charset = CHARSET_FROM_ID (charset_ascii);
3684           else
3685             charset = CHARSET_FROM_ID (charset_id_0);
3686           break;
3687
3688         case ISO_0xA0_or_0xFF:
3689           if (charset_id_1 < 0
3690               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3691               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3692             goto invalid_code;
3693           /* This is a graphic character, we fall down ... */
3694
3695         case ISO_graphic_plane_1:
3696           if (charset_id_1 < 0)
3697             goto invalid_code;
3698           charset = CHARSET_FROM_ID (charset_id_1);
3699           break;
3700
3701         case ISO_control_0:
3702           if (eol_crlf && c1 == '\r')
3703             ONE_MORE_BYTE (byte_after_cr);
3704           MAYBE_FINISH_COMPOSITION ();
3705           charset = CHARSET_FROM_ID (charset_ascii);
3706           break;
3707
3708         case ISO_control_1:
3709           goto invalid_code;
3710
3711         case ISO_shift_out:
3712           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3713               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3714             goto invalid_code;
3715           CODING_ISO_INVOCATION (coding, 0) = 1;
3716           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3717           continue;
3718
3719         case ISO_shift_in:
3720           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3721             goto invalid_code;
3722           CODING_ISO_INVOCATION (coding, 0) = 0;
3723           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3724           continue;
3725
3726         case ISO_single_shift_2_7:
3727         case ISO_single_shift_2:
3728           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3729             goto invalid_code;
3730           /* SS2 is handled as an escape sequence of ESC 'N' */
3731           c1 = 'N';
3732           goto label_escape_sequence;
3733
3734         case ISO_single_shift_3:
3735           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3736             goto invalid_code;
3737           /* SS2 is handled as an escape sequence of ESC 'O' */
3738           c1 = 'O';
3739           goto label_escape_sequence;
3740
3741         case ISO_control_sequence_introducer:
3742           /* CSI is handled as an escape sequence of ESC '[' ...  */
3743           c1 = '[';
3744           goto label_escape_sequence;
3745
3746         case ISO_escape:
3747           ONE_MORE_BYTE (c1);
3748         label_escape_sequence:
3749           /* Escape sequences handled here are invocation,
3750              designation, direction specification, and character
3751              composition specification.  */
3752           switch (c1)
3753             {
3754             case '&':           /* revision of following character set */
3755               ONE_MORE_BYTE (c1);
3756               if (!(c1 >= '@' && c1 <= '~'))
3757                 goto invalid_code;
3758               ONE_MORE_BYTE (c1);
3759               if (c1 != ISO_CODE_ESC)
3760                 goto invalid_code;
3761               ONE_MORE_BYTE (c1);
3762               goto label_escape_sequence;
3763
3764             case '$':           /* designation of 2-byte character set */
3765               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3766                 goto invalid_code;
3767               {
3768                 int reg, chars96;
3769
3770                 ONE_MORE_BYTE (c1);
3771                 if (c1 >= '@' && c1 <= 'B')
3772                   {     /* designation of JISX0208.1978, GB2312.1980,
3773                            or JISX0208.1980 */
3774                     reg = 0, chars96 = 0;
3775                   }
3776                 else if (c1 >= 0x28 && c1 <= 0x2B)
3777                   { /* designation of DIMENSION2_CHARS94 character set */
3778                     reg = c1 - 0x28, chars96 = 0;
3779                     ONE_MORE_BYTE (c1);
3780                   }
3781                 else if (c1 >= 0x2C && c1 <= 0x2F)
3782                   { /* designation of DIMENSION2_CHARS96 character set */
3783                     reg = c1 - 0x2C, chars96 = 1;
3784                     ONE_MORE_BYTE (c1);
3785                   }
3786                 else
3787                   goto invalid_code;
3788                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3789                 /* We must update these variables now.  */
3790                 if (reg == 0)
3791                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3792                 else if (reg == 1)
3793                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3794                 if (chars96 < 0)
3795                   goto invalid_code;
3796               }
3797               continue;
3798
3799             case 'n':           /* invocation of locking-shift-2 */
3800               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3801                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3802                 goto invalid_code;
3803               CODING_ISO_INVOCATION (coding, 0) = 2;
3804               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3805               continue;
3806
3807             case 'o':           /* invocation of locking-shift-3 */
3808               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3809                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3810                 goto invalid_code;
3811               CODING_ISO_INVOCATION (coding, 0) = 3;
3812               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3813               continue;
3814
3815             case 'N':           /* invocation of single-shift-2 */
3816               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3817                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3818                 goto invalid_code;
3819               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3820               if (charset_id_2 < 0)
3821                 charset = CHARSET_FROM_ID (charset_ascii);
3822               else
3823                 charset = CHARSET_FROM_ID (charset_id_2);
3824               ONE_MORE_BYTE (c1);
3825               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3826                 goto invalid_code;
3827               break;
3828
3829             case 'O':           /* invocation of single-shift-3 */
3830               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3831                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3832                 goto invalid_code;
3833               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3834               if (charset_id_3 < 0)
3835                 charset = CHARSET_FROM_ID (charset_ascii);
3836               else
3837                 charset = CHARSET_FROM_ID (charset_id_3);
3838               ONE_MORE_BYTE (c1);
3839               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3840                 goto invalid_code;
3841               break;
3842
3843             case '0': case '2': case '3': case '4': /* start composition */
3844               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3845                 goto invalid_code;
3846               if (last_id != charset_ascii)
3847                 {
3848                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3849                   last_id = charset_ascii;
3850                   last_offset = char_offset;
3851                 }
3852               DECODE_COMPOSITION_START (c1);
3853               continue;
3854
3855             case '1':           /* end composition */
3856               if (cmp_status->state == COMPOSING_NO)
3857                 goto invalid_code;
3858               DECODE_COMPOSITION_END ();
3859               continue;
3860
3861             case '[':           /* specification of direction */
3862               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3863                 goto invalid_code;
3864               /* For the moment, nested direction is not supported.
3865                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3866                  left-to-right, and nonzero means right-to-left.  */
3867               ONE_MORE_BYTE (c1);
3868               switch (c1)
3869                 {
3870                 case ']':       /* end of the current direction */
3871                   coding->mode &= ~CODING_MODE_DIRECTION;
3872
3873                 case '0':       /* end of the current direction */
3874                 case '1':       /* start of left-to-right direction */
3875                   ONE_MORE_BYTE (c1);
3876                   if (c1 == ']')
3877                     coding->mode &= ~CODING_MODE_DIRECTION;
3878                   else
3879                     goto invalid_code;
3880                   break;
3881
3882                 case '2':       /* start of right-to-left direction */
3883                   ONE_MORE_BYTE (c1);
3884                   if (c1 == ']')
3885                     coding->mode |= CODING_MODE_DIRECTION;
3886                   else
3887                     goto invalid_code;
3888                   break;
3889
3890                 default:
3891                   goto invalid_code;
3892                 }
3893               continue;
3894
3895             case '%':
3896               ONE_MORE_BYTE (c1);
3897               if (c1 == '/')
3898                 {
3899                   /* CTEXT extended segment:
3900                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3901                      We keep these bytes as is for the moment.
3902                      They may be decoded by post-read-conversion.  */
3903                   int dim, M, L;
3904                   int size;
3905
3906                   ONE_MORE_BYTE (dim);
3907                   if (dim < 0 || dim > 4)
3908                     goto invalid_code;
3909                   ONE_MORE_BYTE (M);
3910                   if (M < 128)
3911                     goto invalid_code;
3912                   ONE_MORE_BYTE (L);
3913                   if (L < 128)
3914                     goto invalid_code;
3915                   size = ((M - 128) * 128) + (L - 128);
3916                   if (charbuf + 6 > charbuf_end)
3917                     goto break_loop;
3918                   *charbuf++ = ISO_CODE_ESC;
3919                   *charbuf++ = '%';
3920                   *charbuf++ = '/';
3921                   *charbuf++ = dim;
3922                   *charbuf++ = BYTE8_TO_CHAR (M);
3923                   *charbuf++ = BYTE8_TO_CHAR (L);
3924                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3925                 }
3926               else if (c1 == 'G')
3927                 {
3928                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3929                      ESC % G --UTF-8-BYTES-- ESC % @
3930                      We keep these bytes as is for the moment.
3931                      They may be decoded by post-read-conversion.  */
3932                   if (charbuf + 3 > charbuf_end)
3933                     goto break_loop;
3934                   *charbuf++ = ISO_CODE_ESC;
3935                   *charbuf++ = '%';
3936                   *charbuf++ = 'G';
3937                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3938                 }
3939               else
3940                 goto invalid_code;
3941               continue;
3942               break;
3943
3944             default:
3945               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3946                 goto invalid_code;
3947               {
3948                 int reg, chars96;
3949
3950                 if (c1 >= 0x28 && c1 <= 0x2B)
3951                   { /* designation of DIMENSION1_CHARS94 character set */
3952                     reg = c1 - 0x28, chars96 = 0;
3953                     ONE_MORE_BYTE (c1);
3954                   }
3955                 else if (c1 >= 0x2C && c1 <= 0x2F)
3956                   { /* designation of DIMENSION1_CHARS96 character set */
3957                     reg = c1 - 0x2C, chars96 = 1;
3958                     ONE_MORE_BYTE (c1);
3959                   }
3960                 else
3961                   goto invalid_code;
3962                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3963                 /* We must update these variables now.  */
3964                 if (reg == 0)
3965                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3966                 else if (reg == 1)
3967                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3968                 if (chars96 < 0)
3969                   goto invalid_code;
3970               }
3971               continue;
3972             }
3973         }
3974
3975       if (cmp_status->state == COMPOSING_NO
3976           && charset->id != charset_ascii
3977           && last_id != charset->id)
3978         {
3979           if (last_id != charset_ascii)
3980             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3981           last_id = charset->id;
3982           last_offset = char_offset;
3983         }
3984
3985       /* Now we know CHARSET and 1st position code C1 of a character.
3986          Produce a decoded character while getting 2nd position code
3987          C2 if necessary.  */
3988       c1 &= 0x7F;
3989       if (CHARSET_DIMENSION (charset) > 1)
3990         {
3991           ONE_MORE_BYTE (c2);
3992           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3993             /* C2 is not in a valid range.  */
3994             goto invalid_code;
3995           c1 = (c1 << 8) | (c2 & 0x7F);
3996           if (CHARSET_DIMENSION (charset) > 2)
3997             {
3998               ONE_MORE_BYTE (c2);
3999               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
4000                 /* C2 is not in a valid range.  */
4001                 goto invalid_code;
4002               c1 = (c1 << 8) | (c2 & 0x7F);
4003             }
4004         }
4005
4006       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4007       if (c < 0)
4008         {
4009           MAYBE_FINISH_COMPOSITION ();
4010           for (; src_base < src; src_base++, char_offset++)
4011             {
4012               if (ASCII_BYTE_P (*src_base))
4013                 *charbuf++ = *src_base;
4014               else
4015                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4016             }
4017         }
4018       else if (cmp_status->state == COMPOSING_NO)
4019         {
4020           *charbuf++ = c;
4021           char_offset++;
4022         }
4023       else if ((cmp_status->state == COMPOSING_CHAR
4024                 ? cmp_status->nchars
4025                 : cmp_status->ncomps)
4026                >= MAX_COMPOSITION_COMPONENTS)
4027         {
4028           /* Too long composition.  */
4029           MAYBE_FINISH_COMPOSITION ();
4030           *charbuf++ = c;
4031           char_offset++;
4032         }
4033       else
4034         STORE_COMPOSITION_CHAR (c);
4035       continue;
4036
4037     invalid_code:
4038       MAYBE_FINISH_COMPOSITION ();
4039       src = src_base;
4040       consumed_chars = consumed_chars_base;
4041       ONE_MORE_BYTE (c);
4042       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4043       char_offset++;
4044       coding->errors++;
4045       continue;
4046
4047     break_loop:
4048       break;
4049     }
4050
4051  no_more_source:
4052   if (cmp_status->state != COMPOSING_NO)
4053     {
4054       if (coding->mode & CODING_MODE_LAST_BLOCK)
4055         MAYBE_FINISH_COMPOSITION ();
4056       else
4057         {
4058           charbuf -= cmp_status->length;
4059           for (i = 0; i < cmp_status->length; i++)
4060             cmp_status->carryover[i] = charbuf[i];
4061         }
4062     }
4063   else if (last_id != charset_ascii)
4064     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4065   coding->consumed_char += consumed_chars_base;
4066   coding->consumed = src_base - coding->source;
4067   coding->charbuf_used = charbuf - coding->charbuf;
4068 }
4069
4070
4071 /* ISO2022 encoding stuff.  */
4072
4073 /*
4074    It is not enough to say just "ISO2022" for encoding; we have to
4075    specify more details.  In Emacs, each coding system of ISO2022
4076    variant has the following specifications:
4077         1. Initial designation to G0 thru G3.
4078         2. Allows short-form designation?
4079         3. ASCII should be designated to G0 before control characters?
4080         4. ASCII should be designated to G0 at end of line?
4081         5. 7-bit environment or 8-bit environment?
4082         6. Use locking-shift?
4083         7. Use Single-shift?
4084    And the following two are only for Japanese:
4085         8. Use ASCII in place of JIS0201-1976-Roman?
4086         9. Use JISX0208-1983 in place of JISX0208-1978?
4087    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4088    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4089    details.
4090 */
4091
/* Emit the ISO 2022 escape sequence that designates CHARSET to graphic
   register REG (an index 0..3 into the intermediate-character tables
   below), then record CHARSET's id as the designation of REG in CODING.

   Layout of the sequence, as produced by the code below:
     - optionally `ESC & <'@' + revision>', when CODING has
       CODING_ISO_FLAG_REVISION and CHARSET's revision is >= 0;
     - ESC;
     - '$' for a charset whose dimension is not 1;
     - the intermediate character for REG ("()*+" for 94-charsets,
       ",-./" for 96-charsets).  It is left out (short form) only
       for a multi-dimension 94-charset going to register 0 whose
       final character is '@', 'A', or 'B', and only when CODING does
       not have CODING_ISO_FLAG_LONG_FORM;
     - CHARSET's final character.

   Output goes through the EMIT_* macros, so the expansion site must
   provide whatever variables those macros use (the functions in this
   file that expand it keep `dst', `produced_chars' and `multibytep'
   in scope).  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const char *inter_94 = "()*+";                                      \
    const char *inter_96 = ",-./";                                      \
    unsigned char final_byte = CHARSET_ISO_FINAL (charset);             \
    int rev = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)   \
               ? CHARSET_ISO_REVISION (charset) : -1);                  \
                                                                        \
    /* Revision announcement, only when enabled and available.  */      \
    if (rev >= 0)                                                       \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + rev);                                      \
      }                                                                 \
                                                                        \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          EMIT_ONE_ASCII_BYTE (inter_96[reg]);                          \
        /* 94-charset: the intermediate may be dropped (short form)     \
           only for register 0 with final '@'..'B'.  */                 \
        else if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM) \
                 || reg != 0                                            \
                 || final_byte < '@' || final_byte > 'B')               \
          EMIT_ONE_ASCII_BYTE (inter_94[reg]);                          \
      }                                                                 \
    else if (CHARSET_ISO_CHARS_96 (charset))                            \
      EMIT_ONE_ASCII_BYTE (inter_96[reg]);                              \
    else                                                                \
      EMIT_ONE_ASCII_BYTE (inter_94[reg]);                              \
    EMIT_ONE_ASCII_BYTE (final_byte);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4139
4140
/* ISO 2022 single-shift functions (single-shift-2 and single-shift-3).
   Each macro emits the code announcing the shift -- the one-byte
   control code ISO_CODE_SS2 / ISO_CODE_SS3, or, when the coding
   system has CODING_ISO_FLAG_SEVEN_BITS, the escape sequence ESC 'N' /
   ESC 'O' -- and then sets CODING_ISO_SINGLE_SHIFTING, which the
   character-emitting macros below test and clear.  Both macros use
   the variable `coding' of the expansion site.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4163
4164
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one emits
   the code and then records in CODING_ISO_INVOCATION (coding, 0)
   which graphic register (0..3) is now invoked to graphic plane 0.
   They use the variable `coding' of the expansion site.

   The escape sequences are ESC 'n' for locking-shift-2 and ESC 'o'
   for locking-shift-3; these are the same final bytes the ISO 2022
   decoder in this file accepts for the two functions.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3 must be announced with 'o'.  Emitting 'n' here
   would tell the reader that G2 is invoked while the encoder state
   records G3.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
4195
4196
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be a modifiable lvalue: when the coding system has
   CODING_ISO_FLAG_USE_ROMAN and CHARSET is ASCII, it is replaced by
   the JISX0201-Roman charset before encoding.

   The body is a retry loop.  A pass emits the byte and leaves the
   loop when a single shift is pending or when the charset is invoked
   to graphic plane 0 (byte emitted with the high bit clear) or to
   graphic plane 1 (high bit set).  Otherwise it calls
   encode_invocation_designation and tries again.

   C1 is parenthesized wherever it is used, so that an argument such
   as `a | b' or `x ? y : z' is masked as a whole.

   Uses the variables `coding', `dst' and `produced_chars' of the
   expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift was just emitted; it covers exactly this      \
           character.  */                                               \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4239
4240
/* Emit the two bytes of a DIMENSION2 character of CHARSET whose
   position codes are C1 and C2, first producing any designation or
   invocation codes that are still needed.

   CHARSET must be a modifiable lvalue: when the coding system has
   CODING_ISO_FLAG_USE_OLDJIS and CHARSET is JISX0208, it is replaced
   by the JISX0208-1978 charset before encoding.

   The body loops until the character has been written.  A pass writes
   the bytes and leaves the loop if a single shift is pending, or if
   the charset is invoked to graphic plane 0 (high bits clear) or to
   graphic plane 1 (high bits set).  Otherwise it calls
   encode_invocation_designation and goes round again.

   Uses the variables `coding', `dst' and `produced_chars' of the
   expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int cs_id = CHARSET_ID (charset);                                   \
                                                                        \
    if (cs_id == charset_jisx0208                                       \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        cs_id = charset_jisx0208_1978;                                  \
        charset = CHARSET_FROM_ID (cs_id);                              \
      }                                                                 \
                                                                        \
    /* A pending single shift covers exactly this character.  */        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* Already invoked to graphic plane 0?  */                          \
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 0))                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* Already invoked to graphic plane 1?  */                          \
    if (cs_id == CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* Not reachable through either plane yet: designate and/or         \
       invoke the charset, then take another pass to emit the           \
       character itself.  */                                            \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
4283
4284
/* Encode character C in CHARSET: obtain its code with ENCODE_CHAR,
   then hand it to the DIMENSION1 macro when CHARSET has dimension 1,
   or otherwise to the DIMENSION2 macro with the code split into
   `code >> 8' and `code & 0xFF'.  CHARSET must be a modifiable
   lvalue because those macros may replace it.  */

#define ENCODE_ISO_CHARACTER(charset, c)                                \
  do {                                                                  \
    int position_code = ENCODE_CHAR ((charset), (c));                   \
                                                                        \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), position_code >> 8,   \
                                       position_code & 0xFF);           \
    else                                                                \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), position_code);       \
  } while (0)
4294
4295
4296 /* Produce designation and invocation codes at a place pointed by DST
4297    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4298    Return new DST.  */
4299
4300 unsigned char *
4301 encode_invocation_designation (charset, coding, dst, p_nchars)
4302      struct charset *charset;
4303      struct coding_system *coding;
4304      unsigned char *dst;
4305      int *p_nchars;
4306 {
4307   int multibytep = coding->dst_multibyte;
4308   int produced_chars = *p_nchars;
4309   int reg;                      /* graphic register number */
4310   int id = CHARSET_ID (charset);
4311
4312   /* At first, check designations.  */
4313   for (reg = 0; reg < 4; reg++)
4314     if (id == CODING_ISO_DESIGNATION (coding, reg))
4315       break;
4316
4317   if (reg >= 4)
4318     {
4319       /* CHARSET is not yet designated to any graphic registers.  */
4320       /* At first check the requested designation.  */
4321       reg = CODING_ISO_REQUEST (coding, id);
4322       if (reg < 0)
4323         /* Since CHARSET requests no special designation, designate it
4324            to graphic register 0.  */
4325         reg = 0;
4326
4327       ENCODE_DESIGNATION (charset, reg, coding);
4328     }
4329
4330   if (CODING_ISO_INVOCATION (coding, 0) != reg
4331       && CODING_ISO_INVOCATION (coding, 1) != reg)
4332     {
4333       /* Since the graphic register REG is not invoked to any graphic
4334          planes, invoke it to graphic plane 0.  */
4335       switch (reg)
4336         {
4337         case 0:                 /* graphic register 0 */
4338           ENCODE_SHIFT_IN;
4339           break;
4340
4341         case 1:                 /* graphic register 1 */
4342           ENCODE_SHIFT_OUT;
4343           break;
4344
4345         case 2:                 /* graphic register 2 */
4346           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4347             ENCODE_SINGLE_SHIFT_2;
4348           else
4349             ENCODE_LOCKING_SHIFT_2;
4350           break;
4351
4352         case 3:                 /* graphic register 3 */
4353           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4354             ENCODE_SINGLE_SHIFT_3;
4355           else
4356             ENCODE_LOCKING_SHIFT_3;
4357           break;
4358         }
4359     }
4360
4361   *p_nchars = produced_chars;
4362   return dst;
4363 }
4364
/* The following three macros produce codes for indicating direction
   of text.  They use the variable `coding' of the expansion site.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the introducer itself: the
   escape sequence ESC '[' when the coding system has
   CODING_ISO_FLAG_SEVEN_BITS, otherwise the one-byte control code
   ISO_CODE_CSI.  The flag is tested with `&', like every other
   CODING_ISO_FLAG_* test in this file; comparing the whole flag word
   with `==' would select the 8-bit code for any 7-bit coding system
   that has other flags set as well.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R emit the introducer
   followed by "2]" and "0]" respectively.  The introducer macro is
   object-like, so it is expanded without an argument list.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)


#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4388
4389
/* Bring the graphic planes and registers back to their initial state:
   emit shift-in unless graphic register 0 is already invoked to
   graphic plane 0, then, for every graphic register that has an
   initial charset (CODING_ISO_INITIAL >= 0) different from its
   current designation, emit the designation of that initial charset.
   Uses the variable `coding' of the expansion site.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    struct charset *initial_cs;                                         \
    int gr;                                                             \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (gr = 0; gr < 4; gr++)                                          \
      {                                                                 \
        /* Nothing to do for a register without an initial charset,     \
           or one that still holds it.  */                              \
        if (CODING_ISO_INITIAL (coding, gr) < 0                         \
            || (CODING_ISO_DESIGNATION (coding, gr)                     \
                == CODING_ISO_INITIAL (coding, gr)))                    \
          continue;                                                     \
        initial_cs = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, gr)); \
        ENCODE_DESIGNATION (initial_cs, gr, coding);                    \
      }                                                                 \
  } while (0)
4408
4409
4410 /* Produce designation sequences of charsets in the line started from
4411    SRC to a place pointed by DST, and return updated DST.
4412
4413    If the current block ends before any end-of-line, we may fail to
4414    find all the necessary designations.  */
4415
4416 static unsigned char *
4417 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
4418      struct coding_system *coding;
4419      int *charbuf, *charbuf_end;
4420      unsigned char *dst;
4421 {
4422   struct charset *charset;
4423   /* Table of charsets to be designated to each graphic register.  */
4424   int r[4];
4425   int c, found = 0, reg;
4426   int produced_chars = 0;
4427   int multibytep = coding->dst_multibyte;
4428   Lisp_Object attrs;
4429   Lisp_Object charset_list;
4430
4431   attrs = CODING_ID_ATTRS (coding->id);
4432   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4433   if (EQ (charset_list, Qiso_2022))
4434     charset_list = Viso_2022_charset_list;
4435
4436   for (reg = 0; reg < 4; reg++)
4437     r[reg] = -1;
4438
4439   while (found < 4)
4440     {
4441       int id;
4442
4443       c = *charbuf++;
4444       if (c == '\n')
4445         break;
4446       charset = char_charset (c, charset_list, NULL);
4447       id = CHARSET_ID (charset);
4448       reg = CODING_ISO_REQUEST (coding, id);
4449       if (reg >= 0 && r[reg] < 0)
4450         {
4451           found++;
4452           r[reg] = id;
4453         }
4454     }
4455
4456   if (found)
4457     {
4458       for (reg = 0; reg < 4; reg++)
4459         if (r[reg] >= 0
4460             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4461           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4462     }
4463
4464   return dst;
4465 }
4466
4467 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4468
4469 static int
4470 encode_coding_iso_2022 (coding)
4471      struct coding_system *coding;
4472 {
4473   int multibytep = coding->dst_multibyte;
4474   int *charbuf = coding->charbuf;
4475   int *charbuf_end = charbuf + coding->charbuf_used;
4476   unsigned char *dst = coding->destination + coding->produced;
4477   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4478   int safe_room = 16;
4479   int bol_designation
4480     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4481        && CODING_ISO_BOL (coding));
4482   int produced_chars = 0;
4483   Lisp_Object attrs, eol_type, charset_list;
4484   int ascii_compatible;
4485   int c;
4486   int preferred_charset_id = -1;
4487
4488   CODING_GET_INFO (coding, attrs, charset_list);
4489   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4490   if (VECTORP (eol_type))
4491     eol_type = Qunix;
4492
4493   setup_iso_safe_charsets (attrs);
4494   /* Charset list may have been changed.  */
4495   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4496   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4497
4498   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4499
4500   while (charbuf < charbuf_end)
4501     {
4502       ASSURE_DESTINATION (safe_room);
4503
4504       if (bol_designation)
4505         {
4506           unsigned char *dst_prev = dst;
4507
4508           /* We have to produce designation sequences if any now.  */
4509           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4510           bol_designation = 0;
4511           /* We are sure that designation sequences are all ASCII bytes.  */
4512           produced_chars += dst - dst_prev;
4513         }
4514
4515       c = *charbuf++;
4516
4517       if (c < 0)
4518         {
4519           /* Handle an annotation.  */
4520           switch (*charbuf)
4521             {
4522             case CODING_ANNOTATE_COMPOSITION_MASK:
4523               /* Not yet implemented.  */
4524               break;
4525             case CODING_ANNOTATE_CHARSET_MASK:
4526               preferred_charset_id = charbuf[2];
4527               if (preferred_charset_id >= 0
4528                   && NILP (Fmemq (make_number (preferred_charset_id),
4529                                   charset_list)))
4530                 preferred_charset_id = -1;
4531               break;
4532             default:
4533               abort ();
4534             }
4535           charbuf += -c - 1;
4536           continue;
4537         }
4538
4539       /* Now encode the character C.  */
4540       if (c < 0x20 || c == 0x7F)
4541         {
4542           if (c == '\n'
4543               || (c == '\r' && EQ (eol_type, Qmac)))
4544             {
4545               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4546                 ENCODE_RESET_PLANE_AND_REGISTER ();
4547               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4548                 {
4549                   int i;
4550
4551                   for (i = 0; i < 4; i++)
4552                     CODING_ISO_DESIGNATION (coding, i)
4553                       = CODING_ISO_INITIAL (coding, i);
4554                 }
4555               bol_designation
4556                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4557             }
4558           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4559             ENCODE_RESET_PLANE_AND_REGISTER ();
4560           EMIT_ONE_ASCII_BYTE (c);
4561         }
4562       else if (ASCII_CHAR_P (c))
4563         {
4564           if (ascii_compatible)
4565             EMIT_ONE_ASCII_BYTE (c);
4566           else
4567             {
4568               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4569               ENCODE_ISO_CHARACTER (charset, c);
4570             }
4571         }
4572       else if (CHAR_BYTE8_P (c))
4573         {
4574           c = CHAR_TO_BYTE8 (c);
4575           EMIT_ONE_BYTE (c);
4576         }
4577       else
4578         {
4579           struct charset *charset;
4580
4581           if (preferred_charset_id >= 0)
4582             {
4583               charset = CHARSET_FROM_ID (preferred_charset_id);
4584               if (! CHAR_CHARSET_P (c, charset))
4585                 charset = char_charset (c, charset_list, NULL);
4586             }
4587           else
4588             charset = char_charset (c, charset_list, NULL);
4589           if (!charset)
4590             {
4591               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4592                 {
4593                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4594                   charset = CHARSET_FROM_ID (charset_ascii);
4595                 }
4596               else
4597                 {
4598                   c = coding->default_char;
4599                   charset = char_charset (c, charset_list, NULL);
4600                 }
4601             }
4602           ENCODE_ISO_CHARACTER (charset, c);
4603         }
4604     }
4605
4606   if (coding->mode & CODING_MODE_LAST_BLOCK
4607       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4608     {
4609       ASSURE_DESTINATION (safe_room);
4610       ENCODE_RESET_PLANE_AND_REGISTER ();
4611     }
4612   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4613   CODING_ISO_BOL (coding) = bol_designation;
4614   coding->produced_char += produced_chars;
4615   coding->produced = dst - coding->destination;
4616   return 0;
4617 }
4618
4619 \f
4620 /*** 8. SJIS and BIG5 handlers ***/
4621
4622 /* Although SJIS and BIG5 are not ISO coding systems, they are used
4623    quite widely.  So, for the moment, Emacs supports them in the bare
4624    C code.  But, in the future, they may be supported only by CCL.  */
4625
4626 /* SJIS is a coding system encoding three character sets: ASCII, the
4627    right half of JISX0201-Kana, and JISX0208.  An ASCII character is
4628    encoded as is.  A character of charset katakana-jisx0201 is encoded
4629    as "position-code + 0x80".  A character of charset japanese-jisx0208
4630    is encoded in two bytes, but its two position-codes are divided and
4631    shifted so that they fit in the ranges below.
4632
4633    --- CODE RANGE of SJIS ---
4634    (character set)      (range)
4635    ASCII                0x00 .. 0x7F
4636    KATAKANA-JISX0201    0xA0 .. 0xDF
4637    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4638             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4639    -------------------------------
4640
4641 */
4642
4643 /* BIG5 is a coding system encoding two character sets: ASCII and
4644    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4645    character set and is encoded in two bytes.
4646
4647    --- CODE RANGE of BIG5 ---
4648    (character set)      (range)
4649    ASCII                0x00 .. 0x7F
4650    Big5 (1st byte)      0xA1 .. 0xFE
4651         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4652    --------------------------
4653
4654   */
4655
4656 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4657    Check if a text is encoded in SJIS.  If it is, return
4658    CATEGORY_MASK_SJIS, else return 0.  */
4659
4660 static int
4661 detect_coding_sjis (coding, detect_info)
4662      struct coding_system *coding;
4663      struct coding_detection_info *detect_info;
4664 {
4665   const unsigned char *src = coding->source, *src_base;
4666   const unsigned char *src_end = coding->source + coding->src_bytes;
4667   int multibytep = coding->src_multibyte;
4668   int consumed_chars = 0;
4669   int found = 0;
4670   int c;
4671
4672   detect_info->checked |= CATEGORY_MASK_SJIS;
4673   /* A coding system of this category is always ASCII compatible.  */
4674   src += coding->head_ascii;
4675
4676   while (1)
4677     {
4678       src_base = src;
4679       ONE_MORE_BYTE (c);
4680       if (c < 0x80)
4681         continue;
4682       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4683         {
4684           ONE_MORE_BYTE (c);
4685           if (c < 0x40 || c == 0x7F || c > 0xFC)
4686             break;
4687           found = CATEGORY_MASK_SJIS;
4688         }
4689       else if (c >= 0xA0 && c < 0xE0)
4690         found = CATEGORY_MASK_SJIS;
4691       else
4692         break;
4693     }
4694   detect_info->rejected |= CATEGORY_MASK_SJIS;
4695   return 0;
4696
4697  no_more_source:
4698   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4699     {
4700       detect_info->rejected |= CATEGORY_MASK_SJIS;
4701       return 0;
4702     }
4703   detect_info->found |= found;
4704   return 1;
4705 }
4706
4707 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4708    Check if a text is encoded in BIG5.  If it is, return
4709    CATEGORY_MASK_BIG5, else return 0.  */
4710
4711 static int
4712 detect_coding_big5 (coding, detect_info)
4713      struct coding_system *coding;
4714      struct coding_detection_info *detect_info;
4715 {
4716   const unsigned char *src = coding->source, *src_base;
4717   const unsigned char *src_end = coding->source + coding->src_bytes;
4718   int multibytep = coding->src_multibyte;
4719   int consumed_chars = 0;
4720   int found = 0;
4721   int c;
4722
4723   detect_info->checked |= CATEGORY_MASK_BIG5;
4724   /* A coding system of this category is always ASCII compatible.  */
4725   src += coding->head_ascii;
4726
4727   while (1)
4728     {
4729       src_base = src;
4730       ONE_MORE_BYTE (c);
4731       if (c < 0x80)
4732         continue;
4733       if (c >= 0xA1)
4734         {
4735           ONE_MORE_BYTE (c);
4736           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4737             return 0;
4738           found = CATEGORY_MASK_BIG5;
4739         }
4740       else
4741         break;
4742     }
4743   detect_info->rejected |= CATEGORY_MASK_BIG5;
4744   return 0;
4745
4746  no_more_source:
4747   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4748     {
4749       detect_info->rejected |= CATEGORY_MASK_BIG5;
4750       return 0;
4751     }
4752   detect_info->found |= found;
4753   return 1;
4754 }
4755
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   Decode the SJIS text of CODING, starting at offset CODING->consumed
   of CODING->source, into CODING->charbuf.

   The charset list of CODING is read positionally: its first three
   entries are used as the roman, kana and kanji charsets, and an
   optional fourth entry as a second kanji charset (labelled
   JISX0213-2 below).  A byte that does not start a valid sequence is
   stored as one element by itself (BYTE8_TO_CHAR of the byte, or -C
   when ONE_MORE_BYTE yielded a negative C) and counted in
   CODING->errors.  On exit CODING->consumed, CODING->consumed_char
   and CODING->charbuf_used are updated.  */

static void
decode_coding_sjis (coding)
     struct coding_system *coding;
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  /* Start of the sequence being decoded.  Decoding restarts from here
     after an invalid sequence, and this is what gets reported as
     consumed when the loop is left.  */
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* We may produce one charset annotation in one loop and one more at
     the end.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
  int consumed_chars = 0, consumed_chars_base;
  int multibytep = coding->src_multibyte;
  struct charset *charset_roman, *charset_kanji, *charset_kana;
  struct charset *charset_kanji2;
  Lisp_Object attrs, charset_list, val;
  /* CHAR_OFFSET counts produced characters.  LAST_ID and LAST_OFFSET
     record the charset of the current non-ASCII run and the offset at
     which that run began.  */
  int char_offset = coding->produced_char;
  int last_offset = char_offset;
  int last_id = charset_ascii;
  int eol_crlf =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* Byte read ahead after a CR when EOL_CRLF is set, or -1 if none.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attrs, charset_list);

  val = charset_list;
  charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  /* The fourth charset is optional.  */
  charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));

  while (1)
    {
      int c, c1;
      struct charset *charset;

      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* Out of room.  Step SRC_BASE back by one so that the byte
             already read ahead after a CR is not reported as
             consumed.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      /* Use the byte read ahead after a CR, if any, before reading a
         new one.  */
      if (byte_after_cr >= 0)
        c = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c);
      if (c < 0)
        goto invalid_code;
      if (c < 0x80)
        {
          /* With CRLF end-of-line, read the byte following a CR right
             away; it is decoded on the next iteration.  */
          if (eol_crlf && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          charset = charset_roman;
        }
      else if (c == 0x80 || c == 0xA0)
        goto invalid_code;
      else if (c >= 0xA1 && c <= 0xDF)
        {
          /* SJIS -> JISX0201-Kana */
          c &= 0x7F;
          charset = charset_kana;
        }
      else if (c <= 0xEF)
        {
          /* SJIS -> JISX0208 */
          /* Reached for lead bytes 0x81..0x9F and 0xE0..0xEF.  */
          ONE_MORE_BYTE (c1);
          if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
            goto invalid_code;
          c = (c << 8) | c1;
          SJIS_TO_JIS (c);
          charset = charset_kanji;
        }
      else if (c <= 0xFC && charset_kanji2)
        {
          /* SJIS -> JISX0213-2 */
          /* Lead bytes 0xF0..0xFC, accepted only when the coding
             system lists a fourth charset.  */
          ONE_MORE_BYTE (c1);
          if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
            goto invalid_code;
          c = (c << 8) | c1;
          SJIS_TO_JIS2 (c);
          charset = charset_kanji2;
        }
      else
        goto invalid_code;
      /* On a switch to a different non-ASCII charset, annotate the
         run that just ended (unless it was ASCII) and start a new
         run.  */
      if (charset->id != charset_ascii
          && last_id != charset->id)
        {
          if (last_id != charset_ascii)
            ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
          last_id = charset->id;
          last_offset = char_offset;
        }
      CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
      *charbuf++ = c;
      char_offset++;
      continue;

    invalid_code:
      /* Rewind to the start of the bad sequence and emit only its
         first byte; the bytes after it are examined afresh.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
      char_offset++;
      coding->errors++;
    }

 no_more_source:
  /* Annotate the run still open, then report progress up to the start
     of the last sequence that was not completed.  */
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
4877
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   Decode the BIG5 text of CODING, starting at offset CODING->consumed
   of CODING->source, into CODING->charbuf.

   The charset list of CODING is read positionally: its first entry is
   used as the roman charset and its second as the Big5 charset.  A
   byte that does not start a valid sequence is stored as one element
   by itself (BYTE8_TO_CHAR of the byte, or -C when ONE_MORE_BYTE
   yielded a negative C) and counted in CODING->errors.  On exit
   CODING->consumed, CODING->consumed_char and CODING->charbuf_used
   are updated.  */

static void
decode_coding_big5 (coding)
     struct coding_system *coding;
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  /* Start of the sequence being decoded.  Decoding restarts from here
     after an invalid sequence, and this is what gets reported as
     consumed when the loop is left.  */
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* We may produce one charset annotation in one loop and one more at
     the end.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
  int consumed_chars = 0, consumed_chars_base;
  int multibytep = coding->src_multibyte;
  struct charset *charset_roman, *charset_big5;
  Lisp_Object attrs, charset_list, val;
  /* CHAR_OFFSET counts produced characters.  LAST_ID and LAST_OFFSET
     record the charset of the current non-ASCII run and the offset at
     which that run began.  */
  int char_offset = coding->produced_char;
  int last_offset = char_offset;
  int last_id = charset_ascii;
  int eol_crlf =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* Byte read ahead after a CR when EOL_CRLF is set, or -1 if none.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attrs, charset_list);
  val = charset_list;
  charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));

  while (1)
    {
      int c, c1;
      struct charset *charset;

      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* Out of room.  Step SRC_BASE back by one so that the byte
             already read ahead after a CR is not reported as
             consumed.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      /* Use the byte read ahead after a CR, if any, before reading a
         new one.  */
      if (byte_after_cr >= 0)
        c = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c);

      if (c < 0)
        goto invalid_code;
      if (c < 0x80)
        {
          /* With CRLF end-of-line, read the byte following a CR right
             away; it is decoded on the next iteration.  */
          if (eol_crlf && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          charset = charset_roman;
        }
      else
        {
          /* BIG5 -> Big5 */
          /* Lead byte must be in 0xA1..0xFE; trail byte in 0x40..0x7E
             or 0xA1..0xFE.  */
          if (c < 0xA1 || c > 0xFE)
            goto invalid_code;
          ONE_MORE_BYTE (c1);
          if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
            goto invalid_code;
          c = c << 8 | c1;
          charset = charset_big5;
        }
      /* On a switch to a different non-ASCII charset, annotate the
         run that just ended (unless it was ASCII) and start a new
         run.  */
      if (charset->id != charset_ascii
          && last_id != charset->id)
        {
          if (last_id != charset_ascii)
            ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
          last_id = charset->id;
          last_offset = char_offset;
        }
      CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
      *charbuf++ = c;
      char_offset++;
      continue;

    invalid_code:
      /* Rewind to the start of the bad sequence and emit only its
         first byte; the bytes after it are examined afresh.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
      char_offset++;
      coding->errors++;
    }

 no_more_source:
  /* Annotate the run still open, then report progress up to the start
     of the last sequence that was not completed.  */
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
4974
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
   Encode the characters in CODING->charbuf as SJIS, appending the
   bytes to CODING->destination.

   The charset list of CODING is read positionally: its first three
   entries are taken as the roman, kana and kanji charsets, and an
   optional fourth entry as a second kanji charset.  A character that
   belongs to none of the listed charsets is replaced: by
   CODING_INHIBIT_CHARACTER_SUBSTITUTION (emitted as an ASCII code)
   when CODING->mode has CODING_MODE_SAFE_ENCODING set, otherwise by
   CODING->default_char.

   Always record CODING_RESULT_SUCCESS, update CODING->produced and
   CODING->produced_char, and return 0.  */

static int
encode_coding_sjis (coding)
     struct coding_system *coding;
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Room requested from ASSURE_DESTINATION before each character.  */
  int safe_room = 4;
  int produced_chars = 0;
  Lisp_Object attrs, charset_list, val;
  int ascii_compatible;
  struct charset *charset_roman, *charset_kanji, *charset_kana;
  struct charset *charset_kanji2;
  int c;

  CODING_GET_INFO (coding, attrs, charset_list);
  val = charset_list;
  /* CHARSET_ROMAN is fetched to step through the list; it is not
     referenced again in this function.  */
  charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
  /* The fourth charset is optional.  */
  charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));

  ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));

  while (charbuf < charbuf_end)
    {
      ASSURE_DESTINATION (safe_room);
      c = *charbuf++;
      /* Now encode the character C.  */
      if (ASCII_CHAR_P (c) && ascii_compatible)
        EMIT_ONE_ASCII_BYTE (c);
      else if (CHAR_BYTE8_P (c))
        {
          /* A raw byte is written back unchanged.  */
          c = CHAR_TO_BYTE8 (c);
          EMIT_ONE_BYTE (c);
        }
      else
        {
          unsigned code;
          struct charset *charset = char_charset (c, charset_list, &code);

          if (!charset)
            {
              /* C is in none of the charsets of this coding system;
                 substitute it.  */
              if (coding->mode & CODING_MODE_SAFE_ENCODING)
                {
                  code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
                  charset = CHARSET_FROM_ID (charset_ascii);
                }
              else
                {
                  c = coding->default_char;
                  charset = char_charset (c, charset_list, &code);
                }
            }
          /* NOTE(review): CHARSET is passed to CHARSET_INVALID_CODE
             without a null check; this presumably relies on
             default_char always being encodable -- confirm.  */
          if (code == CHARSET_INVALID_CODE (charset))
            abort ();
          if (charset == charset_kanji)
            {
              int c1, c2;
              /* Convert CODE in place, then split it into the two
                 bytes to emit.  */
              JIS_TO_SJIS (code);
              c1 = code >> 8, c2 = code & 0xFF;
              EMIT_TWO_BYTES (c1, c2);
            }
          else if (charset == charset_kana)
            EMIT_ONE_BYTE (code | 0x80);
          else if (charset_kanji2 && charset == charset_kanji2)
            {
              int c1, c2;

              c1 = code >> 8;
              /* Only codes whose high byte passes this test are
                 converted with JIS_TO_SJIS2; for any other code the
                 low seven bits are emitted as a single byte.  */
              if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
                  || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
                {
                  JIS_TO_SJIS2 (code);
                  c1 = code >> 8, c2 = code & 0xFF;
                  EMIT_TWO_BYTES (c1, c2);
                }
              else
                EMIT_ONE_ASCII_BYTE (code & 0x7F);
            }
          else
            /* Any other charset (including the ASCII substitute
               chosen above): emit the low seven bits of CODE.  */
            EMIT_ONE_ASCII_BYTE (code & 0x7F);
        }
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
5074
5075 static int
5076 encode_coding_big5 (coding)
5077      struct coding_system *coding;
5078 {
5079   int multibytep = coding->dst_multibyte;
5080   int *charbuf = coding->charbuf;
5081   int *charbuf_end = charbuf + coding->charbuf_used;
5082   unsigned char *dst = coding->destination + coding->produced;
5083   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5084   int safe_room = 4;
5085   int produced_chars = 0;
5086   Lisp_Object attrs, charset_list, val;
5087   int ascii_compatible;
5088   struct charset *charset_roman, *charset_big5;
5089   int c;
5090
5091   CODING_GET_INFO (coding, attrs, charset_list);
5092   val = charset_list;
5093   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5094   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5095   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5096
5097   while (charbuf < charbuf_end)
5098     {
5099       ASSURE_DESTINATION (safe_room);
5100       c = *charbuf++;
5101       /* Now encode the character C.  */
5102       if (ASCII_CHAR_P (c) && ascii_compatible)
5103         EMIT_ONE_ASCII_BYTE (c);
5104       else if (CHAR_BYTE8_P (c))
5105         {
5106           c = CHAR_TO_BYTE8 (c);
5107           EMIT_ONE_BYTE (c);
5108         }
5109       else
5110         {
5111           unsigned code;
5112           struct charset *charset = char_charset (c, charset_list, &code);
5113
5114           if (! charset)
5115             {
5116               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5117                 {
5118                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5119                   charset = CHARSET_FROM_ID (charset_ascii);
5120                 }
5121               else
5122                 {
5123                   c = coding->default_char;
5124                   charset = char_charset (c, charset_list, &code);
5125                 }
5126             }
5127           if (code == CHARSET_INVALID_CODE (charset))
5128             abort ();
5129           if (charset == charset_big5)
5130             {
5131               int c1, c2;
5132
5133               c1 = code >> 8, c2 = code & 0xFF;
5134               EMIT_TWO_BYTES (c1, c2);
5135             }
5136           else
5137             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5138         }
5139     }
5140   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5141   coding->produced_char += produced_chars;
5142   coding->produced = dst - coding->destination;
5143   return 0;
5144 }
5145
5146 \f
5147 /*** 9. CCL handlers ***/
5148
5149 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5150    Check if a text is encoded in a coding system of which
5151    encoder/decoder are written in CCL program.  If it is, return
5152    CATEGORY_MASK_CCL, else return 0.  */
5153
5154 static int
5155 detect_coding_ccl (coding, detect_info)
5156      struct coding_system *coding;
5157      struct coding_detection_info *detect_info;
5158 {
5159   const unsigned char *src = coding->source, *src_base;
5160   const unsigned char *src_end = coding->source + coding->src_bytes;
5161   int multibytep = coding->src_multibyte;
5162   int consumed_chars = 0;
5163   int found = 0;
5164   unsigned char *valids;
5165   int head_ascii = coding->head_ascii;
5166   Lisp_Object attrs;
5167
5168   detect_info->checked |= CATEGORY_MASK_CCL;
5169
5170   coding = &coding_categories[coding_category_ccl];
5171   valids = CODING_CCL_VALIDS (coding);
5172   attrs = CODING_ID_ATTRS (coding->id);
5173   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5174     src += head_ascii;
5175
5176   while (1)
5177     {
5178       int c;
5179
5180       src_base = src;
5181       ONE_MORE_BYTE (c);
5182       if (c < 0 || ! valids[c])
5183         break;
5184       if ((valids[c] > 1))
5185         found = CATEGORY_MASK_CCL;
5186     }
5187   detect_info->rejected |= CATEGORY_MASK_CCL;
5188   return 0;
5189
5190  no_more_source:
5191   detect_info->found |= found;
5192   return 1;
5193 }
5194
5195 static void
5196 decode_coding_ccl (coding)
5197      struct coding_system *coding;
5198 {
5199   const unsigned char *src = coding->source + coding->consumed;
5200   const unsigned char *src_end = coding->source + coding->src_bytes;
5201   int *charbuf = coding->charbuf + coding->charbuf_used;
5202   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5203   int consumed_chars = 0;
5204   int multibytep = coding->src_multibyte;
5205   struct ccl_program ccl;
5206   int source_charbuf[1024];
5207   int source_byteidx[1024];
5208   Lisp_Object attrs, charset_list;
5209
5210   CODING_GET_INFO (coding, attrs, charset_list);
5211   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
5212
5213   while (src < src_end)
5214     {
5215       const unsigned char *p = src;
5216       int *source, *source_end;
5217       int i = 0;
5218
5219       if (multibytep)
5220         while (i < 1024 && p < src_end)
5221           {
5222             source_byteidx[i] = p - src;
5223             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5224           }
5225       else
5226         while (i < 1024 && p < src_end)
5227           source_charbuf[i++] = *p++;
5228
5229       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5230         ccl.last_block = 1;
5231
5232       source = source_charbuf;
5233       source_end = source + i;
5234       while (source < source_end)
5235         {
5236           ccl_driver (&ccl, source, charbuf,
5237                       source_end - source, charbuf_end - charbuf,
5238                       charset_list);
5239           source += ccl.consumed;
5240           charbuf += ccl.produced;
5241           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
5242             break;
5243         }
5244       if (source < source_end)
5245         src += source_byteidx[source - source_charbuf];
5246       else
5247         src = p;
5248       consumed_chars += source - source_charbuf;
5249
5250       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
5251           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
5252         break;
5253     }
5254
5255   switch (ccl.status)
5256     {
5257     case CCL_STAT_SUSPEND_BY_SRC:
5258       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5259       break;
5260     case CCL_STAT_SUSPEND_BY_DST:
5261       break;
5262     case CCL_STAT_QUIT:
5263     case CCL_STAT_INVALID_CMD:
5264       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5265       break;
5266     default:
5267       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5268       break;
5269     }
5270   coding->consumed_char += consumed_chars;
5271   coding->consumed = src - coding->source;
5272   coding->charbuf_used = charbuf - coding->charbuf;
5273 }
5274
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
   Encode the characters in CODING->charbuf by running the CCL program
   given by CODING_CCL_ENCODER, appending the low eight bits of each
   value it produces to CODING->destination.

   The final CCL status is translated into a conversion result, and
   CODING->produced and CODING->produced_char are updated.  Always
   return 0.  */

static int
encode_coding_ccl (coding)
     struct coding_system *coding;
{
  struct ccl_program ccl;
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  /* Output of one ccl_driver call.  */
  int destination_charbuf[1024];
  int i, produced_chars = 0;
  Lisp_Object attrs, charset_list;

  CODING_GET_INFO (coding, attrs, charset_list);
  setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));

  ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
  ccl.dst_multibyte = coding->dst_multibyte;

  while (charbuf < charbuf_end)
    {
      /* Run the program on what is left of CHARBUF, collecting at
         most 1024 output values.  */
      ccl_driver (&ccl, charbuf, destination_charbuf,
                  charbuf_end - charbuf, 1024, charset_list);
      if (multibytep)
        {
          /* Twice the number of produced values is requested here,
             presumably because a byte may take two bytes in a
             multibyte destination.  PRODUCED_CHARS is not touched
             directly in this branch; presumably EMIT_ONE_BYTE
             maintains it.  */
          ASSURE_DESTINATION (ccl.produced * 2);
          for (i = 0; i < ccl.produced; i++)
            EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
        }
      else
        {
          ASSURE_DESTINATION (ccl.produced);
          for (i = 0; i < ccl.produced; i++)
            *dst++ = destination_charbuf[i] & 0xFF;
          produced_chars += ccl.produced;
        }
      charbuf += ccl.consumed;
      /* Stop early only when the program quit or hit an invalid
         command.  */
      if (ccl.status == CCL_STAT_QUIT
          || ccl.status == CCL_STAT_INVALID_CMD)
        break;
    }

  switch (ccl.status)
    {
    case CCL_STAT_SUSPEND_BY_SRC:
      record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
      break;
    case CCL_STAT_SUSPEND_BY_DST:
      record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
      break;
    case CCL_STAT_QUIT:
    case CCL_STAT_INVALID_CMD:
      record_conversion_result (coding, CODING_RESULT_INTERRUPT);
      break;
    default:
      record_conversion_result (coding, CODING_RESULT_SUCCESS);
      break;
    }

  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
5339
5340
5341 \f
5342 /*** 10, 11. no-conversion handlers ***/
5343
5344 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
5345
5346 static void
5347 decode_coding_raw_text (coding)
5348      struct coding_system *coding;
5349 {
5350   int eol_crlf =
5351     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5352
5353   coding->chars_at_source = 1;
5354   coding->consumed_char = coding->src_chars;
5355   coding->consumed = coding->src_bytes;
5356   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
5357     {
5358       coding->consumed_char--;
5359       coding->consumed--;
5360       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5361     }
5362   else
5363     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5364 }
5365
5366 static int
5367 encode_coding_raw_text (coding)
5368      struct coding_system *coding;
5369 {
5370   int multibytep = coding->dst_multibyte;
5371   int *charbuf = coding->charbuf;
5372   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5373   unsigned char *dst = coding->destination + coding->produced;
5374   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5375   int produced_chars = 0;
5376   int c;
5377
5378   if (multibytep)
5379     {
5380       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5381
5382       if (coding->src_multibyte)
5383         while (charbuf < charbuf_end)
5384           {
5385             ASSURE_DESTINATION (safe_room);
5386             c = *charbuf++;
5387             if (ASCII_CHAR_P (c))
5388               EMIT_ONE_ASCII_BYTE (c);
5389             else if (CHAR_BYTE8_P (c))
5390               {
5391                 c = CHAR_TO_BYTE8 (c);
5392                 EMIT_ONE_BYTE (c);
5393               }
5394             else
5395               {
5396                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5397
5398                 CHAR_STRING_ADVANCE (c, p1);
5399                 while (p0 < p1)
5400                   {
5401                     EMIT_ONE_BYTE (*p0);
5402                     p0++;
5403                   }
5404               }
5405           }
5406       else
5407         while (charbuf < charbuf_end)
5408           {
5409             ASSURE_DESTINATION (safe_room);
5410             c = *charbuf++;
5411             EMIT_ONE_BYTE (c);
5412           }
5413     }
5414   else
5415     {
5416       if (coding->src_multibyte)
5417         {
5418           int safe_room = MAX_MULTIBYTE_LENGTH;
5419
5420           while (charbuf < charbuf_end)
5421             {
5422               ASSURE_DESTINATION (safe_room);
5423               c = *charbuf++;
5424               if (ASCII_CHAR_P (c))
5425                 *dst++ = c;
5426               else if (CHAR_BYTE8_P (c))
5427                 *dst++ = CHAR_TO_BYTE8 (c);
5428               else
5429                 CHAR_STRING_ADVANCE (c, dst);
5430             }
5431         }
5432       else
5433         {
5434           ASSURE_DESTINATION (charbuf_end - charbuf);
5435           while (charbuf < charbuf_end && dst < dst_end)
5436             *dst++ = *charbuf++;
5437         }
5438       produced_chars = dst - (coding->destination + coding->produced);
5439     }
5440   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5441   coding->produced_char += produced_chars;
5442   coding->produced = dst - coding->destination;
5443   return 0;
5444 }
5445
5446 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5447    Check if a text is encoded in a charset-based coding system.  If it
5448    is, return 1, else return 0.  */
5449
5450 static int
5451 detect_coding_charset (coding, detect_info)
5452      struct coding_system *coding;
5453      struct coding_detection_info *detect_info;
5454 {
5455   const unsigned char *src = coding->source, *src_base;
5456   const unsigned char *src_end = coding->source + coding->src_bytes;
5457   int multibytep = coding->src_multibyte;
5458   int consumed_chars = 0;
5459   Lisp_Object attrs, valids, name;
5460   int found = 0;
5461   int head_ascii = coding->head_ascii;
5462   int check_latin_extra = 0;
5463
5464   detect_info->checked |= CATEGORY_MASK_CHARSET;
5465
5466   coding = &coding_categories[coding_category_charset];
5467   attrs = CODING_ID_ATTRS (coding->id);
5468   valids = AREF (attrs, coding_attr_charset_valids);
5469   name = CODING_ID_NAME (coding->id);
5470   if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5471                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5472       || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5473                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5474     check_latin_extra = 1;
5475
5476   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5477     src += head_ascii;
5478
5479   while (1)
5480     {
5481       int c;
5482       Lisp_Object val;
5483       struct charset *charset;
5484       int dim, idx;
5485
5486       src_base = src;
5487       ONE_MORE_BYTE (c);
5488       if (c < 0)
5489         continue;
5490       val = AREF (valids, c);
5491       if (NILP (val))
5492         break;
5493       if (c >= 0x80)
5494         {
5495           if (c < 0xA0
5496               && check_latin_extra
5497               && (!VECTORP (Vlatin_extra_code_table)
5498                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5499             break;
5500           found = CATEGORY_MASK_CHARSET;
5501         }
5502       if (INTEGERP (val))
5503         {
5504           charset = CHARSET_FROM_ID (XFASTINT (val));
5505           dim = CHARSET_DIMENSION (charset);
5506           for (idx = 1; idx < dim; idx++)
5507             {
5508               if (src == src_end)
5509                 goto too_short;
5510               ONE_MORE_BYTE (c);
5511               if (c < charset->code_space[(dim - 1 - idx) * 2]
5512                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5513                 break;
5514             }
5515           if (idx < dim)
5516             break;
5517         }
5518       else
5519         {
5520           idx = 1;
5521           for (; CONSP (val); val = XCDR (val))
5522             {
5523               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5524               dim = CHARSET_DIMENSION (charset);
5525               while (idx < dim)
5526                 {
5527                   if (src == src_end)
5528                     goto too_short;
5529                   ONE_MORE_BYTE (c);
5530                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5531                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5532                     break;
5533                   idx++;
5534                 }
5535               if (idx == dim)
5536                 {
5537                   val = Qnil;
5538                   break;
5539                 }
5540             }
5541           if (CONSP (val))
5542             break;
5543         }
5544     }
5545  too_short:
5546   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5547   return 0;
5548
5549  no_more_source:
5550   detect_info->found |= found;
5551   return 1;
5552 }
5553
/* Decode the bytes of CODING->source, starting at offset
   CODING->consumed, as text in a charset-based coding system, and
   append the resulting characters to CODING->charbuf.

   The first byte of each sequence is looked up in the coding system's
   coding_attr_charset_valids table, which yields either one charset
   ID or a list of charset IDs to try.  A sequence that cannot be
   decoded is passed through one byte at a time and counted in
   CODING->errors.

   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.  */
static void
decode_coding_charset (coding)
     struct coding_system *coding;
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* We may produce one charset annotation in one loop and one more at
     the end.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
  int consumed_chars = 0, consumed_chars_base;
  int multibytep = coding->src_multibyte;
  Lisp_Object attrs, charset_list, valids;
  int char_offset = coding->produced_char;
  /* LAST_ID is the charset of the current run of non-ASCII
     characters, and LAST_OFFSET the character offset where that run
     started.  */
  int last_offset = char_offset;
  int last_id = charset_ascii;
  int eol_crlf =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* When non-negative, a byte that was read ahead after a CR and has
     not been processed yet.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attrs, charset_list);
  valids = AREF (attrs, coding_attr_charset_valids);

  while (1)
    {
      int c;
      Lisp_Object val;
      struct charset *charset;
      int dim;
      int len = 1;
      unsigned code;

      /* Remember where this sequence starts so that it can be re-read
         (at `invalid_code') or left unconsumed (at exit).  */
      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* Give back the pending read-ahead byte so that it is not
             counted as consumed.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        {
          c = byte_after_cr;
          byte_after_cr = -1;
        }
      else
        {
          /* NOTE(review): ONE_MORE_BYTE is defined elsewhere; it is
             presumably what jumps to `no_more_source' when the source
             is exhausted -- confirm against the macro.  */
          ONE_MORE_BYTE (c);
          if (eol_crlf && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
        }
      if (c < 0)
        goto invalid_code;
      code = c;

      val = AREF (valids, c);
      if (! INTEGERP (val) && ! CONSP (val))
        goto invalid_code;
      if (INTEGERP (val))
        {
          /* VAL is a single charset ID: gather as many bytes as the
             charset has dimensions, most significant byte first.  */
          charset = CHARSET_FROM_ID (XFASTINT (val));
          dim = CHARSET_DIMENSION (charset);
          while (len < dim)
            {
              ONE_MORE_BYTE (c);
              code = (code << 8) | c;
              len++;
            }
          CODING_DECODE_CHAR (coding, src, src_base, src_end,
                              charset, code, c);
        }
      else
        {
          /* VAL is a list of charset IDs.  It is assured that the
             list is sorted by charset dimensions (smaller one
             comes first).  */
          while (CONSP (val))
            {
              charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
              dim = CHARSET_DIMENSION (charset);
              /* LEN and CODE carry over from the previous candidate,
                 so only the additional bytes are read here.  */
              while (len < dim)
                {
                  ONE_MORE_BYTE (c);
                  code = (code << 8) | c;
                  len++;
                }
              CODING_DECODE_CHAR (coding, src, src_base,
                                  src_end, charset, code, c);
              if (c >= 0)
                break;
              val = XCDR (val);
            }
        }
      if (c < 0)
        goto invalid_code;
      if (charset->id != charset_ascii
          && last_id != charset->id)
        {
          /* The charset changed: close the previous non-ASCII run,
             if any, and start a new one at the current offset.  */
          if (last_id != charset_ascii)
            ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
          last_id = charset->id;
          last_offset = char_offset;
        }

      *charbuf++ = c;
      char_offset++;
      continue;

    invalid_code:
      /* Rewind to the start of the sequence and pass through its
         first byte only: a negative value is stored negated, an ASCII
         byte as is, and any other byte via BYTE8_TO_CHAR.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
      char_offset++;
      coding->errors++;
    }

 no_more_source:
  /* Close the run that is still open, then report only the input up
     to the start of the last (possibly incomplete) sequence as
     consumed.  */
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
5682
5683 static int
5684 encode_coding_charset (coding)
5685      struct coding_system *coding;
5686 {
5687   int multibytep = coding->dst_multibyte;
5688   int *charbuf = coding->charbuf;
5689   int *charbuf_end = charbuf + coding->charbuf_used;
5690   unsigned char *dst = coding->destination + coding->produced;
5691   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5692   int safe_room = MAX_MULTIBYTE_LENGTH;
5693   int produced_chars = 0;
5694   Lisp_Object attrs, charset_list;
5695   int ascii_compatible;
5696   int c;
5697
5698   CODING_GET_INFO (coding, attrs, charset_list);
5699   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5700
5701   while (charbuf < charbuf_end)
5702     {
5703       struct charset *charset;
5704       unsigned code;
5705
5706       ASSURE_DESTINATION (safe_room);
5707       c = *charbuf++;
5708       if (ascii_compatible && ASCII_CHAR_P (c))
5709         EMIT_ONE_ASCII_BYTE (c);
5710       else if (CHAR_BYTE8_P (c))
5711         {
5712           c = CHAR_TO_BYTE8 (c);
5713           EMIT_ONE_BYTE (c);
5714         }
5715       else
5716         {
5717           charset = char_charset (c, charset_list, &code);
5718           if (charset)
5719             {
5720               if (CHARSET_DIMENSION (charset) == 1)
5721                 EMIT_ONE_BYTE (code);
5722               else if (CHARSET_DIMENSION (charset) == 2)
5723                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5724               else if (CHARSET_DIMENSION (charset) == 3)
5725                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5726               else
5727                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5728                                  (code >> 8) & 0xFF, code & 0xFF);
5729             }
5730           else
5731             {
5732               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5733                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5734               else
5735                 c = coding->default_char;
5736               EMIT_ONE_BYTE (c);
5737             }
5738         }
5739     }
5740
5741   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5742   coding->produced_char += produced_chars;
5743   coding->produced = dst - coding->destination;
5744   return 0;
5745 }
5746
5747 \f
/*** 10. C library functions ***/
5749
5750 /* Setup coding context CODING from information about CODING_SYSTEM.
5751    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5752    CODING_SYSTEM is invalid, signal an error.  */
5753
5754 void
5755 setup_coding_system (coding_system, coding)
5756      Lisp_Object coding_system;
5757      struct coding_system *coding;
5758 {
5759   Lisp_Object attrs;
5760   Lisp_Object eol_type;
5761   Lisp_Object coding_type;
5762   Lisp_Object val;
5763
5764   if (NILP (coding_system))
5765     coding_system = Qundecided;
5766
5767   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5768
5769   attrs = CODING_ID_ATTRS (coding->id);
5770   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5771
5772   coding->mode = 0;
5773   coding->head_ascii = -1;
5774   if (VECTORP (eol_type))
5775     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5776                             | CODING_REQUIRE_DETECTION_MASK);
5777   else if (! EQ (eol_type, Qunix))
5778     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5779                             | CODING_REQUIRE_ENCODING_MASK);
5780   else
5781     coding->common_flags = 0;
5782   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5783     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5784   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5785     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5786   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5787     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5788
5789   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5790   coding->max_charset_id = SCHARS (val) - 1;
5791   coding->safe_charsets = SDATA (val);
5792   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5793
5794   coding_type = CODING_ATTR_TYPE (attrs);
5795   if (EQ (coding_type, Qundecided))
5796     {
5797       coding->detector = NULL;
5798       coding->decoder = decode_coding_raw_text;
5799       coding->encoder = encode_coding_raw_text;
5800       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5801     }
5802   else if (EQ (coding_type, Qiso_2022))
5803     {
5804       int i;
5805       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5806
5807       /* Invoke graphic register 0 to plane 0.  */
5808       CODING_ISO_INVOCATION (coding, 0) = 0;
5809       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5810       CODING_ISO_INVOCATION (coding, 1)
5811         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5812       /* Setup the initial status of designation.  */
5813       for (i = 0; i < 4; i++)
5814         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5815       /* Not single shifting initially.  */
5816       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5817       /* Beginning of buffer should also be regarded as bol. */
5818       CODING_ISO_BOL (coding) = 1;
5819       coding->detector = detect_coding_iso_2022;
5820       coding->decoder = decode_coding_iso_2022;
5821       coding->encoder = encode_coding_iso_2022;
5822       if (flags & CODING_ISO_FLAG_SAFE)
5823         coding->mode |= CODING_MODE_SAFE_ENCODING;
5824       coding->common_flags
5825         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5826             | CODING_REQUIRE_FLUSHING_MASK);
5827       if (flags & CODING_ISO_FLAG_COMPOSITION)
5828         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5829       if (flags & CODING_ISO_FLAG_DESIGNATION)
5830         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5831       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5832         {
5833           setup_iso_safe_charsets (attrs);
5834           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5835           coding->max_charset_id = SCHARS (val) - 1;
5836           coding->safe_charsets = SDATA (val);
5837         }
5838       CODING_ISO_FLAGS (coding) = flags;
5839       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5840       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5841       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5842       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5843     }
5844   else if (EQ (coding_type, Qcharset))
5845     {
5846       coding->detector = detect_coding_charset;
5847       coding->decoder = decode_coding_charset;
5848       coding->encoder = encode_coding_charset;
5849       coding->common_flags
5850         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5851     }
5852   else if (EQ (coding_type, Qutf_8))
5853     {
5854       val = AREF (attrs, coding_attr_utf_bom);
5855       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5856                                    : EQ (val, Qt) ? utf_with_bom
5857                                    : utf_without_bom);
5858       coding->detector = detect_coding_utf_8;
5859       coding->decoder = decode_coding_utf_8;
5860       coding->encoder = encode_coding_utf_8;
5861       coding->common_flags
5862         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5863       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5864         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5865     }
5866   else if (EQ (coding_type, Qutf_16))
5867     {
5868       val = AREF (attrs, coding_attr_utf_bom);
5869       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5870                                     : EQ (val, Qt) ? utf_with_bom
5871                                     : utf_without_bom);
5872       val = AREF (attrs, coding_attr_utf_16_endian);
5873       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5874                                        : utf_16_little_endian);
5875       CODING_UTF_16_SURROGATE (coding) = 0;
5876       coding->detector = detect_coding_utf_16;
5877       coding->decoder = decode_coding_utf_16;
5878       coding->encoder = encode_coding_utf_16;
5879       coding->common_flags
5880         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5881       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5882         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5883     }
5884   else if (EQ (coding_type, Qccl))
5885     {
5886       coding->detector = detect_coding_ccl;
5887       coding->decoder = decode_coding_ccl;
5888       coding->encoder = encode_coding_ccl;
5889       coding->common_flags
5890         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5891             | CODING_REQUIRE_FLUSHING_MASK);
5892     }
5893   else if (EQ (coding_type, Qemacs_mule))
5894     {
5895       coding->detector = detect_coding_emacs_mule;
5896       coding->decoder = decode_coding_emacs_mule;
5897       coding->encoder = encode_coding_emacs_mule;
5898       coding->common_flags
5899         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5900       coding->spec.emacs_mule.full_support = 1;
5901       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5902           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5903         {
5904           Lisp_Object tail, safe_charsets;
5905           int max_charset_id = 0;
5906
5907           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5908                tail = XCDR (tail))
5909             if (max_charset_id < XFASTINT (XCAR (tail)))
5910               max_charset_id = XFASTINT (XCAR (tail));
5911           safe_charsets = make_uninit_string (max_charset_id + 1);
5912           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5913           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5914                tail = XCDR (tail))
5915             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5916           coding->max_charset_id = max_charset_id;
5917           coding->safe_charsets = SDATA (safe_charsets);
5918           coding->spec.emacs_mule.full_support = 1;
5919         }
5920       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5921       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5922     }
5923   else if (EQ (coding_type, Qshift_jis))
5924     {
5925       coding->detector = detect_coding_sjis;
5926       coding->decoder = decode_coding_sjis;
5927       coding->encoder = encode_coding_sjis;
5928       coding->common_flags
5929         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5930     }
5931   else if (EQ (coding_type, Qbig5))
5932     {
5933       coding->detector = detect_coding_big5;
5934       coding->decoder = decode_coding_big5;
5935       coding->encoder = encode_coding_big5;
5936       coding->common_flags
5937         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5938     }
5939   else                          /* EQ (coding_type, Qraw_text) */
5940     {
5941       coding->detector = NULL;
5942       coding->decoder = decode_coding_raw_text;
5943       coding->encoder = encode_coding_raw_text;
5944       if (! EQ (eol_type, Qunix))
5945         {
5946           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5947           if (! VECTORP (eol_type))
5948             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5949         }
5950
5951     }
5952
5953   return;
5954 }
5955
5956 /* Return a list of charsets supported by CODING.  */
5957
5958 Lisp_Object
5959 coding_charset_list (coding)
5960      struct coding_system *coding;
5961 {
5962   Lisp_Object attrs, charset_list;
5963
5964   CODING_GET_INFO (coding, attrs, charset_list);
5965   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5966     {
5967       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5968
5969       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5970         charset_list = Viso_2022_charset_list;
5971     }
5972   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5973     {
5974       charset_list = Vemacs_mule_charset_list;
5975     }
5976   return charset_list;
5977 }
5978
5979
5980 /* Return a list of charsets supported by CODING-SYSTEM.  */
5981
5982 Lisp_Object
5983 coding_system_charset_list (coding_system)
5984      Lisp_Object coding_system;
5985 {
5986   int id;
5987   Lisp_Object attrs, charset_list;
5988
5989   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5990   attrs = CODING_ID_ATTRS (id);
5991
5992   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5993     {
5994       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5995
5996       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5997         charset_list = Viso_2022_charset_list;
5998       else
5999         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6000     }
6001   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
6002     {
6003       charset_list = Vemacs_mule_charset_list;
6004     }
6005   else
6006     {
6007       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6008     }
6009   return charset_list;
6010 }
6011
6012
6013 /* Return raw-text or one of its subsidiaries that has the same
6014    eol_type as CODING-SYSTEM.  */
6015
6016 Lisp_Object
6017 raw_text_coding_system (coding_system)
6018      Lisp_Object coding_system;
6019 {
6020   Lisp_Object spec, attrs;
6021   Lisp_Object eol_type, raw_text_eol_type;
6022
6023   if (NILP (coding_system))
6024     return Qraw_text;
6025   spec = CODING_SYSTEM_SPEC (coding_system);
6026   attrs = AREF (spec, 0);
6027
6028   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6029     return coding_system;
6030
6031   eol_type = AREF (spec, 2);
6032   if (VECTORP (eol_type))
6033     return Qraw_text;
6034   spec = CODING_SYSTEM_SPEC (Qraw_text);
6035   raw_text_eol_type = AREF (spec, 2);
6036   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6037           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6038           : AREF (raw_text_eol_type, 2));
6039 }
6040
6041
6042 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6043    does, return one of the subsidiary that has the same eol-spec as
6044    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
6045    inherit end-of-line format from the system's setting
6046    (system_eol_type).  */
6047
6048 Lisp_Object
6049 coding_inherit_eol_type (coding_system, parent)
6050      Lisp_Object coding_system, parent;
6051 {
6052   Lisp_Object spec, eol_type;
6053
6054   if (NILP (coding_system))
6055     coding_system = Qraw_text;
6056   spec = CODING_SYSTEM_SPEC (coding_system);
6057   eol_type = AREF (spec, 2);
6058   if (VECTORP (eol_type))
6059     {
6060       Lisp_Object parent_eol_type;
6061
6062       if (! NILP (parent))
6063         {
6064           Lisp_Object parent_spec;
6065
6066           parent_spec = CODING_SYSTEM_SPEC (parent);
6067           parent_eol_type = AREF (parent_spec, 2);
6068         }
6069       else
6070         parent_eol_type = system_eol_type;
6071       if (EQ (parent_eol_type, Qunix))
6072         coding_system = AREF (eol_type, 0);
6073       else if (EQ (parent_eol_type, Qdos))
6074         coding_system = AREF (eol_type, 1);
6075       else if (EQ (parent_eol_type, Qmac))
6076         coding_system = AREF (eol_type, 2);
6077     }
6078   return coding_system;
6079 }
6080
/* Emacs has a mechanism to automatically detect a coding system if it
   is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But
   it's impossible to distinguish some coding systems accurately
   because they use the same range of codes.  So, at first, coding
   systems are classified into the following categories:
6086
6087    o coding-category-emacs-mule
6088
6089         The category for a coding system which has the same code range
6090         as Emacs' internal format.  Assigned the coding-system (Lisp
6091         symbol) `emacs-mule' by default.
6092
6093    o coding-category-sjis
6094
6095         The category for a coding system which has the same code range
6096         as SJIS.  Assigned the coding-system (Lisp
6097         symbol) `japanese-shift-jis' by default.
6098
6099    o coding-category-iso-7
6100
6101         The category for a coding system which has the same code range
6102         as ISO2022 of 7-bit environment.  This doesn't use any locking
6103         shift and single shift functions.  This can encode/decode all
6104         charsets.  Assigned the coding-system (Lisp symbol)
6105         `iso-2022-7bit' by default.
6106
6107    o coding-category-iso-7-tight
6108
6109         Same as coding-category-iso-7 except that this can
6110         encode/decode only the specified charsets.
6111
6112    o coding-category-iso-8-1
6113
6114         The category for a coding system which has the same code range
6115         as ISO2022 of 8-bit environment and graphic plane 1 used only
6116         for DIMENSION1 charset.  This doesn't use any locking shift
6117         and single shift functions.  Assigned the coding-system (Lisp
6118         symbol) `iso-latin-1' by default.
6119
6120    o coding-category-iso-8-2
6121
6122         The category for a coding system which has the same code range
6123         as ISO2022 of 8-bit environment and graphic plane 1 used only
6124         for DIMENSION2 charset.  This doesn't use any locking shift
6125         and single shift functions.  Assigned the coding-system (Lisp
6126         symbol) `japanese-iso-8bit' by default.
6127
6128    o coding-category-iso-7-else
6129
        The category for a coding system which has the same code range
        as ISO2022 of 7-bit environment but uses locking shift or
        single shift functions.  Assigned the coding-system (Lisp
        symbol) `iso-2022-7bit-lock' by default.
6134
6135    o coding-category-iso-8-else
6136
        The category for a coding system which has the same code range
        as ISO2022 of 8-bit environment but uses locking shift or
        single shift functions.  Assigned the coding-system (Lisp
        symbol) `iso-2022-8bit-ss2' by default.
6141
6142    o coding-category-big5
6143
6144         The category for a coding system which has the same code range
6145         as BIG5.  Assigned the coding-system (Lisp symbol)
6146         `cn-big5' by default.
6147
6148    o coding-category-utf-8
6149
6150         The category for a coding system which has the same code range
6151         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6152         symbol) `utf-8' by default.
6153
6154    o coding-category-utf-16-be
6155
        The category for a coding system in which a text has a
        Unicode signature (cf. Unicode Standard) in the order of BIG
        endian at the head.  Assigned the coding-system (Lisp symbol)
        `utf-16-be' by default.
6160
6161    o coding-category-utf-16-le
6162
        The category for a coding system in which a text has a
        Unicode signature (cf. Unicode Standard) in the order of
        LITTLE endian at the head.  Assigned the coding-system (Lisp
        symbol) `utf-16-le' by default.
6167
6168    o coding-category-ccl
6169
6170         The category for a coding system of which encoder/decoder is
6171         written in CCL programs.  The default value is nil, i.e., no
6172         coding system is assigned.
6173
6174    o coding-category-binary
6175
6176         The category for a coding system not categorized in any of the
6177         above.  Assigned the coding-system (Lisp symbol)
6178         `no-conversion' by default.
6179
6180    Each of them is a Lisp symbol and the value is an actual
6181    `coding-system's (this is also a Lisp symbol) assigned by a user.
6182    What Emacs does actually is to detect a category of coding system.
6183    Then, it uses a `coding-system' assigned to it.  If Emacs can't
6184    decide only one possible category, it selects a category of the
6185    highest priority.  Priorities of categories are also specified by a
6186    user in a Lisp variable `coding-category-list'.
6187
6188 */
6189
6190 #define EOL_SEEN_NONE   0
6191 #define EOL_SEEN_LF     1
6192 #define EOL_SEEN_CR     2
6193 #define EOL_SEEN_CRLF   4
6194
6195 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6196    SOURCE is encoded.  If CATEGORY is one of
6197    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6198    two-byte, else they are encoded by one-byte.
6199
6200    Return one of EOL_SEEN_XXX.  */
6201
6202 #define MAX_EOL_CHECK_COUNT 3
6203
6204 static int
6205 detect_eol (source, src_bytes, category)
6206      const unsigned char *source;
6207      EMACS_INT src_bytes;
6208      enum coding_category category;
6209 {
6210   const unsigned char *src = source, *src_end = src + src_bytes;
6211   unsigned char c;
6212   int total  = 0;
6213   int eol_seen = EOL_SEEN_NONE;
6214
6215   if ((1 << category) & CATEGORY_MASK_UTF_16)
6216     {
6217       int msb, lsb;
6218
6219       msb = category == (coding_category_utf_16_le
6220                          | coding_category_utf_16_le_nosig);
6221       lsb = 1 - msb;
6222
6223       while (src + 1 < src_end)
6224         {
6225           c = src[lsb];
6226           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6227             {
6228               int this_eol;
6229
6230               if (c == '\n')
6231                 this_eol = EOL_SEEN_LF;
6232               else if (src + 3 >= src_end
6233                        || src[msb + 2] != 0
6234                        || src[lsb + 2] != '\n')
6235                 this_eol = EOL_SEEN_CR;
6236               else
6237                 {
6238                   this_eol = EOL_SEEN_CRLF;
6239                   src += 2;
6240                 }
6241
6242               if (eol_seen == EOL_SEEN_NONE)
6243                 /* This is the first end-of-line.  */
6244                 eol_seen = this_eol;
6245               else if (eol_seen != this_eol)
6246                 {
6247                   /* The found type is different from what found before.
6248                      Allow for stray ^M characters in DOS EOL files.  */
6249                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6250                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6251                     eol_seen = EOL_SEEN_CRLF;
6252                   else
6253                     {
6254                       eol_seen = EOL_SEEN_LF;
6255                       break;
6256                     }
6257                 }
6258               if (++total == MAX_EOL_CHECK_COUNT)
6259                 break;
6260             }
6261           src += 2;
6262         }
6263     }
6264   else
6265     {
6266       while (src < src_end)
6267         {
6268           c = *src++;
6269           if (c == '\n' || c == '\r')
6270             {
6271               int this_eol;
6272
6273               if (c == '\n')
6274                 this_eol = EOL_SEEN_LF;
6275               else if (src >= src_end || *src != '\n')
6276                 this_eol = EOL_SEEN_CR;
6277               else
6278                 this_eol = EOL_SEEN_CRLF, src++;
6279
6280               if (eol_seen == EOL_SEEN_NONE)
6281                 /* This is the first end-of-line.  */
6282                 eol_seen = this_eol;
6283               else if (eol_seen != this_eol)
6284                 {
6285                   /* The found type is different from what found before.
6286                      Allow for stray ^M characters in DOS EOL files.  */
6287                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6288                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6289                     eol_seen = EOL_SEEN_CRLF;
6290                   else
6291                     {
6292                       eol_seen = EOL_SEEN_LF;
6293                       break;
6294                     }
6295                 }
6296               if (++total == MAX_EOL_CHECK_COUNT)
6297                 break;
6298             }
6299         }
6300     }
6301   return eol_seen;
6302 }
6303
6304
6305 static Lisp_Object
6306 adjust_coding_eol_type (coding, eol_seen)
6307      struct coding_system *coding;
6308      int eol_seen;
6309 {
6310   Lisp_Object eol_type;
6311
6312   eol_type = CODING_ID_EOL_TYPE (coding->id);
6313   if (eol_seen & EOL_SEEN_LF)
6314     {
6315       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6316       eol_type = Qunix;
6317     }
6318   else if (eol_seen & EOL_SEEN_CRLF)
6319     {
6320       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6321       eol_type = Qdos;
6322     }
6323   else if (eol_seen & EOL_SEEN_CR)
6324     {
6325       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6326       eol_type = Qmac;
6327     }
6328   return eol_type;
6329 }
6330
6331 /* Detect how the text specified in CODING is encoded.  If a coding
6332    system is detected, update the fields of CODING for the detected
6333    coding system by calling setup_coding_system.

        Three cases are handled, selected by the coding system CODING
        currently has:
          - its type is `undecided': the source bytes are scanned and the
            category detectors are tried in the order of
            coding_priorities;
          - its category is coding_category_utf_8_auto: one of the two
            coding systems in its coding_attr_utf_bom attribute is chosen,
            depending on whether detect_coding_utf_8 reports
            CATEGORY_MASK_UTF_8_SIG;
          - its category is coding_category_utf_16_auto: likewise,
            depending on whether detect_coding_utf_16 reports the LE or
            the BE category.
        Any other coding system is left untouched.

        The consumed/produced counters and CODING->head_ascii are reset
        on entry, and CODING->mode is restored to its entry value before
        returning.  */
6334
6335 void
6336 detect_coding (coding)
6337      struct coding_system *coding;
6338 {
6339   const unsigned char *src, *src_end;
6340   int saved_mode = coding->mode;
6341
6342   coding->consumed = coding->consumed_char = 0;
6343   coding->produced = coding->produced_char = 0;
6344   coding_set_source (coding);
6345
6346   src_end = coding->source + coding->src_bytes;
6347   coding->head_ascii = 0;
6348
6349   /* If we have not yet decided the text encoding type, detect it
6350      now.  */
6351   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6352     {
6353       int c, i;
6354       struct coding_detection_info detect_info;
6355       int null_byte_found = 0, eight_bit_found = 0;
6356
6357       detect_info.checked = detect_info.found = detect_info.rejected = 0;
           /* First pass over the source bytes.  It records whether a byte
              with the 0x80 bit set and whether a null byte occur, and
              increments CODING->head_ascii for each 7-bit byte passed
              while no 8-bit byte has been seen yet.  The scan stops as
              soon as both kinds of byte have been seen, or when the
              ISO-2022 detector reports a result.  */
6358       for (src = coding->source; src < src_end; src++)
6359         {
6360           c = *src;
6361           if (c & 0x80)
6362             {
6363               eight_bit_found = 1;
6364               if (null_byte_found)
6365                 break;
6366             }
6367           else if (c < 0x20)
6368             {
                   /* The ISO-2022 detector is tried only on the first
                      ESC, SI or SO, i.e. while no category has been
                      marked as checked yet.  */
6369               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6370                   && ! inhibit_iso_escape_detection
6371                   && ! detect_info.checked)
6372                 {
6373                   if (detect_coding_iso_2022 (coding, &detect_info))
6374                     {
6375                       /* We have scanned the whole data.  */
6376                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6377                         {
6378                           /* We didn't find an 8-bit code.  We may
6379                              have found a null-byte, but it's very
6380                              rare that a binary file conforms to
6381                              ISO-2022.  */
6382                           src = src_end;
6383                           coding->head_ascii = src - coding->source;
6384                         }
                           /* Reject every category outside
                              CATEGORY_MASK_ISO_ESCAPE.  */
6385                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6386                       break;
6387                     }
6388                 }
6389               else if (! c && !inhibit_null_byte_detection)
6390                 {
6391                   null_byte_found = 1;
6392                   if (eight_bit_found)
6393                     break;
6394                 }
6395               if (! eight_bit_found)
6396                 coding->head_ascii++;
6397             }
6398           else if (! eight_bit_found)
6399             coding->head_ascii++;
6400         }
6401
           /* Go on only if the scan above saw something other than plain
              7-bit text, or if a detector already found a category.  */
6402       if (null_byte_found || eight_bit_found
6403           || coding->head_ascii < coding->src_bytes
6404           || detect_info.found)
6405         {
6406           enum coding_category category;
6407           struct coding_system *this;
6408
               /* Both loops below leave I < coding_category_raw_text, with
                  THIS pointing to the entry of coding_categories to use,
                  exactly when they end by `break'.  */
6409           if (coding->head_ascii == coding->src_bytes)
6410             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6411             for (i = 0; i < coding_category_raw_text; i++)
6412               {
6413                 category = coding_priorities[i];
6414                 this = coding_categories + category;
6415                 if (detect_info.found & (1 << category))
6416                   break;
6417               }
6418           else
6419             {
6420               if (null_byte_found)
6421                 {
                       /* Mark every category outside CATEGORY_MASK_UTF_16
                          as already checked and rejected, so that the loop
                          below does not run its detector.  */
6422                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6423                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6424                 }
6425               for (i = 0; i < coding_category_raw_text; i++)
6426                 {
6427                   category = coding_priorities[i];
6428                   this = coding_categories + category;
6429                   if (this->id < 0)
6430                     {
6431                       /* No coding system of this category is defined.  */
6432                       detect_info.rejected |= (1 << category);
6433                     }
6434                   else if (category >= coding_category_raw_text)
6435                     continue;
6436                   else if (detect_info.checked & (1 << category))
6437                     {
                           /* Already checked by an earlier detector call;
                              just use its verdict.  */
6438                       if (detect_info.found & (1 << category))
6439                         break;
6440                     }
6441                   else if ((*(this->detector)) (coding, &detect_info)
6442                            && detect_info.found & (1 << category))
6443                     {
                           /* NOTE(review): CATEGORY is not read again after
                              this loop (only I and THIS are), so the
                              reassignment below does not change which
                              coding system is set up -- confirm whether
                              THIS was meant to be updated as well.  */
6444                       if (category == coding_category_utf_16_auto)
6445                         {
6446                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6447                             category = coding_category_utf_16_le;
6448                           else
6449                             category = coding_category_utf_16_be;
6450                         }
6451                       break;
6452                     }
6453                 }
6454             }
6455
               /* Set up CODING from the outcome: the category found above;
                  else `no-conversion' if a null byte was seen; else
                  `raw-text' if every category in CATEGORY_MASK_ANY was
                  rejected; else, if at least one category was rejected,
                  the first category in priority order that was not.
                  If nothing was rejected, CODING is left as it was.  */
6456           if (i < coding_category_raw_text)
6457             setup_coding_system (CODING_ID_NAME (this->id), coding);
6458           else if (null_byte_found)
6459             setup_coding_system (Qno_conversion, coding);
6460           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6461                    == CATEGORY_MASK_ANY)
6462             setup_coding_system (Qraw_text, coding);
6463           else if (detect_info.rejected)
6464             for (i = 0; i < coding_category_raw_text; i++)
6465               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6466                 {
6467                   this = coding_categories + coding_priorities[i];
6468                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6469                   break;
6470                 }
6471         }
6472     }
6473   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6474            == coding_category_utf_8_auto)
6475     {
6476       Lisp_Object coding_systems;
6477       struct coding_detection_info detect_info;
6478
           /* The attribute is used only when it is a cons: its car is
              taken when CATEGORY_MASK_UTF_8_SIG was found, its cdr
              otherwise.  */
6479       coding_systems
6480         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6481       detect_info.found = detect_info.rejected = 0;
6482       coding->head_ascii = 0;
6483       if (CONSP (coding_systems)
6484           && detect_coding_utf_8 (coding, &detect_info))
6485         {
6486           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6487             setup_coding_system (XCAR (coding_systems), coding);
6488           else
6489             setup_coding_system (XCDR (coding_systems), coding);
6490         }
6491     }
6492   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6493            == coding_category_utf_16_auto)
6494     {
6495       Lisp_Object coding_systems;
6496       struct coding_detection_info detect_info;
6497
           /* The attribute is used only when it is a cons: its car is
              taken for CATEGORY_MASK_UTF_16_LE, its cdr for
              CATEGORY_MASK_UTF_16_BE; if neither was found, CODING is
              left as it was.  */
6498       coding_systems
6499         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6500       detect_info.found = detect_info.rejected = 0;
6501       coding->head_ascii = 0;
6502       if (CONSP (coding_systems)
6503           && detect_coding_utf_16 (coding, &detect_info))
6504         {
6505           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6506             setup_coding_system (XCAR (coding_systems), coding);
6507           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6508             setup_coding_system (XCDR (coding_systems), coding);
6509         }
6510     }
6511   coding->mode = saved_mode;
6512 }
6513
6514
6515 static void
6516 decode_eol (coding)
6517      struct coding_system *coding;
6518 {
6519   Lisp_Object eol_type;
6520   unsigned char *p, *pbeg, *pend;
6521
6522   eol_type = CODING_ID_EOL_TYPE (coding->id);
6523   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6524     return;
6525
6526   if (NILP (coding->dst_object))
6527     pbeg = coding->destination;
6528   else
6529     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6530   pend = pbeg + coding->produced;
6531
6532   if (VECTORP (eol_type))
6533     {
6534       int eol_seen = EOL_SEEN_NONE;
6535
6536       for (p = pbeg; p < pend; p++)
6537         {
6538           if (*p == '\n')
6539             eol_seen |= EOL_SEEN_LF;
6540           else if (*p == '\r')
6541             {
6542               if (p + 1 < pend && *(p + 1) == '\n')
6543                 {
6544                   eol_seen |= EOL_SEEN_CRLF;
6545                   p++;
6546                 }
6547               else
6548                 eol_seen |= EOL_SEEN_CR;
6549             }
6550         }
6551       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6552       if ((eol_seen & EOL_SEEN_CRLF) != 0
6553           && (eol_seen & EOL_SEEN_CR) != 0
6554           && (eol_seen & EOL_SEEN_LF) == 0)
6555         eol_seen = EOL_SEEN_CRLF;
6556       else if (eol_seen != EOL_SEEN_NONE
6557           && eol_seen != EOL_SEEN_LF
6558           && eol_seen != EOL_SEEN_CRLF
6559           && eol_seen != EOL_SEEN_CR)
6560         eol_seen = EOL_SEEN_LF;
6561       if (eol_seen != EOL_SEEN_NONE)
6562         eol_type = adjust_coding_eol_type (coding, eol_seen);
6563     }
6564
6565   if (EQ (eol_type, Qmac))
6566     {
6567       for (p = pbeg; p < pend; p++)
6568         if (*p == '\r')
6569           *p = '\n';
6570     }
6571   else if (EQ (eol_type, Qdos))
6572     {
6573       int n = 0;
6574
6575       if (NILP (coding->dst_object))
6576         {
6577           /* Start deleting '\r' from the tail to minimize the memory
6578              movement.  */
6579           for (p = pend - 2; p >= pbeg; p--)
6580             if (*p == '\r')
6581               {
6582                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6583                 n++;
6584               }
6585         }
6586       else
6587         {
6588           int pos_byte = coding->dst_pos_byte;
6589           int pos = coding->dst_pos;
6590           int pos_end = pos + coding->produced_char - 1;
6591
6592           while (pos < pos_end)
6593             {
6594               p = BYTE_POS_ADDR (pos_byte);
6595               if (*p == '\r' && p[1] == '\n')
6596                 {
6597                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6598                   n++;
6599                   pos_end--;
6600                 }
6601               pos++;
6602               if (coding->dst_multibyte)
6603                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6604               else
6605                 pos_byte++;
6606             }
6607         }
6608       coding->produced -= n;
6609       coding->produced_char -= n;
6610     }
6611 }
6612
6613
6614 /* Return a translation table (or list of them) from coding system
6615    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6616    decoding (ENCODEP is zero). */
6617
6618 static Lisp_Object
6619 get_translation_table (attrs, encodep, max_lookup)
6620      Lisp_Object attrs;
6621      int encodep, *max_lookup;
6622 {
6623   Lisp_Object standard, translation_table;
6624   Lisp_Object val;
6625
6626   if (NILP (Venable_character_translation))
6627     {
6628       if (max_lookup)
6629         *max_lookup = 0;
6630       return Qnil;
6631     }
6632   if (encodep)
6633     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6634       standard = Vstandard_translation_table_for_encode;
6635   else
6636     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6637       standard = Vstandard_translation_table_for_decode;
6638   if (NILP (translation_table))
6639     translation_table = standard;
6640   else
6641     {
6642       if (SYMBOLP (translation_table))
6643         translation_table = Fget (translation_table, Qtranslation_table);
6644       else if (CONSP (translation_table))
6645         {
6646           translation_table = Fcopy_sequence (translation_table);
6647           for (val = translation_table; CONSP (val); val = XCDR (val))
6648             if (SYMBOLP (XCAR (val)))
6649               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6650         }
6651       if (CHAR_TABLE_P (standard))
6652         {
6653           if (CONSP (translation_table))
6654             translation_table = nconc2 (translation_table,
6655                                         Fcons (standard, Qnil));
6656           else
6657             translation_table = Fcons (translation_table,
6658                                        Fcons (standard, Qnil));
6659         }
6660     }
6661
6662   if (max_lookup)
6663     {
6664       *max_lookup = 1;
6665       if (CHAR_TABLE_P (translation_table)
6666           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6667         {
6668           val = XCHAR_TABLE (translation_table)->extras[1];
6669           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6670             *max_lookup = XFASTINT (val);
6671         }
6672       else if (CONSP (translation_table))
6673         {
6674           Lisp_Object tail, val;
6675
6676           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6677             if (CHAR_TABLE_P (XCAR (tail))
6678                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6679               {
6680                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6681                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6682                   *max_lookup = XFASTINT (val);
6683               }
6684         }
6685     }
6686   return translation_table;
6687 }
6688
/* Look up character C in TABLE, which is a char table or a list
   whose char-table elements are tried in order (other elements are
   skipped).  C and TRANS must be lvalues; TABLE is evaluated more
   than once.

   Each time a lookup yields a character, C is replaced by it and
   TRANS is reset to Qnil; in a list, the lookup then goes on with
   the next table.  A lookup that yields a non-nil non-character is
   left in TRANS, and in a list it also ends the search.  TRANS is
   Qnil when TABLE is neither a char table nor a cons.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    trans = Qnil;                                                       \
    if (CHAR_TABLE_P (table))                                           \
      {                                                                 \
        trans = CHAR_TABLE_REF (table, c);                              \
        if (CHARACTERP (trans))                                         \
          {                                                             \
            c = XFASTINT (trans);                                       \
            trans = Qnil;                                               \
          }                                                             \
      }                                                                 \
    else if (CONSP (table))                                             \
      {                                                                 \
        Lisp_Object lookup_rest_;                                       \
                                                                        \
        for (lookup_rest_ = table; CONSP (lookup_rest_);                \
             lookup_rest_ = XCDR (lookup_rest_))                        \
          {                                                             \
            if (! CHAR_TABLE_P (XCAR (lookup_rest_)))                   \
              continue;                                                 \
            trans = CHAR_TABLE_REF (XCAR (lookup_rest_), c);            \
            if (CHARACTERP (trans))                                     \
              {                                                         \
                c = XFASTINT (trans);                                   \
                trans = Qnil;                                           \
              }                                                         \
            else if (! NILP (trans))                                    \
              break;                                                    \
          }                                                             \
      }                                                                 \
  } while (0)
6713
6714
6715 /* Return a translation of character(s) at BUF according to TRANS.
6716    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6717    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6718    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6719    translation is found, and Qnil if not found..
6720    If BUF is too short to lookup characters in FROM, return Qt.  */
6721
6722 static Lisp_Object
6723 get_translation (trans, buf, buf_end)
6724      Lisp_Object trans;
6725      int *buf, *buf_end;
6726 {
6727
6728   if (INTEGERP (trans))
6729     return trans;
6730   for (; CONSP (trans); trans = XCDR (trans))
6731     {
6732       Lisp_Object val = XCAR (trans);
6733       Lisp_Object from = XCAR (val);
6734       int len = ASIZE (from);
6735       int i;
6736
6737       for (i = 0; i < len; i++)
6738         {
6739           if (buf + i == buf_end)
6740             return Qt;
6741           if (XINT (AREF (from, i)) != buf[i])
6742             break;
6743         }
6744       if (i == len)
6745         return val;
6746     }
6747   return Qnil;
6748 }
6749
6750
/* Store converted text for CODING at CODING->destination, after the
   CODING->produced bytes already stored there.

   If CODING->chars_at_source is zero, the characters are taken from
   CODING->charbuf (CODING->charbuf_used elements).  A non-negative
   element is a character; it is translated through
   TRANSLATION_TABLE, which may map a sequence of characters to one
   or more characters.  A negative element -N starts an annotation
   datum of N elements, which is skipped.  When a translation needs
   more characters than remain in the buffer, the loop stops there
   unless LAST_BLOCK is nonzero.

   Otherwise the CODING->consumed bytes at CODING->source are copied.
   NOTE(review): when the multibyteness of source and destination
   differ, the bytes go through ONE_MORE_BYTE or EMIT_ONE_BYTE, which
   are defined outside this function -- see there for the exact
   conversion.

   Finally CODING->produced and CODING->produced_char are increased,
   and insert_from_gap is called when CODING->dst_object is a buffer
   and at least one character was produced.

   Return the number of elements at the end of CODING->charbuf that
   were not processed (always 0 when CODING->chars_at_source is
   nonzero).  */
6751 static int
6752 produce_chars (coding, translation_table, last_block)
6753      struct coding_system *coding;
6754      Lisp_Object translation_table;
6755      int last_block;
6756 {
6757   unsigned char *dst = coding->destination + coding->produced;
6758   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6759   EMACS_INT produced;
6760   EMACS_INT produced_chars = 0;
6761   int carryover = 0;
6762
6763   if (! coding->chars_at_source)
6764     {
6765       /* Source characters are in coding->charbuf.  */
6766       int *buf = coding->charbuf;
6767       int *buf_end = buf + coding->charbuf_used;
6768
           /* When source and destination are the same object, the
              writable area is made to end where the consumed part of the
              source ends.  */
6769       if (EQ (coding->src_object, coding->dst_object))
6770         {
6771           coding_set_source (coding);
6772           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6773         }
6774
6775       while (buf < buf_end)
6776         {
6777           int c = *buf, i;
6778
6779           if (c >= 0)
6780             {
                   /* Number of source characters replaced, and number of
                      characters they are replaced by.  */
6781               int from_nchars = 1, to_nchars = 1;
6782               Lisp_Object trans = Qnil;
6783
6784               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6785               if (! NILP (trans))
6786                 {
6787                   trans = get_translation (trans, buf, buf_end);
6788                   if (INTEGERP (trans))
6789                     c = XINT (trans);
6790                   else if (CONSP (trans))
6791                     {
                           /* TRANS is ([FROM-CHAR ...] . TO); TO is a
                              character or a vector of characters.  */
6792                       from_nchars = ASIZE (XCAR (trans));
6793                       trans = XCDR (trans);
6794                       if (INTEGERP (trans))
6795                         c = XINT (trans);
6796                       else
6797                         {
6798                           to_nchars = ASIZE (trans);
6799                           c = XINT (AREF (trans, 0));
6800                         }
6801                     }
                       /* Qt means that more characters are needed to
                          decide.  Leave the rest as carryover, unless this
                          is the last block, in which case C is output
                          untranslated.  */
6802                   else if (EQ (trans, Qt) && ! last_block)
6803                     break;
6804                 }
6805
                   /* Make sure there is room for TO_NCHARS characters of
                      the maximum length before writing them.  */
6806               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6807                 {
6808                   dst = alloc_destination (coding,
6809                                            buf_end - buf
6810                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6811                                            dst);
6812                   if (EQ (coding->src_object, coding->dst_object))
6813                     {
6814                       coding_set_source (coding);
6815                       dst_end = (((unsigned char *) coding->source)
6816                                  + coding->consumed);
6817                     }
6818                   else
6819                     dst_end = coding->destination + coding->dst_bytes;
6820                 }
6821
6822               for (i = 0; i < to_nchars; i++)
6823                 {
6824                   if (i > 0)
6825                     c = XINT (AREF (trans, i));
6826                   if (coding->dst_multibyte
6827                       || ! CHAR_BYTE8_P (c))
6828                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6829                   else
6830                     *dst++ = CHAR_TO_BYTE8 (c);
6831                 }
6832               produced_chars += to_nchars;
6833               buf += from_nchars;
6834             }
6835           else
6836             /* This is an annotation datum.  (-C) is the length.  */
6837             buf += -c;
6838         }
6839       carryover = buf_end - buf;
6840     }
6841   else
6842     {
6843       /* Source characters are at coding->source.  */
6844       const unsigned char *src = coding->source;
6845       const unsigned char *src_end = src + coding->consumed;
6846
6847       if (EQ (coding->dst_object, coding->src_object))
6848         dst_end = (unsigned char *) src;
6849       if (coding->src_multibyte != coding->dst_multibyte)
6850         {
6851           if (coding->src_multibyte)
6852             {
                   /* NOTE(review): MULTIBYTEP, CONSUMED_CHARS and SRC_BASE
                      are not referenced directly in this function, and
                      neither is the label `no_more_source'; presumably
                      ONE_MORE_BYTE uses them -- confirm against its
                      definition.  */
6853               int multibytep = 1;
6854               EMACS_INT consumed_chars = 0;
6855
6856               while (1)
6857                 {
6858                   const unsigned char *src_base = src;
6859                   int c;
6860
6861                   ONE_MORE_BYTE (c);
6862                   if (dst == dst_end)
6863                     {
6864                       if (EQ (coding->src_object, coding->dst_object))
6865                         dst_end = (unsigned char *) src;
6866                       if (dst == dst_end)
6867                         {
                               /* Remember the source position as an
                                  offset; the pointers are recomputed from
                                  CODING->source after the allocation.  */
6868                           EMACS_INT offset = src - coding->source;
6869
6870                           dst = alloc_destination (coding, src_end - src + 1,
6871                                                    dst);
6872                           dst_end = coding->destination + coding->dst_bytes;
6873                           coding_set_source (coding);
6874                           src = coding->source + offset;
6875                           src_end = coding->source + coding->src_bytes;
6876                           if (EQ (coding->src_object, coding->dst_object))
6877                             dst_end = (unsigned char *) src;
6878                         }
6879                     }
6880                   *dst++ = c;
6881                   produced_chars++;
6882                 }
6883             no_more_source:
6884               ;
6885             }
6886           else
6887             while (src < src_end)
6888               {
                     /* NOTE(review): MULTIBYTEP is not referenced directly
                        here; presumably EMIT_ONE_BYTE uses it, together
                        with DST and PRODUCED_CHARS -- confirm against its
                        definition.  */
6889                 int multibytep = 1;
6890                 int c = *src++;
6891
6892                 if (dst >= dst_end - 1)
6893                   {
6894                     if (EQ (coding->src_object, coding->dst_object))
6895                       dst_end = (unsigned char *) src;
6896                     if (dst >= dst_end - 1)
6897                       {
6898                         EMACS_INT offset = src - coding->source;
6899                         EMACS_INT more_bytes;
6900
6901                         if (EQ (coding->src_object, coding->dst_object))
6902                           more_bytes = ((src_end - src) / 2) + 2;
6903                         else
6904                           more_bytes = src_end - src + 2;
6905                         dst = alloc_destination (coding, more_bytes, dst);
6906                         dst_end = coding->destination + coding->dst_bytes;
6907                         coding_set_source (coding);
6908                         src = coding->source + offset;
6909                         src_end = coding->source + coding->src_bytes;
6910                         if (EQ (coding->src_object, coding->dst_object))
6911                           dst_end = (unsigned char *) src;
6912                       }
6913                   }
6914                 EMIT_ONE_BYTE (c);
6915               }
6916         }
6917       else
6918         {
               /* Source and destination have the same multibyteness: the
                  bytes are copied verbatim, and the number of characters
                  produced is the number of characters consumed.  */
6919           if (!EQ (coding->src_object, coding->dst_object))
6920             {
6921               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6922
6923               if (require > 0)
6924                 {
6925                   EMACS_INT offset = src - coding->source;
6926
6927                   dst = alloc_destination (coding, require, dst);
6928                   coding_set_source (coding);
6929                   src = coding->source + offset;
6930                   src_end = coding->source + coding->src_bytes;
6931                 }
6932             }
6933           produced_chars = coding->consumed_char;
6934           while (src < src_end)
6935             *dst++ = *src++;
6936         }
6937     }
6938
       /* Number of bytes written by this call.  */
6939   produced = dst - (coding->destination + coding->produced);
6940   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6941     insert_from_gap (produced_chars, produced);
6942   coding->produced += produced;
6943   coding->produced_char += produced_chars;
6944   return carryover;
6945 }
6946
6947 /* Compose text in CODING->object according to the annotation data at
6948    CHARBUF.  CHARBUF is an array:
6949      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6950  */
6951
6952 static INLINE void
6953 produce_composition (coding, charbuf, pos)
6954      struct coding_system *coding;
6955      int *charbuf;
6956      EMACS_INT pos;
6957 {
6958   int len;
6959   EMACS_INT to;
6960   enum composition_method method;
6961   Lisp_Object components;
6962
6963   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6964   to = pos + charbuf[2];
6965   method = (enum composition_method) (charbuf[4]);
6966
6967   if (method == COMPOSITION_RELATIVE)
6968     components = Qnil;
6969   else
6970     {
6971       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6972       int i, j;
6973
6974       if (method == COMPOSITION_WITH_RULE)
6975         len = charbuf[2] * 3 - 2;
6976       charbuf += MAX_ANNOTATION_LENGTH;
6977       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6978       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6979         {
6980           if (charbuf[i] >= 0)
6981             args[j] = make_number (charbuf[i]);
6982           else
6983             {
6984               i++;
6985               args[j] = make_number (charbuf[i] % 0x100);
6986             }
6987         }
6988       components = (i == j ? Fstring (j, args) : Fvector (j, args));
6989     }
6990   compose_text (pos, to, components, Qnil, coding->dst_object);
6991 }
6992
6993
6994 /* Put `charset' property on text in CODING->object according to
6995    the annotation data at CHARBUF.  CHARBUF is an array:
6996      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6997  */
6998
6999 static INLINE void
7000 produce_charset (coding, charbuf, pos)
7001      struct coding_system *coding;
7002      int *charbuf;
7003      EMACS_INT pos;
7004 {
7005   EMACS_INT from = pos - charbuf[2];
7006   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
7007
7008   Fput_text_property (make_number (from), make_number (pos),
7009                       Qcharset, CHARSET_NAME (charset),
7010                       coding->dst_object);
7011 }
7012
7013
/* Number of elements first tried for the conversion work area.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca and record its size in
   CODING->charbuf_size.  CHARBUF_SIZE elements are tried first, and
   the count is halved after each failure for as long as it stays
   above 1024.

   If no attempt succeeds, record CODING_RESULT_INSUFFICIENT_MEM and
   make the calling function return CODING->result.  Because of that
   `return', and because the memory comes from alloca, this can be
   used only directly in the body of a function returning such a
   value.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int work_nelts_;                                                    \
                                                                        \
    coding->charbuf = NULL;                                             \
    for (work_nelts_ = CHARBUF_SIZE; work_nelts_ > 1024;                \
         work_nelts_ >>= 1)                                             \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * work_nelts_);  \
        if (coding->charbuf)                                            \
          break;                                                        \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = work_nelts_;                                 \
  } while (0)
7035
7036
7037 static void
7038 produce_annotation (coding, pos)
7039      struct coding_system *coding;
7040      EMACS_INT pos;
7041 {
7042   int *charbuf = coding->charbuf;
7043   int *charbuf_end = charbuf + coding->charbuf_used;
7044
7045   if (NILP (coding->dst_object))
7046     return;
7047
7048   while (charbuf < charbuf_end)
7049     {
7050       if (*charbuf >= 0)
7051         pos++, charbuf++;
7052       else
7053         {
7054           int len = -*charbuf;
7055
7056           if (len > 2)
7057             switch (charbuf[1])
7058               {
7059               case CODING_ANNOTATE_COMPOSITION_MASK:
7060                 produce_composition (coding, charbuf, pos);
7061                 break;
7062               case CODING_ANNOTATE_CHARSET_MASK:
7063                 produce_charset (coding, charbuf, pos);
7064                 break;
7065               }
7066           charbuf += len;
7067         }
7068     }
7069 }
7070
7071 /* Decode the data at CODING->src_object into CODING->dst_object.
7072    CODING->src_object is a buffer, a string, or nil.
7073    CODING->dst_object is a buffer.
7074
7075    If CODING->src_object is a buffer, it must be the current buffer.
7076    In this case, if CODING->src_pos is positive, it is a position of
7077    the source text in the buffer, otherwise, the source text is in the
7078    gap area of the buffer, and CODING->src_pos specifies the offset of
7079    the text from GPT (which must be the same as PT).  If this is the
7080    same buffer as CODING->dst_object, CODING->src_pos must be
7081    negative.
7082
7083    If CODING->src_object is a string, CODING->src_pos is an index to
7084    that string.
7085
7086    If CODING->src_object is nil, CODING->source must already point to
7087    the non-relocatable memory area.  In this case, CODING->src_pos is
7088    an offset from CODING->source.
7089
7090    The decoded data is inserted at the current point of the buffer
7091    CODING->dst_object.
7092 */
7093
7094 static int
7095 decode_coding (coding)
7096      struct coding_system *coding;
7097 {
7098   Lisp_Object attrs;
7099   Lisp_Object undo_list;
7100   Lisp_Object translation_table;
7101   int carryover;
7102   int i;
7103
7104   if (BUFFERP (coding->src_object)
7105       && coding->src_pos > 0
7106       && coding->src_pos < GPT
7107       && coding->src_pos + coding->src_chars > GPT)
7108     move_gap_both (coding->src_pos, coding->src_pos_byte);
7109
7110   undo_list = Qt;
7111   if (BUFFERP (coding->dst_object))
7112     {
7113       if (current_buffer != XBUFFER (coding->dst_object))
7114         set_buffer_internal (XBUFFER (coding->dst_object));
7115       if (GPT != PT)
7116         move_gap_both (PT, PT_BYTE);
7117       undo_list = current_buffer->undo_list;
7118       current_buffer->undo_list = Qt;
7119     }
7120
7121   coding->consumed = coding->consumed_char = 0;
7122   coding->produced = coding->produced_char = 0;
7123   coding->chars_at_source = 0;
7124   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7125   coding->errors = 0;
7126
7127   ALLOC_CONVERSION_WORK_AREA (coding);
7128
7129   attrs = CODING_ID_ATTRS (coding->id);
7130   translation_table = get_translation_table (attrs, 0, NULL);
7131
7132   carryover = 0;
7133   do
7134     {
7135       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7136
7137       coding_set_source (coding);
7138       coding->annotated = 0;
7139       coding->charbuf_used = carryover;
7140       (*(coding->decoder)) (coding);
7141       coding_set_destination (coding);
7142       carryover = produce_chars (coding, translation_table, 0);
7143       if (coding->annotated)
7144         produce_annotation (coding, pos);
7145       for (i = 0; i < carryover; i++)
7146         coding->charbuf[i]
7147           = coding->charbuf[coding->charbuf_used - carryover + i];
7148     }
7149   while (coding->consumed < coding->src_bytes
7150          && (coding->result == CODING_RESULT_SUCCESS
7151              || coding->result == CODING_RESULT_INVALID_SRC));
7152
7153   if (carryover > 0)
7154     {
7155       coding_set_destination (coding);
7156       coding->charbuf_used = carryover;
7157       produce_chars (coding, translation_table, 1);
7158     }
7159
7160   coding->carryover_bytes = 0;
7161   if (coding->consumed < coding->src_bytes)
7162     {
7163       int nbytes = coding->src_bytes - coding->consumed;
7164       const unsigned char *src;
7165
7166       coding_set_source (coding);
7167       coding_set_destination (coding);
7168       src = coding->source + coding->consumed;
7169
7170       if (coding->mode & CODING_MODE_LAST_BLOCK)
7171         {
7172           /* Flush out unprocessed data as binary chars.  We are sure
7173              that the number of data is less than the size of
7174              coding->charbuf.  */
7175           coding->charbuf_used = 0;
7176           coding->chars_at_source = 0;
7177
7178           while (nbytes-- > 0)
7179             {
7180               int c = *src++;
7181
7182               if (c & 0x80)
7183                 c = BYTE8_TO_CHAR (c);
7184               coding->charbuf[coding->charbuf_used++] = c;
7185             }
7186           produce_chars (coding, Qnil, 1);
7187         }
7188       else
7189         {
7190           /* Record unprocessed bytes in coding->carryover.  We are
7191              sure that the number of data is less than the size of
7192              coding->carryover.  */
7193           unsigned char *p = coding->carryover;
7194
7195           if (nbytes > sizeof coding->carryover)
7196             nbytes = sizeof coding->carryover;
7197           coding->carryover_bytes = nbytes;
7198           while (nbytes-- > 0)
7199             *p++ = *src++;
7200         }
7201       coding->consumed = coding->src_bytes;
7202     }
7203
7204   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7205       && !inhibit_eol_conversion)
7206     decode_eol (coding);
7207   if (BUFFERP (coding->dst_object))
7208     {
7209       current_buffer->undo_list = undo_list;
7210       record_insert (coding->dst_pos, coding->produced_char);
7211     }
7212   return coding->result;
7213 }
7214
7215
7216 /* Extract an annotation datum from a composition starting at POS and
7217    ending before LIMIT of CODING->src_object (buffer or string), store
7218    the data in BUF, set *STOP to a starting position of the next
7219    composition (if any) or to LIMIT, and return the address of the
7220    next element of BUF.
7221
7222    If such an annotation is not found, set *STOP to a starting
7223    position of a composition after POS (if any) or to LIMIT, and
7224    return BUF.  */
7225
7226 static INLINE int *
7227 handle_composition_annotation (pos, limit, coding, buf, stop)
7228      EMACS_INT pos, limit;
7229      struct coding_system *coding;
7230      int *buf;
7231      EMACS_INT *stop;
7232 {
7233   EMACS_INT start, end;
7234   Lisp_Object prop;
7235
7236   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7237       || end > limit)
7238     *stop = limit;
7239   else if (start > pos)
7240     *stop = start;
7241   else
7242     {
7243       if (start == pos)
7244         {
7245           /* We found a composition.  Store the corresponding
7246              annotation data in BUF.  */
7247           int *head = buf;
7248           enum composition_method method = COMPOSITION_METHOD (prop);
7249           int nchars = COMPOSITION_LENGTH (prop);
7250
7251           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7252           if (method != COMPOSITION_RELATIVE)
7253             {
7254               Lisp_Object components;
7255               int len, i, i_byte;
7256
7257               components = COMPOSITION_COMPONENTS (prop);
7258               if (VECTORP (components))
7259                 {
7260                   len = XVECTOR (components)->size;
7261                   for (i = 0; i < len; i++)
7262                     *buf++ = XINT (AREF (components, i));
7263                 }
7264               else if (STRINGP (components))
7265                 {
7266                   len = SCHARS (components);
7267                   i = i_byte = 0;
7268                   while (i < len)
7269                     {
7270                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7271                       buf++;
7272                     }
7273                 }
7274               else if (INTEGERP (components))
7275                 {
7276                   len = 1;
7277                   *buf++ = XINT (components);
7278                 }
7279               else if (CONSP (components))
7280                 {
7281                   for (len = 0; CONSP (components);
7282                        len++, components = XCDR (components))
7283                     *buf++ = XINT (XCAR (components));
7284                 }
7285               else
7286                 abort ();
7287               *head -= len;
7288             }
7289         }
7290
7291       if (find_composition (end, limit, &start, &end, &prop,
7292                             coding->src_object)
7293           && end <= limit)
7294         *stop = start;
7295       else
7296         *stop = limit;
7297     }
7298   return buf;
7299 }
7300
7301
7302 /* Extract an annotation datum from a text property `charset' at POS of
7303    CODING->src_object (buffer of string), store the data in BUF, set
7304    *STOP to the position where the value of `charset' property changes
7305    (limiting by LIMIT), and return the address of the next element of
7306    BUF.
7307
7308    If the property value is nil, set *STOP to the position where the
7309    property value is non-nil (limiting by LIMIT), and return BUF.  */
7310
7311 static INLINE int *
7312 handle_charset_annotation (pos, limit, coding, buf, stop)
7313      EMACS_INT pos, limit;
7314      struct coding_system *coding;
7315      int *buf;
7316      EMACS_INT *stop;
7317 {
7318   Lisp_Object val, next;
7319   int id;
7320
7321   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7322   if (! NILP (val) && CHARSETP (val))
7323     id = XINT (CHARSET_SYMBOL_ID (val));
7324   else
7325     id = -1;
7326   ADD_CHARSET_DATA (buf, 0, id);
7327   next = Fnext_single_property_change (make_number (pos), Qcharset,
7328                                        coding->src_object,
7329                                        make_number (limit));
7330   *stop = XINT (next);
7331   return buf;
7332 }
7333
7334
7335 static void
7336 consume_chars (coding, translation_table, max_lookup)
7337      struct coding_system *coding;
7338      Lisp_Object translation_table;
7339      int max_lookup;
7340 {
7341   int *buf = coding->charbuf;
7342   int *buf_end = coding->charbuf + coding->charbuf_size;
7343   const unsigned char *src = coding->source + coding->consumed;
7344   const unsigned char *src_end = coding->source + coding->src_bytes;
7345   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7346   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7347   int multibytep = coding->src_multibyte;
7348   Lisp_Object eol_type;
7349   int c;
7350   EMACS_INT stop, stop_composition, stop_charset;
7351   int *lookup_buf = NULL;
7352
7353   if (! NILP (translation_table))
7354     lookup_buf = alloca (sizeof (int) * max_lookup);
7355
7356   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7357   if (VECTORP (eol_type))
7358     eol_type = Qunix;
7359
7360   /* Note: composition handling is not yet implemented.  */
7361   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7362
7363   if (NILP (coding->src_object))
7364     stop = stop_composition = stop_charset = end_pos;
7365   else
7366     {
7367       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7368         stop = stop_composition = pos;
7369       else
7370         stop = stop_composition = end_pos;
7371       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7372         stop = stop_charset = pos;
7373       else
7374         stop_charset = end_pos;
7375     }
7376
7377   /* Compensate for CRLF and conversion.  */
7378   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7379   while (buf < buf_end)
7380     {
7381       Lisp_Object trans;
7382
7383       if (pos == stop)
7384         {
7385           if (pos == end_pos)
7386             break;
7387           if (pos == stop_composition)
7388             buf = handle_composition_annotation (pos, end_pos, coding,
7389                                                  buf, &stop_composition);
7390           if (pos == stop_charset)
7391             buf = handle_charset_annotation (pos, end_pos, coding,
7392                                              buf, &stop_charset);
7393           stop = (stop_composition < stop_charset
7394                   ? stop_composition : stop_charset);
7395         }
7396
7397       if (! multibytep)
7398         {
7399           EMACS_INT bytes;
7400
7401           if (coding->encoder == encode_coding_raw_text)
7402             c = *src++, pos++;
7403           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7404             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7405           else
7406             c = BYTE8_TO_CHAR (*src), src++, pos++;
7407         }
7408       else
7409         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7410       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7411         c = '\n';
7412       if (! EQ (eol_type, Qunix))
7413         {
7414           if (c == '\n')
7415             {
7416               if (EQ (eol_type, Qdos))
7417                 *buf++ = '\r';
7418               else
7419                 c = '\r';
7420             }
7421         }
7422
7423       trans = Qnil;
7424       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7425       if (NILP (trans))
7426         *buf++ = c;
7427       else
7428         {
7429           int from_nchars = 1, to_nchars = 1;
7430           int *lookup_buf_end;
7431           const unsigned char *p = src;
7432           int i;
7433
7434           lookup_buf[0] = c;
7435           for (i = 1; i < max_lookup && p < src_end; i++)
7436             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7437           lookup_buf_end = lookup_buf + i;
7438           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7439           if (INTEGERP (trans))
7440             c = XINT (trans);
7441           else if (CONSP (trans))
7442             {
7443               from_nchars = ASIZE (XCAR (trans));
7444               trans = XCDR (trans);
7445               if (INTEGERP (trans))
7446                 c = XINT (trans);
7447               else
7448                 {
7449                   to_nchars = ASIZE (trans);
7450                   if (buf + to_nchars > buf_end)
7451                     break;
7452                   c = XINT (AREF (trans, 0));
7453                 }
7454             }
7455           else
7456             break;
7457           *buf++ = c;
7458           for (i = 1; i < to_nchars; i++)
7459             *buf++ = XINT (AREF (trans, i));
7460           for (i = 1; i < from_nchars; i++, pos++)
7461             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7462         }
7463     }
7464
7465   coding->consumed = src - coding->source;
7466   coding->consumed_char = pos - coding->src_pos;
7467   coding->charbuf_used = buf - coding->charbuf;
7468   coding->chars_at_source = 0;
7469 }
7470
7471
7472 /* Encode the text at CODING->src_object into CODING->dst_object.
7473    CODING->src_object is a buffer or a string.
7474    CODING->dst_object is a buffer or nil.
7475
7476    If CODING->src_object is a buffer, it must be the current buffer.
7477    In this case, if CODING->src_pos is positive, it is a position of
7478    the source text in the buffer, otherwise. the source text is in the
7479    gap area of the buffer, and coding->src_pos specifies the offset of
7480    the text from GPT (which must be the same as PT).  If this is the
7481    same buffer as CODING->dst_object, CODING->src_pos must be
7482    negative and CODING should not have `pre-write-conversion'.
7483
7484    If CODING->src_object is a string, CODING should not have
7485    `pre-write-conversion'.
7486
7487    If CODING->dst_object is a buffer, the encoded data is inserted at
7488    the current point of that buffer.
7489
7490    If CODING->dst_object is nil, the encoded data is placed at the
7491    memory area specified by CODING->destination.  */
7492
7493 static int
7494 encode_coding (coding)
7495      struct coding_system *coding;
7496 {
7497   Lisp_Object attrs;
7498   Lisp_Object translation_table;
7499   int max_lookup;
7500
7501   attrs = CODING_ID_ATTRS (coding->id);
7502   if (coding->encoder == encode_coding_raw_text)
7503     translation_table = Qnil, max_lookup = 0;
7504   else
7505     translation_table = get_translation_table (attrs, 1, &max_lookup);
7506
7507   if (BUFFERP (coding->dst_object))
7508     {
7509       set_buffer_internal (XBUFFER (coding->dst_object));
7510       coding->dst_multibyte
7511         = ! NILP (current_buffer->enable_multibyte_characters);
7512     }
7513
7514   coding->consumed = coding->consumed_char = 0;
7515   coding->produced = coding->produced_char = 0;
7516   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7517   coding->errors = 0;
7518
7519   ALLOC_CONVERSION_WORK_AREA (coding);
7520
7521   do {
7522     coding_set_source (coding);
7523     consume_chars (coding, translation_table, max_lookup);
7524     coding_set_destination (coding);
7525     (*(coding->encoder)) (coding);
7526   } while (coding->consumed_char < coding->src_chars);
7527
7528   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7529     insert_from_gap (coding->produced_char, coding->produced);
7530
7531   return (coding->result);
7532 }
7533
7534
7535 /* Name (or base name) of work buffer for code conversion.  */
7536 static Lisp_Object Vcode_conversion_workbuf_name;
7537
7538 /* A working buffer used by the top level conversion.  Once it is
7539    created, it is never destroyed.  It has the name
7540    Vcode_conversion_workbuf_name.  The other working buffers are
7541    destroyed after the use is finished, and their names are modified
7542    versions of Vcode_conversion_workbuf_name.  */
7543 static Lisp_Object Vcode_conversion_reused_workbuf;
7544
7545 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
7546 static int reused_workbuf_in_use;
7547
7548
7549 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7550    multibyteness of returning buffer.  */
7551
7552 static Lisp_Object
7553 make_conversion_work_buffer (multibyte)
7554      int multibyte;
7555 {
7556   Lisp_Object name, workbuf;
7557   struct buffer *current;
7558
7559   if (reused_workbuf_in_use++)
7560     {
7561       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7562       workbuf = Fget_buffer_create (name);
7563     }
7564   else
7565     {
7566       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7567         Vcode_conversion_reused_workbuf
7568           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7569       workbuf = Vcode_conversion_reused_workbuf;
7570     }
7571   current = current_buffer;
7572   set_buffer_internal (XBUFFER (workbuf));
7573   /* We can't allow modification hooks to run in the work buffer.  For
7574      instance, directory_files_internal assumes that file decoding
7575      doesn't compile new regexps.  */
7576   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7577   Ferase_buffer ();
7578   current_buffer->undo_list = Qt;
7579   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7580   set_buffer_internal (current);
7581   return workbuf;
7582 }
7583
7584
7585 static Lisp_Object
7586 code_conversion_restore (arg)
7587      Lisp_Object arg;
7588 {
7589   Lisp_Object current, workbuf;
7590   struct gcpro gcpro1;
7591
7592   GCPRO1 (arg);
7593   current = XCAR (arg);
7594   workbuf = XCDR (arg);
7595   if (! NILP (workbuf))
7596     {
7597       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7598         reused_workbuf_in_use = 0;
7599       else if (! NILP (Fbuffer_live_p (workbuf)))
7600         Fkill_buffer (workbuf);
7601     }
7602   set_buffer_internal (XBUFFER (current));
7603   UNGCPRO;
7604   return Qnil;
7605 }
7606
7607 Lisp_Object
7608 code_conversion_save (with_work_buf, multibyte)
7609      int with_work_buf, multibyte;
7610 {
7611   Lisp_Object workbuf = Qnil;
7612
7613   if (with_work_buf)
7614     workbuf = make_conversion_work_buffer (multibyte);
7615   record_unwind_protect (code_conversion_restore,
7616                          Fcons (Fcurrent_buffer (), workbuf));
7617   return workbuf;
7618 }
7619
7620 int
7621 decode_coding_gap (coding, chars, bytes)
7622      struct coding_system *coding;
7623      EMACS_INT chars, bytes;
7624 {
7625   int count = specpdl_ptr - specpdl;
7626   Lisp_Object attrs;
7627
7628   code_conversion_save (0, 0);
7629
7630   coding->src_object = Fcurrent_buffer ();
7631   coding->src_chars = chars;
7632   coding->src_bytes = bytes;
7633   coding->src_pos = -chars;
7634   coding->src_pos_byte = -bytes;
7635   coding->src_multibyte = chars < bytes;
7636   coding->dst_object = coding->src_object;
7637   coding->dst_pos = PT;
7638   coding->dst_pos_byte = PT_BYTE;
7639   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7640
7641   if (CODING_REQUIRE_DETECTION (coding))
7642     detect_coding (coding);
7643
7644   coding->mode |= CODING_MODE_LAST_BLOCK;
7645   current_buffer->text->inhibit_shrinking = 1;
7646   decode_coding (coding);
7647   current_buffer->text->inhibit_shrinking = 0;
7648
7649   attrs = CODING_ID_ATTRS (coding->id);
7650   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7651     {
7652       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7653       Lisp_Object val;
7654
7655       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7656       val = call1 (CODING_ATTR_POST_READ (attrs),
7657                    make_number (coding->produced_char));
7658       CHECK_NATNUM (val);
7659       coding->produced_char += Z - prev_Z;
7660       coding->produced += Z_BYTE - prev_Z_BYTE;
7661     }
7662
7663   unbind_to (count, Qnil);
7664   return coding->result;
7665 }
7666
7667 int
7668 encode_coding_gap (coding, chars, bytes)
7669      struct coding_system *coding;
7670      EMACS_INT chars, bytes;
7671 {
7672   int count = specpdl_ptr - specpdl;
7673
7674   code_conversion_save (0, 0);
7675
7676   coding->src_object = Fcurrent_buffer ();
7677   coding->src_chars = chars;
7678   coding->src_bytes = bytes;
7679   coding->src_pos = -chars;
7680   coding->src_pos_byte = -bytes;
7681   coding->src_multibyte = chars < bytes;
7682   coding->dst_object = coding->src_object;
7683   coding->dst_pos = PT;
7684   coding->dst_pos_byte = PT_BYTE;
7685
7686   encode_coding (coding);
7687
7688   unbind_to (count, Qnil);
7689   return coding->result;
7690 }
7691
7692
7693 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7694    SRC_OBJECT into DST_OBJECT by coding context CODING.
7695
7696    SRC_OBJECT is a buffer, a string, or Qnil.
7697
7698    If it is a buffer, the text is at point of the buffer.  FROM and TO
7699    are positions in the buffer.
7700
7701    If it is a string, the text is at the beginning of the string.
7702    FROM and TO are indices to the string.
7703
7704    If it is nil, the text is at coding->source.  FROM and TO are
7705    indices to coding->source.
7706
7707    DST_OBJECT is a buffer, Qt, or Qnil.
7708
7709    If it is a buffer, the decoded text is inserted at point of the
7710    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7711    is deleted.
7712
7713    If it is Qt, a string is made from the decoded text, and
7714    set in CODING->dst_object.
7715
7716    If it is Qnil, the decoded text is stored at CODING->destination.
7717    The caller must allocate CODING->dst_bytes bytes at
7718    CODING->destination by xmalloc.  If the decoded text is longer than
7719    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7720  */
7721
7722 void
7723 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7724                       dst_object)
7725      struct coding_system *coding;
7726      Lisp_Object src_object;
7727      EMACS_INT from, from_byte, to, to_byte;
7728      Lisp_Object dst_object;
7729 {
7730   int count = specpdl_ptr - specpdl;
7731   unsigned char *destination;
7732   EMACS_INT dst_bytes;
7733   EMACS_INT chars = to - from;
7734   EMACS_INT bytes = to_byte - from_byte;
7735   Lisp_Object attrs;
7736   int saved_pt = -1, saved_pt_byte;
7737   int need_marker_adjustment = 0;
7738   Lisp_Object old_deactivate_mark;
7739
7740   old_deactivate_mark = Vdeactivate_mark;
7741
7742   if (NILP (dst_object))
7743     {
7744       destination = coding->destination;
7745       dst_bytes = coding->dst_bytes;
7746     }
7747
7748   coding->src_object = src_object;
7749   coding->src_chars = chars;
7750   coding->src_bytes = bytes;
7751   coding->src_multibyte = chars < bytes;
7752
7753   if (STRINGP (src_object))
7754     {
7755       coding->src_pos = from;
7756       coding->src_pos_byte = from_byte;
7757     }
7758   else if (BUFFERP (src_object))
7759     {
7760       set_buffer_internal (XBUFFER (src_object));
7761       if (from != GPT)
7762         move_gap_both (from, from_byte);
7763       if (EQ (src_object, dst_object))
7764         {
7765           struct Lisp_Marker *tail;
7766
7767           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7768             {
7769               tail->need_adjustment
7770                 = tail->charpos == (tail->insertion_type ? from : to);
7771               need_marker_adjustment |= tail->need_adjustment;
7772             }
7773           saved_pt = PT, saved_pt_byte = PT_BYTE;
7774           TEMP_SET_PT_BOTH (from, from_byte);
7775           current_buffer->text->inhibit_shrinking = 1;
7776           del_range_both (from, from_byte, to, to_byte, 1);
7777           coding->src_pos = -chars;
7778           coding->src_pos_byte = -bytes;
7779         }
7780       else
7781         {
7782           coding->src_pos = from;
7783           coding->src_pos_byte = from_byte;
7784         }
7785     }
7786
7787   if (CODING_REQUIRE_DETECTION (coding))
7788     detect_coding (coding);
7789   attrs = CODING_ID_ATTRS (coding->id);
7790
7791   if (EQ (dst_object, Qt)
7792       || (! NILP (CODING_ATTR_POST_READ (attrs))
7793           && NILP (dst_object)))
7794     {
7795       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7796       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7797       coding->dst_pos = BEG;
7798       coding->dst_pos_byte = BEG_BYTE;
7799     }
7800   else if (BUFFERP (dst_object))
7801     {
7802       code_conversion_save (0, 0);
7803       coding->dst_object = dst_object;
7804       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7805       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7806       coding->dst_multibyte
7807         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7808     }
7809   else
7810     {
7811       code_conversion_save (0, 0);
7812       coding->dst_object = Qnil;
7813       /* Most callers presume this will return a multibyte result, and they
7814          won't use `binary' or `raw-text' anyway, so let's not worry about
7815          CODING_FOR_UNIBYTE.  */
7816       coding->dst_multibyte = 1;
7817     }
7818
7819   decode_coding (coding);
7820
7821   if (BUFFERP (coding->dst_object))
7822     set_buffer_internal (XBUFFER (coding->dst_object));
7823
7824   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7825     {
7826       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7827       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7828       Lisp_Object val;
7829
7830       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7831       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7832               old_deactivate_mark);
7833       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7834                         make_number (coding->produced_char));
7835       UNGCPRO;
7836       CHECK_NATNUM (val);
7837       coding->produced_char += Z - prev_Z;
7838       coding->produced += Z_BYTE - prev_Z_BYTE;
7839     }
7840
7841   if (EQ (dst_object, Qt))
7842     {
7843       coding->dst_object = Fbuffer_string ();
7844     }
7845   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7846     {
7847       set_buffer_internal (XBUFFER (coding->dst_object));
7848       if (dst_bytes < coding->produced)
7849         {
7850           destination = xrealloc (destination, coding->produced);
7851           if (! destination)
7852             {
7853               record_conversion_result (coding,
7854                                         CODING_RESULT_INSUFFICIENT_DST);
7855               unbind_to (count, Qnil);
7856               return;
7857             }
7858           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7859             move_gap_both (BEGV, BEGV_BYTE);
7860           bcopy (BEGV_ADDR, destination, coding->produced);
7861           coding->destination = destination;
7862         }
7863     }
7864
7865   if (saved_pt >= 0)
7866     {
7867       /* This is the case of:
7868          (BUFFERP (src_object) && EQ (src_object, dst_object))
7869          As we have moved PT while replacing the original buffer
7870          contents, we must recover it now.  */
7871       set_buffer_internal (XBUFFER (src_object));
7872       current_buffer->text->inhibit_shrinking = 0;
7873       if (saved_pt < from)
7874         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7875       else if (saved_pt < from + chars)
7876         TEMP_SET_PT_BOTH (from, from_byte);
7877       else if (! NILP (current_buffer->enable_multibyte_characters))
7878         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7879                           saved_pt_byte + (coding->produced - bytes));
7880       else
7881         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7882                           saved_pt_byte + (coding->produced - bytes));
7883
7884       if (need_marker_adjustment)
7885         {
7886           struct Lisp_Marker *tail;
7887
7888           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7889             if (tail->need_adjustment)
7890               {
7891                 tail->need_adjustment = 0;
7892                 if (tail->insertion_type)
7893                   {
7894                     tail->bytepos = from_byte;
7895                     tail->charpos = from;
7896                   }
7897                 else
7898                   {
7899                     tail->bytepos = from_byte + coding->produced;
7900                     tail->charpos
7901                       = (NILP (current_buffer->enable_multibyte_characters)
7902                          ? tail->bytepos : from + coding->produced_char);
7903                   }
7904               }
7905         }
7906     }
7907
7908   Vdeactivate_mark = old_deactivate_mark;
7909   unbind_to (count, coding->dst_object);
7910 }
7911
7912
7913 void
7914 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7915                       dst_object)
7916      struct coding_system *coding;
7917      Lisp_Object src_object;
7918      EMACS_INT from, from_byte, to, to_byte;
7919      Lisp_Object dst_object;
7920 {
7921   int count = specpdl_ptr - specpdl;
7922   EMACS_INT chars = to - from;
7923   EMACS_INT bytes = to_byte - from_byte;
7924   Lisp_Object attrs;
7925   int saved_pt = -1, saved_pt_byte;
7926   int need_marker_adjustment = 0;
7927   int kill_src_buffer = 0;
7928   Lisp_Object old_deactivate_mark;
7929
7930   old_deactivate_mark = Vdeactivate_mark;
7931
7932   coding->src_object = src_object;
7933   coding->src_chars = chars;
7934   coding->src_bytes = bytes;
7935   coding->src_multibyte = chars < bytes;
7936
7937   attrs = CODING_ID_ATTRS (coding->id);
7938
7939   if (EQ (src_object, dst_object))
7940     {
7941       struct Lisp_Marker *tail;
7942
7943       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7944         {
7945           tail->need_adjustment
7946             = tail->charpos == (tail->insertion_type ? from : to);
7947           need_marker_adjustment |= tail->need_adjustment;
7948         }
7949     }
7950
7951   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7952     {
7953       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7954       set_buffer_internal (XBUFFER (coding->src_object));
7955       if (STRINGP (src_object))
7956         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7957       else if (BUFFERP (src_object))
7958         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7959       else
7960         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7961
7962       if (EQ (src_object, dst_object))
7963         {
7964           set_buffer_internal (XBUFFER (src_object));
7965           saved_pt = PT, saved_pt_byte = PT_BYTE;
7966           del_range_both (from, from_byte, to, to_byte, 1);
7967           set_buffer_internal (XBUFFER (coding->src_object));
7968         }
7969
7970       {
7971         Lisp_Object args[3];
7972         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7973
7974         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7975                 old_deactivate_mark);
7976         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7977         args[1] = make_number (BEG);
7978         args[2] = make_number (Z);
7979         safe_call (3, args);
7980         UNGCPRO;
7981       }
7982       if (XBUFFER (coding->src_object) != current_buffer)
7983         kill_src_buffer = 1;
7984       coding->src_object = Fcurrent_buffer ();
7985       if (BEG != GPT)
7986         move_gap_both (BEG, BEG_BYTE);
7987       coding->src_chars = Z - BEG;
7988       coding->src_bytes = Z_BYTE - BEG_BYTE;
7989       coding->src_pos = BEG;
7990       coding->src_pos_byte = BEG_BYTE;
7991       coding->src_multibyte = Z < Z_BYTE;
7992     }
7993   else if (STRINGP (src_object))
7994     {
7995       code_conversion_save (0, 0);
7996       coding->src_pos = from;
7997       coding->src_pos_byte = from_byte;
7998     }
7999   else if (BUFFERP (src_object))
8000     {
8001       code_conversion_save (0, 0);
8002       set_buffer_internal (XBUFFER (src_object));
8003       if (EQ (src_object, dst_object))
8004         {
8005           saved_pt = PT, saved_pt_byte = PT_BYTE;
8006           coding->src_object = del_range_1 (from, to, 1, 1);
8007           coding->src_pos = 0;
8008           coding->src_pos_byte = 0;
8009         }
8010       else
8011         {
8012           if (from < GPT && to >= GPT)
8013             move_gap_both (from, from_byte);
8014           coding->src_pos = from;
8015           coding->src_pos_byte = from_byte;
8016         }
8017     }
8018   else
8019     code_conversion_save (0, 0);
8020
8021   if (BUFFERP (dst_object))
8022     {
8023       coding->dst_object = dst_object;
8024       if (EQ (src_object, dst_object))
8025         {
8026           coding->dst_pos = from;
8027           coding->dst_pos_byte = from_byte;
8028         }
8029       else
8030         {
8031           struct buffer *current = current_buffer;
8032
8033           set_buffer_temp (XBUFFER (dst_object));
8034           coding->dst_pos = PT;
8035           coding->dst_pos_byte = PT_BYTE;
8036           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
8037           set_buffer_temp (current);
8038         }
8039       coding->dst_multibyte
8040         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
8041     }
8042   else if (EQ (dst_object, Qt))
8043     {
8044       coding->dst_object = Qnil;
8045       coding->dst_bytes = coding->src_chars;
8046       if (coding->dst_bytes == 0)
8047         coding->dst_bytes = 1;
8048       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
8049       coding->dst_multibyte = 0;
8050     }
8051   else
8052     {
8053       coding->dst_object = Qnil;
8054       coding->dst_multibyte = 0;
8055     }
8056
8057   encode_coding (coding);
8058
8059   if (EQ (dst_object, Qt))
8060     {
8061       if (BUFFERP (coding->dst_object))
8062         coding->dst_object = Fbuffer_string ();
8063       else
8064         {
8065           coding->dst_object
8066             = make_unibyte_string ((char *) coding->destination,
8067                                    coding->produced);
8068           xfree (coding->destination);
8069         }
8070     }
8071
8072   if (saved_pt >= 0)
8073     {
8074       /* This is the case of:
8075          (BUFFERP (src_object) && EQ (src_object, dst_object))
8076          As we have moved PT while replacing the original buffer
8077          contents, we must recover it now.  */
8078       set_buffer_internal (XBUFFER (src_object));
8079       if (saved_pt < from)
8080         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8081       else if (saved_pt < from + chars)
8082         TEMP_SET_PT_BOTH (from, from_byte);
8083       else if (! NILP (current_buffer->enable_multibyte_characters))
8084         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8085                           saved_pt_byte + (coding->produced - bytes));
8086       else
8087         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8088                           saved_pt_byte + (coding->produced - bytes));
8089
8090       if (need_marker_adjustment)
8091         {
8092           struct Lisp_Marker *tail;
8093
8094           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8095             if (tail->need_adjustment)
8096               {
8097                 tail->need_adjustment = 0;
8098                 if (tail->insertion_type)
8099                   {
8100                     tail->bytepos = from_byte;
8101                     tail->charpos = from;
8102                   }
8103                 else
8104                   {
8105                     tail->bytepos = from_byte + coding->produced;
8106                     tail->charpos
8107                       = (NILP (current_buffer->enable_multibyte_characters)
8108                          ? tail->bytepos : from + coding->produced_char);
8109                   }
8110               }
8111         }
8112     }
8113
8114   if (kill_src_buffer)
8115     Fkill_buffer (coding->src_object);
8116
8117   Vdeactivate_mark = old_deactivate_mark;
8118   unbind_to (count, Qnil);
8119 }
8120
8121
8122 Lisp_Object
8123 preferred_coding_system ()
8124 {
8125   int id = coding_categories[coding_priorities[0]].id;
8126
8127   return CODING_ID_NAME (id);
8128 }
8129
8130 \f
8131 #ifdef emacs
8132 /*** 11. Emacs Lisp library functions ***/
8133
8134 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8135        doc: /* Return t if OBJECT is nil or a coding-system.
8136 See the documentation of `define-coding-system' for information
8137 about coding-system objects.  */)
8138      (object)
8139      Lisp_Object object;
8140 {
8141   if (NILP (object)
8142       || CODING_SYSTEM_ID (object) >= 0)
8143     return Qt;
8144   if (! SYMBOLP (object)
8145       || NILP (Fget (object, Qcoding_system_define_form)))
8146     return Qnil;
8147   return Qt;
8148 }
8149
8150 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8151        Sread_non_nil_coding_system, 1, 1, 0,
8152        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8153      (prompt)
8154      Lisp_Object prompt;
8155 {
8156   Lisp_Object val;
8157   do
8158     {
8159       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8160                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8161     }
8162   while (SCHARS (val) == 0);
8163   return (Fintern (val, Qnil));
8164 }
8165
8166 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8167        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8168 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8169 Ignores case when completing coding systems (all Emacs coding systems
8170 are lower-case).  */)
8171      (prompt, default_coding_system)
8172      Lisp_Object prompt, default_coding_system;
8173 {
8174   Lisp_Object val;
8175   int count = SPECPDL_INDEX ();
8176
8177   if (SYMBOLP (default_coding_system))
8178     default_coding_system = SYMBOL_NAME (default_coding_system);
8179   specbind (Qcompletion_ignore_case, Qt);
8180   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8181                           Qt, Qnil, Qcoding_system_history,
8182                           default_coding_system, Qnil);
8183   unbind_to (count, Qnil);
8184   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8185 }
8186
8187 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8188        1, 1, 0,
8189        doc: /* Check validity of CODING-SYSTEM.
8190 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8191 It is valid if it is nil or a symbol defined as a coding system by the
8192 function `define-coding-system'.  */)
8193   (coding_system)
8194      Lisp_Object coding_system;
8195 {
8196   Lisp_Object define_form;
8197
8198   define_form = Fget (coding_system, Qcoding_system_define_form);
8199   if (! NILP (define_form))
8200     {
8201       Fput (coding_system, Qcoding_system_define_form, Qnil);
8202       safe_eval (define_form);
8203     }
8204   if (!NILP (Fcoding_system_p (coding_system)))
8205     return coding_system;
8206   xsignal1 (Qcoding_system_error, coding_system);
8207 }
8208
8209 \f
8210 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8211    HIGHEST is nonzero, return the coding system of the highest
8212    priority among the detected coding systems.  Otherwise return a
8213    list of detected coding systems sorted by their priorities.  If
8214    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8215    multibyte form but contain only ASCII and eight-bit chars.
8216    Otherwise, the bytes are raw bytes.
8217
8218    CODING-SYSTEM controls the detection as below:
8219
8220    If it is nil, detect both text-format and eol-format.  If the
8221    text-format part of CODING-SYSTEM is already specified
8222    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8223    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8224    detect only text-format.  */
8225
8226 Lisp_Object
8227 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
8228                       coding_system)
8229      const unsigned char *src;
8230      EMACS_INT src_chars, src_bytes;
8231      int highest;
8232      int multibytep;
8233      Lisp_Object coding_system;
8234 {
8235   const unsigned char *src_end = src + src_bytes;
8236   Lisp_Object attrs, eol_type;
       /* VAL first collects coding system ids (as Lisp integers); the
          eol-format pass at the end replaces each id by a coding system
          name.  */
8237   Lisp_Object val = Qnil;
8238   struct coding_system coding;
8239   int id;
8240   struct coding_detection_info detect_info;
8241   enum coding_category base_category;
8242   int null_byte_found = 0, eight_bit_found = 0;
8243
       /* A nil CODING-SYSTEM is treated as `undecided'.  */
8244   if (NILP (coding_system))
8245     coding_system = Qundecided;
8246   setup_coding_system (coding_system, &coding);
8247   attrs = CODING_ID_ATTRS (coding.id);
8248   eol_type = CODING_ID_EOL_TYPE (coding.id);
8249   coding_system = CODING_ATTR_BASE_NAME (attrs);
8250
8251   coding.source = src;
8252   coding.src_chars = src_chars;
8253   coding.src_bytes = src_bytes;
8254   coding.src_multibyte = multibytep;
8255   coding.consumed = 0;
8256   coding.mode |= CODING_MODE_LAST_BLOCK;
8257   coding.head_ascii = 0;
8258
8259   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8260
8261   /* At first, detect text-format if necessary.  */
8262   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8263   if (base_category == coding_category_undecided)
8264     {
8265       enum coding_category category;
8266       struct coding_system *this;
8267       int c, i;
8268
8269       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
           /* While scanning, coding.head_ascii counts the 7-bit bytes seen
              before the first 8-bit byte.  The scan stops early once both
              a null byte and an 8-bit byte have been seen.  */
8270       for (; src < src_end; src++)
8271         {
8272           c = *src;
8273           if (c & 0x80)
8274             {
8275               eight_bit_found = 1;
8276               if (null_byte_found)
8277                 break;
8278             }
8279           else if (c < 0x20)
8280             {
8281               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8282                   && ! inhibit_iso_escape_detection
8283                   && ! detect_info.checked)
8284                 {
8285                   if (detect_coding_iso_2022 (&coding, &detect_info))
8286                     {
8287                       /* We have scanned the whole data.  */
8288                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8289                         {
8290                           /* We didn't find an 8-bit code.  We may
8291                              have found a null-byte, but it's very
8292                              rare that a binary file conforms to
8293                              ISO-2022.  */
8294                           src = src_end;
8295                           coding.head_ascii = src - coding.source;
8296                         }
8297                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8298                       break;
8299                     }
8300                 }
8301               else if (! c && !inhibit_null_byte_detection)
8302                 {
8303                   null_byte_found = 1;
8304                   if (eight_bit_found)
8305                     break;
8306                 }
8307               if (! eight_bit_found)
8308                 coding.head_ascii++;
8309             }
8310           else if (! eight_bit_found)
8311             coding.head_ascii++;
8312         }
8313
           /* Run the per-category detectors only if the scan above saw
              something other than plain 7-bit text.  */
8314       if (null_byte_found || eight_bit_found
8315           || coding.head_ascii < coding.src_bytes
8316           || detect_info.found)
8317         {
8318           if (coding.head_ascii == coding.src_bytes)
8319             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
8320             for (i = 0; i < coding_category_raw_text; i++)
8321               {
8322                 category = coding_priorities[i];
8323                 this = coding_categories + category;
8324                 if (detect_info.found & (1 << category))
8325                   break;
8326               }
8327           else
8328             {
                   /* A null byte marks every category except the UTF-16
                      ones as already checked and rejected.  */
8329               if (null_byte_found)
8330                 {
8331                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8332                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8333                 }
                   /* Try the categories in priority order.  CATEGORY and
                      THIS keep the values of the last iteration and are
                      read again below when HIGHEST is nonzero.  */
8334               for (i = 0; i < coding_category_raw_text; i++)
8335                 {
8336                   category = coding_priorities[i];
8337                   this = coding_categories + category;
8338
8339                   if (this->id < 0)
8340                     {
8341                       /* No coding system of this category is defined.  */
8342                       detect_info.rejected |= (1 << category);
8343                     }
8344                   else if (category >= coding_category_raw_text)
8345                     continue;
8346                   else if (detect_info.checked & (1 << category))
8347                     {
8348                       if (highest
8349                           && (detect_info.found & (1 << category)))
8350                         break;
8351                     }
8352                   else if ((*(this->detector)) (&coding, &detect_info)
8353                            && highest
8354                            && (detect_info.found & (1 << category)))
8355                     {
8356                       if (category == coding_category_utf_16_auto)
8357                         {
8358                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8359                             category = coding_category_utf_16_le;
8360                           else
8361                             category = coding_category_utf_16_be;
8362                         }
8363                       break;
8364                     }
8365                 }
8366             }
8367         }
8368
           /* Turn the detection masks into the candidate list VAL.  */
8369       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8370           || null_byte_found)
8371         {
               /* Every category was rejected, or a null byte was seen:
                  the only candidate is `no-conversion'.  */
8372           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8373           id = CODING_SYSTEM_ID (Qno_conversion);
8374           val = Fcons (make_number (id), Qnil);
8375         }
8376       else if (! detect_info.rejected && ! detect_info.found)
8377         {
               /* Nothing was found and nothing was rejected: answer with
                  the coding system of the `undecided' category.  */
8378           detect_info.found = CATEGORY_MASK_ANY;
8379           id = coding_categories[coding_category_undecided].id;
8380           val = Fcons (make_number (id), Qnil);
8381         }
8382       else if (highest)
8383         {
8384           if (detect_info.found)
8385             {
8386               detect_info.found = 1 << category;
8387               val = Fcons (make_number (this->id), Qnil);
8388             }
8389           else
                 /* Nothing was positively found: take the first category
                    in priority order that was not rejected.  */
8390             for (i = 0; i < coding_category_raw_text; i++)
8391               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8392                 {
8393                   detect_info.found = 1 << coding_priorities[i];
8394                   id = coding_categories[coding_priorities[i]].id;
8395                   val = Fcons (make_number (id), Qnil);
8396                   break;
8397                 }
8398         }
8399       else
8400         {
8401           int mask = detect_info.rejected | detect_info.found;
8402           int found = 0;
8403
               /* Both loops walk from lowest to highest priority and cons
                  onto the front, so VAL ends up in priority order: first
                  the categories that were found, then those that were
                  neither found nor rejected.  */
8404           for (i = coding_category_raw_text - 1; i >= 0; i--)
8405             {
8406               category = coding_priorities[i];
8407               if (! (mask & (1 << category)))
8408                 {
8409                   found |= 1 << category;
8410                   id = coding_categories[category].id;
8411                   if (id >= 0)
8412                     val = Fcons (make_number (id), val);
8413                 }
8414             }
8415           for (i = coding_category_raw_text - 1; i >= 0; i--)
8416             {
8417               category = coding_priorities[i];
8418               if (detect_info.found & (1 << category))
8419                 {
8420                   id = coding_categories[category].id;
8421                   val = Fcons (make_number (id), val);
8422                 }
8423             }
8424           detect_info.found |= found;
8425         }
8426     }
8427   else if (base_category == coding_category_utf_8_auto)
8428     {
           /* Only decide between the with-signature and without-signature
              UTF-8 variants.  VAL stays nil if the detector fails.  */
8429       if (detect_coding_utf_8 (&coding, &detect_info))
8430         {
8431           struct coding_system *this;
8432
8433           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8434             this = coding_categories + coding_category_utf_8_sig;
8435           else
8436             this = coding_categories + coding_category_utf_8_nosig;
8437           val = Fcons (make_number (this->id), Qnil);
8438         }
8439     }
8440   else if (base_category == coding_category_utf_16_auto)
8441     {
           /* Only decide among the UTF-16 variants.  VAL stays nil if the
              detector fails.  */
8442       if (detect_coding_utf_16 (&coding, &detect_info))
8443         {
8444           struct coding_system *this;
8445
8446           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8447             this = coding_categories + coding_category_utf_16_le;
8448           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8449             this = coding_categories + coding_category_utf_16_be;
8450           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8451             this = coding_categories + coding_category_utf_16_be_nosig;
8452           else
8453             this = coding_categories + coding_category_utf_16_le_nosig;
8454           val = Fcons (make_number (this->id), Qnil);
8455         }
8456     }
8457   else
8458     {
           /* The text-format is already specified: the given coding system
              is the only candidate.  */
8459       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8460       val = Fcons (make_number (coding.id), Qnil);
8461     }
8462
8463   /* Then, detect eol-format if necessary.  */
8464   {
         /* -1 means "not determined" for that family of candidates.  */
8465     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8466     Lisp_Object tail;
8467
         /* EOL_TYPE is a vector when the eol-format is still to be
            detected; otherwise it is compared against Qunix and Qdos.  */
8468     if (VECTORP (eol_type))
8469       {
8470         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8471           {
                 /* When a null byte was seen, no eol detection is run and
                    LF is assumed.  */
8472             if (null_byte_found)
8473               normal_eol = EOL_SEEN_LF;
8474             else
8475               normal_eol = detect_eol (coding.source, src_bytes,
8476                                        coding_category_raw_text);
8477           }
8478         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8479                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8480           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8481                                       coding_category_utf_16_be);
8482         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8483                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8484           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8485                                       coding_category_utf_16_le);
8486       }
8487     else
8488       {
8489         if (EQ (eol_type, Qunix))
8490           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8491         else if (EQ (eol_type, Qdos))
8492           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8493         else
8494           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8495       }
8496
         /* Replace each id in VAL, in place, by a coding system: element
            0, 1 or 2 of the candidate's eol-type vector for LF, CRLF or CR
            respectively, or the candidate's own name when its eol-type is
            not a vector or no eol-format was determined.  */
8497     for (tail = val; CONSP (tail); tail = XCDR (tail))
8498       {
8499         enum coding_category category;
8500         int this_eol;
8501
8502         id = XINT (XCAR (tail));
8503         attrs = CODING_ID_ATTRS (id);
8504         category = XINT (CODING_ATTR_CATEGORY (attrs));
8505         eol_type = CODING_ID_EOL_TYPE (id);
8506         if (VECTORP (eol_type))
8507           {
8508             if (category == coding_category_utf_16_be
8509                 || category == coding_category_utf_16_be_nosig)
8510               this_eol = utf_16_be_eol;
8511             else if (category == coding_category_utf_16_le
8512                      || category == coding_category_utf_16_le_nosig)
8513               this_eol = utf_16_le_eol;
8514             else
8515               this_eol = normal_eol;
8516
8517             if (this_eol == EOL_SEEN_LF)
8518               XSETCAR (tail, AREF (eol_type, 0));
8519             else if (this_eol == EOL_SEEN_CRLF)
8520               XSETCAR (tail, AREF (eol_type, 1));
8521             else if (this_eol == EOL_SEEN_CR)
8522               XSETCAR (tail, AREF (eol_type, 2));
8523             else
8524               XSETCAR (tail, CODING_ID_NAME (id));
8525           }
8526         else
8527           XSETCAR (tail, CODING_ID_NAME (id));
8528       }
8529   }
8530
       /* With HIGHEST, return the first candidate, or nil if there is
          none; otherwise return the whole list.  */
8531   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8532 }
8533
8534
8535 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8536        2, 3, 0,
8537        doc: /* Detect coding system of the text in the region between START and END.
8538 Return a list of possible coding systems ordered by priority.
8539 The coding systems to try and their priorities follows what
8540 the function `coding-system-priority-list' (which see) returns.
8541
8542 If only ASCII characters are found (except for such ISO-2022 control
8543 characters as ESC), it returns a list of single element `undecided'
8544 or its subsidiary coding system according to a detected end-of-line
8545 format.
8546
8547 If optional argument HIGHEST is non-nil, return the coding system of
8548 highest priority.  */)
8549      (start, end, highest)
8550      Lisp_Object start, end, highest;
8551 {
8552   int from, to;
8553   int from_byte, to_byte;
8554
8555   CHECK_NUMBER_COERCE_MARKER (start);
8556   CHECK_NUMBER_COERCE_MARKER (end);
8557
8558   validate_region (&start, &end);
8559   from = XINT (start), to = XINT (end);
8560   from_byte = CHAR_TO_BYTE (from);
8561   to_byte = CHAR_TO_BYTE (to);
8562
8563   if (from < GPT && to >= GPT)
8564     move_gap_both (to, to_byte);
8565
8566   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8567                                to - from, to_byte - from_byte,
8568                                !NILP (highest),
8569                                !NILP (current_buffer
8570                                       ->enable_multibyte_characters),
8571                                Qnil);
8572 }
8573
8574 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8575        1, 2, 0,
8576        doc: /* Detect coding system of the text in STRING.
8577 Return a list of possible coding systems ordered by priority.
8578 The coding systems to try and their priorities follows what
8579 the function `coding-system-priority-list' (which see) returns.
8580
8581 If only ASCII characters are found (except for such ISO-2022 control
8582 characters as ESC), it returns a list of single element `undecided'
8583 or its subsidiary coding system according to a detected end-of-line
8584 format.
8585
8586 If optional argument HIGHEST is non-nil, return the coding system of
8587 highest priority.  */)
8588      (string, highest)
8589      Lisp_Object string, highest;
8590 {
8591   CHECK_STRING (string);
8592
8593   return detect_coding_system (SDATA (string),
8594                                SCHARS (string), SBYTES (string),
8595                                !NILP (highest), STRING_MULTIBYTE (string),
8596                                Qnil);
8597 }
8598
8599
8600 static INLINE int
8601 char_encodable_p (c, attrs)
8602      int c;
8603      Lisp_Object attrs;
8604 {
8605   Lisp_Object tail;
8606   struct charset *charset;
8607   Lisp_Object translation_table;
8608
8609   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8610   if (! NILP (translation_table))
8611     c = translate_char (translation_table, c);
8612   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8613        CONSP (tail); tail = XCDR (tail))
8614     {
8615       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8616       if (CHAR_CHARSET_P (c, charset))
8617         break;
8618     }
8619   return (! NILP (tail));
8620 }
8621
8622
8623 /* Return a list of coding systems that safely encode the text between
8624    START and END.  If EXCLUDE is non-nil, it is a list of coding
8625    systems not to check.  The returned list doesn't contain any such
8626    coding systems.  In any case, if the text contains only ASCII or is
8627    unibyte, return t.
        START may also be a string, in which case the whole string is
        examined and END is not used.  */
8628
8629 DEFUN ("find-coding-systems-region-internal",
8630        Ffind_coding_systems_region_internal,
8631        Sfind_coding_systems_region_internal, 2, 3, 0,
8632        doc: /* Internal use only.  */)
8633      (start, end, exclude)
8634      Lisp_Object start, end, exclude;
8635 {
8636   Lisp_Object coding_attrs_list, safe_codings;
8637   EMACS_INT start_byte, end_byte;
8638   const unsigned char *p, *pbeg, *pend;
8639   int c;
8640   Lisp_Object tail, elt;
8641
8642   if (STRINGP (start))
8643     {
           /* A unibyte string, or one whose character and byte counts are
              equal, needs no checking.  */
8644       if (!STRING_MULTIBYTE (start)
8645           || SCHARS (start) == SBYTES (start))
8646         return Qt;
8647       start_byte = 0;
8648       end_byte = SBYTES (start);
8649     }
8650   else
8651     {
8652       CHECK_NUMBER_COERCE_MARKER (start);
8653       CHECK_NUMBER_COERCE_MARKER (end);
8654       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8655         args_out_of_range (start, end);
8656       if (NILP (current_buffer->enable_multibyte_characters))
8657         return Qt;
8658       start_byte = CHAR_TO_BYTE (XINT (start));
8659       end_byte = CHAR_TO_BYTE (XINT (end));
           /* Equal character and byte counts: nothing to check.  */
8660       if (XINT (end) - XINT (start) == end_byte - start_byte)
8661         return Qt;
8662
           /* If the gap lies strictly inside the region, move it to
              whichever end of the region is nearer.  */
8663       if (XINT (start) < GPT && XINT (end) > GPT)
8664         {
8665           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8666             move_gap_both (XINT (start), start_byte);
8667           else
8668             move_gap_both (XINT (end), end_byte);
8669         }
8670     }
8671
       /* Collect the attribute vectors of the candidates: every coding
          system in Vcoding_system_list that is not in EXCLUDE, is its own
          base name, and is not of type `undecided'.  The translation
          table used for checking is stored into the coding_attr_trans_tbl
          slot of each collected vector.  */
8672   coding_attrs_list = Qnil;
8673   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8674     if (NILP (exclude)
8675         || NILP (Fmemq (XCAR (tail), exclude)))
8676       {
8677         Lisp_Object attrs;
8678
8679         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8680         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8681             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8682           {
8683             ASET (attrs, coding_attr_trans_tbl,
8684                   get_translation_table (attrs, 1, NULL));
8685             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8686           }
8687       }
8688
8689   if (STRINGP (start))
8690     p = pbeg = SDATA (start);
8691   else
8692     p = pbeg = BYTE_POS_ADDR (start_byte);
8693   pend = p + (end_byte - start_byte);
8694
       /* Trim ASCII bytes from both ends of the text.  */
8695   while (p < pend && ASCII_BYTE_P (*p)) p++;
8696   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8697
8698   while (p < pend)
8699     {
8700       if (ASCII_BYTE_P (*p))
8701         p++;
8702       else
8703         {
8704           c = STRING_CHAR_ADVANCE (p);
8705
               /* Drop every candidate that cannot encode C.  A cell that
                  has a successor is removed by copying the successor's
                  car and cdr into it (TAIL is not advanced, so the copied
                  element is checked next); the last cell has its car set
                  to nil instead, and nil cars are skipped.  */
8706           charset_map_loaded = 0;
8707           for (tail = coding_attrs_list; CONSP (tail);)
8708             {
8709               elt = XCAR (tail);
8710               if (NILP (elt))
8711                 tail = XCDR (tail);
8712               else if (char_encodable_p (c, elt))
8713                 tail = XCDR (tail);
8714               else if (CONSP (XCDR (tail)))
8715                 {
8716                   XSETCAR (tail, XCAR (XCDR (tail)));
8717                   XSETCDR (tail, XCDR (XCDR (tail)));
8718                 }
8719               else
8720                 {
8721                   XSETCAR (tail, Qnil);
8722                   tail = XCDR (tail);
8723                 }
8724             }
               /* charset_map_loaded was cleared above; if it is set now,
                  recompute P and PEND from their offsets against a freshly
                  fetched base address.  NOTE(review): presumably loading a
                  charset map can relocate the text -- confirm.  */
8725           if (charset_map_loaded)
8726             {
8727               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8728
8729               if (STRINGP (start))
8730                 pbeg = SDATA (start);
8731               else
8732                 pbeg = BYTE_POS_ADDR (start_byte);
8733               p = pbeg + p_offset;
8734               pend = pbeg + pend_offset;
8735             }
8736         }
8737     }
8738
       /* `raw-text' and `no-conversion' are always part of the result;
          the base names of the surviving candidates are consed in front
          of them.  */
8739   safe_codings = list2 (Qraw_text, Qno_conversion);
8740   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8741     if (! NILP (XCAR (tail)))
8742       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8743
8744   return safe_codings;
8745 }
8746
8747
8748 DEFUN ("unencodable-char-position", Funencodable_char_position,
8749        Sunencodable_char_position, 3, 5, 0,
8750        doc: /*
8751 Return position of first un-encodable character in a region.
8752 START and END specify the region and CODING-SYSTEM specifies the
8753 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8754
8755 If optional 4th argument COUNT is non-nil, it specifies at most how
8756 many un-encodable characters to search.  In this case, the value is a
8757 list of positions.
8758
8759 If optional 5th argument STRING is non-nil, it is a string to search
8760 for un-encodable characters.  In that case, START and END are indexes
8761 to the string.  */)
8762      (start, end, coding_system, count, string)
8763      Lisp_Object start, end, coding_system, count, string;
8764 {
8765   int n;
8766   struct coding_system coding;
8767   Lisp_Object attrs, charset_list, translation_table;
8768   Lisp_Object positions;
8769   int from, to;
8770   const unsigned char *p, *stop, *pend;
8771   int ascii_compatible;
8772
8773   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8774   attrs = CODING_ID_ATTRS (coding.id);
8775   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8776     return Qnil;
8777   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8778   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8779   translation_table = get_translation_table (attrs, 1, NULL);
8780
8781   if (NILP (string))
8782     {
8783       validate_region (&start, &end);
8784       from = XINT (start);
8785       to = XINT (end);
8786       if (NILP (current_buffer->enable_multibyte_characters)
8787           || (ascii_compatible
8788               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8789         return Qnil;
8790       p = CHAR_POS_ADDR (from);
8791       pend = CHAR_POS_ADDR (to);
8792       if (from < GPT && to >= GPT)
8793         stop = GPT_ADDR;
8794       else
8795         stop = pend;
8796     }
8797   else
8798     {
8799       CHECK_STRING (string);
8800       CHECK_NATNUM (start);
8801       CHECK_NATNUM (end);
8802       from = XINT (start);
8803       to = XINT (end);
8804       if (from > to
8805           || to > SCHARS (string))
8806         args_out_of_range_3 (string, start, end);
8807       if (! STRING_MULTIBYTE (string))
8808         return Qnil;
8809       p = SDATA (string) + string_char_to_byte (string, from);
8810       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8811       if (ascii_compatible && (to - from) == (pend - p))
8812         return Qnil;
8813     }
8814
8815   if (NILP (count))
8816     n = 1;
8817   else
8818     {
8819       CHECK_NATNUM (count);
8820       n = XINT (count);
8821     }
8822
8823   positions = Qnil;
8824   while (1)
8825     {
8826       int c;
8827
8828       if (ascii_compatible)
8829         while (p < stop && ASCII_BYTE_P (*p))
8830           p++, from++;
8831       if (p >= stop)
8832         {
8833           if (p >= pend)
8834             break;
8835           stop = pend;
8836           p = GAP_END_ADDR;
8837         }
8838
8839       c = STRING_CHAR_ADVANCE (p);
8840       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8841           && ! char_charset (translate_char (translation_table, c),
8842                              charset_list, NULL))
8843         {
8844           positions = Fcons (make_number (from), positions);
8845           n--;
8846           if (n == 0)
8847             break;
8848         }
8849
8850       from++;
8851     }
8852
8853   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8854 }
8855
8856
8857 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8858        Scheck_coding_systems_region, 3, 3, 0,
8859        doc: /* Check if the region is encodable by coding systems.
8860
8861 START and END are buffer positions specifying the region.
8862 CODING-SYSTEM-LIST is a list of coding systems to check.
8863
8864 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8865 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8866 whole region, POS0, POS1, ... are buffer positions where non-encodable
8867 characters are found.
8868
8869 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8870 value is nil.
8871
8872 START may be a string.  In that case, check if the string is
8873 encodable, and the value contains indices to the string instead of
8874 buffer positions.  END is ignored.
8875
8876 If the current buffer (or START if it is a string) is unibyte, the value
8877 is nil.  */)
8878      (start, end, coding_system_list)
8879      Lisp_Object start, end, coding_system_list;
8880 {
8881   Lisp_Object list;
8882   EMACS_INT start_byte, end_byte;
8883   int pos;
8884   const unsigned char *p, *pbeg, *pend;
8885   int c;
8886   Lisp_Object tail, elt, attrs;
8887
8888   if (STRINGP (start))
8889     {
8890       if (!STRING_MULTIBYTE (start)
8891           || SCHARS (start) == SBYTES (start))
8892         return Qnil;
8893       start_byte = 0;
8894       end_byte = SBYTES (start);
8895       pos = 0;
8896     }
8897   else
8898     {
8899       CHECK_NUMBER_COERCE_MARKER (start);
8900       CHECK_NUMBER_COERCE_MARKER (end);
8901       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8902         args_out_of_range (start, end);
8903       if (NILP (current_buffer->enable_multibyte_characters))
8904         return Qnil;
8905       start_byte = CHAR_TO_BYTE (XINT (start));
8906       end_byte = CHAR_TO_BYTE (XINT (end));
8907       if (XINT (end) - XINT (start) == end_byte - start_byte)
8908         return Qnil;
8909
8910       if (XINT (start) < GPT && XINT (end) > GPT)
8911         {
8912           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8913             move_gap_both (XINT (start), start_byte);
8914           else
8915             move_gap_both (XINT (end), end_byte);
8916         }
8917       pos = XINT (start);
8918     }
8919
8920   list = Qnil;
8921   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8922     {
8923       elt = XCAR (tail);
8924       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8925       ASET (attrs, coding_attr_trans_tbl,
8926             get_translation_table (attrs, 1, NULL));
8927       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8928     }
8929
8930   if (STRINGP (start))
8931     p = pbeg = SDATA (start);
8932   else
8933     p = pbeg = BYTE_POS_ADDR (start_byte);
8934   pend = p + (end_byte - start_byte);
8935
8936   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8937   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8938
8939   while (p < pend)
8940     {
8941       if (ASCII_BYTE_P (*p))
8942         p++;
8943       else
8944         {
8945           c = STRING_CHAR_ADVANCE (p);
8946
8947           charset_map_loaded = 0;
8948           for (tail = list; CONSP (tail); tail = XCDR (tail))
8949             {
8950               elt = XCDR (XCAR (tail));
8951               if (! char_encodable_p (c, XCAR (elt)))
8952                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8953             }
8954           if (charset_map_loaded)
8955             {
8956               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8957
8958               if (STRINGP (start))
8959                 pbeg = SDATA (start);
8960               else
8961                 pbeg = BYTE_POS_ADDR (start_byte);
8962               p = pbeg + p_offset;
8963               pend = pbeg + pend_offset;
8964             }
8965         }
8966       pos++;
8967     }
8968
8969   tail = list;
8970   list = Qnil;
8971   for (; CONSP (tail); tail = XCDR (tail))
8972     {
8973       elt = XCAR (tail);
8974       if (CONSP (XCDR (XCDR (elt))))
8975         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8976                       list);
8977     }
8978
8979   return list;
8980 }
8981
8982
8983 Lisp_Object
8984 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8985      Lisp_Object start, end, coding_system, dst_object;
8986      int encodep, norecord;
8987 {
8988   struct coding_system coding;
8989   EMACS_INT from, from_byte, to, to_byte;
8990   Lisp_Object src_object;
8991
8992   CHECK_NUMBER_COERCE_MARKER (start);
8993   CHECK_NUMBER_COERCE_MARKER (end);
8994   if (NILP (coding_system))
8995     coding_system = Qno_conversion;
8996   else
8997     CHECK_CODING_SYSTEM (coding_system);
8998   src_object = Fcurrent_buffer ();
8999   if (NILP (dst_object))
9000     dst_object = src_object;
9001   else if (! EQ (dst_object, Qt))
9002     CHECK_BUFFER (dst_object);
9003
9004   validate_region (&start, &end);
9005   from = XFASTINT (start);
9006   from_byte = CHAR_TO_BYTE (from);
9007   to = XFASTINT (end);
9008   to_byte = CHAR_TO_BYTE (to);
9009
9010   setup_coding_system (coding_system, &coding);
9011   coding.mode |= CODING_MODE_LAST_BLOCK;
9012
9013   if (encodep)
9014     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9015                           dst_object);
9016   else
9017     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9018                           dst_object);
9019   if (! norecord)
9020     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9021
9022   return (BUFFERP (dst_object)
9023           ? make_number (coding.produced_char)
9024           : coding.dst_object);
9025 }
9026
9027
9028 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
9029        3, 4, "r\nzCoding system: ",
9030        doc: /* Decode the current region from the specified coding system.
9031 When called from a program, takes four arguments:
9032         START, END, CODING-SYSTEM, and DESTINATION.
9033 START and END are buffer positions.
9034
9035 Optional 4th arguments DESTINATION specifies where the decoded text goes.
9036 If nil, the region between START and END is replaced by the decoded text.
9037 If buffer, the decoded text is inserted in that buffer after point (point
9038 does not move).
9039 In those cases, the length of the decoded text is returned.
9040 If DESTINATION is t, the decoded text is returned.
9041
9042 This function sets `last-coding-system-used' to the precise coding system
9043 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9044 not fully specified.)  */)
9045      (start, end, coding_system, destination)
9046      Lisp_Object start, end, coding_system, destination;
9047 {
9048   return code_convert_region (start, end, coding_system, destination, 0, 0);
9049 }
9050
9051 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
9052        3, 4, "r\nzCoding system: ",
9053        doc: /* Encode the current region by specified coding system.
9054 When called from a program, takes four arguments:
9055         START, END, CODING-SYSTEM and DESTINATION.
9056 START and END are buffer positions.
9057
9058 Optional 4th arguments DESTINATION specifies where the encoded text goes.
9059 If nil, the region between START and END is replace by the encoded text.
9060 If buffer, the encoded text is inserted in that buffer after point (point
9061 does not move).
9062 In those cases, the length of the encoded text is returned.
9063 If DESTINATION is t, the encoded text is returned.
9064
9065 This function sets `last-coding-system-used' to the precise coding system
9066 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9067 not fully specified.)  */)
9068   (start, end, coding_system, destination)
9069      Lisp_Object start, end, coding_system, destination;
9070 {
9071   return code_convert_region (start, end, coding_system, destination, 1, 0);
9072 }
9073
9074 Lisp_Object
9075 code_convert_string (string, coding_system, dst_object,
9076                      encodep, nocopy, norecord)
9077      Lisp_Object string, coding_system, dst_object;
9078      int encodep, nocopy, norecord;
9079 {
9080   struct coding_system coding;
9081   EMACS_INT chars, bytes;
9082
9083   CHECK_STRING (string);
9084   if (NILP (coding_system))
9085     {
9086       if (! norecord)
9087         Vlast_coding_system_used = Qno_conversion;
9088       if (NILP (dst_object))
9089         return (nocopy ? Fcopy_sequence (string) : string);
9090     }
9091
9092   if (NILP (coding_system))
9093     coding_system = Qno_conversion;
9094   else
9095     CHECK_CODING_SYSTEM (coding_system);
9096   if (NILP (dst_object))
9097     dst_object = Qt;
9098   else if (! EQ (dst_object, Qt))
9099     CHECK_BUFFER (dst_object);
9100
9101   setup_coding_system (coding_system, &coding);
9102   coding.mode |= CODING_MODE_LAST_BLOCK;
9103   chars = SCHARS (string);
9104   bytes = SBYTES (string);
9105   if (encodep)
9106     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9107   else
9108     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9109   if (! norecord)
9110     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9111
9112   return (BUFFERP (dst_object)
9113           ? make_number (coding.produced_char)
9114           : coding.dst_object);
9115 }
9116
9117
9118 /* Encode or decode STRING according to CODING_SYSTEM.
9119    Do not set Vlast_coding_system_used.
9120
9121    This function is called only from macros DECODE_FILE and
9122    ENCODE_FILE, thus we ignore character composition.  */
9123
9124 Lisp_Object
9125 code_convert_string_norecord (string, coding_system, encodep)
9126      Lisp_Object string, coding_system;
9127      int encodep;
9128 {
9129   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9130 }
9131
9132
9133 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9134        2, 4, 0,
9135        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9136
9137 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9138 if the decoding operation is trivial.
9139
9140 Optional fourth arg BUFFER non-nil means that the decoded text is
9141 inserted in that buffer after point (point does not move).  In this
9142 case, the return value is the length of the decoded text.
9143
9144 This function sets `last-coding-system-used' to the precise coding system
9145 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9146 not fully specified.)  */)
9147   (string, coding_system, nocopy, buffer)
9148      Lisp_Object string, coding_system, nocopy, buffer;
9149 {
9150   return code_convert_string (string, coding_system, buffer,
9151                               0, ! NILP (nocopy), 0);
9152 }
9153
9154 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9155        2, 4, 0,
9156        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9157
9158 Optional third arg NOCOPY non-nil means it is OK to return STRING
9159 itself if the encoding operation is trivial.
9160
9161 Optional fourth arg BUFFER non-nil means that the encoded text is
9162 inserted in that buffer after point (point does not move).  In this
9163 case, the return value is the length of the encoded text.
9164
9165 This function sets `last-coding-system-used' to the precise coding system
9166 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9167 not fully specified.)  */)
9168      (string, coding_system, nocopy, buffer)
9169      Lisp_Object string, coding_system, nocopy, buffer;
9170 {
9171   return code_convert_string (string, coding_system, buffer,
9172                               1, ! NILP (nocopy), 1);
9173 }
9174
9175 \f
9176 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9177        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9178 Return the corresponding character.  */)
9179      (code)
9180      Lisp_Object code;
9181 {
9182   Lisp_Object spec, attrs, val;
9183   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9184   int c;
9185
9186   CHECK_NATNUM (code);
9187   c = XFASTINT (code);
9188   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9189   attrs = AREF (spec, 0);
9190
9191   if (ASCII_BYTE_P (c)
9192       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9193     return code;
9194
9195   val = CODING_ATTR_CHARSET_LIST (attrs);
9196   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9197   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9198   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9199
9200   if (c <= 0x7F)
9201     charset = charset_roman;
9202   else if (c >= 0xA0 && c < 0xDF)
9203     {
9204       charset = charset_kana;
9205       c -= 0x80;
9206     }
9207   else
9208     {
9209       int s1 = c >> 8, s2 = c & 0xFF;
9210
9211       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9212           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9213         error ("Invalid code: %d", code);
9214       SJIS_TO_JIS (c);
9215       charset = charset_kanji;
9216     }
9217   c = DECODE_CHAR (charset, c);
9218   if (c < 0)
9219     error ("Invalid code: %d", code);
9220   return make_number (c);
9221 }
9222
9223
9224 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9225        doc: /* Encode a Japanese character CH to shift_jis encoding.
9226 Return the corresponding code in SJIS.  */)
9227      (ch)
9228     Lisp_Object ch;
9229 {
9230   Lisp_Object spec, attrs, charset_list;
9231   int c;
9232   struct charset *charset;
9233   unsigned code;
9234
9235   CHECK_CHARACTER (ch);
9236   c = XFASTINT (ch);
9237   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9238   attrs = AREF (spec, 0);
9239
9240   if (ASCII_CHAR_P (c)
9241       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9242     return ch;
9243
9244   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9245   charset = char_charset (c, charset_list, &code);
9246   if (code == CHARSET_INVALID_CODE (charset))
9247     error ("Can't encode by shift_jis encoding: %d", c);
9248   JIS_TO_SJIS (code);
9249
9250   return make_number (code);
9251 }
9252
9253 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9254        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9255 Return the corresponding character.  */)
9256      (code)
9257      Lisp_Object code;
9258 {
9259   Lisp_Object spec, attrs, val;
9260   struct charset *charset_roman, *charset_big5, *charset;
9261   int c;
9262
9263   CHECK_NATNUM (code);
9264   c = XFASTINT (code);
9265   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9266   attrs = AREF (spec, 0);
9267
9268   if (ASCII_BYTE_P (c)
9269       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9270     return code;
9271
9272   val = CODING_ATTR_CHARSET_LIST (attrs);
9273   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9274   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9275
9276   if (c <= 0x7F)
9277     charset = charset_roman;
9278   else
9279     {
9280       int b1 = c >> 8, b2 = c & 0x7F;
9281       if (b1 < 0xA1 || b1 > 0xFE
9282           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9283         error ("Invalid code: %d", code);
9284       charset = charset_big5;
9285     }
9286   c = DECODE_CHAR (charset, (unsigned )c);
9287   if (c < 0)
9288     error ("Invalid code: %d", code);
9289   return make_number (c);
9290 }
9291
9292 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9293        doc: /* Encode the Big5 character CH to BIG5 coding system.
9294 Return the corresponding character code in Big5.  */)
9295      (ch)
9296      Lisp_Object ch;
9297 {
9298   Lisp_Object spec, attrs, charset_list;
9299   struct charset *charset;
9300   int c;
9301   unsigned code;
9302
9303   CHECK_CHARACTER (ch);
9304   c = XFASTINT (ch);
9305   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9306   attrs = AREF (spec, 0);
9307   if (ASCII_CHAR_P (c)
9308       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9309     return ch;
9310
9311   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9312   charset = char_charset (c, charset_list, &code);
9313   if (code == CHARSET_INVALID_CODE (charset))
9314     error ("Can't encode by Big5 encoding: %d", c);
9315
9316   return make_number (code);
9317 }
9318
9319 \f
9320 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9321        Sset_terminal_coding_system_internal, 1, 2, 0,
9322        doc: /* Internal use only.  */)
9323      (coding_system, terminal)
9324      Lisp_Object coding_system;
9325      Lisp_Object terminal;
9326 {
9327   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9328   CHECK_SYMBOL (coding_system);
9329   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9330   /* We had better not send unsafe characters to terminal.  */
9331   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9332   /* Characer composition should be disabled.  */
9333   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9334   terminal_coding->src_multibyte = 1;
9335   terminal_coding->dst_multibyte = 0;
9336   return Qnil;
9337 }
9338
9339 DEFUN ("set-safe-terminal-coding-system-internal",
9340        Fset_safe_terminal_coding_system_internal,
9341        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9342        doc: /* Internal use only.  */)
9343      (coding_system)
9344      Lisp_Object coding_system;
9345 {
9346   CHECK_SYMBOL (coding_system);
9347   setup_coding_system (Fcheck_coding_system (coding_system),
9348                        &safe_terminal_coding);
9349   /* Characer composition should be disabled.  */
9350   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9351   safe_terminal_coding.src_multibyte = 1;
9352   safe_terminal_coding.dst_multibyte = 0;
9353   return Qnil;
9354 }
9355
9356 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9357        Sterminal_coding_system, 0, 1, 0,
9358        doc: /* Return coding system specified for terminal output on the given terminal.
9359 TERMINAL may be a terminal object, a frame, or nil for the selected
9360 frame's terminal device.  */)
9361      (terminal)
9362      Lisp_Object terminal;
9363 {
9364   struct coding_system *terminal_coding
9365     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9366   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9367
9368   /* For backward compatibility, return nil if it is `undecided'. */
9369   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9370 }
9371
9372 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9373        Sset_keyboard_coding_system_internal, 1, 2, 0,
9374        doc: /* Internal use only.  */)
9375      (coding_system, terminal)
9376      Lisp_Object coding_system;
9377      Lisp_Object terminal;
9378 {
9379   struct terminal *t = get_terminal (terminal, 1);
9380   CHECK_SYMBOL (coding_system);
9381   setup_coding_system (Fcheck_coding_system (coding_system),
9382                        TERMINAL_KEYBOARD_CODING (t));
9383   /* Characer composition should be disabled.  */
9384   TERMINAL_KEYBOARD_CODING (t)->common_flags
9385     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9386   return Qnil;
9387 }
9388
9389 DEFUN ("keyboard-coding-system",
9390        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9391        doc: /* Return coding system specified for decoding keyboard input.  */)
9392      (terminal)
9393      Lisp_Object terminal;
9394 {
9395   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9396                          (get_terminal (terminal, 1))->id);
9397 }
9398
9399 \f
9400 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9401        Sfind_operation_coding_system,  1, MANY, 0,
9402        doc: /* Choose a coding system for an operation based on the target name.
9403 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9404 DECODING-SYSTEM is the coding system to use for decoding
9405 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9406 for encoding (in case OPERATION does encoding).
9407
9408 The first argument OPERATION specifies an I/O primitive:
9409   For file I/O, `insert-file-contents' or `write-region'.
9410   For process I/O, `call-process', `call-process-region', or `start-process'.
9411   For network I/O, `open-network-stream'.
9412
9413 The remaining arguments should be the same arguments that were passed
9414 to the primitive.  Depending on which primitive, one of those arguments
9415 is selected as the TARGET.  For example, if OPERATION does file I/O,
9416 whichever argument specifies the file name is TARGET.
9417
9418 TARGET has a meaning which depends on OPERATION:
9419   For file I/O, TARGET is a file name (except for the special case below).
9420   For process I/O, TARGET is a process name.
9421   For network I/O, TARGET is a service name or a port number.
9422
9423 This function looks up what is specified for TARGET in
9424 `file-coding-system-alist', `process-coding-system-alist',
9425 or `network-coding-system-alist' depending on OPERATION.
9426 They may specify a coding system, a cons of coding systems,
9427 or a function symbol to call.
9428 In the last case, we call the function with one argument,
9429 which is a list of all the arguments given to this function.
9430 If the function can't decide a coding system, it can return
9431 `undecided' so that the normal code-detection is performed.
9432
9433 If OPERATION is `insert-file-contents', the argument corresponding to
9434 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9435 file name to look up, and BUFFER is a buffer that contains the file's
9436 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9437 function to call for FILENAME, that function should examine the
9438 contents of BUFFER instead of reading the file.
9439
9440 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9441      (nargs, args)
9442      int nargs;
9443      Lisp_Object *args;
9444 {
9445   Lisp_Object operation, target_idx, target, val;
9446   register Lisp_Object chain;
9447
9448   if (nargs < 2)
9449     error ("Too few arguments");
9450   operation = args[0];
9451   if (!SYMBOLP (operation)
9452       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9453     error ("Invalid first argument");
9454   if (nargs < 1 + XINT (target_idx))
9455     error ("Too few arguments for operation: %s",
9456            SDATA (SYMBOL_NAME (operation)));
9457   target = args[XINT (target_idx) + 1];
9458   if (!(STRINGP (target)
9459         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9460             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9461         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9462     error ("Invalid %dth argument", XINT (target_idx) + 1);
9463   if (CONSP (target))
9464     target = XCAR (target);
9465
9466   chain = ((EQ (operation, Qinsert_file_contents)
9467             || EQ (operation, Qwrite_region))
9468            ? Vfile_coding_system_alist
9469            : (EQ (operation, Qopen_network_stream)
9470               ? Vnetwork_coding_system_alist
9471               : Vprocess_coding_system_alist));
9472   if (NILP (chain))
9473     return Qnil;
9474
9475   for (; CONSP (chain); chain = XCDR (chain))
9476     {
9477       Lisp_Object elt;
9478
9479       elt = XCAR (chain);
9480       if (CONSP (elt)
9481           && ((STRINGP (target)
9482                && STRINGP (XCAR (elt))
9483                && fast_string_match (XCAR (elt), target) >= 0)
9484               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9485         {
9486           val = XCDR (elt);
9487           /* Here, if VAL is both a valid coding system and a valid
9488              function symbol, we return VAL as a coding system.  */
9489           if (CONSP (val))
9490             return val;
9491           if (! SYMBOLP (val))
9492             return Qnil;
9493           if (! NILP (Fcoding_system_p (val)))
9494             return Fcons (val, val);
9495           if (! NILP (Ffboundp (val)))
9496             {
9497               /* We use call1 rather than safe_call1
9498                  so as to get bug reports about functions called here
9499                  which don't handle the current interface.  */
9500               val = call1 (val, Flist (nargs, args));
9501               if (CONSP (val))
9502                 return val;
9503               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9504                 return Fcons (val, val);
9505             }
9506           return Qnil;
9507         }
9508     }
9509   return Qnil;
9510 }
9511
9512 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9513        Sset_coding_system_priority, 0, MANY, 0,
9514        doc: /* Assign higher priority to the coding systems given as arguments.
9515 If multiple coding systems belong to the same category,
9516 all but the first one are ignored.
9517
9518 usage: (set-coding-system-priority &rest coding-systems)  */)
9519      (nargs, args)
9520      int nargs;
9521      Lisp_Object *args;
9522 {
9523   int i, j;
9524   int changed[coding_category_max];
9525   enum coding_category priorities[coding_category_max];
9526
9527   bzero (changed, sizeof changed);
9528
9529   for (i = j = 0; i < nargs; i++)
9530     {
9531       enum coding_category category;
9532       Lisp_Object spec, attrs;
9533
9534       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9535       attrs = AREF (spec, 0);
9536       category = XINT (CODING_ATTR_CATEGORY (attrs));
9537       if (changed[category])
9538         /* Ignore this coding system because a coding system of the
9539            same category already had a higher priority.  */
9540         continue;
9541       changed[category] = 1;
9542       priorities[j++] = category;
9543       if (coding_categories[category].id >= 0
9544           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9545         setup_coding_system (args[i], &coding_categories[category]);
9546       Fset (AREF (Vcoding_category_table, category), args[i]);
9547     }
9548
9549   /* Now we have decided top J priorities.  Reflect the order of the
9550      original priorities to the remaining priorities.  */
9551
9552   for (i = j, j = 0; i < coding_category_max; i++, j++)
9553     {
9554       while (j < coding_category_max
9555              && changed[coding_priorities[j]])
9556         j++;
9557       if (j == coding_category_max)
9558         abort ();
9559       priorities[i] = coding_priorities[j];
9560     }
9561
9562   bcopy (priorities, coding_priorities, sizeof priorities);
9563
9564   /* Update `coding-category-list'.  */
9565   Vcoding_category_list = Qnil;
9566   for (i = coding_category_max - 1; i >= 0; i--)
9567     Vcoding_category_list
9568       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9569                Vcoding_category_list);
9570
9571   return Qnil;
9572 }
9573
9574 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9575        Scoding_system_priority_list, 0, 1, 0,
9576        doc: /* Return a list of coding systems ordered by their priorities.
9577 The list contains a subset of coding systems; i.e. coding systems
9578 assigned to each coding category (see `coding-category-list').
9579
9580 HIGHESTP non-nil means just return the highest priority one.  */)
9581      (highestp)
9582      Lisp_Object highestp;
9583 {
9584   int i;
9585   Lisp_Object val;
9586
9587   for (i = 0, val = Qnil; i < coding_category_max; i++)
9588     {
9589       enum coding_category category = coding_priorities[i];
9590       int id = coding_categories[category].id;
9591       Lisp_Object attrs;
9592
9593       if (id < 0)
9594         continue;
9595       attrs = CODING_ID_ATTRS (id);
9596       if (! NILP (highestp))
9597         return CODING_ATTR_BASE_NAME (attrs);
9598       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9599     }
9600   return Fnreverse (val);
9601 }
9602
9603 static char *suffixes[] = { "-unix", "-dos", "-mac" };
9604
9605 static Lisp_Object
9606 make_subsidiaries (base)
9607      Lisp_Object base;
9608 {
9609   Lisp_Object subsidiaries;
9610   int base_name_len = SBYTES (SYMBOL_NAME (base));
9611   char *buf = (char *) alloca (base_name_len + 6);
9612   int i;
9613
9614   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9615   subsidiaries = Fmake_vector (make_number (3), Qnil);
9616   for (i = 0; i < 3; i++)
9617     {
9618       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9619       ASET (subsidiaries, i, intern (buf));
9620     }
9621   return subsidiaries;
9622 }
9623
9624
9625 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9626        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9627        doc: /* For internal use only.
9628 usage: (define-coding-system-internal ...)  */)
9629      (nargs, args)
9630      int nargs;
9631      Lisp_Object *args;
9632 {
9633   Lisp_Object name;
9634   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9635   Lisp_Object attrs;            /* Vector of attributes.  */
9636   Lisp_Object eol_type;
9637   Lisp_Object aliases;
9638   Lisp_Object coding_type, charset_list, safe_charsets;
9639   enum coding_category category;
9640   Lisp_Object tail, val;
9641   int max_charset_id = 0;
9642   int i;
9643
9644   if (nargs < coding_arg_max)
9645     goto short_args;
9646
9647   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9648
9649   name = args[coding_arg_name];
9650   CHECK_SYMBOL (name);
9651   CODING_ATTR_BASE_NAME (attrs) = name;
9652
9653   val = args[coding_arg_mnemonic];
9654   if (! STRINGP (val))
9655     CHECK_CHARACTER (val);
9656   CODING_ATTR_MNEMONIC (attrs) = val;
9657
9658   coding_type = args[coding_arg_coding_type];
9659   CHECK_SYMBOL (coding_type);
9660   CODING_ATTR_TYPE (attrs) = coding_type;
9661
9662   charset_list = args[coding_arg_charset_list];
9663   if (SYMBOLP (charset_list))
9664     {
9665       if (EQ (charset_list, Qiso_2022))
9666         {
9667           if (! EQ (coding_type, Qiso_2022))
9668             error ("Invalid charset-list");
9669           charset_list = Viso_2022_charset_list;
9670         }
9671       else if (EQ (charset_list, Qemacs_mule))
9672         {
9673           if (! EQ (coding_type, Qemacs_mule))
9674             error ("Invalid charset-list");
9675           charset_list = Vemacs_mule_charset_list;
9676         }
9677       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9678         if (max_charset_id < XFASTINT (XCAR (tail)))
9679           max_charset_id = XFASTINT (XCAR (tail));
9680     }
9681   else
9682     {
9683       charset_list = Fcopy_sequence (charset_list);
9684       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9685         {
9686           struct charset *charset;
9687
9688           val = XCAR (tail);
9689           CHECK_CHARSET_GET_CHARSET (val, charset);
9690           if (EQ (coding_type, Qiso_2022)
9691               ? CHARSET_ISO_FINAL (charset) < 0
9692               : EQ (coding_type, Qemacs_mule)
9693               ? CHARSET_EMACS_MULE_ID (charset) < 0
9694               : 0)
9695             error ("Can't handle charset `%s'",
9696                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9697
9698           XSETCAR (tail, make_number (charset->id));
9699           if (max_charset_id < charset->id)
9700             max_charset_id = charset->id;
9701         }
9702     }
9703   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9704
9705   safe_charsets = make_uninit_string (max_charset_id + 1);
9706   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9707   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9708     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9709   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9710
9711   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9712
9713   val = args[coding_arg_decode_translation_table];
9714   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9715     CHECK_SYMBOL (val);
9716   CODING_ATTR_DECODE_TBL (attrs) = val;
9717
9718   val = args[coding_arg_encode_translation_table];
9719   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9720     CHECK_SYMBOL (val);
9721   CODING_ATTR_ENCODE_TBL (attrs) = val;
9722
9723   val = args[coding_arg_post_read_conversion];
9724   CHECK_SYMBOL (val);
9725   CODING_ATTR_POST_READ (attrs) = val;
9726
9727   val = args[coding_arg_pre_write_conversion];
9728   CHECK_SYMBOL (val);
9729   CODING_ATTR_PRE_WRITE (attrs) = val;
9730
9731   val = args[coding_arg_default_char];
9732   if (NILP (val))
9733     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9734   else
9735     {
9736       CHECK_CHARACTER (val);
9737       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9738     }
9739
9740   val = args[coding_arg_for_unibyte];
9741   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9742
9743   val = args[coding_arg_plist];
9744   CHECK_LIST (val);
9745   CODING_ATTR_PLIST (attrs) = val;
9746
9747   if (EQ (coding_type, Qcharset))
9748     {
9749       /* Generate a lisp vector of 256 elements.  Each element is nil,
9750          integer, or a list of charset IDs.
9751
9752          If Nth element is nil, the byte code N is invalid in this
9753          coding system.
9754
9755          If Nth element is a number NUM, N is the first byte of a
9756          charset whose ID is NUM.
9757
9758          If Nth element is a list of charset IDs, N is the first byte
9759          of one of them.  The list is sorted by dimensions of the
         charsets.  A charset of smaller dimension comes first. */
9761       val = Fmake_vector (make_number (256), Qnil);
9762
9763       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9764         {
9765           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9766           int dim = CHARSET_DIMENSION (charset);
9767           int idx = (dim - 1) * 4;
9768
9769           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9770             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9771
9772           for (i = charset->code_space[idx];
9773                i <= charset->code_space[idx + 1]; i++)
9774             {
9775               Lisp_Object tmp, tmp2;
9776               int dim2;
9777
9778               tmp = AREF (val, i);
9779               if (NILP (tmp))
9780                 tmp = XCAR (tail);
9781               else if (NUMBERP (tmp))
9782                 {
9783                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9784                   if (dim < dim2)
9785                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9786                   else
9787                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9788                 }
9789               else
9790                 {
9791                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9792                     {
9793                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9794                       if (dim < dim2)
9795                         break;
9796                     }
9797                   if (NILP (tmp2))
9798                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9799                   else
9800                     {
9801                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9802                       XSETCAR (tmp2, XCAR (tail));
9803                     }
9804                 }
9805               ASET (val, i, tmp);
9806             }
9807         }
9808       ASET (attrs, coding_attr_charset_valids, val);
9809       category = coding_category_charset;
9810     }
9811   else if (EQ (coding_type, Qccl))
9812     {
9813       Lisp_Object valids;
9814
9815       if (nargs < coding_arg_ccl_max)
9816         goto short_args;
9817
9818       val = args[coding_arg_ccl_decoder];
9819       CHECK_CCL_PROGRAM (val);
9820       if (VECTORP (val))
9821         val = Fcopy_sequence (val);
9822       ASET (attrs, coding_attr_ccl_decoder, val);
9823
9824       val = args[coding_arg_ccl_encoder];
9825       CHECK_CCL_PROGRAM (val);
9826       if (VECTORP (val))
9827         val = Fcopy_sequence (val);
9828       ASET (attrs, coding_attr_ccl_encoder, val);
9829
9830       val = args[coding_arg_ccl_valids];
9831       valids = Fmake_string (make_number (256), make_number (0));
9832       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9833         {
9834           int from, to;
9835
9836           val = Fcar (tail);
9837           if (INTEGERP (val))
9838             {
9839               from = to = XINT (val);
9840               if (from < 0 || from > 255)
9841                 args_out_of_range_3 (val, make_number (0), make_number (255));
9842             }
9843           else
9844             {
9845               CHECK_CONS (val);
9846               CHECK_NATNUM_CAR (val);
9847               CHECK_NATNUM_CDR (val);
9848               from = XINT (XCAR (val));
9849               if (from > 255)
9850                 args_out_of_range_3 (XCAR (val),
9851                                      make_number (0), make_number (255));
9852               to = XINT (XCDR (val));
9853               if (to < from || to > 255)
9854                 args_out_of_range_3 (XCDR (val),
9855                                      XCAR (val), make_number (255));
9856             }
9857           for (i = from; i <= to; i++)
9858             SSET (valids, i, 1);
9859         }
9860       ASET (attrs, coding_attr_ccl_valids, valids);
9861
9862       category = coding_category_ccl;
9863     }
9864   else if (EQ (coding_type, Qutf_16))
9865     {
9866       Lisp_Object bom, endian;
9867
9868       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9869
9870       if (nargs < coding_arg_utf16_max)
9871         goto short_args;
9872
9873       bom = args[coding_arg_utf16_bom];
9874       if (! NILP (bom) && ! EQ (bom, Qt))
9875         {
9876           CHECK_CONS (bom);
9877           val = XCAR (bom);
9878           CHECK_CODING_SYSTEM (val);
9879           val = XCDR (bom);
9880           CHECK_CODING_SYSTEM (val);
9881         }
9882       ASET (attrs, coding_attr_utf_bom, bom);
9883
9884       endian = args[coding_arg_utf16_endian];
9885       CHECK_SYMBOL (endian);
9886       if (NILP (endian))
9887         endian = Qbig;
9888       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9889         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9890       ASET (attrs, coding_attr_utf_16_endian, endian);
9891
9892       category = (CONSP (bom)
9893                   ? coding_category_utf_16_auto
9894                   : NILP (bom)
9895                   ? (EQ (endian, Qbig)
9896                      ? coding_category_utf_16_be_nosig
9897                      : coding_category_utf_16_le_nosig)
9898                   : (EQ (endian, Qbig)
9899                      ? coding_category_utf_16_be
9900                      : coding_category_utf_16_le));
9901     }
9902   else if (EQ (coding_type, Qiso_2022))
9903     {
9904       Lisp_Object initial, reg_usage, request, flags;
9905       int i;
9906
9907       if (nargs < coding_arg_iso2022_max)
9908         goto short_args;
9909
9910       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9911       CHECK_VECTOR (initial);
9912       for (i = 0; i < 4; i++)
9913         {
9914           val = Faref (initial, make_number (i));
9915           if (! NILP (val))
9916             {
9917               struct charset *charset;
9918
9919               CHECK_CHARSET_GET_CHARSET (val, charset);
9920               ASET (initial, i, make_number (CHARSET_ID (charset)));
9921               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9922                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9923             }
9924           else
9925             ASET (initial, i, make_number (-1));
9926         }
9927
9928       reg_usage = args[coding_arg_iso2022_reg_usage];
9929       CHECK_CONS (reg_usage);
9930       CHECK_NUMBER_CAR (reg_usage);
9931       CHECK_NUMBER_CDR (reg_usage);
9932
9933       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9934       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9935         {
9936           int id;
9937           Lisp_Object tmp;
9938
9939           val = Fcar (tail);
9940           CHECK_CONS (val);
9941           tmp = XCAR (val);
9942           CHECK_CHARSET_GET_ID (tmp, id);
9943           CHECK_NATNUM_CDR (val);
9944           if (XINT (XCDR (val)) >= 4)
9945             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9946           XSETCAR (val, make_number (id));
9947         }
9948
9949       flags = args[coding_arg_iso2022_flags];
9950       CHECK_NATNUM (flags);
9951       i = XINT (flags);
9952       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9953         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9954
9955       ASET (attrs, coding_attr_iso_initial, initial);
9956       ASET (attrs, coding_attr_iso_usage, reg_usage);
9957       ASET (attrs, coding_attr_iso_request, request);
9958       ASET (attrs, coding_attr_iso_flags, flags);
9959       setup_iso_safe_charsets (attrs);
9960
9961       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9962         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9963                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9964                     ? coding_category_iso_7_else
9965                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9966                     ? coding_category_iso_7
9967                     : coding_category_iso_7_tight);
9968       else
9969         {
9970           int id = XINT (AREF (initial, 1));
9971
9972           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9973                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9974                        || id < 0)
9975                       ? coding_category_iso_8_else
9976                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9977                       ? coding_category_iso_8_1
9978                       : coding_category_iso_8_2);
9979         }
9980       if (category != coding_category_iso_8_1
9981           && category != coding_category_iso_8_2)
9982         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9983     }
9984   else if (EQ (coding_type, Qemacs_mule))
9985     {
9986       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9987         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9988       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9989       category = coding_category_emacs_mule;
9990     }
9991   else if (EQ (coding_type, Qshift_jis))
9992     {
9993
9994       struct charset *charset;
9995
9996       if (XINT (Flength (charset_list)) != 3
9997           && XINT (Flength (charset_list)) != 4)
9998         error ("There should be three or four charsets");
9999
10000       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10001       if (CHARSET_DIMENSION (charset) != 1)
10002         error ("Dimension of charset %s is not one",
10003                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10004       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10005         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10006
10007       charset_list = XCDR (charset_list);
10008       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10009       if (CHARSET_DIMENSION (charset) != 1)
10010         error ("Dimension of charset %s is not one",
10011                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10012
10013       charset_list = XCDR (charset_list);
10014       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10015       if (CHARSET_DIMENSION (charset) != 2)
10016         error ("Dimension of charset %s is not two",
10017                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10018
10019       charset_list = XCDR (charset_list);
10020       if (! NILP (charset_list))
10021         {
10022           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10023           if (CHARSET_DIMENSION (charset) != 2)
10024             error ("Dimension of charset %s is not two",
10025                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10026         }
10027
10028       category = coding_category_sjis;
10029       Vsjis_coding_system = name;
10030     }
10031   else if (EQ (coding_type, Qbig5))
10032     {
10033       struct charset *charset;
10034
10035       if (XINT (Flength (charset_list)) != 2)
10036         error ("There should be just two charsets");
10037
10038       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10039       if (CHARSET_DIMENSION (charset) != 1)
10040         error ("Dimension of charset %s is not one",
10041                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10042       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10043         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10044
10045       charset_list = XCDR (charset_list);
10046       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10047       if (CHARSET_DIMENSION (charset) != 2)
10048         error ("Dimension of charset %s is not two",
10049                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10050
10051       category = coding_category_big5;
10052       Vbig5_coding_system = name;
10053     }
10054   else if (EQ (coding_type, Qraw_text))
10055     {
10056       category = coding_category_raw_text;
10057       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10058     }
10059   else if (EQ (coding_type, Qutf_8))
10060     {
10061       Lisp_Object bom;
10062
10063       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10064
10065       if (nargs < coding_arg_utf8_max)
10066         goto short_args;
10067
10068       bom = args[coding_arg_utf8_bom];
10069       if (! NILP (bom) && ! EQ (bom, Qt))
10070         {
10071           CHECK_CONS (bom);
10072           val = XCAR (bom);
10073           CHECK_CODING_SYSTEM (val);
10074           val = XCDR (bom);
10075           CHECK_CODING_SYSTEM (val);
10076         }
10077       ASET (attrs, coding_attr_utf_bom, bom);
10078
10079       category = (CONSP (bom) ? coding_category_utf_8_auto
10080                   : NILP (bom) ? coding_category_utf_8_nosig
10081                   : coding_category_utf_8_sig);
10082     }
10083   else if (EQ (coding_type, Qundecided))
10084     category = coding_category_undecided;
10085   else
10086     error ("Invalid coding system type: %s",
10087            SDATA (SYMBOL_NAME (coding_type)));
10088
10089   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10090   CODING_ATTR_PLIST (attrs)
10091     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10092                                 CODING_ATTR_PLIST (attrs)));
10093   CODING_ATTR_PLIST (attrs)
10094     = Fcons (QCascii_compatible_p,
10095              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10096                     CODING_ATTR_PLIST (attrs)));
10097
10098   eol_type = args[coding_arg_eol_type];
10099   if (! NILP (eol_type)
10100       && ! EQ (eol_type, Qunix)
10101       && ! EQ (eol_type, Qdos)
10102       && ! EQ (eol_type, Qmac))
10103     error ("Invalid eol-type");
10104
10105   aliases = Fcons (name, Qnil);
10106
10107   if (NILP (eol_type))
10108     {
10109       eol_type = make_subsidiaries (name);
10110       for (i = 0; i < 3; i++)
10111         {
10112           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10113
10114           this_name = AREF (eol_type, i);
10115           this_aliases = Fcons (this_name, Qnil);
10116           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10117           this_spec = Fmake_vector (make_number (3), attrs);
10118           ASET (this_spec, 1, this_aliases);
10119           ASET (this_spec, 2, this_eol_type);
10120           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10121           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10122           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10123           if (NILP (val))
10124             Vcoding_system_alist
10125               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10126                        Vcoding_system_alist);
10127         }
10128     }
10129
10130   spec_vec = Fmake_vector (make_number (3), attrs);
10131   ASET (spec_vec, 1, aliases);
10132   ASET (spec_vec, 2, eol_type);
10133
10134   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10135   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10136   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10137   if (NILP (val))
10138     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10139                                   Vcoding_system_alist);
10140
10141   {
10142     int id = coding_categories[category].id;
10143
10144     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10145       setup_coding_system (name, &coding_categories[category]);
10146   }
10147
10148   return Qnil;
10149
10150  short_args:
10151   return Fsignal (Qwrong_number_of_arguments,
10152                   Fcons (intern ("define-coding-system-internal"),
10153                          make_number (nargs)));
10154 }
10155
10156
10157 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10158        3, 3, 0,
10159        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10160   (coding_system, prop, val)
10161      Lisp_Object coding_system, prop, val;
10162 {
10163   Lisp_Object spec, attrs;
10164
10165   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10166   attrs = AREF (spec, 0);
10167   if (EQ (prop, QCmnemonic))
10168     {
10169       if (! STRINGP (val))
10170         CHECK_CHARACTER (val);
10171       CODING_ATTR_MNEMONIC (attrs) = val;
10172     }
10173   else if (EQ (prop, QCdefault_char))
10174     {
10175       if (NILP (val))
10176         val = make_number (' ');
10177       else
10178         CHECK_CHARACTER (val);
10179       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10180     }
10181   else if (EQ (prop, QCdecode_translation_table))
10182     {
10183       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10184         CHECK_SYMBOL (val);
10185       CODING_ATTR_DECODE_TBL (attrs) = val;
10186     }
10187   else if (EQ (prop, QCencode_translation_table))
10188     {
10189       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10190         CHECK_SYMBOL (val);
10191       CODING_ATTR_ENCODE_TBL (attrs) = val;
10192     }
10193   else if (EQ (prop, QCpost_read_conversion))
10194     {
10195       CHECK_SYMBOL (val);
10196       CODING_ATTR_POST_READ (attrs) = val;
10197     }
10198   else if (EQ (prop, QCpre_write_conversion))
10199     {
10200       CHECK_SYMBOL (val);
10201       CODING_ATTR_PRE_WRITE (attrs) = val;
10202     }
10203   else if (EQ (prop, QCascii_compatible_p))
10204     {
10205       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10206     }
10207
10208   CODING_ATTR_PLIST (attrs)
10209     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10210   return val;
10211 }
10212
10213
10214 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10215        Sdefine_coding_system_alias, 2, 2, 0,
10216        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10217      (alias, coding_system)
10218      Lisp_Object alias, coding_system;
10219 {
10220   Lisp_Object spec, aliases, eol_type, val;
10221
10222   CHECK_SYMBOL (alias);
10223   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10224   aliases = AREF (spec, 1);
10225   /* ALIASES should be a list of length more than zero, and the first
10226      element is a base coding system.  Append ALIAS at the tail of the
10227      list.  */
10228   while (!NILP (XCDR (aliases)))
10229     aliases = XCDR (aliases);
10230   XSETCDR (aliases, Fcons (alias, Qnil));
10231
10232   eol_type = AREF (spec, 2);
10233   if (VECTORP (eol_type))
10234     {
10235       Lisp_Object subsidiaries;
10236       int i;
10237
10238       subsidiaries = make_subsidiaries (alias);
10239       for (i = 0; i < 3; i++)
10240         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10241                                      AREF (eol_type, i));
10242     }
10243
10244   Fputhash (alias, spec, Vcoding_system_hash_table);
10245   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10246   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10247   if (NILP (val))
10248     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10249                                   Vcoding_system_alist);
10250
10251   return Qnil;
10252 }
10253
10254 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10255        1, 1, 0,
10256        doc: /* Return the base of CODING-SYSTEM.
10257 Any alias or subsidiary coding system is not a base coding system.  */)
10258   (coding_system)
10259      Lisp_Object coding_system;
10260 {
10261   Lisp_Object spec, attrs;
10262
10263   if (NILP (coding_system))
10264     return (Qno_conversion);
10265   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10266   attrs = AREF (spec, 0);
10267   return CODING_ATTR_BASE_NAME (attrs);
10268 }
10269
10270 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10271        1, 1, 0,
10272        doc: "Return the property list of CODING-SYSTEM.")
10273      (coding_system)
10274      Lisp_Object coding_system;
10275 {
10276   Lisp_Object spec, attrs;
10277
10278   if (NILP (coding_system))
10279     coding_system = Qno_conversion;
10280   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10281   attrs = AREF (spec, 0);
10282   return CODING_ATTR_PLIST (attrs);
10283 }
10284
10285
10286 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10287        1, 1, 0,
10288        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10289      (coding_system)
10290      Lisp_Object coding_system;
10291 {
10292   Lisp_Object spec;
10293
10294   if (NILP (coding_system))
10295     coding_system = Qno_conversion;
10296   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10297   return AREF (spec, 1);
10298 }
10299
10300 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10301        Scoding_system_eol_type, 1, 1, 0,
10302        doc: /* Return eol-type of CODING-SYSTEM.
10303 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10304
10305 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10306 and CR respectively.
10307
10308 A vector value indicates that a format of end-of-line should be
10309 detected automatically.  Nth element of the vector is the subsidiary
10310 coding system whose eol-type is N.  */)
10311      (coding_system)
10312      Lisp_Object coding_system;
10313 {
10314   Lisp_Object spec, eol_type;
10315   int n;
10316
10317   if (NILP (coding_system))
10318     coding_system = Qno_conversion;
10319   if (! CODING_SYSTEM_P (coding_system))
10320     return Qnil;
10321   spec = CODING_SYSTEM_SPEC (coding_system);
10322   eol_type = AREF (spec, 2);
10323   if (VECTORP (eol_type))
10324     return Fcopy_sequence (eol_type);
10325   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10326   return make_number (n);
10327 }
10328
10329 #endif /* emacs */
10330
10331 \f
/*** 12. Postamble ***/
10333
10334 void
10335 init_coding_once ()
10336 {
10337   int i;
10338
10339   for (i = 0; i < coding_category_max; i++)
10340     {
10341       coding_categories[i].id = -1;
10342       coding_priorities[i] = i;
10343     }
10344
10345   /* ISO2022 specific initialize routine.  */
10346   for (i = 0; i < 0x20; i++)
10347     iso_code_class[i] = ISO_control_0;
10348   for (i = 0x21; i < 0x7F; i++)
10349     iso_code_class[i] = ISO_graphic_plane_0;
10350   for (i = 0x80; i < 0xA0; i++)
10351     iso_code_class[i] = ISO_control_1;
10352   for (i = 0xA1; i < 0xFF; i++)
10353     iso_code_class[i] = ISO_graphic_plane_1;
10354   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10355   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10356   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10357   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10358   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10359   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10360   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10361   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10362   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10363
10364   for (i = 0; i < 256; i++)
10365     {
10366       emacs_mule_bytes[i] = 1;
10367     }
10368   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10369   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10370   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10371   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10372 }
10373
10374 #ifdef emacs
10375
10376 void
10377 syms_of_coding ()
10378 {
10379   staticpro (&Vcoding_system_hash_table);
10380   {
10381     Lisp_Object args[2];
10382     args[0] = QCtest;
10383     args[1] = Qeq;
10384     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10385   }
10386
10387   staticpro (&Vsjis_coding_system);
10388   Vsjis_coding_system = Qnil;
10389
10390   staticpro (&Vbig5_coding_system);
10391   Vbig5_coding_system = Qnil;
10392
10393   staticpro (&Vcode_conversion_reused_workbuf);
10394   Vcode_conversion_reused_workbuf = Qnil;
10395
10396   staticpro (&Vcode_conversion_workbuf_name);
10397   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
10398
10399   reused_workbuf_in_use = 0;
10400
10401   DEFSYM (Qcharset, "charset");
10402   DEFSYM (Qtarget_idx, "target-idx");
10403   DEFSYM (Qcoding_system_history, "coding-system-history");
10404   Fset (Qcoding_system_history, Qnil);
10405
10406   /* Target FILENAME is the first argument.  */
10407   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10408   /* Target FILENAME is the third argument.  */
10409   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10410
10411   DEFSYM (Qcall_process, "call-process");
10412   /* Target PROGRAM is the first argument.  */
10413   Fput (Qcall_process, Qtarget_idx, make_number (0));
10414
10415   DEFSYM (Qcall_process_region, "call-process-region");
10416   /* Target PROGRAM is the third argument.  */
10417   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10418
10419   DEFSYM (Qstart_process, "start-process");
10420   /* Target PROGRAM is the third argument.  */
10421   Fput (Qstart_process, Qtarget_idx, make_number (2));
10422
10423   DEFSYM (Qopen_network_stream, "open-network-stream");
10424   /* Target SERVICE is the fourth argument.  */
10425   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10426
10427   DEFSYM (Qcoding_system, "coding-system");
10428   DEFSYM (Qcoding_aliases, "coding-aliases");
10429
10430   DEFSYM (Qeol_type, "eol-type");
10431   DEFSYM (Qunix, "unix");
10432   DEFSYM (Qdos, "dos");
10433
10434   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10435   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10436   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10437   DEFSYM (Qdefault_char, "default-char");
10438   DEFSYM (Qundecided, "undecided");
10439   DEFSYM (Qno_conversion, "no-conversion");
10440   DEFSYM (Qraw_text, "raw-text");
10441
10442   DEFSYM (Qiso_2022, "iso-2022");
10443
10444   DEFSYM (Qutf_8, "utf-8");
10445   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10446
10447   DEFSYM (Qutf_16, "utf-16");
10448   DEFSYM (Qbig, "big");
10449   DEFSYM (Qlittle, "little");
10450
10451   DEFSYM (Qshift_jis, "shift-jis");
10452   DEFSYM (Qbig5, "big5");
10453
10454   DEFSYM (Qcoding_system_p, "coding-system-p");
10455
10456   DEFSYM (Qcoding_system_error, "coding-system-error");
10457   Fput (Qcoding_system_error, Qerror_conditions,
10458         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
10459   Fput (Qcoding_system_error, Qerror_message,
10460         build_string ("Invalid coding system"));
10461
10462   /* Intern this now in case it isn't already done.
10463      Setting this variable twice is harmless.
10464      But don't staticpro it here--that is done in alloc.c.  */
10465   Qchar_table_extra_slots = intern ("char-table-extra-slots");
10466
10467   DEFSYM (Qtranslation_table, "translation-table");
10468   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10469   DEFSYM (Qtranslation_table_id, "translation-table-id");
10470   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10471   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10472
10473   DEFSYM (Qvalid_codes, "valid-codes");
10474
10475   DEFSYM (Qemacs_mule, "emacs-mule");
10476
10477   DEFSYM (QCcategory, ":category");
10478   DEFSYM (QCmnemonic, ":mnemonic");
10479   DEFSYM (QCdefault_char, ":default-char");
10480   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10481   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10482   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10483   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10484   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10485
10486   Vcoding_category_table
10487     = Fmake_vector (make_number (coding_category_max), Qnil);
10488   staticpro (&Vcoding_category_table);
  /* The following are targets of code detection.  */
10490   ASET (Vcoding_category_table, coding_category_iso_7,
10491         intern ("coding-category-iso-7"));
10492   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10493         intern ("coding-category-iso-7-tight"));
10494   ASET (Vcoding_category_table, coding_category_iso_8_1,
10495         intern ("coding-category-iso-8-1"));
10496   ASET (Vcoding_category_table, coding_category_iso_8_2,
10497         intern ("coding-category-iso-8-2"));
10498   ASET (Vcoding_category_table, coding_category_iso_7_else,
10499         intern ("coding-category-iso-7-else"));
10500   ASET (Vcoding_category_table, coding_category_iso_8_else,
10501         intern ("coding-category-iso-8-else"));
10502   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10503         intern ("coding-category-utf-8-auto"));
10504   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10505         intern ("coding-category-utf-8"));
10506   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10507         intern ("coding-category-utf-8-sig"));
10508   ASET (Vcoding_category_table, coding_category_utf_16_be,
10509         intern ("coding-category-utf-16-be"));
10510   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10511         intern ("coding-category-utf-16-auto"));
10512   ASET (Vcoding_category_table, coding_category_utf_16_le,
10513         intern ("coding-category-utf-16-le"));
10514   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10515         intern ("coding-category-utf-16-be-nosig"));
10516   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10517         intern ("coding-category-utf-16-le-nosig"));
10518   ASET (Vcoding_category_table, coding_category_charset,
10519         intern ("coding-category-charset"));
10520   ASET (Vcoding_category_table, coding_category_sjis,
10521         intern ("coding-category-sjis"));
10522   ASET (Vcoding_category_table, coding_category_big5,
10523         intern ("coding-category-big5"));
10524   ASET (Vcoding_category_table, coding_category_ccl,
10525         intern ("coding-category-ccl"));
10526   ASET (Vcoding_category_table, coding_category_emacs_mule,
10527         intern ("coding-category-emacs-mule"));
  /* The following are NOT targets of code detection.  */
10529   ASET (Vcoding_category_table, coding_category_raw_text,
10530         intern ("coding-category-raw-text"));
10531   ASET (Vcoding_category_table, coding_category_undecided,
10532         intern ("coding-category-undecided"));
10533
10534   DEFSYM (Qinsufficient_source, "insufficient-source");
10535   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10536   DEFSYM (Qinvalid_source, "invalid-source");
10537   DEFSYM (Qinterrupted, "interrupted");
10538   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10539   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10540
10541   defsubr (&Scoding_system_p);
10542   defsubr (&Sread_coding_system);
10543   defsubr (&Sread_non_nil_coding_system);
10544   defsubr (&Scheck_coding_system);
10545   defsubr (&Sdetect_coding_region);
10546   defsubr (&Sdetect_coding_string);
10547   defsubr (&Sfind_coding_systems_region_internal);
10548   defsubr (&Sunencodable_char_position);
10549   defsubr (&Scheck_coding_systems_region);
10550   defsubr (&Sdecode_coding_region);
10551   defsubr (&Sencode_coding_region);
10552   defsubr (&Sdecode_coding_string);
10553   defsubr (&Sencode_coding_string);
10554   defsubr (&Sdecode_sjis_char);
10555   defsubr (&Sencode_sjis_char);
10556   defsubr (&Sdecode_big5_char);
10557   defsubr (&Sencode_big5_char);
10558   defsubr (&Sset_terminal_coding_system_internal);
10559   defsubr (&Sset_safe_terminal_coding_system_internal);
10560   defsubr (&Sterminal_coding_system);
10561   defsubr (&Sset_keyboard_coding_system_internal);
10562   defsubr (&Skeyboard_coding_system);
10563   defsubr (&Sfind_operation_coding_system);
10564   defsubr (&Sset_coding_system_priority);
10565   defsubr (&Sdefine_coding_system_internal);
10566   defsubr (&Sdefine_coding_system_alias);
10567   defsubr (&Scoding_system_put);
10568   defsubr (&Scoding_system_base);
10569   defsubr (&Scoding_system_plist);
10570   defsubr (&Scoding_system_aliases);
10571   defsubr (&Scoding_system_eol_type);
10572   defsubr (&Scoding_system_priority_list);
10573
10574   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10575                doc: /* List of coding systems.
10576
10577 Do not alter the value of this variable manually.  This variable should be
10578 updated by the functions `define-coding-system' and
10579 `define-coding-system-alias'.  */);
10580   Vcoding_system_list = Qnil;
10581
10582   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10583                doc: /* Alist of coding system names.
Each element is a one-element list containing a coding system name.
10585 This variable is given to `completing-read' as COLLECTION argument.
10586
10587 Do not alter the value of this variable manually.  This variable should be
10588 updated by the functions `make-coding-system' and
10589 `define-coding-system-alias'.  */);
10590   Vcoding_system_alist = Qnil;
10591
10592   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10593                doc: /* List of coding-categories (symbols) ordered by priority.
10594
10595 On detecting a coding system, Emacs tries code detection algorithms
10596 associated with each coding-category one by one in this order.  When
10597 one algorithm agrees with a byte sequence of source text, the coding
10598 system bound to the corresponding coding-category is selected.
10599
Don't modify this variable directly, but use `set-coding-system-priority'.  */);
10601   {
10602     int i;
10603
10604     Vcoding_category_list = Qnil;
10605     for (i = coding_category_max - 1; i >= 0; i--)
10606       Vcoding_category_list
10607         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10608                  Vcoding_category_list);
10609   }
10610
10611   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10612                doc: /* Specify the coding system for read operations.
10613 It is useful to bind this variable with `let', but do not set it globally.
10614 If the value is a coding system, it is used for decoding on read operation.
10615 If not, an appropriate element is used from one of the coding system alists.
10616 There are three such tables: `file-coding-system-alist',
10617 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10618   Vcoding_system_for_read = Qnil;
10619
10620   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10621                doc: /* Specify the coding system for write operations.
10622 Programs bind this variable with `let', but you should not set it globally.
10623 If the value is a coding system, it is used for encoding of output,
10624 when writing it to a file and when sending it to a file or subprocess.
10625
10626 If this does not specify a coding system, an appropriate element
10627 is used from one of the coding system alists.
10628 There are three such tables: `file-coding-system-alist',
10629 `process-coding-system-alist', and `network-coding-system-alist'.
10630 For output to files, if the above procedure does not specify a coding system,
10631 the value of `buffer-file-coding-system' is used.  */);
10632   Vcoding_system_for_write = Qnil;
10633
10634   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10635                doc: /*
10636 Coding system used in the latest file or process I/O.  */);
10637   Vlast_coding_system_used = Qnil;
10638
10639   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10640                doc: /*
10641 Error status of the last code conversion.
10642
10643 When an error was detected in the last code conversion, this variable
10644 is set to one of the following symbols.
10645   `insufficient-source'
10646   `inconsistent-eol'
10647   `invalid-source'
10648   `interrupted'
10649   `insufficient-memory'
10650 When no error was detected, the value doesn't change.  So, to check
10651 the error status of a code conversion by this variable, you must
10652 explicitly set this variable to nil before performing code
10653 conversion.  */);
10654   Vlast_code_conversion_error = Qnil;
10655
10656   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10657                doc: /*
10658 *Non-nil means always inhibit code conversion of end-of-line format.
10659 See info node `Coding Systems' and info node `Text and Binary' concerning
10660 such conversion.  */);
10661   inhibit_eol_conversion = 0;
10662
10663   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10664                doc: /*
10665 Non-nil means process buffer inherits coding system of process output.
10666 Bind it to t if the process output is to be treated as if it were a file
10667 read from some filesystem.  */);
10668   inherit_process_coding_system = 0;
10669
10670   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10671                doc: /*
10672 Alist to decide a coding system to use for a file I/O operation.
10673 The format is ((PATTERN . VAL) ...),
10674 where PATTERN is a regular expression matching a file name,
10675 VAL is a coding system, a cons of coding systems, or a function symbol.
10676 If VAL is a coding system, it is used for both decoding and encoding
10677 the file contents.
10678 If VAL is a cons of coding systems, the car part is used for decoding,
10679 and the cdr part is used for encoding.
10680 If VAL is a function symbol, the function must return a coding system
10681 or a cons of coding systems which are used as above.  The function is
10682 called with an argument that is a list of the arguments with which
10683 `find-operation-coding-system' was called.  If the function can't decide
10684 a coding system, it can return `undecided' so that the normal
10685 code-detection is performed.
10686
10687 See also the function `find-operation-coding-system'
10688 and the variable `auto-coding-alist'.  */);
10689   Vfile_coding_system_alist = Qnil;
10690
10691   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10692                doc: /*
10693 Alist to decide a coding system to use for a process I/O operation.
10694 The format is ((PATTERN . VAL) ...),
10695 where PATTERN is a regular expression matching a program name,
10696 VAL is a coding system, a cons of coding systems, or a function symbol.
If VAL is a coding system, it is used for both decoding what is received
from the program and encoding what is sent to the program.
10699 If VAL is a cons of coding systems, the car part is used for decoding,
10700 and the cdr part is used for encoding.
10701 If VAL is a function symbol, the function must return a coding system
10702 or a cons of coding systems which are used as above.
10703
10704 See also the function `find-operation-coding-system'.  */);
10705   Vprocess_coding_system_alist = Qnil;
10706
10707   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10708                doc: /*
10709 Alist to decide a coding system to use for a network I/O operation.
10710 The format is ((PATTERN . VAL) ...),
10711 where PATTERN is a regular expression matching a network service name
10712 or is a port number to connect to,
10713 VAL is a coding system, a cons of coding systems, or a function symbol.
If VAL is a coding system, it is used for both decoding what is received
from the network stream and encoding what is sent to the network stream.
10716 If VAL is a cons of coding systems, the car part is used for decoding,
10717 and the cdr part is used for encoding.
10718 If VAL is a function symbol, the function must return a coding system
10719 or a cons of coding systems which are used as above.
10720
10721 See also the function `find-operation-coding-system'.  */);
10722   Vnetwork_coding_system_alist = Qnil;
10723
10724   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10725                doc: /* Coding system to use with system messages.
10726 Also used for decoding keyboard input on X Window system.  */);
10727   Vlocale_coding_system = Qnil;
10728
10729   /* The eol mnemonics are reset in startup.el system-dependently.  */
10730   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10731                doc: /*
10732 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10733   eol_mnemonic_unix = build_string (":");
10734
10735   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10736                doc: /*
10737 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10738   eol_mnemonic_dos = build_string ("\\");
10739
10740   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10741                doc: /*
10742 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10743   eol_mnemonic_mac = build_string ("/");
10744
10745   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10746                doc: /*
10747 *String displayed in mode line when end-of-line format is not yet determined.  */);
10748   eol_mnemonic_undecided = build_string (":");
10749
10750   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10751                doc: /*
10752 *Non-nil enables character translation while encoding and decoding.  */);
10753   Venable_character_translation = Qt;
10754
10755   DEFVAR_LISP ("standard-translation-table-for-decode",
10756                &Vstandard_translation_table_for_decode,
10757                doc: /* Table for translating characters while decoding.  */);
10758   Vstandard_translation_table_for_decode = Qnil;
10759
10760   DEFVAR_LISP ("standard-translation-table-for-encode",
10761                &Vstandard_translation_table_for_encode,
10762                doc: /* Table for translating characters while encoding.  */);
10763   Vstandard_translation_table_for_encode = Qnil;
10764
10765   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10766                doc: /* Alist of charsets vs revision numbers.
10767 While encoding, if a charset (car part of an element) is found,
10768 designate it with the escape sequence identifying revision (cdr part
10769 of the element).  */);
10770   Vcharset_revision_table = Qnil;
10771
10772   DEFVAR_LISP ("default-process-coding-system",
10773                &Vdefault_process_coding_system,
10774                doc: /* Cons of coding systems used for process I/O by default.
10775 The car part is used for decoding a process output,
10776 the cdr part is used for encoding a text to be sent to a process.  */);
10777   Vdefault_process_coding_system = Qnil;
10778
10779   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10780                doc: /*
10781 Table of extra Latin codes in the range 128..159 (inclusive).
10782 This is a vector of length 256.
10783 If Nth element is non-nil, the existence of code N in a file
\(or output of subprocess) doesn't prevent it from being detected as
10785 a coding system of ISO 2022 variant which has a flag
10786 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10787 or reading output of a subprocess.
10788 Only 128th through 159th elements have a meaning.  */);
10789   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10790
10791   DEFVAR_LISP ("select-safe-coding-system-function",
10792                &Vselect_safe_coding_system_function,
10793                doc: /*
10794 Function to call to select safe coding system for encoding a text.
10795
10796 If set, this function is called to force a user to select a proper
10797 coding system which can encode the text in the case that a default
10798 coding system used in each operation can't encode the text.  The
10799 function should take care that the buffer is not modified while
10800 the coding system is being selected.
10801
10802 The default value is `select-safe-coding-system' (which see).  */);
10803   Vselect_safe_coding_system_function = Qnil;
10804
10805   DEFVAR_BOOL ("coding-system-require-warning",
10806                &coding_system_require_warning,
10807                doc: /* Internal use only.
10808 If non-nil, on writing a file, `select-safe-coding-system-function' is
10809 called even if `coding-system-for-write' is non-nil.  The command
10810 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10811   coding_system_require_warning = 0;
10812
10813
10814   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10815                &inhibit_iso_escape_detection,
10816                doc: /*
10817 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10818
10819 When Emacs reads text, it tries to detect how the text is encoded.
10820 This code detection is sensitive to escape sequences.  If Emacs sees
10821 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10822 of the ISO2022 encodings, and decodes text by the corresponding coding
10823 system (e.g. `iso-2022-7bit').
10824
10825 However, there may be a case that you want to read escape sequences in
10826 a file as is.  In such a case, you can set this variable to non-nil.
10827 Then the code detection will ignore any escape sequences, and no text is
10828 detected as encoded in some ISO-2022 encoding.  The result is that all
10829 escape sequences become visible in a buffer.
10830
10831 The default value is nil, and it is strongly recommended not to change
10832 it.  That is because many Emacs Lisp source files that contain
10833 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10834 in Emacs's distribution, and they won't be decoded correctly on
10835 reading if you suppress escape sequence detection.
10836
10837 The other way to read escape sequences in a file without decoding is
10838 to explicitly specify some coding system that doesn't use ISO-2022
escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument].  */);
10840   inhibit_iso_escape_detection = 0;
10841
10842   DEFVAR_BOOL ("inhibit-null-byte-detection",
10843                &inhibit_null_byte_detection,
10844                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10845 By default, Emacs treats it as binary data, and does not attempt to
10846 decode it.  The effect is as if you specified `no-conversion' for
10847 reading that text.
10848
10849 Set this to non-nil when a regular text happens to include null bytes.
10850 Examples are Index nodes of Info files and null-byte delimited output
10851 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10852 decode text as usual.  */);
10853   inhibit_null_byte_detection = 0;
10854
10855   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10856                doc: /* Char table for translating self-inserting characters.
10857 This is applied to the result of input methods, not their input.
10858 See also `keyboard-translate-table'.
10859
10860 Use of this variable for character code unification was rendered
10861 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10862 internal character representation.  */);
10863     Vtranslation_table_for_input = Qnil;
10864
10865   {
10866     Lisp_Object args[coding_arg_max];
10867     Lisp_Object plist[16];
10868     int i;
10869
10870     for (i = 0; i < coding_arg_max; i++)
10871       args[i] = Qnil;
10872
10873     plist[0] = intern (":name");
10874     plist[1] = args[coding_arg_name] = Qno_conversion;
10875     plist[2] = intern (":mnemonic");
10876     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10877     plist[4] = intern (":coding-type");
10878     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10879     plist[6] = intern (":ascii-compatible-p");
10880     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10881     plist[8] = intern (":default-char");
10882     plist[9] = args[coding_arg_default_char] = make_number (0);
10883     plist[10] = intern (":for-unibyte");
10884     plist[11] = args[coding_arg_for_unibyte] = Qt;
10885     plist[12] = intern (":docstring");
10886     plist[13] = build_string ("Do no conversion.\n\
10887 \n\
10888 When you visit a file with this coding, the file is read into a\n\
10889 unibyte buffer as is, thus each byte of a file is treated as a\n\
10890 character.");
10891     plist[14] = intern (":eol-type");
10892     plist[15] = args[coding_arg_eol_type] = Qunix;
10893     args[coding_arg_plist] = Flist (16, plist);
10894     Fdefine_coding_system_internal (coding_arg_max, args);
10895
10896     plist[1] = args[coding_arg_name] = Qundecided;
10897     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10898     plist[5] = args[coding_arg_coding_type] = Qundecided;
10899     /* This is already set.
10900        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10901     plist[8] = intern (":charset-list");
10902     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10903     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10904     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10905     plist[15] = args[coding_arg_eol_type] = Qnil;
10906     args[coding_arg_plist] = Flist (16, plist);
10907     Fdefine_coding_system_internal (coding_arg_max, args);
10908   }
10909
10910   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10911
10912   {
10913     int i;
10914
10915     for (i = 0; i < coding_category_max; i++)
10916       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10917   }
10918 #if defined (MSDOS) || defined (WINDOWSNT)
10919   system_eol_type = Qdos;
10920 #else
10921   system_eol_type = Qunix;
10922 #endif
10923   staticpro (&system_eol_type);
10924 }
10925
10926 char *
10927 emacs_strerror (error_number)
10928      int error_number;
10929 {
10930   char *str;
10931
10932   synchronize_system_messages_locale ();
10933   str = strerror (error_number);
10934
10935   if (! NILP (Vlocale_coding_system))
10936     {
10937       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10938                                                       Vlocale_coding_system,
10939                                                       0);
10940       str = (char *) SDATA (dec);
10941     }
10942
10943   return str;
10944 }
10945
10946 #endif /* emacs */
10947
10948 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10949    (do not change this comment) */