src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995, 1997, 1998, 2002 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001,2002  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /*** TABLE OF CONTENTS ***
  24
  25   0. General comments
  26   1. Preamble
  27   2. Emacs' internal format (emacs-mule) handlers
  28   3. ISO2022 handlers
  29   4. Shift-JIS and BIG5 handlers
  30   5. CCL handlers
  31   6. End-of-line handlers
  32   7. C library functions
  33   8. Emacs Lisp library functions
  34   9. Post-amble
  35
  36 */
  37
  38 /*** 0. General comments ***/
  39
  40
  41 /*** GENERAL NOTE on CODING SYSTEMS ***
  42
  43   A coding system is an encoding mechanism for one or more character
  44   sets.  Here's a list of coding systems which Emacs can handle.  When
  45   we say "decode", it means converting some other coding system to
  46   Emacs' internal format (emacs-mule), and when we say "encode",
  47   it means converting the coding system emacs-mule to some other
  48   coding system.
  49
  50   0. Emacs' internal format (emacs-mule)
  51
  52   Emacs itself holds a multi-lingual character in buffers and strings
  53   in a special format.  Details are described in section 2.
  54
  55   1. ISO2022
  56
  57   The most famous coding system for multiple character sets.  X's
  58   Compound Text, various EUCs (Extended Unix Code), and coding
  59   systems used in Internet communication such as ISO-2022-JP are
  60   all variants of ISO2022.  Details are described in section 3.
  61
  62   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  63
  64   A coding system to encode character sets: ASCII, JISX0201, and
  65   JISX0208.  Widely used for PC's in Japan.  Details are described in
  66   section 4.
  67
  68   3. BIG5
  69
  70   A coding system to encode the character sets ASCII and Big5.  Widely
  71   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  72   described in section 4.  In this file, when we write "BIG5"
  73   (all uppercase), we mean the coding system, and when we write
  74   "Big5" (capitalized), we mean the character set.
  75
  76   4. Raw text
  77
  78   A coding system for text containing random 8-bit code.  Emacs does
  79   no code conversion on such text except for end-of-line format.
  80
  81   5. Other
  82
  83   If a user wants to read/write text encoded in a coding system not
  84   listed above, he can supply a decoder and an encoder for it as CCL
  85   (Code Conversion Language) programs.  Emacs executes the CCL program
  86   while reading/writing.
  87
  88   Emacs represents a coding system by a Lisp symbol that has a property
  89   `coding-system'.  But, before actually using the coding system, the
  90   information about it is set in a structure of type `struct
  91   coding_system' for rapid processing.  See section 6 for more details.
  92
  93 */
  94
  95 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  96
  97   How end-of-line of text is encoded depends on the operating system.
  98   For instance, Unix's format is just one byte of `line-feed' code,
  99   whereas DOS's format is two-byte sequence of `carriage-return' and
 100   `line-feed' codes.  MacOS's format is usually one byte of
 101   `carriage-return'.
 102
 103   Since text character encoding and end-of-line encoding are
 104   independent, any coding system described above can have any
 105   end-of-line format.  So Emacs has information about end-of-line
 106   format in each coding-system.  See section 6 for more details.
 107
 108 */
 109
 110 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 111
 112   These functions check if a text between SRC and SRC_END is encoded
 113   in the coding system category XXX.  Each returns an integer value in
 114   which appropriate flag bits for the category XXX are set.  The flag
 115   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 116   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 117   of the range 0x80..0x9F are in multibyte form.  */
 118 #if 0
 119 int
 120 detect_coding_emacs_mule (src, src_end, multibytep)
 121      unsigned char *src, *src_end;
 122      int multibytep;
 123 {
 124   ...
 125 }
 126 #endif
 127
 128 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 129
 130   These functions decode SRC_BYTES length of unibyte text at SOURCE
 131   encoded in CODING to Emacs' internal format.  The resulting
 132   multibyte text goes to a place pointed to by DESTINATION, the length
 133   of which should not exceed DST_BYTES.
 134
 135   These functions set the information about original and decoded texts
 136   in the members `produced', `produced_char', `consumed', and
 137   `consumed_char' of the structure *CODING.  They also set the member
 138   `result' to one of CODING_FINISH_XXX indicating how the decoding
 139   finished.
 140
 141   DST_BYTES zero means that the source area and destination area are
 142   overlapped, which means that we can produce a decoded text until it
 143   reaches the head of the not-yet-decoded source text.
 144
 145   Below is a template for these functions.  */
 146 #if 0
 147 static void
 148 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 149      struct coding_system *coding;
 150      unsigned char *source, *destination;
 151      int src_bytes, dst_bytes;
 152 {
 153   ...
 154 }
 155 #endif
 156
 157 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 158
 159   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 160   internal multibyte format to CODING.  The resulting unibyte text
 161   goes to a place pointed to by DESTINATION, the length of which
 162   should not exceed DST_BYTES.
 163
 164   These functions set the information about original and encoded texts
 165   in the members `produced', `produced_char', `consumed', and
 166   `consumed_char' of the structure *CODING.  They also set the member
 167   `result' to one of CODING_FINISH_XXX indicating how the encoding
 168   finished.
 169
 170   DST_BYTES zero means that the source area and destination area are
 171   overlapped, which means that we can produce encoded text until it
  172   reaches the head of the not-yet-encoded source text.
 173
 174   Below is a template for these functions.  */
 175 #if 0
 176 static void
 177 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 178      struct coding_system *coding;
 179      unsigned char *source, *destination;
 180      int src_bytes, dst_bytes;
 181 {
 182   ...
 183 }
 184 #endif
 185
 186 /*** COMMONLY USED MACROS ***/
 187
  188 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
  189    get one and two bytes from the source text respectively.
  190    If there are not enough bytes in the source, they jump to
  191    `label_end_of_loop'.  The caller should set variables `coding',
  192    `src' and `src_end' to appropriate pointers in advance.  These
  193    macros are called from decoding routines `decode_coding_XXX', thus
  194    it is assumed that the source text is unibyte.  */
  195 /* Fetch one source byte into C1, advancing `src'.  */
  196 #define ONE_MORE_BYTE(c1)                                       \
  197   do {                                                          \
  198     if (src >= src_end)  /* No byte left.  */                   \
  199       {                                                         \
  200         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  201         goto label_end_of_loop;                                 \
  202       }                                                         \
  203     c1 = *src++;                                                \
  204   } while (0)
  205 /* Fetch two source bytes into C1 and C2, advancing `src' by two.  */
  206 #define TWO_MORE_BYTES(c1, c2)                                  \
  207   do {                                                          \
  208     if (src + 1 >= src_end)  /* Fewer than two bytes left.  */  \
  209       {                                                         \
  210         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  211         goto label_end_of_loop;                                 \
  212       }                                                         \
  213     c1 = *src++;                                                \
  214     c2 = *src++;                                                \
  215   } while (0)
 216
 217
  218 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
  219    form if MULTIBYTEP is nonzero: after LEADING_CODE_8_BIT_CONTROL, C1
  220    is set to the following byte minus 0x20.  */
  221 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
  222   do {                                                          \
  223     if (src >= src_end)                                         \
  224       {                                                         \
  225         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  226         goto label_end_of_loop;                                 \
  227       }                                                         \
  228     c1 = *src++;                                                \
  229     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
  230       c1 = *src++ - 0x20;  /* NOTE(review): unchecked read.  */ \
  231   } while (0)
 232
 233 /* Set C to the next character at the source text pointed by `src'.
 234    If there are not enough characters in the source, jump to
  235    `label_end_of_loop'.  The caller should set variables `coding',
 236    `src', `src_end', and `translation_table' to appropriate pointers
 237    in advance.  This macro is used in encoding routines
 238    `encode_coding_XXX', thus it assumes that the source text is in
 239    multibyte form except for 8-bit characters.  8-bit characters are
 240    in multibyte form if coding->src_multibyte is nonzero, else they
 241    are represented by a single byte.  */
 242
  243 #define ONE_MORE_CHAR(c)                                        \
  244   do {                                                          \
  245     int len = src_end - src;  /* Bytes left in the source.  */  \
  246     int bytes;  /* Byte length of C at `src'.  */               \
  247     if (len <= 0)                                               \
  248       {                                                         \
  249         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  250         goto label_end_of_loop;                                 \
  251       }                                                         \
  252     if (coding->src_multibyte                                   \
  253         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
  254       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
  255     else                                                        \
  256       c = *src, bytes = 1;  /* Take a single raw byte.  */      \
  257     if (!NILP (translation_table))                              \
  258       c = translate_char (translation_table, c, -1, 0, 0);      \
  259     src += bytes;                                               \
  260   } while (0)
 261
 262
 263 /* Produce a multibyte form of character C to `dst'.  Jump to
 264    `label_end_of_loop' if there's not enough space at `dst'.
 265
 266    If we are now in the middle of a composition sequence, the decoded
 267    character may be ALTCHAR (for the current composition).  In that
 268    case, the character goes to coding->cmp_data->data instead of
 269    `dst'.
 270
 271    This macro is used in decoding routines.  */
 272
  273 #define EMIT_CHAR(c)                                                    \
  274   do {                                                                  \
  275     if (! COMPOSING_P (coding)                                          \
  276         || coding->composing == COMPOSITION_RELATIVE                    \
  277         || coding->composing == COMPOSITION_WITH_RULE)                  \
  278       {                                                                 \
  279         int bytes = CHAR_BYTES (c);  /* Room needed at `dst'.  */       \
  280         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
  281           {                                                             \
  282             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
  283             goto label_end_of_loop;                                     \
  284           }                                                             \
  285         dst += CHAR_STRING (c, dst);                                    \
  286         coding->produced_char++;                                        \
  287       }                                                                 \
  288     /* While composing (except COMPOSITION_RELATIVE), record C too.  */ \
  289     if (COMPOSING_P (coding)                                            \
  290         && coding->composing != COMPOSITION_RELATIVE)                   \
  291       {                                                                 \
  292         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
  293         coding->composition_rule_follows                                \
  294           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
  295       }                                                                 \
  296   } while (0)
 297
  298 /* Store byte C at `dst'; jump to `label_end_of_loop' if there is no room.  */
  299 #define EMIT_ONE_BYTE(c)                                        \
  300   do {                                                          \
  301     if (dst >= (dst_bytes ? dst_end : src))                     \
  302       {                                                         \
  303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
  304         goto label_end_of_loop;                                 \
  305       }                                                         \
  306     *dst++ = c;                                                 \
  307   } while (0)
  308 /* Store bytes C1 and C2 at `dst', or jump to `label_end_of_loop'.  */
  309 #define EMIT_TWO_BYTES(c1, c2)                                  \
  310   do {                                                          \
  311     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
  312       {                                                         \
  313         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
  314         goto label_end_of_loop;                                 \
  315       }                                                         \
  316     *dst++ = c1, *dst++ = c2;                                   \
  317   } while (0)
  318 /* Copy the bytes FROM..TO (TO excluded) to `dst', advancing FROM.  */
  319 #define EMIT_BYTES(from, to)                                    \
  320   do {                                                          \
  321     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
  322       {                                                         \
  323         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
  324         goto label_end_of_loop;                                 \
  325       }                                                         \
  326     while (from < to)                                           \
  327       *dst++ = *from++;                                         \
  328   } while (0)
 329
 330 \f
 331 /*** 1. Preamble ***/
 332
 333 #ifdef emacs
 334 #include <config.h>
 335 #endif
 336
 337 #include <stdio.h>
 338
 339 #ifdef emacs
 340
 341 #include "lisp.h"
 342 #include "buffer.h"
 343 #include "charset.h"
 344 #include "composite.h"
 345 #include "ccl.h"
 346 #include "coding.h"
 347 #include "window.h"
 348
 349 #else  /* not emacs */
 350
 351 #include "mulelib.h"
 352
 353 #endif /* not emacs */
 354
 355 Lisp_Object Qcoding_system, Qeol_type;
 356 Lisp_Object Qbuffer_file_coding_system;
 357 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 358 Lisp_Object Qno_conversion, Qundecided;
 359 Lisp_Object Qcoding_system_history;
 360 Lisp_Object Qsafe_chars;
 361 Lisp_Object Qvalid_codes;
 362
 363 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 364 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 365 Lisp_Object Qstart_process, Qopen_network_stream;
 366 Lisp_Object Qtarget_idx;
 367
 368 Lisp_Object Vselect_safe_coding_system_function;
 369
 370 int coding_system_require_warning;
 371
 372 /* Mnemonic string for each format of end-of-line.  */
 373 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 374 /* Mnemonic string to indicate format of end-of-line is not yet
 375    decided.  */
 376 Lisp_Object eol_mnemonic_undecided;
 377
 378 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 379    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 380 int system_eol_type;
 381
 382 #ifdef emacs
 383
 384 /* Information about which coding system is safe for which chars.
 385    The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
 386
 387    GENERIC-LIST is a list of generic coding systems which can encode
 388    any characters.
 389
 390    NON-GENERIC-ALIST is an alist of non generic coding systems vs the
 391    corresponding char table that contains safe chars.  */
 392 Lisp_Object Vcoding_system_safe_chars;
 393
 394 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 395
 396 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 397
 398 /* Coding system emacs-mule and raw-text are for converting only
 399    end-of-line format.  */
 400 Lisp_Object Qemacs_mule, Qraw_text;
 401
 402 /* Coding-systems are handed between Emacs Lisp programs and C internal
 403    routines by the following three variables.  */
 404 /* Coding-system for reading files and receiving data from process.  */
 405 Lisp_Object Vcoding_system_for_read;
 406 /* Coding-system for writing files and sending data to process.  */
 407 Lisp_Object Vcoding_system_for_write;
 408 /* Coding-system actually used in the latest I/O.  */
 409 Lisp_Object Vlast_coding_system_used;
 410
 411 /* A vector of length 256 which contains information about special
 412    Latin codes (especially for dealing with Microsoft codes).  */
 413 Lisp_Object Vlatin_extra_code_table;
 414
 415 /* Flag to inhibit code conversion of end-of-line format.  */
 416 int inhibit_eol_conversion;
 417
 418 /* Flag to inhibit ISO2022 escape sequence detection.  */
 419 int inhibit_iso_escape_detection;
 420
 421 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 422 int inherit_process_coding_system;
 423
 424 /* Coding system to be used to encode text for terminal display.  */
 425 struct coding_system terminal_coding;
 426
 427 /* Coding system to be used to encode text for terminal display when
 428    terminal coding system is nil.  */
 429 struct coding_system safe_terminal_coding;
 430
 431 /* Coding system of what is sent from terminal keyboard.  */
 432 struct coding_system keyboard_coding;
 433
 434 /* Default coding system to be used to write a file.  */
 435 struct coding_system default_buffer_file_coding;
 436
 437 Lisp_Object Vfile_coding_system_alist;
 438 Lisp_Object Vprocess_coding_system_alist;
 439 Lisp_Object Vnetwork_coding_system_alist;
 440
 441 Lisp_Object Vlocale_coding_system;
 442
 443 #endif /* emacs */
 444
 445 Lisp_Object Qcoding_category, Qcoding_category_index;
 446
 447 /* List of symbols `coding-category-xxx' ordered by priority.  */
 448 Lisp_Object Vcoding_category_list;
 449
 450 /* Table of coding categories (Lisp symbols).  */
 451 Lisp_Object Vcoding_category_table;
 452
  453 /* Symbol name of each coding category (CODING_CATEGORY_IDX_MAX slots).  */
  454 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
  455   "coding-category-emacs-mule",
  456   "coding-category-sjis",
  457   "coding-category-iso-7",
  458   "coding-category-iso-7-tight",
  459   "coding-category-iso-8-1",
  460   "coding-category-iso-8-2",
  461   "coding-category-iso-7-else",
  462   "coding-category-iso-8-else",
  463   "coding-category-ccl",
  464   "coding-category-big5",
  465   "coding-category-utf-8",
  466   "coding-category-utf-16-be",
  467   "coding-category-utf-16-le",
  468   "coding-category-raw-text",
  469   "coding-category-binary"
  470 };
 471
 472 /* Table of pointers to coding systems corresponding to each coding
 473    categories.  */
 474 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 475
 476 /* Table of coding category masks.  Nth element is a mask for a coding
 477    category of which priority is Nth.  */
 478 static
 479 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 480
 481 /* Flag to tell if we look up translation table on character code
 482    conversion.  */
 483 Lisp_Object Venable_character_translation;
 484 /* Standard translation table to look up on decoding (reading).  */
 485 Lisp_Object Vstandard_translation_table_for_decode;
 486 /* Standard translation table to look up on encoding (writing).  */
 487 Lisp_Object Vstandard_translation_table_for_encode;
 488
 489 Lisp_Object Qtranslation_table;
 490 Lisp_Object Qtranslation_table_id;
 491 Lisp_Object Qtranslation_table_for_decode;
 492 Lisp_Object Qtranslation_table_for_encode;
 493
 494 /* Alist of charsets vs revision number.  */
 495 Lisp_Object Vcharset_revision_alist;
 496
 497 /* Default coding systems used for process I/O.  */
 498 Lisp_Object Vdefault_process_coding_system;
 499
 500 /* Char table for translating Quail and self-inserting input.  */
 501 Lisp_Object Vtranslation_table_for_input;
 502
 503 /* Global flag to tell that we can't call post-read-conversion and
 504    pre-write-conversion functions.  Usually the value is zero, but it
 505    is set to 1 temporarily while such functions are running.  This is
 506    to avoid infinite recursive call.  */
 507 static int inhibit_pre_post_conversion;
 508
 509 Lisp_Object Qchar_coding_system;
 510
 511 /* Return `safe-chars' property of CODING_SYSTEM (symbol).  Don't check
 512    its validity.  */
 513
 514 Lisp_Object
 515 coding_safe_chars (coding_system)
 516      Lisp_Object coding_system;
 517 {
 518   Lisp_Object coding_spec, plist, safe_chars;
 519
 520   coding_spec = Fget (coding_system, Qcoding_system);
 521   plist = XVECTOR (coding_spec)->contents[3];
 522   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
 523   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 524 }
  525 /* Nonzero if SAFE_CHARS is Qt or has a non-nil entry for character C.  */
  526 #define CODING_SAFE_CHAR_P(safe_chars, c) \
  527   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 528
 529 \f
 530 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 531
 532 /* Emacs' internal format for representation of multiple character
 533    sets is a kind of multi-byte encoding, i.e. characters are
 534    represented by variable-length sequences of one-byte codes.
 535
 536    ASCII characters and control characters (e.g. `tab', `newline') are
 537    represented by one-byte sequences which are their ASCII codes, in
 538    the range 0x00 through 0x7F.
 539
 540    8-bit characters of the range 0x80..0x9F are represented by
 541    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 542    code + 0x20).
 543
 544    8-bit characters of the range 0xA0..0xFF are represented by
 545    one-byte sequences which are their 8-bit code.
 546
 547    The other characters are represented by a sequence of `base
 548    leading-code', optional `extended leading-code', and one or two
 549    `position-code's.  The length of the sequence is determined by the
 550    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 551    whereas extended leading-code and position-code take the range 0xA0
 552    through 0xFF.  See `charset.h' for more details about leading-code
 553    and position-code.
 554
 555    --- CODE RANGE of Emacs' internal format ---
 556    character set        range
 557    -------------        -----
 558    ascii                0x00..0x7F
 559    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
  560    eight-bit-graphic    0xA0..0xFF
 561    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 562    ---------------------------------------------
 563
 564    As this is the internal character representation, the format is
 565    usually not used externally (i.e. in a file or in a data sent to a
 566    process).  But, it is possible to have a text externally in this
 567    format (i.e. by encoding by the coding system `emacs-mule').
 568
 569    In that case, a sequence of one-byte codes has a slightly different
 570    form.
 571
 572    Firstly, all characters in eight-bit-control are represented by
 573    one-byte sequences which are their 8-bit code.
 574
 575    Next, character composition data are represented by the byte
 576    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 577    where,
 578         METHOD is 0xF0 plus one of composition method (enum
 579         composition_method),
 580
 581         BYTES is 0xA0 plus the byte length of these composition data,
 582
 583         CHARS is 0xA0 plus the number of characters composed by these
 584         data,
 585
  586         COMPONENTs are characters in multibyte form or composition
  587         rules encoded as two bytes of ASCII codes.
 588
 589    In addition, for backward compatibility, the following formats are
 590    also recognized as composition data on decoding.
 591
 592    0x80 MSEQ ...
 593    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 594
 595    Here,
  596         MSEQ is a multibyte form but in one of these special formats:
 597           ASCII: 0xA0 ASCII_CODE+0x80,
 598           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 599         RULE is a one byte code of the range 0xA0..0xF0 that
 600         represents a composition rule.
 601   */
  602 /* Code class table with 256 entries (presumably one per byte value).  */
  603 enum emacs_code_class_type emacs_code_class[256];
 604
  605 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
  606    Check if the text between SRC and SRC_END is in Emacs' internal format.
  607    If it is, return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
  608
  609 static int
  610 detect_coding_emacs_mule (src, src_end, multibytep)
  611       unsigned char *src, *src_end;
  612       int multibytep;
  613 {
  614   unsigned char c;
  615   int composing = 0;
  616   /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE, which sets coding->result.  */
  617   struct coding_system dummy_coding;
  618   struct coding_system *coding = &dummy_coding;
  619   /* The loop ends only by `return 0' or when the source runs out.  */
  620   while (1)
  621     {
  622       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
  623       /* While composing, undo the MSEQ shifts (0xA0 ASCII+0x80, or +0x20).  */
  624       if (composing)
  625         {
  626           if (c < 0xA0)
  627             composing = 0;
  628           else if (c == 0xA0)
  629             {
  630               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
  631               c &= 0x7F;
  632             }
  633           else
  634             c -= 0x20;
  635         }
  636       /* Seeing ESC, SI or SO means the text is not in this format.  */
  637       if (c < 0x20)
  638         {
  639           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
  640             return 0;
  641         }
  642       else if (c >= 0x80 && c < 0xA0)
  643         {
  644           if (c == 0x80)
  645             /* Old leading code for a composite character.  */
  646             composing = 1;
  647           else
  648             {
  649               unsigned char *src_base = src - 1;
  650               int bytes;
  651               /* Return 0 unless the check passes; else skip BYTES bytes.  */
  652               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
  653                                                bytes))
  654                 return 0;
  655               src = src_base + bytes;
  656             }
  657         }
  658     }
  659  label_end_of_loop:  /* Source exhausted without a mismatch.  */
  660   return CODING_CATEGORY_MASK_EMACS_MULE;
  661 }
 662
 663
  664 /* Record the starting position START and METHOD of one composition.
  665    Four ints are taken; data[0] is -1 and data[2] is left unset here.  */
  666 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
  667   do {                                                          \
  668     struct composition_data *cmp_data = coding->cmp_data;       \
  669     int *data = cmp_data->data + cmp_data->used;                \
  670     coding->cmp_data_start = cmp_data->used;                    \
  671     data[0] = -1;                                               \
  672     data[1] = cmp_data->char_offset + start;                    \
  673     data[3] = (int) method;                                     \
  674     cmp_data->used += 4;                                        \
  675   } while (0)
 676
  677 /* Record the ending position END of the current composition, and
  678    store in data[0] how many ints this composition occupies so far.  */
  679 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
  680   do {                                                          \
  681     struct composition_data *cmp_data = coding->cmp_data;       \
  682     int *data = cmp_data->data + coding->cmp_data_start;        \
  683     data[0] = cmp_data->used - coding->cmp_data_start;          \
  684     data[2] = cmp_data->char_offset + end;                      \
  685   } while (0)
 686
  687 /* Record one COMPONENT (alternate character or composition rule).  End
  688    the composition once it holds COMPOSITION_DATA_MAX_BUNCH_LENGTH ints.  */
  689 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
  690   do {                                                                  \
  691     coding->cmp_data->data[coding->cmp_data->used++] = component;       \
  692     if (coding->cmp_data->used - coding->cmp_data_start                 \
  693         == COMPOSITION_DATA_MAX_BUNCH_LENGTH)                           \
  694       {                                                                 \
  695         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
  696         coding->composing = COMPOSITION_NO;                             \
  697       }                                                                 \
  698   } while (0)
 699
 700
  701 /* Get one byte from the data pointed to by SRC and increment SRC.  If
  702    SRC is not less than SRC_END, return -1 without incrementing SRC.  */
  703
  704 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 705
 706
 707 /* Decode a character represented as a component of composition
 708    sequence of Emacs 20 style at SRC.  Set C to that character, store
 709    its multibyte form sequence at P, and set P to the end of that
 710    sequence.  If no valid character is found, set C to -1.
        Bytes are fetched with SAFE_ONE_MORE_BYTE, so `src' and `src_end'
        must be in scope where the macro is expanded.  C ends up as -1
        when the source is exhausted, when the first byte satisfies
        CHAR_HEAD_P, or when the bytes do not form a valid character.
        Two forms are accepted:
          - the byte 0xA0 followed by a byte not less than 0xA0; the
            result is the second byte minus 0xA0, stored as one byte;
          - a byte that is a base leading code plus 0x20, followed by the
            remaining bytes of the character (BYTES_BY_CHAR_HEAD gives
            the total length).  In this form P may already have been
            advanced past the bytes copied so far when C becomes -1.
        NOTE(review): the first form subtracts 0xA0 and can therefore
        only yield 0x00..0x5F.  Confirm against the description of the
        Emacs 20 composition format that the offset should not be 0x80
        (which would yield 0x20..0x7F).  */
 711
 712 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 713   do {                                                          \
 714     int bytes;                                                  \
 715                                                                 \
 716     c = SAFE_ONE_MORE_BYTE ();                                  \
 717     if (c < 0)                                                  \
 718       break;                                                    \
 719     if (CHAR_HEAD_P (c))                                        \
 720       c = -1;                                                   \
 721     else if (c == 0xA0)                                         \
 722       {                                                         \
 723         c = SAFE_ONE_MORE_BYTE ();                              \
 724         if (c < 0xA0)                                           \
 725           c = -1;                                               \
 726         else                                                    \
 727           {                                                     \
 728             c -= 0xA0;                                          \
 729             *p++ = c;                                           \
 730           }                                                     \
 731       }                                                         \
 732     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 733       {                                                         \
 734         unsigned char *p0 = p;                                  \
 735                                                                 \
 736         c -= 0x20;                                              \
 737         *p++ = c;                                               \
 738         bytes = BYTES_BY_CHAR_HEAD (c);                         \
 739         while (--bytes)                                         \
 740           {                                                     \
 741             c = SAFE_ONE_MORE_BYTE ();                          \
 742             if (c < 0)                                          \
 743               break;                                            \
 744             *p++ = c;                                           \
 745           }                                                     \
 746         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes))     \
 747           c = STRING_CHAR (p0, bytes);                          \
 748         else                                                    \
 749           c = -1;                                               \
 750       }                                                         \
 751     else                                                        \
 752       c = -1;                                                   \
 753   } while (0)
 754
 755
 756 /* Decode a composition rule represented as a component of composition
 757    sequence of Emacs 20 style at SRC.  Set C to the rule.  If no
 758    valid rule is found, set C to -1.
        One byte is fetched with SAFE_ONE_MORE_BYTE and 0xA0 is
        subtracted from it; the result must be in the range 0..80
        (an exhausted source gives a negative value and so also yields
        -1).  The value is split into GREF = value / 9 and
        NREF = value % 9, which are stored in the variables `gref' and
        `nref' of the expansion site (the macro does not declare them)
        and then combined with COMPOSITION_ENCODE_RULE.  */
 759
 760 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 761   do {                                                  \
 762     c = SAFE_ONE_MORE_BYTE ();                          \
 763     c -= 0xA0;                                          \
 764     if (c < 0 || c >= 81)                               \
 765       c = -1;                                           \
 766     else                                                \
 767       {                                                 \
 768         gref = c / 9, nref = c % 9;                     \
 769         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 770       }                                                 \
 771   } while (0)
 772
 773
 774 /* Decode composition sequence encoded by `emacs-mule' at the source
 775    pointed by SRC.  SRC_END is the end of source.  Store information
 776    of the composition in CODING->cmp_data.
 777
 778    For backward compatibility, decode also a composition sequence of
 779    Emacs 20 style.  In that case, the composition sequence contains
 780    characters that should be extracted into a buffer or string.  Store
 781    those characters at *DESTINATION in multibyte form.
 782
 783    If we encounter an invalid byte sequence, return 0.
 784    If we encounter an insufficient source or destination, or
 785    insufficient space in CODING->cmp_data, return -1.
 786    Otherwise, return consumed bytes in the source.
        The limit for writing at *DESTINATION is DST_END when DST_BYTES
        is nonzero, and the current source position otherwise.
        *DESTINATION and CODING->produced_char are advanced only when
        characters of an Emacs 20 style sequence were extracted.
 787
 788 */
 789 static INLINE int
 790 decode_composition_emacs_mule (coding, src, src_end,
 791                                destination, dst_end, dst_bytes)
 792      struct coding_system *coding;
 793      unsigned char *src, *src_end, **destination, *dst_end;
 794      int dst_bytes;
 795 {
 796   unsigned char *dst = *destination;
 797   int method, data_len, nchars;
       /* Remember the start of the sequence and step over its first
          byte (the caller in decode_coding_emacs_mule only calls this
          function when that byte is 0x80).  */
 798   unsigned char *src_base = src++;
 799   /* Store components of composition.  */
 800   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 801   int ncomponent;
 802   /* Store multibyte form of characters to be composed.  This is for
 803      Emacs 20 style composition sequence.  */
 804   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 805   unsigned char *bufp = buf;
 806   int c, i, gref, nref;
 807
       /* Refuse to start unless a maximum-size bunch still fits in the
          composition data.  */
 808   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 809       >= COMPOSITION_DATA_SIZE)
 810     {
 811       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 812       return -1;
 813     }
 814
       /* NOTE(review): ONE_MORE_BYTE is defined elsewhere; presumably it
          jumps to label_end_of_loop when the source is exhausted.  */
 815   ONE_MORE_BYTE (c);
 816   if (c - 0xF0 >= COMPOSITION_RELATIVE
 817            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 818     {
           /* Current format:
                0x80 0xF0+METHOD 0xA0+DATA_LEN 0xA0+NCHARS COMPONENT ...
              where DATA_LEN is measured from the leading 0x80.  */
 819       int with_rule;
 820
 821       method = c - 0xF0;
 822       with_rule = (method == COMPOSITION_WITH_RULE
 823                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 824       ONE_MORE_BYTE (c);
 825       data_len = c - 0xA0;
 826       if (data_len < 4
 827           || src_base + data_len > src_end)
 828         return 0;
 829       ONE_MORE_BYTE (c);
 830       nchars = c - 0xA0;
           /* Validate the decoded count rather than the raw byte: every
              byte below 0xA1 gives a count smaller than 1, which must
              not reach CODING_ADD_COMPOSITION_END below.  */
 831       if (nchars < 1)
 832         return 0;
 833       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 834         {
 835           /* If it is longer than this, it can't be valid.  */
 836           if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
 837             return 0;
 838
               /* With a rule-based method, every odd-numbered component
                  is a rule stored as two bytes, each offset by 32.  */
 839           if (ncomponent % 2 && with_rule)
 840             {
 841               ONE_MORE_BYTE (gref);
 842               gref -= 32;
 843               ONE_MORE_BYTE (nref);
 844               nref -= 32;
 845               c = COMPOSITION_ENCODE_RULE (gref, nref);
 846             }
 847           else
 848             {
 849               int bytes;
 850               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 851                 c = STRING_CHAR (src, bytes);
 852               else
 853                 c = *src, bytes = 1;
 854               src += bytes;
 855             }
 856           component[ncomponent] = c;
 857         }
 858     }
 859   else
 860     {
 861       /* This may be an old Emacs 20 style format.  See the comment at
 862          the section 2 of this file.  */
           /* Find the end of the sequence: the next byte that starts a
              character.  If none is found and more input may follow,
              give up for now.  */
 863       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 864       if (src == src_end
 865           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 866         goto label_end_of_loop;
 867
           /* Re-scan the sequence from the byte after 0x80, limited to
              the end just found.  */
 868       src_end = src;
 869       src = src_base + 1;
 870       if (c < 0xC0)
 871         {
 872           method = COMPOSITION_RELATIVE;
 873           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 874             {
 875               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 876               if (c < 0)
 877                 break;
 878               component[ncomponent++] = c;
 879             }
 880           if (ncomponent < 2)
 881             return 0;
 882           nchars = ncomponent;
 883         }
 884       else if (c == 0xFF)
 885         {
 886           method = COMPOSITION_WITH_RULE;
               /* Skip the 0xFF marker itself.  */
 887           src++;
 888           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 889           if (c < 0)
 890             return 0;
 891           component[0] = c;
 892           for (ncomponent = 1;
 893                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 894             {
 895               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 896               if (c < 0)
 897                 break;
 898               component[ncomponent++] = c;
 899               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 900               if (c < 0)
 901                 break;
 902               component[ncomponent++] = c;
 903             }
 904           if (ncomponent < 3)
 905             return 0;
               /* Components alternate character, rule, character, ...  */
 906           nchars = (ncomponent + 1) / 2;
 907         }
 908       else
 909         return 0;
 910     }
 911
       /* Record the composition only if the extracted characters (if
          any) fit in the destination.  */
 912   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 913     {
 914       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 915       for (i = 0; i < ncomponent; i++)
 916         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 917       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 918       if (buf < bufp)
 919         {
 920           unsigned char *p = buf;
 921           EMIT_BYTES (p, bufp);
 922           *destination += bufp - buf;
 923           coding->produced_char += nchars;
 924         }
 925       return (src - src_base);
 926     }
 927  label_end_of_loop:
 928   return -1;
 929 }
 930
 931 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Copy the SRC_BYTES bytes at SOURCE to DESTINATION one character
        at a time.  A carriage return is converted according to
        CODING->eol_type; a byte 0x80 is handed to
        decode_composition_emacs_mule when CODING->cmp_data is set; a
        byte that does not start a valid multibyte form is converted on
        its own with CHAR_STRING.  On return CODING->consumed and
        CODING->consumed_char hold the number of source bytes fully
        processed, CODING->produced the number of bytes written, and
        CODING->produced_char the number of characters produced.  This
        function itself sets CODING->result only for an inconsistent
        end-of-line or a full destination.  */
 932
 933 static void
 934 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 935      struct coding_system *coding;
 936      unsigned char *source, *destination;
 937      int src_bytes, dst_bytes;
 938 {
 939   unsigned char *src = source;
 940   unsigned char *src_end = source + src_bytes;
 941   unsigned char *dst = destination;
 942   unsigned char *dst_end = destination + dst_bytes;
 943   /* SRC_BASE remembers the start position in source in each loop.
 944      The loop will be exited when there's not enough source code, or
 945      when there's not enough destination area to produce a
 946      character.  */
 947   unsigned char *src_base;
 948
 949   coding->produced_char = 0;
 950   while ((src_base = src) < src_end)
 951     {
 952       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 953       int bytes;
 954
 955       if (*src == '\r')
 956         {
 957           int c = *src++;
 958
 959           if (coding->eol_type == CODING_EOL_CR)
 960             c = '\n';
 961           else if (coding->eol_type == CODING_EOL_CRLF)
 962             {
                   /* A CR followed by LF becomes LF; a lone CR is kept and
                      the byte just read is pushed back.  */
 963               ONE_MORE_BYTE (c);
 964               if (c != '\n')
 965                 {
 966                   src--;
 967                   c = '\r';
 968                 }
 969             }
               /* NOTE(review): this store is not preceded by a check
                  against DST_END.  */
 970           *dst++ = c;
 971           coding->produced_char++;
 972           continue;
 973         }
 974       else if (*src == '\n')
 975         {
               /* A bare LF contradicts a CR or CRLF end-of-line type; stop
                  if the caller asked for inconsistencies to be reported.  */
 976           if ((coding->eol_type == CODING_EOL_CR
 977                || coding->eol_type == CODING_EOL_CRLF)
 978               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 979             {
 980               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 981               goto label_end_of_loop;
 982             }
               /* NOTE(review): as above, no check against DST_END here.  */
 983           *dst++ = *src++;
 984           coding->produced_char++;
 985           continue;
 986         }
 987       else if (*src == 0x80 && coding->cmp_data)
 988         {
 989           /* Start of composition data.  */
 990           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
 991                                                          &dst, dst_end,
 992                                                          dst_bytes);
               /* Negative: cannot proceed now.  Positive: that many source
                  bytes were handled.  Zero: not a valid composition, so
                  the 0x80 byte is converted like any other stray byte.  */
 993           if (consumed < 0)
 994             goto label_end_of_loop;
 995           else if (consumed > 0)
 996             {
 997               src += consumed;
 998               continue;
 999             }
1000           bytes = CHAR_STRING (*src, tmp);
1001           p = tmp;
1002           src++;
1003         }
1004       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
1005         {
               /* A valid multibyte form: copy it unchanged.  */
1006           p = src;
1007           src += bytes;
1008         }
1009       else
1010         {
               /* Not a valid multibyte form: convert this one byte.  */
1011           bytes = CHAR_STRING (*src, tmp);
1012           p = tmp;
1013           src++;
1014         }
           /* The write limit is DST_END when DST_BYTES is nonzero and the
              current SRC otherwise.  Leaving with `break' keeps SRC_BASE
              at the start of this character, so it is not counted as
              consumed.  */
1015       if (dst + bytes >= (dst_bytes ? dst_end : src))
1016         {
1017           coding->result = CODING_FINISH_INSUFFICIENT_DST;
1018           break;
1019         }
1020       while (bytes--) *dst++ = *p++;
1021       coding->produced_char++;
1022     }
1023  label_end_of_loop:
1024   coding->consumed = coding->consumed_char = src_base - source;
1025   coding->produced = dst - destination;
1026 }
1027
1028
1029 /* Encode composition data stored at DATA into a special byte sequence
1030    starting by 0x80.  Update CODING->cmp_data_start and maybe
1031    CODING->cmp_data for the next call.
        As read here, DATA[0] is the number of elements of this entry,
        DATA[3] the composition method, DATA[4] onward the components,
        and DATA[2] - DATA[1] is emitted as the number of composed
        characters.  The emitted sequence is
          0x80  0xF0+METHOD  0xA0+BYTES  0xA0+CHARS  COMPONENT ...
        where BYTES counts the whole sequence including these four
        leading bytes.  For the two rule-based methods, each rule is
        written between the characters as the two bytes 0x20+GREF and
        0x20+NREF.
        The macro uses `dst', `dst_end', `dst_bytes' and `src' of the
        expansion site.  When the sequence plus four spare bytes does
        not fit below the write limit, it sets CODING->result to
        CODING_FINISH_INSUFFICIENT_DST and jumps to `label_end_of_loop',
        which must exist at the expansion site.  */
1032
1033 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
1034   do {                                                                  \
1035     unsigned char buf[1024], *p0 = buf, *p;                             \
1036     int len = data[0];                                                  \
1037     int i;                                                              \
1038                                                                         \
1039     buf[0] = 0x80;                                                      \
1040     buf[1] = 0xF0 + data[3];    /* METHOD */                            \
1041     buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
1042     p = buf + 4;                                                        \
1043     if (data[3] == COMPOSITION_WITH_RULE                                \
1044         || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
1045       {                                                                 \
1046         p += CHAR_STRING (data[4], p);                                  \
1047         for (i = 5; i < len; i += 2)                                    \
1048           {                                                             \
1049             int gref, nref;                                             \
1050              COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
1051             *p++ = 0x20 + gref;                                         \
1052             *p++ = 0x20 + nref;                                         \
1053             p += CHAR_STRING (data[i + 1], p);                          \
1054           }                                                             \
1055       }                                                                 \
1056     else                                                                \
1057       {                                                                 \
1058         for (i = 4; i < len; i++)                                       \
1059           p += CHAR_STRING (data[i], p);                                \
1060       }                                                                 \
1061     buf[2] = 0xA0 + (p - buf);  /* COMPONENTS-BYTES */                  \
1062                                                                         \
1063     if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
1064       {                                                                 \
1065         coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
1066         goto label_end_of_loop;                                         \
1067       }                                                                 \
1068     while (p0 < p)                                                      \
1069       *dst++ = *p0++;                                                   \
1070     coding->cmp_data_start += data[0];                                  \
1071     if (coding->cmp_data_start == coding->cmp_data->used                \
1072         && coding->cmp_data->next)                                      \
1073       {                                                                 \
1074         coding->cmp_data = coding->cmp_data->next;                      \
1075         coding->cmp_data_start = 0;                                     \
1076       }                                                                 \
1077   } while (0)
1078
1079
1080 static void encode_eol P_ ((struct coding_system *, const unsigned char *,
1081                             unsigned char *, int, int));
1082
     /* Encode the SRC_BYTES bytes at SOURCE into DESTINATION (DST_BYTES
        bytes long) in the emacs-mule format.  When CODING carries no
        composition data the whole job is delegated to encode_eol.
        Otherwise each character is copied, '\n' is replaced according
        to CODING->eol_type, and the information about a composition is
        emitted by ENCODE_COMPOSITION_EMACS_MULE just before the
        character at which that composition starts.  On return
        CODING->consumed, CODING->consumed_char, CODING->produced and
        CODING->produced_char describe the progress made.  */
1083 static void
1084 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1085      struct coding_system *coding;
1086      unsigned char *source, *destination;
1087      int src_bytes, dst_bytes;
1088 {
1089   unsigned char *src = source;
1090   unsigned char *src_end = source + src_bytes;
1091   unsigned char *dst = destination;
1092   unsigned char *dst_end = destination + dst_bytes;
1093   unsigned char *src_base;
1094   int c;
1095   int char_offset;
1096   int *data;
1097
       /* NOTE(review): TRANSLATION_TABLE is not referenced directly in
          this function; presumably ONE_MORE_CHAR, defined elsewhere,
          expects it to be in scope -- confirm.  */
1098   Lisp_Object translation_table;
1099
1100   translation_table = Qnil;
1101
1102   /* Optimization for the case that there's no composition.  */
1103   if (!coding->cmp_data || coding->cmp_data->used == 0)
1104     {
1105       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1106       return;
1107     }
1108
1109   char_offset = coding->cmp_data->char_offset;
1110   data = coding->cmp_data->data + coding->cmp_data_start;
       /* The loop has no exit condition of its own.  It is left through
          label_end_of_loop, which ENCODE_COMPOSITION_EMACS_MULE jumps to
          when the destination is full.  NOTE(review): ONE_MORE_CHAR and
          the EMIT_* macros are defined elsewhere and presumably jump
          there as well when source or destination runs out.  */
1111   while (1)
1112     {
1113       src_base = src;
1114
1115       /* If SRC starts a composition, encode the information about the
1116          composition in advance.  */
1117       if (coding->cmp_data_start < coding->cmp_data->used
1118           && char_offset + coding->consumed_char == data[1])
1119         {
1120           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
               /* The macro may have moved on to the next block of
                  composition data, so reload both cached values.  */
1121           char_offset = coding->cmp_data->char_offset;
1122           data = coding->cmp_data->data + coding->cmp_data_start;
1123         }
1124
1125       ONE_MORE_CHAR (c);
1126       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1127                         || coding->eol_type == CODING_EOL_CR))
1128         {
1129           if (coding->eol_type == CODING_EOL_CRLF)
1130             EMIT_TWO_BYTES ('\r', c);
1131           else
1132             EMIT_ONE_BYTE ('\r');
1133         }
1134       else if (SINGLE_BYTE_CHAR_P (c))
1135         EMIT_ONE_BYTE (c);
1136       else
             /* A multibyte character is copied exactly as it appears in
                the source.  */
1137         EMIT_BYTES (src_base, src);
1138       coding->consumed_char++;
1139     }
1140  label_end_of_loop:
1141   coding->consumed = src_base - source;
1142   coding->produced = coding->produced_char = dst - destination;
1143   return;
1144 }
1145
1146 \f
1147 /*** 3. ISO2022 handlers ***/
1148
1149 /* The following note describes the coding system ISO2022 briefly.
1150    Since the intention of this note is to help understand the
1151    functions in this file, some parts are NOT ACCURATE or are OVERLY
1152    SIMPLIFIED.  For thorough understanding, please refer to the
1153    original document of ISO2022.  This is equivalent to the standard
1154    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1155
1156    ISO2022 provides many mechanisms to encode several character sets
1157    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1158    is encoded using bytes less than 128.  This may make the encoded
1159    text a little bit longer, but the text passes more easily through
1160    several types of gateway, some of which strip off the MSB (Most
1161    Significant Bit).
1162
1163    There are two kinds of character sets: control character sets and
1164    graphic character sets.  The former contain control characters such
1165    as `newline' and `escape' to provide control functions (control
1166    functions are also provided by escape sequences).  The latter
1167    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1168    two control character sets and many graphic character sets.
1169
1170    Graphic character sets are classified into one of the following
1171    four classes, according to the number of bytes (DIMENSION) and
1172    number of characters in one dimension (CHARS) of the set:
1173    - DIMENSION1_CHARS94
1174    - DIMENSION1_CHARS96
1175    - DIMENSION2_CHARS94
1176    - DIMENSION2_CHARS96
1177
1178    In addition, each character set is assigned an identification tag,
1179    unique for each set, called the "final character" (denoted as <F>
1180    hereafter).  The <F> of each character set is decided by ECMA(*)
1181    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1182    (0x30..0x3F are for private use only).
1183
1184    Note (*): ECMA = European Computer Manufacturers Association
1185
1186    Here are examples of graphic character sets [NAME(<F>)]:
1187         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1188         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1189         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1190         o DIMENSION2_CHARS96 -- none for the moment
1191
1192    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1193         C0 [0x00..0x1F] -- control character plane 0
1194         GL [0x20..0x7F] -- graphic character plane 0
1195         C1 [0x80..0x9F] -- control character plane 1
1196         GR [0xA0..0xFF] -- graphic character plane 1
1197
1198    A control character set is directly designated and invoked to C0 or
1199    C1 by an escape sequence.  The most common case is that:
1200    - ISO646's  control character set is designated/invoked to C0, and
1201    - ISO6429's control character set is designated/invoked to C1,
1202    and usually these designations/invocations are omitted in encoded
1203    text.  In a 7-bit environment, only C0 can be used, and a control
1204    character for C1 is encoded by an appropriate escape sequence to
1205    fit into the environment.  All control characters for C1 are
1206    defined to have corresponding escape sequences.
1207
1208    A graphic character set is at first designated to one of four
1209    graphic registers (G0 through G3), then these graphic registers are
1210    invoked to GL or GR.  These designations and invocations can be
1211    done independently.  The most common case is that G0 is invoked to
1212    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1213    these invocations and designations are omitted in encoded text.
1214    In a 7-bit environment, only GL can be used.
1215
1216    When a graphic character set of CHARS94 is invoked to GL, codes
1217    0x20 and 0x7F of the GL area work as control characters SPACE and
1218    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1219    be used.
1220
1221    There are two ways of invocation: locking-shift and single-shift.
1222    With locking-shift, the invocation lasts until the next different
1223    invocation, whereas with single-shift, the invocation affects the
1224    following character only and doesn't affect the locking-shift
1225    state.  Invocations are done by the following control characters or
1226    escape sequences:
1227
1228    ----------------------------------------------------------------------
1229    abbrev  function                  cntrl escape seq   description
1230    ----------------------------------------------------------------------
1231    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1232    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1233    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1234    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1235    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1236    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1237    LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
1238    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1239    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1240    ----------------------------------------------------------------------
1241    (*) These are not used by any known coding system.
1242
1243    Control characters for these functions are defined by macros
1244    ISO_CODE_XXX in `coding.h'.
1245
1246    Designations are done by the following escape sequences:
1247    ----------------------------------------------------------------------
1248    escape sequence      description
1249    ----------------------------------------------------------------------
1250    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1251    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1252    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1253    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1254    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1255    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1256    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1257    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1258    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1259    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1260    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1261    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1262    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1263    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1264    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1265    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1266    ----------------------------------------------------------------------
1267
1268    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1269    of dimension 1, chars 94, and final character <F>, etc...
1270
1271    Note (*): Although these designations are not allowed in ISO2022,
1272    Emacs accepts them on decoding, and produces them on encoding
1273    CHARS96 character sets in a coding system which is characterized as
1274    7-bit environment, non-locking-shift, and non-single-shift.
1275
1276    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1277    '(' can be omitted.  We refer to this as "short-form" hereafter.
1278
1279    Now you may notice that there are a lot of ways of encoding the
1280    same multilingual text in ISO2022.  Actually, there exist many
1281    coding systems such as Compound Text (used in X11's inter client
1282    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
1283    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
1284    localized platforms), and all of these are variants of ISO2022.
1285
1286    In addition to the above, Emacs handles two more kinds of escape
1287    sequences: ISO6429's direction specification and Emacs' private
1288    sequence for specifying character composition.
1289
1290    ISO6429's direction specification takes the following form:
1291         o CSI ']'      -- end of the current direction
1292         o CSI '0' ']'  -- end of the current direction
1293         o CSI '1' ']'  -- start of left-to-right text
1294         o CSI '2' ']'  -- start of right-to-left text
1295    The control character CSI (0x9B: control sequence introducer) is
1296    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1297
1298    Character composition specification takes the following form:
1299         o ESC '0' -- start relative composition
1300         o ESC '1' -- end composition
1301         o ESC '2' -- start rule-base composition (*)
1302         o ESC '3' -- start relative composition with alternate chars  (**)
1303         o ESC '4' -- start rule-base composition with alternate chars  (**)
1304   Since these are not standard escape sequences of any ISO standard,
1305   the use of them with these meanings is restricted to Emacs only.
1306
1307   (*) This form is used only in Emacs 20.5 and older versions,
1308   but the newer versions can safely decode it.
1309   (**) This form is used only in Emacs 21.1 and newer versions,
1310   and the older versions can't decode it.
1311
1312   Here's a list of example usages of these composition escape
1313   sequences (categorized by `enum composition_method').
1314
1315   COMPOSITION_RELATIVE:
1316         ESC 0 CHAR [ CHAR ] ESC 1
1317   COMPOSITION_WITH_RULE:
1318         ESC 2 CHAR [ RULE CHAR ] ESC 1
1319   COMPOSITION_WITH_ALTCHARS:
1320         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1321   COMPOSITION_WITH_RULE_ALTCHARS:
1322         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1323
     /* Table of `enum iso_code_class_type' values with 256 entries.
        NOTE(review): presumably indexed by byte value and filled in
        elsewhere in this file -- confirm.  */
1324 enum iso_code_class_type iso_code_class[256];
1325
     /* Nonzero if a coding system is registered in coding_system_table
        for category IDX, character C passes CODING_SAFE_CHAR_P for that
        coding system (CHARSET_ASCII is accepted without this check), and
        the coding system's requested designation for CHARSET is not
        CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.
        Side effect: when the safe-chars check is performed, the variable
        `safe_chars' of the expansion site is assigned.
        CHARSET is parenthesized in the comparison so that an expression
        argument cannot regroup with `=='.  */
1326 #define CHARSET_OK(idx, charset, c)                                     \
1327   (coding_system_table[idx]                                             \
1328    && ((charset) == CHARSET_ASCII                                       \
1329        || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
1330            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
1331    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1332                                               charset)                  \
1333        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1334
     /* Nonzero if a coding system is registered in coding_system_table
        for category IDX and its CODING_SPEC_ISO_INITIAL_DESIGNATION for
        argument 1 (presumably graphic register G1) is non-negative.
        The table entry is tested first, as CHARSET_OK does, so that an
        unregistered category yields zero instead of passing a null
        pointer on.  */
1335 #define SHIFT_OUT_OK(idx) \
1336   (coding_system_table[idx] \
        && CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1337
     /* Nonzero if a coding system is registered in coding_system_table
        for category IDX and its `composing' member is not
        COMPOSITION_DISABLED.  The table entry is tested first, as
        CHARSET_OK does, so that an unregistered category yields zero
        instead of dereferencing a null pointer.  */
1338 #define COMPOSITION_OK(idx)     \
1339   (coding_system_table[idx]     \
        && coding_system_table[idx]->composing != COMPOSITION_DISABLED)
1340
1341 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1342    Check if a text is encoded in ISO2022.  If it is, return an
1343    integer in which the appropriate flag bits, any of:
1344         CODING_CATEGORY_MASK_ISO_7
1345         CODING_CATEGORY_MASK_ISO_7_TIGHT
1346         CODING_CATEGORY_MASK_ISO_8_1
1347         CODING_CATEGORY_MASK_ISO_8_2
1348         CODING_CATEGORY_MASK_ISO_7_ELSE
1349         CODING_CATEGORY_MASK_ISO_8_ELSE
1350    are set.  If a code which should never appear in ISO2022 is found,
1351    return 0.  */
1352
1353 static int
1354 detect_coding_iso2022 (src, src_end, multibytep)
1355      unsigned char *src, *src_end;
1356      int multibytep;
1357 {
1358   int mask = CODING_CATEGORY_MASK_ISO;
1359   int mask_found = 0;
1360   int reg[4], shift_out = 0, single_shifting = 0;
1361   int c, c1, charset;
1362   /* Dummy for ONE_MORE_BYTE.  */
1363   struct coding_system dummy_coding;
1364   struct coding_system *coding = &dummy_coding;
1365   Lisp_Object safe_chars;
1366
1367   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1368   while (mask && src < src_end)
1369     {
1370       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1371     retry:
1372       switch (c)
1373         {
1374         case ISO_CODE_ESC:
1375           if (inhibit_iso_escape_detection)
1376             break;
1377           single_shifting = 0;
1378           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1379           if (c >= '(' && c <= '/')
1380             {
1381               /* Designation sequence for a charset of dimension 1.  */
1382               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1383               if (c1 < ' ' || c1 >= 0x80
1384                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1385                 /* Invalid designation sequence.  Just ignore.  */
1386                 break;
1387               reg[(c - '(') % 4] = charset;
1388             }
1389           else if (c == '$')
1390             {
1391               /* Designation sequence for a charset of dimension 2.  */
1392               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1393               if (c >= '@' && c <= 'B')
1394                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1395                 reg[0] = charset = iso_charset_table[1][0][c];
1396               else if (c >= '(' && c <= '/')
1397                 {
1398                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1399                   if (c1 < ' ' || c1 >= 0x80
1400                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1401                     /* Invalid designation sequence.  Just ignore.  */
1402                     break;
1403                   reg[(c - '(') % 4] = charset;
1404                 }
1405               else
1406                 /* Invalid designation sequence.  Just ignore.  */
1407                 break;
1408             }
1409           else if (c == 'N' || c == 'O')
1410             {
1411               /* ESC <Fe> for SS2 or SS3.  */
1412               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1413               break;
1414             }
1415           else if (c >= '0' && c <= '4')
1416             {
1417               /* ESC <Fp> for start/end composition.  */
1418               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
1419                 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1420               else
1421                 mask &= ~CODING_CATEGORY_MASK_ISO_7;
1422               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
1423                 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1424               else
1425                 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1426               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
1427                 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1428               else
1429                 mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
1430               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
1431                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1432               else
1433                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1434               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
1435                 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1436               else
1437                 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1438               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
1439                 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1440               else
1441                 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1442               break;
1443             }
1444           else
1445             /* Invalid escape sequence.  Just ignore.  */
1446             break;
1447
1448           /* We found a valid designation sequence for CHARSET.  */
1449           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1450           c = MAKE_CHAR (charset, 0, 0);
1451           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1452             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1453           else
1454             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1455           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1456             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1457           else
1458             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1459           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1460             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1461           else
1462             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1463           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1464             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1465           else
1466             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1467           break;
1468
1469         case ISO_CODE_SO:
1470           if (inhibit_iso_escape_detection)
1471             break;
1472           single_shifting = 0;
1473           if (shift_out == 0
1474               && (reg[1] >= 0
1475                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1476                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1477             {
1478               /* Locking shift out.  */
1479               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1480               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1481             }
1482           break;
1483
1484         case ISO_CODE_SI:
1485           if (inhibit_iso_escape_detection)
1486             break;
1487           single_shifting = 0;
1488           if (shift_out == 1)
1489             {
1490               /* Locking shift in.  */
1491               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1492               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1493             }
1494           break;
1495
1496         case ISO_CODE_CSI:
1497           single_shifting = 0;
1498         case ISO_CODE_SS2:
1499         case ISO_CODE_SS3:
1500           {
1501             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1502
1503             if (inhibit_iso_escape_detection)
1504               break;
1505             if (c != ISO_CODE_CSI)
1506               {
1507                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1508                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1509                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1510                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1511                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1512                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1513                 single_shifting = 1;
1514               }
1515             if (VECTORP (Vlatin_extra_code_table)
1516                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1517               {
1518                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1519                     & CODING_FLAG_ISO_LATIN_EXTRA)
1520                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1521                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1522                     & CODING_FLAG_ISO_LATIN_EXTRA)
1523                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1524               }
1525             mask &= newmask;
1526             mask_found |= newmask;
1527           }
1528           break;
1529
1530         default:
1531           if (c < 0x80)
1532             {
1533               single_shifting = 0;
1534               break;
1535             }
1536           else if (c < 0xA0)
1537             {
1538               single_shifting = 0;
1539               if (VECTORP (Vlatin_extra_code_table)
1540                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1541                 {
1542                   int newmask = 0;
1543
1544                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1545                       & CODING_FLAG_ISO_LATIN_EXTRA)
1546                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1547                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1548                       & CODING_FLAG_ISO_LATIN_EXTRA)
1549                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1550                   mask &= newmask;
1551                   mask_found |= newmask;
1552                 }
1553               else
1554                 return 0;
1555             }
1556           else
1557             {
1558               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1559                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1560               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1561               /* Check the length of succeeding codes of the range
1562                  0xA0..0FF.  If the byte length is odd, we exclude
1563                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1564                  when we are not single shifting.  */
1565               if (!single_shifting
1566                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1567                 {
1568                   int i = 1;
1569
1570                   c = -1;
1571                   while (src < src_end)
1572                     {
1573                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1574                       if (c < 0xA0)
1575                         break;
1576                       i++;
1577                     }
1578
1579                   if (i & 1 && src < src_end)
1580                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1581                   else
1582                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1583                   if (c >= 0)
1584                     /* This means that we have read one extra byte.  */
1585                     goto retry;
1586                 }
1587             }
1588           break;
1589         }
1590     }
1591  label_end_of_loop:
1592   return (mask & mask_found);
1593 }
1594
1595 /* Build the character whose charset is CHARSET and whose position
1596    codes are C1 (first) and C2 (second), and yield its character
1597    code.  When the variable `translation_table' is non-nil, the code
1598    is obtained through `translate_char' with that table instead.  */
1599
1600 #define DECODE_ISO_CHARACTER(charset, c1, c2)                  \
1601   (!NILP (translation_table)                                   \
1602    ? translate_char (translation_table, -1, charset, c1, c2)   \
1603    : MAKE_CHAR (charset, c1, c2))
1604
1605 /* Set designation state into CODING.  The charset is looked up with
        ISO_CHARSET_TABLE from DIMENSION, CHARS and FINAL_CHAR, and is
        designated to graphic register REG of CODING only when it is
        valid (>= 0) and either CODING requests that charset for REG or
        the charset's base character is accepted by `safe_chars'.  When
        CODING_MODE_DIRECTION is set and the charset has a reverse
        charset, the reverse charset is designated instead.

        Control jumps to `label_invalid_code' when FINAL_CHAR is outside
        the range '0'..127, when the designation is rejected (REG is then
        remembered in last_invalid_designation_register), or when ASCII is
        designated to register 0 right after a rejected designation to
        register 0.

        The expansion uses `coding', `safe_chars' and the label
        `label_invalid_code' of the enclosing function.  */
1606 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1607   do {                                                                     \
1608     int charset, c;                                                        \
1609                                                                            \
1610     if (final_char < '0' || final_char >= 128)                             \
1611       goto label_invalid_code;                                             \
1612     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1613                                  make_number (chars),                      \
1614                                  make_number (final_char));                \
1615     c = MAKE_CHAR (charset, 0, 0);                                         \
1616     if (charset >= 0                                                       \
1617         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1618             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1619       {                                                                    \
1620         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1621             && reg == 0                                                    \
1622             && charset == CHARSET_ASCII)                                   \
1623           {                                                                \
1624             /* We should insert this designation sequence as is so         \
1625                that it is surely written back to a file.  */               \
1626             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1627             goto label_invalid_code;                                       \
1628           }                                                                \
1629         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1630         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1631             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1632           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1633         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1634       }                                                                    \
1635     else                                                                   \
1636       {                                                                    \
1637         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1638         goto label_invalid_code;                                           \
1639       }                                                                    \
1640   } while (0)
1641
1642 /* Create a fresh composition-data block for CODING, record CHAR_OFFSET
1643    in it, and link it after the block CODING currently points to.  */
1644
1645 void
1646 coding_allocate_composition_data (coding, char_offset)
1647      struct coding_system *coding;
1648      int char_offset;
1649 {
1650   struct composition_data *block;
1651
1652   block = (struct composition_data *) xmalloc (sizeof *block);
1653   block->next = NULL;
1654   block->used = 0;
1655   block->char_offset = char_offset;
1656   block->prev = coding->cmp_data;
1657   if (block->prev != NULL)
1658     block->prev->next = block;
1659   coding->cmp_data_start = 0;
1660   coding->cmp_data = block;
1661 }
1662
1663 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
1664    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1665    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1666    ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
1667    ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1

        C1 is the byte that followed ESC.  Three cases are distinguished:
        - composition is disabled in CODING: ESC and C1 (masked to 7 bits)
          are written to DST and counted as two produced characters;
        - no composition is in progress: the method is chosen from C1 and
          a new composition is started, or, when coding->cmp_data lacks
          room, coding->result is set to CODING_FINISH_INSUFFICIENT_CMP
          and control jumps to `label_end_of_loop';
        - a composition is already in progress: an altchars method is
          switched to COMPOSITION_RELATIVE, anything else is left alone.

        The expansion uses `coding', `dst' and the label
        `label_end_of_loop' of the enclosing function.
1668   */
1669
1670 #define DECODE_COMPOSITION_START(c1)                                       \
1671   do {                                                                     \
1672     if (coding->composing == COMPOSITION_DISABLED)                         \
1673       {                                                                    \
1674         *dst++ = ISO_CODE_ESC;                                             \
1675         *dst++ = c1 & 0x7f;                                                \
1676         coding->produced_char += 2;                                        \
1677       }                                                                    \
1678     else if (!COMPOSING_P (coding))                                        \
1679       {                                                                    \
1680         /* This is surely the start of a composition.  We must be sure     \
1681            that coding->cmp_data has enough space to store the             \
1682            information about the composition.  If not, terminate the       \
1683            current decoding loop, allocate one more memory block for       \
1684            coding->cmp_data in the caller, then start the decoding         \
1685            loop again.  We can't allocate memory here directly because     \
1686            it may cause buffer/string relocation.  */                      \
1687         if (!coding->cmp_data                                              \
1688             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1689                 >= COMPOSITION_DATA_SIZE))                                 \
1690           {                                                                \
1691             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1692             goto label_end_of_loop;                                        \
1693           }                                                                \
1694         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1695                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1696                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1697                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1698         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1699                                       coding->composing);                  \
1700         coding->composition_rule_follows = 0;                              \
1701       }                                                                    \
1702     else                                                                   \
1703       {                                                                    \
1704         /* We are already handling a composition.  If the method is        \
1705            the following two, the codes following the current escape       \
1706            sequence are actual characters stored in a buffer.  */          \
1707         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1708             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1709           {                                                                \
1710             coding->composing = COMPOSITION_RELATIVE;                      \
1711             coding->composition_rule_follows = 0;                          \
1712           }                                                                \
1713       }                                                                    \
1714   } while (0)
1715
1716 /* Process ESC 1 (composition end); C1 is the byte that followed ESC.  */
1717
1718 #define DECODE_COMPOSITION_END(c1)                                      \
1719   do {                                                                  \
1720     if (COMPOSING_P (coding))                                           \
1721       {                                                                 \
1722         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1723         coding->composing = COMPOSITION_NO;                             \
1724       }                                                                 \
1725     else                                                                \
1726       {                                                                 \
1727         coding->produced_char += 2;                                     \
1728         *dst++ = ISO_CODE_ESC;                                          \
1729         *dst++ = c1;                                                    \
1730       }                                                                 \
1731   } while (0)
1732
1733 /* Decode a composition rule from the byte C1 (and maybe one more byte
1734    from SRC) and store one encoded composition rule in
1735    coding->cmp_data.  C1 must be a modifiable variable: 32 is
        subtracted from it in place.  After the subtraction, a value below
        81 is the old one-byte format (global and new reference points are
        C1 / 9 and C1 % 9, with 4 mapped to 10), and a value from 81 to 92
        is the new two-byte format whose second byte is read into `c2'
        with ONE_MORE_BYTE.  Any other value stores rule 0.  The expansion
        uses `coding' and `c2' of the enclosing function and clears
        coding->composition_rule_follows.  */
1736
1737 #define DECODE_COMPOSITION_RULE(c1)                                     \
1738   do {                                                                  \
1739     int rule = 0;       /* kept when C1 fits neither format */          \
1740     (c1) -= 32;                                                         \
1741     if (c1 < 81)                /* old format (before ver.21) */        \
1742       {                                                                 \
1743         int gref = (c1) / 9;                                            \
1744         int nref = (c1) % 9;                                            \
1745         if (gref == 4) gref = 10;                                       \
1746         if (nref == 4) nref = 10;                                       \
1747         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1748       }                                                                 \
1749     else if (c1 < 93)           /* new format (after ver.21) */         \
1750       {                                                                 \
1751         ONE_MORE_BYTE (c2);                                             \
1752         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1753       }                                                                 \
1754     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1755     coding->composition_rule_follows = 0;                               \
1756   } while (0)
1757
1758
1759 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Decode the SRC_BYTES bytes of ISO2022 text at SOURCE into
        DESTINATION (DST_BYTES bytes long) according to the flags and
        the designation/invocation state held in CODING.  Designations,
        shifts, direction and composition sequences update CODING and
        produce no character; every other unit produces one character.

        On return CODING->result tells how decoding finished, and
        CODING->consumed, CODING->consumed_char and CODING->produced are
        set.  A byte sequence that cannot be interpreted increments
        CODING->errors, ends any composition in progress, and has its
        first byte emitted as is; decoding resumes at the next byte.  */
1760
1761 static void
1762 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1763      struct coding_system *coding;
1764      unsigned char *source, *destination;
1765      int src_bytes, dst_bytes;
1766 {
1767   unsigned char *src = source;
1768   unsigned char *src_end = source + src_bytes;
1769   unsigned char *dst = destination;
1770   unsigned char *dst_end = destination + dst_bytes;
1771   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1772   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1773   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1774   /* SRC_BASE remembers the start position in source in each loop.
1775      The loop will be exited when there's not enough source code
1776      (within macro ONE_MORE_BYTE), or when there's not enough
1777      destination area to produce a character (within macro
1778      EMIT_CHAR).  */
1779   unsigned char *src_base;
1780   int c, charset;
1781   Lisp_Object translation_table;
1782   Lisp_Object safe_chars;
1783
1784   safe_chars = coding_safe_chars (coding->symbol);
1785
1786   if (NILP (Venable_character_translation))
1787     translation_table = Qnil;
1788   else
1789     {
1790       translation_table = coding->translation_table_for_decode;
1791       if (NILP (translation_table))
1792         translation_table = Vstandard_translation_table_for_decode;
1793     }
1794
1795   coding->result = CODING_FINISH_NORMAL;
1796
1797   while (1)
1798     {
1799       int c1, c2;
1800
1801       src_base = src;
1802       ONE_MORE_BYTE (c1);
1803
1804       /* We produce no character or one character.  */
1805       switch (iso_code_class [c1])
1806         {
1807         case ISO_0x20_or_0x7F:
1808           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1809             {
1810               DECODE_COMPOSITION_RULE (c1);
1811               continue;
1812             }
1813           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1814             {
1815               /* This is SPACE or DEL.  */
1816               charset = CHARSET_ASCII;
1817               break;
1818             }
1819           /* This is a graphic character, we fall down ...  */
1820
1821         case ISO_graphic_plane_0:
1822           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1823             {
1824               DECODE_COMPOSITION_RULE (c1);
1825               continue;
1826             }
1827           charset = charset0;
1828           break;
1829
1830         case ISO_0xA0_or_0xFF:
1831           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1832               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1833             goto label_invalid_code;
1834           /* This is a graphic character, we fall down ... */
1835
1836         case ISO_graphic_plane_1:
1837           if (charset1 < 0)
1838             goto label_invalid_code;
1839           charset = charset1;
1840           break;
1841
1842         case ISO_control_0:
1843           if (COMPOSING_P (coding))
1844             DECODE_COMPOSITION_END ('1');
1845
1846           /* All ISO2022 control characters in this class have the
1847              same representation in Emacs internal format.  */
1848           if (c1 == '\n'
1849               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1850               && (coding->eol_type == CODING_EOL_CR
1851                   || coding->eol_type == CODING_EOL_CRLF))
1852             {
1853               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1854               goto label_end_of_loop;
1855             }
1856           charset = CHARSET_ASCII;
1857           break;
1858
1859         case ISO_control_1:
1860           if (COMPOSING_P (coding))
1861             DECODE_COMPOSITION_END ('1');
1862           goto label_invalid_code;
1863
1864         case ISO_carriage_return:
1865           if (COMPOSING_P (coding))
1866             DECODE_COMPOSITION_END ('1');
1867
1868           if (coding->eol_type == CODING_EOL_CR)
1869             c1 = '\n';
1870           else if (coding->eol_type == CODING_EOL_CRLF)
1871             {
1872               ONE_MORE_BYTE (c1);
1873               if (c1 != ISO_CODE_LF)
1874                 {
1875                   src--;
1876                   c1 = '\r';
1877                 }
1878             }
1879           charset = CHARSET_ASCII;
1880           break;
1881
1882         case ISO_shift_out:
1883           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1884               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1885             goto label_invalid_code;
1886           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1887           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1888           continue;
1889
1890         case ISO_shift_in:
1891           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1892             goto label_invalid_code;
1893           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1894           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1895           continue;
1896
1897         case ISO_single_shift_2_7:
1898         case ISO_single_shift_2:
1899           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1900             goto label_invalid_code;
1901           /* SS2 is handled as an escape sequence of ESC 'N' */
1902           c1 = 'N';
1903           goto label_escape_sequence;
1904
1905         case ISO_single_shift_3:
1906           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1907             goto label_invalid_code;
1908           /* SS3 is handled as an escape sequence of ESC 'O' */
1909           c1 = 'O';
1910           goto label_escape_sequence;
1911
1912         case ISO_control_sequence_introducer:
1913           /* CSI is handled as an escape sequence of ESC '[' ...  */
1914           c1 = '[';
1915           goto label_escape_sequence;
1916
1917         case ISO_escape:
1918           ONE_MORE_BYTE (c1);
1919         label_escape_sequence:
1920           /* Escape sequences handled by Emacs are invocation,
1921              designation, direction specification, and character
1922              composition specification.  */
1923           switch (c1)
1924             {
1925             case '&':           /* revision of following character set */
1926               ONE_MORE_BYTE (c1);
1927               if (!(c1 >= '@' && c1 <= '~'))
1928                 goto label_invalid_code;
1929               ONE_MORE_BYTE (c1);
1930               if (c1 != ISO_CODE_ESC)
1931                 goto label_invalid_code;
1932               ONE_MORE_BYTE (c1);
1933               goto label_escape_sequence;
1934
1935             case '$':           /* designation of 2-byte character set */
1936               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1937                 goto label_invalid_code;
1938               ONE_MORE_BYTE (c1);
1939               if (c1 >= '@' && c1 <= 'B')
1940                 {       /* designation of JISX0208.1978, GB2312.1980,
1941                            or JISX0208.1980 */
1942                   DECODE_DESIGNATION (0, 2, 94, c1);
1943                 }
1944               else if (c1 >= 0x28 && c1 <= 0x2B)
1945                 {       /* designation of DIMENSION2_CHARS94 character set */
1946                   ONE_MORE_BYTE (c2);
1947                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1948                 }
1949               else if (c1 >= 0x2C && c1 <= 0x2F)
1950                 {       /* designation of DIMENSION2_CHARS96 character set */
1951                   ONE_MORE_BYTE (c2);
1952                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1953                 }
1954               else
1955                 goto label_invalid_code;
1956               /* We must update these variables now.  */
1957               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1958               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1959               continue;
1960
1961             case 'n':           /* invocation of locking-shift-2 */
1962               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1963                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1964                 goto label_invalid_code;
1965               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1966               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1967               continue;
1968
1969             case 'o':           /* invocation of locking-shift-3 */
1970               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1971                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1972                 goto label_invalid_code;
1973               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1974               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1975               continue;
1976
1977             case 'N':           /* invocation of single-shift-2 */
1978               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1979                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1980                 goto label_invalid_code;
1981               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1982               ONE_MORE_BYTE (c1);
1983               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1984                 goto label_invalid_code;
1985               break;
1986
1987             case 'O':           /* invocation of single-shift-3 */
1988               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1989                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1990                 goto label_invalid_code;
1991               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1992               ONE_MORE_BYTE (c1);
1993               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1994                 goto label_invalid_code;
1995               break;
1996
1997             case '0': case '2': case '3': case '4': /* start composition */
1998               DECODE_COMPOSITION_START (c1);
1999               continue;
2000
2001             case '1':           /* end composition */
2002               DECODE_COMPOSITION_END (c1);
2003               continue;
2004
2005             case '[':           /* specification of direction */
2006               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
2007                 goto label_invalid_code;
2008               /* For the moment, nested direction is not supported.
2009                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
2010                  left-to-right, and nonzero means right-to-left.  */
2011               ONE_MORE_BYTE (c1);
2012               switch (c1)
2013                 {
2014                 case ']':       /* end of the current direction; the
                                       sequence is already complete, so no
                                       further byte must be consumed */
2015                   coding->mode &= ~CODING_MODE_DIRECTION;
2016                   break;
2017                 case '0':       /* end of the current direction */
2018                 case '1':       /* start of left-to-right direction */
2019                   ONE_MORE_BYTE (c1);
2020                   if (c1 == ']')
2021                     coding->mode &= ~CODING_MODE_DIRECTION;
2022                   else
2023                     goto label_invalid_code;
2024                   break;
2025
2026                 case '2':       /* start of right-to-left direction */
2027                   ONE_MORE_BYTE (c1);
2028                   if (c1 == ']')
2029                     coding->mode |= CODING_MODE_DIRECTION;
2030                   else
2031                     goto label_invalid_code;
2032                   break;
2033
2034                 default:
2035                   goto label_invalid_code;
2036                 }
2037               continue;
2038
2039             default:
2040               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
2041                 goto label_invalid_code;
2042               if (c1 >= 0x28 && c1 <= 0x2B)
2043                 {       /* designation of DIMENSION1_CHARS94 character set */
2044                   ONE_MORE_BYTE (c2);
2045                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
2046                 }
2047               else if (c1 >= 0x2C && c1 <= 0x2F)
2048                 {       /* designation of DIMENSION1_CHARS96 character set */
2049                   ONE_MORE_BYTE (c2);
2050                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2051                 }
2052               else
2053                 goto label_invalid_code;
2054               /* We must update these variables now.  */
2055               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2056               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2057               continue;
2058             }
2059         }
2060
2061       /* Now we know CHARSET and 1st position code C1 of a character.
2062          Produce a multibyte sequence for that character while getting
2063          2nd position code C2 if necessary.  */
2064       if (CHARSET_DIMENSION (charset) == 2)
2065         {
2066           ONE_MORE_BYTE (c2);
2067           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2068             /* C2 is not in a valid range.  */
2069             goto label_invalid_code;
2070         }
2071       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2072       EMIT_CHAR (c);
2073       continue;
2074
2075     label_invalid_code:
2076       coding->errors++;
2077       if (COMPOSING_P (coding))
2078         DECODE_COMPOSITION_END ('1');
2079       src = src_base;
2080       c = *src++;
2081       EMIT_CHAR (c);
2082     }
2083
2084  label_end_of_loop:
2085   coding->consumed = coding->consumed_char = src_base - source;
2086   coding->produced = dst - destination;
2087   return;
2088 }
2089
2090
2091 /* ISO2022 encoding stuff.  */
2092
2093 /*
2094    Saying just "ISO2022" is not enough for encoding; we have to
2095    specify more details.  In Emacs, each ISO2022 coding system
2096    variant has the following specifications:
2097         1. Initial designation to G0 through G3.
2098         2. Allows short-form designation?
2099         3. ASCII should be designated to G0 before control characters?
2100         4. ASCII should be designated to G0 at end of line?
2101         5. 7-bit environment or 8-bit environment?
2102         6. Use locking-shift?
2103         7. Use Single-shift?
2104    And the following two are only for Japanese:
2105         8. Use ASCII in place of JIS0201-1976-Roman?
2106         9. Use JISX0208-1983 in place of JISX0208-1978?
2107    These specifications are encoded in `coding->flags' as flag bits
2108    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
2109    details.
2110 */
2111
2112 /* Produce the escape sequence that designates CHARSET to graphic
2113    register REG at DST, advancing DST, and record CHARSET as the current
2114    designation of REG in CODING.  The intermediate character is omitted
2115    (short form) only in the one case tested below, when CODING allows it.  */
2116 /* DST is not a parameter: it is the variable `dst' of the expansion site.  */
2117 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
2118   do {                                                                  \
2119     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
2120     char *intermediate_char_94 = "()*+"; /* indexed by REG, 94-char sets */ \
2121     char *intermediate_char_96 = ",-./"; /* indexed by REG, 96-char sets */ \
2122     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
2123     /* A revision announcement (ESC & and one more byte) comes first.  */ \
2124     if (revision < 255) /* NOTE(review): 255 presumably means "none" -- confirm */ \
2125       {                                                                 \
2126         *dst++ = ISO_CODE_ESC;                                          \
2127         *dst++ = '&';                                                   \
2128         *dst++ = '@' + revision;                                        \
2129       }                                                                 \
2130     *dst++ = ISO_CODE_ESC;                                              \
2131     if (CHARSET_DIMENSION (charset) == 1)                               \
2132       {                                                                 \
2133         if (CHARSET_CHARS (charset) == 94)                              \
2134           *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
2135         else                                                            \
2136           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
2137       }                                                                 \
2138     else                                                                \
2139       {                                                                 \
2140         *dst++ = '$'; /* written for every set whose dimension is not 1 */ \
2141         if (CHARSET_CHARS (charset) == 94)                              \
2142           {                                                             \
2143             if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
2144                 || reg != 0                                             \
2145                 || final_char < '@' || final_char > 'B') /* else: short form */ \
2146               *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
2147           }                                                             \
2148         else                                                            \
2149           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
2150       }                                                                 \
2151     *dst++ = final_char;                                                \
2152     CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; /* remember it */ \
2153   } while (0)
2154
2155 /* The following two macros write the ISO2022 single-shift functions
2156    (single-shift-2 and single-shift-3) at DST, advancing DST, and set the
2157    single-shifting state of CODING.  */
2158 /* They use the variables `coding' and `dst' of the expansion site.  */
2159 #define ENCODE_SINGLE_SHIFT_2                           \
2160   do {                                                  \
2161     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
2162       *dst++ = ISO_CODE_ESC, *dst++ = 'N'; /* 7-bit: two-byte escape form */ \
2163     else                                                \
2164       *dst++ = ISO_CODE_SS2;               /* otherwise: one control byte */ \
2165     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
2166   } while (0)
2167
2168 #define ENCODE_SINGLE_SHIFT_3 /* Write single-shift-3 at DST.  */ \
2169   do {                                                  \
2170     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
2171       *dst++ = ISO_CODE_ESC, *dst++ = 'O'; /* 7-bit: two-byte escape form */ \
2172     else                                                \
2173       *dst++ = ISO_CODE_SS3;               /* otherwise: one control byte */ \
2174     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; /* flag the pending shift */ \
2175   } while (0)
2176
2177 /* The following four macros write the ISO2022 locking-shift functions
2178    (shift-in, shift-out, locking-shift-2, and locking-shift-3) at DST and
2179    record in CODING which graphic register is now invoked to plane 0.  */
2180 /* They use the variables `coding' and `dst' of the expansion site.  */
2181 #define ENCODE_SHIFT_IN                         \
2182   do {                                          \
2183     *dst++ = ISO_CODE_SI;                       \
2184     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; /* register 0 on plane 0 */ \
2185   } while (0)
2186
2187 #define ENCODE_SHIFT_OUT /* Write shift-out at DST.  */ \
2188   do {                                          \
2189     *dst++ = ISO_CODE_SO;                       \
2190     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; /* register 1 on plane 0 */ \
2191   } while (0)
2192
2193 #define ENCODE_LOCKING_SHIFT_2 /* Write locking-shift-2 (ESC n) at DST.  */ \
2194   do {                                          \
2195     *dst++ = ISO_CODE_ESC, *dst++ = 'n';        \
2196     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; /* register 2 on plane 0 */ \
2197   } while (0)
2198
2199 #define ENCODE_LOCKING_SHIFT_3 /* Write locking-shift-3 (ESC o) at DST.  */ \
2200   do {                                          \
2201     *dst++ = ISO_CODE_ESC, *dst++ = 'o';        \
2202     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; /* register 3 on plane 0 */ \
2203   } while (0)
2204
2205 /* Produce codes at DST for a DIMENSION1 character whose character set
2206    is CHARSET and whose position-code is C1.  Designation and invocation
2207    sequences are also produced in advance if necessary.  */
2208 /* `coding' and `dst' are the variables of the scope where this is expanded.  */
2209 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
2210   do {                                                                  \
2211     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) /* a single-shift is pending */ \
2212       {                                                                 \
2213         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
2214           *dst++ = c1 & 0x7F;                                           \
2215         else                                                            \
2216           *dst++ = c1 | 0x80;                                           \
2217         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; /* shift is used up */ \
2218         break;                                                          \
2219       }                                                                 \
2220     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
2221       {                                                                 \
2222         *dst++ = c1 & 0x7F; /* plane 0: high bit clear */               \
2223         break;                                                          \
2224       }                                                                 \
2225     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
2226       {                                                                 \
2227         *dst++ = c1 | 0x80; /* plane 1: high bit set */                 \
2228         break;                                                          \
2229       }                                                                 \
2230     else                                                                \
2231       /* Since CHARSET is not yet invoked to any graphic planes, we     \
2232          must invoke it, or, at first, designate it to some graphic     \
2233          register.  Then repeat the loop to actually produce the        \
2234          character.  */                                                 \
2235       dst = encode_invocation_designation (charset, coding, dst);       \
2236   } while (1) /* left only through one of the `break's above */
2237
2238 /* Produce codes at DST for a DIMENSION2 character whose character set
2239    is CHARSET and whose position-codes are C1 and C2.  Designation and
2240    invocation codes are also produced in advance if necessary.  */
2241 /* `coding' and `dst' are the variables of the scope where this is expanded.  */
2242 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
2243   do {                                                                  \
2244     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) /* a single-shift is pending */ \
2245       {                                                                 \
2246         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
2247           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
2248         else                                                            \
2249           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
2250         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; /* shift is used up */ \
2251         break;                                                          \
2252       }                                                                 \
2253     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
2254       {                                                                 \
2255         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F; /* plane 0: high bits clear */ \
2256         break;                                                          \
2257       }                                                                 \
2258     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
2259       {                                                                 \
2260         *dst++ = c1 | 0x80, *dst++= c2 | 0x80; /* plane 1: high bits set */ \
2261         break;                                                          \
2262       }                                                                 \
2263     else                                                                \
2264       /* Since CHARSET is not yet invoked to any graphic planes, we     \
2265          must invoke it, or, at first, designate it to some graphic     \
2266          register.  Then repeat the loop to actually produce the        \
2267          character.  */                                                 \
2268       dst = encode_invocation_designation (charset, coding, dst);       \
2269   } while (1) /* left only through one of the `break's above */
2270
2271 #define ENCODE_ISO_CHARACTER(c) /* Encode character C at DST.  */ \
2272   do {                                                          \
2273     int charset, c1, c2;                                        \
2274     /* Split C into its charset and position codes.  */         \
2275     SPLIT_CHAR (c, charset, c1, c2);                            \
2276     if (CHARSET_DEFINED_P (charset))                            \
2277       {                                                         \
2278         if (CHARSET_DIMENSION (charset) == 1)                   \
2279           {                                                     \
2280             if (charset == CHARSET_ASCII                        \
2281                 && coding->flags & CODING_FLAG_ISO_USE_ROMAN)   \
2282               charset = charset_latin_jisx0201; /* substitute as the flag asks */ \
2283             ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);      \
2284           }                                                     \
2285         else                                                    \
2286           {                                                     \
2287             if (charset == charset_jisx0208                     \
2288                 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)  \
2289               charset = charset_jisx0208_1978; /* substitute as the flag asks */ \
2290             ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);  \
2291           }                                                     \
2292       }                                                         \
2293     else /* charset is not defined: write the codes as they are */ \
2294       {                                                         \
2295         *dst++ = c1;                                            \
2296         if (c2 >= 0) /* NOTE(review): presumably c2 < 0 means no 2nd code -- confirm */ \
2297           *dst++ = c2;                                          \
2298       }                                                         \
2299   } while (0)
2300
2301
2302 /* Instead of encoding character C, encode CODING_REPLACEMENT_CHARACTER.  */
2303 /* It is produced twice when CHARSET_WIDTH of the charset of C exceeds 1.  */
2304 #define ENCODE_UNSAFE_CHARACTER(c)                              \
2305   do {                                                          \
2306     ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);        \
2307     if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1)                   \
2308       ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);      \
2309   } while (0)
2310
2311
2312 /* Produce at DST whatever designation and invocation codes are needed
2313    to use CHARSET.  The element `spec.iso2022' of *CODING is updated
2314    by the macros used here.  Return the new DST.  */
2315
2316 unsigned char *
2317 encode_invocation_designation (charset, coding, dst)
2318      int charset;
2319      struct coding_system *coding;
2320      unsigned char *dst;
2321 {
2322   int reg;                      /* graphic register number */
2323
2324   /* At first, look for a register CHARSET is already designated to.  */
2325   for (reg = 0; reg < 4; reg++)
2326     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2327       break;
2328
2329   if (reg >= 4)
2330     {
2331       /* CHARSET is not yet designated to any graphic registers.  */
2332       /* At first check the requested designation.  */
2333       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2334       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2335         /* Since CHARSET requests no special designation, designate it
2336            to graphic register 0.  */
2337         reg = 0;
2338
2339       ENCODE_DESIGNATION (charset, reg, coding);
2340     }
2341
2342   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2343       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2344     {
2345       /* Since the graphic register REG is not invoked to any graphic
2346          planes, invoke it to graphic plane 0.  */
2347       switch (reg)
2348         {
2349         case 0:                 /* graphic register 0 */
2350           ENCODE_SHIFT_IN;
2351           break;
2352
2353         case 1:                 /* graphic register 1 */
2354           ENCODE_SHIFT_OUT;
2355           break;
2356
2357         case 2:                 /* graphic register 2 */
2358           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2359             ENCODE_SINGLE_SHIFT_2;      /* sets the single-shifting state only */
2360           else
2361             ENCODE_LOCKING_SHIFT_2;     /* records register 2 on plane 0 */
2362           break;
2363
2364         case 3:                 /* graphic register 3 */
2365           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2366             ENCODE_SINGLE_SHIFT_3;      /* sets the single-shifting state only */
2367           else
2368             ENCODE_LOCKING_SHIFT_3;     /* records register 3 on plane 0 */
2369           break;
2370         }
2371     }
2372
2373   return dst;
2374 }
2375
2376 /* Produce 2-byte codes at DST for encoded composition rule RULE.  */
2377 /* RULE is first decoded into GREF and NREF by COMPOSITION_DECODE_RULE.  */
2378 #define ENCODE_COMPOSITION_RULE(rule)           \
2379   do {                                          \
2380     int gref, nref;                             \
2381     COMPOSITION_DECODE_RULE (rule, gref, nref); \
2382     *dst++ = 32 + 81 + gref; /* 1st byte carries GREF */ \
2383     *dst++ = 32 + nref;      /* 2nd byte carries NREF */ \
2384   } while (0)
2385
2386 /* Produce codes at DST for indicating the start of a composition
2387    sequence (ESC 0, ESC 3, or ESC 4).  DATA points to an array of
2388    integers which specify information about the composition.  See the
2389    comment in coding.h for the format of DATA.  */
2390 /* DST is the variable `dst' of the scope where this is expanded.  */
2391 #define ENCODE_COMPOSITION_START(coding, data)                          \
2392   do {                                                                  \
2393     coding->composing = data[3]; /* the composition method */           \
2394     *dst++ = ISO_CODE_ESC;                                              \
2395     if (coding->composing == COMPOSITION_RELATIVE)                      \
2396       *dst++ = '0';                                                     \
2397     else                                                                \
2398       {                                                                 \
2399         *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS        \
2400                   ? '3' : '4');                                         \
2401         coding->cmp_data_index = coding->cmp_data_start + 4; /* NOTE(review): presumably skips a 4-int header -- confirm */ \
2402         coding->composition_rule_follows = 0;                           \
2403       }                                                                 \
2404   } while (0)
2405
2406 /* Produce codes (ESC 1) at DST for indicating the end of the current composition.  */
2407 /* Also advances CODING to the next composition record.  */
2408 #define ENCODE_COMPOSITION_END(coding, data)                    \
2409   do {                                                          \
2410     *dst++ = ISO_CODE_ESC;                                      \
2411     *dst++ = '1';                                               \
2412     coding->cmp_data_start += data[0]; /* NOTE(review): data[0] presumably is the record length -- confirm */ \
2413     coding->composing = COMPOSITION_NO;                         \
2414     if (coding->cmp_data_start == coding->cmp_data->used        \
2415         && coding->cmp_data->next) /* this chunk is exhausted and another follows */ \
2416       {                                                         \
2417         coding->cmp_data = coding->cmp_data->next;              \
2418         coding->cmp_data_start = 0;                             \
2419       }                                                         \
2420   } while (0)
2421
2422 /* Produce composition start sequence ESC 0 at DST.  Here, this sequence
2423    does not mean the start of a new composition but means that we have
2424    just produced components (alternate chars and composition rules) of
2425    the composition and the actual text follows in SRC.  */
2426 /* DST is the variable `dst' of the scope where this is expanded.  */
2427 #define ENCODE_COMPOSITION_FAKE_START(coding)   \
2428   do {                                          \
2429     *dst++ = ISO_CODE_ESC;                      \
2430     *dst++ = '0';                               \
2431     coding->composing = COMPOSITION_RELATIVE; /* from now on treated as relative */ \
2432   } while (0)
2433
2434 /* The following three macros produce codes for indicating direction of
2435    text.  This one writes the introducer at DST: ESC [ if CODING is 7-bit, else CSI.  */
2436 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
2437   do {                                                  \
2438     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) /* test the bit; other flags may be set too */ \
2439       *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
2440     else                                                \
2441       *dst++ = ISO_CODE_CSI;                            \
2442   } while (0)
2443
2444 #define ENCODE_DIRECTION_R2L    \
2445   ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '2', *dst++ = ']'
2446
2447 #define ENCODE_DIRECTION_L2R    \
2448   ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '0', *dst++ = ']'
2449
2450 /* Produce codes at DST for designation and invocation to reset the
2451    graphic planes and registers of CODING to initial state.  */
2452 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
2453   do {                                                                      \
2454     int reg;                                                                \
2455     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) /* plane 0 must hold register 0 */ \
2456       ENCODE_SHIFT_IN;                                                      \
2457     for (reg = 0; reg < 4; reg++)                                           \
2458       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0 /* NOTE(review): negative presumably means no initial charset -- confirm */ \
2459           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
2460               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
2461         ENCODE_DESIGNATION /* re-designate only registers that have changed */ \
2462           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
2463   } while (0)
2464
2465 /* Scan the line starting at SRC (up to SRC_END or a newline) and produce
2466    at DST the designation sequences its charsets request; return the updated DST.
2467
2468    If the current block ends before any end-of-line, we may fail to
2469    find all the necessary designations.  */
2470
2471 static unsigned char *
2472 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2473      struct coding_system *coding;
2474      Lisp_Object translation_table;
2475      unsigned char *src, *src_end, *dst;
2476 {
2477   int charset, c, found = 0, reg;
2478   /* Table of charsets to be designated to each graphic register.  */
2479   int r[4];
2480
2481   for (reg = 0; reg < 4; reg++)
2482     r[reg] = -1;                /* -1: nothing found for this register yet */
2483
2484   while (found < 4)             /* stop early once all four registers are claimed */
2485     {
2486       ONE_MORE_CHAR (c);        /* NOTE(review): presumably jumps to label_end_of_loop at end of source -- confirm */
2487       if (c == '\n')
2488         break;
2489
2490       charset = CHAR_CHARSET (c);
2491       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2492       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2493         {
2494           found++;              /* the first charset requesting REG wins */
2495           r[reg] = charset;
2496         }
2497     }
2498
2499  label_end_of_loop:
2500   if (found)
2501     {
2502       for (reg = 0; reg < 4; reg++)
2503         if (r[reg] >= 0
2504             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2505           ENCODE_DESIGNATION (r[reg], reg, coding);     /* skip ones already in effect */
2506     }
2507
2508   return dst;
2509 }
2510
2511 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2512 /* Encode SRC_BYTES bytes at SOURCE into DESTINATION as ISO2022 according to CODING.  */
2513 static void
2514 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2515      struct coding_system *coding;
2516      unsigned char *source, *destination;
2517      int src_bytes, dst_bytes;
2518 {
2519   unsigned char *src = source;
2520   unsigned char *src_end = source + src_bytes;
2521   unsigned char *dst = destination;
2522   unsigned char *dst_end = destination + dst_bytes;
2523   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2524      from DST_END to assure overflow checking is necessary only at the
2525      head of loop.  */
2526   unsigned char *adjusted_dst_end = dst_end - 19;
2527   /* SRC_BASE remembers the start position in source in each loop.
2528      The loop is exited when there is not enough source text to
2529      analyze multi-byte codes (presumably within macro ONE_MORE_CHAR,
2530      which is defined outside this function), or when the check at the
2531      head of the loop finds not enough destination area.  */
2532   unsigned char *src_base;
2533   int c;
2534   Lisp_Object translation_table;
2535   Lisp_Object safe_chars;
2536
2537   if (coding->flags & CODING_FLAG_ISO_SAFE)
2538     coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
2539
2540   safe_chars = coding_safe_chars (coding->symbol);
2541
2542   if (NILP (Venable_character_translation))
2543     translation_table = Qnil;
2544   else
2545     {
2546       translation_table = coding->translation_table_for_encode;
2547       if (NILP (translation_table))     /* fall back to the standard table */
2548         translation_table = Vstandard_translation_table_for_encode;
2549     }
2550
2551   coding->consumed_char = 0;
2552   coding->errors = 0;
2553   while (1)
2554     {
2555       src_base = src;
2556       /* NOTE(review): dst_bytes == 0 presumably means DST trails SRC in one buffer -- confirm.  */
2557       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2558         {
2559           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2560           break;
2561         }
2562
2563       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2564           && CODING_SPEC_ISO_BOL (coding))
2565         {
2566           /* We have to produce designation sequences if any now.  */
2567           dst = encode_designation_at_bol (coding, translation_table,
2568                                            src, src_end, dst);
2569           CODING_SPEC_ISO_BOL (coding) = 0;
2570         }
2571
2572       /* Check composition start and end.  */
2573       if (coding->composing != COMPOSITION_DISABLED
2574           && coding->cmp_data_start < coding->cmp_data->used)
2575         {
2576           struct composition_data *cmp_data = coding->cmp_data;
2577           int *data = cmp_data->data + coding->cmp_data_start;
2578           int this_pos = cmp_data->char_offset + coding->consumed_char;
2579
2580           if (coding->composing == COMPOSITION_RELATIVE)
2581             {
2582               if (this_pos == data[2])
2583                 {
2584                   ENCODE_COMPOSITION_END (coding, data);
2585                   cmp_data = coding->cmp_data;  /* the macro may have moved to the next chunk */
2586                   data = cmp_data->data + coding->cmp_data_start;
2587                 }
2588             }
2589           else if (COMPOSING_P (coding))
2590             {
2591               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS  */
2592               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2593                 /* We have consumed components of the composition.
2594                    What follows in SRC is the base text of the
2595                    composition.  */
2596                 ENCODE_COMPOSITION_FAKE_START (coding);
2597               else
2598                 {
2599                   int c = cmp_data->data[coding->cmp_data_index++];    /* next component; shadows the outer C */
2600                   if (coding->composition_rule_follows)
2601                     {
2602                       ENCODE_COMPOSITION_RULE (c);
2603                       coding->composition_rule_follows = 0;
2604                     }
2605                   else
2606                     {
2607                       if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2608                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2609                         ENCODE_UNSAFE_CHARACTER (c);
2610                       else
2611                         ENCODE_ISO_CHARACTER (c);
2612                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2613                         coding->composition_rule_follows = 1;
2614                     }
2615                   continue;     /* no source character was consumed */
2616                 }
2617             }
2618           if (!COMPOSING_P (coding))
2619             {
2620               if (this_pos == data[1])
2621                 {
2622                   ENCODE_COMPOSITION_START (coding, data);
2623                   continue;     /* no source character was consumed */
2624                 }
2625             }
2626         }
2627
2628       ONE_MORE_CHAR (c);
2629
2630       /* Now encode the character C.  */
2631       if (c < 0x20 || c == 0x7F)
2632         {
2633           if (c == '\r')
2634             {
2635               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2636                 {
2637                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2638                     ENCODE_RESET_PLANE_AND_REGISTER;
2639                   *dst++ = c;
2640                   continue;     /* skips the consumed_char increment at the loop bottom */
2641                 }
2642               /* fall down to treat '\r' as '\n' ...  */
2643               c = '\n';
2644             }
2645           if (c == '\n')
2646             {
2647               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2648                 ENCODE_RESET_PLANE_AND_REGISTER;
2649               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2650                 bcopy (coding->spec.iso2022.initial_designation,
2651                        coding->spec.iso2022.current_designation,
2652                        sizeof coding->spec.iso2022.initial_designation);
2653               if (coding->eol_type == CODING_EOL_LF
2654                   || coding->eol_type == CODING_EOL_UNDECIDED)
2655                 *dst++ = ISO_CODE_LF;
2656               else if (coding->eol_type == CODING_EOL_CRLF)
2657                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2658               else
2659                 *dst++ = ISO_CODE_CR;
2660               CODING_SPEC_ISO_BOL (coding) = 1; /* the next iteration starts a line */
2661             }
2662           else
2663             {
2664               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2665                 ENCODE_RESET_PLANE_AND_REGISTER;
2666               *dst++ = c;
2667             }
2668         }
2669       else if (ASCII_BYTE_P (c))
2670         ENCODE_ISO_CHARACTER (c);
2671       else if (SINGLE_BYTE_CHAR_P (c))
2672         {
2673           *dst++ = c;           /* written as is, but counted as an error */
2674           coding->errors++;
2675         }
2676       else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2677                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2678         ENCODE_UNSAFE_CHARACTER (c);
2679       else
2680         ENCODE_ISO_CHARACTER (c);
2681
2682       coding->consumed_char++;
2683     }
2684
2685  label_end_of_loop:
2686   coding->consumed = src_base - source; /* bytes up to the start of the unfinished iteration */
2687   coding->produced = coding->produced_char = dst - destination;
2688 }
2689
2690 \f
2691 /*** 4. SJIS and BIG5 handlers ***/
2692
2693 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2694    quite widely.  So, for the moment, Emacs supports them in the bare
2695    C code.  But, in the future, they may be supported only by CCL.  */
2696
2697 /* SJIS is a coding system encoding three character sets: ASCII, right
2698    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2699    as is.  A character of charset katakana-jisx0201 is encoded by
2700    "position-code + 0x80".  A character of charset japanese-jisx0208
2701    is encoded in 2-byte but two position-codes are divided and shifted
2702    so that it fits in the range below.
2703
2704    --- CODE RANGE of SJIS ---
2705    (character set)      (range)
2706    ASCII                0x00 .. 0x7F
2707    KATAKANA-JISX0201    0xA1 .. 0xDF
2708    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2709             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2710    -------------------------------
2711
2712 */
2713
2714 /* BIG5 is a coding system encoding two character sets: ASCII and
2715    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2716    character set and is encoded in two bytes.
2717
2718    --- CODE RANGE of BIG5 ---
2719    (character set)      (range)
2720    ASCII                0x00 .. 0x7F
2721    Big5 (1st byte)      0xA1 .. 0xFE
2722         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2723    --------------------------
2724
2725    Since the number of characters in Big5 is larger than the maximum
2726    number of characters in an Emacs charset (96x96), it can't be handled as one
2727    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2728    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2729    contains frequently used characters and the latter contains less
2730    frequently used characters.  */
2731
2732 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2733    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2734    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2735    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2736
2737 /* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Decode the BIG5 byte pair B1, B2 into CHARSET (set to
   `charset_big5_1' when B1 < 0xC9, else `charset_big5_2') and the
   position-codes C1, C2.  The pair is first turned into a linear index
   (BIG5_SAME_ROW codes per lead byte; trail bytes 0x40..0x7E come
   first, then 0xA1..0xFE), which is then re-split into rows of
   (0xFF - 0xA1) codes.

   Every argument is parenthesized so that expression arguments are
   handled correctly.  B1 and B2 are evaluated more than once, so they
   must not have side effects.  All inputs are read into TEMP (and B1
   is tested) before C1 and C2 are written, so C1/C2 may be the same
   variables as B1/B2.  */

#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Inverse of DECODE_BIG5: encode position-codes C1, C2 of CHARSET
   (`charset_big5_1' or `charset_big5_2') into the BIG5 byte pair B1,
   B2.  B2 is first set to the offset within the lead-byte row and then
   shifted into 0x40..0x7E or 0xA1..0xFE.

   Every argument is parenthesized.  C1 and C2 are read before B1 and
   B2 are written, so B1/B2 may be the same variables as C1/C2.  */

#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += ((b2) < 0x3F ? 0x40 : 0x62);                                \
  } while (0)
2764
2765 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2766    Check if a text is encoded in SJIS.  If it is, return
2767    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2768
2769 static int
2770 detect_coding_sjis (src, src_end, multibytep)
2771      unsigned char *src, *src_end;
2772      int multibytep;
2773 {
2774   int c;
2775   /* Dummy for ONE_MORE_BYTE.  */
2776   struct coding_system dummy_coding;
2777   struct coding_system *coding = &dummy_coding;
2778
2779   while (1)
2780     {
2781       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2782       if (c < 0x80)
2783         continue;
2784       if (c == 0x80 || c == 0xA0 || c > 0xEF)
2785         return 0;
2786       if (c <= 0x9F || c >= 0xE0)
2787         {
2788           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2789           if (c < 0x40 || c == 0x7F || c > 0xFC)
2790             return 0;
2791         }
2792     }
2793  label_end_of_loop:
2794   return CODING_CATEGORY_MASK_SJIS;
2795 }
2796
2797 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2798    Check if a text is encoded in BIG5.  If it is, return
2799    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2800
2801 static int
2802 detect_coding_big5 (src, src_end, multibytep)
2803      unsigned char *src, *src_end;
2804      int multibytep;
2805 {
2806   int c;
2807   /* Dummy for ONE_MORE_BYTE.  */
2808   struct coding_system dummy_coding;
2809   struct coding_system *coding = &dummy_coding;
2810
2811   while (1)
2812     {
2813       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2814       if (c < 0x80)
2815         continue;
2816       if (c < 0xA1 || c > 0xFE)
2817         return 0;
2818       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2819       if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2820         return 0;
2821     }
2822  label_end_of_loop:
2823   return CODING_CATEGORY_MASK_BIG5;
2824 }
2825
2826 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2827    Check if a text is encoded in UTF-8.  If it is, return
2828    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2829
2830 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
2831 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
2832 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2833 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2834 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2835 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2836 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2837
2838 static int
2839 detect_coding_utf_8 (src, src_end, multibytep)
2840      unsigned char *src, *src_end;
2841      int multibytep;
2842 {
2843   unsigned char c;
2844   int seq_maybe_bytes;
2845   /* Dummy for ONE_MORE_BYTE.  */
2846   struct coding_system dummy_coding;
2847   struct coding_system *coding = &dummy_coding;
2848
2849   while (1)
2850     {
2851       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2852       if (UTF_8_1_OCTET_P (c))
2853         continue;
2854       else if (UTF_8_2_OCTET_LEADING_P (c))
2855         seq_maybe_bytes = 1;
2856       else if (UTF_8_3_OCTET_LEADING_P (c))
2857         seq_maybe_bytes = 2;
2858       else if (UTF_8_4_OCTET_LEADING_P (c))
2859         seq_maybe_bytes = 3;
2860       else if (UTF_8_5_OCTET_LEADING_P (c))
2861         seq_maybe_bytes = 4;
2862       else if (UTF_8_6_OCTET_LEADING_P (c))
2863         seq_maybe_bytes = 5;
2864       else
2865         return 0;
2866
2867       do
2868         {
2869           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2870           if (!UTF_8_EXTRA_OCTET_P (c))
2871             return 0;
2872           seq_maybe_bytes--;
2873         }
2874       while (seq_maybe_bytes > 0);
2875     }
2876
2877  label_end_of_loop:
2878   return CODING_CATEGORY_MASK_UTF_8;
2879 }
2880
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text starts with a UTF-16 byte order mark.  If the first
   two bytes are 0xFE 0xFF, return CODING_CATEGORY_MASK_UTF_16_BE; if
   they are 0xFF 0xFE, return CODING_CATEGORY_MASK_UTF_16_LE; else
   return 0.  */
2886
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high (leading) surrogate, i.e. in the range
   0xD800 .. 0xDBFF.  The top six bits select the range, so the mask
   must be 0xFC00; masking with 0xD800 itself would also match values
   such as 0xDC00 or 0xF900.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, i.e. in the range
   0xDC00 .. 0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2896
2897 static int
2898 detect_coding_utf_16 (src, src_end, multibytep)
2899      unsigned char *src, *src_end;
2900      int multibytep;
2901 {
2902   unsigned char c1, c2;
2903   /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
2904   struct coding_system dummy_coding;
2905   struct coding_system *coding = &dummy_coding;
2906
2907   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
2908   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
2909
2910   if ((c1 == 0xFF) && (c2 == 0xFE))
2911     return CODING_CATEGORY_MASK_UTF_16_LE;
2912   else if ((c1 == 0xFE) && (c2 == 0xFF))
2913     return CODING_CATEGORY_MASK_UTF_16_BE;
2914
2915  label_end_of_loop:
2916   return 0;
2917 }
2918
2919 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2920    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2921
2922 static void
2923 decode_coding_sjis_big5 (coding, source, destination,
2924                          src_bytes, dst_bytes, sjis_p)
2925      struct coding_system *coding;
2926      unsigned char *source, *destination;
2927      int src_bytes, dst_bytes;
2928      int sjis_p;
2929 {
2930   unsigned char *src = source;
2931   unsigned char *src_end = source + src_bytes;
2932   unsigned char *dst = destination;
2933   unsigned char *dst_end = destination + dst_bytes;
2934   /* SRC_BASE remembers the start position in source in each loop.
2935      The loop will be exited when there's not enough source code
2936      (within macro ONE_MORE_BYTE), or when there's not enough
2937      destination area to produce a character (within macro
2938      EMIT_CHAR).  */
2939   unsigned char *src_base;
2940   Lisp_Object translation_table;
2941
2942   if (NILP (Venable_character_translation))
2943     translation_table = Qnil;
2944   else
2945     {
2946       translation_table = coding->translation_table_for_decode;
2947       if (NILP (translation_table))
2948         translation_table = Vstandard_translation_table_for_decode;
2949     }
2950
2951   coding->produced_char = 0;
2952   while (1)
2953     {
2954       int c, charset, c1, c2;
2955
2956       src_base = src;
2957       ONE_MORE_BYTE (c1);
2958
2959       if (c1 < 0x80)
2960         {
2961           charset = CHARSET_ASCII;
2962           if (c1 < 0x20)
2963             {
2964               if (c1 == '\r')
2965                 {
2966                   if (coding->eol_type == CODING_EOL_CRLF)
2967                     {
2968                       ONE_MORE_BYTE (c2);
2969                       if (c2 == '\n')
2970                         c1 = c2;
2971                       else
2972                         /* To process C2 again, SRC is subtracted by 1.  */
2973                         src--;
2974                     }
2975                   else if (coding->eol_type == CODING_EOL_CR)
2976                     c1 = '\n';
2977                 }
2978               else if (c1 == '\n'
2979                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2980                        && (coding->eol_type == CODING_EOL_CR
2981                            || coding->eol_type == CODING_EOL_CRLF))
2982                 {
2983                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2984                   goto label_end_of_loop;
2985                 }
2986             }
2987         }
2988       else
2989         {
2990           if (sjis_p)
2991             {
2992               if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
2993                 goto label_invalid_code;
2994               if (c1 <= 0x9F || c1 >= 0xE0)
2995                 {
2996                   /* SJIS -> JISX0208 */
2997                   ONE_MORE_BYTE (c2);
2998                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2999                     goto label_invalid_code;
3000                   DECODE_SJIS (c1, c2, c1, c2);
3001                   charset = charset_jisx0208;
3002                 }
3003               else
3004                 /* SJIS -> JISX0201-Kana */
3005                 charset = charset_katakana_jisx0201;
3006             }
3007           else
3008             {
3009               /* BIG5 -> Big5 */
3010               if (c1 < 0xA0 || c1 > 0xFE)
3011                 goto label_invalid_code;
3012               ONE_MORE_BYTE (c2);
3013               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
3014                 goto label_invalid_code;
3015               DECODE_BIG5 (c1, c2, charset, c1, c2);
3016             }
3017         }
3018
3019       c = DECODE_ISO_CHARACTER (charset, c1, c2);
3020       EMIT_CHAR (c);
3021       continue;
3022
3023     label_invalid_code:
3024       coding->errors++;
3025       src = src_base;
3026       c = *src++;
3027       EMIT_CHAR (c);
3028     }
3029
3030  label_end_of_loop:
3031   coding->consumed = coding->consumed_char = src_base - source;
3032   coding->produced = dst - destination;
3033   return;
3034 }
3035
3036 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3037    This function can encode charsets `ascii', `katakana-jisx0201',
3038    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
3039    are sure that all these charsets are registered as official charset
3040    (i.e. do not have extended leading-codes).  Characters of other
3041    charsets are produced without any encoding.  If SJIS_P is 1, encode
3042    SJIS text, else encode BIG5 text.  */
3043
3044 static void
3045 encode_coding_sjis_big5 (coding, source, destination,
3046                          src_bytes, dst_bytes, sjis_p)
3047      struct coding_system *coding;
3048      unsigned char *source, *destination;
3049      int src_bytes, dst_bytes;
3050      int sjis_p;
3051 {
3052   unsigned char *src = source;
3053   unsigned char *src_end = source + src_bytes;
3054   unsigned char *dst = destination;
3055   unsigned char *dst_end = destination + dst_bytes;
3056   /* SRC_BASE remembers the start position in source in each loop.
3057      The loop will be exited when there's not enough source text to
3058      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3059      there's not enough destination area to produce encoded codes
3060      (within macro EMIT_BYTES).  */
3061   unsigned char *src_base;
3062   Lisp_Object translation_table;
3063
3064   if (NILP (Venable_character_translation))
3065     translation_table = Qnil;
3066   else
3067     {
3068       translation_table = coding->translation_table_for_encode;
3069       if (NILP (translation_table))
3070         translation_table = Vstandard_translation_table_for_encode;
3071     }
3072
3073   while (1)
3074     {
3075       int c, charset, c1, c2;
3076
3077       src_base = src;
3078       ONE_MORE_CHAR (c);
3079
3080       /* Now encode the character C.  */
3081       if (SINGLE_BYTE_CHAR_P (c))
3082         {
3083           switch (c)
3084             {
3085             case '\r':
3086               if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
3087                 {
3088                   EMIT_ONE_BYTE (c);
3089                   break;
3090                 }
3091               c = '\n';
3092             case '\n':
3093               if (coding->eol_type == CODING_EOL_CRLF)
3094                 {
3095                   EMIT_TWO_BYTES ('\r', c);
3096                   break;
3097                 }
3098               else if (coding->eol_type == CODING_EOL_CR)
3099                 c = '\r';
3100             default:
3101               EMIT_ONE_BYTE (c);
3102             }
3103         }
3104       else
3105         {
3106           SPLIT_CHAR (c, charset, c1, c2);
3107           if (sjis_p)
3108             {
3109               if (charset == charset_jisx0208
3110                   || charset == charset_jisx0208_1978)
3111                 {
3112                   ENCODE_SJIS (c1, c2, c1, c2);
3113                   EMIT_TWO_BYTES (c1, c2);
3114                 }
3115               else if (charset == charset_katakana_jisx0201)
3116                 EMIT_ONE_BYTE (c1 | 0x80);
3117               else if (charset == charset_latin_jisx0201)
3118                 EMIT_ONE_BYTE (c1);
3119               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3120                 {
3121                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3122                   if (CHARSET_WIDTH (charset) > 1)
3123                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3124                 }
3125               else
3126                 /* There's no way other than producing the internal
3127                    codes as is.  */
3128                 EMIT_BYTES (src_base, src);
3129             }
3130           else
3131             {
3132               if (charset == charset_big5_1 || charset == charset_big5_2)
3133                 {
3134                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3135                   EMIT_TWO_BYTES (c1, c2);
3136                 }
3137               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3138                 {
3139                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3140                   if (CHARSET_WIDTH (charset) > 1)
3141                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3142                 }
3143               else
3144                 /* There's no way other than producing the internal
3145                    codes as is.  */
3146                 EMIT_BYTES (src_base, src);
3147             }
3148         }
3149       coding->consumed_char++;
3150     }
3151
3152  label_end_of_loop:
3153   coding->consumed = src_base - source;
3154   coding->produced = coding->produced_char = dst - destination;
3155 }
3156
3157 \f
3158 /*** 5. CCL handlers ***/
3159
3160 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3161    Check if a text is encoded in a coding system of which
3162    encoder/decoder are written in CCL program.  If it is, return
3163    CODING_CATEGORY_MASK_CCL, else return 0.  */
3164
3165 static int
3166 detect_coding_ccl (src, src_end, multibytep)
3167      unsigned char *src, *src_end;
3168      int multibytep;
3169 {
3170   unsigned char *valid;
3171   int c;
3172   /* Dummy for ONE_MORE_BYTE.  */
3173   struct coding_system dummy_coding;
3174   struct coding_system *coding = &dummy_coding;
3175
3176   /* No coding system is assigned to coding-category-ccl.  */
3177   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3178     return 0;
3179
3180   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3181   while (1)
3182     {
3183       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3184       if (! valid[c])
3185         return 0;
3186     }
3187  label_end_of_loop:
3188   return CODING_CATEGORY_MASK_CCL;
3189 }
3190
3191 \f
3192 /*** 6. End-of-line handlers ***/
3193
3194 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3195
3196 static void
3197 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3198      struct coding_system *coding;
3199      unsigned char *source, *destination;
3200      int src_bytes, dst_bytes;
3201 {
3202   unsigned char *src = source;
3203   unsigned char *dst = destination;
3204   unsigned char *src_end = src + src_bytes;
3205   unsigned char *dst_end = dst + dst_bytes;
3206   Lisp_Object translation_table;
3207   /* SRC_BASE remembers the start position in source in each loop.
3208      The loop will be exited when there's not enough source code
3209      (within macro ONE_MORE_BYTE), or when there's not enough
3210      destination area to produce a character (within macro
3211      EMIT_CHAR).  */
3212   unsigned char *src_base;
3213   int c;
3214
3215   translation_table = Qnil;
3216   switch (coding->eol_type)
3217     {
3218     case CODING_EOL_CRLF:
3219       while (1)
3220         {
3221           src_base = src;
3222           ONE_MORE_BYTE (c);
3223           if (c == '\r')
3224             {
3225               ONE_MORE_BYTE (c);
3226               if (c != '\n')
3227                 {
3228                   src--;
3229                   c = '\r';
3230                 }
3231             }
3232           else if (c == '\n'
3233                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3234             {
3235               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3236               goto label_end_of_loop;
3237             }
3238           EMIT_CHAR (c);
3239         }
3240       break;
3241
3242     case CODING_EOL_CR:
3243       while (1)
3244         {
3245           src_base = src;
3246           ONE_MORE_BYTE (c);
3247           if (c == '\n')
3248             {
3249               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3250                 {
3251                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3252                   goto label_end_of_loop;
3253                 }
3254             }
3255           else if (c == '\r')
3256             c = '\n';
3257           EMIT_CHAR (c);
3258         }
3259       break;
3260
3261     default:                    /* no need for EOL handling */
3262       while (1)
3263         {
3264           src_base = src;
3265           ONE_MORE_BYTE (c);
3266           EMIT_CHAR (c);
3267         }
3268     }
3269
3270  label_end_of_loop:
3271   coding->consumed = coding->consumed_char = src_base - source;
3272   coding->produced = dst - destination;
3273   return;
3274 }
3275
3276 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3277    format of end-of-line according to `coding->eol_type'.  It also
3278    convert multibyte form 8-bit characters to unibyte if
3279    CODING->src_multibyte is nonzero.  If `coding->mode &
3280    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3281    also means end-of-line.  */
3282
3283 static void
3284 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3285      struct coding_system *coding;
3286      const unsigned char *source;
3287      unsigned char *destination;
3288      int src_bytes, dst_bytes;
3289 {
3290   const unsigned char *src = source;
3291   unsigned char *dst = destination;
3292   const unsigned char *src_end = src + src_bytes;
3293   unsigned char *dst_end = dst + dst_bytes;
3294   Lisp_Object translation_table;
3295   /* SRC_BASE remembers the start position in source in each loop.
3296      The loop will be exited when there's not enough source text to
3297      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3298      there's not enough destination area to produce encoded codes
3299      (within macro EMIT_BYTES).  */
3300   const unsigned char *src_base;
3301   unsigned char *tmp;
3302   int c;
3303   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3304
3305   translation_table = Qnil;
3306   if (coding->src_multibyte
3307       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3308     {
3309       src_end--;
3310       src_bytes--;
3311       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3312     }
3313
3314   if (coding->eol_type == CODING_EOL_CRLF)
3315     {
3316       while (src < src_end)
3317         {
3318           src_base = src;
3319           c = *src++;
3320           if (c >= 0x20)
3321             EMIT_ONE_BYTE (c);
3322           else if (c == '\n' || (c == '\r' && selective_display))
3323             EMIT_TWO_BYTES ('\r', '\n');
3324           else
3325             EMIT_ONE_BYTE (c);
3326         }
3327       src_base = src;
3328     label_end_of_loop:
3329       ;
3330     }
3331   else
3332     {
3333       if (!dst_bytes || src_bytes <= dst_bytes)
3334         {
3335           safe_bcopy (src, dst, src_bytes);
3336           src_base = src_end;
3337           dst += src_bytes;
3338         }
3339       else
3340         {
3341           if (coding->src_multibyte
3342               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3343             dst_bytes--;
3344           safe_bcopy (src, dst, dst_bytes);
3345           src_base = src + dst_bytes;
3346           dst = destination + dst_bytes;
3347           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3348         }
3349       if (coding->eol_type == CODING_EOL_CR)
3350         {
3351           for (tmp = destination; tmp < dst; tmp++)
3352             if (*tmp == '\n') *tmp = '\r';
3353         }
3354       else if (selective_display)
3355         {
3356           for (tmp = destination; tmp < dst; tmp++)
3357             if (*tmp == '\r') *tmp = '\n';
3358         }
3359     }
3360   if (coding->src_multibyte)
3361     dst = destination + str_as_unibyte (destination, dst - destination);
3362
3363   coding->consumed = src_base - source;
3364   coding->produced = dst - destination;
3365   coding->produced_char = coding->produced;
3366 }
3367
3368 \f
3369 /*** 7. C library functions ***/
3370
3371 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3372    has a property `coding-system'.  The value of this property is a
3373    vector of length 5 (called the coding-vector).  Among elements of
3374    this vector, the first (element[0]) and the fifth (element[4])
3375    carry important information for decoding/encoding.  Before
3376    decoding/encoding, this information should be set in fields of a
3377    structure of type `coding_system'.
3378
3379    The value of the property `coding-system' can be a symbol of another
3380    subsidiary coding-system.  In that case, Emacs gets coding-vector
3381    from that symbol.
3382
3383    `element[0]' contains information to be set in `coding->type'.  The
3384    value and its meaning is as follows:
3385
3386    0 -- coding_type_emacs_mule
3387    1 -- coding_type_sjis
3388    2 -- coding_type_iso2022
3389    3 -- coding_type_big5
3390    4 -- coding_type_ccl encoder/decoder written in CCL
3391    nil -- coding_type_no_conversion
3392    t -- coding_type_undecided (automatic conversion on decoding,
3393                                no-conversion on encoding)
3394
3395    `element[4]' contains information to be set in `coding->flags' and
3396    `coding->spec'.  The meaning varies by `coding->type'.
3397
   If `coding->type' is `coding_type_iso2022', element[4] is a vector
   of length 32 (of which the first 17 sub-elements are used now).
3400    Meanings of these sub-elements are:
3401
3402    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3403         If the value is an integer of valid charset, the charset is
3404         assumed to be designated to graphic register N initially.
3405
3406         If the value is minus, it is a minus value of charset which
3407         reserves graphic register N, which means that the charset is
3408         not designated initially but should be designated to graphic
3409         register N just before encoding a character in that charset.
3410
3411         If the value is nil, graphic register N is never used on
3412         encoding.
3413
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
3415         Each value takes t or nil.  See the section ISO2022 of
3416         `coding.h' for more information.
3417
3418    If `coding->type' is `coding_type_big5', element[4] is t to denote
3419    BIG5-ETen or nil to denote BIG5-HKU.
3420
3421    If `coding->type' takes the other value, element[4] is ignored.
3422
3423    Emacs Lisp's coding systems also carry information about format of
3424    end-of-line in a value of property `eol-type'.  If the value is
3425    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3426    means CODING_EOL_CR.  If it is not integer, it should be a vector
3427    of subsidiary coding systems of which property `eol-type' has one
3428    of the above values.
3429
3430 */
3431
3432 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
3433    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
3434    is setup so that no conversion is necessary and return -1, else
3435    return 0.  */
3436
3437 int
3438 setup_coding_system (coding_system, coding)
3439      Lisp_Object coding_system;
3440      struct coding_system *coding;
3441 {
3442   Lisp_Object coding_spec, coding_type, eol_type, plist;
3443   Lisp_Object val;
3444
3445   /* At first, zero clear all members.  */
3446   bzero (coding, sizeof (struct coding_system));
3447
3448   /* Initialize some fields required for all kinds of coding systems.  */
3449   coding->symbol = coding_system;
3450   coding->heading_ascii = -1;
3451   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3452   coding->composing = COMPOSITION_DISABLED;
3453   coding->cmp_data = NULL;
3454
3455   if (NILP (coding_system))
3456     goto label_invalid_coding_system;
3457
3458   coding_spec = Fget (coding_system, Qcoding_system);
3459
3460   if (!VECTORP (coding_spec)
3461       || XVECTOR (coding_spec)->size != 5
3462       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3463     goto label_invalid_coding_system;
3464
3465   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3466   if (VECTORP (eol_type))
3467     {
3468       coding->eol_type = CODING_EOL_UNDECIDED;
3469       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3470     }
3471   else if (XFASTINT (eol_type) == 1)
3472     {
3473       coding->eol_type = CODING_EOL_CRLF;
3474       coding->common_flags
3475         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3476     }
3477   else if (XFASTINT (eol_type) == 2)
3478     {
3479       coding->eol_type = CODING_EOL_CR;
3480       coding->common_flags
3481         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3482     }
3483   else
3484     coding->eol_type = CODING_EOL_LF;
3485
3486   coding_type = XVECTOR (coding_spec)->contents[0];
3487   /* Try short cut.  */
3488   if (SYMBOLP (coding_type))
3489     {
3490       if (EQ (coding_type, Qt))
3491         {
3492           coding->type = coding_type_undecided;
3493           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3494         }
3495       else
3496         coding->type = coding_type_no_conversion;
3497       /* Initialize this member.  Any thing other than
3498          CODING_CATEGORY_IDX_UTF_16_BE and
3499          CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3500          special treatment in detect_eol.  */
3501       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3502
3503       return 0;
3504     }
3505
3506   /* Get values of coding system properties:
3507      `post-read-conversion', `pre-write-conversion',
3508      `translation-table-for-decode', `translation-table-for-encode'.  */
3509   plist = XVECTOR (coding_spec)->contents[3];
  /* Pre & post conversion functions should be disabled if
     inhibit_pre_post_conversion is nonzero.  This is the case that a
     code conversion function is called while those functions are
     running.  */
3513   if (! inhibit_pre_post_conversion)
3514     {
3515       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3516       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3517     }
3518   val = Fplist_get (plist, Qtranslation_table_for_decode);
3519   if (SYMBOLP (val))
3520     val = Fget (val, Qtranslation_table_for_decode);
3521   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3522   val = Fplist_get (plist, Qtranslation_table_for_encode);
3523   if (SYMBOLP (val))
3524     val = Fget (val, Qtranslation_table_for_encode);
3525   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3526   val = Fplist_get (plist, Qcoding_category);
3527   if (!NILP (val))
3528     {
3529       val = Fget (val, Qcoding_category_index);
3530       if (INTEGERP (val))
3531         coding->category_idx = XINT (val);
3532       else
3533         goto label_invalid_coding_system;
3534     }
3535   else
3536     goto label_invalid_coding_system;
3537
3538   /* If the coding system has non-nil `composition' property, enable
3539      composition handling.  */
3540   val = Fplist_get (plist, Qcomposition);
3541   if (!NILP (val))
3542     coding->composing = COMPOSITION_NO;
3543
3544   switch (XFASTINT (coding_type))
3545     {
3546     case 0:
3547       coding->type = coding_type_emacs_mule;
3548       coding->common_flags
3549         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3550       if (!NILP (coding->post_read_conversion))
3551         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3552       if (!NILP (coding->pre_write_conversion))
3553         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3554       break;
3555
3556     case 1:
3557       coding->type = coding_type_sjis;
3558       coding->common_flags
3559         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3560       break;
3561
3562     case 2:
3563       coding->type = coding_type_iso2022;
3564       coding->common_flags
3565         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3566       {
3567         Lisp_Object val, temp;
3568         Lisp_Object *flags;
3569         int i, charset, reg_bits = 0;
3570
3571         val = XVECTOR (coding_spec)->contents[4];
3572
3573         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3574           goto label_invalid_coding_system;
3575
3576         flags = XVECTOR (val)->contents;
3577         coding->flags
3578           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3579              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3580              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3581              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3582              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3583              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3584              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3585              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3586              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3587              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3588              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3589              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3590              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3591              );
3592
3593         /* Invoke graphic register 0 to plane 0.  */
3594         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3595         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3596         CODING_SPEC_ISO_INVOCATION (coding, 1)
3597           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3598         /* Not single shifting at first.  */
3599         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3600         /* Beginning of buffer should also be regarded as bol. */
3601         CODING_SPEC_ISO_BOL (coding) = 1;
3602
3603         for (charset = 0; charset <= MAX_CHARSET; charset++)
3604           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3605         val = Vcharset_revision_alist;
3606         while (CONSP (val))
3607           {
3608             charset = get_charset_id (Fcar_safe (XCAR (val)));
3609             if (charset >= 0
3610                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3611                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3612               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3613             val = XCDR (val);
3614           }
3615
3616         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3617            FLAGS[REG] can be one of below:
3618                 integer CHARSET: CHARSET occupies register I,
3619                 t: designate nothing to REG initially, but can be used
3620                   by any charsets,
3621                 list of integer, nil, or t: designate the first
3622                   element (if integer) to REG initially, the remaining
3623                   elements (if integer) is designated to REG on request,
3624                   if an element is t, REG can be used by any charsets,
3625                 nil: REG is never used.  */
3626         for (charset = 0; charset <= MAX_CHARSET; charset++)
3627           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3628             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3629         for (i = 0; i < 4; i++)
3630           {
3631             if ((INTEGERP (flags[i])
3632                  && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
3633                 || (charset = get_charset_id (flags[i])) >= 0)
3634               {
3635                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3636                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3637               }
3638             else if (EQ (flags[i], Qt))
3639               {
3640                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3641                 reg_bits |= 1 << i;
3642                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3643               }
3644             else if (CONSP (flags[i]))
3645               {
3646                 Lisp_Object tail;
3647                 tail = flags[i];
3648
3649                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3650                 if ((INTEGERP (XCAR (tail))
3651                      && (charset = XINT (XCAR (tail)),
3652                          CHARSET_VALID_P (charset)))
3653                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3654                   {
3655                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3656                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3657                   }
3658                 else
3659                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3660                 tail = XCDR (tail);
3661                 while (CONSP (tail))
3662                   {
3663                     if ((INTEGERP (XCAR (tail))
3664                          && (charset = XINT (XCAR (tail)),
3665                              CHARSET_VALID_P (charset)))
3666                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3667                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3668                         = i;
3669                     else if (EQ (XCAR (tail), Qt))
3670                       reg_bits |= 1 << i;
3671                     tail = XCDR (tail);
3672                   }
3673               }
3674             else
3675               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3676
3677             CODING_SPEC_ISO_DESIGNATION (coding, i)
3678               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3679           }
3680
3681         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3682           {
3683             /* REG 1 can be used only by locking shift in 7-bit env.  */
3684             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3685               reg_bits &= ~2;
3686             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3687               /* Without any shifting, only REG 0 and 1 can be used.  */
3688               reg_bits &= 3;
3689           }
3690
3691         if (reg_bits)
3692           for (charset = 0; charset <= MAX_CHARSET; charset++)
3693             {
3694               if (CHARSET_DEFINED_P (charset)
3695                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3696                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3697                 {
3698                   /* There exist some default graphic registers to be
3699                      used by CHARSET.  */
3700
3701                   /* We had better avoid designating a charset of
3702                      CHARS96 to REG 0 as far as possible.  */
3703                   if (CHARSET_CHARS (charset) == 96)
3704                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3705                       = (reg_bits & 2
3706                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3707                   else
3708                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3709                       = (reg_bits & 1
3710                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3711                 }
3712             }
3713       }
3714       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3715       coding->spec.iso2022.last_invalid_designation_register = -1;
3716       break;
3717
3718     case 3:
3719       coding->type = coding_type_big5;
3720       coding->common_flags
3721         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3722       coding->flags
3723         = (NILP (XVECTOR (coding_spec)->contents[4])
3724            ? CODING_FLAG_BIG5_HKU
3725            : CODING_FLAG_BIG5_ETEN);
3726       break;
3727
3728     case 4:
3729       coding->type = coding_type_ccl;
3730       coding->common_flags
3731         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3732       {
3733         val = XVECTOR (coding_spec)->contents[4];
3734         if (! CONSP (val)
3735             || setup_ccl_program (&(coding->spec.ccl.decoder),
3736                                   XCAR (val)) < 0
3737             || setup_ccl_program (&(coding->spec.ccl.encoder),
3738                                   XCDR (val)) < 0)
3739           goto label_invalid_coding_system;
3740
3741         bzero (coding->spec.ccl.valid_codes, 256);
3742         val = Fplist_get (plist, Qvalid_codes);
3743         if (CONSP (val))
3744           {
3745             Lisp_Object this;
3746
3747             for (; CONSP (val); val = XCDR (val))
3748               {
3749                 this = XCAR (val);
3750                 if (INTEGERP (this)
3751                     && XINT (this) >= 0 && XINT (this) < 256)
3752                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3753                 else if (CONSP (this)
3754                          && INTEGERP (XCAR (this))
3755                          && INTEGERP (XCDR (this)))
3756                   {
3757                     int start = XINT (XCAR (this));
3758                     int end = XINT (XCDR (this));
3759
3760                     if (start >= 0 && start <= end && end < 256)
3761                       while (start <= end)
3762                         coding->spec.ccl.valid_codes[start++] = 1;
3763                   }
3764               }
3765           }
3766       }
3767       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3768       coding->spec.ccl.cr_carryover = 0;
3769       coding->spec.ccl.eight_bit_carryover[0] = 0;
3770       break;
3771
3772     case 5:
3773       coding->type = coding_type_raw_text;
3774       break;
3775
3776     default:
3777       goto label_invalid_coding_system;
3778     }
3779   return 0;
3780
3781  label_invalid_coding_system:
3782   coding->type = coding_type_no_conversion;
3783   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3784   coding->common_flags = 0;
3785   coding->eol_type = CODING_EOL_LF;
3786   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3787   return -1;
3788 }
3789
3790 /* Free memory blocks allocated for storing composition information.  */
3791
3792 void
3793 coding_free_composition_data (coding)
3794      struct coding_system *coding;
3795 {
3796   struct composition_data *cmp_data = coding->cmp_data, *next;
3797
3798   if (!cmp_data)
3799     return;
3800   /* Memory blocks are chained.  At first, rewind to the first, then,
3801      free blocks one by one.  */
3802   while (cmp_data->prev)
3803     cmp_data = cmp_data->prev;
3804   while (cmp_data)
3805     {
3806       next = cmp_data->next;
3807       xfree (cmp_data);
3808       cmp_data = next;
3809     }
3810   coding->cmp_data = NULL;
3811 }
3812
3813 /* Set `char_offset' member of all memory blocks pointed by
3814    coding->cmp_data to POS.  */
3815
3816 void
3817 coding_adjust_composition_offset (coding, pos)
3818      struct coding_system *coding;
3819      int pos;
3820 {
3821   struct composition_data *cmp_data;
3822
3823   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3824     cmp_data->char_offset = pos;
3825 }
3826
3827 /* Setup raw-text or one of its subsidiaries in the structure
3828    coding_system CODING according to the already setup value eol_type
3829    in CODING.  CODING should be setup for some coding system in
3830    advance.  */
3831
3832 void
3833 setup_raw_text_coding_system (coding)
3834      struct coding_system *coding;
3835 {
3836   if (coding->type != coding_type_raw_text)
3837     {
3838       coding->symbol = Qraw_text;
3839       coding->type = coding_type_raw_text;
3840       if (coding->eol_type != CODING_EOL_UNDECIDED)
3841         {
3842           Lisp_Object subsidiaries;
3843           subsidiaries = Fget (Qraw_text, Qeol_type);
3844
3845           if (VECTORP (subsidiaries)
3846               && XVECTOR (subsidiaries)->size == 3)
3847             coding->symbol
3848               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3849         }
3850       setup_coding_system (coding->symbol, coding);
3851     }
3852   return;
3853 }
3854
/* Emacs has a mechanism to automatically detect the coding system of
   a text when it is one of the kinds handled below (Emacs' internal
   format, ISO2022, SJIS, BIG5, UTF-8, UTF-16, or one based on CCL).
   But it is impossible to distinguish some coding systems accurately
   because they use the same range of codes.  So, at first, coding
   systems are grouped into the following categories:
3860
3861    o coding-category-emacs-mule
3862
3863         The category for a coding system which has the same code range
3864         as Emacs' internal format.  Assigned the coding-system (Lisp
3865         symbol) `emacs-mule' by default.
3866
3867    o coding-category-sjis
3868
3869         The category for a coding system which has the same code range
3870         as SJIS.  Assigned the coding-system (Lisp
3871         symbol) `japanese-shift-jis' by default.
3872
3873    o coding-category-iso-7
3874
3875         The category for a coding system which has the same code range
3876         as ISO2022 of 7-bit environment.  This doesn't use any locking
3877         shift and single shift functions.  This can encode/decode all
3878         charsets.  Assigned the coding-system (Lisp symbol)
3879         `iso-2022-7bit' by default.
3880
3881    o coding-category-iso-7-tight
3882
3883         Same as coding-category-iso-7 except that this can
3884         encode/decode only the specified charsets.
3885
3886    o coding-category-iso-8-1
3887
3888         The category for a coding system which has the same code range
3889         as ISO2022 of 8-bit environment and graphic plane 1 used only
3890         for DIMENSION1 charset.  This doesn't use any locking shift
3891         and single shift functions.  Assigned the coding-system (Lisp
3892         symbol) `iso-latin-1' by default.
3893
3894    o coding-category-iso-8-2
3895
3896         The category for a coding system which has the same code range
3897         as ISO2022 of 8-bit environment and graphic plane 1 used only
3898         for DIMENSION2 charset.  This doesn't use any locking shift
3899         and single shift functions.  Assigned the coding-system (Lisp
3900         symbol) `japanese-iso-8bit' by default.
3901
3902    o coding-category-iso-7-else
3903
3904         The category for a coding system which has the same code range
3905         as ISO2022 of 7-bit environment but uses locking shift or
3906         single shift functions.  Assigned the coding-system (Lisp
3907         symbol) `iso-2022-7bit-lock' by default.
3908
3909    o coding-category-iso-8-else
3910
3911         The category for a coding system which has the same code range
3912         as ISO2022 of 8-bit environment but uses locking shift or
3913         single shift functions.  Assigned the coding-system (Lisp
3914         symbol) `iso-2022-8bit-ss2' by default.
3915
3916    o coding-category-big5
3917
3918         The category for a coding system which has the same code range
3919         as BIG5.  Assigned the coding-system (Lisp symbol)
3920         `cn-big5' by default.
3921
3922    o coding-category-utf-8
3923
3924         The category for a coding system which has the same code range
3925         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3926         symbol) `utf-8' by default.
3927
3928    o coding-category-utf-16-be
3929
3930         The category for a coding system in which a text has an
3931         Unicode signature (cf. Unicode Standard) in the order of BIG
3932         endian at the head.  Assigned the coding-system (Lisp symbol)
3933         `utf-16-be' by default.
3934
3935    o coding-category-utf-16-le
3936
3937         The category for a coding system in which a text has an
3938         Unicode signature (cf. Unicode Standard) in the order of
3939         LITTLE endian at the head.  Assigned the coding-system (Lisp
3940         symbol) `utf-16-le' by default.
3941
3942    o coding-category-ccl
3943
3944         The category for a coding system of which encoder/decoder is
3945         written in CCL programs.  The default value is nil, i.e., no
3946         coding system is assigned.
3947
3948    o coding-category-binary
3949
3950         The category for a coding system not categorized in any of the
3951         above.  Assigned the coding-system (Lisp symbol)
3952         `no-conversion' by default.
3953
3954    Each of them is a Lisp symbol and the value is an actual
3955    `coding-system' (this is also a Lisp symbol) assigned by a user.
3956    What Emacs does actually is to detect a category of coding system.
3957    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3958    decide a single possible category, it selects a category of the
3959    highest priority.  Priorities of categories are also specified by a
3960    user in a Lisp variable `coding-category-list'.
3961
3962 */
3963
/* Table indexed by byte value.  detect_coding_mask skips the leading
   bytes of a text whose entry here is nonzero, and rewrites the
   entries for ISO_CODE_SO, ISO_CODE_SI and ISO_CODE_ESC while it
   runs.  */
static int ascii_skip_code[256];
3966
3967 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3968    If it detects possible coding systems, return an integer in which
3969    appropriate flag bits are set.  Flag bits are defined by macros
3970    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
3971    it should point the table `coding_priorities'.  In that case, only
3972    the flag bit for a coding system of the highest priority is set in
3973    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
3974    range 0x80..0x9F are in multibyte form.
3975
3976    How many ASCII characters are at the head is returned as *SKIP.  */
3977
3978 static int
3979 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
3980      unsigned char *source;
3981      int src_bytes, *priorities, *skip;
3982      int multibytep;
3983 {
3984   register unsigned char c;
3985   unsigned char *src = source, *src_end = source + src_bytes;
3986   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3987   int i;
3988
3989   /* At first, skip all ASCII characters and control characters except
3990      for three ISO2022 specific control characters.  */
3991   ascii_skip_code[ISO_CODE_SO] = 0;
3992   ascii_skip_code[ISO_CODE_SI] = 0;
3993   ascii_skip_code[ISO_CODE_ESC] = 0;
3994
3995  label_loop_detect_coding:
3996   while (src < src_end && ascii_skip_code[*src]) src++;
3997   *skip = src - source;
3998
3999   if (src >= src_end)
4000     /* We found nothing other than ASCII.  There's nothing to do.  */
4001     return 0;
4002
4003   c = *src;
4004   /* The text seems to be encoded in some multilingual coding system.
4005      Now, try to find in which coding system the text is encoded.  */
4006   if (c < 0x80)
4007     {
4008       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
4009       /* C is an ISO2022 specific control code of C0.  */
4010       mask = detect_coding_iso2022 (src, src_end, multibytep);
4011       if (mask == 0)
4012         {
4013           /* No valid ISO2022 code follows C.  Try again.  */
4014           src++;
4015           if (c == ISO_CODE_ESC)
4016             ascii_skip_code[ISO_CODE_ESC] = 1;
4017           else
4018             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
4019           goto label_loop_detect_coding;
4020         }
4021       if (priorities)
4022         {
4023           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4024             {
4025               if (mask & priorities[i])
4026                 return priorities[i];
4027             }
4028           return CODING_CATEGORY_MASK_RAW_TEXT;
4029         }
4030     }
4031   else
4032     {
4033       int try;
4034
4035       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
4036         c = src[1] - 0x20;
4037
4038       if (c < 0xA0)
4039         {
4040           /* C is the first byte of SJIS character code,
4041              or a leading-code of Emacs' internal format (emacs-mule),
4042              or the first byte of UTF-16.  */
4043           try = (CODING_CATEGORY_MASK_SJIS
4044                   | CODING_CATEGORY_MASK_EMACS_MULE
4045                   | CODING_CATEGORY_MASK_UTF_16_BE
4046                   | CODING_CATEGORY_MASK_UTF_16_LE);
4047
4048           /* Or, if C is a special latin extra code,
4049              or is an ISO2022 specific control code of C1 (SS2 or SS3),
4050              or is an ISO2022 control-sequence-introducer (CSI),
4051              we should also consider the possibility of ISO2022 codings.  */
4052           if ((VECTORP (Vlatin_extra_code_table)
4053                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4054               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4055               || (c == ISO_CODE_CSI
4056                   && (src < src_end
4057                       && (*src == ']'
4058                           || ((*src == '0' || *src == '1' || *src == '2')
4059                               && src + 1 < src_end
4060                               && src[1] == ']')))))
4061             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4062                      | CODING_CATEGORY_MASK_ISO_8BIT);
4063         }
4064       else
4065         /* C is a character of ISO2022 in graphic plane right,
4066            or a SJIS's 1-byte character code (i.e. JISX0201),
4067            or the first byte of BIG5's 2-byte code,
4068            or the first byte of UTF-8/16.  */
4069         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4070                 | CODING_CATEGORY_MASK_ISO_8BIT
4071                 | CODING_CATEGORY_MASK_SJIS
4072                 | CODING_CATEGORY_MASK_BIG5
4073                 | CODING_CATEGORY_MASK_UTF_8
4074                 | CODING_CATEGORY_MASK_UTF_16_BE
4075                 | CODING_CATEGORY_MASK_UTF_16_LE);
4076
4077       /* Or, we may have to consider the possibility of CCL.  */
4078       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4079           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4080               ->spec.ccl.valid_codes)[c])
4081         try |= CODING_CATEGORY_MASK_CCL;
4082
4083       mask = 0;
4084       utf16_examined_p = iso2022_examined_p = 0;
4085       if (priorities)
4086         {
4087           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4088             {
4089               if (!iso2022_examined_p
4090                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4091                 {
4092                   mask |= detect_coding_iso2022 (src, src_end, multibytep);
4093                   iso2022_examined_p = 1;
4094                 }
4095               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4096                 mask |= detect_coding_sjis (src, src_end, multibytep);
4097               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4098                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4099               else if (!utf16_examined_p
4100                        && (priorities[i] & try &
4101                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4102                 {
4103                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4104                   utf16_examined_p = 1;
4105                 }
4106               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4107                 mask |= detect_coding_big5 (src, src_end, multibytep);
4108               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4109                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4110               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4111                 mask |= detect_coding_ccl (src, src_end, multibytep);
4112               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4113                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4114               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4115                 mask |= CODING_CATEGORY_MASK_BINARY;
4116               if (mask & priorities[i])
4117                 return priorities[i];
4118             }
4119           return CODING_CATEGORY_MASK_RAW_TEXT;
4120         }
4121       if (try & CODING_CATEGORY_MASK_ISO)
4122         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4123       if (try & CODING_CATEGORY_MASK_SJIS)
4124         mask |= detect_coding_sjis (src, src_end, multibytep);
4125       if (try & CODING_CATEGORY_MASK_BIG5)
4126         mask |= detect_coding_big5 (src, src_end, multibytep);
4127       if (try & CODING_CATEGORY_MASK_UTF_8)
4128         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4129       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4130         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4131       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4132         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4133       if (try & CODING_CATEGORY_MASK_CCL)
4134         mask |= detect_coding_ccl (src, src_end, multibytep);
4135     }
4136   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4137 }
4138
4139 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4140    The information of the detected coding system is set in CODING.  */
4141
4142 void
4143 detect_coding (coding, src, src_bytes)
4144      struct coding_system *coding;
4145      const unsigned char *src;
4146      int src_bytes;
4147 {
4148   unsigned int idx;
4149   int skip, mask;
4150   Lisp_Object val;
4151
4152   val = Vcoding_category_list;
4153   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4154                              coding->src_multibyte);
4155   coding->heading_ascii = skip;
4156
4157   if (!mask) return;
4158
4159   /* We found a single coding system of the highest priority in MASK.  */
4160   idx = 0;
4161   while (mask && ! (mask & 1)) mask >>= 1, idx++;
4162   if (! mask)
4163     idx = CODING_CATEGORY_IDX_RAW_TEXT;
4164
4165   val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
4166
4167   if (coding->eol_type != CODING_EOL_UNDECIDED)
4168     {
4169       Lisp_Object tmp;
4170
4171       tmp = Fget (val, Qeol_type);
4172       if (VECTORP (tmp))
4173         val = XVECTOR (tmp)->contents[coding->eol_type];
4174     }
4175
4176   /* Setup this new coding system while preserving some slots.  */
4177   {
4178     int src_multibyte = coding->src_multibyte;
4179     int dst_multibyte = coding->dst_multibyte;
4180
4181     setup_coding_system (val, coding);
4182     coding->src_multibyte = src_multibyte;
4183     coding->dst_multibyte = dst_multibyte;
4184     coding->heading_ascii = skip;
4185   }
4186 }
4187
4188 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4189    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4190    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
4191
4192    How many non-eol characters are at the head is returned as *SKIP.  */
4193
4194 #define MAX_EOL_CHECK_COUNT 3
4195
4196 static int
4197 detect_eol_type (source, src_bytes, skip)
4198      unsigned char *source;
4199      int src_bytes, *skip;
4200 {
4201   unsigned char *src = source, *src_end = src + src_bytes;
4202   unsigned char c;
4203   int total = 0;                /* How many end-of-lines are found so far.  */
4204   int eol_type = CODING_EOL_UNDECIDED;
4205   int this_eol_type;
4206
4207   *skip = 0;
4208
4209   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4210     {
4211       c = *src++;
4212       if (c == '\n' || c == '\r')
4213         {
4214           if (*skip == 0)
4215             *skip = src - 1 - source;
4216           total++;
4217           if (c == '\n')
4218             this_eol_type = CODING_EOL_LF;
4219           else if (src >= src_end || *src != '\n')
4220             this_eol_type = CODING_EOL_CR;
4221           else
4222             this_eol_type = CODING_EOL_CRLF, src++;
4223
4224           if (eol_type == CODING_EOL_UNDECIDED)
4225             /* This is the first end-of-line.  */
4226             eol_type = this_eol_type;
4227           else if (eol_type != this_eol_type)
4228             {
4229               /* The found type is different from what found before.  */
4230               eol_type = CODING_EOL_INCONSISTENT;
4231               break;
4232             }
4233         }
4234     }
4235
4236   if (*skip == 0)
4237     *skip = src_end - source;
4238   return eol_type;
4239 }
4240
4241 /* Like detect_eol_type, but detect EOL type in 2-octet
4242    big-endian/little-endian format for coding systems utf-16-be and
4243    utf-16-le.  */
4244
4245 static int
4246 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4247      unsigned char *source;
4248      int src_bytes, *skip, big_endian_p;
4249 {
4250   unsigned char *src = source, *src_end = src + src_bytes;
4251   unsigned int c1, c2;
4252   int total = 0;                /* How many end-of-lines are found so far.  */
4253   int eol_type = CODING_EOL_UNDECIDED;
4254   int this_eol_type;
4255   int msb, lsb;
4256
4257   if (big_endian_p)
4258     msb = 0, lsb = 1;
4259   else
4260     msb = 1, lsb = 0;
4261
4262   *skip = 0;
4263
4264   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4265     {
4266       c1 = (src[msb] << 8) | (src[lsb]);
4267       src += 2;
4268
4269       if (c1 == '\n' || c1 == '\r')
4270         {
4271           if (*skip == 0)
4272             *skip = src - 2 - source;
4273           total++;
4274           if (c1 == '\n')
4275             {
4276               this_eol_type = CODING_EOL_LF;
4277             }
4278           else
4279             {
4280               if ((src + 1) >= src_end)
4281                 {
4282                   this_eol_type = CODING_EOL_CR;
4283                 }
4284               else
4285                 {
4286                   c2 = (src[msb] << 8) | (src[lsb]);
4287                   if (c2 == '\n')
4288                     this_eol_type = CODING_EOL_CRLF, src += 2;
4289                   else
4290                     this_eol_type = CODING_EOL_CR;
4291                 }
4292             }
4293
4294           if (eol_type == CODING_EOL_UNDECIDED)
4295             /* This is the first end-of-line.  */
4296             eol_type = this_eol_type;
4297           else if (eol_type != this_eol_type)
4298             {
4299               /* The found type is different from what found before.  */
4300               eol_type = CODING_EOL_INCONSISTENT;
4301               break;
4302             }
4303         }
4304     }
4305
4306   if (*skip == 0)
4307     *skip = src_end - source;
4308   return eol_type;
4309 }
4310
4311 /* Work out which end-of-line format is used by the SRC_BYTES bytes of
4312    text at SRC.  When a format is recognized and CODING's symbol has a
4313    3-element `eol-type' vector, set *CODING up anew from that vector.  */
4314
4315 void
4316 detect_eol (coding, src, src_bytes)
4317      struct coding_system *coding;
4318      const unsigned char *src;
4319      int src_bytes;
4320 {
4321   Lisp_Object eol_vector;
4322   int head_skip, detected;
4323   int saved_src_multibyte, saved_dst_multibyte;
4324   struct composition_data *saved_cmp_data;
4325
4326   /* The UTF-16 categories use the 2-octet-form detector (last argument
4327      1 for BE, 0 for LE); every other category uses the plain one.  */
4328   if (coding->category_idx == CODING_CATEGORY_IDX_UTF_16_BE)
4329     detected = detect_eol_type_in_2_octet_form (src, src_bytes, &head_skip, 1);
4330   else if (coding->category_idx == CODING_CATEGORY_IDX_UTF_16_LE)
4331     detected = detect_eol_type_in_2_octet_form (src, src_bytes, &head_skip, 0);
4332   else
4333     detected = detect_eol_type (src, src_bytes, &head_skip);
4334
4335   /* Both HEAD_SKIP and CODING->heading_ascii end up as the smaller one.  */
4336   if (head_skip < coding->heading_ascii)
4337     coding->heading_ascii = head_skip;
4338   else
4339     head_skip = coding->heading_ascii;
4340
4341   if (detected == CODING_EOL_UNDECIDED)
4342     return;
4343
4344   if (detected == CODING_EOL_INCONSISTENT)
4345     {
4346       /* An inconsistent format is handled as LF.  */
4347 #if 0
4348       /* This code is suppressed until we find a better way to
4349          distinguish raw text file and binary file.  */
4350
4351       /* If we have already detected that the coding is raw-text, the
4352          coding should actually be no-conversion.  */
4353       if (coding->type == coding_type_raw_text)
4354         {
4355           setup_coding_system (Qno_conversion, coding);
4356           return;
4357         }
4358       /* Else, let's decode only text code anyway.  */
4359 #endif /* 0 */
4360       detected = CODING_EOL_LF;
4361     }
4362
4363   eol_vector = Fget (coding->symbol, Qeol_type);
4364   if (! VECTORP (eol_vector) || XVECTOR (eol_vector)->size != 3)
4365     return;
4366
4367   /* These fields are carried across the call to setup_coding_system.  */
4368   saved_src_multibyte = coding->src_multibyte;
4369   saved_dst_multibyte = coding->dst_multibyte;
4370   saved_cmp_data = coding->cmp_data;
4371   setup_coding_system (XVECTOR (eol_vector)->contents[detected], coding);
4372   coding->src_multibyte = saved_src_multibyte;
4373   coding->dst_multibyte = saved_dst_multibyte;
4374   coding->heading_ascii = head_skip;
4375   coding->cmp_data = saved_cmp_data;
4376 }
4377
4378 #define CONVERSION_BUFFER_EXTRA_ROOM 256 /* Bytes added to size estimates.  */
4379 /* Decoding size factor: 3 (ISO2022), CCL decoder's value (CCL), else 2.  */
4380 #define DECODING_BUFFER_MAG(coding)                     \
4381   ((coding)->type == coding_type_iso2022                \
4382    ? 3                                                  \
4383    : ((coding)->type == coding_type_ccl                 \
4384       ? (coding)->spec.ccl.decoder.buf_magnification    \
4385       : 2))
4386
4387 /* Give an upper bound, in bytes, on the size of a buffer able to hold
4388    the result of decoding SRC_BYTES bytes of text encoded in CODING.  */
4389
4390 int
4391 decoding_buffer_size (coding, src_bytes)
4392      struct coding_system *coding;
4393      int src_bytes;
4394 {
4395   int magnified = src_bytes * DECODING_BUFFER_MAG (coding);
4396   return magnified + CONVERSION_BUFFER_EXTRA_ROOM;
4397 }
4398
4399 /* Give an upper bound, in bytes, on the size of a buffer able to hold
4400    the result of encoding SRC_BYTES bytes of text to CODING.  */
4401
4402 int
4403 encoding_buffer_size (coding, src_bytes)
4404      struct coding_system *coding;
4405      int src_bytes;
4406 {
4407   int factor;
4408
4409   /* A CCL encoder states its own factor; any other coding system gets
4410      3 if CODING_REQUIRE_ENCODING holds for it, and 1 otherwise.  */
4411   factor = (coding->type == coding_type_ccl
4412             ? coding->spec.ccl.encoder.buf_magnification
4413             : (CODING_REQUIRE_ENCODING (coding) ? 3 : 1));
4414
4415   return (CONVERSION_BUFFER_EXTRA_ROOM
4416           + src_bytes * factor);
4417 }
4418
4419 /* Working buffer for code conversion.  */
4420 struct conversion_buffer
4421 {
4422   int size;                     /* Number of bytes allocated for DATA.  */
4423   int on_stack;                 /* 1 if allocated by alloca, 0 if by xmalloc.  */
4424   unsigned char *data;          /* Start of the allocated memory.  */
4425 };
4426
4427 /* Don't use alloca for allocating memory space of this size or
4428    larger, lest we overflow the stack.  */
4429 #define MAX_ALLOCA (16*1024)
4430 /* Allocate LEN bytes of memory for BUF (struct conversion_buffer): by
4431    alloca if LEN < MAX_ALLOCA, else by xmalloc.  LEN is evaluated repeatedly.  */
4432 #define allocate_conversion_buffer(buf, len)            \
4433   do {                                                  \
4434     if ((len) < MAX_ALLOCA)                             \
4435       {                                                 \
4436         (buf).data = (unsigned char *) alloca (len);    \
4437         (buf).on_stack = 1;                             \
4438       }                                                 \
4439     else                                                \
4440       {                                                 \
4441         (buf).data = (unsigned char *) xmalloc (len);   \
4442         (buf).on_stack = 0;                             \
4443       }                                                 \
4444     (buf).size = (len);                                 \
4445   } while (0)
4446
4447 /* Grow the memory of *BUF to twice its size, keeping the contents.  */
4448 static void
4449 extend_conversion_buffer (buf)
4450      struct conversion_buffer *buf;
4451 {
4452   int doubled = buf->size * 2;
4453
4454   if (! buf->on_stack)
4455     buf->data = (unsigned char *) xrealloc (buf->data, doubled);
4456   else
4457     {
4458       /* Memory from alloca can't be resized: copy it to the heap.  */
4459       unsigned char *heap = (unsigned char *) xmalloc (doubled);
4460       bcopy (buf->data, heap, buf->size);
4461       buf->data = heap, buf->on_stack = 0;
4462     }
4463   buf->size = doubled;
4464 }
4465
4466 /* Release the memory of BUF unless it was allocated on the stack.  */
4467 static void
4468 free_conversion_buffer (buf)
4469      struct conversion_buffer *buf;
4470 {
4471   if (buf->on_stack) return;    /* Nothing to free for alloca.  */
4472   xfree (buf->data);
4473 }
4474 /* Run CODING's CCL encoder (ENCODEP nonzero) or decoder on SOURCE; return CODING->result.  */
4475 int
4476 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4477      struct coding_system *coding;
4478      unsigned char *source, *destination;
4479      int src_bytes, dst_bytes, encodep;
4480 {
4481   struct ccl_program *ccl
4482     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4483   unsigned char *dst = destination;
4484   /* Hand the per-call settings of CODING over to the CCL program.  */
4485   ccl->suppress_error = coding->suppress_error;
4486   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4487   if (encodep)
4488     {
4489       /* On encoding, EOL format is converted within ccl_driver.  For
4490          that, set up proper information in the structure CCL.  */
4491       ccl->eol_type = coding->eol_type;
4492       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4493         ccl->eol_type = CODING_EOL_LF;
4494       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4495     }
4496   ccl->multibyte = coding->src_multibyte;
4497   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4498     {
4499       /* Move carryover bytes to DESTINATION.  */
4500       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4501       while (*p)
4502         *dst++ = *p++;
4503       coding->spec.ccl.eight_bit_carryover[0] = 0;
4504       if (dst_bytes)
4505         dst_bytes -= dst - destination;
4506     }
4507   /* Run the program; PRODUCED also counts the carryover bytes put above.  */
4508   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4509                                   &(coding->consumed))
4510                       + dst - destination);
4511   /* How produced characters are counted depends on the direction.  */
4512   if (encodep)
4513     {
4514       coding->produced_char = coding->produced;
4515       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4516     }
4517   else if (!ccl->eight_bit_control)
4518     {
4519       /* The produced bytes form a valid multibyte sequence. */
4520       coding->produced_char
4521         = multibyte_chars_in_text (destination, coding->produced);
4522       coding->spec.ccl.eight_bit_carryover[0] = 0;
4523     }
4524   else
4525     {
4526       /* On decoding, the destination should always be multibyte.
4527          But, the CCL program might have generated an invalid
4528          multibyte sequence.  Here we make such a sequence valid as
4529          multibyte.  */
4530       int bytes
4531         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4532
4533       if ((coding->consumed < src_bytes
4534            || !ccl->last_block)
4535           && coding->produced >= 1
4536           && destination[coding->produced - 1] >= 0x80)
4537         {
4538           /* We should not convert the trailing 8-bit codes to
4539              multibyte form even if they don't form a valid
4540              multibyte sequence.  They may form a valid sequence in
4541              the next call.  */
4542           int carryover = 0;
4543           /* Carry over from the nearest of the last 3 bytes that is in 0x80..0x9F.  */
4544           if (destination[coding->produced - 1] < 0xA0)
4545             carryover = 1;
4546           else if (coding->produced >= 2)
4547             {
4548               if (destination[coding->produced - 2] >= 0x80)
4549                 {
4550                   if (destination[coding->produced - 2] < 0xA0)
4551                     carryover = 2;
4552                   else if (coding->produced >= 3
4553                            && destination[coding->produced - 3] >= 0x80
4554                            && destination[coding->produced - 3] < 0xA0)
4555                     carryover = 3;
4556                 }
4557             }
4558           if (carryover > 0)
4559             {
4560               BCOPY_SHORT (destination + coding->produced - carryover,
4561                            coding->spec.ccl.eight_bit_carryover,
4562                            carryover);
4563               coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4564               coding->produced -= carryover;
4565             }
4566         }
4567       coding->produced = str_as_multibyte (destination, bytes,
4568                                            coding->produced,
4569                                            &(coding->produced_char));
4570     }
4571   /* Translate the status of the CCL program into a CODING_FINISH_* code.  */
4572   switch (ccl->status)
4573     {
4574     case CCL_STAT_SUSPEND_BY_SRC:
4575       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4576       break;
4577     case CCL_STAT_SUSPEND_BY_DST:
4578       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4579       break;
4580     case CCL_STAT_QUIT:
4581     case CCL_STAT_INVALID_CMD:
4582       coding->result = CODING_FINISH_INTERRUPT;
4583       break;
4584     default:
4585       coding->result = CODING_FINISH_NORMAL;
4586       break;
4587     }
4588   return coding->result;
4589 }
4590
4591 /* Decode EOL format of the text at PTR of BYTES length destructively
4592    according to CODING->eol_type, detecting the format first if it is
4593    undecided.  This is called after the CCL program produced a decoded
4594    text at PTR.  If we do CRLF->LF conversion, update CODING->produced
4595    and CODING->produced_char.  If BYTES is 0, PTR[-1] is never examined.  */
4596 static void
4597 decode_eol_post_ccl (coding, ptr, bytes)
4598      struct coding_system *coding;
4599      unsigned char *ptr;
4600      int bytes;
4601 {
4602   Lisp_Object val, saved_coding_symbol;
4603   unsigned char *pend = ptr + bytes;
4604   int dummy;
4605
4606   /* Remember the current coding system symbol.  We set it back when
4607      an inconsistent EOL is found so that `last-coding-system-used' is
4608      set to the coding system that doesn't specify EOL conversion.  */
4609   saved_coding_symbol = coding->symbol;
4610
4611   coding->spec.ccl.cr_carryover = 0;
4612   if (coding->eol_type == CODING_EOL_UNDECIDED)
4613     {
4614       /* Here, to avoid the call of setup_coding_system, we directly
4615          call detect_eol_type.  */
4616       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4617       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4618         coding->eol_type = CODING_EOL_LF;
4619       if (coding->eol_type != CODING_EOL_UNDECIDED)
4620         {
4621           val = Fget (coding->symbol, Qeol_type);
4622           if (VECTORP (val) && XVECTOR (val)->size == 3)
4623             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4624         }
4625       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4626     }
4627
4628   if (coding->eol_type == CODING_EOL_LF
4629       || coding->eol_type == CODING_EOL_UNDECIDED)
4630     {
4631       /* We have nothing to do.  */
4632       ptr = pend;
4633     }
4634   else if (coding->eol_type == CODING_EOL_CRLF)
4635     {
4636       unsigned char *pstart = ptr, *p = ptr;
4637       /* An empty text has no last character, so don't look at PEND[-1].  */
4638       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4639           && ptr < pend && *(pend - 1) == '\r')
4640         {
4641           /* If the last character is CR, we can't handle it here
4642              because LF will be in the not-yet-decoded source text.
4643              Record that the CR is not yet processed.  */
4644           coding->spec.ccl.cr_carryover = 1;
4645           coding->produced--;
4646           coding->produced_char--;
4647           pend--;
4648         }
4649       while (ptr < pend)
4650         {
4651           if (*ptr == '\r')
4652             {
4653               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4654                 {
4655                   *p++ = '\n';
4656                   ptr += 2;
4657                 }
4658               else
4659                 {
4660                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4661                     goto undo_eol_conversion;
4662                   *p++ = *ptr++;
4663                 }
4664             }
4665           else if (*ptr == '\n'
4666                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4667             goto undo_eol_conversion;
4668           else
4669             *p++ = *ptr++;
4670           continue;
4671
4672         undo_eol_conversion:
4673           /* We have been faced with inconsistent EOL format at PTR.
4674              Convert all LFs before PTR back to CRLFs.  */
4675           for (p--, ptr--; p >= pstart; p--)
4676             {
4677               if (*p == '\n')
4678                 *ptr-- = '\n', *ptr-- = '\r';
4679               else
4680                 *ptr-- = *p;
4681             }
4682           /*  If carryover is recorded, cancel it because we don't
4683               convert CRLF anymore.  */
4684           if (coding->spec.ccl.cr_carryover)
4685             {
4686               coding->spec.ccl.cr_carryover = 0;
4687               coding->produced++;
4688               coding->produced_char++;
4689               pend++;
4690             }
4691           p = ptr = pend;
4692           coding->eol_type = CODING_EOL_LF;
4693           coding->symbol = saved_coding_symbol;
4694         }
4695       if (p < pend)
4696         {
4697           /* As each two-byte sequence CRLF was converted to LF, (PEND
4698              - P) is the number of deleted characters.  */
4699           coding->produced -= pend - p;
4700           coding->produced_char -= pend - p;
4701         }
4702     }
4703   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4704     {
4705       unsigned char *p = ptr;
4706       /* P stays at the start so that an undo can rescan what was converted.  */
4707       for (; ptr < pend; ptr++)
4708         {
4709           if (*ptr == '\r')
4710             *ptr = '\n';
4711           else if (*ptr == '\n'
4712                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4713             {
4714               for (; p < ptr; p++)
4715                 {
4716                   if (*p == '\n')
4717                     *p = '\r';
4718                 }
4719               ptr = pend;
4720               coding->eol_type = CODING_EOL_LF;
4721               coding->symbol = saved_coding_symbol;
4722             }
4723         }
4724     }
4725 }
4726
4727 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4728    decoding, it may detect coding system and format of end-of-line if
4729    those are not yet decided.  The source should be unibyte, the
4730    result is multibyte if CODING->dst_multibyte is nonzero, else
4731    unibyte.  The return value is CODING->result.  */
4732
4733 int
4734 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4735      struct coding_system *coding;
4736      const unsigned char *source;
4737      unsigned char *destination;
4738      int src_bytes, dst_bytes;
4739 {
4740   int extra = 0;                /* Bytes put ahead of the CCL output.  */
4741
4742   if (coding->type == coding_type_undecided)
4743     detect_coding (coding, source, src_bytes);
4744
4745   if (coding->eol_type == CODING_EOL_UNDECIDED
4746       && coding->type != coding_type_ccl)
4747     {
4748       detect_eol (coding, source, src_bytes);
4749       /* We had better recover the original eol format if we
4750          encounter an inconsistent eol format while decoding.  */
4751       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4752     }
4753   /* Reset the counters and the status for this call.  */
4754   coding->produced = coding->produced_char = 0;
4755   coding->consumed = coding->consumed_char = 0;
4756   coding->errors = 0;
4757   coding->result = CODING_FINISH_NORMAL;
4758
4759   switch (coding->type)
4760     {
4761     case coding_type_sjis:
4762       decode_coding_sjis_big5 (coding, source, destination,
4763                                src_bytes, dst_bytes, 1);
4764       break;
4765
4766     case coding_type_iso2022:
4767       decode_coding_iso2022 (coding, source, destination,
4768                              src_bytes, dst_bytes);
4769       break;
4770
4771     case coding_type_big5:
4772       decode_coding_sjis_big5 (coding, source, destination,
4773                                src_bytes, dst_bytes, 0);
4774       break;
4775
4776     case coding_type_emacs_mule:
4777       decode_coding_emacs_mule (coding, source, destination,
4778                                 src_bytes, dst_bytes);
4779       break;
4780
4781     case coding_type_ccl:
4782       if (coding->spec.ccl.cr_carryover)
4783         {
4784           /* Put the CR which was not processed by the previous call
4785              of decode_eol_post_ccl in DESTINATION.  It will be
4786              decoded together with the following LF by the call to
4787              decode_eol_post_ccl below.  */
4788           *destination = '\r';
4789           coding->produced++;
4790           coding->produced_char++;
4791           dst_bytes--;
4792           extra = coding->spec.ccl.cr_carryover;
4793         }
4794       ccl_coding_driver (coding, source, destination + extra,
4795                          src_bytes, dst_bytes, 0);
4796       if (coding->eol_type != CODING_EOL_LF)
4797         {
4798           coding->produced += extra;
4799           coding->produced_char += extra;
4800           decode_eol_post_ccl (coding, destination, coding->produced);
4801         }
4802       break;
4803
4804     default:
4805       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4806     }
4807   /* On the last block, a fully consumed source is a normal finish.  */
4808   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4809       && coding->mode & CODING_MODE_LAST_BLOCK
4810       && coding->consumed == src_bytes)
4811     coding->result = CODING_FINISH_NORMAL;
4812   /* Otherwise copy the unconsumed rest byte by byte, counting one error.  */
4813   if (coding->mode & CODING_MODE_LAST_BLOCK
4814       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4815     {
4816       const unsigned char *src = source + coding->consumed;
4817       unsigned char *dst = destination + coding->produced;
4818
4819       src_bytes -= coding->consumed;
4820       coding->errors++;
4821       if (COMPOSING_P (coding))
4822         DECODE_COMPOSITION_END ('1');
4823       while (src_bytes--)
4824         {
4825           int c = *src++;
4826           dst += CHAR_STRING (c, dst);
4827           coding->produced_char++;
4828         }
4829       coding->consumed = coding->consumed_char = src - source;
4830       coding->produced = dst - destination;
4831       coding->result = CODING_FINISH_NORMAL;
4832     }
4833   /* For a unibyte destination, pass the result through str_as_unibyte.  */
4834   if (!coding->dst_multibyte)
4835     {
4836       coding->produced = str_as_unibyte (destination, coding->produced);
4837       coding->produced_char = coding->produced;
4838     }
4839
4840   return coding->result;
4841 }
4842
4843 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4844    multibyteness of the source is CODING->src_multibyte, the
4845    multibyteness of the result is always unibyte.  Return CODING->result.  */
4846
4847 int
4848 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4849      struct coding_system *coding;
4850      const unsigned char *source;
4851      unsigned char *destination;
4852      int src_bytes, dst_bytes;
4853 {
4854   coding->produced = coding->produced_char = 0;
4855   coding->consumed = coding->consumed_char = 0;
4856   coding->errors = 0;
4857   coding->result = CODING_FINISH_NORMAL;
4858   /* Dispatch on the type of CODING; unlisted types only get EOL encoding.  */
4859   switch (coding->type)
4860     {
4861     case coding_type_sjis:
4862       encode_coding_sjis_big5 (coding, source, destination,
4863                                src_bytes, dst_bytes, 1);
4864       break;
4865
4866     case coding_type_iso2022:
4867       encode_coding_iso2022 (coding, source, destination,
4868                              src_bytes, dst_bytes);
4869       break;
4870
4871     case coding_type_big5:
4872       encode_coding_sjis_big5 (coding, source, destination,
4873                                src_bytes, dst_bytes, 0);
4874       break;
4875
4876     case coding_type_emacs_mule:
4877       encode_coding_emacs_mule (coding, source, destination,
4878                                 src_bytes, dst_bytes);
4879       break;
4880
4881     case coding_type_ccl:
4882       ccl_coding_driver (coding, source, destination,
4883                          src_bytes, dst_bytes, 1);
4884       break;
4885
4886     default:
4887       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4888     }
4889   /* On the last block, finish up and copy any unconsumed rest as it is.  */
4890   if (coding->mode & CODING_MODE_LAST_BLOCK
4891       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4892     {
4893       const unsigned char *src = source + coding->consumed;
4894       unsigned char *dst = destination + coding->produced;
4895
4896       if (coding->type == coding_type_iso2022)
4897         ENCODE_RESET_PLANE_AND_REGISTER;
4898       if (COMPOSING_P (coding))
4899         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4900       if (coding->consumed < src_bytes)
4901         {
4902           int len = src_bytes - coding->consumed;
4903
4904           BCOPY_SHORT (src, dst, len);
4905           if (coding->src_multibyte)
4906             len = str_as_unibyte (dst, len);
4907           dst += len;
4908           coding->consumed = src_bytes;
4909         }
4910       coding->produced = coding->produced_char = dst - destination;
4911       coding->result = CODING_FINISH_NORMAL;
4912     }
4913   /* Insufficient source with everything consumed is a normal finish.  */
4914   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4915       && coding->consumed == src_bytes)
4916     coding->result = CODING_FINISH_NORMAL;
4917
4918   return coding->result;
4919 }
4920
4921 /* Scan text in the region between *BEG and *END (byte positions),
4922    skip characters which we don't have to decode by coding system
4923    CODING at the head and tail, then set *BEG and *END to the region
4924    of the text we actually have to convert.  The caller should move
4925    the gap out of the region in advance if the region is from a
4926    buffer.
4927
4928    If STR is not NULL, *BEG and *END are indices into STR.  */
4929
4930 static void
4931 shrink_decoding_region (beg, end, coding, str)
4932      int *beg, *end;
4933      struct coding_system *coding;
4934      unsigned char *str;
4935 {
4936   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4937   int eol_conversion;
4938   Lisp_Object translation_table;
4939
4940   if (coding->type == coding_type_ccl
4941       || coding->type == coding_type_undecided
4942       || coding->eol_type != CODING_EOL_LF
4943       || !NILP (coding->post_read_conversion)
4944       || coding->composing != COMPOSITION_DISABLED)
4945     {
4946       /* We can't skip any data.  */
4947       return;
4948     }
4949   if (coding->type == coding_type_no_conversion
4950       || coding->type == coding_type_raw_text
4951       || coding->type == coding_type_emacs_mule)
4952     {
4953       /* We need no conversion, but don't have to skip any data here.
4954          Decoding routine handles them effectively anyway.  */
4955       return;
4956     }
4957
4958   translation_table = coding->translation_table_for_decode;
4959   if (NILP (translation_table) && !NILP (Venable_character_translation))
4960     translation_table = Vstandard_translation_table_for_decode;
4961   if (CHAR_TABLE_P (translation_table))
4962     {
4963       int i;
4964       for (i = 0; i < 128; i++)
4965         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4966           break;
4967       if (i < 128)
4968         /* Some ASCII character should be translated.  We give up
4969            shrinking.  */
4970         return;
4971     }
4972
4973   if (coding->heading_ascii >= 0)
4974     /* Detection routine has already found how much we can skip at the
4975        head.  */
4976     *beg += coding->heading_ascii;
4977
4978   if (str)
4979     {
4980       begp_orig = begp = str + *beg;
4981       endp_orig = endp = str + *end;
4982     }
4983   else
4984     {
4985       begp_orig = begp = BYTE_POS_ADDR (*beg);
4986       endp_orig = endp = begp + *end - *beg;
4987     }
4988   /* NOTE: always 0 here; any eol_type other than LF returned above.  */
4989   eol_conversion = (coding->eol_type == CODING_EOL_CR
4990                     || coding->eol_type == CODING_EOL_CRLF);
4991
4992   switch (coding->type)
4993     {
4994     case coding_type_sjis:
4995     case coding_type_big5:
4996       /* We can skip all ASCII characters at the head.  */
4997       if (coding->heading_ascii < 0)
4998         {
4999           if (eol_conversion)
5000             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
5001           else
5002             while (begp < endp && *begp < 0x80) begp++;
5003         }
5004       /* We can skip all ASCII characters at the tail except for the
5005          second byte of SJIS or BIG5 code.  */
5006       if (eol_conversion)
5007         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
5008       else
5009         while (begp < endp && endp[-1] < 0x80) endp--;
5010       /* Do not consider LF as ascii if preceded by CR, since that
5011          confuses eol decoding. */
5012       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5013         endp++;
5014       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
5015         endp++;
5016       break;
5017
5018     case coding_type_iso2022:
5019       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5020         /* We can't skip any data.  */
5021         break;
5022       if (coding->heading_ascii < 0)
5023         {
5024           /* We can skip all ASCII characters at the head except for a
5025              few control codes.  */
5026           while (begp < endp && (c = *begp) < 0x80
5027                  && c != ISO_CODE_CR && c != ISO_CODE_SO
5028                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
5029                  && (!eol_conversion || c != ISO_CODE_LF))
5030             begp++;
5031         }
5032       switch (coding->category_idx)
5033         {
5034         case CODING_CATEGORY_IDX_ISO_8_1:
5035         case CODING_CATEGORY_IDX_ISO_8_2:
5036           /* We can skip all ASCII characters at the tail.  */
5037           if (eol_conversion)
5038             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
5039           else
5040             while (begp < endp && endp[-1] < 0x80) endp--;
5041           /* Do not consider LF as ascii if preceded by CR, since that
5042              confuses eol decoding. */
5043           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5044             endp++;
5045           break;
5046
5047         case CODING_CATEGORY_IDX_ISO_7:
5048         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
5049           {
5050             /* We can skip all characters at the tail except for 8-bit
5051                codes and ESC and the following 2-byte at the tail.  */
5052             unsigned char *eight_bit = NULL;
5053             /* EIGHT_BIT: end of the first 8-bit code met scanning backward.  */
5054             if (eol_conversion)
5055               while (begp < endp
5056                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
5057                 {
5058                   if (!eight_bit && c & 0x80) eight_bit = endp;
5059                   endp--;
5060                 }
5061             else
5062               while (begp < endp
5063                      && (c = endp[-1]) != ISO_CODE_ESC)
5064                 {
5065                   if (!eight_bit && c & 0x80) eight_bit = endp;
5066                   endp--;
5067                 }
5068             /* Do not consider LF as ascii if preceded by CR, since that
5069                confuses eol decoding. */
5070             if (begp < endp && endp < endp_orig
5071                 && endp[-1] == '\r' && endp[0] == '\n')
5072               endp++;
5073             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5074               {
5075                 if (endp + 1 < endp_orig && endp[0] == '(' && endp[1] == 'B')
5076                   /* ENDP[-1], ENDP[0] and ENDP[1] form ESC ( B, an ASCII
5077                      designation sequence.  We can surely skip the tail.
5078                      But, if we have encountered an 8-bit code, skip
5079                      only the codes after that.  */
5080                   endp = eight_bit ? eight_bit : endp + 2;
5081                 else
5082                   /* Hmmm, we can't skip the tail.  */
5083                   endp = endp_orig;
5084               }
5085             else if (eight_bit)
5086               endp = eight_bit;
5087           }
5088         }
5089       break;
5090
5091     default:
5092       abort ();
5093     }
5094   *beg += begp - begp_orig;
5095   *end += endp - endp_orig;
5096   return;
5097 }
5098
5099 /* Like shrink_decoding_region but for encoding: move *BEG and *END past
5100    leading and trailing ASCII bytes that can be skipped for CODING.  */
5101 static void
5102 shrink_encoding_region (beg, end, coding, str)
5103      int *beg, *end;
5104      struct coding_system *coding;
5105      unsigned char *str;
5106 {
5107   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5108   int eol_conversion;
5109   Lisp_Object translation_table;
5110
5111   if (coding->type == coding_type_ccl
5112       || coding->eol_type == CODING_EOL_CRLF
5113       || coding->eol_type == CODING_EOL_CR
5114       || (coding->cmp_data && coding->cmp_data->used > 0))
5115     {
5116       /* We can't skip any data.  */
5117       return;
5118     }
5119   if (coding->type == coding_type_no_conversion
5120       || coding->type == coding_type_raw_text
5121       || coding->type == coding_type_emacs_mule
5122       || coding->type == coding_type_undecided)
5123     {
5124       /* We need no conversion, but don't have to skip any data here.
5125          Encoding routine handles them effectively anyway.  */
5126       return;
5127     }
5128
5129   translation_table = coding->translation_table_for_encode;
5130   if (NILP (translation_table) && !NILP (Venable_character_translation))
5131     translation_table = Vstandard_translation_table_for_encode;
5132   if (CHAR_TABLE_P (translation_table))
5133     {
5134       int i;
5135       for (i = 0; i < 128; i++)
5136         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5137           break;
5138       if (i < 128)
5139         /* Some ASCII character should be translated.  We give up
5140            shrinking.  */
5141         return;
5142     }
5143
5144   if (str)
5145     {
5146       begp_orig = begp = str + *beg;
5147       endp_orig = endp = str + *end;
5148     }
5149   else
5150     {
5151       begp_orig = begp = BYTE_POS_ADDR (*beg);
5152       endp_orig = endp = begp + *end - *beg;
5153     }
5154   /* NOTE: always 0 here; the CR and CRLF eol types returned above.  */
5155   eol_conversion = (coding->eol_type == CODING_EOL_CR
5156                     || coding->eol_type == CODING_EOL_CRLF);
5157
5158   /* Here, we don't have to check coding->pre_write_conversion because
5159      the caller is expected to have handled it already.  */
5160   switch (coding->type)
5161     {
5162     case coding_type_iso2022:
5163       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5164         /* We can't skip any data.  */
5165         break;
5166       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5167         {
5168           unsigned char *bol = begp;
5169           while (begp < endp && *begp < 0x80)
5170             {
5171               begp++;
5172               if (begp[-1] == '\n')
5173                 bol = begp;
5174             }
5175           begp = bol;           /* Skip only up to the last line start seen.  */
5176           goto label_skip_tail;
5177         }
5178       /* fall down ... */
5179
5180     case coding_type_sjis:
5181     case coding_type_big5:
5182       /* We can skip all ASCII characters at the head and tail.  */
5183       if (eol_conversion)
5184         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5185       else
5186         while (begp < endp && *begp < 0x80) begp++;
5187     label_skip_tail:
5188       if (eol_conversion)
5189         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5190       else
5191         while (begp < endp && *(endp - 1) < 0x80) endp--;
5192       break;
5193
5194     default:
5195       abort ();
5196     }
5197
5198   *beg += begp - begp_orig;
5199   *end += endp - endp_orig;
5200   return;
5201 }
5202
5203 /* As shrinking conversion region requires some overhead, we don't try
5204    shrinking unless the length of conversion region is greater than
5205    this value.  */
5206 static int shrink_conversion_region_threshhold = 1024;
5207 /* Shrink the region *BEG..*END for encoding (ENCODEP nonzero) or decoding.  */
5208 #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
5209   do {                                                                  \
5210     if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
5211       {                                                                 \
5212         if (encodep) shrink_encoding_region (beg, end, coding, str);    \
5213         else shrink_decoding_region (beg, end, coding, str);            \
5214       }                                                                 \
5215   } while (0)
5216
5217 static Lisp_Object
5218 code_convert_region_unwind (arg)
5219      Lisp_Object arg;
5220 {
5221   inhibit_pre_post_conversion = 0;
5222   Vlast_coding_system_used = arg;
5223   return Qnil;
5224 }
5225
5226 /* Store information about all compositions in the range FROM and TO
5227    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5228    buffer or a string, defaults to the current buffer.  */
5229
5230 void
5231 coding_save_composition (coding, from, to, obj)
5232      struct coding_system *coding;
5233      int from, to;
5234      Lisp_Object obj;
5235 {
5236   Lisp_Object prop;
5237   int start, end;
5238
5239   if (coding->composing == COMPOSITION_DISABLED)
5240     return;
5241   if (!coding->cmp_data)
5242     coding_allocate_composition_data (coding, from);
5243   if (!find_composition (from, to, &start, &end, &prop, obj)
5244       || end > to)
5245     return;
5246   if (start < from
5247       && (!find_composition (end, to, &start, &end, &prop, obj)
5248           || end > to))
5249     return;
5250   coding->composing = COMPOSITION_NO;
5251   do
5252     {
5253       if (COMPOSITION_VALID_P (start, end, prop))
5254         {
5255           enum composition_method method = COMPOSITION_METHOD (prop);
5256           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5257               >= COMPOSITION_DATA_SIZE)
5258             coding_allocate_composition_data (coding, from);
5259           /* For relative composition, we remember start and end
5260              positions, for the other compositions, we also remember
5261              components.  */
5262           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5263           if (method != COMPOSITION_RELATIVE)
5264             {
5265               /* We must store a*/
5266               Lisp_Object val, ch;
5267
5268               val = COMPOSITION_COMPONENTS (prop);
5269               if (CONSP (val))
5270                 while (CONSP (val))
5271                   {
5272                     ch = XCAR (val), val = XCDR (val);
5273                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5274                   }
5275               else if (VECTORP (val) || STRINGP (val))
5276                 {
5277                   int len = (VECTORP (val)
5278                              ? XVECTOR (val)->size : SCHARS (val));
5279                   int i;
5280                   for (i = 0; i < len; i++)
5281                     {
5282                       ch = (STRINGP (val)
5283                             ? Faref (val, make_number (i))
5284                             : XVECTOR (val)->contents[i]);
5285                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5286                     }
5287                 }
5288               else              /* INTEGERP (val) */
5289                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5290             }
5291           CODING_ADD_COMPOSITION_END (coding, end - from);
5292         }
5293       start = end;
5294     }
5295   while (start < to
5296          && find_composition (start, to, &start, &end, &prop, obj)
5297          && end <= to);
5298
5299   /* Make coding->cmp_data point to the first memory block.  */
5300   while (coding->cmp_data->prev)
5301     coding->cmp_data = coding->cmp_data->prev;
5302   coding->cmp_data_start = 0;
5303 }
5304
5305 /* Reflect the saved information about compositions to OBJ.
5306    CODING->cmp_data points to a memory block for the information.  OBJ
5307    is a buffer or a string, defaults to the current buffer.  */
5308
5309 void
5310 coding_restore_composition (coding, obj)
5311      struct coding_system *coding;
5312      Lisp_Object obj;
5313 {
5314   struct composition_data *cmp_data = coding->cmp_data;
5315
5316   if (!cmp_data)
5317     return;
5318
5319   while (cmp_data->prev)
5320     cmp_data = cmp_data->prev;
5321
5322   while (cmp_data)
5323     {
5324       int i;
5325
5326       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5327            i += cmp_data->data[i])
5328         {
5329           int *data = cmp_data->data + i;
5330           enum composition_method method = (enum composition_method) data[3];
5331           Lisp_Object components;
5332
5333           if (method == COMPOSITION_RELATIVE)
5334             components = Qnil;
5335           else
5336             {
5337               int len = data[0] - 4, j;
5338               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5339
5340               if (method == COMPOSITION_WITH_RULE_ALTCHARS
5341                   && len % 2 == 0)
5342                 len --;
5343               for (j = 0; j < len; j++)
5344                 args[j] = make_number (data[4 + j]);
5345               components = (method == COMPOSITION_WITH_ALTCHARS
5346                             ? Fstring (len, args) : Fvector (len, args));
5347             }
5348           compose_text (data[1], data[2], components, Qnil, obj);
5349         }
5350       cmp_data = cmp_data->next;
5351     }
5352 }
5353
5354 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5355    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5356    coding system CODING, and return the status code of code conversion
5357    (currently, this value has no meaning).
5358
5359    How many characters (and bytes) are converted to how many
5360    characters (and bytes) are recorded in members of the structure
5361    CODING.
5362
5363    If REPLACE is nonzero, we do various things as if the original text
5364    is deleted and a new text is inserted.  See the comments in
5365    replace_range (insdel.c) to know what we are doing.
5366
5367    If REPLACE is zero, it is assumed that the source text is unibyte.
5368    Otherwise, it is assumed that the source text is multibyte.  */
5369
5370 int
5371 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5372      int from, from_byte, to, to_byte, encodep, replace;
5373      struct coding_system *coding;
5374 {
5375   int len = to - from, len_byte = to_byte - from_byte;
5376   int nchars_del = 0, nbytes_del = 0;
5377   int require, inserted, inserted_byte;
5378   int head_skip, tail_skip, total_skip = 0;
5379   Lisp_Object saved_coding_symbol;
5380   int first = 1;
5381   unsigned char *src, *dst;
5382   Lisp_Object deletion;
5383   int orig_point = PT, orig_len = len;
5384   int prev_Z;
5385   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5386
5387   deletion = Qnil;
5388   saved_coding_symbol = coding->symbol;
5389
5390   if (from < PT && PT < to)
5391     {
5392       TEMP_SET_PT_BOTH (from, from_byte);
5393       orig_point = from;
5394     }
5395
5396   if (replace)
5397     {
5398       int saved_from = from;
5399       int saved_inhibit_modification_hooks;
5400
5401       prepare_to_modify_buffer (from, to, &from);
5402       if (saved_from != from)
5403         {
5404           to = from + len;
5405           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5406           len_byte = to_byte - from_byte;
5407         }
5408
5409       /* The code conversion routine can not preserve text properties
5410          for now.  So, we must remove all text properties in the
5411          region.  Here, we must suppress all modification hooks.  */
5412       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5413       inhibit_modification_hooks = 1;
5414       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5415       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5416     }
5417
5418   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5419     {
5420       /* We must detect encoding of text and eol format.  */
5421
5422       if (from < GPT && to > GPT)
5423         move_gap_both (from, from_byte);
5424       if (coding->type == coding_type_undecided)
5425         {
5426           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5427           if (coding->type == coding_type_undecided)
5428             {
5429               /* It seems that the text contains only ASCII, but we
5430                  should not leave it undecided because the deeper
5431                  decoding routine (decode_coding) tries to detect the
5432                  encodings again in vain.  */
5433               coding->type = coding_type_emacs_mule;
5434               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5435               /* As emacs-mule decoder will handle composition, we
5436                  need this setting to allocate coding->cmp_data
5437                  later.  */
5438               coding->composing = COMPOSITION_NO;
5439             }
5440         }
5441       if (coding->eol_type == CODING_EOL_UNDECIDED
5442           && coding->type != coding_type_ccl)
5443         {
5444           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5445           if (coding->eol_type == CODING_EOL_UNDECIDED)
5446             coding->eol_type = CODING_EOL_LF;
5447           /* We had better recover the original eol format if we
5448              encounter an inconsistent eol format while decoding.  */
5449           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5450         }
5451     }
5452
5453   /* Now we convert the text.  */
5454
5455   /* For encoding, we must process pre-write-conversion in advance.  */
5456   if (! inhibit_pre_post_conversion
5457       && encodep
5458       && SYMBOLP (coding->pre_write_conversion)
5459       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5460     {
5461       /* The function in pre-write-conversion may put a new text in a
5462          new buffer.  */
5463       struct buffer *prev = current_buffer;
5464       Lisp_Object new;
5465
5466       record_unwind_protect (code_convert_region_unwind,
5467                              Vlast_coding_system_used);
5468       /* We should not call any more pre-write/post-read-conversion
5469          functions while this pre-write-conversion is running.  */
5470       inhibit_pre_post_conversion = 1;
5471       call2 (coding->pre_write_conversion,
5472              make_number (from), make_number (to));
5473       inhibit_pre_post_conversion = 0;
5474       /* Discard the unwind protect.  */
5475       specpdl_ptr--;
5476
5477       if (current_buffer != prev)
5478         {
5479           len = ZV - BEGV;
5480           new = Fcurrent_buffer ();
5481           set_buffer_internal_1 (prev);
5482           del_range_2 (from, from_byte, to, to_byte, 0);
5483           TEMP_SET_PT_BOTH (from, from_byte);
5484           insert_from_buffer (XBUFFER (new), 1, len, 0);
5485           Fkill_buffer (new);
5486           if (orig_point >= to)
5487             orig_point += len - orig_len;
5488           else if (orig_point > from)
5489             orig_point = from;
5490           orig_len = len;
5491           to = from + len;
5492           from_byte = CHAR_TO_BYTE (from);
5493           to_byte = CHAR_TO_BYTE (to);
5494           len_byte = to_byte - from_byte;
5495           TEMP_SET_PT_BOTH (from, from_byte);
5496         }
5497     }
5498
5499   if (replace)
5500     {
5501       if (! EQ (current_buffer->undo_list, Qt))
5502         deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5503       else
5504         {
5505           nchars_del = to - from;
5506           nbytes_del = to_byte - from_byte;
5507         }
5508     }
5509
5510   if (coding->composing != COMPOSITION_DISABLED)
5511     {
5512       if (encodep)
5513         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5514       else
5515         coding_allocate_composition_data (coding, from);
5516     }
5517
5518   /* Try to skip the heading and tailing ASCIIs.  */
5519   if (coding->type != coding_type_ccl)
5520     {
5521       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5522
5523       if (from < GPT && GPT < to)
5524         move_gap_both (from, from_byte);
5525       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5526       if (from_byte == to_byte
5527           && (encodep || NILP (coding->post_read_conversion))
5528           && ! CODING_REQUIRE_FLUSHING (coding))
5529         {
5530           coding->produced = len_byte;
5531           coding->produced_char = len;
5532           if (!replace)
5533             /* We must record and adjust for this new text now.  */
5534             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
5535           return 0;
5536         }
5537
5538       head_skip = from_byte - from_byte_orig;
5539       tail_skip = to_byte_orig - to_byte;
5540       total_skip = head_skip + tail_skip;
5541       from += head_skip;
5542       to -= tail_skip;
5543       len -= total_skip; len_byte -= total_skip;
5544     }
5545
5546   /* For conversion, we must put the gap before the text in addition to
5547      making the gap larger for efficient decoding.  The required gap
5548      size starts from 2000 which is the magic number used in make_gap.
5549      But, after one batch of conversion, it will be incremented if we
5550      find that it is not enough .  */
5551   require = 2000;
5552
5553   if (GAP_SIZE  < require)
5554     make_gap (require - GAP_SIZE);
5555   move_gap_both (from, from_byte);
5556
5557   inserted = inserted_byte = 0;
5558
5559   GAP_SIZE += len_byte;
5560   ZV -= len;
5561   Z -= len;
5562   ZV_BYTE -= len_byte;
5563   Z_BYTE -= len_byte;
5564
5565   if (GPT - BEG < BEG_UNCHANGED)
5566     BEG_UNCHANGED = GPT - BEG;
5567   if (Z - GPT < END_UNCHANGED)
5568     END_UNCHANGED = Z - GPT;
5569
5570   if (!encodep && coding->src_multibyte)
5571     {
5572       /* Decoding routines expects that the source text is unibyte.
5573          We must convert 8-bit characters of multibyte form to
5574          unibyte.  */
5575       int len_byte_orig = len_byte;
5576       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5577       if (len_byte < len_byte_orig)
5578         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5579                     len_byte);
5580       coding->src_multibyte = 0;
5581     }
5582
5583   for (;;)
5584     {
5585       int result;
5586
5587       /* The buffer memory is now:
5588          +--------+converted-text+---------+-------original-text-------+---+
5589          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5590                   |<---------------------- GAP ----------------------->|  */
5591       src = GAP_END_ADDR - len_byte;
5592       dst = GPT_ADDR + inserted_byte;
5593
5594       if (encodep)
5595         result = encode_coding (coding, src, dst, len_byte, 0);
5596       else
5597         {
5598           if (coding->composing != COMPOSITION_DISABLED)
5599             coding->cmp_data->char_offset = from + inserted;
5600           result = decode_coding (coding, src, dst, len_byte, 0);
5601         }
5602
5603       /* The buffer memory is now:
5604          +--------+-------converted-text----+--+------original-text----+---+
5605          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5606                   |<---------------------- GAP ----------------------->|  */
5607
5608       inserted += coding->produced_char;
5609       inserted_byte += coding->produced;
5610       len_byte -= coding->consumed;
5611
5612       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5613         {
5614           coding_allocate_composition_data (coding, from + inserted);
5615           continue;
5616         }
5617
5618       src += coding->consumed;
5619       dst += coding->produced;
5620
5621       if (result == CODING_FINISH_NORMAL)
5622         {
5623           src += len_byte;
5624           break;
5625         }
5626       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5627         {
5628           unsigned char *pend = dst, *p = pend - inserted_byte;
5629           Lisp_Object eol_type;
5630
5631           /* Encode LFs back to the original eol format (CR or CRLF).  */
5632           if (coding->eol_type == CODING_EOL_CR)
5633             {
5634               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5635             }
5636           else
5637             {
5638               int count = 0;
5639
5640               while (p < pend) if (*p++ == '\n') count++;
5641               if (src - dst < count)
5642                 {
5643                   /* We don't have sufficient room for encoding LFs
5644                      back to CRLF.  We must record converted and
5645                      not-yet-converted text back to the buffer
5646                      content, enlarge the gap, then record them out of
5647                      the buffer contents again.  */
5648                   int add = len_byte + inserted_byte;
5649
5650                   GAP_SIZE -= add;
5651                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5652                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5653                   make_gap (count - GAP_SIZE);
5654                   GAP_SIZE += add;
5655                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5656                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5657                   /* Don't forget to update SRC, DST, and PEND.  */
5658                   src = GAP_END_ADDR - len_byte;
5659                   dst = GPT_ADDR + inserted_byte;
5660                   pend = dst;
5661                 }
5662               inserted += count;
5663               inserted_byte += count;
5664               coding->produced += count;
5665               p = dst = pend + count;
5666               while (count)
5667                 {
5668                   *--p = *--pend;
5669                   if (*p == '\n') count--, *--p = '\r';
5670                 }
5671             }
5672
5673           /* Suppress eol-format conversion in the further conversion.  */
5674           coding->eol_type = CODING_EOL_LF;
5675
5676           /* Set the coding system symbol to that for Unix-like EOL.  */
5677           eol_type = Fget (saved_coding_symbol, Qeol_type);
5678           if (VECTORP (eol_type)
5679               && XVECTOR (eol_type)->size == 3
5680               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5681             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5682           else
5683             coding->symbol = saved_coding_symbol;
5684
5685           continue;
5686         }
5687       if (len_byte <= 0)
5688         {
5689           if (coding->type != coding_type_ccl
5690               || coding->mode & CODING_MODE_LAST_BLOCK)
5691             break;
5692           coding->mode |= CODING_MODE_LAST_BLOCK;
5693           continue;
5694         }
5695       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5696         {
5697           /* The source text ends in invalid codes.  Let's just
5698              make them valid buffer contents, and finish conversion.  */
5699           if (multibyte_p)
5700             {
5701               unsigned char *start = dst;
5702
5703               inserted += len_byte;
5704               while (len_byte--)
5705                 {
5706                   int c = *src++;
5707                   dst += CHAR_STRING (c, dst);
5708                 }
5709
5710               inserted_byte += dst - start;
5711             }
5712           else
5713             {
5714               inserted += len_byte;
5715               inserted_byte += len_byte;
5716               while (len_byte--)
5717                 *dst++ = *src++;
5718             }
5719           break;
5720         }
5721       if (result == CODING_FINISH_INTERRUPT)
5722         {
5723           /* The conversion procedure was interrupted by a user.  */
5724           break;
5725         }
5726       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5727       if (coding->consumed < 1)
5728         {
5729           /* It's quite strange to require more memory without
5730              consuming any bytes.  Perhaps CCL program bug.  */
5731           break;
5732         }
5733       if (first)
5734         {
5735           /* We have just done the first batch of conversion which was
5736              stopped because of insufficient gap.  Let's reconsider the
5737              required gap size (i.e. SRT - DST) now.
5738
5739              We have converted ORIG bytes (== coding->consumed) into
5740              NEW bytes (coding->produced).  To convert the remaining
5741              LEN bytes, we may need REQUIRE bytes of gap, where:
5742                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5743                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5744              Here, we are sure that NEW >= ORIG.  */
5745           float ratio;
5746
5747           if (coding->produced <= coding->consumed)
5748             {
5749               /* This happens because of CCL-based coding system with
5750                  eol-type CRLF.  */
5751               require = 0;
5752             }
5753           else
5754             {
5755               ratio = (coding->produced - coding->consumed) / coding->consumed;
5756               require = len_byte * ratio;
5757             }
5758           first = 0;
5759         }
5760       if ((src - dst) < (require + 2000))
5761         {
5762           /* See the comment above the previous call of make_gap.  */
5763           int add = len_byte + inserted_byte;
5764
5765           GAP_SIZE -= add;
5766           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5767           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5768           make_gap (require + 2000);
5769           GAP_SIZE += add;
5770           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5771           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5772         }
5773     }
5774   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5775
5776   if (encodep && coding->dst_multibyte)
5777     {
5778       /* The output is unibyte.  We must convert 8-bit characters to
5779          multibyte form.  */
5780       if (inserted_byte * 2 > GAP_SIZE)
5781         {
5782           GAP_SIZE -= inserted_byte;
5783           ZV += inserted_byte; Z += inserted_byte;
5784           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5785           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5786           make_gap (inserted_byte - GAP_SIZE);
5787           GAP_SIZE += inserted_byte;
5788           ZV -= inserted_byte; Z -= inserted_byte;
5789           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5790           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5791         }
5792       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5793     }
5794
5795   /* If we shrank the conversion area, adjust it now.  */
5796   if (total_skip > 0)
5797     {
5798       if (tail_skip > 0)
5799         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5800       inserted += total_skip; inserted_byte += total_skip;
5801       GAP_SIZE += total_skip;
5802       GPT -= head_skip; GPT_BYTE -= head_skip;
5803       ZV -= total_skip; ZV_BYTE -= total_skip;
5804       Z -= total_skip; Z_BYTE -= total_skip;
5805       from -= head_skip; from_byte -= head_skip;
5806       to += tail_skip; to_byte += tail_skip;
5807     }
5808
5809   prev_Z = Z;
5810   if (! EQ (current_buffer->undo_list, Qt))
5811     adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5812   else
5813     adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
5814                                  inserted, inserted_byte);
5815   inserted = Z - prev_Z;
5816
5817   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5818     coding_restore_composition (coding, Fcurrent_buffer ());
5819   coding_free_composition_data (coding);
5820
5821   if (! inhibit_pre_post_conversion
5822       && ! encodep && ! NILP (coding->post_read_conversion))
5823     {
5824       Lisp_Object val;
5825       Lisp_Object saved_coding_system;
5826
5827       if (from != PT)
5828         TEMP_SET_PT_BOTH (from, from_byte);
5829       prev_Z = Z;
5830       record_unwind_protect (code_convert_region_unwind,
5831                              Vlast_coding_system_used);
5832       saved_coding_system = Vlast_coding_system_used;
5833       Vlast_coding_system_used = coding->symbol;
5834       /* We should not call any more pre-write/post-read-conversion
5835          functions while this post-read-conversion is running.  */
5836       inhibit_pre_post_conversion = 1;
5837       val = call1 (coding->post_read_conversion, make_number (inserted));
5838       inhibit_pre_post_conversion = 0;
5839       coding->symbol = Vlast_coding_system_used;
5840       Vlast_coding_system_used = saved_coding_system;
5841       /* Discard the unwind protect.  */
5842       specpdl_ptr--;
5843       CHECK_NUMBER (val);
5844       inserted += Z - prev_Z;
5845     }
5846
5847   if (orig_point >= from)
5848     {
5849       if (orig_point >= from + orig_len)
5850         orig_point += inserted - orig_len;
5851       else
5852         orig_point = from;
5853       TEMP_SET_PT (orig_point);
5854     }
5855
5856   if (replace)
5857     {
5858       signal_after_change (from, to - from, inserted);
5859       update_compositions (from, from + inserted, CHECK_BORDER);
5860     }
5861
5862   {
5863     coding->consumed = to_byte - from_byte;
5864     coding->consumed_char = to - from;
5865     coding->produced = inserted_byte;
5866     coding->produced_char = inserted;
5867   }
5868
5869   return 0;
5870 }
5871
5872 Lisp_Object
5873 run_pre_post_conversion_on_str (str, coding, encodep)
5874      Lisp_Object str;
5875      struct coding_system *coding;
5876      int encodep;
5877 {
5878   int count = SPECPDL_INDEX ();
5879   struct gcpro gcpro1, gcpro2;
5880   int multibyte = STRING_MULTIBYTE (str);
5881   Lisp_Object buffer;
5882   struct buffer *buf;
5883   Lisp_Object old_deactivate_mark;
5884
5885   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5886   record_unwind_protect (code_convert_region_unwind,
5887                          Vlast_coding_system_used);
5888   /* It is not crucial to specbind this.  */
5889   old_deactivate_mark = Vdeactivate_mark;
5890   GCPRO2 (str, old_deactivate_mark);
5891
5892   buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
5893   buf = XBUFFER (buffer);
5894
5895   buf->directory = current_buffer->directory;
5896   buf->read_only = Qnil;
5897   buf->filename = Qnil;
5898   buf->undo_list = Qt;
5899   buf->overlays_before = Qnil;
5900   buf->overlays_after = Qnil;
5901
5902   set_buffer_internal (buf);
5903   /* We must insert the contents of STR as is without
5904      unibyte<->multibyte conversion.  For that, we adjust the
5905      multibyteness of the working buffer to that of STR.  */
5906   Ferase_buffer ();
5907   buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
5908
5909   insert_from_string (str, 0, 0,
5910                       SCHARS (str), SBYTES (str), 0);
5911   UNGCPRO;
5912   inhibit_pre_post_conversion = 1;
5913   if (encodep)
5914     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5915   else
5916     {
5917       Vlast_coding_system_used = coding->symbol;
5918       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5919       call1 (coding->post_read_conversion, make_number (Z - BEG));
5920       coding->symbol = Vlast_coding_system_used;
5921     }
5922   inhibit_pre_post_conversion = 0;
5923   Vdeactivate_mark = old_deactivate_mark;
5924   str = make_buffer_string (BEG, Z, 1);
5925   return unbind_to (count, str);
5926 }
5927
5928 Lisp_Object
5929 decode_coding_string (str, coding, nocopy)
5930      Lisp_Object str;
5931      struct coding_system *coding;
5932      int nocopy;
5933 {
5934   int len;
5935   struct conversion_buffer buf;
5936   int from, to_byte;
5937   Lisp_Object saved_coding_symbol;
5938   int result;
5939   int require_decoding;
5940   int shrinked_bytes = 0;
5941   Lisp_Object newstr;
5942   int consumed, consumed_char, produced, produced_char;
5943
5944   from = 0;
5945   to_byte = SBYTES (str);
5946
5947   saved_coding_symbol = coding->symbol;
5948   coding->src_multibyte = STRING_MULTIBYTE (str);
5949   coding->dst_multibyte = 1;
5950   if (CODING_REQUIRE_DETECTION (coding))
5951     {
5952       /* See the comments in code_convert_region.  */
5953       if (coding->type == coding_type_undecided)
5954         {
5955           detect_coding (coding, SDATA (str), to_byte);
5956           if (coding->type == coding_type_undecided)
5957             {
5958               coding->type = coding_type_emacs_mule;
5959               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5960               /* As emacs-mule decoder will handle composition, we
5961                  need this setting to allocate coding->cmp_data
5962                  later.  */
5963               coding->composing = COMPOSITION_NO;
5964             }
5965         }
5966       if (coding->eol_type == CODING_EOL_UNDECIDED
5967           && coding->type != coding_type_ccl)
5968         {
5969           saved_coding_symbol = coding->symbol;
5970           detect_eol (coding, SDATA (str), to_byte);
5971           if (coding->eol_type == CODING_EOL_UNDECIDED)
5972             coding->eol_type = CODING_EOL_LF;
5973           /* We had better recover the original eol format if we
5974              encounter an inconsistent eol format while decoding.  */
5975           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5976         }
5977     }
5978
5979   if (coding->type == coding_type_no_conversion
5980       || coding->type == coding_type_raw_text)
5981     coding->dst_multibyte = 0;
5982
5983   require_decoding = CODING_REQUIRE_DECODING (coding);
5984
5985   if (STRING_MULTIBYTE (str))
5986     {
5987       /* Decoding routines expect the source text to be unibyte.  */
5988       str = Fstring_as_unibyte (str);
5989       to_byte = SBYTES (str);
5990       nocopy = 1;
5991       coding->src_multibyte = 0;
5992     }
5993
5994   /* Try to skip the heading and tailing ASCIIs.  */
5995   if (require_decoding && coding->type != coding_type_ccl)
5996     {
5997       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
5998                                 0);
5999       if (from == to_byte)
6000         require_decoding = 0;
6001       shrinked_bytes = from + (SBYTES (str) - to_byte);
6002     }
6003
6004   if (!require_decoding
6005       && !(SYMBOLP (coding->post_read_conversion)
6006            && !NILP (Ffboundp (coding->post_read_conversion))))
6007     {
6008       coding->consumed = SBYTES (str);
6009       coding->consumed_char = SCHARS (str);
6010       if (coding->dst_multibyte)
6011         {
6012           str = Fstring_as_multibyte (str);
6013           nocopy = 1;
6014         }
6015       coding->produced = SBYTES (str);
6016       coding->produced_char = SCHARS (str);
6017       return (nocopy ? str : Fcopy_sequence (str));
6018     }
6019
6020   if (coding->composing != COMPOSITION_DISABLED)
6021     coding_allocate_composition_data (coding, from);
6022   len = decoding_buffer_size (coding, to_byte - from);
6023   allocate_conversion_buffer (buf, len);
6024
6025   consumed = consumed_char = produced = produced_char = 0;
6026   while (1)
6027     {
6028       result = decode_coding (coding, SDATA (str) + from + consumed,
6029                               buf.data + produced, to_byte - from - consumed,
6030                               buf.size - produced);
6031       consumed += coding->consumed;
6032       consumed_char += coding->consumed_char;
6033       produced += coding->produced;
6034       produced_char += coding->produced_char;
6035       if (result == CODING_FINISH_NORMAL
6036           || (result == CODING_FINISH_INSUFFICIENT_SRC
6037               && coding->consumed == 0))
6038         break;
6039       if (result == CODING_FINISH_INSUFFICIENT_CMP)
6040         coding_allocate_composition_data (coding, from + produced_char);
6041       else if (result == CODING_FINISH_INSUFFICIENT_DST)
6042         extend_conversion_buffer (&buf);
6043       else if (result == CODING_FINISH_INCONSISTENT_EOL)
6044         {
6045           Lisp_Object eol_type;
6046
6047           /* Recover the original EOL format.  */
6048           if (coding->eol_type == CODING_EOL_CR)
6049             {
6050               unsigned char *p;
6051               for (p = buf.data; p < buf.data + produced; p++)
6052                 if (*p == '\n') *p = '\r';
6053             }
6054           else if (coding->eol_type == CODING_EOL_CRLF)
6055             {
6056               int num_eol = 0;
6057               unsigned char *p0, *p1;
6058               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
6059                 if (*p0 == '\n') num_eol++;
6060               if (produced + num_eol >= buf.size)
6061                 extend_conversion_buffer (&buf);
6062               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
6063                 {
6064                   *--p1 = *--p0;
6065                   if (*p0 == '\n') *--p1 = '\r';
6066                 }
6067               produced += num_eol;
6068               produced_char += num_eol;
6069             }
6070           /* Suppress eol-format conversion in the further conversion.  */
6071           coding->eol_type = CODING_EOL_LF;
6072
6073           /* Set the coding system symbol to that for Unix-like EOL.  */
6074           eol_type = Fget (saved_coding_symbol, Qeol_type);
6075           if (VECTORP (eol_type)
6076               && XVECTOR (eol_type)->size == 3
6077               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
6078             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
6079           else
6080             coding->symbol = saved_coding_symbol;
6081
6082
6083         }
6084     }
6085
6086   coding->consumed = consumed;
6087   coding->consumed_char = consumed_char;
6088   coding->produced = produced;
6089   coding->produced_char = produced_char;
6090
6091   if (coding->dst_multibyte)
6092     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
6093                                            produced + shrinked_bytes);
6094   else
6095     newstr = make_uninit_string (produced + shrinked_bytes);
6096   if (from > 0)
6097     STRING_COPYIN (newstr, 0, SDATA (str), from);
6098   STRING_COPYIN (newstr, from, buf.data, produced);
6099   if (shrinked_bytes > from)
6100     STRING_COPYIN (newstr, from + produced,
6101                    SDATA (str) + to_byte,
6102                    shrinked_bytes - from);
6103   free_conversion_buffer (&buf);
6104
6105   if (coding->cmp_data && coding->cmp_data->used)
6106     coding_restore_composition (coding, newstr);
6107   coding_free_composition_data (coding);
6108
6109   if (SYMBOLP (coding->post_read_conversion)
6110       && !NILP (Ffboundp (coding->post_read_conversion)))
6111     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
6112
6113   return newstr;
6114 }
6115
6116 Lisp_Object
6117 encode_coding_string (str, coding, nocopy)
6118      Lisp_Object str;
6119      struct coding_system *coding;
6120      int nocopy;
6121 {
6122   int len;
6123   struct conversion_buffer buf;
6124   int from, to, to_byte;
6125   int result;
6126   int shrinked_bytes = 0;
6127   Lisp_Object newstr;
6128   int consumed, consumed_char, produced, produced_char;
6129
6130   if (SYMBOLP (coding->pre_write_conversion)
6131       && !NILP (Ffboundp (coding->pre_write_conversion)))
6132     str = run_pre_post_conversion_on_str (str, coding, 1);
6133
6134   from = 0;
6135   to = SCHARS (str);
6136   to_byte = SBYTES (str);
6137
6138   /* Encoding routines determine the multibyteness of the source text
6139      by coding->src_multibyte.  */
6140   coding->src_multibyte = STRING_MULTIBYTE (str);
6141   coding->dst_multibyte = 0;
6142   if (! CODING_REQUIRE_ENCODING (coding))
6143     {
6144       coding->consumed = SBYTES (str);
6145       coding->consumed_char = SCHARS (str);
6146       if (STRING_MULTIBYTE (str))
6147         {
6148           str = Fstring_as_unibyte (str);
6149           nocopy = 1;
6150         }
6151       coding->produced = SBYTES (str);
6152       coding->produced_char = SCHARS (str);
6153       return (nocopy ? str : Fcopy_sequence (str));
6154     }
6155
6156   if (coding->composing != COMPOSITION_DISABLED)
6157     coding_save_composition (coding, from, to, str);
6158
6159   /* Try to skip the heading and tailing ASCIIs.  */
6160   if (coding->type != coding_type_ccl)
6161     {
6162       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6163                                 1);
6164       if (from == to_byte)
6165         return (nocopy ? str : Fcopy_sequence (str));
6166       shrinked_bytes = from + (SBYTES (str) - to_byte);
6167     }
6168
6169   len = encoding_buffer_size (coding, to_byte - from);
6170   allocate_conversion_buffer (buf, len);
6171
6172   consumed = consumed_char = produced = produced_char = 0;
6173   while (1)
6174     {
6175       result = encode_coding (coding, SDATA (str) + from + consumed,
6176                               buf.data + produced, to_byte - from - consumed,
6177                               buf.size - produced);
6178       consumed += coding->consumed;
6179       consumed_char += coding->consumed_char;
6180       produced += coding->produced;
6181       produced_char += coding->produced_char;
6182       if (result == CODING_FINISH_NORMAL
6183           || (result == CODING_FINISH_INSUFFICIENT_SRC
6184               && coding->consumed == 0))
6185         break;
6186       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6187       extend_conversion_buffer (&buf);
6188     }
6189
6190   coding->consumed = consumed;
6191   coding->consumed_char = consumed_char;
6192   coding->produced = produced;
6193   coding->produced_char = produced_char;
6194
6195   newstr = make_uninit_string (produced + shrinked_bytes);
6196   if (from > 0)
6197     STRING_COPYIN (newstr, 0, SDATA (str), from);
6198   STRING_COPYIN (newstr, from, buf.data, produced);
6199   if (shrinked_bytes > from)
6200     STRING_COPYIN (newstr, from + produced,
6201                    SDATA (str) + to_byte,
6202                    shrinked_bytes - from);
6203
6204   free_conversion_buffer (&buf);
6205   coding_free_composition_data (coding);
6206
6207   return newstr;
6208 }
6209
6210 \f
6211 #ifdef emacs
6212 /*** 8. Emacs Lisp library functions ***/
6213
6214 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6215        doc: /* Return t if OBJECT is nil or a coding-system.
6216 See the documentation of `make-coding-system' for information
6217 about coding-system objects.  */)
6218      (obj)
6219      Lisp_Object obj;
6220 {
6221   if (NILP (obj))
6222     return Qt;
6223   if (!SYMBOLP (obj))
6224     return Qnil;
6225   /* Get coding-spec vector for OBJ.  */
6226   obj = Fget (obj, Qcoding_system);
6227   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6228           ? Qt : Qnil);
6229 }
6230
6231 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6232        Sread_non_nil_coding_system, 1, 1, 0,
6233        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6234      (prompt)
6235      Lisp_Object prompt;
6236 {
6237   Lisp_Object val;
6238   do
6239     {
6240       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6241                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6242     }
6243   while (SCHARS (val) == 0);
6244   return (Fintern (val, Qnil));
6245 }
6246
6247 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6248        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6249 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6250      (prompt, default_coding_system)
6251      Lisp_Object prompt, default_coding_system;
6252 {
6253   Lisp_Object val;
6254   if (SYMBOLP (default_coding_system))
6255     default_coding_system = SYMBOL_NAME (default_coding_system);
6256   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6257                           Qt, Qnil, Qcoding_system_history,
6258                           default_coding_system, Qnil);
6259   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6260 }
6261
6262 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6263        1, 1, 0,
6264        doc: /* Check validity of CODING-SYSTEM.
6265 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6266 It is valid if it is a symbol with a non-nil `coding-system' property.
6267 The value of property should be a vector of length 5.  */)
6268      (coding_system)
6269      Lisp_Object coding_system;
6270 {
6271   CHECK_SYMBOL (coding_system);
6272   if (!NILP (Fcoding_system_p (coding_system)))
6273     return coding_system;
6274   while (1)
6275     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6276 }
6277 \f
6278 Lisp_Object
6279 detect_coding_system (src, src_bytes, highest, multibytep)
6280      const unsigned char *src;
6281      int src_bytes, highest;
6282      int multibytep;
6283 {
6284   int coding_mask, eol_type;
6285   Lisp_Object val, tmp;
6286   int dummy;
6287
6288   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6289   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6290   if (eol_type == CODING_EOL_INCONSISTENT)
6291     eol_type = CODING_EOL_UNDECIDED;
6292
6293   if (!coding_mask)
6294     {
6295       val = Qundecided;
6296       if (eol_type != CODING_EOL_UNDECIDED)
6297         {
6298           Lisp_Object val2;
6299           val2 = Fget (Qundecided, Qeol_type);
6300           if (VECTORP (val2))
6301             val = XVECTOR (val2)->contents[eol_type];
6302         }
6303       return (highest ? val : Fcons (val, Qnil));
6304     }
6305
6306   /* At first, gather possible coding systems in VAL.  */
6307   val = Qnil;
6308   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6309     {
6310       Lisp_Object category_val, category_index;
6311
6312       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6313       category_val = Fsymbol_value (XCAR (tmp));
6314       if (!NILP (category_val)
6315           && NATNUMP (category_index)
6316           && (coding_mask & (1 << XFASTINT (category_index))))
6317         {
6318           val = Fcons (category_val, val);
6319           if (highest)
6320             break;
6321         }
6322     }
6323   if (!highest)
6324     val = Fnreverse (val);
6325
6326   /* Then, replace the elements with subsidiary coding systems.  */
6327   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6328     {
6329       if (eol_type != CODING_EOL_UNDECIDED
6330           && eol_type != CODING_EOL_INCONSISTENT)
6331         {
6332           Lisp_Object eol;
6333           eol = Fget (XCAR (tmp), Qeol_type);
6334           if (VECTORP (eol))
6335             XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
6336         }
6337     }
6338   return (highest ? XCAR (val) : val);
6339 }
6340
6341 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6342        2, 3, 0,
6343        doc: /* Detect how the byte sequence in the region is encoded.
6344 Return a list of possible coding systems used on decoding a byte
6345 sequence containing the bytes in the region between START and END when
6346 the coding system `undecided' is specified.  The list is ordered by
6347 priority decided in the current language environment.
6348
6349 If only ASCII characters are found, it returns a list of single element
6350 `undecided' or its subsidiary coding system according to a detected
6351 end-of-line format.
6352
6353 If optional argument HIGHEST is non-nil, return the coding system of
6354 highest priority.  */)
6355      (start, end, highest)
6356      Lisp_Object start, end, highest;
6357 {
6358   int from, to;
6359   int from_byte, to_byte;
6360   int include_anchor_byte = 0;
6361
6362   CHECK_NUMBER_COERCE_MARKER (start);
6363   CHECK_NUMBER_COERCE_MARKER (end);
6364
6365   validate_region (&start, &end);
6366   from = XINT (start), to = XINT (end);
6367   from_byte = CHAR_TO_BYTE (from);
6368   to_byte = CHAR_TO_BYTE (to);
6369
6370   if (from < GPT && to >= GPT)
6371     move_gap_both (to, to_byte);
6372   /* If we an anchor byte `\0' follows the region, we include it in
6373      the detecting source.  Then code detectors can handle the tailing
6374      byte sequence more accurately.
6375
6376      Fix me: This is not a perfect solution.  It is better that we
6377      add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6378   */
6379   if (to == Z || (to == GPT && GAP_SIZE > 0))
6380     include_anchor_byte = 1;
6381   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6382                                to_byte - from_byte + include_anchor_byte,
6383                                !NILP (highest),
6384                                !NILP (current_buffer
6385                                       ->enable_multibyte_characters));
6386 }
6387
6388 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6389        1, 2, 0,
6390        doc: /* Detect how the byte sequence in STRING is encoded.
6391 Return a list of possible coding systems used on decoding a byte
6392 sequence containing the bytes in STRING when the coding system
6393 `undecided' is specified.  The list is ordered by priority decided in
6394 the current language environment.
6395
6396 If only ASCII characters are found, it returns a list of single element
6397 `undecided' or its subsidiary coding system according to a detected
6398 end-of-line format.
6399
6400 If optional argument HIGHEST is non-nil, return the coding system of
6401 highest priority.  */)
6402      (string, highest)
6403      Lisp_Object string, highest;
6404 {
6405   CHECK_STRING (string);
6406
6407   return detect_coding_system (SDATA (string),
6408                                /* "+ 1" is to include the anchor byte
6409                                   `\0'.  With this, code detectors can
6410                                   handle the tailing bytes more
6411                                   accurately.  */
6412                                SBYTES (string) + 1,
6413                                !NILP (highest),
6414                                STRING_MULTIBYTE (string));
6415 }
6416
6417 /*  Subroutine for Fsafe_coding_systems_region_internal.
6418
6419     Return a list of coding systems that safely encode the multibyte
6420     text between P and PEND.  SAFE_CODINGS, if non-nil, is an alist of
6421     possible coding systems.  If it is nil, it means that we have not
6422     yet found any coding systems.
6423
6424     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6425     element of WORK_TABLE is set to t once the element is looked up.
6426
6427     If a non-ASCII single byte char is found, set
6428     *single_byte_char_found to 1.  */
6429
6430 static Lisp_Object
6431 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6432      unsigned char *p, *pend;
6433      Lisp_Object safe_codings, work_table;
6434      int *single_byte_char_found;
6435 {
6436   int c, len, i;
6437   Lisp_Object val, ch;
6438   Lisp_Object prev, tail;
6439
6440   while (p < pend)
6441     {
6442       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6443       p += len;
6444       if (ASCII_BYTE_P (c))
6445         /* We can ignore ASCII characters here.  */
6446         continue;
6447       if (SINGLE_BYTE_CHAR_P (c))
6448         *single_byte_char_found = 1;
6449       if (NILP (safe_codings))
6450         /* Already all coding systems are excluded.  But, we can't
6451            terminate the loop here because non-ASCII single-byte char
6452            must be found.  */
6453         continue;
6454       /* Check the safe coding systems for C.  */
6455       ch = make_number (c);
6456       val = Faref (work_table, ch);
6457       if (EQ (val, Qt))
6458         /* This element was already checked.  Ignore it.  */
6459         continue;
6460       /* Remember that we checked this element.  */
6461       Faset (work_table, ch, Qt);
6462
6463       for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6464         {
6465           Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
6466           int encodable;
6467
6468           elt = XCAR (tail);
6469           if (CONSP (XCDR (elt)))
6470             {
6471               /* This entry has this format now:
6472                  ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6473                           ACCEPT-LATIN-EXTRA ) */
6474               val = XCDR (elt);
6475               encodable = ! NILP (Faref (XCAR (val), ch));
6476               if (! encodable)
6477                 {
6478                   val = XCDR (val);
6479                   translation_table = XCAR (val);
6480                   hash_table = XCAR (XCDR (val));
6481                   accept_latin_extra = XCAR (XCDR (XCDR (val)));
6482                 }
6483             }
6484           else
6485             {
6486               /* This entry has this format now: ( CODING . SAFE-CHARS) */
6487               encodable = ! NILP (Faref (XCDR (elt), ch));
6488               if (! encodable)
6489                 {
6490                   /* Transform the format to:
6491                      ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6492                        ACCEPT-LATIN-EXTRA )  */
6493                   val = Fget (XCAR (elt), Qcoding_system);
6494                   translation_table
6495                     = Fplist_get (AREF (val, 3),
6496                                   Qtranslation_table_for_encode);
6497                   if (SYMBOLP (translation_table))
6498                     translation_table = Fget (translation_table,
6499                                               Qtranslation_table);
6500                   hash_table
6501                     = (CHAR_TABLE_P (translation_table)
6502                        ? XCHAR_TABLE (translation_table)->extras[1]
6503                        : Qnil);
6504                   accept_latin_extra
6505                     = ((EQ (AREF (val, 0), make_number (2))
6506                         && VECTORP (AREF (val, 4)))
6507                        ? AREF (AREF (val, 4), CODING_FLAG_ISO_LATIN_EXTRA)
6508                        : Qnil);
6509                   XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
6510                                         translation_table, hash_table,
6511                                         accept_latin_extra));
6512                 }
6513             }
6514
6515           if (! encodable
6516               && ((CHAR_TABLE_P (translation_table)
6517                    && ! NILP (Faref (translation_table, ch)))
6518                   || (HASH_TABLE_P (hash_table)
6519                       && ! NILP (Fgethash (ch, hash_table, Qnil)))
6520                   || (SINGLE_BYTE_CHAR_P (c)
6521                       && ! NILP (accept_latin_extra)
6522                       && VECTORP (Vlatin_extra_code_table)
6523                       && ! NILP (AREF (Vlatin_extra_code_table, c)))))
6524             encodable = 1;
6525           if (encodable)
6526             prev = tail;
6527           else
6528             {
6529               /* Exclude this coding system from SAFE_CODINGS.  */
6530               if (EQ (tail, safe_codings))
6531                 safe_codings = XCDR (safe_codings);
6532               else
6533                 XSETCDR (prev, XCDR (tail));
6534             }
6535         }
6536     }
6537   return safe_codings;
6538 }
6539
6540 DEFUN ("find-coding-systems-region-internal",
6541        Ffind_coding_systems_region_internal,
6542        Sfind_coding_systems_region_internal, 2, 2, 0,
6543        doc: /* Internal use only.  */)
6544      (start, end)
6545      Lisp_Object start, end;
6546 {
6547   Lisp_Object work_table, safe_codings;
6548   int non_ascii_p = 0;
6549   int single_byte_char_found = 0;
6550   const unsigned char *p1, *p1end, *p2, *p2end, *p;
6551
6552   if (STRINGP (start))
6553     {
6554       if (!STRING_MULTIBYTE (start))
6555         return Qt;
6556       p1 = SDATA (start), p1end = p1 + SBYTES (start);
6557       p2 = p2end = p1end;
6558       if (SCHARS (start) != SBYTES (start))
6559         non_ascii_p = 1;
6560     }
6561   else
6562     {
6563       int from, to, stop;
6564
6565       CHECK_NUMBER_COERCE_MARKER (start);
6566       CHECK_NUMBER_COERCE_MARKER (end);
6567       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6568         args_out_of_range (start, end);
6569       if (NILP (current_buffer->enable_multibyte_characters))
6570         return Qt;
6571       from = CHAR_TO_BYTE (XINT (start));
6572       to = CHAR_TO_BYTE (XINT (end));
6573       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6574       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6575       if (stop == to)
6576         p2 = p2end = p1end;
6577       else
6578         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6579       if (XINT (end) - XINT (start) != to - from)
6580         non_ascii_p = 1;
6581     }
6582
6583   if (!non_ascii_p)
6584     {
6585       /* We are sure that the text contains no multibyte character.
6586          Check if it contains eight-bit-graphic.  */
6587       p = p1;
6588       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6589       if (p == p1end)
6590         {
6591           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6592           if (p == p2end)
6593             return Qt;
6594         }
6595     }
6596
6597   /* The text contains non-ASCII characters.  */
6598
6599   work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6600   safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6601
6602   safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6603                                     &single_byte_char_found);
6604   if (p2 < p2end)
6605     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6606                                       &single_byte_char_found);
6607   if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6608     safe_codings = Qt;
6609   else
6610     {
6611       /* Turn safe_codings to a list of coding systems... */
6612       Lisp_Object val;
6613
6614       if (single_byte_char_found)
6615         /* ... and append these for eight-bit chars.  */
6616         val = Fcons (Qraw_text,
6617                      Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
6618       else
6619         /* ... and append generic coding systems.  */
6620         val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
6621
6622       for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
6623         val = Fcons (XCAR (XCAR (safe_codings)), val);
6624       safe_codings = val;
6625     }
6626
6627   return safe_codings;
6628 }
6629
6630
6631 /* Search from position POS for such characters that are unencodable
6632    accoding to SAFE_CHARS, and return a list of their positions.  P
6633    points where in the memory the character at POS exists.  Limit the
6634    search at PEND or when Nth unencodable characters are found.
6635
6636    If SAFE_CHARS is a char table, an element for an unencodable
6637    character is nil.
6638
6639    If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
6640
6641    Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
6642    eight-bit-graphic characters are unencodable.  */
6643
6644 static Lisp_Object
6645 unencodable_char_position (safe_chars, pos, p, pend, n)
6646      Lisp_Object safe_chars;
6647      int pos;
6648      unsigned char *p, *pend;
6649      int n;
6650 {
6651   Lisp_Object pos_list;
6652
6653   pos_list = Qnil;
6654   while (p < pend)
6655     {
6656       int len;
6657       int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
6658
6659       if (c >= 128
6660           && (CHAR_TABLE_P (safe_chars)
6661               ? NILP (CHAR_TABLE_REF (safe_chars, c))
6662               : (NILP (safe_chars) || c < 256)))
6663         {
6664           pos_list = Fcons (make_number (pos), pos_list);
6665           if (--n <= 0)
6666             break;
6667         }
6668       pos++;
6669       p += len;
6670     }
6671   return Fnreverse (pos_list);
6672 }
6673
6674
6675 DEFUN ("unencodable-char-position", Funencodable_char_position,
6676        Sunencodable_char_position, 3, 5, 0,
6677        doc: /*
6678 Return position of first un-encodable character in a region.
6679 START and END specfiy the region and CODING-SYSTEM specifies the
6680 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
6681
6682 If optional 4th argument COUNT is non-nil, it specifies at most how
6683 many un-encodable characters to search.  In this case, the value is a
6684 list of positions.
6685
6686 If optional 5th argument STRING is non-nil, it is a string to search
6687 for un-encodable characters.  In that case, START and END are indexes
6688 to the string.  */)
6689      (start, end, coding_system, count, string)
6690      Lisp_Object start, end, coding_system, count, string;
6691 {
6692   int n;
6693   Lisp_Object safe_chars;
6694   struct coding_system coding;
6695   Lisp_Object positions;
6696   int from, to;
6697   unsigned char *p, *pend;
6698
6699   if (NILP (string))
6700     {
6701       validate_region (&start, &end);
6702       from = XINT (start);
6703       to = XINT (end);
6704       if (NILP (current_buffer->enable_multibyte_characters))
6705         return Qnil;
6706       p = CHAR_POS_ADDR (from);
6707       if (to == GPT)
6708         pend = GPT_ADDR;
6709       else
6710         pend = CHAR_POS_ADDR (to);
6711     }
6712   else
6713     {
6714       CHECK_STRING (string);
6715       CHECK_NATNUM (start);
6716       CHECK_NATNUM (end);
6717       from = XINT (start);
6718       to = XINT (end);
6719       if (from > to
6720           || to > SCHARS (string))
6721         args_out_of_range_3 (string, start, end);
6722       if (! STRING_MULTIBYTE (string))
6723         return Qnil;
6724       p = SDATA (string) + string_char_to_byte (string, from);
6725       pend = SDATA (string) + string_char_to_byte (string, to);
6726     }
6727
6728   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
6729
6730   if (NILP (count))
6731     n = 1;
6732   else
6733     {
6734       CHECK_NATNUM (count);
6735       n = XINT (count);
6736     }
6737
6738   if (coding.type == coding_type_no_conversion
6739       || coding.type == coding_type_raw_text)
6740     return Qnil;
6741
6742   if (coding.type == coding_type_undecided)
6743     safe_chars = Qnil;
6744   else
6745     safe_chars = coding_safe_chars (coding_system);
6746
6747   if (STRINGP (string)
6748       || from >= GPT || to <= GPT)
6749     positions = unencodable_char_position (safe_chars, from, p, pend, n);
6750   else
6751     {
6752       Lisp_Object args[2];
6753
6754       args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
6755       n -= XINT (Flength (args[0]));
6756       if (n <= 0)
6757         positions = args[0];
6758       else
6759         {
6760           args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
6761                                                pend, n);
6762           positions = Fappend (2, args);
6763         }
6764     }
6765
6766   return  (NILP (count) ? Fcar (positions) : positions);
6767 }
6768
6769
6770 Lisp_Object
6771 code_convert_region1 (start, end, coding_system, encodep)
6772      Lisp_Object start, end, coding_system;
6773      int encodep;
6774 {
6775   struct coding_system coding;
6776   int from, to;
6777
6778   CHECK_NUMBER_COERCE_MARKER (start);
6779   CHECK_NUMBER_COERCE_MARKER (end);
6780   CHECK_SYMBOL (coding_system);
6781
6782   validate_region (&start, &end);
6783   from = XFASTINT (start);
6784   to = XFASTINT (end);
6785
6786   if (NILP (coding_system))
6787     return make_number (to - from);
6788
6789   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6790     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6791
6792   coding.mode |= CODING_MODE_LAST_BLOCK;
6793   coding.src_multibyte = coding.dst_multibyte
6794     = !NILP (current_buffer->enable_multibyte_characters);
6795   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6796                        &coding, encodep, 1);
6797   Vlast_coding_system_used = coding.symbol;
6798   return make_number (coding.produced_char);
6799 }
6800
6801 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6802        3, 3, "r\nzCoding system: ",
6803        doc: /* Decode the current region from the specified coding system.
6804 When called from a program, takes three arguments:
6805 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6806 This function sets `last-coding-system-used' to the precise coding system
6807 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6808 not fully specified.)
6809 It returns the length of the decoded text.  */)
6810      (start, end, coding_system)
6811      Lisp_Object start, end, coding_system;
6812 {
6813   return code_convert_region1 (start, end, coding_system, 0);
6814 }
6815
6816 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6817        3, 3, "r\nzCoding system: ",
6818        doc: /* Encode the current region into the specified coding system.
6819 When called from a program, takes three arguments:
6820 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6821 This function sets `last-coding-system-used' to the precise coding system
6822 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6823 not fully specified.)
6824 It returns the length of the encoded text.  */)
6825      (start, end, coding_system)
6826      Lisp_Object start, end, coding_system;
6827 {
6828   return code_convert_region1 (start, end, coding_system, 1);
6829 }
6830
6831 Lisp_Object
6832 code_convert_string1 (string, coding_system, nocopy, encodep)
6833      Lisp_Object string, coding_system, nocopy;
6834      int encodep;
6835 {
6836   struct coding_system coding;
6837
6838   CHECK_STRING (string);
6839   CHECK_SYMBOL (coding_system);
6840
6841   if (NILP (coding_system))
6842     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6843
6844   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6845     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6846
6847   coding.mode |= CODING_MODE_LAST_BLOCK;
6848   string = (encodep
6849             ? encode_coding_string (string, &coding, !NILP (nocopy))
6850             : decode_coding_string (string, &coding, !NILP (nocopy)));
6851   Vlast_coding_system_used = coding.symbol;
6852
6853   return string;
6854 }
6855
6856 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6857        2, 3, 0,
6858        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6859 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6860 if the decoding operation is trivial.
6861 This function sets `last-coding-system-used' to the precise coding system
6862 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6863 not fully specified.)  */)
6864      (string, coding_system, nocopy)
6865      Lisp_Object string, coding_system, nocopy;
6866 {
6867   return code_convert_string1 (string, coding_system, nocopy, 0);
6868 }
6869
6870 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6871        2, 3, 0,
6872        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6873 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6874 if the encoding operation is trivial.
6875 This function sets `last-coding-system-used' to the precise coding system
6876 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6877 not fully specified.)  */)
6878      (string, coding_system, nocopy)
6879      Lisp_Object string, coding_system, nocopy;
6880 {
6881   return code_convert_string1 (string, coding_system, nocopy, 1);
6882 }
6883
6884 /* Encode or decode STRING according to CODING_SYSTEM.
6885    Do not set Vlast_coding_system_used.
6886
6887    This function is called only from macros DECODE_FILE and
6888    ENCODE_FILE, thus we ignore character composition.  */
6889
6890 Lisp_Object
6891 code_convert_string_norecord (string, coding_system, encodep)
6892      Lisp_Object string, coding_system;
6893      int encodep;
6894 {
6895   struct coding_system coding;
6896
6897   CHECK_STRING (string);
6898   CHECK_SYMBOL (coding_system);
6899
6900   if (NILP (coding_system))
6901     return string;
6902
6903   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6904     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6905
6906   coding.composing = COMPOSITION_DISABLED;
6907   coding.mode |= CODING_MODE_LAST_BLOCK;
6908   return (encodep
6909           ? encode_coding_string (string, &coding, 1)
6910           : decode_coding_string (string, &coding, 1));
6911 }
6912 \f
6913 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6914        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6915 Return the corresponding character.  */)
6916      (code)
6917      Lisp_Object code;
6918 {
6919   unsigned char c1, c2, s1, s2;
6920   Lisp_Object val;
6921
6922   CHECK_NUMBER (code);
6923   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
6924   if (s1 == 0)
6925     {
6926       if (s2 < 0x80)
6927         XSETFASTINT (val, s2);
6928       else if (s2 >= 0xA0 || s2 <= 0xDF)
6929         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
6930       else
6931         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6932     }
6933   else
6934     {
6935       if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
6936           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
6937         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6938       DECODE_SJIS (s1, s2, c1, c2);
6939       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
6940     }
6941   return val;
6942 }
6943
6944 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6945        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6946 Return the corresponding code in SJIS.  */)
6947      (ch)
6948      Lisp_Object ch;
6949 {
6950   int charset, c1, c2, s1, s2;
6951   Lisp_Object val;
6952
6953   CHECK_NUMBER (ch);
6954   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6955   if (charset == CHARSET_ASCII)
6956     {
6957       val = ch;
6958     }
6959   else if (charset == charset_jisx0208
6960            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6961     {
6962       ENCODE_SJIS (c1, c2, s1, s2);
6963       XSETFASTINT (val, (s1 << 8) | s2);
6964     }
6965   else if (charset == charset_katakana_jisx0201
6966            && c1 > 0x20 && c2 < 0xE0)
6967     {
6968       XSETFASTINT (val, c1 | 0x80);
6969     }
6970   else
6971     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6972   return val;
6973 }
6974
6975 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6976        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6977 Return the corresponding character.  */)
6978      (code)
6979      Lisp_Object code;
6980 {
6981   int charset;
6982   unsigned char b1, b2, c1, c2;
6983   Lisp_Object val;
6984
6985   CHECK_NUMBER (code);
6986   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6987   if (b1 == 0)
6988     {
6989       if (b2 >= 0x80)
6990         error ("Invalid BIG5 code: %x", XFASTINT (code));
6991       val = code;
6992     }
6993   else
6994     {
6995       if ((b1 < 0xA1 || b1 > 0xFE)
6996           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6997         error ("Invalid BIG5 code: %x", XFASTINT (code));
6998       DECODE_BIG5 (b1, b2, charset, c1, c2);
6999       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
7000     }
7001   return val;
7002 }
7003
7004 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7005        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7006 Return the corresponding character code in Big5.  */)
7007      (ch)
7008      Lisp_Object ch;
7009 {
7010   int charset, c1, c2, b1, b2;
7011   Lisp_Object val;
7012
7013   CHECK_NUMBER (ch);
7014   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7015   if (charset == CHARSET_ASCII)
7016     {
7017       val = ch;
7018     }
7019   else if ((charset == charset_big5_1
7020             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
7021            || (charset == charset_big5_2
7022                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
7023     {
7024       ENCODE_BIG5 (charset, c1, c2, b1, b2);
7025       XSETFASTINT (val, (b1 << 8) | b2);
7026     }
7027   else
7028     error ("Can't encode to Big5: %d", XFASTINT (ch));
7029   return val;
7030 }
7031 \f
7032 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
7033        Sset_terminal_coding_system_internal, 1, 1, 0,
7034        doc: /* Internal use only.  */)
7035      (coding_system)
7036      Lisp_Object coding_system;
7037 {
7038   CHECK_SYMBOL (coding_system);
7039   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
7040   /* We had better not send unsafe characters to terminal.  */
7041   terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
7042   /* Character composition should be disabled.  */
7043   terminal_coding.composing = COMPOSITION_DISABLED;
7044   /* Error notification should be suppressed.  */
7045   terminal_coding.suppress_error = 1;
7046   terminal_coding.src_multibyte = 1;
7047   terminal_coding.dst_multibyte = 0;
7048   return Qnil;
7049 }
7050
7051 DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
7052        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7053        doc: /* Internal use only.  */)
7054      (coding_system)
7055      Lisp_Object coding_system;
7056 {
7057   CHECK_SYMBOL (coding_system);
7058   setup_coding_system (Fcheck_coding_system (coding_system),
7059                        &safe_terminal_coding);
7060   /* Character composition should be disabled.  */
7061   safe_terminal_coding.composing = COMPOSITION_DISABLED;
7062   /* Error notification should be suppressed.  */
7063   terminal_coding.suppress_error = 1;
7064   safe_terminal_coding.src_multibyte = 1;
7065   safe_terminal_coding.dst_multibyte = 0;
7066   return Qnil;
7067 }
7068
7069 DEFUN ("terminal-coding-system", Fterminal_coding_system,
7070        Sterminal_coding_system, 0, 0, 0,
7071        doc: /* Return coding system specified for terminal output.  */)
7072      ()
7073 {
7074   return terminal_coding.symbol;
7075 }
7076
7077 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
7078        Sset_keyboard_coding_system_internal, 1, 1, 0,
7079        doc: /* Internal use only.  */)
7080      (coding_system)
7081      Lisp_Object coding_system;
7082 {
7083   CHECK_SYMBOL (coding_system);
7084   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
7085   /* Character composition should be disabled.  */
7086   keyboard_coding.composing = COMPOSITION_DISABLED;
7087   return Qnil;
7088 }
7089
7090 DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
7091        Skeyboard_coding_system, 0, 0, 0,
7092        doc: /* Return coding system specified for decoding keyboard input.  */)
7093      ()
7094 {
7095   return keyboard_coding.symbol;
7096 }
7097
7098 \f
7099 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7100        Sfind_operation_coding_system,  1, MANY, 0,
7101        doc: /* Choose a coding system for an operation based on the target name.
7102 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7103 DECODING-SYSTEM is the coding system to use for decoding
7104 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7105 for encoding (in case OPERATION does encoding).
7106
7107 The first argument OPERATION specifies an I/O primitive:
7108   For file I/O, `insert-file-contents' or `write-region'.
7109   For process I/O, `call-process', `call-process-region', or `start-process'.
7110   For network I/O, `open-network-stream'.
7111
7112 The remaining arguments should be the same arguments that were passed
7113 to the primitive.  Depending on which primitive, one of those arguments
7114 is selected as the TARGET.  For example, if OPERATION does file I/O,
7115 whichever argument specifies the file name is TARGET.
7116
7117 TARGET has a meaning which depends on OPERATION:
7118   For file I/O, TARGET is a file name.
7119   For process I/O, TARGET is a process name.
7120   For network I/O, TARGET is a service name or a port number
7121
7122 This function looks up what specified for TARGET in,
7123 `file-coding-system-alist', `process-coding-system-alist',
7124 or `network-coding-system-alist' depending on OPERATION.
7125 They may specify a coding system, a cons of coding systems,
7126 or a function symbol to call.
7127 In the last case, we call the function with one argument,
7128 which is a list of all the arguments given to this function.
7129
7130 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7131      (nargs, args)
7132      int nargs;
7133      Lisp_Object *args;
7134 {
7135   Lisp_Object operation, target_idx, target, val;
7136   register Lisp_Object chain;
7137
7138   if (nargs < 2)
7139     error ("Too few arguments");
7140   operation = args[0];
7141   if (!SYMBOLP (operation)
7142       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7143     error ("Invalid first argument");
7144   if (nargs < 1 + XINT (target_idx))
7145     error ("Too few arguments for operation: %s",
7146            SDATA (SYMBOL_NAME (operation)));
7147   /* For write-region, if the 6th argument (i.e. VISIT, the 5th
7148      argument to write-region) is string, it must be treated as a
7149      target file name.  */
7150   if (EQ (operation, Qwrite_region)
7151       && nargs > 5
7152       && STRINGP (args[5]))
7153     target_idx = make_number (4);
7154   target = args[XINT (target_idx) + 1];
7155   if (!(STRINGP (target)
7156         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7157     error ("Invalid argument %d", XINT (target_idx) + 1);
7158
7159   chain = ((EQ (operation, Qinsert_file_contents)
7160             || EQ (operation, Qwrite_region))
7161            ? Vfile_coding_system_alist
7162            : (EQ (operation, Qopen_network_stream)
7163               ? Vnetwork_coding_system_alist
7164               : Vprocess_coding_system_alist));
7165   if (NILP (chain))
7166     return Qnil;
7167
7168   for (; CONSP (chain); chain = XCDR (chain))
7169     {
7170       Lisp_Object elt;
7171       elt = XCAR (chain);
7172
7173       if (CONSP (elt)
7174           && ((STRINGP (target)
7175                && STRINGP (XCAR (elt))
7176                && fast_string_match (XCAR (elt), target) >= 0)
7177               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7178         {
7179           val = XCDR (elt);
7180           /* Here, if VAL is both a valid coding system and a valid
7181              function symbol, we return VAL as a coding system.  */
7182           if (CONSP (val))
7183             return val;
7184           if (! SYMBOLP (val))
7185             return Qnil;
7186           if (! NILP (Fcoding_system_p (val)))
7187             return Fcons (val, val);
7188           if (! NILP (Ffboundp (val)))
7189             {
7190               val = call1 (val, Flist (nargs, args));
7191               if (CONSP (val))
7192                 return val;
7193               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7194                 return Fcons (val, val);
7195             }
7196           return Qnil;
7197         }
7198     }
7199   return Qnil;
7200 }
7201
7202 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
7203        Supdate_coding_systems_internal, 0, 0, 0,
7204        doc: /* Update internal database for ISO2022 and CCL based coding systems.
7205 When values of any coding categories are changed, you must
7206 call this function.  */)
7207      ()
7208 {
7209   int i;
7210
7211   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
7212     {
7213       Lisp_Object val;
7214
7215       val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
7216       if (!NILP (val))
7217         {
7218           if (! coding_system_table[i])
7219             coding_system_table[i] = ((struct coding_system *)
7220                                       xmalloc (sizeof (struct coding_system)));
7221           setup_coding_system (val, coding_system_table[i]);
7222         }
7223       else if (coding_system_table[i])
7224         {
7225           xfree (coding_system_table[i]);
7226           coding_system_table[i] = NULL;
7227         }
7228     }
7229
7230   return Qnil;
7231 }
7232
7233 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
7234        Sset_coding_priority_internal, 0, 0, 0,
7235        doc: /* Update internal database for the current value of `coding-category-list'.
7236 This function is internal use only.  */)
7237      ()
7238 {
7239   int i = 0, idx;
7240   Lisp_Object val;
7241
7242   val = Vcoding_category_list;
7243
7244   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
7245     {
7246       if (! SYMBOLP (XCAR (val)))
7247         break;
7248       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
7249       if (idx >= CODING_CATEGORY_IDX_MAX)
7250         break;
7251       coding_priorities[i++] = (1 << idx);
7252       val = XCDR (val);
7253     }
7254   /* If coding-category-list is valid and contains all coding
7255      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
7256      the following code saves Emacs from crashing.  */
7257   while (i < CODING_CATEGORY_IDX_MAX)
7258     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
7259
7260   return Qnil;
7261 }
7262
7263 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7264        Sdefine_coding_system_internal, 1, 1, 0,
7265        doc: /* Register CODING-SYSTEM as a base coding system.
7266 This function is internal use only.  */)
7267      (coding_system)
7268      Lisp_Object coding_system;
7269 {
7270   Lisp_Object safe_chars, slot;
7271
7272   if (NILP (Fcheck_coding_system (coding_system)))
7273     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7274   safe_chars = coding_safe_chars (coding_system);
7275   if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
7276     error ("No valid safe-chars property for %s",
7277            SDATA (SYMBOL_NAME (coding_system)));
7278   if (EQ (safe_chars, Qt))
7279     {
7280       if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
7281         XSETCAR (Vcoding_system_safe_chars,
7282                  Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
7283     }
7284   else
7285     {
7286       slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
7287       if (NILP (slot))
7288         XSETCDR (Vcoding_system_safe_chars,
7289                  nconc2 (XCDR (Vcoding_system_safe_chars),
7290                          Fcons (Fcons (coding_system, safe_chars), Qnil)));
7291       else
7292         XSETCDR (slot, safe_chars);
7293     }
7294   return Qnil;
7295 }
7296
7297 #endif /* emacs */
7298
7299 \f
7300 /*** 9. Post-amble ***/
7301
7302 void
7303 init_coding_once ()
7304 {
7305   int i;
7306
7307   /* Emacs' internal format specific initialize routine.  */
7308   for (i = 0; i <= 0x20; i++)
7309     emacs_code_class[i] = EMACS_control_code;
7310   emacs_code_class[0x0A] = EMACS_linefeed_code;
7311   emacs_code_class[0x0D] = EMACS_carriage_return_code;
7312   for (i = 0x21 ; i < 0x7F; i++)
7313     emacs_code_class[i] = EMACS_ascii_code;
7314   emacs_code_class[0x7F] = EMACS_control_code;
7315   for (i = 0x80; i < 0xFF; i++)
7316     emacs_code_class[i] = EMACS_invalid_code;
7317   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
7318   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
7319   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
7320   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
7321
7322   /* ISO2022 specific initialize routine.  */
7323   for (i = 0; i < 0x20; i++)
7324     iso_code_class[i] = ISO_control_0;
7325   for (i = 0x21; i < 0x7F; i++)
7326     iso_code_class[i] = ISO_graphic_plane_0;
7327   for (i = 0x80; i < 0xA0; i++)
7328     iso_code_class[i] = ISO_control_1;
7329   for (i = 0xA1; i < 0xFF; i++)
7330     iso_code_class[i] = ISO_graphic_plane_1;
7331   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7332   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7333   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7334   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7335   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7336   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7337   iso_code_class[ISO_CODE_ESC] = ISO_escape;
7338   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7339   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7340   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7341
7342   setup_coding_system (Qnil, &keyboard_coding);
7343   setup_coding_system (Qnil, &terminal_coding);
7344   setup_coding_system (Qnil, &safe_terminal_coding);
7345   setup_coding_system (Qnil, &default_buffer_file_coding);
7346
7347   bzero (coding_system_table, sizeof coding_system_table);
7348
7349   bzero (ascii_skip_code, sizeof ascii_skip_code);
7350   for (i = 0; i < 128; i++)
7351     ascii_skip_code[i] = 1;
7352
7353 #if defined (MSDOS) || defined (WINDOWSNT)
7354   system_eol_type = CODING_EOL_CRLF;
7355 #else
7356   system_eol_type = CODING_EOL_LF;
7357 #endif
7358
7359   inhibit_pre_post_conversion = 0;
7360 }
7361
7362 #ifdef emacs
7363
7364 void
7365 syms_of_coding ()
7366 {
7367   Qtarget_idx = intern ("target-idx");
7368   staticpro (&Qtarget_idx);
7369
7370   Qcoding_system_history = intern ("coding-system-history");
7371   staticpro (&Qcoding_system_history);
7372   Fset (Qcoding_system_history, Qnil);
7373
7374   /* Target FILENAME is the first argument.  */
7375   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7376   /* Target FILENAME is the third argument.  */
7377   Fput (Qwrite_region, Qtarget_idx, make_number (2));
7378
7379   Qcall_process = intern ("call-process");
7380   staticpro (&Qcall_process);
7381   /* Target PROGRAM is the first argument.  */
7382   Fput (Qcall_process, Qtarget_idx, make_number (0));
7383
7384   Qcall_process_region = intern ("call-process-region");
7385   staticpro (&Qcall_process_region);
7386   /* Target PROGRAM is the third argument.  */
7387   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7388
7389   Qstart_process = intern ("start-process");
7390   staticpro (&Qstart_process);
7391   /* Target PROGRAM is the third argument.  */
7392   Fput (Qstart_process, Qtarget_idx, make_number (2));
7393
7394   Qopen_network_stream = intern ("open-network-stream");
7395   staticpro (&Qopen_network_stream);
7396   /* Target SERVICE is the fourth argument.  */
7397   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7398
7399   Qcoding_system = intern ("coding-system");
7400   staticpro (&Qcoding_system);
7401
7402   Qeol_type = intern ("eol-type");
7403   staticpro (&Qeol_type);
7404
7405   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7406   staticpro (&Qbuffer_file_coding_system);
7407
7408   Qpost_read_conversion = intern ("post-read-conversion");
7409   staticpro (&Qpost_read_conversion);
7410
7411   Qpre_write_conversion = intern ("pre-write-conversion");
7412   staticpro (&Qpre_write_conversion);
7413
7414   Qno_conversion = intern ("no-conversion");
7415   staticpro (&Qno_conversion);
7416
7417   Qundecided = intern ("undecided");
7418   staticpro (&Qundecided);
7419
7420   Qcoding_system_p = intern ("coding-system-p");
7421   staticpro (&Qcoding_system_p);
7422
7423   Qcoding_system_error = intern ("coding-system-error");
7424   staticpro (&Qcoding_system_error);
7425
7426   Fput (Qcoding_system_error, Qerror_conditions,
7427         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7428   Fput (Qcoding_system_error, Qerror_message,
7429         build_string ("Invalid coding system"));
7430
7431   Qcoding_category = intern ("coding-category");
7432   staticpro (&Qcoding_category);
7433   Qcoding_category_index = intern ("coding-category-index");
7434   staticpro (&Qcoding_category_index);
7435
7436   Vcoding_category_table
7437     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7438   staticpro (&Vcoding_category_table);
7439   {
7440     int i;
7441     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7442       {
7443         XVECTOR (Vcoding_category_table)->contents[i]
7444           = intern (coding_category_name[i]);
7445         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7446               Qcoding_category_index, make_number (i));
7447       }
7448   }
7449
7450   Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
7451   staticpro (&Vcoding_system_safe_chars);
7452
7453   Qtranslation_table = intern ("translation-table");
7454   staticpro (&Qtranslation_table);
7455   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
7456
7457   Qtranslation_table_id = intern ("translation-table-id");
7458   staticpro (&Qtranslation_table_id);
7459
7460   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7461   staticpro (&Qtranslation_table_for_decode);
7462
7463   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7464   staticpro (&Qtranslation_table_for_encode);
7465
7466   Qsafe_chars = intern ("safe-chars");
7467   staticpro (&Qsafe_chars);
7468
7469   Qchar_coding_system = intern ("char-coding-system");
7470   staticpro (&Qchar_coding_system);
7471
7472   /* Intern this now in case it isn't already done.
7473      Setting this variable twice is harmless.
7474      But don't staticpro it here--that is done in alloc.c.  */
7475   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7476   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7477   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
7478
7479   Qvalid_codes = intern ("valid-codes");
7480   staticpro (&Qvalid_codes);
7481
7482   Qemacs_mule = intern ("emacs-mule");
7483   staticpro (&Qemacs_mule);
7484
7485   Qraw_text = intern ("raw-text");
7486   staticpro (&Qraw_text);
7487
7488   defsubr (&Scoding_system_p);
7489   defsubr (&Sread_coding_system);
7490   defsubr (&Sread_non_nil_coding_system);
7491   defsubr (&Scheck_coding_system);
7492   defsubr (&Sdetect_coding_region);
7493   defsubr (&Sdetect_coding_string);
7494   defsubr (&Sfind_coding_systems_region_internal);
7495   defsubr (&Sunencodable_char_position);
7496   defsubr (&Sdecode_coding_region);
7497   defsubr (&Sencode_coding_region);
7498   defsubr (&Sdecode_coding_string);
7499   defsubr (&Sencode_coding_string);
7500   defsubr (&Sdecode_sjis_char);
7501   defsubr (&Sencode_sjis_char);
7502   defsubr (&Sdecode_big5_char);
7503   defsubr (&Sencode_big5_char);
7504   defsubr (&Sset_terminal_coding_system_internal);
7505   defsubr (&Sset_safe_terminal_coding_system_internal);
7506   defsubr (&Sterminal_coding_system);
7507   defsubr (&Sset_keyboard_coding_system_internal);
7508   defsubr (&Skeyboard_coding_system);
7509   defsubr (&Sfind_operation_coding_system);
7510   defsubr (&Supdate_coding_systems_internal);
7511   defsubr (&Sset_coding_priority_internal);
7512   defsubr (&Sdefine_coding_system_internal);
7513
7514   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7515                doc: /* List of coding systems.
7516
7517 Do not alter the value of this variable manually.  This variable should be
7518 updated by the functions `make-coding-system' and
7519 `define-coding-system-alias'.  */);
7520   Vcoding_system_list = Qnil;
7521
7522   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7523                doc: /* Alist of coding system names.
7524 Each element is one element list of coding system name.
7525 This variable is given to `completing-read' as TABLE argument.
7526
7527 Do not alter the value of this variable manually.  This variable should be
7528 updated by the functions `make-coding-system' and
7529 `define-coding-system-alias'.  */);
7530   Vcoding_system_alist = Qnil;
7531
7532   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7533                doc: /* List of coding-categories (symbols) ordered by priority.
7534
7535 On detecting a coding system, Emacs tries code detection algorithms
7536 associated with each coding-category one by one in this order.  When
7537 one algorithm agrees with a byte sequence of source text, the coding
7538 system bound to the corresponding coding-category is selected.  */);
7539   {
7540     int i;
7541
7542     Vcoding_category_list = Qnil;
7543     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7544       Vcoding_category_list
7545         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7546                  Vcoding_category_list);
7547   }
7548
7549   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7550                doc: /* Specify the coding system for read operations.
7551 It is useful to bind this variable with `let', but do not set it globally.
7552 If the value is a coding system, it is used for decoding on read operation.
7553 If not, an appropriate element is used from one of the coding system alists:
7554 There are three such tables, `file-coding-system-alist',
7555 `process-coding-system-alist', and `network-coding-system-alist'.  */);
7556   Vcoding_system_for_read = Qnil;
7557
7558   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7559                doc: /* Specify the coding system for write operations.
7560 Programs bind this variable with `let', but you should not set it globally.
7561 If the value is a coding system, it is used for encoding of output,
7562 when writing it to a file and when sending it to a file or subprocess.
7563
7564 If this does not specify a coding system, an appropriate element
7565 is used from one of the coding system alists:
7566 There are three such tables, `file-coding-system-alist',
7567 `process-coding-system-alist', and `network-coding-system-alist'.
7568 For output to files, if the above procedure does not specify a coding system,
7569 the value of `buffer-file-coding-system' is used.  */);
7570   Vcoding_system_for_write = Qnil;
7571
7572   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7573                doc: /* Coding system used in the latest file or process I/O.
7574 Also set by `encode-coding-region', `decode-coding-region',
7575 `encode-coding-string' and `decode-coding-string'.  */);
7576   Vlast_coding_system_used = Qnil;
7577
7578   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7579                doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
7580 See info node `Coding Systems' and info node `Text and Binary' concerning
7581 such conversion.  */);
7582   inhibit_eol_conversion = 0;
7583
7584   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7585                doc: /* Non-nil means process buffer inherits coding system of process output.
7586 Bind it to t if the process output is to be treated as if it were a file
7587 read from some filesystem.  */);
7588   inherit_process_coding_system = 0;
7589
7590   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7591                doc: /* Alist to decide a coding system to use for a file I/O operation.
7592 The format is ((PATTERN . VAL) ...),
7593 where PATTERN is a regular expression matching a file name,
7594 VAL is a coding system, a cons of coding systems, or a function symbol.
7595 If VAL is a coding system, it is used for both decoding and encoding
7596 the file contents.
7597 If VAL is a cons of coding systems, the car part is used for decoding,
7598 and the cdr part is used for encoding.
7599 If VAL is a function symbol, the function must return a coding system
7600 or a cons of coding systems which are used as above.  The function gets
7601 the arguments with which `find-operation-coding-system' was called.
7602
7603 See also the function `find-operation-coding-system'
7604 and the variable `auto-coding-alist'.  */);
7605   Vfile_coding_system_alist = Qnil;
7606
7607   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7608     doc: /* Alist to decide a coding system to use for a process I/O operation.
7609 The format is ((PATTERN . VAL) ...),
7610 where PATTERN is a regular expression matching a program name,
7611 VAL is a coding system, a cons of coding systems, or a function symbol.
7612 If VAL is a coding system, it is used for both decoding what received
7613 from the program and encoding what sent to the program.
7614 If VAL is a cons of coding systems, the car part is used for decoding,
7615 and the cdr part is used for encoding.
7616 If VAL is a function symbol, the function must return a coding system
7617 or a cons of coding systems which are used as above.
7618
7619 See also the function `find-operation-coding-system'.  */);
7620   Vprocess_coding_system_alist = Qnil;
7621
7622   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7623     doc: /* Alist to decide a coding system to use for a network I/O operation.
7624 The format is ((PATTERN . VAL) ...),
7625 where PATTERN is a regular expression matching a network service name
7626 or is a port number to connect to,
7627 VAL is a coding system, a cons of coding systems, or a function symbol.
7628 If VAL is a coding system, it is used for both decoding what received
7629 from the network stream and encoding what sent to the network stream.
7630 If VAL is a cons of coding systems, the car part is used for decoding,
7631 and the cdr part is used for encoding.
7632 If VAL is a function symbol, the function must return a coding system
7633 or a cons of coding systems which are used as above.
7634
7635 See also the function `find-operation-coding-system'.  */);
7636   Vnetwork_coding_system_alist = Qnil;
7637
7638   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7639                doc: /* Coding system to use with system messages.
7640 Also used for decoding keyboard input on X Window system.  */);
7641   Vlocale_coding_system = Qnil;
7642
7643   /* The eol mnemonics are reset in startup.el system-dependently.  */
7644   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7645                doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
7646   eol_mnemonic_unix = build_string (":");
7647
7648   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7649                doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
7650   eol_mnemonic_dos = build_string ("\\");
7651
7652   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7653                doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
7654   eol_mnemonic_mac = build_string ("/");
7655
7656   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7657                doc: /* *String displayed in mode line when end-of-line format is not yet determined.  */);
7658   eol_mnemonic_undecided = build_string (":");
7659
7660   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7661                doc: /* *Non-nil enables character translation while encoding and decoding.  */);
7662   Venable_character_translation = Qt;
7663
7664   DEFVAR_LISP ("standard-translation-table-for-decode",
7665                &Vstandard_translation_table_for_decode,
7666                doc: /* Table for translating characters while decoding.  */);
7667   Vstandard_translation_table_for_decode = Qnil;
7668
7669   DEFVAR_LISP ("standard-translation-table-for-encode",
7670                &Vstandard_translation_table_for_encode,
7671                doc: /* Table for translating characters while encoding.  */);
7672   Vstandard_translation_table_for_encode = Qnil;
7673
7674   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7675                doc: /* Alist of charsets vs revision numbers.
7676 While encoding, if a charset (car part of an element) is found,
7677 designate it with the escape sequence identifying revision (cdr part of the element).  */);
7678   Vcharset_revision_alist = Qnil;
7679
7680   DEFVAR_LISP ("default-process-coding-system",
7681                &Vdefault_process_coding_system,
7682                doc: /* Cons of coding systems used for process I/O by default.
7683 The car part is used for decoding a process output,
7684 the cdr part is used for encoding a text to be sent to a process.  */);
7685   Vdefault_process_coding_system = Qnil;
7686
7687   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7688                doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
7689 This is a vector of length 256.
7690 If Nth element is non-nil, the existence of code N in a file
7691 \(or output of subprocess) doesn't prevent it to be detected as
7692 a coding system of ISO 2022 variant which has a flag
7693 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
7694 or reading output of a subprocess.
7695 Only the 128th through 159th elements have a meaning.  */);
7696   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7697
7698   DEFVAR_LISP ("select-safe-coding-system-function",
7699                &Vselect_safe_coding_system_function,
7700                doc: /* Function to call to select safe coding system for encoding a text.
7701
7702 If set, this function is called to force a user to select a proper
7703 coding system which can encode the text in the case that a default
7704 coding system used in each operation can't encode the text.
7705
7706 The default value is `select-safe-coding-system' (which see).  */);
7707   Vselect_safe_coding_system_function = Qnil;
7708
7709   DEFVAR_BOOL ("coding-system-require-warning",
7710                &coding_system_require_warning,
7711                doc: /* Internal use only.
7712 If non-nil, on writing a file, `select-safe-coding-system-function' is
7713 called even if `coding-system-for-write' is non-nil.  The command
7714 `universal-coding-system-argument' binds this variable to t temporarily.  */);
7715   coding_system_require_warning = 0;
7716
7717
7718   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7719                &inhibit_iso_escape_detection,
7720                doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
7721
7722 By default, on reading a file, Emacs tries to detect how the text is
7723 encoded.  This code detection is sensitive to escape sequences.  If
7724 the sequence is valid as ISO2022, the code is determined as one of
7725 the ISO2022 encodings, and the file is decoded by the corresponding
7726 coding system (e.g. `iso-2022-7bit').
7727
7728 However, there may be a case that you want to read escape sequences in
7729 a file as is.  In such a case, you can set this variable to non-nil.
7730 Then, as the code detection ignores any escape sequences, no file is
7731 detected as encoded in some ISO2022 encoding.  The result is that all
7732 escape sequences become visible in a buffer.
7733
7734 The default value is nil, and it is strongly recommended not to change
7735 it.  That is because many Emacs Lisp source files that contain
7736 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
7737 in Emacs's distribution, and they won't be decoded correctly on
7738 reading if you suppress escape sequence detection.
7739
7740 The other way to read escape sequences in a file without decoding is
7741 to explicitly specify some coding system that doesn't use ISO2022's
7742 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument].  */);
7743   inhibit_iso_escape_detection = 0;
7744
7745   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
7746                doc: /* Char table for translating self-inserting characters.
7747 This is applied to the result of input methods, not their input.  See also
7748 `keyboard-translate-table'.  */);
7749     Vtranslation_table_for_input = Qnil;
7750 }
7751
7752 /* Return the strerror text for ERROR_NUMBER, passed through
7753    Vlocale_coding_system when that is non-nil.  In that case the
7754    result points into a Lisp string's data (NOTE(review): lifetime
7755    presumably tied to that string -- confirm with callers).  */
7756 char *
7757 emacs_strerror (error_number)
7758      int error_number;
7759 {
7760   char *message;
7761   Lisp_Object decoded;
7762
7763   synchronize_system_messages_locale ();
7764   message = strerror (error_number);
7765   if (NILP (Vlocale_coding_system))
7766     return message;
7767   decoded = code_convert_string_norecord (build_string (message),
7768                                           Vlocale_coding_system, 0);
7769   return (char *) SDATA (decoded);
7770 }
7771
7772 #endif /* emacs */
7773