src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   0. General comments
  25   1. Preamble
  26   2. Emacs' internal format (emacs-mule) handlers
  27   3. ISO2022 handlers
  28   4. Shift-JIS and BIG5 handlers
  29   5. CCL handlers
  30   6. End-of-line handlers
  31   7. C library functions
  32   8. Emacs Lisp library functions
  33   9. Post-amble
  34
  35 */
  36
  37 /*** 0. General comments ***/
  38
  39
  40 /*** GENERAL NOTE on CODING SYSTEM ***
  41
  42   Coding system is an encoding mechanism of one or more character
  43   sets.  Here's a list of coding systems which Emacs can handle.  When
  44   we say "decode", it means converting some other coding system to
   45   Emacs' internal format (emacs-mule), and when we say "encode",
  46   it means converting the coding system emacs-mule to some other
  47   coding system.
  48
  49   0. Emacs' internal format (emacs-mule)
  50
  51   Emacs itself holds a multi-lingual character in a buffer and a string
  52   in a special format.  Details are described in section 2.
  53
  54   1. ISO2022
  55
  56   The most famous coding system for multiple character sets.  X's
  57   Compound Text, various EUCs (Extended Unix Code), and coding
  58   systems used in Internet communication such as ISO-2022-JP are
  59   all variants of ISO2022.  Details are described in section 3.
  60
  61   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  62
  63   A coding system to encode character sets: ASCII, JISX0201, and
  64   JISX0208.  Widely used for PC's in Japan.  Details are described in
  65   section 4.
  66
  67   3. BIG5
  68
  69   A coding system to encode character sets: ASCII and Big5.  Widely
  70   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  71   described in section 4.  In this file, when we write "BIG5"
  72   (all uppercase), we mean the coding system, and when we write
  73   "Big5" (capitalized), we mean the character set.
  74
  75   4. Raw text
  76
  77   A coding system for a text containing random 8-bit code.  Emacs does
  78   no code conversion on such a text except for end-of-line format.
  79
  80   5. Other
  81
  82   If a user wants to read/write a text encoded in a coding system not
  83   listed above, he can supply a decoder and an encoder for it in CCL
  84   (Code Conversion Language) programs.  Emacs executes the CCL program
  85   while reading/writing.
  86
  87   Emacs represents a coding system by a Lisp symbol that has a property
  88   `coding-system'.  But, before actually using the coding system, the
  89   information about it is set in a structure of type `struct
  90   coding_system' for rapid processing.  See section 6 for more details.
  91
  92 */
  93
  94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  95
  96   How end-of-line of a text is encoded depends on a system.  For
  97   instance, Unix's format is just one byte of `line-feed' code,
  98   whereas DOS's format is two-byte sequence of `carriage-return' and
  99   `line-feed' codes.  MacOS's format is usually one byte of
 100   `carriage-return'.
 101
 102   Since text characters encoding and end-of-line encoding are
 103   independent, any coding system described above can take
 104   any format of end-of-line.  So, Emacs has information of format of
 105   end-of-line in each coding-system.  See section 6 for more details.
 106
 107 */
 108
 109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 110
 111   These functions check if a text between SRC and SRC_END is encoded
 112   in the coding system category XXX.  Each returns an integer value in
  113   which appropriate flag bits for the category XXX are set.  The flag
 114   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 115   template of these functions.  */
 116 #if 0
 117 int
 118 detect_coding_emacs_mule (src, src_end)
 119      unsigned char *src, *src_end;
 120 {
 121   ...
 122 }
 123 #endif
 124
 125 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 126
 127   These functions decode SRC_BYTES length of unibyte text at SOURCE
 128   encoded in CODING to Emacs' internal format.  The resulting
 129   multibyte text goes to a place pointed to by DESTINATION, the length
 130   of which should not exceed DST_BYTES.
 131
 132   These functions set the information of original and decoded texts in
 133   the members produced, produced_char, consumed, and consumed_char of
 134   the structure *CODING.  They also set the member result to one of
 135   CODING_FINISH_XXX indicating how the decoding finished.
 136
 137   DST_BYTES zero means that source area and destination area are
  138   overlapped, which means that we can produce decoded text until it
  139   reaches the head of the not-yet-decoded source text.
 140
 141   Below is a template of these functions.  */
 142 #if 0
 143 static void
 144 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 145      struct coding_system *coding;
 146      unsigned char *source, *destination;
 147      int src_bytes, dst_bytes;
 148 {
 149   ...
 150 }
 151 #endif
 152
 153 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 154
 155   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 156   internal multibyte format to CODING.  The resulting unibyte text
 157   goes to a place pointed to by DESTINATION, the length of which
 158   should not exceed DST_BYTES.
 159
 160   These functions set the information of original and encoded texts in
 161   the members produced, produced_char, consumed, and consumed_char of
 162   the structure *CODING.  They also set the member result to one of
 163   CODING_FINISH_XXX indicating how the encoding finished.
 164
 165   DST_BYTES zero means that source area and destination area are
  166   overlapped, which means that we can produce encoded text until it
  167   reaches the head of the not-yet-encoded source text.
 168
 169   Below is a template of these functions.  */
 170 #if 0
 171 static void
 172 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 173      struct coding_system *coding;
 174      unsigned char *source, *destination;
 175      int src_bytes, dst_bytes;
 176 {
 177   ...
 178 }
 179 #endif
 180
 181 /*** COMMONLY USED MACROS ***/
 182
  183 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
  184    get one and two bytes from the source text respectively.
 185    If there are not enough bytes in the source, they jump to
 186    `label_end_of_loop'.  The caller should set variables `coding',
 187    `src' and `src_end' to appropriate pointer in advance.  These
 188    macros are called from decoding routines `decode_coding_XXX', thus
 189    it is assumed that the source text is unibyte.  */
 190
 191 #define ONE_MORE_BYTE(c1)                                       \
 192   do {                                                          \
 193     if (src >= src_end)                                         \
 194       {                                                         \
 195         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 196         goto label_end_of_loop;                                 \
 197       }                                                         \
 198     c1 = *src++;                                                \
 199   } while (0)
 200
 201 #define TWO_MORE_BYTES(c1, c2)                                  \
 202   do {                                                          \
 203     if (src + 1 >= src_end)                                     \
 204       {                                                         \
 205         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 206         goto label_end_of_loop;                                 \
 207       }                                                         \
 208     c1 = *src++;                                                \
 209     c2 = *src++;                                                \
 210   } while (0)
 211
 212
 213 /* Set C to the next character at the source text pointed by `src'.
 214    If there are not enough characters in the source, jump to
 215    `label_end_of_loop'.  The caller should set variables `coding'
 216    `src', `src_end', and `translation_table' to appropriate pointers
 217    in advance.  This macro is used in encoding routines
 218    `encode_coding_XXX', thus it assumes that the source text is in
 219    multibyte form except for 8-bit characters.  8-bit characters are
 220    in multibyte form if coding->src_multibyte is nonzero, else they
 221    are represented by a single byte.  */
 222
 223 #define ONE_MORE_CHAR(c)                                        \
 224   do {                                                          \
 225     int len = src_end - src;                                    \
 226     int bytes;                                                  \
 227     if (len <= 0)                                               \
 228       {                                                         \
 229         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 230         goto label_end_of_loop;                                 \
 231       }                                                         \
 232     if (coding->src_multibyte                                   \
 233         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 234       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 235     else                                                        \
 236       c = *src, bytes = 1;                                      \
 237     if (!NILP (translation_table))                              \
 238       c = translate_char (translation_table, c, -1, 0, 0);      \
 239     src += bytes;                                               \
 240   } while (0)
 241
 242
 243 /* Produce a multibyte form of characater C to `dst'.  Jump to
 244    `label_end_of_loop' if there's not enough space at `dst'.
 245
 246    If we are now in the middle of composition sequence, the decoded
 247    character may be ALTCHAR (for the current composition).  In that
 248    case, the character goes to coding->cmp_data->data instead of
 249    `dst'.
 250
 251    This macro is used in decoding routines.  */
 252
 253 #define EMIT_CHAR(c)                                                    \
 254   do {                                                                  \
 255     if (! COMPOSING_P (coding)                                          \
 256         || coding->composing == COMPOSITION_RELATIVE                    \
 257         || coding->composing == COMPOSITION_WITH_RULE)                  \
 258       {                                                                 \
 259         int bytes = CHAR_BYTES (c);                                     \
 260         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 261           {                                                             \
 262             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 263             goto label_end_of_loop;                                     \
 264           }                                                             \
 265         dst += CHAR_STRING (c, dst);                                    \
 266         coding->produced_char++;                                        \
 267       }                                                                 \
 268                                                                         \
 269     if (COMPOSING_P (coding)                                            \
 270         && coding->composing != COMPOSITION_RELATIVE)                   \
 271       {                                                                 \
 272         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 273         coding->composition_rule_follows                                \
 274           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 275       }                                                                 \
 276   } while (0)
 277
 278
 279 #define EMIT_ONE_BYTE(c)                                        \
 280   do {                                                          \
 281     if (dst >= (dst_bytes ? dst_end : src))                     \
 282       {                                                         \
 283         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 284         goto label_end_of_loop;                                 \
 285       }                                                         \
 286     *dst++ = c;                                                 \
 287   } while (0)
 288
 289 #define EMIT_TWO_BYTES(c1, c2)                                  \
 290   do {                                                          \
 291     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 292       {                                                         \
 293         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 294         goto label_end_of_loop;                                 \
 295       }                                                         \
 296     *dst++ = c1, *dst++ = c2;                                   \
 297   } while (0)
 298
 299 #define EMIT_BYTES(from, to)                                    \
 300   do {                                                          \
 301     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 302       {                                                         \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     while (from < to)                                           \
 307       *dst++ = *from++;                                         \
 308   } while (0)
 309
 310 \f
 311 /*** 1. Preamble ***/
 312
 313 #ifdef emacs
 314 #include <config.h>
 315 #endif
 316
 317 #include <stdio.h>
 318
 319 #ifdef emacs
 320
 321 #include "lisp.h"
 322 #include "buffer.h"
 323 #include "charset.h"
 324 #include "composite.h"
 325 #include "ccl.h"
 326 #include "coding.h"
 327 #include "window.h"
 328
 329 #else  /* not emacs */
 330
 331 #include "mulelib.h"
 332
 333 #endif /* not emacs */
 334
 335 Lisp_Object Qcoding_system, Qeol_type;
 336 Lisp_Object Qbuffer_file_coding_system;
 337 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 338 Lisp_Object Qno_conversion, Qundecided;
 339 Lisp_Object Qcoding_system_history;
 340 Lisp_Object Qsafe_chars;
 341 Lisp_Object Qvalid_codes;
 342
 343 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 344 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 345 Lisp_Object Qstart_process, Qopen_network_stream;
 346 Lisp_Object Qtarget_idx;
 347
 348 Lisp_Object Vselect_safe_coding_system_function;
 349
 350 /* Mnemonic string for each format of end-of-line.  */
 351 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 352 /* Mnemonic string to indicate format of end-of-line is not yet
 353    decided.  */
 354 Lisp_Object eol_mnemonic_undecided;
 355
 356 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 357    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 358 int system_eol_type;
 359
 360 #ifdef emacs
 361
 362 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 363
 364 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 365
 366 /* Coding system emacs-mule and raw-text are for converting only
 367    end-of-line format.  */
 368 Lisp_Object Qemacs_mule, Qraw_text;
 369
 370 /* Coding-systems are handed between Emacs Lisp programs and C internal
 371    routines by the following three variables.  */
 372 /* Coding-system for reading files and receiving data from process.  */
 373 Lisp_Object Vcoding_system_for_read;
 374 /* Coding-system for writing files and sending data to process.  */
 375 Lisp_Object Vcoding_system_for_write;
 376 /* Coding-system actually used in the latest I/O.  */
 377 Lisp_Object Vlast_coding_system_used;
 378
 379 /* A vector of length 256 which contains information about special
 380    Latin codes (especially for dealing with Microsoft codes).  */
 381 Lisp_Object Vlatin_extra_code_table;
 382
 383 /* Flag to inhibit code conversion of end-of-line format.  */
 384 int inhibit_eol_conversion;
 385
 386 /* Flag to inhibit ISO2022 escape sequence detection.  */
 387 int inhibit_iso_escape_detection;
 388
 389 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 390 int inherit_process_coding_system;
 391
 392 /* Coding system to be used to encode text for terminal display.  */
 393 struct coding_system terminal_coding;
 394
 395 /* Coding system to be used to encode text for terminal display when
 396    terminal coding system is nil.  */
 397 struct coding_system safe_terminal_coding;
 398
 399 /* Coding system of what is sent from terminal keyboard.  */
 400 struct coding_system keyboard_coding;
 401
 402 /* Default coding system to be used to write a file.  */
 403 struct coding_system default_buffer_file_coding;
 404
 405 Lisp_Object Vfile_coding_system_alist;
 406 Lisp_Object Vprocess_coding_system_alist;
 407 Lisp_Object Vnetwork_coding_system_alist;
 408
 409 Lisp_Object Vlocale_coding_system;
 410
 411 #endif /* emacs */
 412
 413 Lisp_Object Qcoding_category, Qcoding_category_index;
 414
 415 /* List of symbols `coding-category-xxx' ordered by priority.  */
 416 Lisp_Object Vcoding_category_list;
 417
 418 /* Table of coding categories (Lisp symbols).  */
 419 Lisp_Object Vcoding_category_table;
 420
 421 /* Table of names of symbol for each coding-category.  */
 422 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 423   "coding-category-emacs-mule",
 424   "coding-category-sjis",
 425   "coding-category-iso-7",
 426   "coding-category-iso-7-tight",
 427   "coding-category-iso-8-1",
 428   "coding-category-iso-8-2",
 429   "coding-category-iso-7-else",
 430   "coding-category-iso-8-else",
 431   "coding-category-ccl",
 432   "coding-category-big5",
 433   "coding-category-utf-8",
 434   "coding-category-utf-16-be",
 435   "coding-category-utf-16-le",
 436   "coding-category-raw-text",
 437   "coding-category-binary"
 438 };
 439
 440 /* Table of pointers to coding systems corresponding to each coding
 441    categories.  */
 442 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 443
 444 /* Table of coding category masks.  Nth element is a mask for a coding
 445    cateogry of which priority is Nth.  */
 446 static
 447 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 448
 449 /* Flag to tell if we look up translation table on character code
 450    conversion.  */
 451 Lisp_Object Venable_character_translation;
 452 /* Standard translation table to look up on decoding (reading).  */
 453 Lisp_Object Vstandard_translation_table_for_decode;
 454 /* Standard translation table to look up on encoding (writing).  */
 455 Lisp_Object Vstandard_translation_table_for_encode;
 456
 457 Lisp_Object Qtranslation_table;
 458 Lisp_Object Qtranslation_table_id;
 459 Lisp_Object Qtranslation_table_for_decode;
 460 Lisp_Object Qtranslation_table_for_encode;
 461
 462 /* Alist of charsets vs revision number.  */
 463 Lisp_Object Vcharset_revision_alist;
 464
 465 /* Default coding systems used for process I/O.  */
 466 Lisp_Object Vdefault_process_coding_system;
 467
 468 /* Global flag to tell that we can't call post-read-conversion and
 469    pre-write-conversion functions.  Usually the value is zero, but it
 470    is set to 1 temporarily while such functions are running.  This is
 471    to avoid infinite recursive call.  */
 472 static int inhibit_pre_post_conversion;
 473
 474 /* Char-table containing safe coding systems of each character.  */
 475 Lisp_Object Vchar_coding_system_table;
 476 Lisp_Object Qchar_coding_system;
 477
 478 /* Return `safe-chars' property of coding system CODING.  Don't check
 479    validity of CODING.  */
 480
 481 Lisp_Object
 482 coding_safe_chars (coding)
 483      struct coding_system *coding;
 484 {
 485   Lisp_Object coding_spec, plist, safe_chars;
 486
 487   coding_spec = Fget (coding->symbol, Qcoding_system);
 488   plist = XVECTOR (coding_spec)->contents[3];
 489   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
 490   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 491 }
 492
 493 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 494   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 495
 496 \f
 497 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 498
 499 /* Emacs' internal format for encoding multiple character sets is a
 500    kind of multi-byte encoding, i.e. characters are encoded by
 501    variable-length sequences of one-byte codes.
 502
 503    ASCII characters and control characters (e.g. `tab', `newline') are
 504    represented by one-byte sequences which are their ASCII codes, in
 505    the range 0x00 through 0x7F.
 506
 507    8-bit characters of the range 0x80..0x9F are represented by
 508    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 509    code + 0x20).
 510
 511    8-bit characters of the range 0xA0..0xFF are represented by
 512    one-byte sequences which are their 8-bit code.
 513
 514    The other characters are represented by a sequence of `base
 515    leading-code', optional `extended leading-code', and one or two
 516    `position-code's.  The length of the sequence is determined by the
 517    base leading-code.  Leading-code takes the range 0x80 through 0x9F,
 518    whereas extended leading-code and position-code take the range 0xA0
 519    through 0xFF.  See `charset.h' for more details about leading-code
 520    and position-code.
 521
 522    --- CODE RANGE of Emacs' internal format ---
 523    character set        range
 524    -------------        -----
 525    ascii                0x00..0x7F
 526    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
  527    eight-bit-graphic    0xA0..0xFF
 528    ELSE                 0x81..0x9F + [0xA0..0xFF]+
 529    ---------------------------------------------
 530
 531   */
 532
      /* Table of 256 code-class values for Emacs' internal format.
         NOTE(review): presumably indexed by byte value and filled in
         elsewhere; neither is visible in this part of the file --
         confirm.  */
  533 enum emacs_code_class_type emacs_code_class[256];
 534
 535 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 536    Check if a text is encoded in Emacs' internal format.  If it is,
 537    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 538
 539 int
 540 detect_coding_emacs_mule (src, src_end)
 541       unsigned char *src, *src_end;
 542 {
 543   unsigned char c;
 544   int composing = 0;
 545   /* Dummy for ONE_MORE_BYTE.  */
 546   struct coding_system dummy_coding;
 547   struct coding_system *coding = &dummy_coding;
 548
 549   while (1)
 550     {
 551       ONE_MORE_BYTE (c);
 552
 553       if (composing)
 554         {
 555           if (c < 0xA0)
 556             composing = 0;
 557           else if (c == 0xA0)
 558             {
 559               ONE_MORE_BYTE (c);
 560               c &= 0x7F;
 561             }
 562           else
 563             c -= 0x20;
 564         }
 565
 566       if (c < 0x20)
 567         {
 568           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 569             return 0;
 570         }
 571       else if (c >= 0x80 && c < 0xA0)
 572         {
 573           if (c == 0x80)
 574             /* Old leading code for a composite character.  */
 575             composing = 1;
 576           else
 577             {
 578               unsigned char *src_base = src - 1;
 579               int bytes;
 580
 581               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 582                                                bytes))
 583                 return 0;
 584               src = src_base + bytes;
 585             }
 586         }
 587     }
 588  label_end_of_loop:
 589   return CODING_CATEGORY_MASK_EMACS_MULE;
 590 }
 591
 592
 593 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 594
 595 static void
 596 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 597      struct coding_system *coding;
 598      unsigned char *source, *destination;
 599      int src_bytes, dst_bytes;
 600 {
 601   unsigned char *src = source;
 602   unsigned char *src_end = source + src_bytes;
 603   unsigned char *dst = destination;
 604   unsigned char *dst_end = destination + dst_bytes;
 605   /* SRC_BASE remembers the start position in source in each loop.
 606      The loop will be exited when there's not enough source code, or
 607      when there's not enough destination area to produce a
 608      character.  */
 609   unsigned char *src_base;
 610
 611   coding->produced_char = 0;
 612   while ((src_base = src) < src_end)
 613     {
 614       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 615       int bytes;
 616
 617       if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 618         {
 619           p = src;
 620           src += bytes;
 621         }
 622       else
 623         {
 624           bytes = CHAR_STRING (*src, tmp);
 625           p = tmp;
 626           src++;
 627         }
 628       if (dst + bytes >= (dst_bytes ? dst_end : src))
 629         {
 630           coding->result = CODING_FINISH_INSUFFICIENT_DST;
 631           break;
 632         }
 633       while (bytes--) *dst++ = *p++;
 634       coding->produced_char++;
 635     }
 636   coding->consumed = coding->consumed_char = src_base - source;
 637   coding->produced = dst - destination;
 638 }
 639
      /* Encoding to emacs-mule is handed over entirely to encode_eol,
         which receives the same five arguments unchanged.  */
  640 #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \
  641   encode_eol (coding, source, destination, src_bytes, dst_bytes)
 642
 643
 644 \f
 645 /*** 3. ISO2022 handlers ***/
 646
 647 /* The following note describes the coding system ISO2022 briefly.
 648    Since the intention of this note is to help understand the
 649    functions in this file, some parts are NOT ACCURATE or OVERLY
 650    SIMPLIFIED.  For thorough understanding, please refer to the
 651    original document of ISO2022.
 652
 653    ISO2022 provides many mechanisms to encode several character sets
  654    in 7-bit and 8-bit environments.  For 7-bit environments, all text
  655    is encoded using bytes less than 128.  This may make the encoded
  656    text a little bit longer, but the text passes more easily through
  657    several gateways, some of which strip off the MSB (Most Significant Bit).
 658
 659    There are two kinds of character sets: control character set and
 660    graphic character set.  The former contains control characters such
 661    as `newline' and `escape' to provide control functions (control
 662    functions are also provided by escape sequences).  The latter
 663    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 664    two control character sets and many graphic character sets.
 665
 666    Graphic character sets are classified into one of the following
 667    four classes, according to the number of bytes (DIMENSION) and
 668    number of characters in one dimension (CHARS) of the set:
 669    - DIMENSION1_CHARS94
 670    - DIMENSION1_CHARS96
 671    - DIMENSION2_CHARS94
 672    - DIMENSION2_CHARS96
 673
 674    In addition, each character set is assigned an identification tag,
 675    unique for each set, called "final character" (denoted as <F>
 676    hereafter).  The <F> of each character set is decided by ECMA(*)
 677    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
 678    (0x30..0x3F are for private use only).
 679
 680    Note (*): ECMA = European Computer Manufacturers Association
 681
 682    Here are examples of graphic character set [NAME(<F>)]:
 683         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 684         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 685         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 686         o DIMENSION2_CHARS96 -- none for the moment
 687
 688    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
 689         C0 [0x00..0x1F] -- control character plane 0
 690         GL [0x20..0x7F] -- graphic character plane 0
 691         C1 [0x80..0x9F] -- control character plane 1
 692         GR [0xA0..0xFF] -- graphic character plane 1
 693
 694    A control character set is directly designated and invoked to C0 or
 695    C1 by an escape sequence.  The most common case is that:
 696    - ISO646's  control character set is designated/invoked to C0, and
 697    - ISO6429's control character set is designated/invoked to C1,
 698    and usually these designations/invocations are omitted in encoded
 699    text.  In a 7-bit environment, only C0 can be used, and a control
 700    character for C1 is encoded by an appropriate escape sequence to
 701    fit into the environment.  All control characters for C1 are
 702    defined to have corresponding escape sequences.
 703
 704    A graphic character set is at first designated to one of four
 705    graphic registers (G0 through G3), then these graphic registers are
 706    invoked to GL or GR.  These designations and invocations can be
 707    done independently.  The most common case is that G0 is invoked to
 708    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
 709    these invocations and designations are omitted in encoded text.
 710    In a 7-bit environment, only GL can be used.
 711
 712    When a graphic character set of CHARS94 is invoked to GL, codes
 713    0x20 and 0x7F of the GL area work as control characters SPACE and
 714    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
 715    be used.
 716
 717    There are two ways of invocation: locking-shift and single-shift.
 718    With locking-shift, the invocation lasts until the next different
 719    invocation, whereas with single-shift, the invocation affects the
 720    following character only and doesn't affect the locking-shift
 721    state.  Invocations are done by the following control characters or
 722    escape sequences:
 723
 724    ----------------------------------------------------------------------
 725    abbrev  function                  cntrl escape seq   description
 726    ----------------------------------------------------------------------
 727    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
 728    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
 729    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
 730    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
 731    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
 732    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
 733    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
 734    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
 735    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
 736    ----------------------------------------------------------------------
 737    (*) These are not used by any known coding system.
 738
 739    Control characters for these functions are defined by macros
 740    ISO_CODE_XXX in `coding.h'.
 741
 742    Designations are done by the following escape sequences:
 743    ----------------------------------------------------------------------
 744    escape sequence      description
 745    ----------------------------------------------------------------------
 746    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 747    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 748    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 749    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 750    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 751    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 752    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 753    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 754    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 755    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 756    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 757    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 758    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 759    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 760    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 761    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 762    ----------------------------------------------------------------------
 763
 764    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 765    of dimension 1, chars 94, and final character <F>, etc...
 766
 767    Note (*): Although these designations are not allowed in ISO2022,
 768    Emacs accepts them on decoding, and produces them on encoding
 769    CHARS96 character sets in a coding system which is characterized as
 770    7-bit environment, non-locking-shift, and non-single-shift.
 771
 772    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 773    '(' can be omitted.  We refer to this as "short-form" hereafter.
 774
 775    Now you may notice that there are a lot of ways for encoding the
 776    same multilingual text in ISO2022.  Actually, there exist many
 777    coding systems such as Compound Text (used in X11's inter-client
 778    communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
 779    (used in Korean internet), EUC (Extended UNIX Code, used in Asian
 780    localized platforms), and all of these are variants of ISO2022.
 781
 782    In addition to the above, Emacs handles two more kinds of escape
 783    sequences: ISO6429's direction specification and Emacs' private
 784    sequence for specifying character composition.
 785
 786    ISO6429's direction specification takes the following form:
 787         o CSI ']'      -- end of the current direction
 788         o CSI '0' ']'  -- end of the current direction
 789         o CSI '1' ']'  -- start of left-to-right text
 790         o CSI '2' ']'  -- start of right-to-left text
 791    The control character CSI (0x9B: control sequence introducer) is
 792    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
 793
 794    Character composition specification takes the following form:
 795         o ESC '0' -- start relative composition
 796         o ESC '1' -- end composition
 797         o ESC '2' -- start rule-base composition (*)
 798         o ESC '3' -- start relative composition with alternate chars  (**)
 799         o ESC '4' -- start rule-base composition with alternate chars  (**)
 800   Since these are not standard escape sequences of any ISO standard,
 801   the use of them with these meanings is restricted to Emacs only.
 802
 803   (*) This form is used only in Emacs 20.5 and the older versions,
 804   but the newer versions can safely decode it.
 805   (**) This form is used only in Emacs 21.1 and the newer versions,
 806   and the older versions can't decode it.
 807
 808   Here's a list of example usages of these composition escape
 809   sequences (categorized by `enum composition_method').
 810
 811   COMPOSITION_RELATIVE:
 812         ESC 0 CHAR [ CHAR ] ESC 1
 813   COMPOSITION_WITH_RULE:
 814         ESC 2 CHAR [ RULE CHAR ] ESC 1
 815   COMPOSITION_WITH_ALTCHARS:
 816         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
 817   COMPOSITION_WITH_RULE_ALTCHARS:
 818         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
 819
 820 enum iso_code_class_type iso_code_class[256];  /* ISO2022 code class of each
                                                       byte value (0..255).  */
 821
     /* Non-zero if coding_system_table[IDX] is set and that coding system
        can handle CHARSET, i.e. CHARSET is CHARSET_ASCII or the character C
        is one of the coding system's safe characters, and the coding
        system has a requested designation for CHARSET (anything other
        than CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION).

        The macro assigns to a variable named `safe_chars', which must be
        declared in the scope where the macro is used, and it evaluates
        IDX and CHARSET more than once.  */
 822 #define CHARSET_OK(idx, charset, c)                                     \
 823   (coding_system_table[idx]                                             \
 824    && (charset == CHARSET_ASCII                                         \
 825        || (safe_chars = coding_safe_chars (coding_system_table[idx]),   \
 826            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
 827    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
 828                                               charset)                  \
 829        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 830
 831 #define SHIFT_OUT_OK(idx) \
 832   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 833
 834 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 835    Check if the text between SRC and SRC_END is encoded in ISO2022.
 836    If it is, return an integer in which any appropriate flag bits of:
 837         CODING_CATEGORY_MASK_ISO_7
 838         CODING_CATEGORY_MASK_ISO_7_TIGHT
 839         CODING_CATEGORY_MASK_ISO_8_1
 840         CODING_CATEGORY_MASK_ISO_8_2
 841         CODING_CATEGORY_MASK_ISO_7_ELSE
 842         CODING_CATEGORY_MASK_ISO_8_ELSE
 843    are set.  If a code which should never appear in ISO2022 is found,
 844    return 0.

        MASK holds the categories that have not been ruled out yet, and
        MASK_FOUND holds the categories for which some evidence has been
        seen; the value returned is MASK & MASK_FOUND.  Scanning stops
        as soon as MASK becomes 0.  When `inhibit_iso_escape_detection'
        is non-zero, ESC, SO, SI, CSI, SS2, and SS3 change neither MASK
        nor MASK_FOUND.  */
 845
 846 int
 847 detect_coding_iso2022 (src, src_end)
 848      unsigned char *src, *src_end;
 849 {
 850   int mask = CODING_CATEGORY_MASK_ISO;
 851   int mask_found = 0;
 852   int reg[4], shift_out = 0, single_shifting = 0;
 853   int c, c1, i, charset;
 854   /* Dummy for ONE_MORE_BYTE.  */
 855   struct coding_system dummy_coding;
 856   struct coding_system *coding = &dummy_coding;
 857   Lisp_Object safe_chars;
 858
       /* REG[n] is the charset designated to graphic register Gn so far,
          or -1 if none.  Only G0 starts with a designation (ASCII).  */
 859   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 860   while (mask && src < src_end)
 861     {
 862       ONE_MORE_BYTE (c);
 863       switch (c)
 864         {
 865         case ISO_CODE_ESC:
 866           if (inhibit_iso_escape_detection)
 867             break;
 868           single_shifting = 0;
 869           ONE_MORE_BYTE (c);
 870           if (c >= '(' && c <= '/')
 871             {
 872               /* Designation sequence for a charset of dimension 1.  */
 873               ONE_MORE_BYTE (c1);
 874               if (c1 < ' ' || c1 >= 0x80
 875                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 876                 /* Invalid designation sequence.  Just ignore.  */
 877                 break;
 878               reg[(c - '(') % 4] = charset;
 879             }
 880           else if (c == '$')
 881             {
 882               /* Designation sequence for a charset of dimension 2.  */
 883               ONE_MORE_BYTE (c);
 884               if (c >= '@' && c <= 'B')
 885                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 886                 reg[0] = charset = iso_charset_table[1][0][c];
 887               else if (c >= '(' && c <= '/')
 888                 {
 889                   ONE_MORE_BYTE (c1);
 890                   if (c1 < ' ' || c1 >= 0x80
 891                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 892                     /* Invalid designation sequence.  Just ignore.  */
 893                     break;
 894                   reg[(c - '(') % 4] = charset;
 895                 }
 896               else
 897                 /* Invalid designation sequence.  Just ignore.  */
 898                 break;
 899             }
 900           else if (c == 'N' || c == 'O')
 901             {
 902               /* ESC <Fe> for SS2 or SS3.  */
 903               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
 904               break;
 905             }
 906           else if (c >= '0' && c <= '4')
 907             {
 908               /* ESC <Fp> for start/end composition.  */
 909               mask_found |= CODING_CATEGORY_MASK_ISO;
 910               break;
 911             }
 912           else
 913             /* Invalid escape sequence.  Just ignore.  */
 914             break;
 915
 916           /* We found a valid designation sequence for CHARSET.  */
               /* A designation sequence rules out the 8-bit categories.
                  Each remaining category is kept (and marked as found)
                  only if its coding system accepts CHARSET; otherwise it
                  is ruled out.  */
 917           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 918           c = MAKE_CHAR (charset, 0, 0);
 919           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
 920             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 921           else
 922             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 923           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
 924             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 925           else
 926             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 927           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
 928             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
 929           else
 930             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 931           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
 932             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
 933           else
 934             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 935           break;
 936
 937         case ISO_CODE_SO:
 938           if (inhibit_iso_escape_detection)
 939             break;
 940           single_shifting = 0;
               /* NOTE(review): SHIFT_OUT is initialized to 0 and no
                  assignment to it is visible in this function, so this
                  test always sees 0 and the `shift_out == 1' test under
                  ISO_CODE_SI below can never succeed -- confirm whether
                  it was meant to be updated here.  */
 941           if (shift_out == 0
 942               && (reg[1] >= 0
 943                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 944                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 945             {
 946               /* Locking shift out.  */
 947               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 948               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 949             }
 950           break;
 951
 952         case ISO_CODE_SI:
 953           if (inhibit_iso_escape_detection)
 954             break;
 955           single_shifting = 0;
 956           if (shift_out == 1)
 957             {
 958               /* Locking shift in.  */
 959               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 960               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 961             }
 962           break;
 963
 964         case ISO_CODE_CSI:
 965           single_shifting = 0;
               /* Fall through: CSI shares the handling below, except
                  that it does not start a single shift.  */
 966         case ISO_CODE_SS2:
 967         case ISO_CODE_SS3:
 968           {
 969             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 970
 971             if (inhibit_iso_escape_detection)
 972               break;
 973             if (c != ISO_CODE_CSI)
 974               {
 975                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 976                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 977                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 978                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 979                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 980                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 981                 single_shifting = 1;
 982               }
 983             if (VECTORP (Vlatin_extra_code_table)
 984                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 985               {
 986                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 987                     & CODING_FLAG_ISO_LATIN_EXTRA)
 988                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 989                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 990                     & CODING_FLAG_ISO_LATIN_EXTRA)
 991                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 992               }
 993             mask &= newmask;
 994             mask_found |= newmask;
 995           }
 996           break;
 997
 998         default:
 999           if (c < 0x80)
1000             {
1001               single_shifting = 0;
1002               break;
1003             }
1004           else if (c < 0xA0)
1005             {
                   /* A code in 0x80..0x9F is acceptable only if
                      `latin-extra-code-table' lists it, and then only
                      for the 8-bit categories whose coding system has
                      CODING_FLAG_ISO_LATIN_EXTRA set.  */
1006               single_shifting = 0;
1007               if (VECTORP (Vlatin_extra_code_table)
1008                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1009                 {
1010                   int newmask = 0;
1011
1012                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1013                       & CODING_FLAG_ISO_LATIN_EXTRA)
1014                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1015                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1016                       & CODING_FLAG_ISO_LATIN_EXTRA)
1017                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1018                   mask &= newmask;
1019                   mask_found |= newmask;
1020                 }
1021               else
1022                 return 0;
1023             }
1024           else
1025             {
1026               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1027                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1028               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1029               /* Check the length of succeeding codes of the range
1030                  0xA0..0xFF.  If the byte length is odd, we exclude
1031                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1032                  when we are not single shifting.  */
1033               if (!single_shifting
1034                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1035                 {
                       /* This `i' shadows the function-level `i'.  It
                          counts the bytes of the run, starting with the
                          one already read.  */
1036                   int i = 1;
1037                   while (src < src_end)
1038                     {
1039                       ONE_MORE_BYTE (c);
1040                       if (c < 0xA0)
1041                         break;
1042                       i++;
1043                     }
1044
                       /* An odd length excludes ISO_8_2 only when some
                          source remains after the scan.
                          NOTE(review): the byte that ended the run
                          (C < 0xA0) has been read into C but is not
                          dispatched through the switch above; presumably
                          ONE_MORE_BYTE advanced SRC past it -- confirm
                          that skipping it is intended.  */
1045                   if (i & 1 && src < src_end)
1046                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1047                   else
1048                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1049                 }
1050             }
1051           break;
1052         }
1053     }
       /* No `goto' to this label is visible in the function itself;
          presumably ONE_MORE_BYTE jumps here when the source runs out
          -- confirm against its definition.  */
1054  label_end_of_loop:
1055   return (mask & mask_found);
1056 }
1057
1058 /* Decode a character of which charset is CHARSET, the 1st position
1059    code is C1, the 2nd position code is C2, and return the decoded
1060    character code.  If the variable `translation_table' is non-nil,
1061    return the code translated through it instead.  The variable
        `translation_table' is taken from the scope in which the macro
        is used.  */
1062
1063 #define DECODE_ISO_CHARACTER(charset, c1, c2)   \
1064   (NILP (translation_table)                     \
1065    ? MAKE_CHAR (charset, c1, c2)                \
1066    : translate_char (translation_table, -1, charset, c1, c2))
1067
1068 /* Set designation state into CODING: designate to graphic register
        REG the charset identified by DIMENSION, CHARS, and FINAL_CHAR.

        The macro relies on the variables `coding' and `safe_chars' and
        on the label `label_invalid_code' of the place where it is used.
        It jumps to `label_invalid_code' in these cases:
          - FINAL_CHAR is below '0' or not below 128;
          - no charset is found, or the charset is neither requested for
            REG by CODING nor among the safe characters; REG is then
            recorded in `last_invalid_designation_register';
          - ASCII is designated to register 0 while
            `last_invalid_designation_register' is 0.
        Otherwise `last_invalid_designation_register' is reset to -1 and
        the charset is stored as the designation of REG; its reverse
        charset is stored instead when CODING_MODE_DIRECTION is set and
        a reverse charset exists.

        NOTE(review): MAKE_CHAR is evaluated before CHARSET is checked
        for being non-negative; confirm that it tolerates a negative
        charset.  */
1069 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1070   do {                                                                     \
1071     int charset, c;                                                        \
1072                                                                            \
1073     if (final_char < '0' || final_char >= 128)                             \
1074       goto label_invalid_code;                                             \
1075     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1076                                  make_number (chars),                      \
1077                                  make_number (final_char));                \
1078     c = MAKE_CHAR (charset, 0, 0);                                         \
1079     if (charset >= 0                                                       \
1080         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1081             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1082       {                                                                    \
1083         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1084             && reg == 0                                                    \
1085             && charset == CHARSET_ASCII)                                   \
1086           {                                                                \
1087             /* We should insert this designation sequence as is so         \
1088                that it is surely written back to a file.  */               \
1089             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1090             goto label_invalid_code;                                       \
1091           }                                                                \
1092         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1093         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1094             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1095           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1096         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1097       }                                                                    \
1098     else                                                                   \
1099       {                                                                    \
1100         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1101         goto label_invalid_code;                                           \
1102       }                                                                    \
1103   } while (0)
1104
1105 /* Allocate a memory block for storing information about compositions.
1106    The block is chained to the already allocated blocks.

        The new block records CHAR_OFFSET, starts empty (`used' is 0),
        and becomes CODING->cmp_data; the previous CODING->cmp_data, if
        any, is linked to it through the `prev' and `next' members.
        CODING->cmp_data_start is reset to 0.  */
1107
1108 void
1109 coding_allocate_composition_data (coding, char_offset)
1110      struct coding_system *coding;
1111      int char_offset;
1112 {
       /* The result of xmalloc is used without a null check; presumably
          xmalloc does not return on failure -- confirm.  */
1113   struct composition_data *cmp_data
1114     = (struct composition_data *) xmalloc (sizeof *cmp_data);
1115
1116   cmp_data->char_offset = char_offset;
1117   cmp_data->used = 0;
       /* Append the new block after the current one, if there is one.  */
1118   cmp_data->prev = coding->cmp_data;
1119   cmp_data->next = NULL;
1120   if (coding->cmp_data)
1121     coding->cmp_data->next = cmp_data;
1122   coding->cmp_data = cmp_data;
1123   coding->cmp_data_start = 0;
1124 }
1125
1126 /* Record the starting position START and METHOD of one composition.

        Four slots are reserved at the current end of
        CODING->cmp_data->data, and the index of the first one is
        remembered in CODING->cmp_data_start:
          data[0] -- set to -1 here; CODING_ADD_COMPOSITION_END stores
                     the length of the record in it,
          data[1] -- char_offset of the block plus START,
          data[2] -- left unset here; CODING_ADD_COMPOSITION_END stores
                     the ending position in it,
          data[3] -- METHOD converted to int.  */
1127
1128 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
1129   do {                                                          \
1130     struct composition_data *cmp_data = coding->cmp_data;       \
1131     int *data = cmp_data->data + cmp_data->used;                \
1132     coding->cmp_data_start = cmp_data->used;                    \
1133     data[0] = -1;                                               \
1134     data[1] = cmp_data->char_offset + start;                    \
1135     data[3] = (int) method;                                     \
1136     cmp_data->used += 4;                                        \
1137   } while (0)
1138
1139 /* Record the ending position END of the current composition.

        The record that begins at index CODING->cmp_data_start is
        completed: data[0] receives the number of slots used since that
        index, and data[2] receives char_offset of the block plus END.  */
1140
1141 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
1142   do {                                                          \
1143     struct composition_data *cmp_data = coding->cmp_data;       \
1144     int *data = cmp_data->data + coding->cmp_data_start;        \
1145     data[0] = cmp_data->used - coding->cmp_data_start;          \
1146     data[2] = cmp_data->char_offset + end;                      \
1147   } while (0)
1148
1149 /* Record one COMPONENT (alternate character or composition rule).
        COMPONENT is stored in the next free slot of
        CODING->cmp_data->data, and `used' is incremented.  No bound
        check is done here.  */
1150
1151 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)     \
1152   (coding->cmp_data->data[coding->cmp_data->used++] = component)
1153
1154 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
        C1 is the character that followed ESC.  The macro relies on the
        variables `coding' and `dst' and on the label
        `label_end_of_loop' of the place where it is used.

        If composition is disabled, ESC and the low 7 bits of C1 are
        copied to DST and counted as two produced characters.  If no
        composition is in progress, a new one is started with the
        method selected by C1 ('0' relative, '2' with rule, '3' with
        alternate chars, anything else with rule and alternate chars).
        If a composition is already in progress with one of the two
        alternate-chars methods, the method is switched to
        COMPOSITION_RELATIVE.  */
1155
1156 #define DECODE_COMPOSITION_START(c1)                                       \
1157   do {                                                                     \
1158     if (coding->composing == COMPOSITION_DISABLED)                         \
1159       {                                                                    \
1160         *dst++ = ISO_CODE_ESC;                                             \
1161         *dst++ = c1 & 0x7f;                                                \
1162         coding->produced_char += 2;                                        \
1163       }                                                                    \
1164     else if (!COMPOSING_P (coding))                                        \
1165       {                                                                    \
1166         /* This is surely the start of a composition.  We must be sure     \
1167            that coding->cmp_data has enough space to store the             \
1168            information about the composition.  If not, terminate the       \
1169            current decoding loop, allocate one more memory block for       \
1170            coding->cmp_data in the caller, then start the decoding         \
1171            loop again.  We can't allocate memory here directly because     \
1172            it may cause buffer/string relocation.  */                      \
1173         if (!coding->cmp_data                                              \
1174             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1175                 >= COMPOSITION_DATA_SIZE))                                 \
1176           {                                                                \
1177             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1178             goto label_end_of_loop;                                        \
1179           }                                                                \
1180         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1181                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1182                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1183                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1184         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1185                                       coding->composing);                  \
1186         coding->composition_rule_follows = 0;                              \
1187       }                                                                    \
1188     else                                                                   \
1189       {                                                                    \
1190         /* We are already handling a composition.  If the method is        \
1191            the following two, the codes following the current escape       \
1192            sequence are actual characters stored in a buffer.  */          \
1193         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1194             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1195           {                                                                \
1196             coding->composing = COMPOSITION_RELATIVE;                      \
1197             coding->composition_rule_follows = 0;                          \
1198           }                                                                \
1199       }                                                                    \
1200   } while (0)
1201
1202 /* Handle composition end sequence ESC 1.
        C1 is the character that followed ESC.  The macro relies on the
        variables `coding' and `dst' of the place where it is used.

        If composition is disabled, ESC and C1 are copied to DST and
        counted as two produced characters.  Otherwise the current
        number of produced characters is recorded as the end of the
        composition, and CODING->composing is set to COMPOSITION_NO.  */
1203
1204 #define DECODE_COMPOSITION_END(c1)                                      \
1205   do {                                                                  \
1206     if (coding->composing == COMPOSITION_DISABLED)                      \
1207       {                                                                 \
1208         *dst++ = ISO_CODE_ESC;                                          \
1209         *dst++ = c1;                                                    \
1210         coding->produced_char += 2;                                     \
1211       }                                                                 \
1212     else                                                                \
1213       {                                                                 \
1214         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1215         coding->composing = COMPOSITION_NO;                             \
1216       }                                                                 \
1217   } while (0)
1218
1219 /* Decode a composition rule from the byte C1 (and maybe one more byte
1220    from SRC) and store one encoded composition rule in
1221    coding->cmp_data.

        C1 must be a modifiable variable: 32 is subtracted from it in
        place.  A resulting value below 81 is the one-byte old format,
        which packs two reference points as C1 / 9 and C1 % 9 (a value
        of 4 is replaced by 10).  A value from 81 to 92 is the two-byte
        new format, whose second byte is read with ONE_MORE_BYTE into
        the variable `c2' of the place where the macro is used.  For
        any other value the rule stored is 0.  In all cases
        CODING->composition_rule_follows is cleared afterwards.  */
1222
1223 #define DECODE_COMPOSITION_RULE(c1)                                     \
1224   do {                                                                  \
1225     int rule = 0;                                                       \
1226     (c1) -= 32;                                                         \
1227     if (c1 < 81)                /* old format (before ver.21) */        \
1228       {                                                                 \
1229         int gref = (c1) / 9;                                            \
1230         int nref = (c1) % 9;                                            \
1231         if (gref == 4) gref = 10;                                       \
1232         if (nref == 4) nref = 10;                                       \
1233         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1234       }                                                                 \
1235     else if (c1 < 93)           /* new format (after ver.21) */         \
1236       {                                                                 \
1237         ONE_MORE_BYTE (c2);                                             \
1238         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1239       }                                                                 \
1240     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1241     coding->composition_rule_follows = 0;                               \
1242   } while (0)
1243
1244
1243
1244
1245 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1246
1247 static void
1248 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1249      struct coding_system *coding;
1250      unsigned char *source, *destination;
1251      int src_bytes, dst_bytes;
1252 {
1253   unsigned char *src = source;
1254   unsigned char *src_end = source + src_bytes;
1255   unsigned char *dst = destination;
1256   unsigned char *dst_end = destination + dst_bytes;
1257   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1258   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1259   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1260   /* SRC_BASE remembers the start position in source in each loop.
1261      The loop will be exited when there's not enough source code
1262      (within macro ONE_MORE_BYTE), or when there's not enough
1263      destination area to produce a character (within macro
1264      EMIT_CHAR).  */
1265   unsigned char *src_base;
1266   int c, charset;
1267   Lisp_Object translation_table;
1268   Lisp_Object safe_chars;
1269
1270   safe_chars = coding_safe_chars (coding);
1271
1272   if (NILP (Venable_character_translation))
1273     translation_table = Qnil;
1274   else
1275     {
1276       translation_table = coding->translation_table_for_decode;
1277       if (NILP (translation_table))
1278         translation_table = Vstandard_translation_table_for_decode;
1279     }
1280
1281   coding->result = CODING_FINISH_NORMAL;
1282
1283   while (1)
1284     {
1285       int c1, c2;
1286
1287       src_base = src;
1288       ONE_MORE_BYTE (c1);
1289
1290       /* We produce no character or one character.  */
1291       switch (iso_code_class [c1])
1292         {
1293         case ISO_0x20_or_0x7F:
1294           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1295             {
1296               DECODE_COMPOSITION_RULE (c1);
1297               continue;
1298             }
1299           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1300             {
1301               /* This is SPACE or DEL.  */
1302               charset = CHARSET_ASCII;
1303               break;
1304             }
1305           /* This is a graphic character, we fall down ...  */
1306
1307         case ISO_graphic_plane_0:
1308           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1309             {
1310               DECODE_COMPOSITION_RULE (c1);
1311               continue;
1312             }
1313           charset = charset0;
1314           break;
1315
1316         case ISO_0xA0_or_0xFF:
1317           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1318               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1319             goto label_invalid_code;
1320           /* This is a graphic character, we fall down ... */
1321
1322         case ISO_graphic_plane_1:
1323           if (charset1 < 0)
1324             goto label_invalid_code;
1325           charset = charset1;
1326           break;
1327
1328         case ISO_control_0:
1329           if (COMPOSING_P (coding))
1330             DECODE_COMPOSITION_END ('1');
1331
1332           /* All ISO2022 control characters in this class have the
1333              same representation in Emacs internal format.  */
1334           if (c1 == '\n'
1335               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1336               && (coding->eol_type == CODING_EOL_CR
1337                   || coding->eol_type == CODING_EOL_CRLF))
1338             {
1339               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1340               goto label_end_of_loop;
1341             }
1342           charset = CHARSET_ASCII;
1343           break;
1344
1345         case ISO_control_1:
1346           if (COMPOSING_P (coding))
1347             DECODE_COMPOSITION_END ('1');
1348           goto label_invalid_code;
1349
1350         case ISO_carriage_return:
1351           if (COMPOSING_P (coding))
1352             DECODE_COMPOSITION_END ('1');
1353
1354           if (coding->eol_type == CODING_EOL_CR)
1355             c1 = '\n';
1356           else if (coding->eol_type == CODING_EOL_CRLF)
1357             {
1358               ONE_MORE_BYTE (c1);
1359               if (c1 != ISO_CODE_LF)
1360                 {
1361                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1362                     {
1363                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
1364                       goto label_end_of_loop;
1365                     }
1366                   src--;
1367                   c1 = '\r';
1368                 }
1369             }
1370           charset = CHARSET_ASCII;
1371           break;
1372
1373         case ISO_shift_out:
1374           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1375               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1376             goto label_invalid_code;
1377           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1378           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1379           continue;
1380
1381         case ISO_shift_in:
1382           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1383             goto label_invalid_code;
1384           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1385           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1386           continue;
1387
1388         case ISO_single_shift_2_7:
1389         case ISO_single_shift_2:
1390           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1391             goto label_invalid_code;
1392           /* SS2 is handled as an escape sequence of ESC 'N' */
1393           c1 = 'N';
1394           goto label_escape_sequence;
1395
1396         case ISO_single_shift_3:
1397           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1398             goto label_invalid_code;
1399           /* SS3 is handled as an escape sequence of ESC 'O' */
1400           c1 = 'O';
1401           goto label_escape_sequence;
1402
1403         case ISO_control_sequence_introducer:
1404           /* CSI is handled as an escape sequence of ESC '[' ...  */
1405           c1 = '[';
1406           goto label_escape_sequence;
1407
1408         case ISO_escape:
1409           ONE_MORE_BYTE (c1);
1410         label_escape_sequence:
1411           /* Escape sequences handled by Emacs are invocation,
1412              designation, direction specification, and character
1413              composition specification.  */
1414           switch (c1)
1415             {
1416             case '&':           /* revision of following character set */
1417               ONE_MORE_BYTE (c1);
1418               if (!(c1 >= '@' && c1 <= '~'))
1419                 goto label_invalid_code;
1420               ONE_MORE_BYTE (c1);
1421               if (c1 != ISO_CODE_ESC)
1422                 goto label_invalid_code;
1423               ONE_MORE_BYTE (c1);
1424               goto label_escape_sequence;
1425
1426             case '$':           /* designation of 2-byte character set */
1427               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1428                 goto label_invalid_code;
1429               ONE_MORE_BYTE (c1);
1430               if (c1 >= '@' && c1 <= 'B')
1431                 {       /* designation of JISX0208.1978, GB2312.1980,
1432                            or JISX0208.1980 */
1433                   DECODE_DESIGNATION (0, 2, 94, c1);
1434                 }
1435               else if (c1 >= 0x28 && c1 <= 0x2B)
1436                 {       /* designation of DIMENSION2_CHARS94 character set */
1437                   ONE_MORE_BYTE (c2);
1438                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1439                 }
1440               else if (c1 >= 0x2C && c1 <= 0x2F)
1441                 {       /* designation of DIMENSION2_CHARS96 character set */
1442                   ONE_MORE_BYTE (c2);
1443                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1444                 }
1445               else
1446                 goto label_invalid_code;
1447               /* We must update these variables now.  */
1448               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1449               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1450               continue;
1451
1452             case 'n':           /* invocation of locking-shift-2 */
1453               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1454                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1455                 goto label_invalid_code;
1456               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1457               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1458               continue;
1459
1460             case 'o':           /* invocation of locking-shift-3 */
1461               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1462                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1463                 goto label_invalid_code;
1464               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1465               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1466               continue;
1467
1468             case 'N':           /* invocation of single-shift-2 */
1469               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1470                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1471                 goto label_invalid_code;
1472               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1473               ONE_MORE_BYTE (c1);
1474               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1475                 goto label_invalid_code;
1476               break;
1477
1478             case 'O':           /* invocation of single-shift-3 */
1479               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1480                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1481                 goto label_invalid_code;
1482               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1483               ONE_MORE_BYTE (c1);
1484               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1485                 goto label_invalid_code;
1486               break;
1487
1488             case '0': case '2': case '3': case '4': /* start composition */
1489               DECODE_COMPOSITION_START (c1);
1490               continue;
1491
1492             case '1':           /* end composition */
1493               DECODE_COMPOSITION_END (c1);
1494               continue;
1495
1496             case '[':           /* specification of direction */
1497               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1498                 goto label_invalid_code;
1499               /* For the moment, nested direction is not supported.
1500                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1501                  left-to-right, and nonzero means right-to-left.  */
1502               ONE_MORE_BYTE (c1);
1503               switch (c1)
1504                 {
1505                 case ']':       /* end of the current direction */
1506                   coding->mode &= ~CODING_MODE_DIRECTION;
1507
1508                 case '0':       /* end of the current direction */
1509                 case '1':       /* start of left-to-right direction */
1510                   ONE_MORE_BYTE (c1);
1511                   if (c1 == ']')
1512                     coding->mode &= ~CODING_MODE_DIRECTION;
1513                   else
1514                     goto label_invalid_code;
1515                   break;
1516
1517                 case '2':       /* start of right-to-left direction */
1518                   ONE_MORE_BYTE (c1);
1519                   if (c1 == ']')
1520                     coding->mode |= CODING_MODE_DIRECTION;
1521                   else
1522                     goto label_invalid_code;
1523                   break;
1524
1525                 default:
1526                   goto label_invalid_code;
1527                 }
1528               continue;
1529
1530             default:
1531               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1532                 goto label_invalid_code;
1533               if (c1 >= 0x28 && c1 <= 0x2B)
1534                 {       /* designation of DIMENSION1_CHARS94 character set */
1535                   ONE_MORE_BYTE (c2);
1536                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1537                 }
1538               else if (c1 >= 0x2C && c1 <= 0x2F)
1539                 {       /* designation of DIMENSION1_CHARS96 character set */
1540                   ONE_MORE_BYTE (c2);
1541                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1542                 }
1543               else
1544                 goto label_invalid_code;
1545               /* We must update these variables now.  */
1546               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1547               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1548               continue;
1549             }
1550         }
1551
1552       /* Now we know CHARSET and 1st position code C1 of a character.
1553          Produce a multibyte sequence for that character while getting
1554          2nd position code C2 if necessary.  */
1555       if (CHARSET_DIMENSION (charset) == 2)
1556         {
1557           ONE_MORE_BYTE (c2);
1558           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
1559             /* C2 is not in a valid range.  */
1560             goto label_invalid_code;
1561         }
1562       c = DECODE_ISO_CHARACTER (charset, c1, c2);
1563       EMIT_CHAR (c);
1564       continue;
1565
1566     label_invalid_code:
1567       coding->errors++;
1568       if (COMPOSING_P (coding))
1569         DECODE_COMPOSITION_END ('1');
1570       src = src_base;
1571       c = *src++;
1572       EMIT_CHAR (c);
1573     }
1574
1575  label_end_of_loop:
1576   coding->consumed = coding->consumed_char = src_base - source;
1577   coding->produced = dst - destination;
1578   return;
1579 }
1580
1581
1582 /* ISO2022 encoding stuff.  */
1583
1584 /*
1585    It is not enough to say just "ISO2022" on encoding, we have to
1586    specify more details.  In Emacs, each coding system of ISO2022
1587    variant has the following specifications:
1588         1. Initial designation to G0 thru G3.
1589         2. Allows short-form designation?
1590         3. ASCII should be designated to G0 before control characters?
1591         4. ASCII should be designated to G0 at end of line?
1592         5. 7-bit environment or 8-bit environment?
1593         6. Use locking-shift?
1594         7. Use Single-shift?
1595    And the following two are only for Japanese:
1596         8. Use ASCII in place of JISX0201-1976-Roman?
1597         9. Use JISX0208-1983 in place of JISX0208-1978?
1598    These specifications are encoded in `coding->flags' as flag bits
1599    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1600    details.
1601 */
1602
1603 /* Produce codes (escape sequence) for designating CHARSET to graphic
1604    register REG at DST, and increment DST.  If <final-char> of CHARSET is
1605    '@', 'A', or 'B' and the coding system CODING allows, produce
1606    designation sequence of short-form.

        The sequence is built in this order:
          - if the revision number recorded in CODING for CHARSET is
            below 255, ESC '&' followed by '@' + revision;
          - ESC;
          - for a CHARSET whose dimension is not 1, '$';
          - an intermediate character selected by REG from "()*+" (94
            chars) or ",-./" (96 chars).  It is left out only for a
            94-chars CHARSET of dimension other than 1 when
            CODING_FLAG_ISO_SHORT_FORM is set in the flags of CODING, REG
            is 0, and <final-char> is within '@'..'B';
          - <final-char> of CHARSET.
        Finally CHARSET is recorded as the current designation of REG.

        DST is not a parameter; it is the variable of that name in the
        scope where the macro is expanded.  No check for room at DST is
        done here.  */
1607
1608 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
1609   do {                                                                  \
1610     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
1611     char *intermediate_char_94 = "()*+";                                \
1612     char *intermediate_char_96 = ",-./";                                \
1613     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
1614                                                                         \
1615     if (revision < 255)                                                 \
1616       {                                                                 \
1617         *dst++ = ISO_CODE_ESC;                                          \
1618         *dst++ = '&';                                                   \
1619         *dst++ = '@' + revision;                                        \
1620       }                                                                 \
1621     *dst++ = ISO_CODE_ESC;                                              \
1622     if (CHARSET_DIMENSION (charset) == 1)                               \
1623       {                                                                 \
1624         if (CHARSET_CHARS (charset) == 94)                              \
1625           *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
1626         else                                                            \
1627           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1628       }                                                                 \
1629     else                                                                \
1630       {                                                                 \
1631         *dst++ = '$';                                                   \
1632         if (CHARSET_CHARS (charset) == 94)                              \
1633           {                                                             \
1634             if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
1635                 || reg != 0                                             \
1636                 || final_char < '@' || final_char > 'B')                \
1637               *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
1638           }                                                             \
1639         else                                                            \
1640           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1641       }                                                                 \
1642     *dst++ = final_char;                                                \
1643     CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
1644   } while (0)
1645
1646 /* The following two macros produce codes (control character or escape
1647    sequence) for ISO2022 single-shift functions (single-shift-2 and
1648    single-shift-3).

        When CODING_FLAG_ISO_SEVEN_BITS is set in the flags of CODING, the
        two-byte escape sequence (ESC 'N' or ESC 'O') is produced;
        otherwise the single control code (ISO_CODE_SS2 or ISO_CODE_SS3)
        is produced.  In both cases the single-shifting state of CODING is
        set to 1; ENCODE_ISO_CHARACTER_DIMENSION1 and
        ENCODE_ISO_CHARACTER_DIMENSION2 test that state and clear it after
        producing one character.

        The macros take no arguments: CODING and DST are the variables of
        those names in the scope where the macros are expanded.  */
1649
1650 #define ENCODE_SINGLE_SHIFT_2                           \
1651   do {                                                  \
1652     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1653       *dst++ = ISO_CODE_ESC, *dst++ = 'N';              \
1654     else                                                \
1655       *dst++ = ISO_CODE_SS2;                            \
1656     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1657   } while (0)
1658
1659 #define ENCODE_SINGLE_SHIFT_3                           \
1660   do {                                                  \
1661     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1662       *dst++ = ISO_CODE_ESC, *dst++ = 'O';              \
1663     else                                                \
1664       *dst++ = ISO_CODE_SS3;                            \
1665     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1666   } while (0)
1667
1668 /* The following four macros produce codes (control character or
1669    escape sequence) for ISO2022 locking-shift functions (shift-in,
1670    shift-out, locking-shift-2, and locking-shift-3).

        Codes produced, and the graphic register then recorded in CODING
        as invoked to graphic plane 0:
          ENCODE_SHIFT_IN          ISO_CODE_SI    register 0
          ENCODE_SHIFT_OUT         ISO_CODE_SO    register 1
          ENCODE_LOCKING_SHIFT_2   ESC 'n'        register 2
          ENCODE_LOCKING_SHIFT_3   ESC 'o'        register 3

        The macros take no arguments: CODING and DST are the variables of
        those names in the scope where the macros are expanded.  */
1671
1672 #define ENCODE_SHIFT_IN                         \
1673   do {                                          \
1674     *dst++ = ISO_CODE_SI;                       \
1675     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1676   } while (0)
1677
1678 #define ENCODE_SHIFT_OUT                        \
1679   do {                                          \
1680     *dst++ = ISO_CODE_SO;                       \
1681     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1682   } while (0)
1683
1684 #define ENCODE_LOCKING_SHIFT_2                  \
1685   do {                                          \
1686     *dst++ = ISO_CODE_ESC, *dst++ = 'n';        \
1687     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1688   } while (0)
1689
1690 #define ENCODE_LOCKING_SHIFT_3                  \
1691   do {                                          \
1692     *dst++ = ISO_CODE_ESC, *dst++ = 'o';        \
1693     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1694   } while (0)
1695
1696 /* Produce codes for a DIMENSION1 character whose character set is
1697    CHARSET and whose position-code is C1.  Designation and invocation
1698    sequences are also produced in advance if necessary.

        The body is a `do ... while (1)' loop that is left only by one
        of the `break's below:
          - if a single shift is pending in CODING, C1 is produced with
            its 8th bit cleared (CODING_FLAG_ISO_SEVEN_BITS set) or set
            (otherwise), and the pending state is cleared;
          - else if CHARSET is the charset of graphic plane 0, C1 is
            produced with its 8th bit cleared;
          - else if CHARSET is the charset of graphic plane 1, C1 is
            produced with its 8th bit set;
          - else encode_invocation_designation is called to produce
            the necessary codes, and the tests are repeated.

        CODING and DST are the variables of those names in the scope
        where the macro is expanded.  */
1699
1700 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
1701   do {                                                                  \
1702     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1703       {                                                                 \
1704         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1705           *dst++ = c1 & 0x7F;                                           \
1706         else                                                            \
1707           *dst++ = c1 | 0x80;                                           \
1708         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1709         break;                                                          \
1710       }                                                                 \
1711     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1712       {                                                                 \
1713         *dst++ = c1 & 0x7F;                                             \
1714         break;                                                          \
1715       }                                                                 \
1716     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1717       {                                                                 \
1718         *dst++ = c1 | 0x80;                                             \
1719         break;                                                          \
1720       }                                                                 \
1721     else                                                                \
1722       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1723          must invoke it, or, at first, designate it to some graphic     \
1724          register.  Then repeat the loop to actually produce the        \
1725          character.  */                                                 \
1726       dst = encode_invocation_designation (charset, coding, dst);       \
1727   } while (1)
1728
1729 /* Produce codes for a DIMENSION2 character whose character set is
1730    CHARSET and whose position-codes are C1 and C2.  Designation and
1731    invocation codes are also produced in advance if necessary.

        The body is a `do ... while (1)' loop that is left only by one
        of the `break's below:
          - if a single shift is pending in CODING, C1 and C2 are
            produced with their 8th bits cleared
            (CODING_FLAG_ISO_SEVEN_BITS set) or set (otherwise), and the
            pending state is cleared;
          - else if CHARSET is the charset of graphic plane 0, C1 and C2
            are produced with their 8th bits cleared;
          - else if CHARSET is the charset of graphic plane 1, C1 and C2
            are produced with their 8th bits set;
          - else encode_invocation_designation is called to produce
            the necessary codes, and the tests are repeated.

        CODING and DST are the variables of those names in the scope
        where the macro is expanded.  */
1732
1733 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
1734   do {                                                                  \
1735     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1736       {                                                                 \
1737         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1738           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
1739         else                                                            \
1740           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
1741         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1742         break;                                                          \
1743       }                                                                 \
1744     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1745       {                                                                 \
1746         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;                          \
1747         break;                                                          \
1748       }                                                                 \
1749     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1750       {                                                                 \
1751         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
1752         break;                                                          \
1753       }                                                                 \
1754     else                                                                \
1755       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1756          must invoke it, or, at first, designate it to some graphic     \
1757          register.  Then repeat the loop to actually produce the        \
1758          character.  */                                                 \
1759       dst = encode_invocation_designation (charset, coding, dst);       \
1760   } while (1)
1761
     /* Produce codes for character C at DST.

        C is split by SPLIT_CHAR into its charset and position codes C1
        and C2.  If the charset is defined:
          - for a charset of dimension 1, ASCII is replaced by
            charset_latin_jisx0201 when CODING_FLAG_ISO_USE_ROMAN is set,
            and the character is produced by
            ENCODE_ISO_CHARACTER_DIMENSION1;
          - otherwise charset_jisx0208 is replaced by
            charset_jisx0208_1978 when CODING_FLAG_ISO_USE_OLDJIS is set,
            and the character is produced by
            ENCODE_ISO_CHARACTER_DIMENSION2.
        If the charset is not defined, C1 is stored as is, followed by C2
        when C2 is not negative.

        CODING and DST are the variables of those names in the scope
        where the macro is expanded.  */
1762 #define ENCODE_ISO_CHARACTER(c)                                 \
1763   do {                                                          \
1764     int charset, c1, c2;                                        \
1765                                                                 \
1766     SPLIT_CHAR (c, charset, c1, c2);                            \
1767     if (CHARSET_DEFINED_P (charset))                            \
1768       {                                                         \
1769         if (CHARSET_DIMENSION (charset) == 1)                   \
1770           {                                                     \
1771             if (charset == CHARSET_ASCII                        \
1772                 && coding->flags & CODING_FLAG_ISO_USE_ROMAN)   \
1773               charset = charset_latin_jisx0201;                 \
1774             ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);      \
1775           }                                                     \
1776         else                                                    \
1777           {                                                     \
1778             if (charset == charset_jisx0208                     \
1779                 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)  \
1780               charset = charset_jisx0208_1978;                  \
1781             ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);  \
1782           }                                                     \
1783       }                                                         \
1784     else                                                        \
1785       {                                                         \
1786         *dst++ = c1;                                            \
1787         if (c2 >= 0)                                            \
1788           *dst++ = c2;                                          \
1789       }                                                         \
1790   } while (0)
1791
1792
1793 /* Instead of encoding character C, produce one or two `?'s.

        What is actually encoded, through ENCODE_ISO_CHARACTER, is
        CODING_INHIBIT_CHARACTER_SUBSTITUTION (defined elsewhere;
        presumably `?', as stated above -- confirm against its
        definition).  It is encoded a second time when CHARSET_WIDTH of
        the charset of C is greater than 1, presumably so that the
        substitute occupies as many columns as C would.  */
1794
1795 #define ENCODE_UNSAFE_CHARACTER(c)                                      \
1796   do {                                                                  \
1797     ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
1798     if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1)                           \
1799       ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);     \
1800   } while (0)
1801
1802
1803 /* Produce designation and invocation codes at a place pointed by DST
1804    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1805    Return new DST.

        First, the four graphic registers are searched for one that is
        already designated with CHARSET.  If there is none, CHARSET is
        designated (ENCODE_DESIGNATION) to the register requested for it
        in CODING, or to register 0 when no register is requested.

        Then, if the register found or chosen is invoked to neither
        graphic plane 0 nor graphic plane 1, a shift code is produced:
        shift-in for register 0, shift-out for register 1, and for
        registers 2 and 3 a single shift when CODING_FLAG_ISO_SINGLE_SHIFT
        is set in the flags of CODING, a locking shift otherwise.

        No check for room at DST is done here.  */
1806
1807 unsigned char *
1808 encode_invocation_designation (charset, coding, dst)
1809      int charset;
1810      struct coding_system *coding;
1811      unsigned char *dst;
1812 {
1813   int reg;                      /* graphic register number */
1814
1815   /* At first, check designations.  */
1816   for (reg = 0; reg < 4; reg++)
1817     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1818       break;
1819
1820   if (reg >= 4)
1821     {
1822       /* CHARSET is not yet designated to any graphic registers.  */
1823       /* At first check the requested designation.  */
1824       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1825       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1826         /* Since CHARSET requests no special designation, designate it
1827            to graphic register 0.  */
1828         reg = 0;
1829
1830       ENCODE_DESIGNATION (charset, reg, coding);
1831     }
1832
1833   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1834       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1835     {
1836       /* Since the graphic register REG is not invoked to any graphic
1837          planes, invoke it to graphic plane 0.  */
1838       switch (reg)
1839         {
1840         case 0:                 /* graphic register 0 */
1841           ENCODE_SHIFT_IN;
1842           break;
1843
1844         case 1:                 /* graphic register 1 */
1845           ENCODE_SHIFT_OUT;
1846           break;
1847
1848         case 2:                 /* graphic register 2 */
1849           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1850             ENCODE_SINGLE_SHIFT_2;
1851           else
1852             ENCODE_LOCKING_SHIFT_2;
1853           break;
1854
1855         case 3:                 /* graphic register 3 */
1856           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1857             ENCODE_SINGLE_SHIFT_3;
1858           else
1859             ENCODE_LOCKING_SHIFT_3;
1860           break;
1861         }
1862     }
1863
1864   return dst;
1865 }
1866
1867 /* Produce 2-byte codes for encoded composition rule RULE.

        COMPOSITION_DECODE_RULE (defined elsewhere) splits RULE into GREF
        and NREF; the bytes produced are 32 + 81 + GREF followed by
        32 + NREF.  NOTE(review): GREF and NREF are presumably the two
        reference points of the rule -- confirm against the definition
        of COMPOSITION_DECODE_RULE.

        DST is the variable of that name in the scope where the macro is
        expanded.  */
1868
1869 #define ENCODE_COMPOSITION_RULE(rule)           \
1870   do {                                          \
1871     int gref, nref;                             \
1872     COMPOSITION_DECODE_RULE (rule, gref, nref); \
1873     *dst++ = 32 + 81 + gref;                    \
1874     *dst++ = 32 + nref;                         \
1875   } while (0)
1876
1877 /* Produce codes for indicating the start of a composition sequence
1878    (ESC 0, ESC 3, or ESC 4).  DATA points to an array of integers
1879    which specify information about the composition.  See the comment
1880    in coding.h for the format of DATA.

        DATA[3] is stored in CODING->composing and selects the code:
        ESC '0' for COMPOSITION_RELATIVE, ESC '3' for
        COMPOSITION_WITH_ALTCHARS, and ESC '4' for any other value.  In
        the latter two cases CODING->cmp_data_index is set to
        CODING->cmp_data_start + 4 and CODING->composition_rule_follows is
        cleared.

        DST is the variable of that name in the scope where the macro is
        expanded.  */
1881
1882 #define ENCODE_COMPOSITION_START(coding, data)                          \
1883   do {                                                                  \
1884     coding->composing = data[3];                                        \
1885     *dst++ = ISO_CODE_ESC;                                              \
1886     if (coding->composing == COMPOSITION_RELATIVE)                      \
1887       *dst++ = '0';                                                     \
1888     else                                                                \
1889       {                                                                 \
1890         *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS        \
1891                   ? '3' : '4');                                         \
1892         coding->cmp_data_index = coding->cmp_data_start + 4;            \
1893         coding->composition_rule_follows = 0;                           \
1894       }                                                                 \
1895   } while (0)
1896
1897 /* Produce codes for indicating the end of the current composition.

        ESC '1' is produced.  Then CODING->cmp_data_start is advanced by
        DATA[0] and CODING->composing is reset to COMPOSITION_NO.  If
        that advance reaches CODING->cmp_data->used and a next block of
        composition data exists, CODING->cmp_data is switched to that
        block and CODING->cmp_data_start restarts from 0.

        DST is the variable of that name in the scope where the macro is
        expanded.  */
1898
1899 #define ENCODE_COMPOSITION_END(coding, data)                    \
1900   do {                                                          \
1901     *dst++ = ISO_CODE_ESC;                                      \
1902     *dst++ = '1';                                               \
1903     coding->cmp_data_start += data[0];                          \
1904     coding->composing = COMPOSITION_NO;                         \
1905     if (coding->cmp_data_start == coding->cmp_data->used        \
1906         && coding->cmp_data->next)                              \
1907       {                                                         \
1908         coding->cmp_data = coding->cmp_data->next;              \
1909         coding->cmp_data_start = 0;                             \
1910       }                                                         \
1911   } while (0)
1912
1913 /* Produce composition start sequence ESC 0.  Here, this sequence
1914    doesn't mean the start of a new composition but means that we have
1915    just produced components (alternate chars and composition rules) of
1916    the composition and the actual text follows in SRC.

        CODING->composing is set to COMPOSITION_RELATIVE as a side
        effect.  DST is the variable of that name in the scope where the
        macro is expanded.  */
1917
1918 #define ENCODE_COMPOSITION_FAKE_START(coding)   \
1919   do {                                          \
1920     *dst++ = ISO_CODE_ESC;                      \
1921     *dst++ = '0';                               \
1922     coding->composing = COMPOSITION_RELATIVE;   \
1923   } while (0)
1924
1925 /* The following three macros produce codes for indicating direction
1926    of text.

        ENCODE_CONTROL_SEQUENCE_INTRODUCER produces ESC '[' when
        CODING_FLAG_ISO_SEVEN_BITS is set in the flags of CODING, and the
        single code ISO_CODE_CSI otherwise.  The flag is tested as a bit
        (`&'), in the same way as the single-shift macros test it;
        comparing the whole flag word with `==' would select the 7-bit
        form only when no other flag is set.

        ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R produce the
        introducer followed by '2' ']' and '0' ']' respectively.

        All three macros take no arguments, use CODING and DST of the
        scope where they are expanded, and are to be used as statements
        followed by a semicolon.  */
1927 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
1928   do {                                                  \
1929     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1930       *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
1931     else                                                \
1932       *dst++ = ISO_CODE_CSI;                            \
1933   } while (0)
1934
1935 #define ENCODE_DIRECTION_R2L                    \
1936   do {                                          \
         ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
         *dst++ = '2', *dst++ = ']';                 \
       } while (0)
1937
1938 #define ENCODE_DIRECTION_L2R                    \
1939   do {                                          \
         ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
         *dst++ = '0', *dst++ = ']';                 \
       } while (0)
1940
1941 /* Produce codes for designation and invocation to reset the graphic
1942    planes and registers to initial state.

        If graphic plane 0 of CODING does not currently invoke graphic
        register 0, shift-in is produced.  Then, for each of the four
        graphic registers whose initial designation is not negative and
        differs from its current designation, the designation sequence
        of the initial charset is produced (ENCODE_DESIGNATION).

        The macro takes no arguments: CODING and DST are the variables of
        those names in the scope where the macro is expanded.  */
1943 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
1944   do {                                                                      \
1945     int reg;                                                                \
1946     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
1947       ENCODE_SHIFT_IN;                                                      \
1948     for (reg = 0; reg < 4; reg++)                                           \
1949       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0            \
1950           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
1951               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
1952         ENCODE_DESIGNATION                                                  \
1953           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1954   } while (0)
1955
1956 /* Produce designation sequences of charsets in the line started from
1957    SRC to a place pointed by DST, and return updated DST.
1958
1959    If the current block ends before any end-of-line, we may fail to
1960    find all the necessary designations.

        Characters are read one by one with ONE_MORE_CHAR until a '\n'
        is read or charsets have been found for four graphic registers.
        For each character whose charset has a requested graphic register
        in CODING, the first such charset seen for that register is
        remembered.  After the scan, every remembered charset that
        differs from the current designation of its register is
        designated with ENCODE_DESIGNATION.

        SRC is received by value and only DST is returned, so the scan
        does not consume any source text on behalf of the caller.

        NOTE(review): TRANSLATION_TABLE and SRC_END are not referenced
        directly here, and nothing here jumps to `label_end_of_loop';
        presumably ONE_MORE_CHAR (defined elsewhere) uses all three --
        confirm against that macro.  */
1961
1962 static unsigned char *
1963 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
1964      struct coding_system *coding;
1965      Lisp_Object translation_table;
1966      unsigned char *src, *src_end, *dst;
1967 {
1968   int charset, c, found = 0, reg;
1969   /* Table of charsets to be designated to each graphic register.  */
1970   int r[4];
1971
1972   for (reg = 0; reg < 4; reg++)
1973     r[reg] = -1;
1974
1975   while (found < 4)
1976     {
1977       ONE_MORE_CHAR (c);
1978       if (c == '\n')
1979         break;
1980
1981       charset = CHAR_CHARSET (c);
1982       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1983       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1984         {
1985           found++;
1986           r[reg] = charset;
1987         }
1988     }
1989
1990  label_end_of_loop:
1991   if (found)
1992     {
1993       for (reg = 0; reg < 4; reg++)
1994         if (r[reg] >= 0
1995             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1996           ENCODE_DESIGNATION (r[reg], reg, coding);
1997     }
1998
1999   return dst;
2000 }
2001
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".

   Encode SRC_BYTES bytes of text at SOURCE into ISO 2022 form at
   DESTINATION according to CODING.  On return, CODING->consumed is
   the number of source bytes processed, CODING->consumed_char the
   number of characters counted by the main loop, and CODING->produced
   (also stored in CODING->produced_char) the number of bytes written.
   CODING->result is set to CODING_FINISH_INSUFFICIENT_DST when the
   destination check at the head of the loop fails.  */

static void
encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
     struct coding_system *coding;
     unsigned char *source, *destination;
     int src_bytes, dst_bytes;
{
  unsigned char *src = source;
  unsigned char *src_end = source + src_bytes;
  unsigned char *dst = destination;
  unsigned char *dst_end = destination + dst_bytes;
  /* Since the maximum bytes produced by each loop is 20, we subtract 19
     from DST_END to assure overflow checking is necessary only at the
     head of loop.  */
  unsigned char *adjusted_dst_end = dst_end - 19;
  /* SRC_BASE remembers the start position in source in each loop.
     The loop will be exited when there's not enough source text to
     analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
     there's not enough destination area to produce encoded codes
     (within macro EMIT_BYTES).  */
  unsigned char *src_base;
  int c;
  Lisp_Object translation_table;
  Lisp_Object safe_chars;

  safe_chars = coding_safe_chars (coding);

  /* Choose the translation table: none when character translation is
     disabled, else the coding system's own table, falling back to the
     standard one.  */
  if (NILP (Venable_character_translation))
    translation_table = Qnil;
  else
    {
      translation_table = coding->translation_table_for_encode;
      if (NILP (translation_table))
        translation_table = Vstandard_translation_table_for_encode;
    }

  coding->consumed_char = 0;
  coding->errors = 0;
  while (1)
    {
      src_base = src;

      /* When DST_BYTES is zero the limit is 19 bytes before the
         current source position.  NOTE(review): presumably this is
         the in-place case where DESTINATION precedes SOURCE in the
         same buffer -- confirm against the callers.  */
      if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
        {
          coding->result = CODING_FINISH_INSUFFICIENT_DST;
          break;
        }

      if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
          && CODING_SPEC_ISO_BOL (coding))
        {
          /* We have to produce designation sequences if any now.  */
          dst = encode_designation_at_bol (coding, translation_table,
                                           src, src_end, dst);
          CODING_SPEC_ISO_BOL (coding) = 0;
        }

      /* Check composition start and end.  */
      if (coding->composing != COMPOSITION_DISABLED
          && coding->cmp_data_start < coding->cmp_data->used)
        {
          struct composition_data *cmp_data = coding->cmp_data;
          int *data = cmp_data->data + coding->cmp_data_start;
          /* Character position of the next source character, in the
             same coordinates as the positions stored in DATA.  */
          int this_pos = cmp_data->char_offset + coding->consumed_char;

          if (coding->composing == COMPOSITION_RELATIVE)
            {
              if (this_pos == data[2])
                {
                  ENCODE_COMPOSITION_END (coding, data);
                  /* Reload both pointers after the macro.
                     NOTE(review): presumably the macro may advance
                     coding->cmp_data / cmp_data_start -- confirm.  */
                  cmp_data = coding->cmp_data;
                  data = cmp_data->data + coding->cmp_data_start;
                }
            }
          else if (COMPOSING_P (coding))
            {
              /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS  */
              if (coding->cmp_data_index == coding->cmp_data_start + data[0])
                /* We have consumed components of the composition.
                   What follows in SRC is the composition's base
                   text.  */
                ENCODE_COMPOSITION_FAKE_START (coding);
              else
                {
                  /* This C shadows the function-level C; it holds the
                     next component taken from the composition data,
                     not a character read from SRC.  */
                  int c = cmp_data->data[coding->cmp_data_index++];
                  if (coding->composition_rule_follows)
                    {
                      ENCODE_COMPOSITION_RULE (c);
                      coding->composition_rule_follows = 0;
                    }
                  else
                    {
                      if (coding->flags & CODING_FLAG_ISO_SAFE
                          && ! CODING_SAFE_CHAR_P (safe_chars, c))
                        ENCODE_UNSAFE_CHARACTER (c);
                      else
                        ENCODE_ISO_CHARACTER (c);
                      if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
                        coding->composition_rule_follows = 1;
                    }
                  /* Nothing was read from SRC in this iteration, so
                     skip the consumed_char update below.  */
                  continue;
                }
            }
          if (!COMPOSING_P (coding))
            {
              if (this_pos == data[1])
                {
                  ENCODE_COMPOSITION_START (coding, data);
                  continue;
                }
            }
        }

      ONE_MORE_CHAR (c);

      /* Now encode the character C.  */
      if (c < 0x20 || c == 0x7F)
        {
          if (c == '\r')
            {
              if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
                {
                  if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
                    ENCODE_RESET_PLANE_AND_REGISTER;
                  *dst++ = c;
                  /* NOTE(review): this `continue' skips the
                     consumed_char increment at the bottom of the loop
                     even though a character was read from SRC --
                     confirm that this is intended.  */
                  continue;
                }
              /* fall down to treat '\r' as '\n' ...  */
              c = '\n';
            }
          if (c == '\n')
            {
              if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
                ENCODE_RESET_PLANE_AND_REGISTER;
              if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
                bcopy (coding->spec.iso2022.initial_designation,
                       coding->spec.iso2022.current_designation,
                       sizeof coding->spec.iso2022.initial_designation);
              /* Emit the end-of-line in the format requested by
                 coding->eol_type; an undecided type is written as LF.  */
              if (coding->eol_type == CODING_EOL_LF
                  || coding->eol_type == CODING_EOL_UNDECIDED)
                *dst++ = ISO_CODE_LF;
              else if (coding->eol_type == CODING_EOL_CRLF)
                *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
              else
                *dst++ = ISO_CODE_CR;
              /* The next iteration starts at a beginning of line.  */
              CODING_SPEC_ISO_BOL (coding) = 1;
            }
          else
            {
              if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
                ENCODE_RESET_PLANE_AND_REGISTER;
              *dst++ = c;
            }
        }
      else if (ASCII_BYTE_P (c))
        ENCODE_ISO_CHARACTER (c);
      else if (SINGLE_BYTE_CHAR_P (c))
        {
          /* A non-ASCII single-byte character is written as is and
             counted as an error.  */
          *dst++ = c;
          coding->errors++;
        }
      else if (coding->flags & CODING_FLAG_ISO_SAFE
               && ! CODING_SAFE_CHAR_P (safe_chars, c))
        ENCODE_UNSAFE_CHARACTER (c);
      else
        ENCODE_ISO_CHARACTER (c);

      coding->consumed_char++;
    }

 label_end_of_loop:
  /* SRC_BASE, not SRC, marks the end of the fully processed source:
     the character being handled when the loop was left is not counted
     as consumed.  */
  coding->consumed = src_base - source;
  coding->produced = coding->produced_char = dst - destination;
}
2177
2178 \f
2179 /*** 4. SJIS and BIG5 handlers ***/
2180
2181 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2182    quite widely.  So, for the moment, Emacs supports them in the bare
2183    C code.  But, in the future, they may be supported only by CCL.  */
2184
/* SJIS is a coding system encoding three character sets: ASCII, the
   right half of JISX0201-Kana, and JISX0208.  An ASCII character is
   encoded as is.  A character of charset katakana-jisx0201 is encoded
   as "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, with its two position-codes divided and
   shifted so that they fit in the ranges below.
2191
2192    --- CODE RANGE of SJIS ---
2193    (character set)      (range)
2194    ASCII                0x00 .. 0x7F
2195    KATAKANA-JISX0201    0xA0 .. 0xDF
2196    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2197             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2198    -------------------------------
2199
2200 */
2201
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set, and each of its characters is encoded in two bytes.
2205
2206    --- CODE RANGE of BIG5 ---
2207    (character set)      (range)
2208    ASCII                0x00 .. 0x7F
2209    Big5 (1st byte)      0xA1 .. 0xFE
2210         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2211    --------------------------
2212
2213    Since the number of characters in Big5 is larger than maximum
2214    characters in Emacs' charset (96x96), it can't be handled as one
2215    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2216    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2217    contains frequently used characters and the latter contains less
2218    frequently used characters.  */
2219
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every input argument is parenthesized in the expansions so that an
   expression such as `x | 0xA0' can be passed safely.  Input
   arguments may be evaluated more than once, so they must not have
   side effects.  An output argument may be the same variable as an
   input argument: each output is stored only after the intermediate
   value TEMP has been computed from the inputs.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Convert the BIG5 byte pair B1, B2 to CHARSET and position-codes C1,
   C2.  Lead bytes below 0xC9 map to `charset_big5_1'; the others map
   to `charset_big5_2'.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Convert CHARSET and position-codes C1, C2 to the BIG5 byte pair B1,
   B2.  This is the inverse of DECODE_BIG5.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2252
2253 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2254    Check if a text is encoded in SJIS.  If it is, return
2255    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2256
2257 int
2258 detect_coding_sjis (src, src_end)
2259      unsigned char *src, *src_end;
2260 {
2261   int c;
2262   /* Dummy for ONE_MORE_BYTE.  */
2263   struct coding_system dummy_coding;
2264   struct coding_system *coding = &dummy_coding;
2265
2266   while (1)
2267     {
2268       ONE_MORE_BYTE (c);
2269       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2270         {
2271           ONE_MORE_BYTE (c);
2272           if (c < 0x40)
2273             return 0;
2274         }
2275     }
2276  label_end_of_loop:
2277   return CODING_CATEGORY_MASK_SJIS;
2278 }
2279
2280 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2281    Check if a text is encoded in BIG5.  If it is, return
2282    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2283
2284 int
2285 detect_coding_big5 (src, src_end)
2286      unsigned char *src, *src_end;
2287 {
2288   int c;
2289   /* Dummy for ONE_MORE_BYTE.  */
2290   struct coding_system dummy_coding;
2291   struct coding_system *coding = &dummy_coding;
2292
2293   while (1)
2294     {
2295       ONE_MORE_BYTE (c);
2296       if (c >= 0xA1)
2297         {
2298           ONE_MORE_BYTE (c);
2299           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2300             return 0;
2301         }
2302     }
2303  label_end_of_loop:
2304   return CODING_CATEGORY_MASK_BIG5;
2305 }
2306
2307 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2308    Check if a text is encoded in UTF-8.  If it is, return
2309    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2310
2311 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
2312 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
2313 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2314 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2315 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2316 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2317 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2318
2319 int
2320 detect_coding_utf_8 (src, src_end)
2321      unsigned char *src, *src_end;
2322 {
2323   unsigned char c;
2324   int seq_maybe_bytes;
2325   /* Dummy for ONE_MORE_BYTE.  */
2326   struct coding_system dummy_coding;
2327   struct coding_system *coding = &dummy_coding;
2328
2329   while (1)
2330     {
2331       ONE_MORE_BYTE (c);
2332       if (UTF_8_1_OCTET_P (c))
2333         continue;
2334       else if (UTF_8_2_OCTET_LEADING_P (c))
2335         seq_maybe_bytes = 1;
2336       else if (UTF_8_3_OCTET_LEADING_P (c))
2337         seq_maybe_bytes = 2;
2338       else if (UTF_8_4_OCTET_LEADING_P (c))
2339         seq_maybe_bytes = 3;
2340       else if (UTF_8_5_OCTET_LEADING_P (c))
2341         seq_maybe_bytes = 4;
2342       else if (UTF_8_6_OCTET_LEADING_P (c))
2343         seq_maybe_bytes = 5;
2344       else
2345         return 0;
2346
2347       do
2348         {
2349           ONE_MORE_BYTE (c);
2350           if (!UTF_8_EXTRA_OCTET_P (c))
2351             return 0;
2352           seq_maybe_bytes--;
2353         }
2354       while (seq_maybe_bytes > 0);
2355     }
2356
2357  label_end_of_loop:
2358   return CODING_CATEGORY_MASK_UTF_8;
2359 }
2360
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-16 by looking at its first two
   bytes for a byte order mark.  If they are 0xFE 0xFF (Big Endian),
   return CODING_CATEGORY_MASK_UTF_16_BE; if they are 0xFF 0xFE
   (Little Endian), return CODING_CATEGORY_MASK_UTF_16_LE; else
   return 0.  */
2366
/* Nonzero if the 16-bit code unit VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high surrogate, i.e. in 0xD800..0xDBFF.  The
   top six bits select the surrogate block, so the mask must be
   0xFC00; masking with 0xD800 would also accept low surrogates and
   other values such as 0xFFFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low surrogate, i.e. in 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2376
2377 int
2378 detect_coding_utf_16 (src, src_end)
2379      unsigned char *src, *src_end;
2380 {
2381   unsigned char c1, c2;
2382   /* Dummy for TWO_MORE_BYTES.  */
2383   struct coding_system dummy_coding;
2384   struct coding_system *coding = &dummy_coding;
2385
2386   TWO_MORE_BYTES (c1, c2);
2387
2388   if ((c1 == 0xFF) && (c2 == 0xFE))
2389     return CODING_CATEGORY_MASK_UTF_16_LE;
2390   else if ((c1 == 0xFE) && (c2 == 0xFF))
2391     return CODING_CATEGORY_MASK_UTF_16_BE;
2392
2393  label_end_of_loop:
2394   return 0;
2395 }
2396
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   If SJIS_P is 1, decode SJIS text, else decode BIG5 text.

   Decode SRC_BYTES bytes at SOURCE into DESTINATION.  On return,
   CODING->consumed and CODING->consumed_char both hold the number of
   source bytes processed and CODING->produced the number of bytes
   written.  CODING->errors is incremented for each invalid code, and
   CODING->result is set to CODING_FINISH_INCONSISTENT_EOL when an
   end-of-line inconsistent with CODING->eol_type is met while
   CODING_MODE_INHIBIT_INCONSISTENT_EOL is set in CODING->mode.  */

static void
decode_coding_sjis_big5 (coding, source, destination,
                         src_bytes, dst_bytes, sjis_p)
     struct coding_system *coding;
     unsigned char *source, *destination;
     int src_bytes, dst_bytes;
     int sjis_p;
{
  unsigned char *src = source;
  unsigned char *src_end = source + src_bytes;
  unsigned char *dst = destination;
  unsigned char *dst_end = destination + dst_bytes;
  /* SRC_BASE remembers the start position in source in each loop.
     The loop will be exited when there's not enough source code
     (within macro ONE_MORE_BYTE), or when there's not enough
     destination area to produce a character (within macro
     EMIT_CHAR).  */
  unsigned char *src_base;
  Lisp_Object translation_table;

  /* Choose the translation table: none when character translation is
     disabled, else the coding system's own table, falling back to the
     standard one.  */
  if (NILP (Venable_character_translation))
    translation_table = Qnil;
  else
    {
      translation_table = coding->translation_table_for_decode;
      if (NILP (translation_table))
        translation_table = Vstandard_translation_table_for_decode;
    }

  coding->produced_char = 0;
  while (1)
    {
      /* NOTE(review): C2 is left unassigned on the ASCII and
         JISX0201-Kana paths; presumably DECODE_ISO_CHARACTER ignores
         it for one-byte charsets -- confirm.  */
      int c, charset, c1, c2;

      src_base = src;
      ONE_MORE_BYTE (c1);

      if (c1 < 0x80)
        {
          charset = CHARSET_ASCII;
          if (c1 < 0x20)
            {
              if (c1 == '\r')
                {
                  if (coding->eol_type == CODING_EOL_CRLF)
                    {
                      /* A CR followed by LF is decoded as one LF.  */
                      ONE_MORE_BYTE (c2);
                      if (c2 == '\n')
                        c1 = c2;
                      else if (coding->mode
                               & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
                        {
                          coding->result = CODING_FINISH_INCONSISTENT_EOL;
                          goto label_end_of_loop;
                        }
                      else
                        /* To process C2 again, SRC is subtracted by 1.  */
                        src--;
                    }
                  else if (coding->eol_type == CODING_EOL_CR)
                    c1 = '\n';
                }
              else if (c1 == '\n'
                       && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
                       && (coding->eol_type == CODING_EOL_CR
                           || coding->eol_type == CODING_EOL_CRLF))
                {
                  /* A bare LF where CR or CRLF is the expected
                     end-of-line.  */
                  coding->result = CODING_FINISH_INCONSISTENT_EOL;
                  goto label_end_of_loop;
                }
            }
        }
      else
        {
          if (sjis_p)
            {
              if (c1 >= 0xF0)
                goto label_invalid_code;
              if (c1 < 0xA0 || c1 >= 0xE0)
                {
                  /* SJIS -> JISX0208 */
                  ONE_MORE_BYTE (c2);
                  if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
                    goto label_invalid_code;
                  DECODE_SJIS (c1, c2, c1, c2);
                  charset = charset_jisx0208;
                }
              else
                /* SJIS -> JISX0201-Kana */
                charset = charset_katakana_jisx0201;
            }
          else
            {
              /* BIG5 -> Big5 */
              if (c1 < 0xA1 || c1 > 0xFE)
                goto label_invalid_code;
              ONE_MORE_BYTE (c2);
              if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
                goto label_invalid_code;
              DECODE_BIG5 (c1, c2, charset, c1, c2);
            }
        }

      c = DECODE_ISO_CHARACTER (charset, c1, c2);
      EMIT_CHAR (c);
      continue;

      /* An invalid code: count it, then go back to the start of the
         sequence and emit only its first byte as is, so that decoding
         resumes at the byte that follows it.  */
    label_invalid_code:
      coding->errors++;
      src = src_base;
      c = *src++;
      EMIT_CHAR (c);
    }

 label_end_of_loop:
  /* SRC_BASE, not SRC, marks the end of the fully processed source.  */
  coding->consumed = coding->consumed_char = src_base - source;
  coding->produced = dst - destination;
  return;
}
2519
2520 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2521    This function can encode charsets `ascii', `katakana-jisx0201',
2522    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
2523    are sure that all these charsets are registered as official charset
2524    (i.e. do not have extended leading-codes).  Characters of other
2525    charsets are produced without any encoding.  If SJIS_P is 1, encode
2526    SJIS text, else encode BIG5 text.  */
2527
2528 static void
2529 encode_coding_sjis_big5 (coding, source, destination,
2530                          src_bytes, dst_bytes, sjis_p)
2531      struct coding_system *coding;
2532      unsigned char *source, *destination;
2533      int src_bytes, dst_bytes;
2534      int sjis_p;
2535 {
2536   unsigned char *src = source;
2537   unsigned char *src_end = source + src_bytes;
2538   unsigned char *dst = destination;
2539   unsigned char *dst_end = destination + dst_bytes;
2540   /* SRC_BASE remembers the start position in source in each loop.
2541      The loop will be exited when there's not enough source text to
2542      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2543      there's not enough destination area to produce encoded codes
2544      (within macro EMIT_BYTES).  */
2545   unsigned char *src_base;
2546   Lisp_Object translation_table;
2547
2548   if (NILP (Venable_character_translation))
2549     translation_table = Qnil;
2550   else
2551     {
2552       translation_table = coding->translation_table_for_encode;
2553       if (NILP (translation_table))
2554         translation_table = Vstandard_translation_table_for_encode;
2555     }
2556
2557   while (1)
2558     {
2559       int c, charset, c1, c2;
2560
2561       src_base = src;
2562       ONE_MORE_CHAR (c);
2563
2564       /* Now encode the character C.  */
2565       if (SINGLE_BYTE_CHAR_P (c))
2566         {
2567           switch (c)
2568             {
2569             case '\r':
2570               if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2571                 {
2572                   EMIT_ONE_BYTE (c);
2573                   break;
2574                 }
2575               c = '\n';
2576             case '\n':
2577               if (coding->eol_type == CODING_EOL_CRLF)
2578                 {
2579                   EMIT_TWO_BYTES ('\r', c);
2580                   break;
2581                 }
2582               else if (coding->eol_type == CODING_EOL_CR)
2583                 c = '\r';
2584             default:
2585               EMIT_ONE_BYTE (c);
2586             }
2587         }
2588       else
2589         {
2590           SPLIT_CHAR (c, charset, c1, c2);
2591           if (sjis_p)
2592             {
2593               if (charset == charset_jisx0208
2594                   || charset == charset_jisx0208_1978)
2595                 {
2596                   ENCODE_SJIS (c1, c2, c1, c2);
2597                   EMIT_TWO_BYTES (c1, c2);
2598                 }
2599               else if (charset == charset_katakana_jisx0201)
2600                 EMIT_ONE_BYTE (c1 | 0x80);
2601               else
2602                 /* There's no way other than producing the internal
2603                    codes as is.  */
2604                 EMIT_BYTES (src_base, src);
2605             }
2606           else
2607             {
2608               if (charset == charset_big5_1 || charset == charset_big5_2)
2609                 {
2610                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
2611                   EMIT_TWO_BYTES (c1, c2);
2612                 }
2613               else
2614                 /* There's no way other than producing the internal
2615                    codes as is.  */
2616                 EMIT_BYTES (src_base, src);
2617             }
2618         }
2619       coding->consumed_char++;
2620     }
2621
2622  label_end_of_loop:
2623   coding->consumed = src_base - source;
2624   coding->produced = coding->produced_char = dst - destination;
2625 }
2626
2627 \f
2628 /*** 5. CCL handlers ***/
2629
2630 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2631    Check if a text is encoded in a coding system of which
2632    encoder/decoder are written in CCL program.  If it is, return
2633    CODING_CATEGORY_MASK_CCL, else return 0.  */
2634
2635 int
2636 detect_coding_ccl (src, src_end)
2637      unsigned char *src, *src_end;
2638 {
2639   unsigned char *valid;
2640   int c;
2641   /* Dummy for ONE_MORE_BYTE.  */
2642   struct coding_system dummy_coding;
2643   struct coding_system *coding = &dummy_coding;
2644
2645   /* No coding system is assigned to coding-category-ccl.  */
2646   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2647     return 0;
2648
2649   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2650   while (1)
2651     {
2652       ONE_MORE_BYTE (c);
2653       if (! valid[c])
2654         return 0;
2655     }
2656  label_end_of_loop:
2657   return CODING_CATEGORY_MASK_CCL;
2658 }
2659
2660 \f
2661 /*** 6. End-of-line handlers ***/
2662
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

   Copy SRC_BYTES bytes from SOURCE to DESTINATION, converting the
   end-of-line format given by CODING->eol_type to '\n': for
   CODING_EOL_CRLF a "\r\n" pair becomes '\n', for CODING_EOL_CR a
   '\r' becomes '\n', and for any other type the bytes are passed
   through unchanged.  If CODING_MODE_INHIBIT_INCONSISTENT_EOL is set
   in CODING->mode, decoding stops with CODING->result set to
   CODING_FINISH_INCONSISTENT_EOL at the first end-of-line that does
   not match the expected format.  On return, CODING->consumed and
   CODING->consumed_char hold the number of source bytes processed and
   CODING->produced the number of bytes written.  */

static void
decode_eol (coding, source, destination, src_bytes, dst_bytes)
     struct coding_system *coding;
     unsigned char *source, *destination;
     int src_bytes, dst_bytes;
{
  unsigned char *src = source;
  unsigned char *dst = destination;
  unsigned char *src_end = src + src_bytes;
  unsigned char *dst_end = dst + dst_bytes;
  Lisp_Object translation_table;
  /* SRC_BASE remembers the start position in source in each loop.
     The loop will be exited when there's not enough source code
     (within macro ONE_MORE_BYTE), or when there's not enough
     destination area to produce a character (within macro
     EMIT_CHAR).  */
  unsigned char *src_base;
  int c;

  /* No character translation is done here.  NOTE(review): the
     variable is not referenced directly below; presumably one of the
     macros refers to it -- confirm.  */
  translation_table = Qnil;
  switch (coding->eol_type)
    {
    case CODING_EOL_CRLF:
      while (1)
        {
          src_base = src;
          ONE_MORE_BYTE (c);
          if (c == '\r')
            {
              ONE_MORE_BYTE (c);
              if (c != '\n')
                {
                  if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
                    {
                      coding->result = CODING_FINISH_INCONSISTENT_EOL;
                      goto label_end_of_loop;
                    }
                  /* A lone CR: emit it as is and back up so that the
                     byte after it is processed again.  */
                  src--;
                  c = '\r';
                }
            }
          else if (c == '\n'
                   && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
            {
              /* A bare LF where CRLF is expected.  */
              coding->result = CODING_FINISH_INCONSISTENT_EOL;
              goto label_end_of_loop;
            }
          EMIT_CHAR (c);
        }
      break;

    case CODING_EOL_CR:
      while (1)
        {
          src_base = src;
          ONE_MORE_BYTE (c);
          if (c == '\n')
            {
              /* An LF where CR is expected: stop if inconsistency is
                 inhibited, otherwise pass it through.  */
              if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
                {
                  coding->result = CODING_FINISH_INCONSISTENT_EOL;
                  goto label_end_of_loop;
                }
            }
          else if (c == '\r')
            c = '\n';
          EMIT_CHAR (c);
        }
      break;

    default:                    /* no need for EOL handling */
      while (1)
        {
          src_base = src;
          ONE_MORE_BYTE (c);
          EMIT_CHAR (c);
        }
    }

 label_end_of_loop:
  /* SRC_BASE, not SRC, marks the end of the fully processed source.  */
  coding->consumed = coding->consumed_char = src_base - source;
  coding->produced = dst - destination;
  return;
}
2749
2750 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2751    format of end-of-line according to `coding->eol_type'.  It also
2752    convert multibyte form 8-bit characers to unibyte if
2753    CODING->src_multibyte is nonzero.  If `coding->mode &
2754    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
2755    also means end-of-line.  */
2756
2757 static void
2758 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2759      struct coding_system *coding;
2760      unsigned char *source, *destination;
2761      int src_bytes, dst_bytes;
2762 {
2763   unsigned char *src = source;
2764   unsigned char *dst = destination;
2765   unsigned char *src_end = src + src_bytes;
2766   unsigned char *dst_end = dst + dst_bytes;
2767   Lisp_Object translation_table;
2768   /* SRC_BASE remembers the start position in source in each loop.
2769      The loop will be exited when there's not enough source text to
2770      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2771      there's not enough destination area to produce encoded codes
2772      (within macro EMIT_BYTES).  */
2773   unsigned char *src_base;
2774   int c;
2775   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
2776
2777   translation_table = Qnil;
2778   if (coding->src_multibyte
2779       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
2780     {
2781       src_end--;
2782       src_bytes--;
2783       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
2784     }
2785
2786   if (coding->eol_type == CODING_EOL_CRLF)
2787     {
2788       while (src < src_end)
2789         {
2790           src_base = src;
2791           c = *src++;
2792           if (c >= 0x20)
2793             EMIT_ONE_BYTE (c);
2794           else if (c == '\n' || (c == '\r' && selective_display))
2795             EMIT_TWO_BYTES ('\r', '\n');
2796           else
2797             EMIT_ONE_BYTE (c);
2798         }
2799       src_base = src;
2800     label_end_of_loop:
2801       ;
2802     }
2803   else
2804     {
2805       if (!dst_bytes || src_bytes <= dst_bytes)
2806         {
2807           safe_bcopy (src, dst, src_bytes);
2808           src_base = src_end;
2809           dst += src_bytes;
2810         }
2811       else
2812         {
2813           if (coding->src_multibyte
2814               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
2815             dst_bytes--;
2816           safe_bcopy (src, dst, dst_bytes);
2817           src_base = src + dst_bytes;
2818           dst = destination + dst_bytes;
2819           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2820         }
2821       if (coding->eol_type == CODING_EOL_CR)
2822         {
2823           for (src = destination; src < dst; src++)
2824             if (*src == '\n') *src = '\r';
2825         }
2826       else if (selective_display)
2827         {
2828           for (src = destination; src < dst; src++)
2829             if (*src == '\r') *src = '\n';
2830         }
2831     }
2832   if (coding->src_multibyte)
2833     dst = destination + str_as_unibyte (destination, dst - destination);
2834
2835   coding->consumed = src_base - source;
2836   coding->produced = dst - destination;
2837   coding->produced_char = coding->produced;
2838 }
2839
2840 \f
2841 /*** 7. C library functions ***/
2842
2843 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2844    has a property `coding-system'.  The value of this property is a
2845    vector of length 5 (called as coding-vector).  Among elements of
2846    this vector, the first (element[0]) and the fifth (element[4])
2847    carry important information for decoding/encoding.  Before
2848    decoding/encoding, this information should be set in fields of a
2849    structure of type `coding_system'.
2850
2851    A value of property `coding-system' can be a symbol of another
2852    subsidiary coding-system.  In that case, Emacs gets coding-vector
2853    from that symbol.
2854
2855    `element[0]' contains information to be set in `coding->type'.  The
2856    value and its meaning is as follows:
2857
2858    0 -- coding_type_emacs_mule
2859    1 -- coding_type_sjis
2860    2 -- coding_type_iso2022
2861    3 -- coding_type_big5
2862    4 -- coding_type_ccl encoder/decoder written in CCL
2863    nil -- coding_type_no_conversion
2864    t -- coding_type_undecided (automatic conversion on decoding,
2865                                no-conversion on encoding)
2866
2867    `element[4]' contains information to be set in `coding->flags' and
2868    `coding->spec'.  The meaning varies by `coding->type'.
2869
2870    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2871    of length 32 (of which the first 17 sub-elements are used now).
2872    Meanings of these sub-elements are:
2873
2874    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2875         If the value is an integer of valid charset, the charset is
2876         assumed to be designated to graphic register N initially.
2877
2878         If the value is minus, it is a minus value of charset which
2879         reserves graphic register N, which means that the charset is
2880         not designated initially but should be designated to graphic
2881         register N just before encoding a character in that charset.
2882
2883         If the value is nil, graphic register N is never used on
2884         encoding.
2885
2886    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2887         Each value takes t or nil.  See the section ISO2022 of
2888         `coding.h' for more information.
2889
2890    If `coding->type' is `coding_type_big5', element[4] is t to denote
2891    BIG5-ETen or nil to denote BIG5-HKU.
2892
2893    If `coding->type' takes the other value, element[4] is ignored.
2894
2895    Emacs Lisp's coding system also carries information about format of
2896    end-of-line in a value of property `eol-type'.  If the value is
2897    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2898    means CODING_EOL_CR.  If it is not integer, it should be a vector
2899    of subsidiary coding systems of which property `eol-type' has one
2900    of above values.
2901
2902 */
2903
2904 /* Extract information for decoding/encoding from the Lisp symbol
2905    CODING_SYSTEM and set it in CODING.  If CODING_SYSTEM is invalid,
2906    CODING is set up so that no conversion is necessary and -1 is
2907    returned; otherwise 0 is returned.
        CODING_SYSTEM is rejected (jump to label_invalid_coding_system)
        when:
          - it is nil,
          - its `coding-system' property is not a vector of length 5
            whose element[3] is a cons (the property list),
          - element[0] is not a symbol and the property list has no
            `coding-category' entry whose `coding-category-index'
            property is an integer,
          - element[0] is 2 (ISO2022) and element[4] is not a vector of
            length 32,
          - element[0] is 4 (CCL) and element[4] is not a cons whose car
            and cdr are both accepted by setup_ccl_program,
          - element[0] is none of 0 through 5.
        When element[0] is a symbol the function returns 0 early, before
        the property list is examined.
        NOTE(review): on that early-return path this function assigns
        neither `category_idx' nor the translation-table members of
        CODING; confirm that callers do not read them in that case.  */
2908
2909 int
2910 setup_coding_system (coding_system, coding)
2911      Lisp_Object coding_system;
2912      struct coding_system *coding;
2913 {
2914   Lisp_Object coding_spec, coding_type, eol_type, plist;
2915   Lisp_Object val;
       /* NOTE(review): the ISO2022 case below declares its own `val' and
          `i', shadowing these; this outer `i' is not referenced anywhere
          else in the function.  */
2916   int i;
2917
2918   /* Initialize some fields required for all kinds of coding systems.  */
2919   coding->symbol = coding_system;
2920   coding->common_flags = 0;
2921   coding->mode = 0;
2922   coding->heading_ascii = -1;
2923   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2924   coding->composing = COMPOSITION_DISABLED;
2925   coding->cmp_data = NULL;
2926
2927   if (NILP (coding_system))
2928     goto label_invalid_coding_system;
2929
2930   coding_spec = Fget (coding_system, Qcoding_system);
2931
       /* The spec must be a 5-element vector whose element[3] is a
          non-empty property list.  */
2932   if (!VECTORP (coding_spec)
2933       || XVECTOR (coding_spec)->size != 5
2934       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2935     goto label_invalid_coding_system;
2936
       /* Decide the end-of-line format.  When inhibit_eol_conversion is
          nonzero the `eol-type' property is not consulted.  A vector
          value means the format still has to be detected.
          NOTE(review): XFASTINT is applied to EOL_TYPE without an
          INTEGERP check; every value that is not a vector and does not
          read as 1 or 2 through XFASTINT falls through to LF.  */
2937   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2938   if (VECTORP (eol_type))
2939     {
2940       coding->eol_type = CODING_EOL_UNDECIDED;
2941       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2942     }
2943   else if (XFASTINT (eol_type) == 1)
2944     {
2945       coding->eol_type = CODING_EOL_CRLF;
2946       coding->common_flags
2947         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2948     }
2949   else if (XFASTINT (eol_type) == 2)
2950     {
2951       coding->eol_type = CODING_EOL_CR;
2952       coding->common_flags
2953         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2954     }
2955   else
2956     coding->eol_type = CODING_EOL_LF;
2957
2958   coding_type = XVECTOR (coding_spec)->contents[0];
2959   /* Try short cut.  A symbol here is either t (undecided) or anything
          else, which is treated as no-conversion; neither needs the
          property list or element[4].  */
2960   if (SYMBOLP (coding_type))
2961     {
2962       if (EQ (coding_type, Qt))
2963         {
2964           coding->type = coding_type_undecided;
2965           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2966         }
2967       else
2968         coding->type = coding_type_no_conversion;
2969       return 0;
2970     }
2971
2972   /* Get values of coding system properties:
2973      `post-read-conversion', `pre-write-conversion',
2974      `translation-table-for-decode', `translation-table-for-encode'.  */
2975   plist = XVECTOR (coding_spec)->contents[3];
2976   /* Pre & post conversion functions should be disabled if
2977      inhibit_pre_post_conversion is nonzero.  This is the case that a code
2978      conversion function is called while those functions are running.  */
2979   if (! inhibit_pre_post_conversion)
2980     {
2981       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2982       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2983     }
       /* A translation table may be given directly or as a symbol whose
          property of the same name holds it; anything that is not a
          char-table in the end is replaced by nil.  */
2984   val = Fplist_get (plist, Qtranslation_table_for_decode);
2985   if (SYMBOLP (val))
2986     val = Fget (val, Qtranslation_table_for_decode);
2987   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2988   val = Fplist_get (plist, Qtranslation_table_for_encode);
2989   if (SYMBOLP (val))
2990     val = Fget (val, Qtranslation_table_for_encode);
2991   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
       /* The `coding-category' property is mandatory on this path: it
          must name a category whose `coding-category-index' property is
          an integer.  */
2992   val = Fplist_get (plist, Qcoding_category);
2993   if (!NILP (val))
2994     {
2995       val = Fget (val, Qcoding_category_index);
2996       if (INTEGERP (val))
2997         coding->category_idx = XINT (val);
2998       else
2999         goto label_invalid_coding_system;
3000     }
3001   else
3002     goto label_invalid_coding_system;
3003
3004   /* If the coding system has non-nil `composition' property, enable
3005      composition handling.  */
3006   val = Fplist_get (plist, Qcomposition);
3007   if (!NILP (val))
3008     coding->composing = COMPOSITION_NO;
3009
       /* Dispatch on the numeric coding type stored in element[0].  */
3010   switch (XFASTINT (coding_type))
3011     {
3012     case 0:
           /* emacs-mule: decoding/encoding is required only when a
              post-read/pre-write conversion function is present.  */
3013       coding->type = coding_type_emacs_mule;
3014       if (!NILP (coding->post_read_conversion))
3015         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3016       if (!NILP (coding->pre_write_conversion))
3017         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3018       break;
3019
3020     case 1:
3021       coding->type = coding_type_sjis;
3022       coding->common_flags
3023         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3024       break;
3025
3026     case 2:
3027       coding->type = coding_type_iso2022;
3028       coding->common_flags
3029         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3030       {
3031         Lisp_Object val, temp;
3032         Lisp_Object *flags;
3033         int i, charset, reg_bits = 0;
3034
3035         val = XVECTOR (coding_spec)->contents[4];
3036
3037         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3038           goto label_invalid_coding_system;
3039
             /* Sub-elements 4 through 16 are booleans, each mapped to one
                CODING_FLAG_ISO_xxx bit.  */
3040         flags = XVECTOR (val)->contents;
3041         coding->flags
3042           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3043              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3044              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3045              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3046              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3047              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3048              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3049              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3050              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3051              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3052              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3053              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3054              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3055              );
3056
3057         /* Invoke graphic register 0 to plane 0.  */
3058         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3059         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3060         CODING_SPEC_ISO_INVOCATION (coding, 1)
3061           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3062         /* Not single shifting at first.  */
3063         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3064         /* Beginning of buffer should also be regarded as bol. */
3065         CODING_SPEC_ISO_BOL (coding) = 1;
3066
             /* Reset every charset's revision number to 255, then take
                over the entries (CHARSET . REVISION) of
                Vcharset_revision_alist whose CHARSET is known to
                get_charset_id and whose REVISION is an integer with
                0 <= REVISION and REVISION + '@' < 128.  */
3067         for (charset = 0; charset <= MAX_CHARSET; charset++)
3068           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3069         val = Vcharset_revision_alist;
3070         while (CONSP (val))
3071           {
3072             charset = get_charset_id (Fcar_safe (XCAR (val)));
3073             if (charset >= 0
3074                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3075                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3076               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3077             val = XCDR (val);
3078           }
3079
3080         /* Checks FLAGS[REG] (REG = 0, 1, 2, 3) and decide designations.
3081            FLAGS[REG] can be one of below:
3082                 integer CHARSET: CHARSET occupies register REG,
3083                 t: designate nothing to REG initially, but can be used
3084                   by any charsets,
3085                 list of integer, nil, or t: designate the first
3086                   element (if integer) to REG initially, the remaining
3087                   elements (if integer) is designated to REG on request,
3088                   if an element is t, REG can be used by any charsets,
3089                 nil: REG is never used.
                Wherever an integer charset is expected, a value that
                get_charset_id maps to a non-negative id is accepted as
                well.  REG_BITS collects one bit per register that was
                given t.  */
3090         for (charset = 0; charset <= MAX_CHARSET; charset++)
3091           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3092             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3093         for (i = 0; i < 4; i++)
3094           {
                 /* `&&' binds tighter than `||': either a valid integer
                    charset, or something get_charset_id resolves.  */
3095             if (INTEGERP (flags[i])
3096                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3097                 || (charset = get_charset_id (flags[i])) >= 0)
3098               {
3099                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3100                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3101               }
3102             else if (EQ (flags[i], Qt))
3103               {
3104                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3105                 reg_bits |= 1 << i;
3106                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3107               }
3108             else if (CONSP (flags[i]))
3109               {
3110                 Lisp_Object tail;
3111                 tail = flags[i];
3112
3113                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
                     /* First list element: the initial designation.  */
3114                 if (INTEGERP (XCAR (tail))
3115                     && (charset = XINT (XCAR (tail)),
3116                         CHARSET_VALID_P (charset))
3117                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3118                   {
3119                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3120                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3121                   }
3122                 else
3123                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
                     /* Remaining elements: charsets designated on request,
                        or t to open the register to any charset.  */
3124                 tail = XCDR (tail);
3125                 while (CONSP (tail))
3126                   {
3127                     if (INTEGERP (XCAR (tail))
3128                         && (charset = XINT (XCAR (tail)),
3129                             CHARSET_VALID_P (charset))
3130                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3131                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3132                         = i;
3133                     else if (EQ (XCAR (tail), Qt))
3134                       reg_bits |= 1 << i;
3135                     tail = XCDR (tail);
3136                   }
3137               }
3138             else
3139               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3140
                 /* The current designation starts out as the initial one.  */
3141             CODING_SPEC_ISO_DESIGNATION (coding, i)
3142               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3143           }
3144
3145         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3146           {
3147             /* REG 1 can be used only by locking shift in 7-bit env.  */
3148             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3149               reg_bits &= ~2;
3150             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3151               /* Without any shifting, only REG 0 and 1 can be used.  */
3152               reg_bits &= 3;
3153           }
3154
             /* Give every valid charset that has no explicit request a
                default register chosen from the ones left in REG_BITS.  */
3155         if (reg_bits)
3156           for (charset = 0; charset <= MAX_CHARSET; charset++)
3157             {
3158               if (CHARSET_VALID_P (charset)
3159                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3160                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3161                 {
3162                   /* There exist some default graphic registers to be
3163                      used by CHARSET.  */
3164
3165                   /* We had better avoid designating a charset of
3166                      CHARS96 to REG 0 as far as possible.  */
3167                   if (CHARSET_CHARS (charset) == 96)
3168                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3169                       = (reg_bits & 2
3170                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3171                   else
3172                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3173                       = (reg_bits & 1
3174                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3175                 }
3176             }
3177       }
3178       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3179       coding->spec.iso2022.last_invalid_designation_register = -1;
3180       break;
3181
3182     case 3:
           /* BIG5: element[4] selects the variant, nil for HKU and
              anything else for ETen.  */
3183       coding->type = coding_type_big5;
3184       coding->common_flags
3185         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3186       coding->flags
3187         = (NILP (XVECTOR (coding_spec)->contents[4])
3188            ? CODING_FLAG_BIG5_HKU
3189            : CODING_FLAG_BIG5_ETEN);
3190       break;
3191
3192     case 4:
3193       coding->type = coding_type_ccl;
3194       coding->common_flags
3195         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3196       {
             /* element[4] is (DECODER . ENCODER); both halves must be
                accepted by setup_ccl_program.  */
3197         val = XVECTOR (coding_spec)->contents[4];
3198         if (! CONSP (val)
3199             || setup_ccl_program (&(coding->spec.ccl.decoder),
3200                                   XCAR (val)) < 0
3201             || setup_ccl_program (&(coding->spec.ccl.encoder),
3202                                   XCDR (val)) < 0)
3203           goto label_invalid_coding_system;
3204
             /* Build the 256-entry table of valid byte codes from the
                `valid-codes' property.  Each list element is either an
                integer in 0..255 or a cons (START . END) with
                0 <= START <= END < 256; other elements are ignored.  */
3205         bzero (coding->spec.ccl.valid_codes, 256);
3206         val = Fplist_get (plist, Qvalid_codes);
3207         if (CONSP (val))
3208           {
3209             Lisp_Object this;
3210
3211             for (; CONSP (val); val = XCDR (val))
3212               {
3213                 this = XCAR (val);
3214                 if (INTEGERP (this)
3215                     && XINT (this) >= 0 && XINT (this) < 256)
3216                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3217                 else if (CONSP (this)
3218                          && INTEGERP (XCAR (this))
3219                          && INTEGERP (XCDR (this)))
3220                   {
3221                     int start = XINT (XCAR (this));
3222                     int end = XINT (XCDR (this));
3223
3224                     if (start >= 0 && start <= end && end < 256)
3225                       while (start <= end)
3226                         coding->spec.ccl.valid_codes[start++] = 1;
3227                   }
3228               }
3229           }
3230       }
3231       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3232       coding->spec.ccl.cr_carryover = 0;
3233       break;
3234
3235     case 5:
           /* raw-text: no additional flags beyond those set for the
              end-of-line format above.  */
3236       coding->type = coding_type_raw_text;
3237       break;
3238
3239     default:
3240       goto label_invalid_coding_system;
3241     }
3242   return 0;
3243
       /* Failure exit: fall back to a plain no-conversion setup and
          discard the flags, eol type and conversion functions that may
          have been stored before the problem was found.  */
3244  label_invalid_coding_system:
3245   coding->type = coding_type_no_conversion;
3246   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3247   coding->common_flags = 0;
3248   coding->eol_type = CODING_EOL_LF;
3249   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3250   return -1;
3251 }
3252
3253 /* Free memory blocks allocated for storing composition information.  */
3254
3255 void
3256 coding_free_composition_data (coding)
3257      struct coding_system *coding;
3258 {
3259   struct composition_data *cmp_data = coding->cmp_data, *next;
3260
3261   if (!cmp_data)
3262     return;
3263   /* Memory blocks are chained.  At first, rewind to the first, then,
3264      free blocks one by one.  */
3265   while (cmp_data->prev)
3266     cmp_data = cmp_data->prev;
3267   while (cmp_data)
3268     {
3269       next = cmp_data->next;
3270       xfree (cmp_data);
3271       cmp_data = next;
3272     }
3273   coding->cmp_data = NULL;
3274 }
3275
3276 /* Set `char_offset' member of all memory blocks pointed by
3277    coding->cmp_data to POS.  */
3278
3279 void
3280 coding_adjust_composition_offset (coding, pos)
3281      struct coding_system *coding;
3282      int pos;
3283 {
3284   struct composition_data *cmp_data;
3285
3286   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3287     cmp_data->char_offset = pos;
3288 }
3289
3290 /* Setup raw-text or one of its subsidiaries in the structure
3291    coding_system CODING according to the already setup value eol_type
3292    in CODING.  CODING should be setup for some coding system in
3293    advance.  */
3294
3295 void
3296 setup_raw_text_coding_system (coding)
3297      struct coding_system *coding;
3298 {
3299   if (coding->type != coding_type_raw_text)
3300     {
3301       coding->symbol = Qraw_text;
3302       coding->type = coding_type_raw_text;
3303       if (coding->eol_type != CODING_EOL_UNDECIDED)
3304         {
3305           Lisp_Object subsidiaries;
3306           subsidiaries = Fget (Qraw_text, Qeol_type);
3307
3308           if (VECTORP (subsidiaries)
3309               && XVECTOR (subsidiaries)->size == 3)
3310             coding->symbol
3311               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3312         }
3313       setup_coding_system (coding->symbol, coding);
3314     }
3315   return;
3316 }
3317
3318 /* Emacs has a mechanism to automatically detect a coding system if it
3319    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3320    it's impossible to distinguish some coding systems accurately
3321    because they use the same range of codes.  So, at first, coding
3322    systems are categorized into the categories listed below:
3323
3324    o coding-category-emacs-mule
3325
3326         The category for a coding system which has the same code range
3327         as Emacs' internal format.  Assigned the coding-system (Lisp
3328         symbol) `emacs-mule' by default.
3329
3330    o coding-category-sjis
3331
3332         The category for a coding system which has the same code range
3333         as SJIS.  Assigned the coding-system (Lisp
3334         symbol) `japanese-shift-jis' by default.
3335
3336    o coding-category-iso-7
3337
3338         The category for a coding system which has the same code range
3339         as ISO2022 of 7-bit environment.  This doesn't use any locking
3340         shift and single shift functions.  This can encode/decode all
3341         charsets.  Assigned the coding-system (Lisp symbol)
3342         `iso-2022-7bit' by default.
3343
3344    o coding-category-iso-7-tight
3345
3346         Same as coding-category-iso-7 except that this can
3347         encode/decode only the specified charsets.
3348
3349    o coding-category-iso-8-1
3350
3351         The category for a coding system which has the same code range
3352         as ISO2022 of 8-bit environment and graphic plane 1 used only
3353         for DIMENSION1 charset.  This doesn't use any locking shift
3354         and single shift functions.  Assigned the coding-system (Lisp
3355         symbol) `iso-latin-1' by default.
3356
3357    o coding-category-iso-8-2
3358
3359         The category for a coding system which has the same code range
3360         as ISO2022 of 8-bit environment and graphic plane 1 used only
3361         for DIMENSION2 charset.  This doesn't use any locking shift
3362         and single shift functions.  Assigned the coding-system (Lisp
3363         symbol) `japanese-iso-8bit' by default.
3364
3365    o coding-category-iso-7-else
3366
3367         The category for a coding system which has the same code range
3368         as ISO2022 of 7-bit environment but uses locking shift or
3369         single shift functions.  Assigned the coding-system (Lisp
3370         symbol) `iso-2022-7bit-lock' by default.
3371
3372    o coding-category-iso-8-else
3373
3374         The category for a coding system which has the same code range
3375         as ISO2022 of 8-bit environment but uses locking shift or
3376         single shift functions.  Assigned the coding-system (Lisp
3377         symbol) `iso-2022-8bit-ss2' by default.
3378
3379    o coding-category-big5
3380
3381         The category for a coding system which has the same code range
3382         as BIG5.  Assigned the coding-system (Lisp symbol)
3383         `cn-big5' by default.
3384
3385    o coding-category-utf-8
3386
3387         The category for a coding system which has the same code range
3388         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3389         symbol) `utf-8' by default.
3390
3391    o coding-category-utf-16-be
3392
3393         The category for a coding system in which a text has an
3394         Unicode signature (cf. Unicode Standard) in the order of BIG
3395         endian at the head.  Assigned the coding-system (Lisp symbol)
3396         `utf-16-be' by default.
3397
3398    o coding-category-utf-16-le
3399
3400         The category for a coding system in which a text has an
3401         Unicode signature (cf. Unicode Standard) in the order of
3402         LITTLE endian at the head.  Assigned the coding-system (Lisp
3403         symbol) `utf-16-le' by default.
3404
3405    o coding-category-ccl
3406
3407         The category for a coding system of which encoder/decoder is
3408         written in CCL programs.  The default value is nil, i.e., no
3409         coding system is assigned.
3410
3411    o coding-category-binary
3412
3413         The category for a coding system not categorized in any of the
3414         above.  Assigned the coding-system (Lisp symbol)
3415         `no-conversion' by default.
3416
3417    Each of them is a Lisp symbol and the value is an actual
3418    `coding-system's (this is also a Lisp symbol) assigned by a user.
3419    What Emacs does actually is to detect a category of coding system.
3420    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3421    decide only one possible category, it selects a category of the
3422    highest priority.  Priorities of categories are also specified by a
3423    user in a Lisp variable `coding-category-list'.
3424
3425 */
3426
3427 static
3428 int ascii_skip_code[256];
     /* Table indexed by a byte value and consulted by the leading scan
        of detect_coding_mask: a byte B is stepped over as long as
        ascii_skip_code[B] is nonzero.  detect_coding_mask itself resets
        and sets the entries for ISO_CODE_SO, ISO_CODE_SI and
        ISO_CODE_ESC.  NOTE(review): the other entries are not assigned
        in this part of the file; presumably they are filled in at
        initialization -- confirm.  */
3429
3430 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3431    If it detects possible coding systems, return an integer in which
3432    appropriate flag bits are set.  Flag bits are defined by macros
3433    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
3434    it should point the table `coding_priorities'.  In that case, only
3435    the flag bit for a coding system of the highest priority is set in
3436    the returned value.
3437
3438    How many ASCII characters are at the head is returned as *SKIP.
        Return 0 when the leading scan consumes the whole text.  When
        PRIORITIES is non-NULL and no entry of it matches,
        CODING_CATEGORY_MASK_RAW_TEXT is returned.  When PRIORITIES is
        NULL, the result always contains CODING_CATEGORY_MASK_RAW_TEXT
        and CODING_CATEGORY_MASK_BINARY in addition to the detected
        bits.
        The function modifies the ISO_CODE_SO, ISO_CODE_SI and
        ISO_CODE_ESC entries of the file-scope table ascii_skip_code.  */
3439
3440 static int
3441 detect_coding_mask (source, src_bytes, priorities, skip)
3442      unsigned char *source;
3443      int src_bytes, *priorities, *skip;
3444 {
3445   register unsigned char c;
3446   unsigned char *src = source, *src_end = source + src_bytes;
3447   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3448   int i;
3449
3450   /* At first, skip all ASCII characters and control characters except
3451      for three ISO2022 specific control characters.  */
3452   ascii_skip_code[ISO_CODE_SO] = 0;
3453   ascii_skip_code[ISO_CODE_SI] = 0;
3454   ascii_skip_code[ISO_CODE_ESC] = 0;
3455
3456  label_loop_detect_coding:
3457   while (src < src_end && ascii_skip_code[*src]) src++;
3458   *skip = src - source;
3459
3460   if (src >= src_end)
3461     /* We found nothing other than ASCII.  There's nothing to do.  */
3462     return 0;
3463
       /* Note that SRC is not advanced: it keeps pointing at C for the
          rest of the function, and the detect_coding_xxx helpers are
          called with SRC at that byte.  */
3464   c = *src;
3465   /* The text seems to be encoded in some multilingual coding system.
3466      Now, try to find in which coding system the text is encoded.  */
3467   if (c < 0x80)
3468     {
3469       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3470       /* C is an ISO2022 specific control code of C0.  */
3471       mask = detect_coding_iso2022 (src, src_end);
3472       if (mask == 0)
3473         {
3474           /* No valid ISO2022 code follows C.  Try again, this time
                  treating that kind of control code as skippable.  */
3475           src++;
3476           if (c == ISO_CODE_ESC)
3477             ascii_skip_code[ISO_CODE_ESC] = 1;
3478           else
3479             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3480           goto label_loop_detect_coding;
3481         }
3482       if (priorities)
3483         {
3484           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3485             {
3486               if (mask & priorities[i])
3487                 return priorities[i];
3488             }
3489           return CODING_CATEGORY_MASK_RAW_TEXT;
3490         }
3491     }
3492   else
3493     {
           /* TRY collects the categories worth examining for C.  */
3494       int try;
3495
3496       if (c < 0xA0)
3497         {
3498           /* C is the first byte of SJIS character code,
3499              or a leading-code of Emacs' internal format (emacs-mule),
3500              or the first byte of UTF-16.  */
3501           try = (CODING_CATEGORY_MASK_SJIS
3502                   | CODING_CATEGORY_MASK_EMACS_MULE
3503                   | CODING_CATEGORY_MASK_UTF_16_BE
3504                   | CODING_CATEGORY_MASK_UTF_16_LE);
3505
3506           /* Or, if C is a special latin extra code,
3507              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3508              or is an ISO2022 control-sequence-introducer (CSI),
3509              we should also consider the possibility of ISO2022 codings.
                  A CSI counts only when it is followed by `]' or by one
                  of `0', `1', `2' and then `]'.  SRC still points at the
                  CSI byte itself, so the bytes to look at are SRC[1] and
                  SRC[2]; testing *SRC here could never succeed.  */
3510           if ((VECTORP (Vlatin_extra_code_table)
3511                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3512               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3513               || (c == ISO_CODE_CSI
3514                   && (src + 1 < src_end
3515                       && (src[1] == ']'
3516                           || ((src[1] == '0' || src[1] == '1' || src[1] == '2')
3517                               && src + 2 < src_end
3518                               && src[2] == ']')))))
3519             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3520                      | CODING_CATEGORY_MASK_ISO_8BIT);
3521         }
3522       else
3523         /* C is a character of ISO2022 in graphic plane right,
3524            or a SJIS's 1-byte character code (i.e. JISX0201),
3525            or the first byte of BIG5's 2-byte code,
3526            or the first byte of UTF-8/16.  */
3527         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3528                 | CODING_CATEGORY_MASK_ISO_8BIT
3529                 | CODING_CATEGORY_MASK_SJIS
3530                 | CODING_CATEGORY_MASK_BIG5
3531                 | CODING_CATEGORY_MASK_UTF_8
3532                 | CODING_CATEGORY_MASK_UTF_16_BE
3533                 | CODING_CATEGORY_MASK_UTF_16_LE);
3534
3535       /* Or, we may have to consider the possibility of CCL.  */
3536       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3537           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3538               ->spec.ccl.valid_codes)[c])
3539         try |= CODING_CATEGORY_MASK_CCL;
3540
3541       mask = 0;
3542       utf16_examined_p = iso2022_examined_p = 0;
3543       if (priorities)
3544         {
               /* Walk the categories in priority order, running each
                  detector only when its category is reached, and stop at
                  the first category whose bit shows up in MASK.  The
                  ISO2022 and UTF-16 detectors cover several categories
                  each, hence the flags that keep them from running
                  twice.  */
3545           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3546             {
3547               if (!iso2022_examined_p
3548                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
3549                 {
3550                   mask |= detect_coding_iso2022 (src, src_end);
3551                   iso2022_examined_p = 1;
3552                 }
3553               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3554                 mask |= detect_coding_sjis (src, src_end);
3555               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
3556                 mask |= detect_coding_utf_8 (src, src_end);
3557               else if (!utf16_examined_p
3558                        && (priorities[i] & try &
3559                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
3560                 {
3561                   mask |= detect_coding_utf_16 (src, src_end);
3562                   utf16_examined_p = 1;
3563                 }
3564               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3565                 mask |= detect_coding_big5 (src, src_end);
3566               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3567                 mask |= detect_coding_emacs_mule (src, src_end);
3568               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3569                 mask |= detect_coding_ccl (src, src_end);
3570               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3571                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
3572               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3573                 mask |= CODING_CATEGORY_MASK_BINARY;
3574               if (mask & priorities[i])
3575                 return priorities[i];
3576             }
3577           return CODING_CATEGORY_MASK_RAW_TEXT;
3578         }
           /* No priority table: run every detector selected by TRY and
              report all categories found.  */
3579       if (try & CODING_CATEGORY_MASK_ISO)
3580         mask |= detect_coding_iso2022 (src, src_end);
3581       if (try & CODING_CATEGORY_MASK_SJIS)
3582         mask |= detect_coding_sjis (src, src_end);
3583       if (try & CODING_CATEGORY_MASK_BIG5)
3584         mask |= detect_coding_big5 (src, src_end);
3585       if (try & CODING_CATEGORY_MASK_UTF_8)
3586         mask |= detect_coding_utf_8 (src, src_end);
3587       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
3588         mask |= detect_coding_utf_16 (src, src_end);
3589       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3590         mask |= detect_coding_emacs_mule (src, src_end);
3591       if (try & CODING_CATEGORY_MASK_CCL)
3592         mask |= detect_coding_ccl (src, src_end);
3593     }
3594   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3595 }
3596
3597 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3598    The information of the detected coding system is set in CODING.  */
3599
3600 void
3601 detect_coding (coding, src, src_bytes)
3602      struct coding_system *coding;
3603      unsigned char *src;
3604      int src_bytes;
3605 {
3606   unsigned int idx;
3607   int skip, mask, i;
3608   Lisp_Object val;
3609
3610   val = Vcoding_category_list;
3611   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3612   coding->heading_ascii = skip;
3613
3614   if (!mask) return;
3615
3616   /* We found a single coding system of the highest priority in MASK.  */
3617   idx = 0;
3618   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3619   if (! mask)
3620     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3621
3622   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3623
3624   if (coding->eol_type != CODING_EOL_UNDECIDED)
3625     {
3626       Lisp_Object tmp;
3627
3628       tmp = Fget (val, Qeol_type);
3629       if (VECTORP (tmp))
3630         val = XVECTOR (tmp)->contents[coding->eol_type];
3631     }
3632
3633   /* Setup this new coding system while preserving some slots.  */
3634   {
3635     int src_multibyte = coding->src_multibyte;
3636     int dst_multibyte = coding->dst_multibyte;
3637
3638     setup_coding_system (val, coding);
3639     coding->src_multibyte = src_multibyte;
3640     coding->dst_multibyte = dst_multibyte;
3641     coding->heading_ascii = skip;
3642   }
3643 }
3644
3645 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3646    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3647    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3648
3649    How many non-eol characters are at the head is returned as *SKIP.  */
3650
3651 #define MAX_EOL_CHECK_COUNT 3
3652
3653 static int
3654 detect_eol_type (source, src_bytes, skip)
3655      unsigned char *source;
3656      int src_bytes, *skip;
3657 {
3658   unsigned char *src = source, *src_end = src + src_bytes;
3659   unsigned char c;
3660   int total = 0;                /* How many end-of-lines are found so far.  */
3661   int eol_type = CODING_EOL_UNDECIDED;
3662   int this_eol_type;
3663
3664   *skip = 0;
3665
3666   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3667     {
3668       c = *src++;
3669       if (c == '\n' || c == '\r')
3670         {
3671           if (*skip == 0)
3672             *skip = src - 1 - source;
3673           total++;
3674           if (c == '\n')
3675             this_eol_type = CODING_EOL_LF;
3676           else if (src >= src_end || *src != '\n')
3677             this_eol_type = CODING_EOL_CR;
3678           else
3679             this_eol_type = CODING_EOL_CRLF, src++;
3680
3681           if (eol_type == CODING_EOL_UNDECIDED)
3682             /* This is the first end-of-line.  */
3683             eol_type = this_eol_type;
3684           else if (eol_type != this_eol_type)
3685             {
3686               /* The found type is different from what found before.  */
3687               eol_type = CODING_EOL_INCONSISTENT;
3688               break;
3689             }
3690         }
3691     }
3692
3693   if (*skip == 0)
3694     *skip = src_end - source;
3695   return eol_type;
3696 }
3697
3698 /* Like detect_eol_type, but detect EOL type in 2-octet
3699    big-endian/little-endian format for coding systems utf-16-be and
3700    utf-16-le.  */
3701
3702 static int
3703 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
3704      unsigned char *source;
3705      int src_bytes, *skip;
3706 {
3707   unsigned char *src = source, *src_end = src + src_bytes;
3708   unsigned int c1, c2;
3709   int total = 0;                /* How many end-of-lines are found so far.  */
3710   int eol_type = CODING_EOL_UNDECIDED;
3711   int this_eol_type;
3712   int msb, lsb;
3713
3714   if (big_endian_p)
3715     msb = 0, lsb = 1;
3716   else
3717     msb = 1, lsb = 0;
3718
3719   *skip = 0;
3720
3721   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
3722     {
3723       c1 = (src[msb] << 8) | (src[lsb]);
3724       src += 2;
3725
3726       if (c1 == '\n' || c1 == '\r')
3727         {
3728           if (*skip == 0)
3729             *skip = src - 2 - source;
3730           total++;
3731           if (c1 == '\n')
3732             {
3733               this_eol_type = CODING_EOL_LF;
3734             }
3735           else
3736             {
3737               if ((src + 1) >= src_end)
3738                 {
3739                   this_eol_type = CODING_EOL_CR;
3740                 }
3741               else
3742                 {
3743                   c2 = (src[msb] << 8) | (src[lsb]);
3744                   if (c2 == '\n')
3745                     this_eol_type = CODING_EOL_CRLF, src += 2;
3746                   else
3747                     this_eol_type = CODING_EOL_CR;
3748                 }
3749             }
3750
3751           if (eol_type == CODING_EOL_UNDECIDED)
3752             /* This is the first end-of-line.  */
3753             eol_type = this_eol_type;
3754           else if (eol_type != this_eol_type)
3755             {
3756               /* The found type is different from what found before.  */
3757               eol_type = CODING_EOL_INCONSISTENT;
3758               break;
3759             }
3760         }
3761     }
3762
3763   if (*skip == 0)
3764     *skip = src_end - source;
3765   return eol_type;
3766 }
3767
3768 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3769    is encoded.  If it detects an appropriate format of end-of-line, it
3770    sets the information in *CODING.  */
3771
3772 void
3773 detect_eol (coding, src, src_bytes)
3774      struct coding_system *coding;
3775      unsigned char *src;
3776      int src_bytes;
3777 {
3778   Lisp_Object val;
3779   int skip;
3780   int eol_type;
3781
3782   switch (coding->category_idx)
3783     {
3784     case CODING_CATEGORY_IDX_UTF_16_BE:
3785       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
3786       break;
3787     case CODING_CATEGORY_IDX_UTF_16_LE:
3788       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
3789       break;
3790     default:
3791       eol_type = detect_eol_type (src, src_bytes, &skip);
3792       break;
3793     }
3794
3795   if (coding->heading_ascii > skip)
3796     coding->heading_ascii = skip;
3797   else
3798     skip = coding->heading_ascii;
3799
3800   if (eol_type == CODING_EOL_UNDECIDED)
3801     return;
3802   if (eol_type == CODING_EOL_INCONSISTENT)
3803     {
3804 #if 0
3805       /* This code is suppressed until we find a better way to
3806          distinguish raw text file and binary file.  */
3807
3808       /* If we have already detected that the coding is raw-text, the
3809          coding should actually be no-conversion.  */
3810       if (coding->type == coding_type_raw_text)
3811         {
3812           setup_coding_system (Qno_conversion, coding);
3813           return;
3814         }
3815       /* Else, let's decode only text code anyway.  */
3816 #endif /* 0 */
3817       eol_type = CODING_EOL_LF;
3818     }
3819
3820   val = Fget (coding->symbol, Qeol_type);
3821   if (VECTORP (val) && XVECTOR (val)->size == 3)
3822     {
3823       int src_multibyte = coding->src_multibyte;
3824       int dst_multibyte = coding->dst_multibyte;
3825
3826       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3827       coding->src_multibyte = src_multibyte;
3828       coding->dst_multibyte = dst_multibyte;
3829       coding->heading_ascii = skip;
3830     }
3831 }
3832
/* Number of bytes added on top of every conversion buffer size
   estimate.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which decoding with CODING (a pointer to struct
   coding_system) may grow the text: 3 for ISO2022, the CCL decoder's
   own buf_magnification for CCL, and 2 otherwise.  CODING is
   evaluated more than once.  */
#define DECODING_BUFFER_MAG(coding)                             \
  ((coding)->type == coding_type_iso2022                        \
   ? 3                                                          \
   : ((coding)->type == coding_type_ccl                         \
      ? (coding)->spec.ccl.decoder.buf_magnification            \
      : 2))
3841
3842 /* Return maximum size (bytes) of a buffer enough for decoding
3843    SRC_BYTES of text encoded in CODING.  */
3844
3845 int
3846 decoding_buffer_size (coding, src_bytes)
3847      struct coding_system *coding;
3848      int src_bytes;
3849 {
3850   return (src_bytes * DECODING_BUFFER_MAG (coding)
3851           + CONVERSION_BUFFER_EXTRA_ROOM);
3852 }
3853
3854 /* Return maximum size (bytes) of a buffer enough for encoding
3855    SRC_BYTES of text to CODING.  */
3856
3857 int
3858 encoding_buffer_size (coding, src_bytes)
3859      struct coding_system *coding;
3860      int src_bytes;
3861 {
3862   int magnification;
3863
3864   if (coding->type == coding_type_ccl)
3865     magnification = coding->spec.ccl.encoder.buf_magnification;
3866   else if (CODING_REQUIRE_ENCODING (coding))
3867     magnification = 3;
3868   else
3869     magnification = 1;
3870
3871   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3872 }
3873
/* Working buffer for code conversion.  */
struct conversion_buffer
{
  int size;                     /* Number of bytes allocated at DATA.  */
  int on_stack;                 /* 1 if DATA was allocated by alloca.  */
  unsigned char *data;          /* The buffer itself.  */
};

/* Don't use alloca for allocating memory space larger than this, lest
   we overflow their stack.  Parenthesized so that the macro can be
   used safely inside a larger expression.  */
#define MAX_ALLOCA (16 * 1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer, not
   a pointer to one).  Requests below MAX_ALLOCA come from the stack
   of the calling function, larger ones from xmalloc.  LEN is
   evaluated more than once, so it must not have side effects.  */
#define allocate_conversion_buffer(buf, len)                    \
  do {                                                          \
    if ((len) < MAX_ALLOCA)                                     \
      {                                                         \
        (buf).data = (unsigned char *) alloca (len);            \
        (buf).on_stack = 1;                                     \
      }                                                         \
    else                                                        \
      {                                                         \
        (buf).data = (unsigned char *) xmalloc (len);           \
        (buf).on_stack = 0;                                     \
      }                                                         \
    (buf).size = (len);                                         \
  } while (0)
3901
3902 /* Double the allocated memory for *BUF.  */
3903 static void
3904 extend_conversion_buffer (buf)
3905      struct conversion_buffer *buf;
3906 {
3907   if (buf->on_stack)
3908     {
3909       unsigned char *save = buf->data;
3910       buf->data = (unsigned char *) xmalloc (buf->size * 2);
3911       bcopy (save, buf->data, buf->size);
3912       buf->on_stack = 0;
3913     }
3914   else
3915     {
3916       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
3917     }
3918   buf->size *= 2;
3919 }
3920
3921 /* Free the allocated memory for BUF if it is not on stack.  */
3922 static void
3923 free_conversion_buffer (buf)
3924      struct conversion_buffer *buf;
3925 {
3926   if (!buf->on_stack)
3927     xfree (buf->data);
3928 }
3929
3930 int
3931 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3932      struct coding_system *coding;
3933      unsigned char *source, *destination;
3934      int src_bytes, dst_bytes, encodep;
3935 {
3936   struct ccl_program *ccl
3937     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3938   int result;
3939
3940   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3941   if (encodep)
3942     ccl->eol_type = coding->eol_type;
3943   ccl->multibyte = coding->src_multibyte;
3944   coding->produced = ccl_driver (ccl, source, destination,
3945                                  src_bytes, dst_bytes, &(coding->consumed));
3946   if (encodep)
3947     coding->produced_char = coding->produced;
3948   else
3949     {
3950       int bytes
3951         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
3952       coding->produced = str_as_multibyte (destination, bytes,
3953                                            coding->produced,
3954                                            &(coding->produced_char));
3955     }
3956
3957   switch (ccl->status)
3958     {
3959     case CCL_STAT_SUSPEND_BY_SRC:
3960       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3961       break;
3962     case CCL_STAT_SUSPEND_BY_DST:
3963       coding->result = CODING_FINISH_INSUFFICIENT_DST;
3964       break;
3965     case CCL_STAT_QUIT:
3966     case CCL_STAT_INVALID_CMD:
3967       coding->result = CODING_FINISH_INTERRUPT;
3968       break;
3969     default:
3970       coding->result = CODING_FINISH_NORMAL;
3971       break;
3972     }
3973   return coding->result;
3974 }
3975
3976 /* Decode EOL format of the text at PTR of BYTES length destructively
3977    according to CODING->eol_type.  This is called after the CCL
3978    program produced a decoded text at PTR.  If we do CRLF->LF
3979    conversion, update CODING->produced and CODING->produced_char.  */
3980
3981 static void
3982 decode_eol_post_ccl (coding, ptr, bytes)
3983      struct coding_system *coding;
3984      unsigned char *ptr;
3985      int bytes;
3986 {
3987   Lisp_Object val, saved_coding_symbol;
3988   unsigned char *pend = ptr + bytes;
3989   int dummy;
3990
3991   /* Remember the current coding system symbol.  We set it back when
3992      an inconsistent EOL is found so that `last-coding-system-used' is
3993      set to the coding system that doesn't specify EOL conversion.  */
3994   saved_coding_symbol = coding->symbol;
3995
3996   coding->spec.ccl.cr_carryover = 0;
3997   if (coding->eol_type == CODING_EOL_UNDECIDED)
3998     {
3999       /* Here, to avoid the call of setup_coding_system, we directly
4000          call detect_eol_type.  */
4001       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4002       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4003         coding->eol_type = CODING_EOL_LF;
4004       if (coding->eol_type != CODING_EOL_UNDECIDED)
4005         {
4006           val = Fget (coding->symbol, Qeol_type);
4007           if (VECTORP (val) && XVECTOR (val)->size == 3)
4008             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4009         }
4010       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4011     }
4012
4013   if (coding->eol_type == CODING_EOL_LF
4014       || coding->eol_type == CODING_EOL_UNDECIDED)
4015     {
4016       /* We have nothing to do.  */
4017       ptr = pend;
4018     }
4019   else if (coding->eol_type == CODING_EOL_CRLF)
4020     {
4021       unsigned char *pstart = ptr, *p = ptr;
4022
4023       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4024           && *(pend - 1) == '\r')
4025         {
4026           /* If the last character is CR, we can't handle it here
4027              because LF will be in the not-yet-decoded source text.
4028              Recorded that the CR is not yet processed.  */
4029           coding->spec.ccl.cr_carryover = 1;
4030           coding->produced--;
4031           coding->produced_char--;
4032           pend--;
4033         }
4034       while (ptr < pend)
4035         {
4036           if (*ptr == '\r')
4037             {
4038               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4039                 {
4040                   *p++ = '\n';
4041                   ptr += 2;
4042                 }
4043               else
4044                 {
4045                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4046                     goto undo_eol_conversion;
4047                   *p++ = *ptr++;
4048                 }
4049             }
4050           else if (*ptr == '\n'
4051                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4052             goto undo_eol_conversion;
4053           else
4054             *p++ = *ptr++;
4055           continue;
4056
4057         undo_eol_conversion:
4058           /* We have faced with inconsistent EOL format at PTR.
4059              Convert all LFs before PTR back to CRLFs.  */
4060           for (p--, ptr--; p >= pstart; p--)
4061             {
4062               if (*p == '\n')
4063                 *ptr-- = '\n', *ptr-- = '\r';
4064               else
4065                 *ptr-- = *p;
4066             }
4067           /*  If carryover is recorded, cancel it because we don't
4068               convert CRLF anymore.  */
4069           if (coding->spec.ccl.cr_carryover)
4070             {
4071               coding->spec.ccl.cr_carryover = 0;
4072               coding->produced++;
4073               coding->produced_char++;
4074               pend++;
4075             }
4076           p = ptr = pend;
4077           coding->eol_type = CODING_EOL_LF;
4078           coding->symbol = saved_coding_symbol;
4079         }
4080       if (p < pend)
4081         {
4082           /* As each two-byte sequence CRLF was converted to LF, (PEND
4083              - P) is the number of deleted characters.  */
4084           coding->produced -= pend - p;
4085           coding->produced_char -= pend - p;
4086         }
4087     }
4088   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4089     {
4090       unsigned char *p = ptr;
4091
4092       for (; ptr < pend; ptr++)
4093         {
4094           if (*ptr == '\r')
4095             *ptr = '\n';
4096           else if (*ptr == '\n'
4097                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4098             {
4099               for (; p < ptr; p++)
4100                 {
4101                   if (*p == '\n')
4102                     *p = '\r';
4103                 }
4104               ptr = pend;
4105               coding->eol_type = CODING_EOL_LF;
4106               coding->symbol = saved_coding_symbol;
4107             }
4108         }
4109     }
4110 }
4111
4112 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4113    decoding, it may detect coding system and format of end-of-line if
4114    those are not yet decided.  The source should be unibyte, the
4115    result is multibyte if CODING->dst_multibyte is nonzero, else
4116    unibyte.  */
4117
4118 int
4119 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4120      struct coding_system *coding;
4121      unsigned char *source, *destination;
4122      int src_bytes, dst_bytes;
4123 {
4124   if (coding->type == coding_type_undecided)
4125     detect_coding (coding, source, src_bytes);
4126
4127   if (coding->eol_type == CODING_EOL_UNDECIDED
4128       && coding->type != coding_type_ccl)
4129     detect_eol (coding, source, src_bytes);
4130
4131   coding->produced = coding->produced_char = 0;
4132   coding->consumed = coding->consumed_char = 0;
4133   coding->errors = 0;
4134   coding->result = CODING_FINISH_NORMAL;
4135
4136   switch (coding->type)
4137     {
4138     case coding_type_sjis:
4139       decode_coding_sjis_big5 (coding, source, destination,
4140                                src_bytes, dst_bytes, 1);
4141       break;
4142
4143     case coding_type_iso2022:
4144       decode_coding_iso2022 (coding, source, destination,
4145                              src_bytes, dst_bytes);
4146       break;
4147
4148     case coding_type_big5:
4149       decode_coding_sjis_big5 (coding, source, destination,
4150                                src_bytes, dst_bytes, 0);
4151       break;
4152
4153     case coding_type_emacs_mule:
4154       decode_coding_emacs_mule (coding, source, destination,
4155                                 src_bytes, dst_bytes);
4156       break;
4157
4158     case coding_type_ccl:
4159       if (coding->spec.ccl.cr_carryover)
4160         {
4161           /* Set the CR which is not processed by the previous call of
4162              decode_eol_post_ccl in DESTINATION.  */
4163           *destination = '\r';
4164           coding->produced++;
4165           coding->produced_char++;
4166           dst_bytes--;
4167         }
4168       ccl_coding_driver (coding, source,
4169                          destination + coding->spec.ccl.cr_carryover,
4170                          src_bytes, dst_bytes, 0);
4171       if (coding->eol_type != CODING_EOL_LF)
4172         decode_eol_post_ccl (coding, destination, coding->produced);
4173       break;
4174
4175     default:
4176       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4177     }
4178
4179   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4180       && coding->consumed == src_bytes)
4181     coding->result = CODING_FINISH_NORMAL;
4182
4183   if (coding->mode & CODING_MODE_LAST_BLOCK
4184       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4185     {
4186       unsigned char *src = source + coding->consumed;
4187       unsigned char *dst = destination + coding->produced;
4188
4189       src_bytes -= coding->consumed;
4190       coding->errors++;
4191       if (COMPOSING_P (coding))
4192         DECODE_COMPOSITION_END ('1');
4193       while (src_bytes--)
4194         {
4195           int c = *src++;
4196           dst += CHAR_STRING (c, dst);
4197           coding->produced_char++;
4198         }
4199       coding->consumed = coding->consumed_char = src - source;
4200       coding->produced = dst - destination;
4201       coding->result = CODING_FINISH_NORMAL;
4202     }
4203
4204   if (!coding->dst_multibyte)
4205     {
4206       coding->produced = str_as_unibyte (destination, coding->produced);
4207       coding->produced_char = coding->produced;
4208     }
4209
4210   return coding->result;
4211 }
4212
4213 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4214    multibyteness of the source is CODING->src_multibyte, the
4215    multibyteness of the result is always unibyte.  */
4216
4217 int
4218 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4219      struct coding_system *coding;
4220      unsigned char *source, *destination;
4221      int src_bytes, dst_bytes;
4222 {
4223   coding->produced = coding->produced_char = 0;
4224   coding->consumed = coding->consumed_char = 0;
4225   coding->errors = 0;
4226   coding->result = CODING_FINISH_NORMAL;
4227
4228   switch (coding->type)
4229     {
4230     case coding_type_sjis:
4231       encode_coding_sjis_big5 (coding, source, destination,
4232                                src_bytes, dst_bytes, 1);
4233       break;
4234
4235     case coding_type_iso2022:
4236       encode_coding_iso2022 (coding, source, destination,
4237                              src_bytes, dst_bytes);
4238       break;
4239
4240     case coding_type_big5:
4241       encode_coding_sjis_big5 (coding, source, destination,
4242                                src_bytes, dst_bytes, 0);
4243       break;
4244
4245     case coding_type_emacs_mule:
4246       encode_coding_emacs_mule (coding, source, destination,
4247                                 src_bytes, dst_bytes);
4248       break;
4249
4250     case coding_type_ccl:
4251       ccl_coding_driver (coding, source, destination,
4252                          src_bytes, dst_bytes, 1);
4253       break;
4254
4255     default:
4256       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4257     }
4258
4259   if (coding->mode & CODING_MODE_LAST_BLOCK
4260       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4261     {
4262       unsigned char *src = source + coding->consumed;
4263       unsigned char *src_end = src + src_bytes;
4264       unsigned char *dst = destination + coding->produced;
4265
4266       if (coding->type == coding_type_iso2022)
4267         ENCODE_RESET_PLANE_AND_REGISTER;
4268       if (COMPOSING_P (coding))
4269         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4270       if (coding->consumed < src_bytes)
4271         {
4272           int len = src_bytes - coding->consumed;
4273
4274           BCOPY_SHORT (source + coding->consumed, dst, len);
4275           if (coding->src_multibyte)
4276             len = str_as_unibyte (dst, len);
4277           dst += len;
4278           coding->consumed = src_bytes;
4279         }
4280       coding->produced = coding->produced_char = dst - destination;
4281       coding->result = CODING_FINISH_NORMAL;
4282     }
4283
4284   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4285       && coding->consumed == src_bytes)
4286     coding->result = CODING_FINISH_NORMAL;
4287
4288   return coding->result;
4289 }
4290
4291 /* Scan text in the region between *BEG and *END (byte positions),
4292    skip characters which we don't have to decode by coding system
4293    CODING at the head and tail, then set *BEG and *END to the region
4294    of the text we actually have to convert.  The caller should move
4295    the gap out of the region in advance if the region is from a
4296    buffer.
4297
4298    If STR is not NULL, *BEG and *END are indices into STR.  */
4299
4300 static void
4301 shrink_decoding_region (beg, end, coding, str)
4302      int *beg, *end;
4303      struct coding_system *coding;
4304      unsigned char *str;
4305 {
4306   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4307   int eol_conversion;
4308   Lisp_Object translation_table;
4309
4310   if (coding->type == coding_type_ccl
4311       || coding->type == coding_type_undecided
4312       || coding->eol_type != CODING_EOL_LF
4313       || !NILP (coding->post_read_conversion)
4314       || coding->composing != COMPOSITION_DISABLED)
4315     {
4316       /* We can't skip any data.  */
4317       return;
4318     }
4319   if (coding->type == coding_type_no_conversion
4320       || coding->type == coding_type_raw_text
4321       || coding->type == coding_type_emacs_mule)
4322     {
4323       /* We need no conversion, but don't have to skip any data here.
4324          Decoding routine handles them effectively anyway.  */
4325       return;
4326     }
4327
4328   translation_table = coding->translation_table_for_decode;
4329   if (NILP (translation_table) && !NILP (Venable_character_translation))
4330     translation_table = Vstandard_translation_table_for_decode;
4331   if (CHAR_TABLE_P (translation_table))
4332     {
4333       int i;
4334       for (i = 0; i < 128; i++)
4335         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4336           break;
4337       if (i < 128)
4338         /* Some ASCII character should be translated.  We give up
4339            shrinking.  */
4340         return;
4341     }
4342
4343   if (coding->heading_ascii >= 0)
4344     /* Detection routine has already found how much we can skip at the
4345        head.  */
4346     *beg += coding->heading_ascii;
4347
4348   if (str)
4349     {
4350       begp_orig = begp = str + *beg;
4351       endp_orig = endp = str + *end;
4352     }
4353   else
4354     {
4355       begp_orig = begp = BYTE_POS_ADDR (*beg);
4356       endp_orig = endp = begp + *end - *beg;
4357     }
4358
4359   eol_conversion = (coding->eol_type == CODING_EOL_CR
4360                     || coding->eol_type == CODING_EOL_CRLF);
4361
4362   switch (coding->type)
4363     {
4364     case coding_type_sjis:
4365     case coding_type_big5:
4366       /* We can skip all ASCII characters at the head.  */
4367       if (coding->heading_ascii < 0)
4368         {
4369           if (eol_conversion)
4370             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4371           else
4372             while (begp < endp && *begp < 0x80) begp++;
4373         }
4374       /* We can skip all ASCII characters at the tail except for the
4375          second byte of SJIS or BIG5 code.  */
4376       if (eol_conversion)
4377         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4378       else
4379         while (begp < endp && endp[-1] < 0x80) endp--;
4380       /* Do not consider LF as ascii if preceded by CR, since that
4381          confuses eol decoding. */
4382       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4383         endp++;
4384       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4385         endp++;
4386       break;
4387
4388     case coding_type_iso2022:
4389       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4390         /* We can't skip any data.  */
4391         break;
4392       if (coding->heading_ascii < 0)
4393         {
4394           /* We can skip all ASCII characters at the head except for a
4395              few control codes.  */
4396           while (begp < endp && (c = *begp) < 0x80
4397                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4398                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4399                  && (!eol_conversion || c != ISO_CODE_LF))
4400             begp++;
4401         }
4402       switch (coding->category_idx)
4403         {
4404         case CODING_CATEGORY_IDX_ISO_8_1:
4405         case CODING_CATEGORY_IDX_ISO_8_2:
4406           /* We can skip all ASCII characters at the tail.  */
4407           if (eol_conversion)
4408             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4409           else
4410             while (begp < endp && endp[-1] < 0x80) endp--;
4411           /* Do not consider LF as ascii if preceded by CR, since that
4412              confuses eol decoding. */
4413           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4414             endp++;
4415           break;
4416
4417         case CODING_CATEGORY_IDX_ISO_7:
4418         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4419           {
4420             /* We can skip all charactes at the tail except for 8-bit
4421                codes and ESC and the following 2-byte at the tail.  */
4422             unsigned char *eight_bit = NULL;
4423
4424             if (eol_conversion)
4425               while (begp < endp
4426                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4427                 {
4428                   if (!eight_bit && c & 0x80) eight_bit = endp;
4429                   endp--;
4430                 }
4431             else
4432               while (begp < endp
4433                      && (c = endp[-1]) != ISO_CODE_ESC)
4434                 {
4435                   if (!eight_bit && c & 0x80) eight_bit = endp;
4436                   endp--;
4437                 }
4438             /* Do not consider LF as ascii if preceded by CR, since that
4439                confuses eol decoding. */
4440             if (begp < endp && endp < endp_orig
4441                 && endp[-1] == '\r' && endp[0] == '\n')
4442               endp++;
4443             if (begp < endp && endp[-1] == ISO_CODE_ESC)
4444               {
4445                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
4446                   /* This is an ASCII designation sequence.  We can
4447                      surely skip the tail.  But, if we have
4448                      encountered an 8-bit code, skip only the codes
4449                      after that.  */
4450                   endp = eight_bit ? eight_bit : endp + 2;
4451                 else
4452                   /* Hmmm, we can't skip the tail.  */
4453                   endp = endp_orig;
4454               }
4455             else if (eight_bit)
4456               endp = eight_bit;
4457           }
4458         }
4459       break;
4460
4461     default:
4462       abort ();
4463     }
4464   *beg += begp - begp_orig;
4465   *end += endp - endp_orig;
4466   return;
4467 }
4468
4469 /* Like shrink_decoding_region but for encoding.  */
4470
4471 static void
4472 shrink_encoding_region (beg, end, coding, str)
4473      int *beg, *end;
4474      struct coding_system *coding;
4475      unsigned char *str;
4476 {
4477   unsigned char *begp_orig, *begp, *endp_orig, *endp;
4478   int eol_conversion;
4479   Lisp_Object translation_table;
4480
4481   if (coding->type == coding_type_ccl
4482       || coding->eol_type == CODING_EOL_CRLF
4483       || coding->eol_type == CODING_EOL_CR
4484       || coding->cmp_data && coding->cmp_data->used > 0)
4485     {
4486       /* We can't skip any data.  */
4487       return;
4488     }
4489   if (coding->type == coding_type_no_conversion
4490       || coding->type == coding_type_raw_text
4491       || coding->type == coding_type_emacs_mule
4492       || coding->type == coding_type_undecided)
4493     {
4494       /* We need no conversion, but don't have to skip any data here.
4495          Encoding routine handles them effectively anyway.  */
4496       return;
4497     }
4498
4499   translation_table = coding->translation_table_for_encode;
4500   if (NILP (translation_table) && !NILP (Venable_character_translation))
4501     translation_table = Vstandard_translation_table_for_encode;
4502   if (CHAR_TABLE_P (translation_table))
4503     {
4504       int i;
4505       for (i = 0; i < 128; i++)
4506         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4507           break;
4508       if (i < 128)
4509         /* Some ASCII character should be tranlsated.  We give up
4510            shrinking.  */
4511         return;
4512     }
4513
4514   if (str)
4515     {
4516       begp_orig = begp = str + *beg;
4517       endp_orig = endp = str + *end;
4518     }
4519   else
4520     {
4521       begp_orig = begp = BYTE_POS_ADDR (*beg);
4522       endp_orig = endp = begp + *end - *beg;
4523     }
4524
4525   eol_conversion = (coding->eol_type == CODING_EOL_CR
4526                     || coding->eol_type == CODING_EOL_CRLF);
4527
4528   /* Here, we don't have to check coding->pre_write_conversion because
4529      the caller is expected to have handled it already.  */
4530   switch (coding->type)
4531     {
4532     case coding_type_iso2022:
4533       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4534         /* We can't skip any data.  */
4535         break;
4536       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4537         {
4538           unsigned char *bol = begp;
4539           while (begp < endp && *begp < 0x80)
4540             {
4541               begp++;
4542               if (begp[-1] == '\n')
4543                 bol = begp;
4544             }
4545           begp = bol;
4546           goto label_skip_tail;
4547         }
4548       /* fall down ... */
4549
4550     case coding_type_sjis:
4551     case coding_type_big5:
4552       /* We can skip all ASCII characters at the head and tail.  */
4553       if (eol_conversion)
4554         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4555       else
4556         while (begp < endp && *begp < 0x80) begp++;
4557     label_skip_tail:
4558       if (eol_conversion)
4559         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4560       else
4561         while (begp < endp && *(endp - 1) < 0x80) endp--;
4562       break;
4563
4564     default:
4565       abort ();
4566     }
4567
4568   *beg += begp - begp_orig;
4569   *end += endp - endp_orig;
4570   return;
4571 }
4572
/* Shrinking a conversion region has a cost of its own, so it is only
   attempted when the length of the region is greater than this
   value.  */
static int shrink_conversion_region_threshhold = 1024;

/* Shrink the region delimited by *BEG and *END, provided it is longer
   than shrink_conversion_region_threshhold, by calling
   shrink_encoding_region when ENCODEP is nonzero and
   shrink_decoding_region otherwise.  BEG and END are evaluated more
   than once.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) <= shrink_conversion_region_threshhold)         \
      break;                                                            \
    if (encodep)                                                        \
      shrink_encoding_region (beg, end, coding, str);                   \
    else                                                                \
      shrink_decoding_region (beg, end, coding, str);                   \
  } while (0)
4586
4587 static Lisp_Object
4588 code_convert_region_unwind (dummy)
4589      Lisp_Object dummy;
4590 {
4591   inhibit_pre_post_conversion = 0;
4592   return Qnil;
4593 }
4594
4595 /* Store information about all compositions in the range FROM and TO
4596    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
4597    buffer or a string, defaults to the current buffer.  */
4598
4599 void
4600 coding_save_composition (coding, from, to, obj)
4601      struct coding_system *coding;
4602      int from, to;
4603      Lisp_Object obj;
4604 {
4605   Lisp_Object prop;
4606   int start, end;
4607
4608   if (coding->composing == COMPOSITION_DISABLED)
4609     return;
4610   if (!coding->cmp_data)
4611     coding_allocate_composition_data (coding, from);
4612   if (!find_composition (from, to, &start, &end, &prop, obj)
4613       || end > to)
4614     return;
4615   if (start < from
4616       && (!find_composition (end, to, &start, &end, &prop, obj)
4617           || end > to))
4618     return;
4619   coding->composing = COMPOSITION_NO;
4620   do
4621     {
4622       if (COMPOSITION_VALID_P (start, end, prop))
4623         {
4624           enum composition_method method = COMPOSITION_METHOD (prop);
4625           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
4626               >= COMPOSITION_DATA_SIZE)
4627             coding_allocate_composition_data (coding, from);
4628           /* For relative composition, we remember start and end
4629              positions, for the other compositions, we also remember
4630              components.  */
4631           CODING_ADD_COMPOSITION_START (coding, start - from, method);
4632           if (method != COMPOSITION_RELATIVE)
4633             {
4634               /* We must store a*/
4635               Lisp_Object val, ch;
4636
4637               val = COMPOSITION_COMPONENTS (prop);
4638               if (CONSP (val))
4639                 while (CONSP (val))
4640                   {
4641                     ch = XCAR (val), val = XCDR (val);
4642                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4643                   }
4644               else if (VECTORP (val) || STRINGP (val))
4645                 {
4646                   int len = (VECTORP (val)
4647                              ? XVECTOR (val)->size : XSTRING (val)->size);
4648                   int i;
4649                   for (i = 0; i < len; i++)
4650                     {
4651                       ch = (STRINGP (val)
4652                             ? Faref (val, make_number (i))
4653                             : XVECTOR (val)->contents[i]);
4654                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4655                     }
4656                 }
4657               else              /* INTEGERP (val) */
4658                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
4659             }
4660           CODING_ADD_COMPOSITION_END (coding, end - from);
4661         }
4662       start = end;
4663     }
4664   while (start < to
4665          && find_composition (start, to, &start, &end, &prop, obj)
4666          && end <= to);
4667
4668   /* Make coding->cmp_data point to the first memory block.  */
4669   while (coding->cmp_data->prev)
4670     coding->cmp_data = coding->cmp_data->prev;
4671   coding->cmp_data_start = 0;
4672 }
4673
4674 /* Reflect the saved information about compositions to OBJ.
4675    CODING->cmp_data points to a memory block for the informaiton.  OBJ
4676    is a buffer or a string, defaults to the current buffer.  */
4677
4678 void
4679 coding_restore_composition (coding, obj)
4680      struct coding_system *coding;
4681      Lisp_Object obj;
4682 {
4683   struct composition_data *cmp_data = coding->cmp_data;
4684
4685   if (!cmp_data)
4686     return;
4687
4688   while (cmp_data->prev)
4689     cmp_data = cmp_data->prev;
4690
4691   while (cmp_data)
4692     {
4693       int i;
4694
4695       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
4696            i += cmp_data->data[i])
4697         {
4698           int *data = cmp_data->data + i;
4699           enum composition_method method = (enum composition_method) data[3];
4700           Lisp_Object components;
4701
4702           if (method == COMPOSITION_RELATIVE)
4703             components = Qnil;
4704           else
4705             {
4706               int len = data[0] - 4, j;
4707               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
4708
4709               for (j = 0; j < len; j++)
4710                 args[j] = make_number (data[4 + j]);
4711               components = (method == COMPOSITION_WITH_ALTCHARS
4712                             ? Fstring (len, args) : Fvector (len, args));
4713             }
4714           compose_text (data[1], data[2], components, Qnil, obj);
4715         }
4716       cmp_data = cmp_data->next;
4717     }
4718 }
4719
4720 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4721    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4722    coding system CODING, and return the status code of code conversion
4723    (currently, this value has no meaning; every return below yields 0).
4724
4725    How many characters (and bytes) are converted to how many
4726    characters (and bytes) are recorded in members of the structure
4727    CODING (consumed, consumed_char, produced, produced_char).
4728
4729    If REPLACE is nonzero, we do various things as if the original text
4730    is deleted and a new text is inserted.  See the comments in
4731    replace_range (insdel.c) to know what we are doing.
4732
4733    If REPLACE is zero, it is assumed that the source text is unibyte.
4734    Otherwise, it is assumed that the source text is multibyte
     (more precisely, CODING->src_multibyte is set to REPLACE && the
     multibyteness of the current buffer).  */
4735
4736 int
4737 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4738      int from, from_byte, to, to_byte, encodep, replace;
4739      struct coding_system *coding;
4740 {
4741   int len = to - from, len_byte = to_byte - from_byte;
4742   int require, inserted, inserted_byte;
4743   int head_skip, tail_skip, total_skip = 0;
4744   Lisp_Object saved_coding_symbol;
4745   int first = 1;
4746   unsigned char *src, *dst;
4747   Lisp_Object deletion;
4748   int orig_point = PT, orig_len = len;
4749   int prev_Z;
4750   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
4751
4752   coding->src_multibyte = replace && multibyte_p;
4753   coding->dst_multibyte = multibyte_p;
4754
4755   deletion = Qnil;
4756   saved_coding_symbol = Qnil;
4757
       /* If point is strictly inside the region, move it to FROM and
          remember FROM as the position to restore at the end.  */
4758   if (from < PT && PT < to)
4759     {
4760       TEMP_SET_PT_BOTH (from, from_byte);
4761       orig_point = from;
4762     }
4763
4764   if (replace)
4765     {
4766       int saved_from = from;
4767       int saved_inhibit_modification_hooks;
4768
           /* prepare_to_modify_buffer is given the address of FROM; if
              FROM has changed afterwards, recompute TO and the byte
              positions from it.  */
4769       prepare_to_modify_buffer (from, to, &from);
4770       if (saved_from != from)
4771         {
4772           to = from + len;
4773           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4774           len_byte = to_byte - from_byte;
4775         }
4776
4777       /* The code conversion routine can not preserve text properties
4778          for now.  So, we must remove all text properties in the
4779          region.  Here, we must suppress all modification hooks.  */
4780       saved_inhibit_modification_hooks = inhibit_modification_hooks;
4781       inhibit_modification_hooks = 1;
4782       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
4783       inhibit_modification_hooks = saved_inhibit_modification_hooks;
4784     }
4785
4786   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4787     {
4788       /* We must detect encoding of text and eol format.  */
4789
4790       if (from < GPT && to > GPT)
4791         move_gap_both (from, from_byte);
4792       if (coding->type == coding_type_undecided)
4793         {
4794           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4795           if (coding->type == coding_type_undecided)
4796             /* It seems that the text contains only ASCII, but we
4797                should not leave it undecided because the deeper
4798                decoding routine (decode_coding) tries to detect the
4799                encodings again in vain.  */
4800             coding->type = coding_type_emacs_mule;
4801         }
4802       if (coding->eol_type == CODING_EOL_UNDECIDED
4803           && coding->type != coding_type_ccl)
4804         {
4805           saved_coding_symbol = coding->symbol;
4806           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4807           if (coding->eol_type == CODING_EOL_UNDECIDED)
4808             coding->eol_type = CODING_EOL_LF;
4809           /* We had better recover the original eol format if we
4810              encounter an inconsistent eol format while decoding.  */
4811           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4812         }
4813     }
4814
4815   /* Now we convert the text.  */
4816
4817   /* For encoding, we must process pre-write-conversion in advance.  */
4818   if (! inhibit_pre_post_conversion
4819       && encodep
4820       && SYMBOLP (coding->pre_write_conversion)
4821       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4822     {
4823       /* The function in pre-write-conversion may put a new text in a
4824          new buffer.  */
4825       struct buffer *prev = current_buffer;
4826       Lisp_Object new;
4827       int count = specpdl_ptr - specpdl;
4828
4829       record_unwind_protect (code_convert_region_unwind, Qnil);
4830       /* We should not call any more pre-write/post-read-conversion
4831          functions while this pre-write-conversion is running.  */
4832       inhibit_pre_post_conversion = 1;
4833       call2 (coding->pre_write_conversion,
4834              make_number (from), make_number (to));
4835       inhibit_pre_post_conversion = 0;
4836       /* Discard the unwind protect.  */
4837       specpdl_ptr--;
4838
4839       if (current_buffer != prev)
4840         {
               /* The function left another buffer current.  Replace the
                  region in the original buffer with the accessible
                  text of that buffer, kill it, and recompute the
                  region and the saved point.  */
4841           len = ZV - BEGV;
4842           new = Fcurrent_buffer ();
4843           set_buffer_internal_1 (prev);
4844           del_range_2 (from, from_byte, to, to_byte, 0);
4845           TEMP_SET_PT_BOTH (from, from_byte);
4846           insert_from_buffer (XBUFFER (new), 1, len, 0);
4847           Fkill_buffer (new);
4848           if (orig_point >= to)
4849             orig_point += len - orig_len;
4850           else if (orig_point > from)
4851             orig_point = from;
4852           orig_len = len;
4853           to = from + len;
4854           from_byte = CHAR_TO_BYTE (from);
4855           to_byte = CHAR_TO_BYTE (to);
4856           len_byte = to_byte - from_byte;
4857           TEMP_SET_PT_BOTH (from, from_byte);
4858         }
4859     }
4860
       /* Keep a copy of the text about to be replaced; it is handed to
          adjust_after_replace below.  */
4861   if (replace)
4862     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4863
4864   if (coding->composing != COMPOSITION_DISABLED)
4865     {
4866       if (encodep)
4867         coding_save_composition (coding, from, to, Fcurrent_buffer ());
4868       else
4869         coding_allocate_composition_data (coding, from);
4870     }
4871
4872   /* Try to skip the leading and trailing ASCIIs.  */
4873   if (coding->type != coding_type_ccl)
4874     {
4875       int from_byte_orig = from_byte, to_byte_orig = to_byte;
4876
4877       if (from < GPT && GPT < to)
4878         move_gap_both (from, from_byte);
4879       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
4880       if (from_byte == to_byte
4881           && (encodep || NILP (coding->post_read_conversion))
4882           && ! CODING_REQUIRE_FLUSHING (coding))
4883         {
               /* The whole region was skipped and nothing else remains
                  to be done: report the text as produced unchanged.  */
4884           coding->produced = len_byte;
4885           coding->produced_char = len;
4886           if (!replace)
4887             /* We must record and adjust for this new text now.  */
4888             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4889           return 0;
4890         }
4891
           /* The skipped byte counts are applied to the character
              positions and lengths as well, i.e. each skipped byte is
              counted as one character.  */
4892       head_skip = from_byte - from_byte_orig;
4893       tail_skip = to_byte_orig - to_byte;
4894       total_skip = head_skip + tail_skip;
4895       from += head_skip;
4896       to -= tail_skip;
4897       len -= total_skip; len_byte -= total_skip;
4898     }
4899
4900   /* For conversion, we must put the gap before the text in addition to
4901      making the gap larger for efficient decoding.  The required gap
4902      size starts from 2000 which is the magic number used in make_gap.
4903      But, after one batch of conversion, it will be incremented if we
4904      find that it is not enough.  */
4905   require = 2000;
4906
4907   if (GAP_SIZE  < require)
4908     make_gap (require - GAP_SIZE);
4909   move_gap_both (from, from_byte);
4910
4911   inserted = inserted_byte = 0;
4912
       /* Account for the source text as part of the gap: the gap grows
          by LEN_BYTE and the buffer end positions shrink by the same
          amount (see the memory diagrams in the loop below).  */
4913   GAP_SIZE += len_byte;
4914   ZV -= len;
4915   Z -= len;
4916   ZV_BYTE -= len_byte;
4917   Z_BYTE -= len_byte;
4918
4919   if (GPT - BEG < BEG_UNCHANGED)
4920     BEG_UNCHANGED = GPT - BEG;
4921   if (Z - GPT < END_UNCHANGED)
4922     END_UNCHANGED = Z - GPT;
4923
4924   if (!encodep && coding->src_multibyte)
4925     {
4926       /* Decoding routines expects that the source text is unibyte.
4927          We must convert 8-bit characters of multibyte form to
4928          unibyte.  */
4929       int len_byte_orig = len_byte;
4930       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
           /* If the text got shorter, move it so that it still ends at
              the end of the gap.  */
4931       if (len_byte < len_byte_orig)
4932         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
4933                     len_byte);
4934       coding->src_multibyte = 0;
4935     }
4936
4937   for (;;)
4938     {
4939       int result;
4940
4941       /* The buffer memory is now:
4942          +--------+converted-text+---------+-------original-text-------+---+
4943          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
4944                   |<---------------------- GAP ----------------------->|  */
4945       src = GAP_END_ADDR - len_byte;
4946       dst = GPT_ADDR + inserted_byte;
4947
4948       if (encodep)
4949         result = encode_coding (coding, src, dst, len_byte, 0);
4950       else
4951         result = decode_coding (coding, src, dst, len_byte, 0);
4952
4953       /* The buffer memory is now:
4954          +--------+-------converted-text----+--+------original-text----+---+
4955          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
4956                   |<---------------------- GAP ----------------------->|  */
4957
4958       inserted += coding->produced_char;
4959       inserted_byte += coding->produced;
4960       len_byte -= coding->consumed;
4961
4962       if (result == CODING_FINISH_INSUFFICIENT_CMP)
4963         {
               /* Get more room for composition data and go on.  */
4964           coding_allocate_composition_data (coding, from + inserted);
4965           continue;
4966         }
4967
4968       src += coding->consumed;
4969       dst += coding->produced;
4970
4971       if (result == CODING_FINISH_NORMAL)
4972         {
4973           src += len_byte;
4974           break;
4975         }
4976       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4977         {
4978           unsigned char *pend = dst, *p = pend - inserted_byte;
4979           Lisp_Object eol_type;
4980
4981           /* Encode LFs back to the original eol format (CR or CRLF).  */
4982           if (coding->eol_type == CODING_EOL_CR)
4983             {
4984               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4985             }
4986           else
4987             {
4988               int count = 0;
4989
4990               while (p < pend) if (*p++ == '\n') count++;
4991               if (src - dst < count)
4992                 {
4993                   /* We don't have sufficient room for encoding LFs
4994                      back to CRLF.  We must record converted and
4995                      not-yet-converted text back to the buffer
4996                      content, enlarge the gap, then record them out of
4997                      the buffer contents again.  */
4998                   int add = len_byte + inserted_byte;
4999
5000                   GAP_SIZE -= add;
5001                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5002                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5003                   make_gap (count - GAP_SIZE);
5004                   GAP_SIZE += add;
5005                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5006                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5007                   /* Don't forget to update SRC, DST, and PEND.  */
5008                   src = GAP_END_ADDR - len_byte;
5009                   dst = GPT_ADDR + inserted_byte;
5010                   pend = dst;
5011                 }
5012               inserted += count;
5013               inserted_byte += count;
5014               coding->produced += count;
                   /* Copy the converted text backward into its widened
                      place, putting a CR in front of every LF.  */
5015               p = dst = pend + count;
5016               while (count)
5017                 {
5018                   *--p = *--pend;
5019                   if (*p == '\n') count--, *--p = '\r';
5020                 }
5021             }
5022
5023           /* Suppress eol-format conversion in the further conversion.  */
5024           coding->eol_type = CODING_EOL_LF;
5025
5026           /* Set the coding system symbol to that for Unix-like EOL.  */
5027           eol_type = Fget (saved_coding_symbol, Qeol_type);
5028           if (VECTORP (eol_type)
5029               && XVECTOR (eol_type)->size == 3
5030               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5031             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5032           else
5033             coding->symbol = saved_coding_symbol;
5034
5035           continue;
5036         }
5037       if (len_byte <= 0)
5038         {
               /* All source bytes are consumed.  Only a CCL coding
                  system gets one more pass, with
                  CODING_MODE_LAST_BLOCK set.  */
5039           if (coding->type != coding_type_ccl
5040               || coding->mode & CODING_MODE_LAST_BLOCK)
5041             break;
5042           coding->mode |= CODING_MODE_LAST_BLOCK;
5043           continue;
5044         }
5045       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5046         {
5047           /* The source text ends in invalid codes.  Let's just
5048              make them valid buffer contents, and finish conversion.  */
5049           inserted += len_byte;
5050           inserted_byte += len_byte;
5051           while (len_byte--)
5052             *dst++ = *src++;
5053           break;
5054         }
5055       if (result == CODING_FINISH_INTERRUPT)
5056         {
5057           /* The conversion procedure was interrupted by a user.  */
5058           break;
5059         }
5060       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5061       if (coding->consumed < 1)
5062         {
5063           /* It's quite strange to require more memory without
5064              consuming any bytes.  Perhaps CCL program bug.  */
5065           break;
5066         }
5067       if (first)
5068         {
5069           /* We have just done the first batch of conversion which was
5070              stopped because of insufficient gap.  Let's reconsider the
5071              required gap size (i.e. SRC - DST) now.
5072
5073              We have converted ORIG bytes (== coding->consumed) into
5074              NEW bytes (coding->produced).  To convert the remaining
5075              LEN bytes, we may need REQUIRE bytes of gap, where:
5076                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5077                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5078              Here, we are sure that NEW >= ORIG.  */
5079           float ratio = coding->produced - coding->consumed;
5080           ratio /= coding->consumed;
5081           require = len_byte * ratio;
5082           first = 0;
5083         }
5084       if ((src - dst) < (require + 2000))
5085         {
5086           /* See the comment above the previous call of make_gap.  */
5087           int add = len_byte + inserted_byte;
5088
5089           GAP_SIZE -= add;
5090           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5091           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5092           make_gap (require + 2000);
5093           GAP_SIZE += add;
5094           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5095           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5096         }
5097     }
5098   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5099
5100   if (encodep && coding->dst_multibyte)
5101     {
5102       /* The output is unibyte.  We must convert 8-bit characters to
5103          multibyte form.  */
5104       if (inserted_byte * 2 > GAP_SIZE)
5105         {
               /* Temporarily count the produced text as buffer contents
                  while the gap is enlarged, then take it out again.  */
5106           GAP_SIZE -= inserted_byte;
5107           ZV += inserted_byte; Z += inserted_byte;
5108           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5109           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5110           make_gap (inserted_byte - GAP_SIZE);
5111           GAP_SIZE += inserted_byte;
5112           ZV -= inserted_byte; Z -= inserted_byte;
5113           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5114           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5115         }
5116       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5117     }
5118
5119   /* If we have shrunk the conversion area, adjust it now.  */
5120   if (total_skip > 0)
5121     {
           /* Bring the skipped tail right after the converted text and
              widen all positions and counts to cover the skipped head
              and tail again.  */
5122       if (tail_skip > 0)
5123         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5124       inserted += total_skip; inserted_byte += total_skip;
5125       GAP_SIZE += total_skip;
5126       GPT -= head_skip; GPT_BYTE -= head_skip;
5127       ZV -= total_skip; ZV_BYTE -= total_skip;
5128       Z -= total_skip; Z_BYTE -= total_skip;
5129       from -= head_skip; from_byte -= head_skip;
5130       to += tail_skip; to_byte += tail_skip;
5131     }
5132
       /* Let adjust_after_replace do the bookkeeping for the new text,
          and take the resulting change of Z as the number of inserted
          characters.  */
5133   prev_Z = Z;
5134   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5135   inserted = Z - prev_Z;
5136
5137   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5138     coding_restore_composition (coding, Fcurrent_buffer ());
5139   coding_free_composition_data (coding);
5140
5141   if (! inhibit_pre_post_conversion
5142       && ! encodep && ! NILP (coding->post_read_conversion))
5143     {
5144       Lisp_Object val;
5145       int count = specpdl_ptr - specpdl;
5146
5147       if (from != PT)
5148         TEMP_SET_PT_BOTH (from, from_byte);
5149       prev_Z = Z;
5150       record_unwind_protect (code_convert_region_unwind, Qnil);
5151       /* We should not call any more pre-write/post-read-conversion
5152          functions while this post-read-conversion is running.  */
5153       inhibit_pre_post_conversion = 1;
5154       val = call1 (coding->post_read_conversion, make_number (inserted));
5155       inhibit_pre_post_conversion = 0;
5156       /* Discard the unwind protect.  */
5157       specpdl_ptr--;
5158       CHECK_NUMBER (val, 0);
           /* The length of the result is taken from the change of Z,
              not from VAL.  */
5159       inserted += Z - prev_Z;
5160     }
5161
       /* Restore point: shifted by the change in length if it was at or
          after the end of the original text, at FROM if it was inside.  */
5162   if (orig_point >= from)
5163     {
5164       if (orig_point >= from + orig_len)
5165         orig_point += inserted - orig_len;
5166       else
5167         orig_point = from;
5168       TEMP_SET_PT (orig_point);
5169     }
5170
5171   if (replace)
5172     {
5173       signal_after_change (from, to - from, inserted);
5174       update_compositions (from, from + inserted, CHECK_BORDER);
5175     }
5176
       /* Report the totals for the whole region in CODING.  */
5177   {
5178     coding->consumed = to_byte - from_byte;
5179     coding->consumed_char = to - from;
5180     coding->produced = inserted_byte;
5181     coding->produced_char = inserted;
5182   }
5183
5184   return 0;
5185 }
5186
5187 Lisp_Object
5188 run_pre_post_conversion_on_str (str, coding, encodep)
5189      Lisp_Object str;
5190      struct coding_system *coding;
5191      int encodep;
5192 {
5193   int count = specpdl_ptr - specpdl;
5194   struct gcpro gcpro1;
5195   struct buffer *prev = current_buffer;
5196   int multibyte = STRING_MULTIBYTE (str);
5197
5198   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5199   record_unwind_protect (code_convert_region_unwind, Qnil);
5200   GCPRO1 (str);
5201   temp_output_buffer_setup (" *code-converting-work*");
5202   set_buffer_internal (XBUFFER (Vstandard_output));
5203   /* We must insert the contents of STR as is without
5204      unibyte<->multibyte conversion.  For that, we adjust the
5205      multibyteness of the working buffer to that of STR.  */
5206   Ferase_buffer ();
5207   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5208   insert_from_string (str, 0, 0,
5209                       XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
5210   UNGCPRO;
5211   inhibit_pre_post_conversion = 1;
5212   if (encodep)
5213     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5214   else
5215     {
5216       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5217       call1 (coding->post_read_conversion, make_number (Z - BEG));
5218     }
5219   inhibit_pre_post_conversion = 0;
5220   str = make_buffer_string (BEG, Z, 1);
5221   return unbind_to (count, str);
5222 }
5223
5224 Lisp_Object
5225 decode_coding_string (str, coding, nocopy)
5226      Lisp_Object str;
5227      struct coding_system *coding;
5228      int nocopy;
5229 {
5230   int len;
5231   struct conversion_buffer buf;
5232   int from, to, to_byte;
5233   struct gcpro gcpro1;
5234   Lisp_Object saved_coding_symbol;
5235   int result;
5236   int require_decoding;
5237   int shrinked_bytes = 0;
5238   Lisp_Object newstr;
5239   int consumed, consumed_char, produced, produced_char;
5240
5241   from = 0;
5242   to = XSTRING (str)->size;
5243   to_byte = STRING_BYTES (XSTRING (str));
5244
5245   saved_coding_symbol = Qnil;
5246   if (CODING_REQUIRE_DETECTION (coding))
5247     {
5248       /* See the comments in code_convert_region.  */
5249       if (coding->type == coding_type_undecided)
5250         {
5251           detect_coding (coding, XSTRING (str)->data, to_byte);
5252           if (coding->type == coding_type_undecided)
5253             coding->type = coding_type_emacs_mule;
5254         }
5255       if (coding->eol_type == CODING_EOL_UNDECIDED
5256           && coding->type != coding_type_ccl)
5257         {
5258           saved_coding_symbol = coding->symbol;
5259           detect_eol (coding, XSTRING (str)->data, to_byte);
5260           if (coding->eol_type == CODING_EOL_UNDECIDED)
5261             coding->eol_type = CODING_EOL_LF;
5262           /* We had better recover the original eol format if we
5263              encounter an inconsitent eol format while decoding.  */
5264           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5265         }
5266     }
5267
5268   require_decoding = CODING_REQUIRE_DECODING (coding);
5269
5270   if (STRING_MULTIBYTE (str))
5271     {
5272       /* Decoding routines expect the source text to be unibyte.  */
5273       str = Fstring_as_unibyte (str);
5274       to_byte = STRING_BYTES (XSTRING (str));
5275       nocopy = 1;
5276     }
5277   coding->src_multibyte = 0;
5278   coding->dst_multibyte = (coding->type != coding_type_no_conversion
5279                            && coding->type != coding_type_raw_text);
5280
5281   /* Try to skip the heading and tailing ASCIIs.  */
5282   if (require_decoding && coding->type != coding_type_ccl)
5283     {
5284       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5285                                 0);
5286       if (from == to_byte)
5287         require_decoding = 0;
5288       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5289     }
5290
5291   if (!require_decoding)
5292     {
5293       coding->consumed = STRING_BYTES (XSTRING (str));
5294       coding->consumed_char = XSTRING (str)->size;
5295       if (coding->dst_multibyte)
5296         {
5297           str = Fstring_as_multibyte (str);
5298           nocopy = 1;
5299         }
5300       coding->produced = STRING_BYTES (XSTRING (str));
5301       coding->produced_char = XSTRING (str)->size;
5302       return (nocopy ? str : Fcopy_sequence (str));
5303     }
5304
5305   if (coding->composing != COMPOSITION_DISABLED)
5306     coding_allocate_composition_data (coding, from);
5307   len = decoding_buffer_size (coding, to_byte - from);
5308   allocate_conversion_buffer (buf, len);
5309
5310   consumed = consumed_char = produced = produced_char = 0;
5311   while (1)
5312     {
5313       result = decode_coding (coding, XSTRING (str)->data + from + consumed,
5314                               buf.data + produced, to_byte - from - consumed,
5315                               buf.size - produced);
5316       consumed += coding->consumed;
5317       consumed_char += coding->consumed_char;
5318       produced += coding->produced;
5319       produced_char += coding->produced_char;
5320       if (result == CODING_FINISH_NORMAL
5321           || (result == CODING_FINISH_INSUFFICIENT_SRC
5322               && coding->consumed == 0))
5323         break;
5324       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5325         coding_allocate_composition_data (coding, from + produced_char);
5326       else if (result == CODING_FINISH_INSUFFICIENT_DST)
5327         extend_conversion_buffer (&buf);
5328       else if (result == CODING_FINISH_INCONSISTENT_EOL)
5329         {
5330           /* Recover the original EOL format.  */
5331           if (coding->eol_type == CODING_EOL_CR)
5332             {
5333               unsigned char *p;
5334               for (p = buf.data; p < buf.data + produced; p++)
5335                 if (*p == '\n') *p = '\r';
5336             }
5337           else if (coding->eol_type == CODING_EOL_CRLF)
5338             {
5339               int num_eol = 0;
5340               unsigned char *p0, *p1;
5341               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
5342                 if (*p0 == '\n') num_eol++;
5343               if (produced + num_eol >= buf.size)
5344                 extend_conversion_buffer (&buf);
5345               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
5346                 {
5347                   *--p1 = *--p0;
5348                   if (*p0 == '\n') *--p1 = '\r';
5349                 }
5350               produced += num_eol;
5351               produced_char += num_eol;
5352             }
5353           coding->eol_type = CODING_EOL_LF;
5354           coding->symbol = saved_coding_symbol;
5355         }
5356     }
5357
5358   coding->consumed = consumed;
5359   coding->consumed_char = consumed_char;
5360   coding->produced = produced;
5361   coding->produced_char = produced_char;
5362
5363   if (coding->dst_multibyte)
5364     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
5365                                            produced + shrinked_bytes);
5366   else
5367     newstr = make_uninit_string (produced + shrinked_bytes);
5368   if (from > 0)
5369     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5370   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5371   if (shrinked_bytes > from)
5372     bcopy (XSTRING (str)->data + to_byte,
5373            XSTRING (newstr)->data + from + produced,
5374            shrinked_bytes - from);
5375   free_conversion_buffer (&buf);
5376
5377   if (coding->cmp_data && coding->cmp_data->used)
5378     coding_restore_composition (coding, newstr);
5379   coding_free_composition_data (coding);
5380
5381   if (SYMBOLP (coding->post_read_conversion)
5382       && !NILP (Ffboundp (coding->post_read_conversion)))
5383     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
5384
5385   return newstr;
5386 }
5387
5388 Lisp_Object
5389 encode_coding_string (str, coding, nocopy)
5390      Lisp_Object str;
5391      struct coding_system *coding;
5392      int nocopy;
5393 {
5394   int len;
5395   struct conversion_buffer buf;
5396   int from, to, to_byte;
5397   struct gcpro gcpro1;
5398   Lisp_Object saved_coding_symbol;
5399   int result;
5400   int shrinked_bytes = 0;
5401   Lisp_Object newstr;
5402   int consumed, consumed_char, produced, produced_char;
5403
5404   if (SYMBOLP (coding->pre_write_conversion)
5405       && !NILP (Ffboundp (coding->pre_write_conversion)))
5406     str = run_pre_post_conversion_on_str (str, coding, 1);
5407
5408   from = 0;
5409   to = XSTRING (str)->size;
5410   to_byte = STRING_BYTES (XSTRING (str));
5411
5412   saved_coding_symbol = Qnil;
5413   if (! CODING_REQUIRE_ENCODING (coding))
5414     {
5415       coding->consumed = STRING_BYTES (XSTRING (str));
5416       coding->consumed_char = XSTRING (str)->size;
5417       if (STRING_MULTIBYTE (str))
5418         {
5419           str = Fstring_as_unibyte (str);
5420           nocopy = 1;
5421         }
5422       coding->produced = STRING_BYTES (XSTRING (str));
5423       coding->produced_char = XSTRING (str)->size;
5424       return (nocopy ? str : Fcopy_sequence (str));
5425     }
5426
5427   /* Encoding routines determine the multibyteness of the source text
5428      by coding->src_multibyte.  */
5429   coding->src_multibyte = STRING_MULTIBYTE (str);
5430   coding->dst_multibyte = 0;
5431
5432   if (coding->composing != COMPOSITION_DISABLED)
5433     coding_save_composition (coding, from, to, str);
5434
5435   /* Try to skip the heading and tailing ASCIIs.  */
5436   if (coding->type != coding_type_ccl)
5437     {
5438       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5439                                 1);
5440       if (from == to_byte)
5441         return (nocopy ? str : Fcopy_sequence (str));
5442       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5443     }
5444
5445   len = encoding_buffer_size (coding, to_byte - from);
5446   allocate_conversion_buffer (buf, len);
5447
5448   consumed = consumed_char = produced = produced_char = 0;
5449   while (1)
5450     {
5451       result = encode_coding (coding, XSTRING (str)->data + from + consumed,
5452                               buf.data + produced, to_byte - from - consumed,
5453                               buf.size - produced);
5454       consumed += coding->consumed;
5455       consumed_char += coding->consumed_char;
5456       produced += coding->produced;
5457       produced_char += coding->produced_char;
5458       if (result == CODING_FINISH_NORMAL
5459           || (result == CODING_FINISH_INSUFFICIENT_SRC
5460               && coding->consumed == 0))
5461         break;
5462       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
5463       extend_conversion_buffer (&buf);
5464     }
5465
5466   coding->consumed = consumed;
5467   coding->consumed_char = consumed_char;
5468   coding->produced = produced;
5469   coding->produced_char = produced_char;
5470
5471   newstr = make_uninit_string (produced + shrinked_bytes);
5472   if (from > 0)
5473     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5474   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5475   if (shrinked_bytes > from)
5476     bcopy (XSTRING (str)->data + to_byte,
5477            XSTRING (newstr)->data + from + produced,
5478            shrinked_bytes - from);
5479
5480   free_conversion_buffer (&buf);
5481   coding_free_composition_data (coding);
5482
5483   return newstr;
5484 }
5485
5486 \f
5487 #ifdef emacs
5488 /*** 8. Emacs Lisp library functions ***/
5489
5490 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
5491   "Return t if OBJECT is nil or a coding-system.\n\
5492 See the documentation of `make-coding-system' for information\n\
5493 about coding-system objects.")
5494   (obj)
5495      Lisp_Object obj;
5496 {
5497   if (NILP (obj))
5498     return Qt;
5499   if (!SYMBOLP (obj))
5500     return Qnil;
5501   /* Get coding-spec vector for OBJ.  */
5502   obj = Fget (obj, Qcoding_system);
5503   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
5504           ? Qt : Qnil);
5505 }
5506
5507 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
5508        Sread_non_nil_coding_system, 1, 1, 0,
5509   "Read a coding system from the minibuffer, prompting with string PROMPT.")
5510   (prompt)
5511      Lisp_Object prompt;
5512 {
5513   Lisp_Object val;
5514   do
5515     {
5516       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5517                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
5518     }
5519   while (XSTRING (val)->size == 0);
5520   return (Fintern (val, Qnil));
5521 }
5522
5523 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
5524   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
5525 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
5526   (prompt, default_coding_system)
5527      Lisp_Object prompt, default_coding_system;
5528 {
5529   Lisp_Object val;
5530   if (SYMBOLP (default_coding_system))
5531     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
5532   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5533                           Qt, Qnil, Qcoding_system_history,
5534                           default_coding_system, Qnil);
5535   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
5536 }
5537
5538 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
5539        1, 1, 0,
5540   "Check validity of CODING-SYSTEM.\n\
5541 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
5542 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
5543 The value of property should be a vector of length 5.")
5544   (coding_system)
5545      Lisp_Object coding_system;
5546 {
5547   CHECK_SYMBOL (coding_system, 0);
5548   if (!NILP (Fcoding_system_p (coding_system)))
5549     return coding_system;
5550   while (1)
5551     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
5552 }
5553 \f
5554 Lisp_Object
5555 detect_coding_system (src, src_bytes, highest)
5556      unsigned char *src;
5557      int src_bytes, highest;
5558 {
5559   int coding_mask, eol_type;
5560   Lisp_Object val, tmp;
5561   int dummy;
5562
5563   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
5564   eol_type  = detect_eol_type (src, src_bytes, &dummy);
5565   if (eol_type == CODING_EOL_INCONSISTENT)
5566     eol_type = CODING_EOL_UNDECIDED;
5567
5568   if (!coding_mask)
5569     {
5570       val = Qundecided;
5571       if (eol_type != CODING_EOL_UNDECIDED)
5572         {
5573           Lisp_Object val2;
5574           val2 = Fget (Qundecided, Qeol_type);
5575           if (VECTORP (val2))
5576             val = XVECTOR (val2)->contents[eol_type];
5577         }
5578       return (highest ? val : Fcons (val, Qnil));
5579     }
5580
5581   /* At first, gather possible coding systems in VAL.  */
5582   val = Qnil;
5583   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
5584     {
5585       Lisp_Object category_val, category_index;
5586
5587       category_index = Fget (XCAR (tmp), Qcoding_category_index);
5588       category_val = Fsymbol_value (XCAR (tmp));
5589       if (!NILP (category_val)
5590           && NATNUMP (category_index)
5591           && (coding_mask & (1 << XFASTINT (category_index))))
5592         {
5593           val = Fcons (category_val, val);
5594           if (highest)
5595             break;
5596         }
5597     }
5598   if (!highest)
5599     val = Fnreverse (val);
5600
5601   /* Then, replace the elements with subsidiary coding systems.  */
5602   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
5603     {
5604       if (eol_type != CODING_EOL_UNDECIDED
5605           && eol_type != CODING_EOL_INCONSISTENT)
5606         {
5607           Lisp_Object eol;
5608           eol = Fget (XCAR (tmp), Qeol_type);
5609           if (VECTORP (eol))
5610             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
5611         }
5612     }
5613   return (highest ? XCAR (val) : val);
5614 }
5615
5616 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
5617        2, 3, 0,
5618   "Detect coding system of the text in the region between START and END.\n\
5619 Return a list of possible coding systems ordered by priority.\n\
5620 \n\
5621 If only ASCII characters are found, it returns a list of single element\n\
5622 `undecided' or its subsidiary coding system according to a detected\n\
5623 end-of-line format.\n\
5624 \n\
5625 If optional argument HIGHEST is non-nil, return the coding system of\n\
5626 highest priority.")
5627   (start, end, highest)
5628      Lisp_Object start, end, highest;
5629 {
5630   int from, to;
5631   int from_byte, to_byte;
5632
5633   CHECK_NUMBER_COERCE_MARKER (start, 0);
5634   CHECK_NUMBER_COERCE_MARKER (end, 1);
5635
5636   validate_region (&start, &end);
5637   from = XINT (start), to = XINT (end);
5638   from_byte = CHAR_TO_BYTE (from);
5639   to_byte = CHAR_TO_BYTE (to);
5640
5641   if (from < GPT && to >= GPT)
5642     move_gap_both (to, to_byte);
5643
5644   return detect_coding_system (BYTE_POS_ADDR (from_byte),
5645                                to_byte - from_byte,
5646                                !NILP (highest));
5647 }
5648
5649 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
5650        1, 2, 0,
5651   "Detect coding system of the text in STRING.\n\
5652 Return a list of possible coding systems ordered by priority.\n\
5653 \n\
5654 If only ASCII characters are found, it returns a list of single element\n\
5655 `undecided' or its subsidiary coding system according to a detected\n\
5656 end-of-line format.\n\
5657 \n\
5658 If optional argument HIGHEST is non-nil, return the coding system of\n\
5659 highest priority.")
5660   (string, highest)
5661      Lisp_Object string, highest;
5662 {
5663   CHECK_STRING (string, 0);
5664
5665   return detect_coding_system (XSTRING (string)->data,
5666                                STRING_BYTES (XSTRING (string)),
5667                                !NILP (highest));
5668 }
5669
5670 /* Return an intersection of lists L1 and L2.  */
5671
5672 static Lisp_Object
5673 intersection (l1, l2)
5674      Lisp_Object l1, l2;
5675 {
5676   Lisp_Object val;
5677
5678   for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
5679     {
5680       if (!NILP (Fmemq (XCAR (l1), l2)))
5681         val = Fcons (XCAR (l1), val);
5682     }
5683   return val;
5684 }
5685
5686
5687 /*  Subroutine for Fsafe_coding_systems_region_internal.
5688
5689     Return a list of coding systems that safely encode the multibyte
5690     text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
5691     possible coding systems.  If it is nil, it means that we have not
5692     yet found any coding systems.
5693
5694     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
5695     element of WORK_TABLE is set to t once the element is looked up.
5696
5697     If a non-ASCII single byte char is found, set
5698     *single_byte_char_found to 1.  */
5699
5700 static Lisp_Object
5701 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
5702      unsigned char *p, *pend;
5703      Lisp_Object safe_codings, work_table;
5704      int *single_byte_char_found;
5705 {
5706   int c, len, idx;
5707   Lisp_Object val;
5708
5709   while (p < pend)
5710     {
5711       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
5712       p += len;
5713       if (ASCII_BYTE_P (c))
5714         /* We can ignore ASCII characters here.  */
5715         continue;
5716       if (SINGLE_BYTE_CHAR_P (c))
5717         *single_byte_char_found = 1;
5718       if (NILP (safe_codings))
5719         continue;
5720       /* Check the safe coding systems for C.  */
5721       val = char_table_ref_and_index (work_table, c, &idx);
5722       if (EQ (val, Qt))
5723         /* This element was already checked.  Ignore it.  */
5724         continue;
5725       /* Remember that we checked this element.  */
5726       CHAR_TABLE_SET (work_table, make_number (idx), Qt);
5727
5728       /* If there are some safe coding systems for C and we have
5729          already found the other set of coding systems for the
5730          different characters, get the intersection of them.  */
5731       if (!EQ (safe_codings, Qt) && !NILP (val))
5732         val = intersection (safe_codings, val);
5733       safe_codings = val;
5734     }
5735   return safe_codings;
5736 }
5737
5738
5739 /* Return a list of coding systems that safely encode the text between
5740    START and END.  If the text contains only ASCII or is unibyte,
5741    return t.  */
5742
5743 DEFUN ("find-coding-systems-region-internal",
5744        Ffind_coding_systems_region_internal,
5745        Sfind_coding_systems_region_internal, 2, 2, 0,
5746   "Internal use only.")
5747   (start, end)
5748      Lisp_Object start, end;
5749 {
5750   Lisp_Object work_table, safe_codings;
5751   int non_ascii_p = 0;
5752   int single_byte_char_found = 0;
5753   unsigned char *p1, *p1end, *p2, *p2end, *p;
5754   Lisp_Object args[2];
5755
5756   if (STRINGP (start))
5757     {
5758       if (!STRING_MULTIBYTE (start))
5759         return Qt;
5760       p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
5761       p2 = p2end = p1end;
5762       if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
5763         non_ascii_p = 1;
5764     }
5765   else
5766     {
5767       int from, to, stop;
5768
5769       CHECK_NUMBER_COERCE_MARKER (start, 0);
5770       CHECK_NUMBER_COERCE_MARKER (end, 1);
5771       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
5772         args_out_of_range (start, end);
5773       if (NILP (current_buffer->enable_multibyte_characters))
5774         return Qt;
5775       from = CHAR_TO_BYTE (XINT (start));
5776       to = CHAR_TO_BYTE (XINT (end));
5777       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
5778       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
5779       if (stop == to)
5780         p2 = p2end = p1end;
5781       else
5782         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
5783       if (XINT (end) - XINT (start) != to - from)
5784         non_ascii_p = 1;
5785     }
5786
5787   if (!non_ascii_p)
5788     {
5789       /* We are sure that the text contains no multibyte character.
5790          Check if it contains eight-bit-graphic.  */
5791       p = p1;
5792       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
5793       if (p == p1end)
5794         {
5795           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
5796           if (p == p2end)
5797             return Qt;
5798         }
5799     }
5800
5801   /* The text contains non-ASCII characters.  */
5802   work_table = Fcopy_sequence (Vchar_coding_system_table);
5803   safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
5804                                     &single_byte_char_found);
5805   if (p2 < p2end)
5806     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
5807                                       &single_byte_char_found);
5808
5809   if (!single_byte_char_found)
5810     {
5811       /* Append generic coding systems.  */
5812       Lisp_Object args[2];
5813       args[0] = safe_codings;
5814       args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
5815                                         make_number (0));
5816       safe_codings = Fappend (2, args);
5817     }
5818   else
5819     safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings));
5820   return safe_codings;
5821 }
5822
5823
5824 Lisp_Object
5825 code_convert_region1 (start, end, coding_system, encodep)
5826      Lisp_Object start, end, coding_system;
5827      int encodep;
5828 {
5829   struct coding_system coding;
5830   int from, to, len;
5831
5832   CHECK_NUMBER_COERCE_MARKER (start, 0);
5833   CHECK_NUMBER_COERCE_MARKER (end, 1);
5834   CHECK_SYMBOL (coding_system, 2);
5835
5836   validate_region (&start, &end);
5837   from = XFASTINT (start);
5838   to = XFASTINT (end);
5839
5840   if (NILP (coding_system))
5841     return make_number (to - from);
5842
5843   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5844     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5845
5846   coding.mode |= CODING_MODE_LAST_BLOCK;
5847   coding.src_multibyte = coding.dst_multibyte
5848     = !NILP (current_buffer->enable_multibyte_characters);
5849   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
5850                        &coding, encodep, 1);
5851   Vlast_coding_system_used = coding.symbol;
5852   return make_number (coding.produced_char);
5853 }
5854
5855 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
5856        3, 3, "r\nzCoding system: ",
5857   "Decode the current region by specified coding system.\n\
5858 When called from a program, takes three arguments:\n\
5859 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5860 This function sets `last-coding-system-used' to the precise coding system\n\
5861 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5862 not fully specified.)\n\
5863 It returns the length of the decoded text.")
5864   (start, end, coding_system)
5865      Lisp_Object start, end, coding_system;
5866 {
5867   return code_convert_region1 (start, end, coding_system, 0);
5868 }
5869
5870 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
5871        3, 3, "r\nzCoding system: ",
5872   "Encode the current region by specified coding system.\n\
5873 When called from a program, takes three arguments:\n\
5874 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5875 This function sets `last-coding-system-used' to the precise coding system\n\
5876 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5877 not fully specified.)\n\
5878 It returns the length of the encoded text.")
5879   (start, end, coding_system)
5880      Lisp_Object start, end, coding_system;
5881 {
5882   return code_convert_region1 (start, end, coding_system, 1);
5883 }
5884
5885 Lisp_Object
5886 code_convert_string1 (string, coding_system, nocopy, encodep)
5887      Lisp_Object string, coding_system, nocopy;
5888      int encodep;
5889 {
5890   struct coding_system coding;
5891
5892   CHECK_STRING (string, 0);
5893   CHECK_SYMBOL (coding_system, 1);
5894
5895   if (NILP (coding_system))
5896     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
5897
5898   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5899     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5900
5901   coding.mode |= CODING_MODE_LAST_BLOCK;
5902   string = (encodep
5903             ? encode_coding_string (string, &coding, !NILP (nocopy))
5904             : decode_coding_string (string, &coding, !NILP (nocopy)));
5905   Vlast_coding_system_used = coding.symbol;
5906
5907   return string;
5908 }
5909
5910 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
5911        2, 3, 0,
5912   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5913 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5914 if the decoding operation is trivial.\n\
5915 This function sets `last-coding-system-used' to the precise coding system\n\
5916 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5917 not fully specified.)")
5918   (string, coding_system, nocopy)
5919      Lisp_Object string, coding_system, nocopy;
5920 {
5921   return code_convert_string1 (string, coding_system, nocopy, 0);
5922 }
5923
5924 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
5925        2, 3, 0,
5926   "Encode STRING to CODING-SYSTEM, and return the result.\n\
5927 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5928 if the encoding operation is trivial.\n\
5929 This function sets `last-coding-system-used' to the precise coding system\n\
5930 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5931 not fully specified.)")
5932   (string, coding_system, nocopy)
5933      Lisp_Object string, coding_system, nocopy;
5934 {
5935   return code_convert_string1 (string, coding_system, nocopy, 1);
5936 }
5937
5938 /* Encode or decode STRING according to CODING_SYSTEM.
5939    Do not set Vlast_coding_system_used.
5940
5941    This function is called only from macros DECODE_FILE and
5942    ENCODE_FILE, thus we ignore character composition.  */
5943
5944 Lisp_Object
5945 code_convert_string_norecord (string, coding_system, encodep)
5946      Lisp_Object string, coding_system;
5947      int encodep;
5948 {
5949   struct coding_system coding;
5950
5951   CHECK_STRING (string, 0);
5952   CHECK_SYMBOL (coding_system, 1);
5953
5954   if (NILP (coding_system))
5955     return string;
5956
5957   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5958     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5959
5960   coding.composing = COMPOSITION_DISABLED;
5961   coding.mode |= CODING_MODE_LAST_BLOCK;
5962   return (encodep
5963           ? encode_coding_string (string, &coding, 1)
5964           : decode_coding_string (string, &coding, 1));
5965 }
5966 \f
5967 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
5968   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
5969 Return the corresponding character.")
5970   (code)
5971      Lisp_Object code;
5972 {
5973   unsigned char c1, c2, s1, s2;
5974   Lisp_Object val;
5975
5976   CHECK_NUMBER (code, 0);
5977   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
5978   if (s1 == 0)
5979     {
5980       if (s2 < 0x80)
5981         XSETFASTINT (val, s2);
5982       else if (s2 >= 0xA0 || s2 <= 0xDF)
5983         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
5984       else
5985         error ("Invalid Shift JIS code: %x", XFASTINT (code));
5986     }
5987   else
5988     {
5989       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
5990           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
5991         error ("Invalid Shift JIS code: %x", XFASTINT (code));
5992       DECODE_SJIS (s1, s2, c1, c2);
5993       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
5994     }
5995   return val;
5996 }
5997
5998 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
5999   "Encode a Japanese character CHAR to shift_jis encoding.\n\
6000 Return the corresponding code in SJIS.")
6001   (ch)
6002      Lisp_Object ch;
6003 {
6004   int charset, c1, c2, s1, s2;
6005   Lisp_Object val;
6006
6007   CHECK_NUMBER (ch, 0);
6008   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6009   if (charset == CHARSET_ASCII)
6010     {
6011       val = ch;
6012     }
6013   else if (charset == charset_jisx0208
6014            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6015     {
6016       ENCODE_SJIS (c1, c2, s1, s2);
6017       XSETFASTINT (val, (s1 << 8) | s2);
6018     }
6019   else if (charset == charset_katakana_jisx0201
6020            && c1 > 0x20 && c2 < 0xE0)
6021     {
6022       XSETFASTINT (val, c1 | 0x80);
6023     }
6024   else
6025     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6026   return val;
6027 }
6028
6029 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6030   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
6031 Return the corresponding character.")
6032   (code)
6033      Lisp_Object code;
6034 {
6035   int charset;
6036   unsigned char b1, b2, c1, c2;
6037   Lisp_Object val;
6038
6039   CHECK_NUMBER (code, 0);
6040   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6041   if (b1 == 0)
6042     {
6043       if (b2 >= 0x80)
6044         error ("Invalid BIG5 code: %x", XFASTINT (code));
6045       val = code;
6046     }
6047   else
6048     {
6049       if ((b1 < 0xA1 || b1 > 0xFE)
6050           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6051         error ("Invalid BIG5 code: %x", XFASTINT (code));
6052       DECODE_BIG5 (b1, b2, charset, c1, c2);
6053       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
6054     }
6055   return val;
6056 }
6057
6058 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6059   "Encode the Big5 character CHAR to BIG5 coding system.\n\
6060 Return the corresponding character code in Big5.")
6061   (ch)
6062      Lisp_Object ch;
6063 {
6064   int charset, c1, c2, b1, b2;
6065   Lisp_Object val;
6066
6067   CHECK_NUMBER (ch, 0);
6068   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6069   if (charset == CHARSET_ASCII)
6070     {
6071       val = ch;
6072     }
6073   else if ((charset == charset_big5_1
6074             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
6075            || (charset == charset_big5_2
6076                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
6077     {
6078       ENCODE_BIG5 (charset, c1, c2, b1, b2);
6079       XSETFASTINT (val, (b1 << 8) | b2);
6080     }
6081   else
6082     error ("Can't encode to Big5: %d", XFASTINT (ch));
6083   return val;
6084 }
6085 \f
6086 DEFUN ("set-terminal-coding-system-internal",
6087        Fset_terminal_coding_system_internal,
6088        Sset_terminal_coding_system_internal, 1, 1, 0, "")
6089   (coding_system)
6090      Lisp_Object coding_system;
6091 {
6092   CHECK_SYMBOL (coding_system, 0);
6093   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
6094   /* We had better not send unsafe characters to terminal.  */
6095   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
6096   /* Characer composition should be disabled.  */
6097   terminal_coding.composing = COMPOSITION_DISABLED;
6098   terminal_coding.src_multibyte = 1;
6099   terminal_coding.dst_multibyte = 0;
6100   return Qnil;
6101 }
6102
6103 DEFUN ("set-safe-terminal-coding-system-internal",
6104        Fset_safe_terminal_coding_system_internal,
6105        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
6106   (coding_system)
6107      Lisp_Object coding_system;
6108 {
6109   CHECK_SYMBOL (coding_system, 0);
6110   setup_coding_system (Fcheck_coding_system (coding_system),
6111                        &safe_terminal_coding);
6112   /* Characer composition should be disabled.  */
6113   safe_terminal_coding.composing = COMPOSITION_DISABLED;
6114   safe_terminal_coding.src_multibyte = 1;
6115   safe_terminal_coding.dst_multibyte = 0;
6116   return Qnil;
6117 }
6118
6119 DEFUN ("terminal-coding-system",
6120        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6121   "Return coding system specified for terminal output.")
6122   ()
6123 {
6124   return terminal_coding.symbol;
6125 }
6126
6127 DEFUN ("set-keyboard-coding-system-internal",
6128        Fset_keyboard_coding_system_internal,
6129        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
6130   (coding_system)
6131      Lisp_Object coding_system;
6132 {
6133   CHECK_SYMBOL (coding_system, 0);
6134   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
6135   /* Characer composition should be disabled.  */
6136   keyboard_coding.composing = COMPOSITION_DISABLED;
6137   return Qnil;
6138 }
6139
6140 DEFUN ("keyboard-coding-system",
6141        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6142   "Return coding system specified for decoding keyboard input.")
6143   ()
6144 {
6145   return keyboard_coding.symbol;
6146 }
6147
6148 \f
6149 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
6150        Sfind_operation_coding_system,  1, MANY, 0,
6151   "Choose a coding system for an operation based on the target name.\n\
6152 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
6153 DECODING-SYSTEM is the coding system to use for decoding\n\
6154 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
6155 for encoding (in case OPERATION does encoding).\n\
6156 \n\
6157 The first argument OPERATION specifies an I/O primitive:\n\
6158   For file I/O, `insert-file-contents' or `write-region'.\n\
6159   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
6160   For network I/O, `open-network-stream'.\n\
6161 \n\
6162 The remaining arguments should be the same arguments that were passed\n\
6163 to the primitive.  Depending on which primitive, one of those arguments\n\
6164 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
6165 whichever argument specifies the file name is TARGET.\n\
6166 \n\
6167 TARGET has a meaning which depends on OPERATION:\n\
6168   For file I/O, TARGET is a file name.\n\
6169   For process I/O, TARGET is a process name.\n\
6170   For network I/O, TARGET is a service name or a port number\n\
6171 \n\
6172 This function looks up what specified for TARGET in,\n\
6173 `file-coding-system-alist', `process-coding-system-alist',\n\
6174 or `network-coding-system-alist' depending on OPERATION.\n\
6175 They may specify a coding system, a cons of coding systems,\n\
6176 or a function symbol to call.\n\
6177 In the last case, we call the function with one argument,\n\
6178 which is a list of all the arguments given to this function.")
6179   (nargs, args)
6180      int nargs;
6181      Lisp_Object *args;
6182 {
6183   Lisp_Object operation, target_idx, target, val;
6184   register Lisp_Object chain;
6185
6186   if (nargs < 2)
6187     error ("Too few arguments");
6188   operation = args[0];
6189   if (!SYMBOLP (operation)
6190       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
6191     error ("Invalid first arguement");
6192   if (nargs < 1 + XINT (target_idx))
6193     error ("Too few arguments for operation: %s",
6194            XSYMBOL (operation)->name->data);
6195   target = args[XINT (target_idx) + 1];
6196   if (!(STRINGP (target)
6197         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
6198     error ("Invalid %dth argument", XINT (target_idx) + 1);
6199
6200   chain = ((EQ (operation, Qinsert_file_contents)
6201             || EQ (operation, Qwrite_region))
6202            ? Vfile_coding_system_alist
6203            : (EQ (operation, Qopen_network_stream)
6204               ? Vnetwork_coding_system_alist
6205               : Vprocess_coding_system_alist));
6206   if (NILP (chain))
6207     return Qnil;
6208
6209   for (; CONSP (chain); chain = XCDR (chain))
6210     {
6211       Lisp_Object elt;
6212       elt = XCAR (chain);
6213
6214       if (CONSP (elt)
6215           && ((STRINGP (target)
6216                && STRINGP (XCAR (elt))
6217                && fast_string_match (XCAR (elt), target) >= 0)
6218               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6219         {
6220           val = XCDR (elt);
6221           /* Here, if VAL is both a valid coding system and a valid
6222              function symbol, we return VAL as a coding system.  */
6223           if (CONSP (val))
6224             return val;
6225           if (! SYMBOLP (val))
6226             return Qnil;
6227           if (! NILP (Fcoding_system_p (val)))
6228             return Fcons (val, val);
6229           if (! NILP (Ffboundp (val)))
6230             {
6231               val = call1 (val, Flist (nargs, args));
6232               if (CONSP (val))
6233                 return val;
6234               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
6235                 return Fcons (val, val);
6236             }
6237           return Qnil;
6238         }
6239     }
6240   return Qnil;
6241 }
6242
6243 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
6244        Supdate_coding_systems_internal, 0, 0, 0,
6245   "Update internal database for ISO2022 and CCL based coding systems.\n\
6246 When values of any coding categories are changed, you must\n\
6247 call this function")
6248   ()
6249 {
6250   int i;
6251
6252   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
6253     {
6254       Lisp_Object val;
6255
6256       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
6257       if (!NILP (val))
6258         {
6259           if (! coding_system_table[i])
6260             coding_system_table[i] = ((struct coding_system *)
6261                                       xmalloc (sizeof (struct coding_system)));
6262           setup_coding_system (val, coding_system_table[i]);
6263         }
6264       else if (coding_system_table[i])
6265         {
6266           xfree (coding_system_table[i]);
6267           coding_system_table[i] = NULL;
6268         }
6269     }
6270
6271   return Qnil;
6272 }
6273
6274 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
6275        Sset_coding_priority_internal, 0, 0, 0,
6276   "Update internal database for the current value of `coding-category-list'.\n\
6277 This function is internal use only.")
6278   ()
6279 {
6280   int i = 0, idx;
6281   Lisp_Object val;
6282
6283   val = Vcoding_category_list;
6284
6285   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
6286     {
6287       if (! SYMBOLP (XCAR (val)))
6288         break;
6289       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
6290       if (idx >= CODING_CATEGORY_IDX_MAX)
6291         break;
6292       coding_priorities[i++] = (1 << idx);
6293       val = XCDR (val);
6294     }
6295   /* If coding-category-list is valid and contains all coding
6296      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
6297      the following code saves Emacs from crashing.  */
6298   while (i < CODING_CATEGORY_IDX_MAX)
6299     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
6300
6301   return Qnil;
6302 }
6303
6304 #endif /* emacs */
6305
6306 \f
6307 /*** 9. Post-amble ***/
6308
6309 void
6310 init_coding_once ()
6311 {
6312   int i;
6313
6314   /* Emacs' internal format specific initialize routine.  */
6315   for (i = 0; i <= 0x20; i++)
6316     emacs_code_class[i] = EMACS_control_code;
6317   emacs_code_class[0x0A] = EMACS_linefeed_code;
6318   emacs_code_class[0x0D] = EMACS_carriage_return_code;
6319   for (i = 0x21 ; i < 0x7F; i++)
6320     emacs_code_class[i] = EMACS_ascii_code;
6321   emacs_code_class[0x7F] = EMACS_control_code;
6322   for (i = 0x80; i < 0xFF; i++)
6323     emacs_code_class[i] = EMACS_invalid_code;
6324   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
6325   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
6326   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6327   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6328
6329   /* ISO2022 specific initialize routine.  */
6330   for (i = 0; i < 0x20; i++)
6331     iso_code_class[i] = ISO_control_0;
6332   for (i = 0x21; i < 0x7F; i++)
6333     iso_code_class[i] = ISO_graphic_plane_0;
6334   for (i = 0x80; i < 0xA0; i++)
6335     iso_code_class[i] = ISO_control_1;
6336   for (i = 0xA1; i < 0xFF; i++)
6337     iso_code_class[i] = ISO_graphic_plane_1;
6338   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6339   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6340   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
6341   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
6342   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
6343   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6344   iso_code_class[ISO_CODE_ESC] = ISO_escape;
6345   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6346   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6347   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6348
6349   setup_coding_system (Qnil, &keyboard_coding);
6350   setup_coding_system (Qnil, &terminal_coding);
6351   setup_coding_system (Qnil, &safe_terminal_coding);
6352   setup_coding_system (Qnil, &default_buffer_file_coding);
6353
6354   bzero (coding_system_table, sizeof coding_system_table);
6355
6356   bzero (ascii_skip_code, sizeof ascii_skip_code);
6357   for (i = 0; i < 128; i++)
6358     ascii_skip_code[i] = 1;
6359
6360 #if defined (MSDOS) || defined (WINDOWSNT)
6361   system_eol_type = CODING_EOL_CRLF;
6362 #else
6363   system_eol_type = CODING_EOL_LF;
6364 #endif
6365
6366   inhibit_pre_post_conversion = 0;
6367 }
6368
6369 #ifdef emacs
6370
6371 void
6372 syms_of_coding ()
6373 {
6374   Qtarget_idx = intern ("target-idx");
6375   staticpro (&Qtarget_idx);
6376
6377   Qcoding_system_history = intern ("coding-system-history");
6378   staticpro (&Qcoding_system_history);
6379   Fset (Qcoding_system_history, Qnil);
6380
6381   /* Target FILENAME is the first argument.  */
6382   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
6383   /* Target FILENAME is the third argument.  */
6384   Fput (Qwrite_region, Qtarget_idx, make_number (2));
6385
6386   Qcall_process = intern ("call-process");
6387   staticpro (&Qcall_process);
6388   /* Target PROGRAM is the first argument.  */
6389   Fput (Qcall_process, Qtarget_idx, make_number (0));
6390
6391   Qcall_process_region = intern ("call-process-region");
6392   staticpro (&Qcall_process_region);
6393   /* Target PROGRAM is the third argument.  */
6394   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
6395
6396   Qstart_process = intern ("start-process");
6397   staticpro (&Qstart_process);
6398   /* Target PROGRAM is the third argument.  */
6399   Fput (Qstart_process, Qtarget_idx, make_number (2));
6400
6401   Qopen_network_stream = intern ("open-network-stream");
6402   staticpro (&Qopen_network_stream);
6403   /* Target SERVICE is the fourth argument.  */
6404   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
6405
6406   Qcoding_system = intern ("coding-system");
6407   staticpro (&Qcoding_system);
6408
6409   Qeol_type = intern ("eol-type");
6410   staticpro (&Qeol_type);
6411
6412   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
6413   staticpro (&Qbuffer_file_coding_system);
6414
6415   Qpost_read_conversion = intern ("post-read-conversion");
6416   staticpro (&Qpost_read_conversion);
6417
6418   Qpre_write_conversion = intern ("pre-write-conversion");
6419   staticpro (&Qpre_write_conversion);
6420
6421   Qno_conversion = intern ("no-conversion");
6422   staticpro (&Qno_conversion);
6423
6424   Qundecided = intern ("undecided");
6425   staticpro (&Qundecided);
6426
6427   Qcoding_system_p = intern ("coding-system-p");
6428   staticpro (&Qcoding_system_p);
6429
6430   Qcoding_system_error = intern ("coding-system-error");
6431   staticpro (&Qcoding_system_error);
6432
6433   Fput (Qcoding_system_error, Qerror_conditions,
6434         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
6435   Fput (Qcoding_system_error, Qerror_message,
6436         build_string ("Invalid coding system"));
6437
6438   Qcoding_category = intern ("coding-category");
6439   staticpro (&Qcoding_category);
6440   Qcoding_category_index = intern ("coding-category-index");
6441   staticpro (&Qcoding_category_index);
6442
6443   Vcoding_category_table
6444     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
6445   staticpro (&Vcoding_category_table);
6446   {
6447     int i;
6448     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
6449       {
6450         XVECTOR (Vcoding_category_table)->contents[i]
6451           = intern (coding_category_name[i]);
6452         Fput (XVECTOR (Vcoding_category_table)->contents[i],
6453               Qcoding_category_index, make_number (i));
6454       }
6455   }
6456
6457   Qtranslation_table = intern ("translation-table");
6458   staticpro (&Qtranslation_table);
6459   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
6460
6461   Qtranslation_table_id = intern ("translation-table-id");
6462   staticpro (&Qtranslation_table_id);
6463
6464   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
6465   staticpro (&Qtranslation_table_for_decode);
6466
6467   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
6468   staticpro (&Qtranslation_table_for_encode);
6469
6470   Qsafe_chars = intern ("safe-chars");
6471   staticpro (&Qsafe_chars);
6472
6473   Qchar_coding_system = intern ("char-coding-system");
6474   staticpro (&Qchar_coding_system);
6475
6476   /* Intern this now in case it isn't already done.
6477      Setting this variable twice is harmless.
6478      But don't staticpro it here--that is done in alloc.c.  */
6479   Qchar_table_extra_slots = intern ("char-table-extra-slots");
6480   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
6481   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
6482
6483   Qvalid_codes = intern ("valid-codes");
6484   staticpro (&Qvalid_codes);
6485
6486   Qemacs_mule = intern ("emacs-mule");
6487   staticpro (&Qemacs_mule);
6488
6489   Qraw_text = intern ("raw-text");
6490   staticpro (&Qraw_text);
6491
6492   defsubr (&Scoding_system_p);
6493   defsubr (&Sread_coding_system);
6494   defsubr (&Sread_non_nil_coding_system);
6495   defsubr (&Scheck_coding_system);
6496   defsubr (&Sdetect_coding_region);
6497   defsubr (&Sdetect_coding_string);
6498   defsubr (&Sfind_coding_systems_region_internal);
6499   defsubr (&Sdecode_coding_region);
6500   defsubr (&Sencode_coding_region);
6501   defsubr (&Sdecode_coding_string);
6502   defsubr (&Sencode_coding_string);
6503   defsubr (&Sdecode_sjis_char);
6504   defsubr (&Sencode_sjis_char);
6505   defsubr (&Sdecode_big5_char);
6506   defsubr (&Sencode_big5_char);
6507   defsubr (&Sset_terminal_coding_system_internal);
6508   defsubr (&Sset_safe_terminal_coding_system_internal);
6509   defsubr (&Sterminal_coding_system);
6510   defsubr (&Sset_keyboard_coding_system_internal);
6511   defsubr (&Skeyboard_coding_system);
6512   defsubr (&Sfind_operation_coding_system);
6513   defsubr (&Supdate_coding_systems_internal);
6514   defsubr (&Sset_coding_priority_internal);
6515
6516   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
6517     "List of coding systems.\n\
6518 \n\
6519 Do not alter the value of this variable manually.  This variable should be\n\
6520 updated by the functions `make-coding-system' and\n\
6521 `define-coding-system-alias'.");
6522   Vcoding_system_list = Qnil;
6523
6524   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
6525     "Alist of coding system names.\n\
6526 Each element is one element list of coding system name.\n\
6527 This variable is given to `completing-read' as TABLE argument.\n\
6528 \n\
6529 Do not alter the value of this variable manually.  This variable should be\n\
6530 updated by the functions `make-coding-system' and\n\
6531 `define-coding-system-alias'.");
6532   Vcoding_system_alist = Qnil;
6533
6534   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
6535     "List of coding-categories (symbols) ordered by priority.");
6536   {
6537     int i;
6538
6539     Vcoding_category_list = Qnil;
6540     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
6541       Vcoding_category_list
6542         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
6543                  Vcoding_category_list);
6544   }
6545
6546   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
6547     "Specify the coding system for read operations.\n\
6548 It is useful to bind this variable with `let', but do not set it globally.\n\
6549 If the value is a coding system, it is used for decoding on read operation.\n\
6550 If not, an appropriate element is used from one of the coding system alists:\n\
6551 There are three such tables, `file-coding-system-alist',\n\
6552 `process-coding-system-alist', and `network-coding-system-alist'.");
6553   Vcoding_system_for_read = Qnil;
6554
6555   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
6556     "Specify the coding system for write operations.\n\
6557 Programs bind this variable with `let', but you should not set it globally.\n\
6558 If the value is a coding system, it is used for encoding of output,\n\
6559 when writing it to a file and when sending it to a file or subprocess.\n\
6560 \n\
6561 If this does not specify a coding system, an appropriate element\n\
6562 is used from one of the coding system alists:\n\
6563 There are three such tables, `file-coding-system-alist',\n\
6564 `process-coding-system-alist', and `network-coding-system-alist'.\n\
6565 For output to files, if the above procedure does not specify a coding system,\n\
6566 the value of `buffer-file-coding-system' is used.");
6567   Vcoding_system_for_write = Qnil;
6568
6569   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
6570     "Coding system used in the latest file or process I/O.");
6571   Vlast_coding_system_used = Qnil;
6572
6573   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
6574     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
6575 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
6576 such conversion.");
6577   inhibit_eol_conversion = 0;
6578
6579   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
6580     "Non-nil means process buffer inherits coding system of process output.\n\
6581 Bind it to t if the process output is to be treated as if it were a file\n\
6582 read from some filesystem.");
6583   inherit_process_coding_system = 0;
6584
6585   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
6586     "Alist to decide a coding system to use for a file I/O operation.\n\
6587 The format is ((PATTERN . VAL) ...),\n\
6588 where PATTERN is a regular expression matching a file name,\n\
6589 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6590 If VAL is a coding system, it is used for both decoding and encoding\n\
6591 the file contents.\n\
6592 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6593 and the cdr part is used for encoding.\n\
6594 If VAL is a function symbol, the function must return a coding system\n\
6595 or a cons of coding systems which are used as above.\n\
6596 \n\
6597 See also the function `find-operation-coding-system'\n\
6598 and the variable `auto-coding-alist'.");
6599   Vfile_coding_system_alist = Qnil;
6600
6601   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
6602     "Alist to decide a coding system to use for a process I/O operation.\n\
6603 The format is ((PATTERN . VAL) ...),\n\
6604 where PATTERN is a regular expression matching a program name,\n\
6605 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6606 If VAL is a coding system, it is used for both decoding what received\n\
6607 from the program and encoding what sent to the program.\n\
6608 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6609 and the cdr part is used for encoding.\n\
6610 If VAL is a function symbol, the function must return a coding system\n\
6611 or a cons of coding systems which are used as above.\n\
6612 \n\
6613 See also the function `find-operation-coding-system'.");
6614   Vprocess_coding_system_alist = Qnil;
6615
6616   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
6617     "Alist to decide a coding system to use for a network I/O operation.\n\
6618 The format is ((PATTERN . VAL) ...),\n\
6619 where PATTERN is a regular expression matching a network service name\n\
6620 or is a port number to connect to,\n\
6621 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6622 If VAL is a coding system, it is used for both decoding what received\n\
6623 from the network stream and encoding what sent to the network stream.\n\
6624 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6625 and the cdr part is used for encoding.\n\
6626 If VAL is a function symbol, the function must return a coding system\n\
6627 or a cons of coding systems which are used as above.\n\
6628 \n\
6629 See also the function `find-operation-coding-system'.");
6630   Vnetwork_coding_system_alist = Qnil;
6631
6632   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
6633     "Coding system to use with system messages.");
6634   Vlocale_coding_system = Qnil;
6635
6636   /* The eol mnemonics are reset in startup.el system-dependently.  */
6637   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
6638     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
6639   eol_mnemonic_unix = build_string (":");
6640
6641   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
6642     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
6643   eol_mnemonic_dos = build_string ("\\");
6644
6645   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
6646     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
6647   eol_mnemonic_mac = build_string ("/");
6648
6649   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
6650     "*String displayed in mode line when end-of-line format is not yet determined.");
6651   eol_mnemonic_undecided = build_string (":");
6652
6653   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
6654     "*Non-nil enables character translation while encoding and decoding.");
6655   Venable_character_translation = Qt;
6656
6657   DEFVAR_LISP ("standard-translation-table-for-decode",
6658     &Vstandard_translation_table_for_decode,
6659     "Table for translating characters while decoding.");
6660   Vstandard_translation_table_for_decode = Qnil;
6661
6662   DEFVAR_LISP ("standard-translation-table-for-encode",
6663     &Vstandard_translation_table_for_encode,
6664     "Table for translationg characters while encoding.");
6665   Vstandard_translation_table_for_encode = Qnil;
6666
6667   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
6668     "Alist of charsets vs revision numbers.\n\
6669 While encoding, if a charset (car part of an element) is found,\n\
6670 designate it with the escape sequence identifing revision (cdr part of the element).");
6671   Vcharset_revision_alist = Qnil;
6672
6673   DEFVAR_LISP ("default-process-coding-system",
6674                &Vdefault_process_coding_system,
6675     "Cons of coding systems used for process I/O by default.\n\
6676 The car part is used for decoding a process output,\n\
6677 the cdr part is used for encoding a text to be sent to a process.");
6678   Vdefault_process_coding_system = Qnil;
6679
6680   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
6681     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
6682 This is a vector of length 256.\n\
6683 If Nth element is non-nil, the existence of code N in a file\n\
6684 \(or output of subprocess) doesn't prevent it to be detected as\n\
6685 a coding system of ISO 2022 variant which has a flag\n\
6686 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
6687 or reading output of a subprocess.\n\
6688 Only 128th through 159th elements has a meaning.");
6689   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
6690
6691   DEFVAR_LISP ("select-safe-coding-system-function",
6692                &Vselect_safe_coding_system_function,
6693     "Function to call to select safe coding system for encoding a text.\n\
6694 \n\
6695 If set, this function is called to force a user to select a proper\n\
6696 coding system which can encode the text in the case that a default\n\
6697 coding system used in each operation can't encode the text.\n\
6698 \n\
6699 The default value is `select-safe-coding-system' (which see).");
6700   Vselect_safe_coding_system_function = Qnil;
6701
6702   DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
6703     "Char-table containing safe coding systems of each characters.\n\
6704 Each element doesn't include such generic coding systems that can\n\
6705 encode any characters.   They are in the first extra slot.");
6706   Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
6707
6708   DEFVAR_BOOL ("inhibit-iso-escape-detection",
6709                &inhibit_iso_escape_detection,
6710     "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
6711 \n\
6712 By default, on reading a file, Emacs tries to detect how the text is\n\
6713 encoded.  This code detection is sensitive to escape sequences.  If\n\
6714 the sequence is valid as ISO2022, the code is determined as one of\n\
6715 the ISO2022 encodings, and the file is decoded by the corresponding\n\
6716 coding system (e.g. `iso-2022-7bit').\n\
6717 \n\
6718 However, there may be a case that you want to read escape sequences in\n\
6719 a file as is.  In such a case, you can set this variable to non-nil.\n\
6720 Then, as the code detection ignores any escape sequences, no file is\n\
6721 detected as encoded in some ISO2022 encoding.  The result is that all\n\
6722 escape sequences become visible in a buffer.\n\
6723 \n\
6724 The default value is nil, and it is strongly recommended not to change\n\
6725 it.  That is because many Emacs Lisp source files that contain\n\
6726 non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
6727 in Emacs's distribution, and they won't be decoded correctly on\n\
6728 reading if you suppress escape sequence detection.\n\
6729 \n\
6730 The other way to read escape sequences in a file without decoding is\n\
6731 to explicitly specify some coding system that doesn't use ISO2022's\n\
6732 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
6733   inhibit_iso_escape_detection = 0;
6734 }
6735
6736 char *
6737 emacs_strerror (error_number)
6738      int error_number;
6739 {
6740   char *str;
6741
6742   synchronize_system_messages_locale ();
6743   str = strerror (error_number);
6744
6745   if (! NILP (Vlocale_coding_system))
6746     {
6747       Lisp_Object dec = code_convert_string_norecord (build_string (str),
6748                                                       Vlocale_coding_system,
6749                                                       0);
6750       str = (char *) XSTRING (dec)->data;
6751     }
6752
6753   return str;
6754 }
6755
6756 #endif /* emacs */
6757