src/coding.c

    1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   0. General comments
  25   1. Preamble
  26   2. Emacs' internal format (emacs-mule) handlers
  27   3. ISO2022 handlers
  28   4. Shift-JIS and BIG5 handlers
  29   5. CCL handlers
  30   6. End-of-line handlers
  31   7. C library functions
  32   8. Emacs Lisp library functions
  33   9. Post-amble
  34
  35 */
  36
  37 /*** 0. General comments ***/
  38
  39
  40 /*** GENERAL NOTE on CODING SYSTEM ***
  41
  42   Coding system is an encoding mechanism of one or more character
  43   sets.  Here's a list of coding systems which Emacs can handle.  When
  44   we say "decode", it means converting some other coding system to
   45   Emacs' internal format (emacs-mule), and when we say "encode",
  46   it means converting the coding system emacs-mule to some other
  47   coding system.
  48
  49   0. Emacs' internal format (emacs-mule)
  50
  51   Emacs itself holds a multi-lingual character in a buffer and a string
  52   in a special format.  Details are described in section 2.
  53
  54   1. ISO2022
  55
  56   The most famous coding system for multiple character sets.  X's
  57   Compound Text, various EUCs (Extended Unix Code), and coding
  58   systems used in Internet communication such as ISO-2022-JP are
  59   all variants of ISO2022.  Details are described in section 3.
  60
  61   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  62
  63   A coding system to encode character sets: ASCII, JISX0201, and
  64   JISX0208.  Widely used for PC's in Japan.  Details are described in
  65   section 4.
  66
  67   3. BIG5
  68
  69   A coding system to encode character sets: ASCII and Big5.  Widely
  70   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  71   described in section 4.  In this file, when we write "BIG5"
  72   (all uppercase), we mean the coding system, and when we write
  73   "Big5" (capitalized), we mean the character set.
  74
  75   4. Raw text
  76
  77   A coding system for a text containing random 8-bit code.  Emacs does
  78   no code conversion on such a text except for end-of-line format.
  79
  80   5. Other
  81
  82   If a user wants to read/write a text encoded in a coding system not
  83   listed above, he can supply a decoder and an encoder for it in CCL
  84   (Code Conversion Language) programs.  Emacs executes the CCL program
  85   while reading/writing.
  86
  87   Emacs represents a coding system by a Lisp symbol that has a property
  88   `coding-system'.  But, before actually using the coding system, the
  89   information about it is set in a structure of type `struct
  90   coding_system' for rapid processing.  See section 6 for more details.
  91
  92 */
  93
  94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  95
  96   How end-of-line of a text is encoded depends on a system.  For
  97   instance, Unix's format is just one byte of `line-feed' code,
  98   whereas DOS's format is two-byte sequence of `carriage-return' and
  99   `line-feed' codes.  MacOS's format is usually one byte of
 100   `carriage-return'.
 101
 102   Since text characters encoding and end-of-line encoding are
 103   independent, any coding system described above can take
 104   any format of end-of-line.  So, Emacs has information of format of
 105   end-of-line in each coding-system.  See section 6 for more details.
 106
 107 */
 108
 109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 110
 111   These functions check if a text between SRC and SRC_END is encoded
 112   in the coding system category XXX.  Each returns an integer value in
  113   which appropriate flag bits for the category XXX are set.  The flag
 114   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 115   template of these functions.  */
 116 #if 0
 117 int
 118 detect_coding_emacs_mule (src, src_end)
 119      unsigned char *src, *src_end;
 120 {
 121   ...
 122 }
 123 #endif
 124
 125 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 126
 127   These functions decode SRC_BYTES length of unibyte text at SOURCE
 128   encoded in CODING to Emacs' internal format.  The resulting
 129   multibyte text goes to a place pointed to by DESTINATION, the length
 130   of which should not exceed DST_BYTES.
 131
 132   These functions set the information of original and decoded texts in
 133   the members produced, produced_char, consumed, and consumed_char of
 134   the structure *CODING.  They also set the member result to one of
 135   CODING_FINISH_XXX indicating how the decoding finished.
 136
  137   DST_BYTES zero means that the source area and the destination area
  138   overlap, which means that we can produce decoded text until it
  139   reaches the head of the not-yet-decoded source text.
 140
 141   Below is a template of these functions.  */
 142 #if 0
 143 static void
 144 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 145      struct coding_system *coding;
 146      unsigned char *source, *destination;
 147      int src_bytes, dst_bytes;
 148 {
 149   ...
 150 }
 151 #endif
 152
 153 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 154
 155   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 156   internal multibyte format to CODING.  The resulting unibyte text
 157   goes to a place pointed to by DESTINATION, the length of which
 158   should not exceed DST_BYTES.
 159
 160   These functions set the information of original and encoded texts in
 161   the members produced, produced_char, consumed, and consumed_char of
 162   the structure *CODING.  They also set the member result to one of
 163   CODING_FINISH_XXX indicating how the encoding finished.
 164
  165   DST_BYTES zero means that the source area and the destination area
  166   overlap, which means that we can produce encoded text until it
  167   reaches the head of the not-yet-encoded source text.
 168
 169   Below is a template of these functions.  */
 170 #if 0
 171 static void
 172 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 173      struct coding_system *coding;
 174      unsigned char *source, *destination;
 175      int src_bytes, dst_bytes;
 176 {
 177   ...
 178 }
 179 #endif
 180
 181 /*** COMMONLY USED MACROS ***/
 182
  183 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
  184    get one and two bytes from the source text respectively.  If
  185    there are not enough bytes in the source, they set coding->result
  186    to CODING_FINISH_INSUFFICIENT_SRC and jump to `label_end_of_loop'
  187    without consuming anything.  The caller should set variables
  188    `coding', `src' and `src_end' to appropriate pointers in advance.
  189    Used by `decode_coding_XXX', so the source is assumed unibyte.  */
  190
  191 #define ONE_MORE_BYTE(c1)                                       \
  192   do {                                                          \
  193     if (src >= src_end)                                         \
  194       {                                                         \
  195         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  196         goto label_end_of_loop;                                 \
  197       }                                                         \
  198     c1 = *src++;                                                \
  199   } while (0)
  200
  201 #define TWO_MORE_BYTES(c1, c2)                                  \
  202   do {                                                          \
  203     if (src + 1 >= src_end)                                     \
  204       {                                                         \
  205         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  206         goto label_end_of_loop;                                 \
  207       }                                                         \
  208     c1 = *src++;                                                \
  209     c2 = *src++;                                                \
  210   } while (0)
 211
 212
  213 /* Set C to the next character at the source text pointed by `src'.
  214    If there are not enough characters in the source, set
  215    coding->result to CODING_FINISH_INSUFFICIENT_SRC and jump to
  216    `label_end_of_loop'.  The caller should set variables `coding',
  217    `src', `src_end', and `translation_table' in advance; C is mapped
  218    through `translation_table' unless that is nil.  This macro is used
  219    in encoding routines `encode_coding_XXX', thus the source is in
  220    multibyte form except for 8-bit characters, which are multibyte if
  221    coding->src_multibyte is nonzero, else are single bytes.  */
  222
  223 #define ONE_MORE_CHAR(c)                                        \
  224   do {                                                          \
  225     int len = src_end - src;                                    \
  226     int bytes;                                                  \
  227     if (len <= 0)                                               \
  228       {                                                         \
  229         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  230         goto label_end_of_loop;                                 \
  231       }                                                         \
  232     if (coding->src_multibyte                                   \
  233         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
  234       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
  235     else                                                        \
  236       c = *src, bytes = 1;                                      \
  237     if (!NILP (translation_table))                              \
  238       c = translate_char (translation_table, c, 0, 0, 0);       \
  239     src += bytes;                                               \
  240   } while (0)
 241
 242
  243 /* Produce a multibyte form of character C to `dst'.  If there's not
  244    enough space at `dst', set coding->result to
  245    CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.
  246    The space limit is `dst_end', or `src' when `dst_bytes' is zero.
  247    If we are now in the middle of a composition sequence, the decoded
  248    character may be ALTCHAR (for the current composition).  In that
  249    case, the character is recorded as a composition component (via
  250    CODING_ADD_COMPOSITION_COMPONENT) instead of being written to `dst'.
  251    This macro is used in decoding routines.  */
  252
  253 #define EMIT_CHAR(c)                                                    \
  254   do {                                                                  \
  255     if (! COMPOSING_P (coding)                                          \
  256         || coding->composing == COMPOSITION_RELATIVE                    \
  257         || coding->composing == COMPOSITION_WITH_RULE)                  \
  258       {                                                                 \
  259         int bytes = CHAR_BYTES (c);                                     \
  260         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
  261           {                                                             \
  262             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
  263             goto label_end_of_loop;                                     \
  264           }                                                             \
  265         dst += CHAR_STRING (c, dst);                                    \
  266         coding->produced_char++;                                        \
  267       }                                                                 \
  268                                                                         \
  269     if (COMPOSING_P (coding)                                            \
  270         && coding->composing != COMPOSITION_RELATIVE)                   \
  271       {                                                                 \
  272         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
  273         coding->composition_rule_follows                                \
  274           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
  275       }                                                                 \
  276   } while (0)
 277
  278 /* Emit byte C to `dst'; if full, set INSUFFICIENT_DST result and jump out.  */
  279 #define EMIT_ONE_BYTE(c)                                        \
  280   do {                                                          \
  281     if (dst >= (dst_bytes ? dst_end : src))                     \
  282       {                                                         \
  283         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
  284         goto label_end_of_loop;                                 \
  285       }                                                         \
  286     *dst++ = c;                                                 \
  287   } while (0)
  288 /* Likewise, but emit the two bytes C1 and C2, or nothing if both don't fit.  */
  289 #define EMIT_TWO_BYTES(c1, c2)                                  \
  290   do {                                                          \
  291     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
  292       {                                                         \
  293         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
  294         goto label_end_of_loop;                                 \
  295       }                                                         \
  296     *dst++ = c1, *dst++ = c2;                                   \
  297   } while (0)
  298 /* Likewise, but emit the bytes in FROM..TO (TO excluded); FROM advances to TO.  */
  299 #define EMIT_BYTES(from, to)                                    \
  300   do {                                                          \
  301     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
  302       {                                                         \
  303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
  304         goto label_end_of_loop;                                 \
  305       }                                                         \
  306     while (from < to)                                           \
  307       *dst++ = *from++;                                         \
  308   } while (0)
 309
 310 \f
 311 /*** 1. Preamble ***/
 312
 313 #ifdef emacs
 314 #include <config.h>
 315 #endif
 316
 317 #include <stdio.h>
 318
 319 #ifdef emacs
 320
 321 #include "lisp.h"
 322 #include "buffer.h"
 323 #include "charset.h"
 324 #include "composite.h"
 325 #include "ccl.h"
 326 #include "coding.h"
 327 #include "window.h"
 328
 329 #else  /* not emacs */
 330
 331 #include "mulelib.h"
 332
 333 #endif /* not emacs */
  334 /* Lisp objects used as keys, e.g. Qcoding_system and Qsafe_chars in coding_safe_chars.  */
  335 Lisp_Object Qcoding_system, Qeol_type;
  336 Lisp_Object Qbuffer_file_coding_system;
  337 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
  338 Lisp_Object Qno_conversion, Qundecided;
  339 Lisp_Object Qcoding_system_history;
  340 Lisp_Object Qsafe_chars;
  341 Lisp_Object Qvalid_codes;
  342 /* NOTE(review): presumably symbols naming the I/O operations -- confirm.  */
  343 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
  344 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
  345 Lisp_Object Qstart_process, Qopen_network_stream;
  346 Lisp_Object Qtarget_idx;
  347 /* NOTE(review): presumably a function choosing a safe coding system -- confirm.  */
  348 Lisp_Object Vselect_safe_coding_system_function;
  349
  350 /* Mnemonic string for each format of end-of-line.  */
  351 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
  352 /* Mnemonic string to indicate format of end-of-line is not yet
  353    decided.  */
  354 Lisp_Object eol_mnemonic_undecided;
  355
  356 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
  357    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
  358 int system_eol_type;
 359
  360 #ifdef emacs
  361 /* NOTE(review): presumably the list and alist of defined coding systems -- confirm.  */
  362 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
  363 /* NOTE(review): presumably the predicate and error symbols -- confirm.  */
  364 Lisp_Object Qcoding_system_p, Qcoding_system_error;
  365
  366 /* Coding system emacs-mule and raw-text are for converting only
  367    end-of-line format.  */
  368 Lisp_Object Qemacs_mule, Qraw_text;
  369
  370 /* Coding-systems are handed between Emacs Lisp programs and C internal
  371    routines by the following three variables.  */
  372 /* Coding-system for reading files and receiving data from process.  */
  373 Lisp_Object Vcoding_system_for_read;
  374 /* Coding-system for writing files and sending data to process.  */
  375 Lisp_Object Vcoding_system_for_write;
  376 /* Coding-system actually used in the latest I/O.  */
  377 Lisp_Object Vlast_coding_system_used;
  378
  379 /* A vector of length 256 which contains information about special
  380    Latin codes (especially for dealing with Microsoft codes).  */
  381 Lisp_Object Vlatin_extra_code_table;
  382
  383 /* Flag to inhibit code conversion of end-of-line format.  */
  384 int inhibit_eol_conversion;
  385
  386 /* Flag to inhibit ISO2022 escape sequence detection.  */
  387 int inhibit_iso_escape_detection;
  388
  389 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
  390 int inherit_process_coding_system;
  391
  392 /* Coding system to be used to encode text for terminal display.  */
  393 struct coding_system terminal_coding;
  394
  395 /* Coding system to be used to encode text for terminal display when
  396    terminal coding system is nil.  */
  397 struct coding_system safe_terminal_coding;
  398
  399 /* Coding system of what is sent from terminal keyboard.  */
  400 struct coding_system keyboard_coding;
  401
  402 /* Default coding system to be used to write a file.  */
  403 struct coding_system default_buffer_file_coding;
  404 /* NOTE(review): presumably alists for file, process, and network I/O -- confirm.  */
  405 Lisp_Object Vfile_coding_system_alist;
  406 Lisp_Object Vprocess_coding_system_alist;
  407 Lisp_Object Vnetwork_coding_system_alist;
  408 /* NOTE(review): presumably the coding system for locale-dependent text -- confirm.  */
  409 Lisp_Object Vlocale_coding_system;
  410
  411 #endif /* emacs */
  412 /* NOTE(review): presumably symbols for coding-category properties -- confirm.  */
  413 Lisp_Object Qcoding_category, Qcoding_category_index;
  414
  415 /* List of symbols `coding-category-xxx' ordered by priority.  */
  416 Lisp_Object Vcoding_category_list;
  417
  418 /* Table of coding categories (Lisp symbols).  */
  419 Lisp_Object Vcoding_category_table;
  420
  421 /* Table of symbol names, one for each coding category.  */
  422 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
  423   "coding-category-emacs-mule",
  424   "coding-category-sjis",
  425   "coding-category-iso-7",
  426   "coding-category-iso-7-tight",
  427   "coding-category-iso-8-1",
  428   "coding-category-iso-8-2",
  429   "coding-category-iso-7-else",
  430   "coding-category-iso-8-else",
  431   "coding-category-ccl",
  432   "coding-category-big5",
  433   "coding-category-utf-8",
  434   "coding-category-utf-16-be",
  435   "coding-category-utf-16-le",
  436   "coding-category-raw-text",
  437   "coding-category-binary"
  438 };
  439
  440 /* Table of pointers to the coding systems corresponding to each
  441    coding category.  */
  442 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
  443
  444 /* Table of coding category masks.  Nth element is the mask for the
  445    coding category whose priority is Nth.  */
  446 static
  447 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 448
  449 /* Flag to tell if we look up a translation table on character code
  450    conversion.  */
  451 Lisp_Object Venable_character_translation;
  452 /* Standard translation table to look up on decoding (reading).  */
  453 Lisp_Object Vstandard_translation_table_for_decode;
  454 /* Standard translation table to look up on encoding (writing).  */
  455 Lisp_Object Vstandard_translation_table_for_encode;
  456 /* NOTE(review): presumably symbols related to translation tables -- confirm.  */
  457 Lisp_Object Qtranslation_table;
  458 Lisp_Object Qtranslation_table_id;
  459 Lisp_Object Qtranslation_table_for_decode;
  460 Lisp_Object Qtranslation_table_for_encode;
  461
  462 /* Alist of charsets vs revision number.  */
  463 Lisp_Object Vcharset_revision_alist;
  464
  465 /* Default coding systems used for process I/O.  */
  466 Lisp_Object Vdefault_process_coding_system;
  467
  468 /* Global flag to tell that we can't call post-read-conversion and
  469    pre-write-conversion functions.  Usually the value is zero, but it
  470    is set to 1 temporarily while such functions are running.  This is
  471    to avoid infinitely recursive calls.  */
  472 static int inhibit_pre_post_conversion;
  473
  474 /* Char-table containing safe coding systems of each character.  */
  475 Lisp_Object Vchar_coding_system_table;
  476 Lisp_Object Qchar_coding_system;
 477
 478 /* Return `safe-chars' property of coding system CODING.  Don't check
 479    validity of CODING.  */
 480
 481 Lisp_Object
 482 coding_safe_chars (coding)
 483      struct coding_system *coding;
 484 {
 485   Lisp_Object coding_spec, plist, safe_chars;
 486
 487   coding_spec = Fget (coding->symbol, Qcoding_system);
 488   plist = XVECTOR (coding_spec)->contents[3];
 489   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
 490   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 491 }
  492 /* Nonzero if SAFE_CHARS is Qt or its char-table entry for C is non-nil.  */
  493 #define CODING_SAFE_CHAR_P(safe_chars, c) \
  494   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 495
 496 \f
 497 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 498
 499 /* Emacs' internal format for encoding multiple character sets is a
 500    kind of multi-byte encoding, i.e. characters are encoded by
 501    variable-length sequences of one-byte codes.
 502
 503    ASCII characters and control characters (e.g. `tab', `newline') are
 504    represented by one-byte sequences which are their ASCII codes, in
 505    the range 0x00 through 0x7F.
 506
 507    8-bit characters of the range 0x80..0x9F are represented by
 508    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 509    code + 0x20).
 510
 511    8-bit characters of the range 0xA0..0xFF are represented by
 512    one-byte sequences which are their 8-bit code.
 513
 514    The other characters are represented by a sequence of `base
 515    leading-code', optional `extended leading-code', and one or two
 516    `position-code's.  The length of the sequence is determined by the
 517    base leading-code.  Leading-code takes the range 0x80 through 0x9F,
 518    whereas extended leading-code and position-code take the range 0xA0
 519    through 0xFF.  See `charset.h' for more details about leading-code
 520    and position-code.
 521
 522    --- CODE RANGE of Emacs' internal format ---
 523    character set        range
 524    -------------        -----
 525    ascii                0x00..0x7F
 526    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
  527    eight-bit-graphic    0xA0..0xFF
 528    ELSE                 0x81..0x9F + [0xA0..0xFF]+
 529    ---------------------------------------------
 530
 531   */
  532 /* One entry per byte value.  NOTE(review): presumably each byte's code class -- confirm.  */
  533 enum emacs_code_class_type emacs_code_class[256];
 534
  535 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
  536    Check if a text is encoded in Emacs' internal format.  Return 0 on
  537    ESC, SI, SO, or an invalid multibyte sequence; otherwise (the whole
  538    text was scanned) return CODING_CATEGORY_MASK_EMACS_MULE.  */
  539 int
  540 detect_coding_emacs_mule (src, src_end)
  541       unsigned char *src, *src_end;
  542 {
  543   unsigned char c;
  544   int composing = 0;
  545   /* Dummy for ONE_MORE_BYTE, which stores into coding->result.  */
  546   struct coding_system dummy_coding;
  547   struct coding_system *coding = &dummy_coding;
  548
  549   while (1)
  550     {
  551       ONE_MORE_BYTE (c);        /* Jumps to label_end_of_loop at the end.  */
  552       /* In a composition, adjust C first (presumably undoing the old encoding).  */
  553       if (composing)
  554         {
  555           if (c < 0xA0)
  556             composing = 0;      /* C < 0xA0 ends the composition.  */
  557           else if (c == 0xA0)
  558             {
  559               ONE_MORE_BYTE (c);
  560               c &= 0x7F;
  561             }
  562           else
  563             c -= 0x20;
  564         }
  565       /* ESC, SI, and SO (ISO2022 controls) rule out emacs-mule.  */
  566       if (c < 0x20)
  567         {
  568           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
  569             return 0;
  570         }
  571       else if (c >= 0x80 && c < 0xA0)
  572         {
  573           if (c == 0x80)
  574             /* Old leading code for a composite character.  */
  575             composing = 1;
  576           else
  577             {
  578               unsigned char *src_base = src - 1;
  579               int bytes;
  580               /* Validate the sequence starting at the byte just read.  */
  581               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
  582                                                bytes))
  583                 return 0;
  584               src = src_base + bytes;   /* Skip the whole sequence.  */
  585             }
  586         }
  587     }
  588  label_end_of_loop:             /* Reached only via ONE_MORE_BYTE.  */
  589   return CODING_CATEGORY_MASK_EMACS_MULE;
  590 }
 591
 592
 593 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 594
 595 static void
 596 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 597      struct coding_system *coding;
 598      unsigned char *source, *destination;
 599      int src_bytes, dst_bytes;
 600 {
 601   unsigned char *src = source;
 602   unsigned char *src_end = source + src_bytes;
 603   unsigned char *dst = destination;
 604   unsigned char *dst_end = destination + dst_bytes;
 605   /* SRC_BASE remembers the start position in source in each loop.
 606      The loop will be exited when there's not enough source code, or
 607      when there's not enough destination area to produce a
 608      character.  */
 609   unsigned char *src_base;
 610
 611   coding->produced_char = 0;
 612   while ((src_base = src) < src_end)
 613     {
 614       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 615       int bytes;
 616
 617       if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 618         {
 619           p = src;
 620           src += bytes;
 621         }
 622       else
 623         {
 624           bytes = CHAR_STRING (*src, tmp);
 625           p = tmp;
 626           src++;
 627         }
 628       if (dst + bytes >= (dst_bytes ? dst_end : src))
 629         {
 630           coding->result = CODING_FINISH_INSUFFICIENT_DST;
 631           break;
 632         }
 633       while (bytes--) *dst++ = *p++;
 634       coding->produced_char++;
 635     }
 636   coding->consumed = coding->consumed_char = src_base - source;
 637   coding->produced = dst - destination;
 638 }
  639 /* Encoding to emacs-mule is delegated to encode_eol with the same arguments.  */
  640 #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \
  641   encode_eol (coding, source, destination, src_bytes, dst_bytes)
 642
 643
 644 \f
 645 /*** 3. ISO2022 handlers ***/
 646
 647 /* The following note describes the coding system ISO2022 briefly.
 648    Since the intention of this note is to help understand the
 649    functions in this file, some parts are NOT ACCURATE or OVERLY
 650    SIMPLIFIED.  For thorough understanding, please refer to the
 651    original document of ISO2022.
 652
 653    ISO2022 provides many mechanisms to encode several character sets
  654    in 7-bit and 8-bit environments.  For 7-bit environments, all text
  655    is encoded using bytes less than 128.  This may make the encoded
  656    text a little bit longer, but the text passes more easily through
  657    several gateways, some of which strip off MSB (Most Significant Bit).
 658
 659    There are two kinds of character sets: control character set and
 660    graphic character set.  The former contains control characters such
 661    as `newline' and `escape' to provide control functions (control
 662    functions are also provided by escape sequences).  The latter
 663    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 664    two control character sets and many graphic character sets.
 665
 666    Graphic character sets are classified into one of the following
 667    four classes, according to the number of bytes (DIMENSION) and
 668    number of characters in one dimension (CHARS) of the set:
 669    - DIMENSION1_CHARS94
 670    - DIMENSION1_CHARS96
 671    - DIMENSION2_CHARS94
 672    - DIMENSION2_CHARS96
 673
 674    In addition, each character set is assigned an identification tag,
 675    unique for each set, called "final character" (denoted as <F>
 676    hereafter).  The <F> of each character set is decided by ECMA(*)
 677    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
 678    (0x30..0x3F are for private use only).
 679
 680    Note (*): ECMA = European Computer Manufacturers Association
 681
 682    Here are examples of graphic character set [NAME(<F>)]:
 683         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 684         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 685         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 686         o DIMENSION2_CHARS96 -- none for the moment
 687
 688    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
 689         C0 [0x00..0x1F] -- control character plane 0
 690         GL [0x20..0x7F] -- graphic character plane 0
 691         C1 [0x80..0x9F] -- control character plane 1
 692         GR [0xA0..0xFF] -- graphic character plane 1
 693
 694    A control character set is directly designated and invoked to C0 or
 695    C1 by an escape sequence.  The most common case is that:
 696    - ISO646's  control character set is designated/invoked to C0, and
 697    - ISO6429's control character set is designated/invoked to C1,
 698    and usually these designations/invocations are omitted in encoded
 699    text.  In a 7-bit environment, only C0 can be used, and a control
 700    character for C1 is encoded by an appropriate escape sequence to
 701    fit into the environment.  All control characters for C1 are
 702    defined to have corresponding escape sequences.
 703
 704    A graphic character set is at first designated to one of four
 705    graphic registers (G0 through G3), then these graphic registers are
 706    invoked to GL or GR.  These designations and invocations can be
 707    done independently.  The most common case is that G0 is invoked to
 708    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
 709    these invocations and designations are omitted in encoded text.
 710    In a 7-bit environment, only GL can be used.
 711
 712    When a graphic character set of CHARS94 is invoked to GL, codes
 713    0x20 and 0x7F of the GL area work as control characters SPACE and
 714    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
 715    be used.
 716
 717    There are two ways of invocation: locking-shift and single-shift.
 718    With locking-shift, the invocation lasts until the next different
 719    invocation, whereas with single-shift, the invocation affects the
 720    following character only and doesn't affect the locking-shift
 721    state.  Invocations are done by the following control characters or
 722    escape sequences:
 723
 724    ----------------------------------------------------------------------
 725    abbrev  function                  cntrl escape seq   description
 726    ----------------------------------------------------------------------
 727    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
 728    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
 729    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
 730    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
 731    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
 732    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
 733    LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
 734    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
 735    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
 736    ----------------------------------------------------------------------
 737    (*) These are not used by any known coding system.
 738
 739    Control characters for these functions are defined by macros
 740    ISO_CODE_XXX in `coding.h'.
 741
 742    Designations are done by the following escape sequences:
 743    ----------------------------------------------------------------------
 744    escape sequence      description
 745    ----------------------------------------------------------------------
 746    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 747    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 748    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 749    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 750    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 751    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 752    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 753    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 754    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 755    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 756    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 757    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 758    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 759    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 760    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 761    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 762    ----------------------------------------------------------------------
 763
 764    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 765    of dimension 1, chars 94, and final character <F>, etc...
 766
 767    Note (*): Although these designations are not allowed in ISO2022,
 768    Emacs accepts them on decoding, and produces them on encoding
 769    CHARS96 character sets in a coding system which is characterized as
 770    7-bit environment, non-locking-shift, and non-single-shift.
 771
 772    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 773    '(' can be omitted.  We refer to this as "short-form" hereafter.
 774
 775    Now you may notice that there are a lot of ways for encoding the
 776    same multilingual text in ISO2022.  Actually, there exist many
 777    coding systems such as Compound Text (used in X11's inter-client
 778    communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
 779    (used in Korean internet), EUC (Extended UNIX Code, used in Asian
 780    localized platforms), and all of these are variants of ISO2022.
 781
 782    In addition to the above, Emacs handles two more kinds of escape
 783    sequences: ISO6429's direction specification and Emacs' private
 784    sequence for specifying character composition.
 785
 786    ISO6429's direction specification takes the following form:
 787         o CSI ']'      -- end of the current direction
 788         o CSI '0' ']'  -- end of the current direction
 789         o CSI '1' ']'  -- start of left-to-right text
 790         o CSI '2' ']'  -- start of right-to-left text
 791    The control character CSI (0x9B: control sequence introducer) is
 792    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
 793
 794    Character composition specification takes the following form:
 795         o ESC '0' -- start relative composition
 796         o ESC '1' -- end composition
 797         o ESC '2' -- start rule-base composition (*)
 798         o ESC '3' -- start relative composition with alternate chars  (**)
 799         o ESC '4' -- start rule-base composition with alternate chars  (**)
 800   Since these are not standard escape sequences of any ISO standard,
 801   their use with these meanings is restricted to Emacs only.
 802
 803   (*) This form is used only in Emacs 20.5 and the older versions,
 804   but the newer versions can safely decode it.
 805   (**) This form is used only in Emacs 21.1 and the newer versions,
 806   and the older versions can't decode it.
 807
 808   Here's a list of example usages of these composition escape
 809   sequences (categorized by `enum composition_method').
 810
 811   COMPOSITION_RELATIVE:
 812         ESC 0 CHAR [ CHAR ] ESC 1
 813   COMPOSITION_WITH_RULE:
 814         ESC 2 CHAR [ RULE CHAR ] ESC 1
 815   COMPOSITION_WITH_ALTCHARS:
 816         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
 817   COMPOSITION_WITH_RULE_ALTCHARS:
 818         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
 819
     /* Table of ISO2022 code classes, one entry per byte value 0..255.
        decode_coding_iso2022 indexes it with the byte it has just read
        and dispatches on the class found there.  */
 820 enum iso_code_class_type iso_code_class[256];
 821
     /* Nonzero if coding category IDX has a coding system that accepts
        CHARSET.  That holds when coding_system_table[IDX] is non-null,
        CHARSET is ASCII or C satisfies CODING_SAFE_CHAR_P for that
        coding system, and the coding system's requested designation for
        CHARSET is not CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.
        In the non-ASCII case the expansion assigns to a variable named
        `safe_chars', which must be declared where the macro is used.
        IDX and CHARSET are evaluated more than once.  */
 822 #define CHARSET_OK(idx, charset, c)                                     \
 823   (coding_system_table[idx]                                             \
 824    && ((charset) == CHARSET_ASCII                                       \
 825        || (safe_chars = coding_safe_chars (coding_system_table[idx]),   \
 826            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
 827    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
 828                                               charset)                  \
 829        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 830
     /* Nonzero if coding category IDX has a coding system whose initial
        designation for graphic register 1 is non-negative.  A null entry
        in coding_system_table yields zero instead of being dereferenced,
        which matches the check CHARSET_OK makes on the same table.  */
 831 #define SHIFT_OUT_OK(idx)                                                \
 832   (coding_system_table[idx]                                              \
        && (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) \
            >= 0))
 833
 834 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 835    Check whether the text in SRC..SRC_END looks like ISO2022.  If it
 836    does, return an integer in which the appropriate flag bits among
 837         CODING_CATEGORY_MASK_ISO_7
 838         CODING_CATEGORY_MASK_ISO_7_TIGHT
 839         CODING_CATEGORY_MASK_ISO_8_1
 840         CODING_CATEGORY_MASK_ISO_8_2
 841         CODING_CATEGORY_MASK_ISO_7_ELSE
 842         CODING_CATEGORY_MASK_ISO_8_ELSE
 843    are set.  If a code which should never appear in ISO2022 is found,
 844    return 0.
        SRC points to the first byte to examine and SRC_END just past the
        last one.  While scanning, MASK holds the categories that have
        not been ruled out and MASK_FOUND the categories for which some
        evidence has been seen; the value returned is their
        intersection.  */
 845
 846 int
 847 detect_coding_iso2022 (src, src_end)
 848      unsigned char *src, *src_end;
 849 {
 850   int mask = CODING_CATEGORY_MASK_ISO;
 851   int mask_found = 0;
       /* REG[n] is the charset most recently designated to graphic
          register n by the text scanned so far, or -1 if none.
          NOTE(review): SHIFT_OUT is tested in the SO and SI cases below
          but is never assigned after this initialization, so the SI
          branch guarded by `shift_out == 1' cannot be taken -- confirm
          whether that is intended.  */
 852   int reg[4], shift_out = 0, single_shifting = 0;
 853   int c, c1, charset;
 854   /* Dummy for ONE_MORE_BYTE.  */
 855   struct coding_system dummy_coding;
 856   struct coding_system *coding = &dummy_coding;
       /* Assigned by CHARSET_OK.  */
 857   Lisp_Object safe_chars;
 858
 859   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 860   while (mask && src < src_end)
 861     {
 862       ONE_MORE_BYTE (c);
 863       switch (c)
 864         {
 865         case ISO_CODE_ESC:
 866           if (inhibit_iso_escape_detection)
 867             break;
 868           single_shifting = 0;
 869           ONE_MORE_BYTE (c);
 870           if (c >= '(' && c <= '/')
 871             {
 872               /* Designation sequence for a charset of dimension 1.  */
 873               ONE_MORE_BYTE (c1);
 874               if (c1 < ' ' || c1 >= 0x80
 875                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 876                 /* Invalid designation sequence.  Just ignore.  */
 877                 break;
 878               reg[(c - '(') % 4] = charset;
 879             }
 880           else if (c == '$')
 881             {
 882               /* Designation sequence for a charset of dimension 2.  */
 883               ONE_MORE_BYTE (c);
 884               if (c >= '@' && c <= 'B')
 885                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 886                 reg[0] = charset = iso_charset_table[1][0][c];
 887               else if (c >= '(' && c <= '/')
 888                 {
 889                   ONE_MORE_BYTE (c1);
 890                   if (c1 < ' ' || c1 >= 0x80
 891                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 892                     /* Invalid designation sequence.  Just ignore.  */
 893                     break;
 894                   reg[(c - '(') % 4] = charset;
 895                 }
 896               else
 897                 /* Invalid designation sequence.  Just ignore.  */
 898                 break;
 899             }
 900           else if (c == 'N' || c == 'O')
 901             {
 902               /* ESC <Fe> for SS2 or SS3.  */
 903               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
 904               break;
 905             }
 906           else if (c >= '0' && c <= '4')
 907             {
 908               /* ESC <Fp> for start/end composition.  */
 909               mask_found |= CODING_CATEGORY_MASK_ISO;
 910               break;
 911             }
 912           else
 913             /* Invalid escape sequence.  Just ignore.  */
 914             break;
 915
 916           /* We found a valid designation sequence for CHARSET.  */
 917           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 918           c = MAKE_CHAR (charset, 0, 0);
 919           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
 920             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 921           else
 922             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 923           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
 924             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 925           else
 926             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 927           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
 928             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
 929           else
 930             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 931           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
 932             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
 933           else
 934             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 935           break;
 936
 937         case ISO_CODE_SO:
 938           if (inhibit_iso_escape_detection)
 939             break;
 940           single_shifting = 0;
 941           if (shift_out == 0
 942               && (reg[1] >= 0
 943                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 944                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 945             {
 946               /* Locking shift out.  */
 947               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 948               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 949             }
 950           break;
 951
 952         case ISO_CODE_SI:
 953           if (inhibit_iso_escape_detection)
 954             break;
 955           single_shifting = 0;
 956           if (shift_out == 1)
 957             {
 958               /* Locking shift in.  */
 959               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 960               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 961             }
 962           break;
 963
 964         case ISO_CODE_CSI:
 965           single_shifting = 0;
               /* Fall through.  */
 966         case ISO_CODE_SS2:
 967         case ISO_CODE_SS3:
 968           {
 969             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 970
 971             if (inhibit_iso_escape_detection)
 972               break;
                 /* NOTE(review): the coding_system_table entries for
                    ISO_8_1 and ISO_8_2 are dereferenced here and in the
                    default case without the null check CHARSET_OK makes
                    -- confirm that they are always set.  */
 973             if (c != ISO_CODE_CSI)
 974               {
 975                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 976                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 977                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 978                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 979                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 980                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 981                 single_shifting = 1;
 982               }
 983             if (VECTORP (Vlatin_extra_code_table)
 984                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 985               {
 986                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 987                     & CODING_FLAG_ISO_LATIN_EXTRA)
 988                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 989                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 990                     & CODING_FLAG_ISO_LATIN_EXTRA)
 991                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 992               }
 993             mask &= newmask;
 994             mask_found |= newmask;
 995           }
 996           break;
 997
 998         default:
 999           if (c < 0x80)
1000             {
1001               single_shifting = 0;
1002               break;
1003             }
1004           else if (c < 0xA0)
1005             {
1006               single_shifting = 0;
1007               if (VECTORP (Vlatin_extra_code_table)
1008                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1009                 {
1010                   int newmask = 0;
1011
1012                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1013                       & CODING_FLAG_ISO_LATIN_EXTRA)
1014                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1015                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1016                       & CODING_FLAG_ISO_LATIN_EXTRA)
1017                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1018                   mask &= newmask;
1019                   mask_found |= newmask;
1020                 }
1021               else
1022                 return 0;
1023             }
1024           else
1025             {
1026               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1027                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1028               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1029               /* Check the length of succeeding codes of the range
1030                  0xA0..0xFF.  If the byte length is odd, we exclude
1031                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1032                  when we are not single shifting.  */
1033               if (!single_shifting
1034                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1035                 {
1036                   int i = 1;
                       /* Nonzero once a byte below 0xA0 has ended the run.  */
                       int run_ended = 0;

1037                   while (src < src_end)
1038                     {
1039                       ONE_MORE_BYTE (c);
1040                       if (c < 0xA0)
                             {
                               run_ended = 1;
1041                           break;
                             }
1042                       i++;
1043                     }
1044
1045                   if (i & 1 && src < src_end)
1046                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1047                   else
1048                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;

                       /* The byte that ended the run has been consumed
                          but not yet classified.  Step back so that the
                          main loop examines it; otherwise an ESC, SO, SI
                          or invalid C1 code right after 8-bit text would
                          go unnoticed.  This is done after the parity
                          test above so that its `src < src_end' check
                          sees the same position as before.  */
                       if (run_ended)
                         src--;
1049                 }
1050             }
1051           break;
1052         }
1053     }
       /* Presumably the target of ONE_MORE_BYTE when the source is
          exhausted (the macro is defined elsewhere in this file).  */
1054  label_end_of_loop:
1055   return (mask & mask_found);
1056 }
1057
1058 /* Build the character of charset CHARSET whose 1st position code is
1059    C1 and whose 2nd position code is C2, and yield its character
1060    code.  When the variable `translation_table' is non-nil, yield
1061    what translate_char gives for that table instead.  */
1062
1063 #define DECODE_ISO_CHARACTER(charset, c1, c2)                   \
1064   (!NILP (translation_table)                                    \
1065    ? translate_char (translation_table, -1, charset, c1, c2)    \
1066    : MAKE_CHAR (charset, c1, c2))
1067
1068 /* Set designation state into CODING.
        Look up the charset selected by DIMENSION, CHARS and FINAL_CHAR
        and designate it to graphic register REG of `coding'.  The
        designation is accepted when the charset exists and either
        `coding' requests it for REG or its representative character
        satisfies CODING_SAFE_CHAR_P; a charset with a reverse charset
        is replaced by that one when CODING_MODE_DIRECTION is set.
        Otherwise REG is remembered in last_invalid_designation_register
        and control jumps to label_invalid_code.  A FINAL_CHAR outside
        '0'..127 also jumps there.
        The expansion needs the variables `coding' and `safe_chars' and
        the label `label_invalid_code' in scope.  It declares its own
        `charset' and `c', so the arguments must not mention variables
        of those names.  REG and FINAL_CHAR are evaluated more than
        once.  */
1069 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1070   do {                                                                     \
1071     int charset, c;                                                        \
1072                                                                            \
1073     if ((final_char) < '0' || (final_char) >= 128)                         \
1074       goto label_invalid_code;                                             \
1075     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1076                                  make_number (chars),                      \
1077                                  make_number (final_char));                \
1078     c = MAKE_CHAR (charset, 0, 0);                                         \
1079     if (charset >= 0                                                       \
1080         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)        \
                 == (reg)                                                       \
1081             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1082       {                                                                    \
1083         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1084             && (reg) == 0                                                  \
1085             && charset == CHARSET_ASCII)                                   \
1086           {                                                                \
1087             /* We should insert this designation sequence as is so         \
1088                that it is surely written back to a file.  */               \
1089             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1090             goto label_invalid_code;                                       \
1091           }                                                                \
1092         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1093         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1094             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1095           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1096         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1097       }                                                                    \
1098     else                                                                   \
1099       {                                                                    \
1100         coding->spec.iso2022.last_invalid_designation_register = (reg);    \
1101         goto label_invalid_code;                                           \
1102       }                                                                    \
1103   } while (0)
1104
1105 /* Allocate a fresh composition-data block whose character offset is
1106    CHAR_OFFSET, chain it after CODING's current block, and make it
1107    the current one.  CODING's cmp_data_start is reset to 0.  */
1108 void
1109 coding_allocate_composition_data (coding, char_offset)
1110      struct coding_system *coding;
1111      int char_offset;
1112 {
1113   struct composition_data *block;
1114
1115   block = (struct composition_data *) xmalloc (sizeof *block);
1116   block->prev = coding->cmp_data;
1117   block->next = NULL;
1118   block->used = 0;
1119   block->char_offset = char_offset;
1120   if (block->prev != NULL)
1121     block->prev->next = block;
1122   coding->cmp_data_start = 0;
1123   coding->cmp_data = block;
1124 }
1125
1126 /* Record the starting position START and METHOD of one composition.
        A four-element record is opened at the end of CODING's current
        composition data: element 0 is set to -1 and element 2 is left
        untouched here (CODING_ADD_COMPOSITION_END fills both), element 1
        gets the block's char_offset plus START, and element 3 gets
        METHOD.  CODING's cmp_data_start remembers where the record
        begins.  CODING is evaluated more than once.  */
1127
1128 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
1129   do {                                                          \
1130     struct composition_data *cmp_data = (coding)->cmp_data;     \
1131     int *data = cmp_data->data + cmp_data->used;                \
1132     (coding)->cmp_data_start = cmp_data->used;                  \
1133     data[0] = -1;                                               \
1134     data[1] = cmp_data->char_offset + (start);                  \
1135     data[3] = (int) (method);                                   \
1136     cmp_data->used += 4;                                        \
1137   } while (0)
1138
1139 /* Record the ending position END of the current composition.
        In the record that begins at CODING's cmp_data_start, element 0
        becomes the number of elements used since that start and element
        2 becomes the block's char_offset plus END.  CODING is evaluated
        more than once.  */
1140
1141 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
1142   do {                                                          \
1143     struct composition_data *cmp_data = (coding)->cmp_data;     \
1144     int *data = cmp_data->data + (coding)->cmp_data_start;      \
1145     data[0] = cmp_data->used - (coding)->cmp_data_start;        \
1146     data[2] = cmp_data->char_offset + (end);                    \
1147   } while (0)
1148
1149 /* Record one COMPONENT (alternate character or composition rule).
        It is appended to CODING's current composition data and the
        used-count is advanced by one.  No bound is checked here.
        CODING is evaluated twice.  */
1150
1151 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)     \
1152   ((coding)->cmp_data->data[(coding)->cmp_data->used++] = (component))
1153
1154 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
        C1 is the byte that followed ESC.  When composition is disabled
        the two bytes are copied to the output instead.  The expansion
        needs the variables `coding' and `dst' and the label
        `label_end_of_loop' in scope; C1 is evaluated more than once.  */
1155
1156 #define DECODE_COMPOSITION_START(c1)                                       \
1157   do {                                                                     \
1158     if (coding->composing == COMPOSITION_DISABLED)                         \
1159       {                                                                    \
1160         *dst++ = ISO_CODE_ESC;                                             \
1161         *dst++ = (c1) & 0x7f;                                              \
1162         coding->produced_char += 2;                                        \
1163       }                                                                    \
1164     else if (!COMPOSING_P (coding))                                        \
1165       {                                                                    \
1166         /* This is surely the start of a composition.  We must be sure     \
1167            that coding->cmp_data has enough space to store the             \
1168            information about the composition.  If not, terminate the       \
1169            current decoding loop, allocate one more memory block for       \
1170            coding->cmp_data in the caller, then start the decoding         \
1171            loop again.  We can't allocate memory here directly because     \
1172            it may cause buffer/string relocation.  */                      \
1173         if (!coding->cmp_data                                              \
1174             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1175                 >= COMPOSITION_DATA_SIZE))                                 \
1176           {                                                                \
1177             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1178             goto label_end_of_loop;                                        \
1179           }                                                                \
1180         coding->composing = ((c1) == '0' ? COMPOSITION_RELATIVE            \
1181                              : (c1) == '2' ? COMPOSITION_WITH_RULE         \
1182                              : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS     \
1183                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1184         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1185                                       coding->composing);                  \
1186         coding->composition_rule_follows = 0;                              \
1187       }                                                                    \
1188     else                                                                   \
1189       {                                                                    \
1190         /* We are already handling a composition.  If the method is        \
1191            the following two, the codes following the current escape       \
1192            sequence are actual characters stored in a buffer.  */          \
1193         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1194             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1195           {                                                                \
1196             coding->composing = COMPOSITION_RELATIVE;                      \
1197             coding->composition_rule_follows = 0;                          \
1198           }                                                                \
1199       }                                                                    \
1200   } while (0)
1201
1202 /* Handle composition end sequence ESC 1.  C1 is the byte after ESC.  */
1203
1204 #define DECODE_COMPOSITION_END(c1)                                      \
1205   do {                                                                  \
1206     if (coding->composing != COMPOSITION_DISABLED)                      \
1207       {                                                                 \
1208         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1209         coding->composing = COMPOSITION_NO;                             \
1210       }                                                                 \
1211     else                                                                \
1212       {                                                                 \
1213         coding->produced_char += 2;                                     \
1214         *dst++ = ISO_CODE_ESC;                                          \
1215         *dst++ = c1;                                                    \
1216       }                                                                 \
1217   } while (0)
1218
1219 /* Decode a composition rule from the byte C1 (and maybe one more byte
1220    from SRC) and store one encoded composition rule in
1221    coding->cmp_data.
        C1 must be a modifiable variable: 32 is subtracted from it in
        place.  An adjusted value below 81 is the one-byte old format,
        split into a quotient and remainder by 9 with the value 4 mapped
        to 10 in each.  An adjusted value from 81 to 92 is the two-byte
        new format, whose second byte is read into the caller's `c2'
        with ONE_MORE_BYTE.  For any other adjusted value the rule
        stored is 0.  Afterwards coding->composition_rule_follows is
        cleared.  The expansion needs `coding', `c2' and whatever
        ONE_MORE_BYTE requires in scope.  */
1222
1223 #define DECODE_COMPOSITION_RULE(c1)                                     \
1224   do {                                                                  \
1225     int rule = 0;                                                       \
1226     (c1) -= 32;                                                         \
1227     if (c1 < 81)                /* old format (before ver.21) */        \
1228       {                                                                 \
1229         int gref = (c1) / 9;                                            \
1230         int nref = (c1) % 9;                                            \
1231         if (gref == 4) gref = 10;                                       \
1232         if (nref == 4) nref = 10;                                       \
1233         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1234       }                                                                 \
1235     else if (c1 < 93)           /* new format (after ver.21) */         \
1236       {                                                                 \
1237         ONE_MORE_BYTE (c2);                                             \
1238         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1239       }                                                                 \
1240     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1241     coding->composition_rule_follows = 0;                               \
1242   } while (0)
1243
1244
1245 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1246
1247 static void
1248 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1249      struct coding_system *coding;
1250      unsigned char *source, *destination;
1251      int src_bytes, dst_bytes;
1252 {
1253   unsigned char *src = source;
1254   unsigned char *src_end = source + src_bytes;
1255   unsigned char *dst = destination;
1256   unsigned char *dst_end = destination + dst_bytes;
1257   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1258   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1259   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1260   /* SRC_BASE remembers the start position in source in each loop.
1261      The loop will be exited when there's not enough source code
1262      (within macro ONE_MORE_BYTE), or when there's not enough
1263      destination area to produce a character (within macro
1264      EMIT_CHAR).  */
1265   unsigned char *src_base;
1266   int c, charset;
1267   Lisp_Object translation_table;
1268   Lisp_Object safe_chars;
1269
1270   safe_chars = coding_safe_chars (coding);
1271
1272   if (NILP (Venable_character_translation))
1273     translation_table = Qnil;
1274   else
1275     {
1276       translation_table = coding->translation_table_for_decode;
1277       if (NILP (translation_table))
1278         translation_table = Vstandard_translation_table_for_decode;
1279     }
1280
1281   coding->result = CODING_FINISH_NORMAL;
1282
1283   while (1)
1284     {
1285       int c1, c2;
1286
1287       src_base = src;
1288       ONE_MORE_BYTE (c1);
1289
1290       /* We produce no character or one character.  */
1291       switch (iso_code_class [c1])
1292         {
1293         case ISO_0x20_or_0x7F:
1294           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1295             {
1296               DECODE_COMPOSITION_RULE (c1);
1297               continue;
1298             }
1299           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1300             {
1301               /* This is SPACE or DEL.  */
1302               charset = CHARSET_ASCII;
1303               break;
1304             }
1305           /* This is a graphic character, we fall down ...  */
1306
1307         case ISO_graphic_plane_0:
1308           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1309             {
1310               DECODE_COMPOSITION_RULE (c1);
1311               continue;
1312             }
1313           charset = charset0;
1314           break;
1315
1316         case ISO_0xA0_or_0xFF:
1317           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1318               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1319             goto label_invalid_code;
1320           /* This is a graphic character, we fall down ... */
1321
1322         case ISO_graphic_plane_1:
1323           if (charset1 < 0)
1324             goto label_invalid_code;
1325           charset = charset1;
1326           break;
1327
1328         case ISO_control_0:
1329           if (COMPOSING_P (coding))
1330             DECODE_COMPOSITION_END ('1');
1331
1332           /* All ISO2022 control characters in this class have the
1333              same representation in Emacs internal format.  */
1334           if (c1 == '\n'
1335               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1336               && (coding->eol_type == CODING_EOL_CR
1337                   || coding->eol_type == CODING_EOL_CRLF))
1338             {
1339               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1340               goto label_end_of_loop;
1341             }
1342           charset = CHARSET_ASCII;
1343           break;
1344
1345         case ISO_control_1:
1346           if (COMPOSING_P (coding))
1347             DECODE_COMPOSITION_END ('1');
1348           goto label_invalid_code;
1349
1350         case ISO_carriage_return:
1351           if (COMPOSING_P (coding))
1352             DECODE_COMPOSITION_END ('1');
1353
1354           if (coding->eol_type == CODING_EOL_CR)
1355             c1 = '\n';
1356           else if (coding->eol_type == CODING_EOL_CRLF)
1357             {
1358               ONE_MORE_BYTE (c1);
1359               if (c1 != ISO_CODE_LF)
1360                 {
1361                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1362                     {
1363                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
1364                       goto label_end_of_loop;
1365                     }
1366                   src--;
1367                   c1 = '\r';
1368                 }
1369             }
1370           charset = CHARSET_ASCII;
1371           break;
1372
1373         case ISO_shift_out:
1374           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1375               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1376             goto label_invalid_code;
1377           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1378           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1379           continue;
1380
1381         case ISO_shift_in:
1382           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1383             goto label_invalid_code;
1384           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1385           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1386           continue;
1387
1388         case ISO_single_shift_2_7:
1389         case ISO_single_shift_2:
1390           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1391             goto label_invalid_code;
1392           /* SS2 is handled as an escape sequence of ESC 'N' */
1393           c1 = 'N';
1394           goto label_escape_sequence;
1395
1396         case ISO_single_shift_3:
1397           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1398             goto label_invalid_code;
1399           /* SS3 is handled as an escape sequence of ESC 'O' */
1400           c1 = 'O';
1401           goto label_escape_sequence;
1402
1403         case ISO_control_sequence_introducer:
1404           /* CSI is handled as an escape sequence of ESC '[' ...  */
1405           c1 = '[';
1406           goto label_escape_sequence;
1407
1408         case ISO_escape:
1409           ONE_MORE_BYTE (c1);
1410         label_escape_sequence:
1411           /* Escape sequences handled by Emacs are invocation,
1412              designation, direction specification, and character
1413              composition specification.  */
1414           switch (c1)
1415             {
1416             case '&':           /* revision of following character set */
1417               ONE_MORE_BYTE (c1);
1418               if (!(c1 >= '@' && c1 <= '~'))
1419                 goto label_invalid_code;
1420               ONE_MORE_BYTE (c1);
1421               if (c1 != ISO_CODE_ESC)
1422                 goto label_invalid_code;
1423               ONE_MORE_BYTE (c1);
1424               goto label_escape_sequence;
1425
1426             case '$':           /* designation of 2-byte character set */
1427               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1428                 goto label_invalid_code;
1429               ONE_MORE_BYTE (c1);
1430               if (c1 >= '@' && c1 <= 'B')
1431                 {       /* designation of JISX0208.1978, GB2312.1980,
1432                            or JISX0208.1980 */
1433                   DECODE_DESIGNATION (0, 2, 94, c1);
1434                 }
1435               else if (c1 >= 0x28 && c1 <= 0x2B)
1436                 {       /* designation of DIMENSION2_CHARS94 character set */
1437                   ONE_MORE_BYTE (c2);
1438                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1439                 }
1440               else if (c1 >= 0x2C && c1 <= 0x2F)
1441                 {       /* designation of DIMENSION2_CHARS96 character set */
1442                   ONE_MORE_BYTE (c2);
1443                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1444                 }
1445               else
1446                 goto label_invalid_code;
1447               /* We must update these variables now.  */
1448               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1449               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1450               continue;
1451
1452             case 'n':           /* invocation of locking-shift-2 */
1453               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1454                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1455                 goto label_invalid_code;
1456               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1457               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1458               continue;
1459
1460             case 'o':           /* invocation of locking-shift-3 */
1461               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1462                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1463                 goto label_invalid_code;
1464               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1465               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1466               continue;
1467
1468             case 'N':           /* invocation of single-shift-2 */
1469               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1470                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1471                 goto label_invalid_code;
1472               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1473               ONE_MORE_BYTE (c1);
1474               break;
1475
1476             case 'O':           /* invocation of single-shift-3 */
1477               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1478                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1479                 goto label_invalid_code;
1480               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1481               ONE_MORE_BYTE (c1);
1482               break;
1483
1484             case '0': case '2': case '3': case '4': /* start composition */
1485               DECODE_COMPOSITION_START (c1);
1486               continue;
1487
1488             case '1':           /* end composition */
1489               DECODE_COMPOSITION_END (c1);
1490               continue;
1491
1492             case '[':           /* specification of direction */
1493               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1494                 goto label_invalid_code;
1495               /* For the moment, nested direction is not supported.
1496                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1497                  left-to-right, and nonzero means right-to-left.  */
1498               ONE_MORE_BYTE (c1);
1499               switch (c1)
1500                 {
1501                 case ']':       /* end of the current direction */
1502                   coding->mode &= ~CODING_MODE_DIRECTION;
1503
1504                 case '0':       /* end of the current direction */
1505                 case '1':       /* start of left-to-right direction */
1506                   ONE_MORE_BYTE (c1);
1507                   if (c1 == ']')
1508                     coding->mode &= ~CODING_MODE_DIRECTION;
1509                   else
1510                     goto label_invalid_code;
1511                   break;
1512
1513                 case '2':       /* start of right-to-left direction */
1514                   ONE_MORE_BYTE (c1);
1515                   if (c1 == ']')
1516                     coding->mode |= CODING_MODE_DIRECTION;
1517                   else
1518                     goto label_invalid_code;
1519                   break;
1520
1521                 default:
1522                   goto label_invalid_code;
1523                 }
1524               continue;
1525
1526             default:
1527               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1528                 goto label_invalid_code;
1529               if (c1 >= 0x28 && c1 <= 0x2B)
1530                 {       /* designation of DIMENSION1_CHARS94 character set */
1531                   ONE_MORE_BYTE (c2);
1532                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1533                 }
1534               else if (c1 >= 0x2C && c1 <= 0x2F)
1535                 {       /* designation of DIMENSION1_CHARS96 character set */
1536                   ONE_MORE_BYTE (c2);
1537                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1538                 }
1539               else
1540                 goto label_invalid_code;
1541               /* We must update these variables now.  */
1542               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1543               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1544               continue;
1545             }
1546         }
1547
1548       /* Now we know CHARSET and 1st position code C1 of a character.
1549          Produce a multibyte sequence for that character while getting
1550          2nd position code C2 if necessary.  */
1551       if (CHARSET_DIMENSION (charset) == 2)
1552         {
1553           ONE_MORE_BYTE (c2);
1554           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
1555             /* C2 is not in a valid range.  */
1556             goto label_invalid_code;
1557         }
1558       c = DECODE_ISO_CHARACTER (charset, c1, c2);
1559       EMIT_CHAR (c);
1560       continue;
1561
1562     label_invalid_code:
1563       coding->errors++;
1564       if (COMPOSING_P (coding))
1565         DECODE_COMPOSITION_END ('1');
1566       src = src_base;
1567       c = *src++;
1568       EMIT_CHAR (c);
1569     }
1570
1571  label_end_of_loop:
1572   coding->consumed = coding->consumed_char = src_base - source;
1573   coding->produced = dst - destination;
1574   return;
1575 }
1576
1577
1578 /* ISO2022 encoding stuff.  */
1579
1580 /*
1581    It is not enough to say just "ISO2022" on encoding, we have to
1582    specify more details.  In Emacs, each coding system of ISO2022
1583    variant has the following specifications:
1584         1. Initial designation to G0 thru G3.
1585         2. Allows short-form designation?
1586         3. ASCII should be designated to G0 before control characters?
1587         4. ASCII should be designated to G0 at end of line?
1588         5. 7-bit environment or 8-bit environment?
1589         6. Use locking-shift?
1590         7. Use Single-shift?
1591    And the following two are only for Japanese:
1592         8. Use ASCII in place of JIS0201-1976-Roman?
1593         9. Use JISX0208-1983 in place of JISX0208-1978?
1594    These specifications are encoded in `coding->flags' as flag bits
1595    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1596    details.
1597 */
1598
1599 /* Produce codes (escape sequence) for designating CHARSET to graphic
1600    register REG at DST, and increment DST.  If <final-char> of CHARSET is
1601    '@', 'A', or 'B' and the coding system CODING allows, produce
1602    designation sequence of short-form.
        The bytes written are, in order:
          - ESC '&' ('@' + revision), only when the revision number
            recorded in CODING for CHARSET is less than 255
            (presumably 255 marks "no revision" -- confirm in coding.h);
          - ESC;
          - '$', only when CHARSET is not of dimension 1;
          - one intermediate character selected by REG: "()*+"[REG] for
            a 94-character set, ",-./"[REG] for any other size.  It is
            omitted only in the short-form case, which additionally
            requires a 94-character set that is not of dimension 1, the
            flag CODING_FLAG_ISO_SHORT_FORM, and REG == 0;
          - the final character of CHARSET.
        Afterwards CHARSET is recorded in CODING as the designation of
        REG.
        REG indexes four-character tables, so it must be in 0..3.  The
        macro writes through a variable named `dst' taken from the
        expansion site and performs no bounds check.  CODING is expanded
        unparenthesized (`coding->flags'), so pass a simple pointer
        expression.  */
1603
1604 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
1605   do {                                                                  \
1606     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
1607     char *intermediate_char_94 = "()*+";                                \
1608     char *intermediate_char_96 = ",-./";                                \
1609     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
1610                                                                         \
1611     if (revision < 255)                                                 \
1612       {                                                                 \
1613         *dst++ = ISO_CODE_ESC;                                          \
1614         *dst++ = '&';                                                   \
1615         *dst++ = '@' + revision;                                        \
1616       }                                                                 \
1617     *dst++ = ISO_CODE_ESC;                                              \
1618     if (CHARSET_DIMENSION (charset) == 1)                               \
1619       {                                                                 \
1620         if (CHARSET_CHARS (charset) == 94)                              \
1621           *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
1622         else                                                            \
1623           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1624       }                                                                 \
1625     else                                                                \
1626       {                                                                 \
1627         *dst++ = '$';                                                   \
1628         if (CHARSET_CHARS (charset) == 94)                              \
1629           {                                                             \
1630             if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
1631                 || reg != 0                                             \
1632                 || final_char < '@' || final_char > 'B')                \
1633               *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
1634           }                                                             \
1635         else                                                            \
1636           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1637       }                                                                 \
1638     *dst++ = final_char;                                                \
1639     CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
1640   } while (0)
1641
1642 /* The following two macros produce codes (control character or escape
1643    sequence) for ISO2022 single-shift functions (single-shift-2 and
1644    single-shift-3).
        When CODING_FLAG_ISO_SEVEN_BITS is set in `coding->flags' the
        two-byte form ESC 'N' (single-shift-2) or ESC 'O'
        (single-shift-3) is written; otherwise the single byte
        ISO_CODE_SS2 or ISO_CODE_SS3.  Either way
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) is set to 1; the macros
        ENCODE_ISO_CHARACTER_DIMENSION1 and
        ENCODE_ISO_CHARACTER_DIMENSION2 test that mark and reset it to 0
        after producing one character.  CODING_SPEC_ISO_INVOCATION is
        not changed.  Both macros use the variables `coding' and `dst'
        of the expansion site.  */
1645
1646 #define ENCODE_SINGLE_SHIFT_2                           \
1647   do {                                                  \
1648     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1649       *dst++ = ISO_CODE_ESC, *dst++ = 'N';              \
1650     else                                                \
1651       *dst++ = ISO_CODE_SS2;                            \
1652     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1653   } while (0)
1654
1655 #define ENCODE_SINGLE_SHIFT_3                           \
1656   do {                                                  \
1657     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1658       *dst++ = ISO_CODE_ESC, *dst++ = 'O';              \
1659     else                                                \
1660       *dst++ = ISO_CODE_SS3;                            \
1661     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1662   } while (0)
1663
1664 /* The following four macros produce codes (control character or
1665    escape sequence) for ISO2022 locking-shift functions (shift-in,
1666    shift-out, locking-shift-2, and locking-shift-3).
        ENCODE_SHIFT_IN writes the byte ISO_CODE_SI and ENCODE_SHIFT_OUT
        the byte ISO_CODE_SO; ENCODE_LOCKING_SHIFT_2 and
        ENCODE_LOCKING_SHIFT_3 write ESC 'n' and ESC 'o'.  Each macro
        then stores in CODING_SPEC_ISO_INVOCATION (coding, 0) the number
        (0, 1, 2, or 3 respectively) of the graphic register that is
        now invoked to graphic plane 0.  All four use the variables
        `coding' and `dst' of the expansion site.  */
1667
1668 #define ENCODE_SHIFT_IN                         \
1669   do {                                          \
1670     *dst++ = ISO_CODE_SI;                       \
1671     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1672   } while (0)
1673
1674 #define ENCODE_SHIFT_OUT                        \
1675   do {                                          \
1676     *dst++ = ISO_CODE_SO;                       \
1677     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1678   } while (0)
1679
1680 #define ENCODE_LOCKING_SHIFT_2                  \
1681   do {                                          \
1682     *dst++ = ISO_CODE_ESC, *dst++ = 'n';        \
1683     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1684   } while (0)
1685
1686 #define ENCODE_LOCKING_SHIFT_3                  \
1687   do {                                          \
1688     *dst++ = ISO_CODE_ESC, *dst++ = 'o';        \
1689     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1690   } while (0)
1691
1692 /* Produce codes for a DIMENSION1 character whose character set is
1693    CHARSET and whose position-code is C1.  Designation and invocation
1694    sequences are also produced in advance if necessary.
        The body is a `do ... while (1)' loop that is left by `break'
        as soon as the byte for C1 has been stored:
          - if a single shift is pending
            (CODING_SPEC_ISO_SINGLE_SHIFTING), store C1 with the eighth
            bit cleared when CODING_FLAG_ISO_SEVEN_BITS is set and with
            the eighth bit set otherwise, then clear the pending mark;
          - else if CHARSET is the charset of graphic plane 0, store C1
            with the eighth bit cleared;
          - else if CHARSET is the charset of graphic plane 1, store C1
            with the eighth bit set;
          - otherwise call encode_invocation_designation and go round
            the loop again.
        The macro uses the variables `coding' and `dst' of the expansion
        site.  C1 is expanded unparenthesized (`c1 & 0x7F'), so pass a
        simple expression.  */
1695
1696 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
1697   do {                                                                  \
1698     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1699       {                                                                 \
1700         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1701           *dst++ = c1 & 0x7F;                                           \
1702         else                                                            \
1703           *dst++ = c1 | 0x80;                                           \
1704         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1705         break;                                                          \
1706       }                                                                 \
1707     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1708       {                                                                 \
1709         *dst++ = c1 & 0x7F;                                             \
1710         break;                                                          \
1711       }                                                                 \
1712     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1713       {                                                                 \
1714         *dst++ = c1 | 0x80;                                             \
1715         break;                                                          \
1716       }                                                                 \
1717     else                                                                \
1718       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1719          must invoke it, or, at first, designate it to some graphic     \
1720          register.  Then repeat the loop to actually produce the        \
1721          character.  */                                                 \
1722       dst = encode_invocation_designation (charset, coding, dst);       \
1723   } while (1)
1724
1725 /* Produce codes for a DIMENSION2 character whose character set is
1726    CHARSET and whose position-codes are C1 and C2.  Designation and
1727    invocation codes are also produced in advance if necessary.
        The body is a `do ... while (1)' loop that is left by `break'
        as soon as the two bytes have been stored:
          - if a single shift is pending
            (CODING_SPEC_ISO_SINGLE_SHIFTING), store C1 and C2 with the
            eighth bit cleared when CODING_FLAG_ISO_SEVEN_BITS is set
            and with the eighth bit set otherwise, then clear the
            pending mark;
          - else if CHARSET is the charset of graphic plane 0, store
            both bytes with the eighth bit cleared;
          - else if CHARSET is the charset of graphic plane 1, store
            both bytes with the eighth bit set;
          - otherwise call encode_invocation_designation and go round
            the loop again.
        The macro uses the variables `coding' and `dst' of the expansion
        site.  C1 and C2 are expanded unparenthesized, so pass simple
        expressions.  */
1728
1729 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
1730   do {                                                                  \
1731     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1732       {                                                                 \
1733         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1734           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
1735         else                                                            \
1736           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
1737         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1738         break;                                                          \
1739       }                                                                 \
1740     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1741       {                                                                 \
1742         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;                          \
1743         break;                                                          \
1744       }                                                                 \
1745     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1746       {                                                                 \
1747         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
1748         break;                                                          \
1749       }                                                                 \
1750     else                                                                \
1751       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1752          must invoke it, or, at first, designate it to some graphic     \
1753          register.  Then repeat the loop to actually produce the        \
1754          character.  */                                                 \
1755       dst = encode_invocation_designation (charset, coding, dst);       \
1756   } while (1)
1757 /* Produce codes for character C at DST, whatever its dimension.
        C is split by SPLIT_CHAR into a charset and position codes C1
        and C2, held in variables local to the macro.
        If the charset is defined (CHARSET_DEFINED_P):
          - a dimension-1 character is produced with
            ENCODE_ISO_CHARACTER_DIMENSION1, after replacing
            CHARSET_ASCII by charset_latin_jisx0201 when
            CODING_FLAG_ISO_USE_ROMAN is set;
          - any other character is produced with
            ENCODE_ISO_CHARACTER_DIMENSION2, after replacing
            charset_jisx0208 by charset_jisx0208_1978 when
            CODING_FLAG_ISO_USE_OLDJIS is set.
        If the charset is not defined, C1 is stored as it is, followed
        by C2 when C2 is not negative; no designation or invocation is
        produced in that case.
        The macro uses the variables `coding' and `dst' of the expansion
        site.  */
1758 #define ENCODE_ISO_CHARACTER(c)                                 \
1759   do {                                                          \
1760     int charset, c1, c2;                                        \
1761                                                                 \
1762     SPLIT_CHAR (c, charset, c1, c2);                            \
1763     if (CHARSET_DEFINED_P (charset))                            \
1764       {                                                         \
1765         if (CHARSET_DIMENSION (charset) == 1)                   \
1766           {                                                     \
1767             if (charset == CHARSET_ASCII                        \
1768                 && coding->flags & CODING_FLAG_ISO_USE_ROMAN)   \
1769               charset = charset_latin_jisx0201;                 \
1770             ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);      \
1771           }                                                     \
1772         else                                                    \
1773           {                                                     \
1774             if (charset == charset_jisx0208                     \
1775                 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)  \
1776               charset = charset_jisx0208_1978;                  \
1777             ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);  \
1778           }                                                     \
1779       }                                                         \
1780     else                                                        \
1781       {                                                         \
1782         *dst++ = c1;                                            \
1783         if (c2 >= 0)                                            \
1784           *dst++ = c2;                                          \
1785       }                                                         \
1786   } while (0)
1787
1788
1789 /* Instead of encoding character C, produce one or two `?'s.
        What is actually encoded, through ENCODE_ISO_CHARACTER, is the
        character CODING_INHIBIT_CHARACTER_SUBSTITUTION (defined outside
        this part of the file; presumably `?' -- confirm).  It is
        produced once, and a second time when CHARSET_WIDTH of the
        charset of C is greater than 1.  C itself is only used to look
        up its charset.  */
1790
1791 #define ENCODE_UNSAFE_CHARACTER(c)                                      \
1792   do {                                                                  \
1793     ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
1794     if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1)                           \
1795       ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);     \
1796   } while (0)
1797
1798
1799 /* Produce designation and invocation codes at a place pointed by DST
1800    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1801    Return new DST.
        CHARSET is the charset that the caller wants to produce next.
        If it is not designated to any of the graphic registers 0..3, a
        designation sequence is produced first, for the register that
        CODING requests for CHARSET or, without such a request, for
        register 0.  Then, unless that register is already invoked to
        graphic plane 0 or 1, a shift code invoking it is produced.
        No bounds check is made on DST; the caller must guarantee that
        there is enough room.  */
1802
1803 unsigned char *
1804 encode_invocation_designation (charset, coding, dst)
1805      int charset;
1806      struct coding_system *coding;
1807      unsigned char *dst;
1808 {
1809   int reg;                      /* graphic register number */
1810
1811   /* At first, check designations.  */
1812   for (reg = 0; reg < 4; reg++)
1813     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1814       break;
1815
       /* Here REG is the register holding CHARSET, or 4 if none does.  */
1816   if (reg >= 4)
1817     {
1818       /* CHARSET is not yet designated to any graphic registers.  */
1819       /* At first check the requested designation.  */
1820       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1821       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1822         /* Since CHARSET requests no special designation, designate it
1823            to graphic register 0.  */
1824         reg = 0;
1825
           /* This writes the escape sequence at DST and records CHARSET
              as the designation of REG.  */
1826       ENCODE_DESIGNATION (charset, reg, coding);
1827     }
1828
1829   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1830       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1831     {
1832       /* Since the graphic register REG is not invoked to any graphic
1833          planes, invoke it to graphic plane 0.  */
           /* For registers 2 and 3 a single shift is used when the
              coding system allows it.  A single shift does not change
              the recorded invocation; it only sets the single-shifting
              mark that the character-producing macros consume.  There
              is no default case, so any other value of REG produces
              nothing here.  */
1834       switch (reg)
1835         {
1836         case 0:                 /* graphic register 0 */
1837           ENCODE_SHIFT_IN;
1838           break;
1839
1840         case 1:                 /* graphic register 1 */
1841           ENCODE_SHIFT_OUT;
1842           break;
1843
1844         case 2:                 /* graphic register 2 */
1845           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1846             ENCODE_SINGLE_SHIFT_2;
1847           else
1848             ENCODE_LOCKING_SHIFT_2;
1849           break;
1850
1851         case 3:                 /* graphic register 3 */
1852           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1853             ENCODE_SINGLE_SHIFT_3;
1854           else
1855             ENCODE_LOCKING_SHIFT_3;
1856           break;
1857         }
1858     }
1859
1860   return dst;
1861 }
1862
1863 /* Produce 2-byte codes for encoded composition rule RULE.
        RULE is decomposed by COMPOSITION_DECODE_RULE into two
        reference points GREF and NREF; the first byte is
        32 + 81 + GREF and the second byte is 32 + NREF.  (The offsets
        presumably match what the ISO2022 decoder expects for a
        composition rule -- confirm against the decoder.)  The macro
        writes through the variable `dst' of the expansion site.  */
1864
1865 #define ENCODE_COMPOSITION_RULE(rule)           \
1866   do {                                          \
1867     int gref, nref;                             \
1868     COMPOSITION_DECODE_RULE (rule, gref, nref); \
1869     *dst++ = 32 + 81 + gref;                    \
1870     *dst++ = 32 + nref;                         \
1871   } while (0)
1872
1873 /* Produce codes for indicating the start of a composition sequence
1874    (ESC 0, ESC 3, or ESC 4).  DATA points to an array of integers
1875    which specify information about the composition.  See the comment
1876    in coding.h for the format of DATA.
        DATA[3] is stored in `coding->composing' and selects the code:
        ESC '0' for COMPOSITION_RELATIVE, ESC '3' for
        COMPOSITION_WITH_ALTCHARS, and ESC '4' for any other value.  In
        the latter two cases `coding->cmp_data_index' is set to
        `coding->cmp_data_start + 4' and
        `coding->composition_rule_follows' is cleared.  The macro writes
        through the variable `dst' of the expansion site; CODING is
        expanded unparenthesized, so pass a simple pointer
        expression.  */
1877
1878 #define ENCODE_COMPOSITION_START(coding, data)                          \
1879   do {                                                                  \
1880     coding->composing = data[3];                                        \
1881     *dst++ = ISO_CODE_ESC;                                              \
1882     if (coding->composing == COMPOSITION_RELATIVE)                      \
1883       *dst++ = '0';                                                     \
1884     else                                                                \
1885       {                                                                 \
1886         *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS        \
1887                   ? '3' : '4');                                         \
1888         coding->cmp_data_index = coding->cmp_data_start + 4;            \
1889         coding->composition_rule_follows = 0;                           \
1890       }                                                                 \
1891   } while (0)
1892
1893 /* Produce codes for indicating the end of the current composition.
        ESC '1' is written.  Then `coding->cmp_data_start' is advanced
        by DATA[0], `coding->composing' is set to COMPOSITION_NO, and,
        when the start index has reached `coding->cmp_data->used' and a
        next block of composition data exists, `coding->cmp_data' is
        switched to that block and the start index is reset to 0.  The
        macro writes through the variable `dst' of the expansion
        site.  */
1894
1895 #define ENCODE_COMPOSITION_END(coding, data)                    \
1896   do {                                                          \
1897     *dst++ = ISO_CODE_ESC;                                      \
1898     *dst++ = '1';                                               \
1899     coding->cmp_data_start += data[0];                          \
1900     coding->composing = COMPOSITION_NO;                         \
1901     if (coding->cmp_data_start == coding->cmp_data->used        \
1902         && coding->cmp_data->next)                              \
1903       {                                                         \
1904         coding->cmp_data = coding->cmp_data->next;              \
1905         coding->cmp_data_start = 0;                             \
1906       }                                                         \
1907   } while (0)
1908
1909 /* Produce composition start sequence ESC 0.  Here, this sequence
1910    doesn't mean the start of a new composition but means that we have
1911    just produced components (alternate chars and composition rules) of
1912    the composition and the actual text follows in SRC.
        Besides writing the two bytes, the macro sets
        `coding->composing' to COMPOSITION_RELATIVE.  It writes through
        the variable `dst' of the expansion site.  */
1913
1914 #define ENCODE_COMPOSITION_FAKE_START(coding)   \
1915   do {                                          \
1916     *dst++ = ISO_CODE_ESC;                      \
1917     *dst++ = '0';                               \
1918     coding->composing = COMPOSITION_RELATIVE;   \
1919   } while (0)
1920
1921 /* The following three macros produce codes for indicating direction
1922    of text.
        ENCODE_CONTROL_SEQUENCE_INTRODUCER writes CSI: the two bytes
        ESC '[' when CODING_FLAG_ISO_SEVEN_BITS is set in
        `coding->flags', otherwise the single byte ISO_CODE_CSI.  The
        flag is tested with `&', as in ENCODE_SINGLE_SHIFT_2 and the
        other macros that test it; comparing the whole flag word with
        `==' would choose the 7-bit form only when no other flag is
        set.
        ENCODE_DIRECTION_R2L writes CSI '2' ']' and ENCODE_DIRECTION_L2R
        writes CSI '0' ']'.  Both are statement macros: since
        ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like
        `do ... while (0)' macro, it can neither take an argument list
        nor be an operand of the comma operator.
        All three use the variables `coding' and `dst' of the expansion
        site and must be followed by a semicolon.  */
1923 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
1924   do {                                                  \
1925     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1926       *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
1927     else                                                \
1928       *dst++ = ISO_CODE_CSI;                            \
1929   } while (0)
1930
1931 #define ENCODE_DIRECTION_R2L    \
1932   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '2'; *dst++ = ']'; } while (0)
1933
1934 #define ENCODE_DIRECTION_L2R    \
1935   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '0'; *dst++ = ']'; } while (0)
1936
1937 /* Produce codes for designation and invocation to reset the graphic
1938    planes and registers to initial state.
        First, ENCODE_SHIFT_IN is produced unless graphic register 0 is
        already the one invoked to graphic plane 0.  Then, for each
        graphic register 0..3 whose initial designation is set (a value
        not less than 0) and differs from its current designation, the
        initial charset is designated again with ENCODE_DESIGNATION.
        Registers without an initial designation are left as they are,
        and the invocation of graphic plane 1 is not touched.  The macro
        uses the variables `coding' and `dst' of the expansion site.  */
1939 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
1940   do {                                                                      \
1941     int reg;                                                                \
1942     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
1943       ENCODE_SHIFT_IN;                                                      \
1944     for (reg = 0; reg < 4; reg++)                                           \
1945       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0            \
1946           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
1947               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
1948         ENCODE_DESIGNATION                                                  \
1949           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1950   } while (0)
1951
1952 /* Produce designation sequences of charsets in the line started from
1953    SRC to a place pointed by DST, and return updated DST.
1954
1955    If the current block ends before any end-of-line, we may fail to
1956    find all the necessary designations.
        The line is scanned character by character until a newline is
        read or until a charset has been found for each of the four
        graphic registers.  Only charsets for which CODING requests a
        particular register are considered, and for each register the
        first such charset met in the line is the one designated.  After
        the scan, a designation sequence is produced for every such
        register whose current designation differs.
        SRC is advanced only locally; the caller's source position is
        not changed by this function.  No bounds check is made on DST.
        TRANSLATION_TABLE and SRC_END are not referenced by name in the
        body; presumably the macro ONE_MORE_CHAR, which is defined
        outside this part of the file, uses them -- confirm.  */
1957
1958 static unsigned char *
1959 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
1960      struct coding_system *coding;
1961      Lisp_Object translation_table;
1962      unsigned char *src, *src_end, *dst;
1963 {
1964   int charset, c, found = 0, reg;
1965   /* Table of charsets to be designated to each graphic register.  */
1966   int r[4];
1967
       /* A negative entry means that no charset has been found yet for
          that register.  */
1968   for (reg = 0; reg < 4; reg++)
1969     r[reg] = -1;
1970
1971   while (found < 4)
1972     {
           /* Presumably this reads the next character into C and jumps
              to label_end_of_loop when the source is exhausted --
              confirm against the definition of ONE_MORE_CHAR.  */
1973       ONE_MORE_CHAR (c);
1974       if (c == '\n')
1975         break;
1976
1977       charset = CHAR_CHARSET (c);
1978       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
           /* Remember only the first charset requesting each register.  */
1979       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1980         {
1981           found++;
1982           r[reg] = charset;
1983         }
1984     }
1985
1986  label_end_of_loop:
1987   if (found)
1988     {
           /* Skip registers that already hold the wanted charset.  */
1989       for (reg = 0; reg < 4; reg++)
1990         if (r[reg] >= 0
1991             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1992           ENCODE_DESIGNATION (r[reg], reg, coding);
1993     }
1994
1995   return dst;
1996 }
1997
1998 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
1999
2000 static void
2001 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2002      struct coding_system *coding;
2003      unsigned char *source, *destination;
2004      int src_bytes, dst_bytes;
2005 {
2006   unsigned char *src = source;
2007   unsigned char *src_end = source + src_bytes;
2008   unsigned char *dst = destination;
2009   unsigned char *dst_end = destination + dst_bytes;
2010   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2011      from DST_END to assure overflow checking is necessary only at the
2012      head of loop.  */
2013   unsigned char *adjusted_dst_end = dst_end - 19;
2014   /* SRC_BASE remembers the start position in source in each loop.
2015      The loop will be exited when there's not enough source text to
2016      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2017      there's not enough destination area to produce encoded codes
2018      (within macro EMIT_BYTES).  */
2019   unsigned char *src_base;
2020   int c;
2021   Lisp_Object translation_table;
2022   Lisp_Object safe_chars;
2023
2024   safe_chars = coding_safe_chars (coding);
2025
2026   if (NILP (Venable_character_translation))
2027     translation_table = Qnil;
2028   else
2029     {
2030       translation_table = coding->translation_table_for_encode;
2031       if (NILP (translation_table))
2032         translation_table = Vstandard_translation_table_for_encode;
2033     }
2034
2035   coding->consumed_char = 0;
2036   coding->errors = 0;
2037   while (1)
2038     {
2039       src_base = src;
2040
2041       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2042         {
2043           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2044           break;
2045         }
2046
2047       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2048           && CODING_SPEC_ISO_BOL (coding))
2049         {
2050           /* We have to produce designation sequences if any now.  */
2051           dst = encode_designation_at_bol (coding, translation_table,
2052                                            src, src_end, dst);
2053           CODING_SPEC_ISO_BOL (coding) = 0;
2054         }
2055
2056       /* Check composition start and end.  */
2057       if (coding->composing != COMPOSITION_DISABLED
2058           && coding->cmp_data_start < coding->cmp_data->used)
2059         {
2060           struct composition_data *cmp_data = coding->cmp_data;
2061           int *data = cmp_data->data + coding->cmp_data_start;
2062           int this_pos = cmp_data->char_offset + coding->consumed_char;
2063
2064           if (coding->composing == COMPOSITION_RELATIVE)
2065             {
2066               if (this_pos == data[2])
2067                 {
2068                   ENCODE_COMPOSITION_END (coding, data);
2069                   cmp_data = coding->cmp_data;
2070                   data = cmp_data->data + coding->cmp_data_start;
2071                 }
2072             }
2073           else if (COMPOSING_P (coding))
2074             {
2075               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2076               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2077                 /* We have consumed components of the composition.
2078                    What follows in SRC is the compositions's base
2079                    text.  */
2080                 ENCODE_COMPOSITION_FAKE_START (coding);
2081               else
2082                 {
2083                   int c = cmp_data->data[coding->cmp_data_index++];
2084                   if (coding->composition_rule_follows)
2085                     {
2086                       ENCODE_COMPOSITION_RULE (c);
2087                       coding->composition_rule_follows = 0;
2088                     }
2089                   else
2090                     {
2091                       if (coding->flags & CODING_FLAG_ISO_SAFE
2092                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2093                         ENCODE_UNSAFE_CHARACTER (c);
2094                       else
2095                         ENCODE_ISO_CHARACTER (c);
2096                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2097                         coding->composition_rule_follows = 1;
2098                     }
2099                   continue;
2100                 }
2101             }
2102           if (!COMPOSING_P (coding))
2103             {
2104               if (this_pos == data[1])
2105                 {
2106                   ENCODE_COMPOSITION_START (coding, data);
2107                   continue;
2108                 }
2109             }
2110         }
2111
2112       ONE_MORE_CHAR (c);
2113
2114       /* Now encode the character C.  */
2115       if (c < 0x20 || c == 0x7F)
2116         {
2117           if (c == '\r')
2118             {
2119               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2120                 {
2121                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2122                     ENCODE_RESET_PLANE_AND_REGISTER;
2123                   *dst++ = c;
2124                   continue;
2125                 }
2126               /* fall down to treat '\r' as '\n' ...  */
2127               c = '\n';
2128             }
2129           if (c == '\n')
2130             {
2131               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2132                 ENCODE_RESET_PLANE_AND_REGISTER;
2133               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2134                 bcopy (coding->spec.iso2022.initial_designation,
2135                        coding->spec.iso2022.current_designation,
2136                        sizeof coding->spec.iso2022.initial_designation);
2137               if (coding->eol_type == CODING_EOL_LF
2138                   || coding->eol_type == CODING_EOL_UNDECIDED)
2139                 *dst++ = ISO_CODE_LF;
2140               else if (coding->eol_type == CODING_EOL_CRLF)
2141                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2142               else
2143                 *dst++ = ISO_CODE_CR;
2144               CODING_SPEC_ISO_BOL (coding) = 1;
2145             }
2146           else
2147             {
2148               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2149                 ENCODE_RESET_PLANE_AND_REGISTER;
2150               *dst++ = c;
2151             }
2152         }
2153       else if (ASCII_BYTE_P (c))
2154         ENCODE_ISO_CHARACTER (c);
2155       else if (SINGLE_BYTE_CHAR_P (c))
2156         {
2157           *dst++ = c;
2158           coding->errors++;
2159         }
2160       else if (coding->flags & CODING_FLAG_ISO_SAFE
2161                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2162         ENCODE_UNSAFE_CHARACTER (c);
2163       else
2164         ENCODE_ISO_CHARACTER (c);
2165
2166       coding->consumed_char++;
2167     }
2168
2169  label_end_of_loop:
2170   coding->consumed = src_base - source;
2171   coding->produced = coding->produced_char = dst - destination;
2172 }
2173
2174 \f
2175 /*** 4. SJIS and BIG5 handlers ***/
2176
2177 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2178    quite widely.  So, for the moment, Emacs supports them in the bare
2179    C code.  But, in the future, they may be supported only by CCL.  */
2180
2181 /* SJIS is a coding system encoding three character sets: ASCII, right
2182    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2183    as is.  A character of charset katakana-jisx0201 is encoded by
2184    "position-code + 0x80".  A character of charset japanese-jisx0208
2185    is encoded in two bytes, but its two position-codes are divided and
2186    shifted so that they fit in the ranges below.
2187
2188    --- CODE RANGE of SJIS ---
2189    (character set)      (range)
2190    ASCII                0x00 .. 0x7F
2191    KATAKANA-JISX0201    0xA0 .. 0xDF
2192    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2193             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2194    -------------------------------
2195
2196 */
2197
2198 /* BIG5 is a coding system encoding two character sets: ASCII and
2199    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2200    character set and is encoded in two-byte.
2201
2202    --- CODE RANGE of BIG5 ---
2203    (character set)      (range)
2204    ASCII                0x00 .. 0x7F
2205    Big5 (1st byte)      0xA1 .. 0xFE
2206         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2207    --------------------------
2208
2209    Since the number of characters in Big5 is larger than maximum
2210    characters in Emacs' charset (96x96), it can't be handled as one
2211    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2212    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2213    contains frequently used characters and the latter contains less
2214    frequently used characters.  */
2215
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every argument is parenthesized in the expansions, so arbitrary
   expressions may be passed for the inputs.  Input arguments may be
   evaluated more than once, so they must not have side effects.  The
   output arguments must be lvalues; they may be the same variables
   as the inputs, because all inputs are read before any output is
   stored.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Set CHARSET, C1 and C2 from the BIG5 bytes B1 and B2.  Bytes whose
   B1 is 0xC9 or above belong to `charset_big5_2'.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Set the BIG5 bytes B1 and B2 from CHARSET, C1 and C2.  This is the
   inverse of DECODE_BIG5.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2248
2249 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2250    Check if a text is encoded in SJIS.  If it is, return
2251    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2252
2253 int
2254 detect_coding_sjis (src, src_end)
2255      unsigned char *src, *src_end;
2256 {
2257   int c;
2258   /* Dummy for ONE_MORE_BYTE.  */
2259   struct coding_system dummy_coding;
2260   struct coding_system *coding = &dummy_coding;
2261
2262   while (1)
2263     {
2264       ONE_MORE_BYTE (c);
2265       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2266         {
2267           ONE_MORE_BYTE (c);
2268           if (c < 0x40)
2269             return 0;
2270         }
2271     }
2272  label_end_of_loop:
2273   return CODING_CATEGORY_MASK_SJIS;
2274 }
2275
2276 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2277    Check if a text is encoded in BIG5.  If it is, return
2278    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2279
2280 int
2281 detect_coding_big5 (src, src_end)
2282      unsigned char *src, *src_end;
2283 {
2284   int c;
2285   /* Dummy for ONE_MORE_BYTE.  */
2286   struct coding_system dummy_coding;
2287   struct coding_system *coding = &dummy_coding;
2288
2289   while (1)
2290     {
2291       ONE_MORE_BYTE (c);
2292       if (c >= 0xA1)
2293         {
2294           ONE_MORE_BYTE (c);
2295           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2296             return 0;
2297         }
2298     }
2299  label_end_of_loop:
2300   return CODING_CATEGORY_MASK_BIG5;
2301 }
2302
2303 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2304    Check if a text is encoded in UTF-8.  If it is, return
2305    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2306
/* Nonzero if the bits of C selected by MASK are equal to BITS.  */
#define UTF_8_MASKED_EQ_P(c, mask, bits) (((c) & (mask)) == (bits))

/* C is a complete one-octet sequence.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* C is a trailing octet (bit pattern 10xxxxxx).  */
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_MASKED_EQ_P (c, 0xC0, 0x80)
/* C is the leading octet of a sequence of N octets in total.  */
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_MASKED_EQ_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_MASKED_EQ_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_MASKED_EQ_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_MASKED_EQ_P (c, 0xFC, 0xF8)
#define UTF_8_6_OCTET_LEADING_P(c) UTF_8_MASKED_EQ_P (c, 0xFE, 0xFC)
2314
2315 int
2316 detect_coding_utf_8 (src, src_end)
2317      unsigned char *src, *src_end;
2318 {
2319   unsigned char c;
2320   int seq_maybe_bytes;
2321   /* Dummy for ONE_MORE_BYTE.  */
2322   struct coding_system dummy_coding;
2323   struct coding_system *coding = &dummy_coding;
2324
2325   while (1)
2326     {
2327       ONE_MORE_BYTE (c);
2328       if (UTF_8_1_OCTET_P (c))
2329         continue;
2330       else if (UTF_8_2_OCTET_LEADING_P (c))
2331         seq_maybe_bytes = 1;
2332       else if (UTF_8_3_OCTET_LEADING_P (c))
2333         seq_maybe_bytes = 2;
2334       else if (UTF_8_4_OCTET_LEADING_P (c))
2335         seq_maybe_bytes = 3;
2336       else if (UTF_8_5_OCTET_LEADING_P (c))
2337         seq_maybe_bytes = 4;
2338       else if (UTF_8_6_OCTET_LEADING_P (c))
2339         seq_maybe_bytes = 5;
2340       else
2341         return 0;
2342
2343       do
2344         {
2345           ONE_MORE_BYTE (c);
2346           if (!UTF_8_EXTRA_OCTET_P (c))
2347             return 0;
2348           seq_maybe_bytes--;
2349         }
2350       while (seq_maybe_bytes > 0);
2351     }
2352
2353  label_end_of_loop:
2354   return CODING_CATEGORY_MASK_UTF_8;
2355 }
2356
2357 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2358    Check if a text is encoded in UTF-16 by looking at its first two
2359    bytes.  If they are the byte order mark FE FF, return
2360    CODING_CATEGORY_MASK_UTF_16_BE; if they are FF FE, return
2361    CODING_CATEGORY_MASK_UTF_16_LE; else return 0.  */
2362
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high surrogate, i.e. in 0xD800 .. 0xDBFF.  The
   top six bits select the surrogate half, so they are all compared;
   masking with 0xD800 itself would also accept low surrogates and
   many non-surrogate values.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low surrogate, i.e. in 0xDC00 .. 0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2372
2373 int
2374 detect_coding_utf_16 (src, src_end)
2375      unsigned char *src, *src_end;
2376 {
2377   unsigned char c1, c2;
2378   /* Dummy for TWO_MORE_BYTES.  */
2379   struct coding_system dummy_coding;
2380   struct coding_system *coding = &dummy_coding;
2381
2382   TWO_MORE_BYTES (c1, c2);
2383
2384   if ((c1 == 0xFF) && (c2 == 0xFE))
2385     return CODING_CATEGORY_MASK_UTF_16_LE;
2386   else if ((c1 == 0xFE) && (c2 == 0xFF))
2387     return CODING_CATEGORY_MASK_UTF_16_BE;
2388
2389  label_end_of_loop:
2390   return 0;
2391 }
2392
2393 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2394    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2395
2396 static void
2397 decode_coding_sjis_big5 (coding, source, destination,
2398                          src_bytes, dst_bytes, sjis_p)
2399      struct coding_system *coding;
2400      unsigned char *source, *destination;
2401      int src_bytes, dst_bytes;
2402      int sjis_p;
2403 {
2404   unsigned char *src = source;
2405   unsigned char *src_end = source + src_bytes;
2406   unsigned char *dst = destination;
2407   unsigned char *dst_end = destination + dst_bytes;
2408   /* SRC_BASE remembers the start position in source in each loop.
2409      The loop will be exited when there's not enough source code
2410      (within macro ONE_MORE_BYTE), or when there's not enough
2411      destination area to produce a character (within macro
2412      EMIT_CHAR).  */
2413   unsigned char *src_base;
2414   Lisp_Object translation_table;
2415
2416   if (NILP (Venable_character_translation))
2417     translation_table = Qnil;
2418   else
2419     {
2420       translation_table = coding->translation_table_for_decode;
2421       if (NILP (translation_table))
2422         translation_table = Vstandard_translation_table_for_decode;
2423     }
2424
2425   coding->produced_char = 0;
2426   while (1)
2427     {
2428       int c, charset, c1, c2;
2429
2430       src_base = src;
2431       ONE_MORE_BYTE (c1);
2432
2433       if (c1 < 0x80)
2434         {
2435           charset = CHARSET_ASCII;
2436           if (c1 < 0x20)
2437             {
2438               if (c1 == '\r')
2439                 {
2440                   if (coding->eol_type == CODING_EOL_CRLF)
2441                     {
2442                       ONE_MORE_BYTE (c2);
2443                       if (c2 == '\n')
2444                         c1 = c2;
2445                       else if (coding->mode
2446                                & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2447                         {
2448                           coding->result = CODING_FINISH_INCONSISTENT_EOL;
2449                           goto label_end_of_loop;
2450                         }
2451                       else
2452                         /* To process C2 again, SRC is subtracted by 1.  */
2453                         src--;
2454                     }
2455                   else if (coding->eol_type == CODING_EOL_CR)
2456                     c1 = '\n';
2457                 }
2458               else if (c1 == '\n'
2459                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2460                        && (coding->eol_type == CODING_EOL_CR
2461                            || coding->eol_type == CODING_EOL_CRLF))
2462                 {
2463                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2464                   goto label_end_of_loop;
2465                 }
2466             }
2467         }
2468       else
2469         {
2470           if (sjis_p)
2471             {
2472               if (c1 >= 0xF0)
2473                 goto label_invalid_code;
2474               if (c1 < 0xA0 || c1 >= 0xE0)
2475                 {
2476                   /* SJIS -> JISX0208 */
2477                   ONE_MORE_BYTE (c2);
2478                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2479                     goto label_invalid_code;
2480                   DECODE_SJIS (c1, c2, c1, c2);
2481                   charset = charset_jisx0208;
2482                 }
2483               else
2484                 /* SJIS -> JISX0201-Kana */
2485                 charset = charset_katakana_jisx0201;
2486             }
2487           else
2488             {
2489               /* BIG5 -> Big5 */
2490               if (c1 < 0xA1 || c1 > 0xFE)
2491                 goto label_invalid_code;
2492               ONE_MORE_BYTE (c2);
2493               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2494                 goto label_invalid_code;
2495               DECODE_BIG5 (c1, c2, charset, c1, c2);
2496             }
2497         }
2498
2499       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2500       EMIT_CHAR (c);
2501       continue;
2502
2503     label_invalid_code:
2504       coding->errors++;
2505       src = src_base;
2506       c = *src++;
2507       EMIT_CHAR (c);
2508     }
2509
2510  label_end_of_loop:
2511   coding->consumed = coding->consumed_char = src_base - source;
2512   coding->produced = dst - destination;
2513   return;
2514 }
2515
2516 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2517    This function can encode charsets `ascii', `katakana-jisx0201',
2518    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
2519    are sure that all these charsets are registered as official charset
2520    (i.e. do not have extended leading-codes).  Characters of other
2521    charsets are produced without any encoding.  If SJIS_P is 1, encode
2522    SJIS text, else encode BIG5 text.  */
2523
2524 static void
2525 encode_coding_sjis_big5 (coding, source, destination,
2526                          src_bytes, dst_bytes, sjis_p)
2527      struct coding_system *coding;
2528      unsigned char *source, *destination;
2529      int src_bytes, dst_bytes;
2530      int sjis_p;
2531 {
2532   unsigned char *src = source;
2533   unsigned char *src_end = source + src_bytes;
2534   unsigned char *dst = destination;
2535   unsigned char *dst_end = destination + dst_bytes;
2536   /* SRC_BASE remembers the start position in source in each loop.
2537      The loop will be exited when there's not enough source text to
2538      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2539      there's not enough destination area to produce encoded codes
2540      (within macro EMIT_BYTES).  */
2541   unsigned char *src_base;
2542   Lisp_Object translation_table;
2543
2544   if (NILP (Venable_character_translation))
2545     translation_table = Qnil;
2546   else
2547     {
2548       translation_table = coding->translation_table_for_decode;
2549       if (NILP (translation_table))
2550         translation_table = Vstandard_translation_table_for_decode;
2551     }
2552
2553   while (1)
2554     {
2555       int c, charset, c1, c2;
2556
2557       src_base = src;
2558       ONE_MORE_CHAR (c);
2559
2560       /* Now encode the character C.  */
2561       if (SINGLE_BYTE_CHAR_P (c))
2562         {
2563           switch (c)
2564             {
2565             case '\r':
2566               if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2567                 {
2568                   EMIT_ONE_BYTE (c);
2569                   break;
2570                 }
2571               c = '\n';
2572             case '\n':
2573               if (coding->eol_type == CODING_EOL_CRLF)
2574                 {
2575                   EMIT_TWO_BYTES ('\r', c);
2576                   break;
2577                 }
2578               else if (coding->eol_type == CODING_EOL_CR)
2579                 c = '\r';
2580             default:
2581               EMIT_ONE_BYTE (c);
2582             }
2583         }
2584       else
2585         {
2586           SPLIT_CHAR (c, charset, c1, c2);
2587           if (sjis_p)
2588             {
2589               if (charset == charset_jisx0208
2590                   || charset == charset_jisx0208_1978)
2591                 {
2592                   ENCODE_SJIS (c1, c2, c1, c2);
2593                   EMIT_TWO_BYTES (c1, c2);
2594                 }
2595               else if (charset == charset_latin_jisx0201)
2596                 EMIT_ONE_BYTE (c1);
2597               else
2598                 /* There's no way other than producing the internal
2599                    codes as is.  */
2600                 EMIT_BYTES (src_base, src);
2601             }
2602           else
2603             {
2604               if (charset == charset_big5_1 || charset == charset_big5_2)
2605                 {
2606                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
2607                   EMIT_TWO_BYTES (c1, c2);
2608                 }
2609               else
2610                 /* There's no way other than producing the internal
2611                    codes as is.  */
2612                 EMIT_BYTES (src_base, src);
2613             }
2614         }
2615       coding->consumed_char++;
2616     }
2617
2618  label_end_of_loop:
2619   coding->consumed = src_base - source;
2620   coding->produced = coding->produced_char = dst - destination;
2621 }
2622
2623 \f
2624 /*** 5. CCL handlers ***/
2625
2626 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2627    Check if a text is encoded in a coding system of which
2628    encoder/decoder are written in CCL program.  If it is, return
2629    CODING_CATEGORY_MASK_CCL, else return 0.  */
2630
2631 int
2632 detect_coding_ccl (src, src_end)
2633      unsigned char *src, *src_end;
2634 {
2635   unsigned char *valid;
2636   int c;
2637   /* Dummy for ONE_MORE_BYTE.  */
2638   struct coding_system dummy_coding;
2639   struct coding_system *coding = &dummy_coding;
2640
2641   /* No coding system is assigned to coding-category-ccl.  */
2642   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2643     return 0;
2644
2645   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2646   while (1)
2647     {
2648       ONE_MORE_BYTE (c);
2649       if (! valid[c])
2650         return 0;
2651     }
2652  label_end_of_loop:
2653   return CODING_CATEGORY_MASK_CCL;
2654 }
2655
2656 \f
2657 /*** 6. End-of-line handlers ***/
2658
2659 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2660
2661 static void
2662 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2663      struct coding_system *coding;
2664      unsigned char *source, *destination;
2665      int src_bytes, dst_bytes;
2666 {
2667   unsigned char *src = source;
2668   unsigned char *dst = destination;
2669   unsigned char *src_end = src + src_bytes;
2670   unsigned char *dst_end = dst + dst_bytes;
2671   Lisp_Object translation_table;
2672   /* SRC_BASE remembers the start position in source in each loop.
2673      The loop will be exited when there's not enough source code
2674      (within macro ONE_MORE_BYTE), or when there's not enough
2675      destination area to produce a character (within macro
2676      EMIT_CHAR).  */
2677   unsigned char *src_base;
2678   int c;
2679
2680   translation_table = Qnil;
2681   switch (coding->eol_type)
2682     {
2683     case CODING_EOL_CRLF:
2684       while (1)
2685         {
2686           src_base = src;
2687           ONE_MORE_BYTE (c);
2688           if (c == '\r')
2689             {
2690               ONE_MORE_BYTE (c);
2691               if (c != '\n')
2692                 {
2693                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2694                     {
2695                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
2696                       goto label_end_of_loop;
2697                     }
2698                   src--;
2699                   c = '\r';
2700                 }
2701             }
2702           else if (c == '\n'
2703                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2704             {
2705               coding->result = CODING_FINISH_INCONSISTENT_EOL;
2706               goto label_end_of_loop;
2707             }
2708           EMIT_CHAR (c);
2709         }
2710       break;
2711
2712     case CODING_EOL_CR:
2713       while (1)
2714         {
2715           src_base = src;
2716           ONE_MORE_BYTE (c);
2717           if (c == '\n')
2718             {
2719               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2720                 {
2721                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2722                   goto label_end_of_loop;
2723                 }
2724             }
2725           else if (c == '\r')
2726             c = '\n';
2727           EMIT_CHAR (c);
2728         }
2729       break;
2730
2731     default:                    /* no need for EOL handling */
2732       while (1)
2733         {
2734           src_base = src;
2735           ONE_MORE_BYTE (c);
2736           EMIT_CHAR (c);
2737         }
2738     }
2739
2740  label_end_of_loop:
2741   coding->consumed = coding->consumed_char = src_base - source;
2742   coding->produced = dst - destination;
2743   return;
2744 }
2745
2746 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2747    format of end-of-line according to `coding->eol_type'.  It also
2748    convert multibyte form 8-bit characers to unibyte if
2749    CODING->src_multibyte is nonzero.  If `coding->mode &
2750    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
2751    also means end-of-line.  */
2752
2753 static void
2754 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2755      struct coding_system *coding;
2756      unsigned char *source, *destination;
2757      int src_bytes, dst_bytes;
2758 {
2759   unsigned char *src = source;
2760   unsigned char *dst = destination;
2761   unsigned char *src_end = src + src_bytes;
2762   unsigned char *dst_end = dst + dst_bytes;
2763   Lisp_Object translation_table;
2764   /* SRC_BASE remembers the start position in source in each loop.
2765      The loop will be exited when there's not enough source text to
2766      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2767      there's not enough destination area to produce encoded codes
2768      (within macro EMIT_BYTES).  */
2769   unsigned char *src_base;
2770   int c;
2771   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
2772
2773   translation_table = Qnil;
2774   if (coding->src_multibyte
2775       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
2776     {
2777       src_end--;
2778       src_bytes--;
2779       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
2780     }
2781
2782   if (coding->eol_type == CODING_EOL_CRLF)
2783     {
2784       while (src < src_end)
2785         {
2786           src_base = src;
2787           c = *src++;
2788           if (c >= 0x20)
2789             EMIT_ONE_BYTE (c);
2790           else if (c == '\n' || (c == '\r' && selective_display))
2791             EMIT_TWO_BYTES ('\r', '\n');
2792           else
2793             EMIT_ONE_BYTE (c);
2794         }
2795       src_base = src;
2796     label_end_of_loop:
2797       ;
2798     }
2799   else
2800     {
2801       if (src_bytes <= dst_bytes)
2802         {
2803           safe_bcopy (src, dst, src_bytes);
2804           src_base = src_end;
2805           dst += src_bytes;
2806         }
2807       else
2808         {
2809           if (coding->src_multibyte
2810               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
2811             dst_bytes--;
2812           safe_bcopy (src, dst, dst_bytes);
2813           src_base = src + dst_bytes;
2814           dst = destination + dst_bytes;
2815           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2816         }
2817       if (coding->eol_type == CODING_EOL_CR)
2818         {
2819           for (src = destination; src < dst; src++)
2820             if (*src == '\n') *src = '\r';
2821         }
2822       else if (selective_display)
2823         {
2824           for (src = destination; src < dst; src++)
2825             if (*src == '\r') *src = '\n';
2826         }
2827     }
2828   if (coding->src_multibyte)
2829     dst = destination + str_as_unibyte (destination, dst - destination);
2830
2831   coding->consumed = src_base - source;
2832   coding->produced = dst - destination;
2833 }
2834
2835 \f
2836 /*** 7. C library functions ***/
2837
2838 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2839    has a property `coding-system'.  The value of this property is a
2840    vector of length 5 (called as coding-vector).  Among elements of
2841    this vector, the first (element[0]) and the fifth (element[4])
2842    carry important information for decoding/encoding.  Before
2843    decoding/encoding, this information should be set in fields of a
2844    structure of type `coding_system'.
2845
2846    A value of property `coding-system' can be a symbol of another
2847    subsidiary coding-system.  In that case, Emacs gets coding-vector
2848    from that symbol.
2849
2850    `element[0]' contains information to be set in `coding->type'.  The
2851    possible values and their meanings are as follows:
2852
2853    0 -- coding_type_emacs_mule
2854    1 -- coding_type_sjis
2855    2 -- coding_type_iso2022
2856    3 -- coding_type_big5
2857    4 -- coding_type_ccl encoder/decoder written in CCL
2858    nil -- coding_type_no_conversion
2859    t -- coding_type_undecided (automatic conversion on decoding,
2860                                no-conversion on encoding)
2861
2862    `element[4]' contains information to be set in `coding->flags' and
2863    `coding->spec'.  The meaning varies by `coding->type'.
2864
2865    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2866    of length 32 (of which the first 13 sub-elements are used now).
2867    Meanings of these sub-elements are:
2868
2869    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2870         If the value is an integer of valid charset, the charset is
2871         assumed to be designated to graphic register N initially.
2872
2873         If the value is minus, it is a minus value of charset which
2874         reserves graphic register N, which means that the charset is
2875         not designated initially but should be designated to graphic
2876         register N just before encoding a character in that charset.
2877
2878         If the value is nil, graphic register N is never used on
2879         encoding.
2880
2881    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2882         Each value is nil or non-nil (non-nil sets the flag).  See the
2883         section ISO2022 of `coding.h' for more information.
2884
2885    If `coding->type' is `coding_type_big5', element[4] is t to denote
2886    BIG5-ETen or nil to denote BIG5-HKU.
2887
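2888    If `coding->type' is `coding_type_ccl', element[4] is a cons whose
        car and cdr are the CCL programs for decoding and encoding
        respectively.

        If `coding->type' takes any other value, element[4] is ignored.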
2889
2890    Emacs Lisp's coding system also carries information about format of
2891    end-of-line in a value of property `eol-type'.  If the value is
2892    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2893    means CODING_EOL_CR.  If it is not integer, it should be a vector
2894    of subsidiary coding systems of which property `eol-type' has one
2895    of above values.
2896
2897 */
2898
2899 /* Extract information for decoding/encoding from the coding system
2900    symbol CODING_SYSTEM and set it in CODING.  If CODING_SYSTEM is
2901    invalid, set up CODING so that no conversion is necessary and
2902    return -1; otherwise return 0.  */
2903
2904 int
2905 setup_coding_system (coding_system, coding)
2906      Lisp_Object coding_system;
2907      struct coding_system *coding;
2908 {
2909   Lisp_Object coding_spec, coding_type, eol_type, plist;
2910   Lisp_Object val;
2911   int i;
2912
2913   /* Initialize some fields required for all kinds of coding systems.  */
2914   coding->symbol = coding_system;
2915   coding->common_flags = 0;
2916   coding->mode = 0;
2917   coding->heading_ascii = -1;
2918   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2919   coding->composing = COMPOSITION_DISABLED;
2920   coding->cmp_data = NULL;
2921
2922   if (NILP (coding_system))
2923     goto label_invalid_coding_system;
2924
2925   coding_spec = Fget (coding_system, Qcoding_system);
2926
       /* The coding-vector must have exactly 5 elements, and element[3]
          (the property list read further below) must be a cons.  */
2927   if (!VECTORP (coding_spec)
2928       || XVECTOR (coding_spec)->size != 5
2929       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2930     goto label_invalid_coding_system;
2931
       /* Decide the end-of-line format.  A vector (of subsidiaries) means
          the format is not fixed yet and must be detected; 1 and 2 select
          CRLF and CR; anything else falls back to LF, including the nil
          that is forced when inhibit_eol_conversion is nonzero.
          NOTE(review): XFASTINT is applied without an INTEGERP check --
          confirm that a non-integer `eol-type' can never compare equal
          to 1 or 2.  */
2932   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2933   if (VECTORP (eol_type))
2934     {
2935       coding->eol_type = CODING_EOL_UNDECIDED;
2936       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2937     }
2938   else if (XFASTINT (eol_type) == 1)
2939     {
2940       coding->eol_type = CODING_EOL_CRLF;
2941       coding->common_flags
2942         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2943     }
2944   else if (XFASTINT (eol_type) == 2)
2945     {
2946       coding->eol_type = CODING_EOL_CR;
2947       coding->common_flags
2948         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2949     }
2950   else
2951     coding->eol_type = CODING_EOL_LF;
2952
2953   coding_type = XVECTOR (coding_spec)->contents[0];
2954   /* Try short cut.  If the type is a symbol (t for undecided,
          otherwise treated as no-conversion), return at once without
          reading the property list.  */
2955   if (SYMBOLP (coding_type))
2956     {
2957       if (EQ (coding_type, Qt))
2958         {
2959           coding->type = coding_type_undecided;
2960           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2961         }
2962       else
2963         coding->type = coding_type_no_conversion;
2964       return 0;
2965     }
2966
2967   /* Get values of coding system properties:
2968      `post-read-conversion', `pre-write-conversion',
2969      `translation-table-for-decode', `translation-table-for-encode'.  */
2970   plist = XVECTOR (coding_spec)->contents[3];
2971   /* Pre & post conversion functions should be disabled if
2972      inhibit_pre_post_conversion is nonzero.  This is the case that a code
2973      conversion function is called while those functions are running.  */
2974   if (! inhibit_pre_post_conversion)
2975     {
2976       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2977       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2978     }
       /* A translation table may be given directly or as a symbol whose
          property holds it; anything that is not a char-table becomes
          nil.  */
2979   val = Fplist_get (plist, Qtranslation_table_for_decode);
2980   if (SYMBOLP (val))
2981     val = Fget (val, Qtranslation_table_for_decode);
2982   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2983   val = Fplist_get (plist, Qtranslation_table_for_encode);
2984   if (SYMBOLP (val))
2985     val = Fget (val, Qtranslation_table_for_encode);
2986   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
       /* The coding system must name a coding category whose
          Qcoding_category_index property is an integer; otherwise it is
          invalid.  */
2987   val = Fplist_get (plist, Qcoding_category);
2988   if (!NILP (val))
2989     {
2990       val = Fget (val, Qcoding_category_index);
2991       if (INTEGERP (val))
2992         coding->category_idx = XINT (val);
2993       else
2994         goto label_invalid_coding_system;
2995     }
2996   else
2997     goto label_invalid_coding_system;
2998
2999   /* If the coding system has non-nil `composition' property, enable
3000      composition handling.  */
3001   val = Fplist_get (plist, Qcomposition);
3002   if (!NILP (val))
3003     coding->composing = COMPOSITION_NO;
3004
       /* Dispatch on the numeric coding type (element[0]).  Values other
          than 0 through 5 are rejected by the default case.  */
3005   switch (XFASTINT (coding_type))
3006     {
3007     case 0:
3008       coding->type = coding_type_emacs_mule;
           /* Conversion is required only when a post-read or pre-write
              function is set.  */
3009       if (!NILP (coding->post_read_conversion))
3010         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3011       if (!NILP (coding->pre_write_conversion))
3012         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3013       break;
3014
3015     case 1:
3016       coding->type = coding_type_sjis;
3017       coding->common_flags
3018         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3019       break;
3020
3021     case 2:
3022       coding->type = coding_type_iso2022;
3023       coding->common_flags
3024         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3025       {
             /* VAL and I declared here shadow the function-level ones.
                REG_BITS gets bit N set when graphic register N may be
                used by any charset.  */
3026         Lisp_Object val, temp;
3027         Lisp_Object *flags;
3028         int i, charset, reg_bits = 0;
3029
3030         val = XVECTOR (coding_spec)->contents[4];
3031
3032         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3033           goto label_invalid_coding_system;
3034
             /* Sub-elements 4 through 16 are booleans, each turning on one
                CODING_FLAG_ISO_XXX bit when non-nil.  */
3035         flags = XVECTOR (val)->contents;
3036         coding->flags
3037           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3038              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3039              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3040              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3041              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3042              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3043              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3044              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3045              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3046              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3047              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3048              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3049              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3050              );
3051
3052         /* Invoke graphic register 0 to plane 0.  */
3053         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3054         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3055         CODING_SPEC_ISO_INVOCATION (coding, 1)
3056           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3057         /* Not single shifting at first.  */
3058         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3059         /* Beginning of buffer should also be regarded as bol. */
3060         CODING_SPEC_ISO_BOL (coding) = 1;
3061
             /* Set every revision number to 255 first, then apply the
                (CHARSET . REVISION) entries of Vcharset_revision_alist
                whose REVISION is a non-negative integer with
                REVISION + '@' below 128.  */
3062         for (charset = 0; charset <= MAX_CHARSET; charset++)
3063           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3064         val = Vcharset_revision_alist;
3065         while (CONSP (val))
3066           {
3067             charset = get_charset_id (Fcar_safe (XCAR (val)));
3068             if (charset >= 0
3069                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3070                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3071               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3072             val = XCDR (val);
3073           }
3074
3075         /* Check FLAGS[REG] (REG = 0, 1, 2, 3) and decide designations.
3076            FLAGS[REG] can be one of below:
3077                 integer CHARSET: CHARSET occupies register REG,
3078                 t: designate nothing to REG initially, but can be used
3079                   by any charsets,
3080                 list of integer, nil, or t: designate the first
3081                   element (if integer) to REG initially, the remaining
3082                   elements (if integer) is designated to REG on request,
3083                   if an element is t, REG can be used by any charsets,
3084                 nil: REG is never used.  */
3085         for (charset = 0; charset <= MAX_CHARSET; charset++)
3086           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3087             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3088         for (i = 0; i < 4; i++)
3089           {
3090             if (INTEGERP (flags[i])
3091                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3092                 || (charset = get_charset_id (flags[i])) >= 0)
3093               {
3094                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3095                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3096               }
3097             else if (EQ (flags[i], Qt))
3098               {
3099                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3100                 reg_bits |= 1 << i;
3101                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3102               }
3103             else if (CONSP (flags[i]))
3104               {
3105                 Lisp_Object tail;
3106                 tail = flags[i];
3107
3108                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
                     /* The first element decides the initial designation.  */
3109                 if (INTEGERP (XCAR (tail))
3110                     && (charset = XINT (XCAR (tail)),
3111                         CHARSET_VALID_P (charset))
3112                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3113                   {
3114                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3115                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3116                   }
3117                 else
3118                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
                     /* The remaining elements are designated on request.  */
3119                 tail = XCDR (tail);
3120                 while (CONSP (tail))
3121                   {
3122                     if (INTEGERP (XCAR (tail))
3123                         && (charset = XINT (XCAR (tail)),
3124                             CHARSET_VALID_P (charset))
3125                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3126                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3127                         = i;
3128                     else if (EQ (XCAR (tail), Qt))
3129                       reg_bits |= 1 << i;
3130                     tail = XCDR (tail);
3131                   }
3132               }
3133             else
3134               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3135
                 /* The current designation starts out as the initial one.  */
3136             CODING_SPEC_ISO_DESIGNATION (coding, i)
3137               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3138           }
3139
3140         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3141           {
3142             /* REG 1 can be used only by locking shift in 7-bit env.  */
3143             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3144               reg_bits &= ~2;
3145             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3146               /* Without any shifting, only REG 0 and 1 can be used.  */
3147               reg_bits &= 3;
3148           }
3149
             /* Give every valid charset that still has no requested
                designation one of the registers left in REG_BITS.  */
3150         if (reg_bits)
3151           for (charset = 0; charset <= MAX_CHARSET; charset++)
3152             {
3153               if (CHARSET_VALID_P (charset)
3154                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3155                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3156                 {
3157                   /* There exist some default graphic registers to be
3158                      used by CHARSET.  */
3159
3160                   /* We had better avoid designating a charset of
3161                      CHARS96 to REG 0 as far as possible.  */
3162                   if (CHARSET_CHARS (charset) == 96)
3163                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3164                       = (reg_bits & 2
3165                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3166                   else
3167                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3168                       = (reg_bits & 1
3169                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3170                 }
3171             }
3172       }
3173       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3174       coding->spec.iso2022.last_invalid_designation_register = -1;
3175       break;
3176
3177     case 3:
3178       coding->type = coding_type_big5;
3179       coding->common_flags
3180         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
           /* A nil element[4] selects BIG5-HKU, anything else BIG5-ETen.  */
3181       coding->flags
3182         = (NILP (XVECTOR (coding_spec)->contents[4])
3183            ? CODING_FLAG_BIG5_HKU
3184            : CODING_FLAG_BIG5_ETEN);
3185       break;
3186
3187     case 4:
3188       coding->type = coding_type_ccl;
3189       coding->common_flags
3190         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3191       {
             /* element[4] is a cons of the decoder (car) and the encoder
                (cdr) CCL programs; both must set up successfully.  */
3192         val = XVECTOR (coding_spec)->contents[4];
3193         if (! CONSP (val)
3194             || setup_ccl_program (&(coding->spec.ccl.decoder),
3195                                   XCAR (val)) < 0
3196             || setup_ccl_program (&(coding->spec.ccl.encoder),
3197                                   XCDR (val)) < 0)
3198           goto label_invalid_coding_system;
3199
             /* Mark the byte values listed in the Qvalid_codes property.
                Each element is either one integer in 0..255 or a cons
                (START . END) giving an inclusive range within 0..255;
                other elements are silently ignored.  */
3200         bzero (coding->spec.ccl.valid_codes, 256);
3201         val = Fplist_get (plist, Qvalid_codes);
3202         if (CONSP (val))
3203           {
3204             Lisp_Object this;
3205
3206             for (; CONSP (val); val = XCDR (val))
3207               {
3208                 this = XCAR (val);
3209                 if (INTEGERP (this)
3210                     && XINT (this) >= 0 && XINT (this) < 256)
3211                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3212                 else if (CONSP (this)
3213                          && INTEGERP (XCAR (this))
3214                          && INTEGERP (XCDR (this)))
3215                   {
3216                     int start = XINT (XCAR (this));
3217                     int end = XINT (XCDR (this));
3218
3219                     if (start >= 0 && start <= end && end < 256)
3220                       while (start <= end)
3221                         coding->spec.ccl.valid_codes[start++] = 1;
3222                   }
3223               }
3224           }
3225       }
3226       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3227       coding->spec.ccl.cr_carryover = 0;
3228       break;
3229
3230     case 5:
3231       coding->type = coding_type_raw_text;
3232       break;
3233
3234     default:
3235       goto label_invalid_coding_system;
3236     }
3237   return 0;
3238
       /* Every failure ends up here: fall back to no-conversion with LF
          end-of-line, drop the conversion functions, and report -1.  */
3239  label_invalid_coding_system:
3240   coding->type = coding_type_no_conversion;
3241   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3242   coding->common_flags = 0;
3243   coding->eol_type = CODING_EOL_LF;
3244   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3245   return -1;
3246 }
3247
3248 /* Release all the memory blocks that hold composition information
3249    for CODING, and clear CODING->cmp_data.  */
3250
3251 void
3252 coding_free_composition_data (coding)
3253      struct coding_system *coding;
3254 {
3255   struct composition_data *block, *following;
3256
3257   if (coding->cmp_data == NULL)
3258     return;
3259   /* The blocks are chained in both directions; go back to the head.  */
3260   for (block = coding->cmp_data; block->prev != NULL; block = block->prev)
3261     ;
3262   /* Release the blocks front to back, saving each link beforehand.  */
3263   for (; block != NULL; block = following)
3264     {
3265       following = block->next;
3266       xfree (block);
3267     }
3268   coding->cmp_data = NULL;
3269 }
3270
3271 /* Store POS in the `char_offset' member of each memory block found
3272    by following the `next' links from CODING->cmp_data.  */
3273
3274 void
3275 coding_adjust_composition_offset (coding, pos)
3276      struct coding_system *coding;
3277      int pos;
3278 {
3279   struct composition_data *block = coding->cmp_data;
3280
3281   while (block)
3282     block->char_offset = pos, block = block->next;
3283 }
3284
3285 /* Set up raw-text, or one of its end-of-line subsidiaries, in the
3286    structure CODING according to the value of eol_type already set
3287    in CODING.  CODING should be set up for some coding system in
3288    advance.  Nothing is done if CODING is already of raw-text type.  */
3289
3290 void
3291 setup_raw_text_coding_system (coding)
3292      struct coding_system *coding;
3293 {
3294   if (coding->type != coding_type_raw_text)
3295     {
3296       coding->symbol = Qraw_text;
3297       coding->type = coding_type_raw_text;
3298       if (coding->eol_type != CODING_EOL_UNDECIDED)
3299         {
3300           Lisp_Object subsidiaries;
3301           subsidiaries = Fget (Qraw_text, Qeol_type);
3302
               /* Pick the subsidiary only when eol_type really indexes the
                  3-element vector; for any other value keep plain
                  raw-text rather than read outside the vector.  */
3303           if (VECTORP (subsidiaries)
3304               && XVECTOR (subsidiaries)->size == 3
                   && coding->eol_type >= 0
                   && coding->eol_type < 3)
3305             coding->symbol
3306               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3307         }
           /* Re-run the full setup with the symbol chosen above.  */
3308       setup_coding_system (coding->symbol, coding);
3309     }
3310   return;
3311 }
3312
3313 /* Emacs has a mechanism to automatically detect a coding system if it
3314    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3315    it's impossible to distinguish some coding systems accurately
3316    because they use the same range of codes.  So, at first, coding
3317    systems are classified into the following categories:
3318
3319    o coding-category-emacs-mule
3320
3321         The category for a coding system which has the same code range
3322         as Emacs' internal format.  Assigned the coding-system (Lisp
3323         symbol) `emacs-mule' by default.
3324
3325    o coding-category-sjis
3326
3327         The category for a coding system which has the same code range
3328         as SJIS.  Assigned the coding-system (Lisp
3329         symbol) `japanese-shift-jis' by default.
3330
3331    o coding-category-iso-7
3332
3333         The category for a coding system which has the same code range
3334         as ISO2022 of 7-bit environment.  This doesn't use any locking
3335         shift and single shift functions.  This can encode/decode all
3336         charsets.  Assigned the coding-system (Lisp symbol)
3337         `iso-2022-7bit' by default.
3338
3339    o coding-category-iso-7-tight
3340
3341         Same as coding-category-iso-7 except that this can
3342         encode/decode only the specified charsets.
3343
3344    o coding-category-iso-8-1
3345
3346         The category for a coding system which has the same code range
3347         as ISO2022 of 8-bit environment and graphic plane 1 used only
3348         for DIMENSION1 charset.  This doesn't use any locking shift
3349         and single shift functions.  Assigned the coding-system (Lisp
3350         symbol) `iso-latin-1' by default.
3351
3352    o coding-category-iso-8-2
3353
3354         The category for a coding system which has the same code range
3355         as ISO2022 of 8-bit environment and graphic plane 1 used only
3356         for DIMENSION2 charset.  This doesn't use any locking shift
3357         and single shift functions.  Assigned the coding-system (Lisp
3358         symbol) `japanese-iso-8bit' by default.
3359
3360    o coding-category-iso-7-else
3361
3362         The category for a coding system which has the same code range
3363         as ISO2022 of 7-bit environment but uses locking shift or
3364         single shift functions.  Assigned the coding-system (Lisp
3365         symbol) `iso-2022-7bit-lock' by default.
3366
3367    o coding-category-iso-8-else
3368
3369         The category for a coding system which has the same code range
3370         as ISO2022 of 8-bit environment but uses locking shift or
3371         single shift functions.  Assigned the coding-system (Lisp
3372         symbol) `iso-2022-8bit-ss2' by default.
3373
3374    o coding-category-big5
3375
3376         The category for a coding system which has the same code range
3377         as BIG5.  Assigned the coding-system (Lisp symbol)
3378         `cn-big5' by default.
3379
3380    o coding-category-utf-8
3381
3382         The category for a coding system which has the same code range
3383         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3384         symbol) `utf-8' by default.
3385
3386    o coding-category-utf-16-be
3387
3388         The category for a coding system in which a text has an
3389         Unicode signature (cf. Unicode Standard) in the order of BIG
3390         endian at the head.  Assigned the coding-system (Lisp symbol)
3391         `utf-16-be' by default.
3392
3393    o coding-category-utf-16-le
3394
3395         The category for a coding system in which a text has an
3396         Unicode signature (cf. Unicode Standard) in the order of
3397         LITTLE endian at the head.  Assigned the coding-system (Lisp
3398         symbol) `utf-16-le' by default.
3399
3400    o coding-category-ccl
3401
3402         The category for a coding system of which encoder/decoder is
3403         written in CCL programs.  The default value is nil, i.e., no
3404         coding system is assigned.
3405
3406    o coding-category-binary
3407
3408         The category for a coding system not categorized in any of the
3409         above.  Assigned the coding-system (Lisp symbol)
3410         `no-conversion' by default.
3411
3412    Each of them is a Lisp symbol and the value is an actual
3413    `coding-system's (this is also a Lisp symbol) assigned by a user.
3414    What Emacs does actually is to detect a category of coding system.
3415    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3416    decide only one possible category, it selects a category of the
3417    highest priority.  Priorities of categories are also specified by a
3418    user in a Lisp variable `coding-category-list'.
3419
3420 */
3421
3422 static
3423 int ascii_skip_code[256];  /* Indexed by a byte value.  A nonzero
                                   element means detect_coding_mask skips
                                   that byte while scanning the ASCII head
                                   of a text.
                                   NOTE(review): the table is filled in
                                   outside this part of the file --
                                   confirm where.  */
3424
3425 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3426    If it detects possible coding systems, return an integer in which
3427    appropriate flag bits are set.  Flag bits are defined by macros
3428    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
3429    it should point the table `coding_priorities'.  In that case, only
3430    the flag bit for a coding system of the highest priority is set in
3431    the returned value.  Return 0 if nothing but ASCII is found.
3432
3433    How many ASCII characters are at the head is returned as *SKIP.  */
3434
3435 static int
3436 detect_coding_mask (source, src_bytes, priorities, skip)
3437      unsigned char *source;
3438      int src_bytes, *priorities, *skip;
3439 {
3440   register unsigned char c;
3441   unsigned char *src = source, *src_end = source + src_bytes;
3442   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3443   int i;
3444
3445   /* At first, skip all ASCII characters and control characters except
3446      for three ISO2022 specific control characters.  */
3447   ascii_skip_code[ISO_CODE_SO] = 0;
3448   ascii_skip_code[ISO_CODE_SI] = 0;
3449   ascii_skip_code[ISO_CODE_ESC] = 0;
3450
3451  label_loop_detect_coding:
3452   while (src < src_end && ascii_skip_code[*src]) src++;
3453   *skip = src - source;
3454
3455   if (src >= src_end)
3456     /* We found nothing other than ASCII.  There's nothing to do.  */
3457     return 0;
3458
       /* Note that SRC is not advanced: it keeps pointing at C.  */
3459   c = *src;
3460   /* The text seems to be encoded in some multilingual coding system.
3461      Now, try to find in which coding system the text is encoded.  */
3462   if (c < 0x80)
3463     {
3464       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3465       /* C is an ISO2022 specific control code of C0.  */
3466       mask = detect_coding_iso2022 (src, src_end);
3467       if (mask == 0)
3468         {
3469           /* No valid ISO2022 code follows C.  Try again.  */
               /* From now on treat this kind of control code as skippable,
                  so that the scan above goes past later occurrences.  */
3470           src++;
3471           if (c == ISO_CODE_ESC)
3472             ascii_skip_code[ISO_CODE_ESC] = 1;
3473           else
3474             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3475           goto label_loop_detect_coding;
3476         }
3477       if (priorities)
3478         {
3479           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3480             {
3481               if (mask & priorities[i])
3482                 return priorities[i];
3483             }
3484           return CODING_CATEGORY_MASK_RAW_TEXT;
3485         }
3486     }
3487   else
3488     {
3489       int try;
3490
3491       if (c < 0xA0)
3492         {
3493           /* C is the first byte of SJIS character code,
3494              or a leading-code of Emacs' internal format (emacs-mule),
3495              or the first byte of UTF-16.  */
3496           try = (CODING_CATEGORY_MASK_SJIS
3497                   | CODING_CATEGORY_MASK_EMACS_MULE
3498                   | CODING_CATEGORY_MASK_UTF_16_BE
3499                   | CODING_CATEGORY_MASK_UTF_16_LE);
3500
3501           /* Or, if C is a special latin extra code,
3502              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3503              or is an ISO2022 control-sequence-introducer (CSI),
3504              we should also consider the possibility of ISO2022 codings.  */
               /* SRC still points at C itself, so the bytes that may
                  follow a CSI are SRC[1] and SRC[2]: either `]' alone, or
                  one of `0', `1', `2' followed by `]'.  */
3505           if ((VECTORP (Vlatin_extra_code_table)
3506                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3507               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3508               || (c == ISO_CODE_CSI
3509                   && (src + 1 < src_end
3510                       && (src[1] == ']'
3511                           || ((src[1] == '0' || src[1] == '1' || src[1] == '2')
3512                               && src + 2 < src_end
3513                               && src[2] == ']')))))
3514             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3515                      | CODING_CATEGORY_MASK_ISO_8BIT);
3516         }
3517       else
3518         /* C is a character of ISO2022 in graphic plane right,
3519            or a SJIS's 1-byte character code (i.e. JISX0201),
3520            or the first byte of BIG5's 2-byte code,
3521            or the first byte of UTF-8/16.  */
3522         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3523                 | CODING_CATEGORY_MASK_ISO_8BIT
3524                 | CODING_CATEGORY_MASK_SJIS
3525                 | CODING_CATEGORY_MASK_BIG5
3526                 | CODING_CATEGORY_MASK_UTF_8
3527                 | CODING_CATEGORY_MASK_UTF_16_BE
3528                 | CODING_CATEGORY_MASK_UTF_16_LE);
3529
3530       /* Or, we may have to consider the possibility of CCL.  */
3531       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3532           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3533               ->spec.ccl.valid_codes)[c])
3534         try |= CODING_CATEGORY_MASK_CCL;
3535
3536       mask = 0;
3537       utf16_examined_p = iso2022_examined_p = 0;
3538       if (priorities)
3539         {
               /* Run the detectors in priority order and return the first
                  category that matches.  The ISO2022 and the UTF-16
                  detectors are each run at most once.  */
3540           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3541             {
3542               if (!iso2022_examined_p
3543                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
3544                 {
3545                   mask |= detect_coding_iso2022 (src, src_end);
3546                   iso2022_examined_p = 1;
3547                 }
3548               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3549                 mask |= detect_coding_sjis (src, src_end);
3550               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
3551                 mask |= detect_coding_utf_8 (src, src_end);
3552               else if (!utf16_examined_p
3553                        && (priorities[i] & try &
3554                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
3555                 {
3556                   mask |= detect_coding_utf_16 (src, src_end);
3557                   utf16_examined_p = 1;
3558                 }
3559               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3560                 mask |= detect_coding_big5 (src, src_end);
3561               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3562                 mask |= detect_coding_emacs_mule (src, src_end);
3563               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3564                 mask |= detect_coding_ccl (src, src_end);
3565               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3566                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
3567               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3568                 mask |= CODING_CATEGORY_MASK_BINARY;
3569               if (mask & priorities[i])
3570                 return priorities[i];
3571             }
3572           return CODING_CATEGORY_MASK_RAW_TEXT;
3573         }
           /* Without PRIORITIES, run every applicable detector and collect
              all the categories that match.  */
3574       if (try & CODING_CATEGORY_MASK_ISO)
3575         mask |= detect_coding_iso2022 (src, src_end);
3576       if (try & CODING_CATEGORY_MASK_SJIS)
3577         mask |= detect_coding_sjis (src, src_end);
3578       if (try & CODING_CATEGORY_MASK_BIG5)
3579         mask |= detect_coding_big5 (src, src_end);
3580       if (try & CODING_CATEGORY_MASK_UTF_8)
3581         mask |= detect_coding_utf_8 (src, src_end);
3582       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
3583         mask |= detect_coding_utf_16 (src, src_end);
3584       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3585         mask |= detect_coding_emacs_mule (src, src_end);
3586       if (try & CODING_CATEGORY_MASK_CCL)
3587         mask |= detect_coding_ccl (src, src_end);
3588     }
       /* Raw-text and binary are always possible.  */
3589   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3590 }
3591
3592 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3593    The information of the detected coding system is set in CODING.
        The number of ASCII bytes at the head of the text is always
        stored in CODING->heading_ascii.  If nothing but ASCII is found,
        the rest of CODING is left untouched.  */
3594
3595 void
3596 detect_coding (coding, src, src_bytes)
3597      struct coding_system *coding;
3598      unsigned char *src;
3599      int src_bytes;
3600 {
3601   unsigned int idx;
3602   int skip, mask;
3603   Lisp_Object val;
3604
3606   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3607   coding->heading_ascii = skip;
3608
3609   if (!mask) return;
3610
3611   /* We found a single coding system of the highest priority in MASK.
          Convert the position of its lowest set bit into a category
          index.  MASK is nonzero here, so the loop always stops on a
          set bit.  */
3612   idx = 0;
3613   while (! (mask & 1)) mask >>= 1, idx++;
3616
       /* The coding system currently assigned to that category.  */
3617   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3618
       /* If the end-of-line format is already decided, switch to the
          matching subsidiary, provided eol_type really indexes the
          vector of subsidiaries.  */
3619   if (coding->eol_type != CODING_EOL_UNDECIDED)
3620     {
3621       Lisp_Object tmp;
3622
3623       tmp = Fget (val, Qeol_type);
3624       if (VECTORP (tmp)
               && coding->eol_type >= 0
               && coding->eol_type < XVECTOR (tmp)->size)
3625         val = XVECTOR (tmp)->contents[coding->eol_type];
3626     }
3627
3628   /* Setup this new coding system while preserving some slots.  */
3629   {
3630     int src_multibyte = coding->src_multibyte;
3631     int dst_multibyte = coding->dst_multibyte;
3632
3633     setup_coding_system (val, coding);
3634     coding->src_multibyte = src_multibyte;
3635     coding->dst_multibyte = dst_multibyte;
         /* setup_coding_system resets heading_ascii; put it back.  */
3636     coding->heading_ascii = skip;
3637   }
3638 }
3639
3640 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3641    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3642    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3643
3644    How many non-eol characters are at the head is returned as *SKIP.  */
3645
3646 #define MAX_EOL_CHECK_COUNT 3
3647
3648 static int
3649 detect_eol_type (source, src_bytes, skip)
3650      unsigned char *source;
3651      int src_bytes, *skip;
3652 {
3653   unsigned char *src = source, *src_end = src + src_bytes;
3654   unsigned char c;
3655   int total = 0;                /* How many end-of-lines are found so far.  */
3656   int eol_type = CODING_EOL_UNDECIDED;
3657   int this_eol_type;
3658
3659   *skip = 0;
3660
3661   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3662     {
3663       c = *src++;
3664       if (c == '\n' || c == '\r')
3665         {
3666           if (*skip == 0)
3667             *skip = src - 1 - source;
3668           total++;
3669           if (c == '\n')
3670             this_eol_type = CODING_EOL_LF;
3671           else if (src >= src_end || *src != '\n')
3672             this_eol_type = CODING_EOL_CR;
3673           else
3674             this_eol_type = CODING_EOL_CRLF, src++;
3675
3676           if (eol_type == CODING_EOL_UNDECIDED)
3677             /* This is the first end-of-line.  */
3678             eol_type = this_eol_type;
3679           else if (eol_type != this_eol_type)
3680             {
3681               /* The found type is different from what found before.  */
3682               eol_type = CODING_EOL_INCONSISTENT;
3683               break;
3684             }
3685         }
3686     }
3687
3688   if (*skip == 0)
3689     *skip = src_end - source;
3690   return eol_type;
3691 }
3692
3693 /* Like detect_eol_type, but detect EOL type in 2-octet
3694    big-endian/little-endian format for coding systems utf-16-be and
3695    utf-16-le.  */
3696
3697 static int
3698 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
3699      unsigned char *source;
3700      int src_bytes, *skip;
3701 {
3702   unsigned char *src = source, *src_end = src + src_bytes;
3703   unsigned int c1, c2;
3704   int total = 0;                /* How many end-of-lines are found so far.  */
3705   int eol_type = CODING_EOL_UNDECIDED;
3706   int this_eol_type;
3707   int msb, lsb;
3708
3709   if (big_endian_p)
3710     msb = 0, lsb = 1;
3711   else
3712     msb = 1, lsb = 0;
3713
3714   *skip = 0;
3715
3716   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
3717     {
3718       c1 = (src[msb] << 8) | (src[lsb]);
3719       src += 2;
3720
3721       if (c1 == '\n' || c1 == '\r')
3722         {
3723           if (*skip == 0)
3724             *skip = src - 2 - source;
3725           total++;
3726           if (c1 == '\n')
3727             {
3728               this_eol_type = CODING_EOL_LF;
3729             }
3730           else
3731             {
3732               if ((src + 1) >= src_end)
3733                 {
3734                   this_eol_type = CODING_EOL_CR;
3735                 }
3736               else
3737                 {
3738                   c2 = (src[msb] << 8) | (src[lsb]);
3739                   if (c2 == '\n')
3740                     this_eol_type = CODING_EOL_CRLF, src += 2;
3741                   else
3742                     this_eol_type = CODING_EOL_CR;
3743                 }
3744             }
3745
3746           if (eol_type == CODING_EOL_UNDECIDED)
3747             /* This is the first end-of-line.  */
3748             eol_type = this_eol_type;
3749           else if (eol_type != this_eol_type)
3750             {
3751               /* The found type is different from what found before.  */
3752               eol_type = CODING_EOL_INCONSISTENT;
3753               break;
3754             }
3755         }
3756     }
3757
3758   if (*skip == 0)
3759     *skip = src_end - source;
3760   return eol_type;
3761 }
3762
3763 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3764    is encoded.  If it detects an appropriate format of end-of-line, it
3765    sets the information in *CODING.  */
3766
3767 void
3768 detect_eol (coding, src, src_bytes)
3769      struct coding_system *coding;
3770      unsigned char *src;
3771      int src_bytes;
3772 {
3773   Lisp_Object val;
3774   int skip;
3775   int eol_type;
3776
3777   switch (coding->category_idx)
3778     {
3779     case CODING_CATEGORY_IDX_UTF_16_BE:
3780       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
3781       break;
3782     case CODING_CATEGORY_IDX_UTF_16_LE:
3783       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
3784       break;
3785     default:
3786       eol_type = detect_eol_type (src, src_bytes, &skip);
3787       break;
3788     }
3789
3790   if (coding->heading_ascii > skip)
3791     coding->heading_ascii = skip;
3792   else
3793     skip = coding->heading_ascii;
3794
3795   if (eol_type == CODING_EOL_UNDECIDED)
3796     return;
3797   if (eol_type == CODING_EOL_INCONSISTENT)
3798     {
3799 #if 0
3800       /* This code is suppressed until we find a better way to
3801          distinguish raw text file and binary file.  */
3802
3803       /* If we have already detected that the coding is raw-text, the
3804          coding should actually be no-conversion.  */
3805       if (coding->type == coding_type_raw_text)
3806         {
3807           setup_coding_system (Qno_conversion, coding);
3808           return;
3809         }
3810       /* Else, let's decode only text code anyway.  */
3811 #endif /* 0 */
3812       eol_type = CODING_EOL_LF;
3813     }
3814
3815   val = Fget (coding->symbol, Qeol_type);
3816   if (VECTORP (val) && XVECTOR (val)->size == 3)
3817     {
3818       int src_multibyte = coding->src_multibyte;
3819       int dst_multibyte = coding->dst_multibyte;
3820
3821       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3822       coding->src_multibyte = src_multibyte;
3823       coding->dst_multibyte = dst_multibyte;
3824       coding->heading_ascii = skip;
3825     }
3826 }
3827
3828 #define CONVERSION_BUFFER_EXTRA_ROOM 256
3829
3830 #define DECODING_BUFFER_MAG(coding)                     \
3831   (coding->type == coding_type_iso2022                  \
3832    ? 3                                                  \
3833    : (coding->type == coding_type_ccl                   \
3834       ? coding->spec.ccl.decoder.buf_magnification      \
3835       : 2))
3836
3837 /* Return maximum size (bytes) of a buffer enough for decoding
3838    SRC_BYTES of text encoded in CODING.  */
3839
3840 int
3841 decoding_buffer_size (coding, src_bytes)
3842      struct coding_system *coding;
3843      int src_bytes;
3844 {
3845   return (src_bytes * DECODING_BUFFER_MAG (coding)
3846           + CONVERSION_BUFFER_EXTRA_ROOM);
3847 }
3848
3849 /* Return maximum size (bytes) of a buffer enough for encoding
3850    SRC_BYTES of text to CODING.  */
3851
3852 int
3853 encoding_buffer_size (coding, src_bytes)
3854      struct coding_system *coding;
3855      int src_bytes;
3856 {
3857   int magnification;
3858
3859   if (coding->type == coding_type_ccl)
3860     magnification = coding->spec.ccl.encoder.buf_magnification;
3861   else if (CODING_REQUIRE_ENCODING (coding))
3862     magnification = 3;
3863   else
3864     magnification = 1;
3865
3866   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3867 }
3868
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* The shared work buffer handed out by get_conversion_buffer, and its
   current size in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a buffer of at least SIZE bytes to be used for
   encoding or decoding.  The buffer is the shared conversion_buffer;
   when it is too small, a new block is allocated (repeatedly doubling
   the current size until it reaches SIZE) and the old block is freed,
   so previous contents are not preserved and pointers obtained from
   earlier calls become invalid.

   NOTE(review): this comment used to say that NULL is returned when
   memory runs out.  The code below returns whatever xmalloc yields
   without checking it -- confirm how xmalloc reports failure.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* If the buffer has not been set up yet, the current size is
         zero and doubling it would never reach SIZE.  Start from the
         minimum size in that case.  */
      if (real_size <= 0)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;
      while (real_size < size) real_size *= 2;
      buf = (char *) xmalloc (real_size);
      /* There is nothing to release before the first allocation.  */
      if (conversion_buffer)
        xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3897
3898 int
3899 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3900      struct coding_system *coding;
3901      unsigned char *source, *destination;
3902      int src_bytes, dst_bytes, encodep;
3903 {
3904   struct ccl_program *ccl
3905     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3906   int result;
3907
3908   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3909   if (encodep)
3910     ccl->eol_type = coding->eol_type;
3911   coding->produced = ccl_driver (ccl, source, destination,
3912                                  src_bytes, dst_bytes, &(coding->consumed));
3913   if (encodep)
3914     coding->produced_char = coding->produced;
3915   else
3916     {
3917       int bytes
3918         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
3919       coding->produced = str_as_multibyte (destination, bytes,
3920                                            coding->produced,
3921                                            &(coding->produced_char));
3922     }
3923
3924   switch (ccl->status)
3925     {
3926     case CCL_STAT_SUSPEND_BY_SRC:
3927       result = CODING_FINISH_INSUFFICIENT_SRC;
3928       break;
3929     case CCL_STAT_SUSPEND_BY_DST:
3930       result = CODING_FINISH_INSUFFICIENT_DST;
3931       break;
3932     case CCL_STAT_QUIT:
3933     case CCL_STAT_INVALID_CMD:
3934       result = CODING_FINISH_INTERRUPT;
3935       break;
3936     default:
3937       result = CODING_FINISH_NORMAL;
3938       break;
3939     }
3940   return result;
3941 }
3942
3943 /* Decode EOL format of the text at PTR of BYTES length destructively
3944    according to CODING->eol_type.  This is called after the CCL
3945    program produced a decoded text at PTR.  If we do CRLF->LF
3946    conversion, update CODING->produced and CODING->produced_char.  */
3947
3948 static void
3949 decode_eol_post_ccl (coding, ptr, bytes)
3950      struct coding_system *coding;
3951      unsigned char *ptr;
3952      int bytes;
3953 {
3954   Lisp_Object val, saved_coding_symbol;
3955   unsigned char *pend = ptr + bytes;
3956   int dummy;
3957
3958   /* Remember the current coding system symbol.  We set it back when
3959      an inconsistent EOL is found so that `last-coding-system-used' is
3960      set to the coding system that doesn't specify EOL conversion.  */
3961   saved_coding_symbol = coding->symbol;
3962
3963   coding->spec.ccl.cr_carryover = 0;
3964   if (coding->eol_type == CODING_EOL_UNDECIDED)
3965     {
3966       /* Here, to avoid the call of setup_coding_system, we directly
3967          call detect_eol_type.  */
3968       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
3969       if (coding->eol_type == CODING_EOL_INCONSISTENT)
3970         coding->eol_type = CODING_EOL_LF;
3971       if (coding->eol_type != CODING_EOL_UNDECIDED)
3972         {
3973           val = Fget (coding->symbol, Qeol_type);
3974           if (VECTORP (val) && XVECTOR (val)->size == 3)
3975             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
3976         }
3977       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
3978     }
3979
3980   if (coding->eol_type == CODING_EOL_LF
3981       || coding->eol_type == CODING_EOL_UNDECIDED)
3982     {
3983       /* We have nothing to do.  */
3984       ptr = pend;
3985     }
3986   else if (coding->eol_type == CODING_EOL_CRLF)
3987     {
3988       unsigned char *pstart = ptr, *p = ptr;
3989
3990       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
3991           && *(pend - 1) == '\r')
3992         {
3993           /* If the last character is CR, we can't handle it here
3994              because LF will be in the not-yet-decoded source text.
3995              Recorded that the CR is not yet processed.  */
3996           coding->spec.ccl.cr_carryover = 1;
3997           coding->produced--;
3998           coding->produced_char--;
3999           pend--;
4000         }
4001       while (ptr < pend)
4002         {
4003           if (*ptr == '\r')
4004             {
4005               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4006                 {
4007                   *p++ = '\n';
4008                   ptr += 2;
4009                 }
4010               else
4011                 {
4012                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4013                     goto undo_eol_conversion;
4014                   *p++ = *ptr++;
4015                 }
4016             }
4017           else if (*ptr == '\n'
4018                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4019             goto undo_eol_conversion;
4020           else
4021             *p++ = *ptr++;
4022           continue;
4023
4024         undo_eol_conversion:
4025           /* We have faced with inconsistent EOL format at PTR.
4026              Convert all LFs before PTR back to CRLFs.  */
4027           for (p--, ptr--; p >= pstart; p--)
4028             {
4029               if (*p == '\n')
4030                 *ptr-- = '\n', *ptr-- = '\r';
4031               else
4032                 *ptr-- = *p;
4033             }
4034           /*  If carryover is recorded, cancel it because we don't
4035               convert CRLF anymore.  */
4036           if (coding->spec.ccl.cr_carryover)
4037             {
4038               coding->spec.ccl.cr_carryover = 0;
4039               coding->produced++;
4040               coding->produced_char++;
4041               pend++;
4042             }
4043           p = ptr = pend;
4044           coding->eol_type = CODING_EOL_LF;
4045           coding->symbol = saved_coding_symbol;
4046         }
4047       if (p < pend)
4048         {
4049           /* As each two-byte sequence CRLF was converted to LF, (PEND
4050              - P) is the number of deleted characters.  */
4051           coding->produced -= pend - p;
4052           coding->produced_char -= pend - p;
4053         }
4054     }
4055   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4056     {
4057       unsigned char *p = ptr;
4058
4059       for (; ptr < pend; ptr++)
4060         {
4061           if (*ptr == '\r')
4062             *ptr = '\n';
4063           else if (*ptr == '\n'
4064                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4065             {
4066               for (; p < ptr; p++)
4067                 {
4068                   if (*p == '\n')
4069                     *p = '\r';
4070                 }
4071               ptr = pend;
4072               coding->eol_type = CODING_EOL_LF;
4073               coding->symbol = saved_coding_symbol;
4074             }
4075         }
4076     }
4077 }
4078
4079 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4080    decoding, it may detect coding system and format of end-of-line if
4081    those are not yet decided.  The source should be unibyte, the
4082    result is multibyte if CODING->dst_multibyte is nonzero, else
4083    unibyte.  */
4084
4085 int
4086 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4087      struct coding_system *coding;
4088      unsigned char *source, *destination;
4089      int src_bytes, dst_bytes;
4090 {
4091   if (coding->type == coding_type_undecided)
4092     detect_coding (coding, source, src_bytes);
4093
4094   if (coding->eol_type == CODING_EOL_UNDECIDED
4095       && coding->type != coding_type_ccl)
4096     detect_eol (coding, source, src_bytes);
4097
4098   coding->produced = coding->produced_char = 0;
4099   coding->consumed = coding->consumed_char = 0;
4100   coding->errors = 0;
4101   coding->result = CODING_FINISH_NORMAL;
4102
4103   switch (coding->type)
4104     {
4105     case coding_type_sjis:
4106       decode_coding_sjis_big5 (coding, source, destination,
4107                                src_bytes, dst_bytes, 1);
4108       break;
4109
4110     case coding_type_iso2022:
4111       decode_coding_iso2022 (coding, source, destination,
4112                              src_bytes, dst_bytes);
4113       break;
4114
4115     case coding_type_big5:
4116       decode_coding_sjis_big5 (coding, source, destination,
4117                                src_bytes, dst_bytes, 0);
4118       break;
4119
4120     case coding_type_emacs_mule:
4121       decode_coding_emacs_mule (coding, source, destination,
4122                                 src_bytes, dst_bytes);
4123       break;
4124
4125     case coding_type_ccl:
4126       if (coding->spec.ccl.cr_carryover)
4127         {
4128           /* Set the CR which is not processed by the previous call of
4129              decode_eol_post_ccl in DESTINATION.  */
4130           *destination = '\r';
4131           coding->produced++;
4132           coding->produced_char++;
4133           dst_bytes--;
4134         }
4135       ccl_coding_driver (coding, source,
4136                          destination + coding->spec.ccl.cr_carryover,
4137                          src_bytes, dst_bytes, 0);
4138       if (coding->eol_type != CODING_EOL_LF)
4139         decode_eol_post_ccl (coding, destination, coding->produced);
4140       break;
4141
4142     default:
4143       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4144     }
4145
4146   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4147       && coding->consumed == src_bytes)
4148     coding->result = CODING_FINISH_NORMAL;
4149
4150   if (coding->mode & CODING_MODE_LAST_BLOCK
4151       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4152     {
4153       unsigned char *src = source + coding->consumed;
4154       unsigned char *dst = destination + coding->produced;
4155
4156       src_bytes -= coding->consumed;
4157      coding->errors++;
4158       if (COMPOSING_P (coding))
4159         DECODE_COMPOSITION_END ('1');
4160       while (src_bytes--)
4161         {
4162           int c = *src++;
4163           dst += CHAR_STRING (c, dst);
4164           coding->produced_char++;
4165         }
4166       coding->consumed = coding->consumed_char = src - source;
4167       coding->produced = dst - destination;
4168     }
4169
4170   if (!coding->dst_multibyte)
4171     {
4172       coding->produced = str_as_unibyte (destination, coding->produced);
4173       coding->produced_char = coding->produced;
4174     }
4175
4176   return coding->result;
4177 }
4178
4179 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4180    multibyteness of the source is CODING->src_multibyte, the
4181    multibyteness of the result is always unibyte.  */
4182
4183 int
4184 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4185      struct coding_system *coding;
4186      unsigned char *source, *destination;
4187      int src_bytes, dst_bytes;
4188 {
4189   coding->produced = coding->produced_char = 0;
4190   coding->consumed = coding->consumed_char = 0;
4191   coding->errors = 0;
4192   coding->result = CODING_FINISH_NORMAL;
4193
4194   switch (coding->type)
4195     {
4196     case coding_type_sjis:
4197       encode_coding_sjis_big5 (coding, source, destination,
4198                                src_bytes, dst_bytes, 1);
4199       break;
4200
4201     case coding_type_iso2022:
4202       encode_coding_iso2022 (coding, source, destination,
4203                              src_bytes, dst_bytes);
4204       break;
4205
4206     case coding_type_big5:
4207       encode_coding_sjis_big5 (coding, source, destination,
4208                                src_bytes, dst_bytes, 0);
4209       break;
4210
4211     case coding_type_emacs_mule:
4212       encode_coding_emacs_mule (coding, source, destination,
4213                                 src_bytes, dst_bytes);
4214       break;
4215
4216     case coding_type_ccl:
4217       ccl_coding_driver (coding, source, destination,
4218                          src_bytes, dst_bytes, 1);
4219       break;
4220
4221     default:
4222       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4223     }
4224
4225   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4226       && coding->consumed == src_bytes)
4227     coding->result = CODING_FINISH_NORMAL;
4228
4229   if (coding->mode & CODING_MODE_LAST_BLOCK)
4230     {
4231       unsigned char *src = source + coding->consumed;
4232       unsigned char *src_end = src + src_bytes;
4233       unsigned char *dst = destination + coding->produced;
4234
4235       if (coding->type == coding_type_iso2022)
4236         ENCODE_RESET_PLANE_AND_REGISTER;
4237       if (COMPOSING_P (coding))
4238         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4239       if (coding->consumed < src_bytes)
4240         {
4241           int len = src_bytes - coding->consumed;
4242
4243           BCOPY_SHORT (source + coding->consumed, dst, len);
4244           if (coding->src_multibyte)
4245             len = str_as_unibyte (dst, len);
4246           dst += len;
4247           coding->consumed = src_bytes;
4248         }
4249       coding->produced = coding->produced_char = dst - destination;
4250     }
4251
4252   return coding->result;
4253 }
4254
4255 /* Scan text in the region between *BEG and *END (byte positions),
4256    skip characters which we don't have to decode by coding system
4257    CODING at the head and tail, then set *BEG and *END to the region
4258    of the text we actually have to convert.  The caller should move
4259    the gap out of the region in advance if the region is from a
4260    buffer.
4261
4262    If STR is not NULL, *BEG and *END are indices into STR.  */
4263
4264 static void
4265 shrink_decoding_region (beg, end, coding, str)
4266      int *beg, *end;
4267      struct coding_system *coding;
4268      unsigned char *str;
4269 {
4270   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4271   int eol_conversion;
4272   Lisp_Object translation_table;
4273
4274   if (coding->type == coding_type_ccl
4275       || coding->type == coding_type_undecided
4276       || coding->eol_type != CODING_EOL_LF
4277       || !NILP (coding->post_read_conversion)
4278       || coding->composing != COMPOSITION_DISABLED)
4279     {
4280       /* We can't skip any data.  */
4281       return;
4282     }
4283   if (coding->type == coding_type_no_conversion
4284       || coding->type == coding_type_raw_text
4285       || coding->type == coding_type_emacs_mule)
4286     {
4287       /* We need no conversion, but don't have to skip any data here.
4288          Decoding routine handles them effectively anyway.  */
4289       return;
4290     }
4291
4292   translation_table = coding->translation_table_for_decode;
4293   if (NILP (translation_table) && !NILP (Venable_character_translation))
4294     translation_table = Vstandard_translation_table_for_decode;
4295   if (CHAR_TABLE_P (translation_table))
4296     {
4297       int i;
4298       for (i = 0; i < 128; i++)
4299         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4300           break;
4301       if (i < 128)
4302         /* Some ASCII character should be translated.  We give up
4303            shrinking.  */
4304         return;
4305     }
4306
4307   if (coding->heading_ascii >= 0)
4308     /* Detection routine has already found how much we can skip at the
4309        head.  */
4310     *beg += coding->heading_ascii;
4311
4312   if (str)
4313     {
4314       begp_orig = begp = str + *beg;
4315       endp_orig = endp = str + *end;
4316     }
4317   else
4318     {
4319       begp_orig = begp = BYTE_POS_ADDR (*beg);
4320       endp_orig = endp = begp + *end - *beg;
4321     }
4322
4323   eol_conversion = (coding->eol_type == CODING_EOL_CR
4324                     || coding->eol_type == CODING_EOL_CRLF);
4325
4326   switch (coding->type)
4327     {
4328     case coding_type_sjis:
4329     case coding_type_big5:
4330       /* We can skip all ASCII characters at the head.  */
4331       if (coding->heading_ascii < 0)
4332         {
4333           if (eol_conversion)
4334             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4335           else
4336             while (begp < endp && *begp < 0x80) begp++;
4337         }
4338       /* We can skip all ASCII characters at the tail except for the
4339          second byte of SJIS or BIG5 code.  */
4340       if (eol_conversion)
4341         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4342       else
4343         while (begp < endp && endp[-1] < 0x80) endp--;
4344       /* Do not consider LF as ascii if preceded by CR, since that
4345          confuses eol decoding. */
4346       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4347         endp++;
4348       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4349         endp++;
4350       break;
4351
4352     case coding_type_iso2022:
4353       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4354         /* We can't skip any data.  */
4355         break;
4356       if (coding->heading_ascii < 0)
4357         {
4358           /* We can skip all ASCII characters at the head except for a
4359              few control codes.  */
4360           while (begp < endp && (c = *begp) < 0x80
4361                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4362                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4363                  && (!eol_conversion || c != ISO_CODE_LF))
4364             begp++;
4365         }
4366       switch (coding->category_idx)
4367         {
4368         case CODING_CATEGORY_IDX_ISO_8_1:
4369         case CODING_CATEGORY_IDX_ISO_8_2:
4370           /* We can skip all ASCII characters at the tail.  */
4371           if (eol_conversion)
4372             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4373           else
4374             while (begp < endp && endp[-1] < 0x80) endp--;
4375           /* Do not consider LF as ascii if preceded by CR, since that
4376              confuses eol decoding. */
4377           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4378             endp++;
4379           break;
4380
4381         case CODING_CATEGORY_IDX_ISO_7:
4382         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4383           {
4384             /* We can skip all charactes at the tail except for 8-bit
4385                codes and ESC and the following 2-byte at the tail.  */
4386             unsigned char *eight_bit = NULL;
4387
4388             if (eol_conversion)
4389               while (begp < endp
4390                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4391                 {
4392                   if (!eight_bit && c & 0x80) eight_bit = endp;
4393                   endp--;
4394                 }
4395             else
4396               while (begp < endp
4397                      && (c = endp[-1]) != ISO_CODE_ESC)
4398                 {
4399                   if (!eight_bit && c & 0x80) eight_bit = endp;
4400                   endp--;
4401                 }
4402             /* Do not consider LF as ascii if preceded by CR, since that
4403                confuses eol decoding. */
4404             if (begp < endp && endp < endp_orig
4405                 && endp[-1] == '\r' && endp[0] == '\n')
4406               endp++;
4407             if (begp < endp && endp[-1] == ISO_CODE_ESC)
4408               {
4409                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
4410                   /* This is an ASCII designation sequence.  We can
4411                      surely skip the tail.  But, if we have
4412                      encountered an 8-bit code, skip only the codes
4413                      after that.  */
4414                   endp = eight_bit ? eight_bit : endp + 2;
4415                 else
4416                   /* Hmmm, we can't skip the tail.  */
4417                   endp = endp_orig;
4418               }
4419             else if (eight_bit)
4420               endp = eight_bit;
4421           }
4422         }
4423       break;
4424
4425     default:
4426       abort ();
4427     }
4428   *beg += begp - begp_orig;
4429   *end += endp - endp_orig;
4430   return;
4431 }
4432
4433 /* Like shrink_decoding_region but for encoding.  */
4434
4435 static void
4436 shrink_encoding_region (beg, end, coding, str)
4437      int *beg, *end;
4438      struct coding_system *coding;
4439      unsigned char *str;
4440 {
4441   unsigned char *begp_orig, *begp, *endp_orig, *endp;
4442   int eol_conversion;
4443   Lisp_Object translation_table;
4444
4445   if (coding->type == coding_type_ccl
4446       || coding->eol_type == CODING_EOL_CRLF
4447       || coding->eol_type == CODING_EOL_CR
4448       || coding->cmp_data && coding->cmp_data->used > 0)
4449     {
4450       /* We can't skip any data.  */
4451       return;
4452     }
4453   if (coding->type == coding_type_no_conversion
4454       || coding->type == coding_type_raw_text
4455       || coding->type == coding_type_emacs_mule
4456       || coding->type == coding_type_undecided)
4457     {
4458       /* We need no conversion, but don't have to skip any data here.
4459          Encoding routine handles them effectively anyway.  */
4460       return;
4461     }
4462
4463   translation_table = coding->translation_table_for_encode;
4464   if (NILP (translation_table) && !NILP (Venable_character_translation))
4465     translation_table = Vstandard_translation_table_for_encode;
4466   if (CHAR_TABLE_P (translation_table))
4467     {
4468       int i;
4469       for (i = 0; i < 128; i++)
4470         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4471           break;
4472       if (i < 128)
4473         /* Some ASCII character should be tranlsated.  We give up
4474            shrinking.  */
4475         return;
4476     }
4477
4478   if (str)
4479     {
4480       begp_orig = begp = str + *beg;
4481       endp_orig = endp = str + *end;
4482     }
4483   else
4484     {
4485       begp_orig = begp = BYTE_POS_ADDR (*beg);
4486       endp_orig = endp = begp + *end - *beg;
4487     }
4488
4489   eol_conversion = (coding->eol_type == CODING_EOL_CR
4490                     || coding->eol_type == CODING_EOL_CRLF);
4491
4492   /* Here, we don't have to check coding->pre_write_conversion because
4493      the caller is expected to have handled it already.  */
4494   switch (coding->type)
4495     {
4496     case coding_type_iso2022:
4497       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4498         /* We can't skip any data.  */
4499         break;
4500       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4501         {
4502           unsigned char *bol = begp;
4503           while (begp < endp && *begp < 0x80)
4504             {
4505               begp++;
4506               if (begp[-1] == '\n')
4507                 bol = begp;
4508             }
4509           begp = bol;
4510           goto label_skip_tail;
4511         }
4512       /* fall down ... */
4513
4514     case coding_type_sjis:
4515     case coding_type_big5:
4516       /* We can skip all ASCII characters at the head and tail.  */
4517       if (eol_conversion)
4518         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4519       else
4520         while (begp < endp && *begp < 0x80) begp++;
4521     label_skip_tail:
4522       if (eol_conversion)
4523         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4524       else
4525         while (begp < endp && *(endp - 1) < 0x80) endp--;
4526       break;
4527
4528     default:
4529       abort ();
4530     }
4531
4532   *beg += begp - begp_orig;
4533   *end += endp - endp_orig;
4534   return;
4535 }
4536
4537 /* As shrinking conversion region requires some overhead, we don't try
4538    shrinking unless the length of conversion region is greater than
4539    this value.  */
4540 static int shrink_conversion_region_threshhold = 1024;
4541
     /* Try to narrow the conversion region whose byte positions are
        stored in *BEG and *END, but only when the region is longer than
        shrink_conversion_region_threshhold.  BEG, END, CODING and STR are
        handed unchanged to shrink_encoding_region (ENCODEP nonzero) or to
        shrink_decoding_region (ENCODEP zero).  */
4542 #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
4543   do {                                                                  \
4544     if (shrink_conversion_region_threshhold < *(end) - *(beg))          \
4545       {                                                                 \
4546         if (encodep)                                                    \
               shrink_encoding_region (beg, end, coding, str);               \
4547         else                                                            \
               shrink_decoding_region (beg, end, coding, str);               \
4548       }                                                                 \
4549   } while (0)
4550
     /* Handler registered with record_unwind_protect around calls to
        pre-write-conversion and post-read-conversion functions.  It
        clears inhibit_pre_post_conversion so that the flag does not stay
        set if such a call exits non-locally.  DUMMY is ignored.  Always
        returns Qnil.  */
4551 static Lisp_Object
4552 code_convert_region_unwind (dummy)
4553      Lisp_Object dummy;
4554 {
4555   inhibit_pre_post_conversion = 0;
4556   return Qnil;
4557 }
4558
4559 /* Record every composition found between FROM and TO of OBJ in the
4560    memory blocks reachable from CODING->cmp_data.  OBJ is a buffer or
4561    a string, defaults to the current buffer.

        Nothing is done when CODING->composing is COMPOSITION_DISABLED.
        Start and end positions are stored relative to FROM.  When the
        scan runs to completion, CODING->cmp_data is rewound to the first
        block and CODING->cmp_data_start is reset to 0.  */
4562
4563 void
4564 coding_save_composition (coding, from, to, obj)
4565      struct coding_system *coding;
4566      int from, to;
4567      Lisp_Object obj;
4568 {
4569   Lisp_Object prop;
4570   int start, end;
4571
4572   if (coding->composing == COMPOSITION_DISABLED)
4573     return;
4574   if (!coding->cmp_data)
4575     coding_allocate_composition_data (coding, from);

       /* Look up the first composition in the range.  Give up if none
          is found or if it extends past TO.  */
       if (!find_composition (from, to, &start, &end, &prop, obj))
         return;
       if (end > to)
         return;
       if (start < from)
         {
           /* The composition found begins before FROM.  Search again
              from where it ends.  */
           if (!find_composition (end, to, &start, &end, &prop, obj))
             return;
           if (end > to)
             return;
         }

4583   coding->composing = COMPOSITION_NO;
       for (;;)
4585     {
4586       if (COMPOSITION_VALID_P (start, end, prop))
4587         {
4588           enum composition_method method = COMPOSITION_METHOD (prop);

               /* Ask for more composition storage when the current block
                  may not have room for one more record.  */
4589           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
4590               >= COMPOSITION_DATA_SIZE)
4591             coding_allocate_composition_data (coding, from);
4592           /* For relative composition, we remember start and end
4593              positions, for the other compositions, we also remember
4594              components.  */
4595           CODING_ADD_COMPOSITION_START (coding, start - from, method);
4596           if (method != COMPOSITION_RELATIVE)
4597             {
                   /* The components are a list, a vector, a string, or a
                      single integer.  Store each one as an integer.  */
                   Lisp_Object elts, ch;

                   elts = COMPOSITION_COMPONENTS (prop);
                   if (CONSP (elts))
                     {
                       for (; CONSP (elts); elts = XCDR (elts))
                         {
                           ch = XCAR (elts);
                           CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
                         }
                     }
                   else if (VECTORP (elts))
                     {
                       int nelts = XVECTOR (elts)->size;
                       int idx;

                       for (idx = 0; idx < nelts; idx++)
                         {
                           ch = XVECTOR (elts)->contents[idx];
                           CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
                         }
                     }
                   else if (STRINGP (elts))
                     {
                       int nelts = XSTRING (elts)->size;
                       int idx;

                       for (idx = 0; idx < nelts; idx++)
                         {
                           ch = Faref (elts, make_number (idx));
                           CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
                         }
                     }
4621               else              /* INTEGERP (elts) */
4622                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (elts));
4623             }
4624           CODING_ADD_COMPOSITION_END (coding, end - from);
4625         }
4626       start = end;

           /* Go on only while another composition ending at or before
              TO can be found.  */
           if (start >= to)
             break;
           if (!find_composition (start, to, &start, &end, &prop, obj))
             break;
           if (end > to)
             break;
4627     }
4631
4632   /* Make coding->cmp_data point to the first memory block.  */
4633   while (coding->cmp_data->prev)
4634     coding->cmp_data = coding->cmp_data->prev;
4635   coding->cmp_data_start = 0;
4636 }
4637
4638 /* Reflect the saved information about compositions to OBJ.
4639    CODING->cmp_data points to a memory block for the information.  OBJ
4640    is a buffer or a string, defaults to the current buffer.

        All blocks of the chain are processed, starting from the first
        one.  Each record in a block is read as follows: data[0] is the
        record length in ints (a four-int header plus the components),
        data[1] and data[2] are the positions handed to compose_text,
        data[3] is the composition method, and data[4..] are the
        components.  A record whose length is inconsistent stops the
        processing of its block, because the record length is also the
        loop increment and the component count sizes a fixed array.  */
4641
4642 void
4643 coding_restore_composition (coding, obj)
4644      struct coding_system *coding;
4645      Lisp_Object obj;
4646 {
4647   struct composition_data *cmp_data = coding->cmp_data;
4648
4649   if (!cmp_data)
4650     return;
4651
4652   while (cmp_data->prev)
4653     cmp_data = cmp_data->prev;
4654
4655   while (cmp_data)
4656     {
4657       int i;
4658
4659       for (i = 0; i < cmp_data->used; i += cmp_data->data[i])
4660         {
4661           int *data = cmp_data->data + i;
4662           enum composition_method method;
4663           Lisp_Object components;
4664
               /* Invalid composition data: the record is shorter than
                  its own header or runs past the used part of the
                  block.  Advancing by such a length would loop forever
                  or step outside the block.  */
               if (data[0] < 4 || i + data[0] > cmp_data->used)
                 break;

               method = (enum composition_method) data[3];
4665           if (method == COMPOSITION_RELATIVE)
4666             components = Qnil;
4667           else
4668             {
4669               int len = data[0] - 4, j;
4670               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
4671
                   /* Invalid composition data: no component at all, or
                      more components than ARGS can hold.  */
                   if (len < 1 || len > MAX_COMPOSITION_COMPONENTS * 2 - 1)
                     break;
4672               for (j = 0; j < len; j++)
4673                 args[j] = make_number (data[4 + j]);
4674               components = (method == COMPOSITION_WITH_ALTCHARS
4675                             ? Fstring (len, args) : Fvector (len, args));
4676             }
4677           compose_text (data[1], data[2], components, Qnil, obj);
4678         }
4679       cmp_data = cmp_data->next;
4680     }
4681 }
4682
4683 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4684    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4685    coding system CODING, and return the status code of code conversion
4686    (currently, this value has no meaning).
4687
4688    How many characters (and bytes) are converted to how many
4689    characters (and bytes) are recorded in members of the structure
4690    CODING.
4691
4692    If REPLACE is nonzero, we do various things as if the original text
4693    is deleted and a new text is inserted.  See the comments in
4694    replace_range (insdel.c) to know what we are doing.
4695
4696    If REPLACE is zero, it is assumed that the source text is unibyte.
4697    Otherwise, it is assumed that the source text is multibyte.

        The conversion is done in place in the current buffer: the source
        text is temporarily made part of the gap and the converted text is
        written at the start of the gap.  Composition data allocated for
        CODING is released on every return path of this function.  */
4698
4699 int
4700 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4701      int from, from_byte, to, to_byte, encodep, replace;
4702      struct coding_system *coding;
4703 {
4704   int len = to - from, len_byte = to_byte - from_byte;
4705   int require, inserted, inserted_byte;
4706   int head_skip, tail_skip, total_skip = 0;
4707   Lisp_Object saved_coding_symbol;
4708   int first = 1;
4709   unsigned char *src, *dst;
4710   Lisp_Object deletion;
4711   int orig_point = PT, orig_len = len;
4712   int prev_Z;
4713   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
4714
4715   coding->src_multibyte = replace && multibyte_p;
4716   coding->dst_multibyte = multibyte_p;
4717
4718   deletion = Qnil;
4719   saved_coding_symbol = Qnil;
4720
4721   if (from < PT && PT < to)
4722     {
4723       TEMP_SET_PT_BOTH (from, from_byte);
4724       orig_point = from;
4725     }
4726
4727   if (replace)
4728     {
4729       int saved_from = from;
4730       int saved_inhibit_modification_hooks;
4731
4732       prepare_to_modify_buffer (from, to, &from);
4733       if (saved_from != from)
4734         {
4735           to = from + len;
4736           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4737           len_byte = to_byte - from_byte;
4738         }
4739
4740       /* The code conversion routine can not preserve text properties
4741          for now.  So, we must remove all text properties in the
4742          region.  Here, we must suppress all modification hooks.  */
4743       saved_inhibit_modification_hooks = inhibit_modification_hooks;
4744       inhibit_modification_hooks = 1;
4745       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
4746       inhibit_modification_hooks = saved_inhibit_modification_hooks;
4747     }
4748
4749   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4750     {
4751       /* We must detect encoding of text and eol format.  */
4752
4753       if (from < GPT && to > GPT)
4754         move_gap_both (from, from_byte);
4755       if (coding->type == coding_type_undecided)
4756         {
4757           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4758           if (coding->type == coding_type_undecided)
4759             /* It seems that the text contains only ASCII, but we
4760                should not leave it undecided because the deeper
4761                decoding routine (decode_coding) tries to detect the
4762                encodings again in vain.  */
4763             coding->type = coding_type_emacs_mule;
4764         }
4765       if (coding->eol_type == CODING_EOL_UNDECIDED
4766           && coding->type != coding_type_ccl)
4767         {
4768           saved_coding_symbol = coding->symbol;
4769           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4770           if (coding->eol_type == CODING_EOL_UNDECIDED)
4771             coding->eol_type = CODING_EOL_LF;
4772           /* We had better recover the original eol format if we
4773              encounter an inconsistent eol format while decoding.  */
4774           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4775         }
4776     }
4777
4778   /* Now we convert the text.  */
4779
4780   /* For encoding, we must process pre-write-conversion in advance.  */
4781   if (! inhibit_pre_post_conversion
4782       && encodep
4783       && SYMBOLP (coding->pre_write_conversion)
4784       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4785     {
4786       /* The function in pre-write-conversion may put a new text in a
4787          new buffer.  */
4788       struct buffer *prev = current_buffer;
4789       Lisp_Object new;
4791
4792       record_unwind_protect (code_convert_region_unwind, Qnil);
4793       /* We should not call any more pre-write/post-read-conversion
4794          functions while this pre-write-conversion is running.  */
4795       inhibit_pre_post_conversion = 1;
4796       call2 (coding->pre_write_conversion,
4797              make_number (from), make_number (to));
4798       inhibit_pre_post_conversion = 0;
4799       /* Discard the unwind protect.  */
4800       specpdl_ptr--;
4801
4802       if (current_buffer != prev)
4803         {
4804           len = ZV - BEGV;
4805           new = Fcurrent_buffer ();
4806           set_buffer_internal_1 (prev);
4807           del_range_2 (from, from_byte, to, to_byte, 0);
4808           TEMP_SET_PT_BOTH (from, from_byte);
4809           insert_from_buffer (XBUFFER (new), 1, len, 0);
4810           Fkill_buffer (new);
4811           if (orig_point >= to)
4812             orig_point += len - orig_len;
4813           else if (orig_point > from)
4814             orig_point = from;
4815           orig_len = len;
4816           to = from + len;
4817           from_byte = CHAR_TO_BYTE (from);
4818           to_byte = CHAR_TO_BYTE (to);
4819           len_byte = to_byte - from_byte;
4820           TEMP_SET_PT_BOTH (from, from_byte);
4821         }
4822     }
4823
4824   if (replace)
4825     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4826
4827   if (coding->composing != COMPOSITION_DISABLED)
4828     {
4829       if (encodep)
4830         coding_save_composition (coding, from, to, Fcurrent_buffer ());
4831       else
4832         coding_allocate_composition_data (coding, from);
4833     }
4834
4835   /* Try to skip the heading and tailing ASCIIs.  */
4836   if (coding->type != coding_type_ccl)
4837     {
4838       int from_byte_orig = from_byte, to_byte_orig = to_byte;
4839
4840       if (from < GPT && GPT < to)
4841         move_gap_both (from, from_byte);
4842       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
4843       if (from_byte == to_byte
4844           && (encodep || NILP (coding->post_read_conversion))
4845           && ! CODING_REQUIRE_FLUSHING (coding))
4846         {
4847           coding->produced = len_byte;
4848           coding->produced_char = len;
4849           if (!replace)
4850             /* We must record and adjust for this new text now.  */
4851             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
               /* Nothing is left to convert, so the normal release of the
                  composition data at the end of this function is never
                  reached.  Release it here, otherwise the data set up
                  just above would be leaked.  */
               coding_free_composition_data (coding);
4852           return 0;
4853         }
4854
4855       head_skip = from_byte - from_byte_orig;
4856       tail_skip = to_byte_orig - to_byte;
4857       total_skip = head_skip + tail_skip;
4858       from += head_skip;
4859       to -= tail_skip;
4860       len -= total_skip; len_byte -= total_skip;
4861     }
4862
4863   /* For conversion, we must put the gap before the text in addition to
4864      making the gap larger for efficient decoding.  The required gap
4865      size starts from 2000 which is the magic number used in make_gap.
4866      But, after one batch of conversion, it will be incremented if we
4867      find that it is not enough .  */
4868   require = 2000;
4869
4870   if (GAP_SIZE  < require)
4871     make_gap (require - GAP_SIZE);
4872   move_gap_both (from, from_byte);
4873
4874   inserted = inserted_byte = 0;
4875
4876   GAP_SIZE += len_byte;
4877   ZV -= len;
4878   Z -= len;
4879   ZV_BYTE -= len_byte;
4880   Z_BYTE -= len_byte;
4881
4882   if (GPT - BEG < BEG_UNCHANGED)
4883     BEG_UNCHANGED = GPT - BEG;
4884   if (Z - GPT < END_UNCHANGED)
4885     END_UNCHANGED = Z - GPT;
4886
4887   if (!encodep && coding->src_multibyte)
4888     {
4889       /* Decoding routines expects that the source text is unibyte.
4890          We must convert 8-bit characters of multibyte form to
4891          unibyte.  */
4892       int len_byte_orig = len_byte;
4893       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
4894       if (len_byte < len_byte_orig)
4895         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
4896                     len_byte);
4897       coding->src_multibyte = 0;
4898     }
4899
4900   for (;;)
4901     {
4902       int result;
4903
4904       /* The buffer memory is now:
4905          +--------+converted-text+---------+-------original-text-------+---+
4906          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
4907                   |<---------------------- GAP ----------------------->|  */
4908       src = GAP_END_ADDR - len_byte;
4909       dst = GPT_ADDR + inserted_byte;
4910
4911       if (encodep)
4912         result = encode_coding (coding, src, dst, len_byte, 0);
4913       else
4914         result = decode_coding (coding, src, dst, len_byte, 0);
4915
4916       /* The buffer memory is now:
4917          +--------+-------converted-text----+--+------original-text----+---+
4918          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
4919                   |<---------------------- GAP ----------------------->|  */
4920
4921       inserted += coding->produced_char;
4922       inserted_byte += coding->produced;
4923       len_byte -= coding->consumed;
4924
4925       if (result == CODING_FINISH_INSUFFICIENT_CMP)
4926         {
4927           coding_allocate_composition_data (coding, from + inserted);
4928           continue;
4929         }
4930
4931       src += coding->consumed;
4932       dst += coding->produced;
4933
4934       if (result == CODING_FINISH_NORMAL)
4935         {
4936           src += len_byte;
4937           break;
4938         }
4939       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4940         {
4941           unsigned char *pend = dst, *p = pend - inserted_byte;
4942           Lisp_Object eol_type;
4943
4944           /* Encode LFs back to the original eol format (CR or CRLF).  */
4945           if (coding->eol_type == CODING_EOL_CR)
4946             {
4947               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4948             }
4949           else
4950             {
4951               int count = 0;
4952
4953               while (p < pend) if (*p++ == '\n') count++;
4954               if (src - dst < count)
4955                 {
4956                   /* We don't have sufficient room for encoding LFs
4957                      back to CRLF.  We must record converted and
4958                      not-yet-converted text back to the buffer
4959                      content, enlarge the gap, then record them out of
4960                      the buffer contents again.  */
4961                   int add = len_byte + inserted_byte;
4962
4963                   GAP_SIZE -= add;
4964                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4965                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4966                   make_gap (count - GAP_SIZE);
4967                   GAP_SIZE += add;
4968                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4969                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4970                   /* Don't forget to update SRC, DST, and PEND.  */
4971                   src = GAP_END_ADDR - len_byte;
4972                   dst = GPT_ADDR + inserted_byte;
4973                   pend = dst;
4974                 }
4975               inserted += count;
4976               inserted_byte += count;
4977               coding->produced += count;
4978               p = dst = pend + count;
4979               while (count)
4980                 {
4981                   *--p = *--pend;
4982                   if (*p == '\n') count--, *--p = '\r';
4983                 }
4984             }
4985
4986           /* Suppress eol-format conversion in the further conversion.  */
4987           coding->eol_type = CODING_EOL_LF;
4988
4989           /* Set the coding system symbol to that for Unix-like EOL.  */
4990           eol_type = Fget (saved_coding_symbol, Qeol_type);
4991           if (VECTORP (eol_type)
4992               && XVECTOR (eol_type)->size == 3
4993               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
4994             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
4995           else
4996             coding->symbol = saved_coding_symbol;
4997
4998           continue;
4999         }
5000       if (len_byte <= 0)
5001         {
5002           if (coding->type != coding_type_ccl
5003               || coding->mode & CODING_MODE_LAST_BLOCK)
5004             break;
5005           coding->mode |= CODING_MODE_LAST_BLOCK;
5006           continue;
5007         }
5008       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5009         {
5010           /* The source text ends in invalid codes.  Let's just
5011              make them valid buffer contents, and finish conversion.  */
5012           inserted += len_byte;
5013           inserted_byte += len_byte;
5014           while (len_byte--)
5015             *dst++ = *src++;
5016           break;
5017         }
5018       if (result == CODING_FINISH_INTERRUPT)
5019         {
5020           /* The conversion procedure was interrupted by a user.  */
5021           break;
5022         }
5023       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5024       if (coding->consumed < 1)
5025         {
5026           /* It's quite strange to require more memory without
5027              consuming any bytes.  Perhaps CCL program bug.  */
5028           break;
5029         }
5030       if (first)
5031         {
5032           /* We have just done the first batch of conversion which was
5033              stopped because of insufficient gap.  Let's reconsider the
5034              required gap size (i.e. SRC - DST) now.
5035
5036              We have converted ORIG bytes (== coding->consumed) into
5037              NEW bytes (coding->produced).  To convert the remaining
5038              LEN bytes, we may need REQUIRE bytes of gap, where:
5039                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5040                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5041              Here, we are sure that NEW >= ORIG.  */
5042           float ratio = coding->produced - coding->consumed;
5043           ratio /= coding->consumed;
5044           require = len_byte * ratio;
5045           first = 0;
5046         }
5047       if ((src - dst) < (require + 2000))
5048         {
5049           /* See the comment above the previous call of make_gap.  */
5050           int add = len_byte + inserted_byte;
5051
5052           GAP_SIZE -= add;
5053           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5054           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5055           make_gap (require + 2000);
5056           GAP_SIZE += add;
5057           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5058           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5059         }
5060     }
5061   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5062
5063   if (encodep && coding->dst_multibyte)
5064     {
5065       /* The output is unibyte.  We must convert 8-bit characters to
5066          multibyte form.  */
5067       if (inserted_byte * 2 > GAP_SIZE)
5068         {
5069           GAP_SIZE -= inserted_byte;
5070           ZV += inserted_byte; Z += inserted_byte;
5071           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5072           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5073           make_gap (inserted_byte - GAP_SIZE);
5074           GAP_SIZE += inserted_byte;
5075           ZV -= inserted_byte; Z -= inserted_byte;
5076           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5077           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5078         }
5079       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5080     }
5081
5082   /* If we have shrunk the conversion area, adjust it now.  */
5083   if (total_skip > 0)
5084     {
5085       if (tail_skip > 0)
5086         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5087       inserted += total_skip; inserted_byte += total_skip;
5088       GAP_SIZE += total_skip;
5089       GPT -= head_skip; GPT_BYTE -= head_skip;
5090       ZV -= total_skip; ZV_BYTE -= total_skip;
5091       Z -= total_skip; Z_BYTE -= total_skip;
5092       from -= head_skip; from_byte -= head_skip;
5093       to += tail_skip; to_byte += tail_skip;
5094     }
5095
5096   prev_Z = Z;
5097   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5098   inserted = Z - prev_Z;
5099
5100   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5101     coding_restore_composition (coding, Fcurrent_buffer ());
5102   coding_free_composition_data (coding);
5103
5104   if (! inhibit_pre_post_conversion
5105       && ! encodep && ! NILP (coding->post_read_conversion))
5106     {
5107       Lisp_Object val;
5109
5110       if (from != PT)
5111         TEMP_SET_PT_BOTH (from, from_byte);
5112       prev_Z = Z;
5113       record_unwind_protect (code_convert_region_unwind, Qnil);
5114       /* We should not call any more pre-write/post-read-conversion
5115          functions while this post-read-conversion is running.  */
5116       inhibit_pre_post_conversion = 1;
5117       val = call1 (coding->post_read_conversion, make_number (inserted));
5118       inhibit_pre_post_conversion = 0;
5119       /* Discard the unwind protect.  */
5120       specpdl_ptr--;
5121       CHECK_NUMBER (val, 0);
5122       inserted += Z - prev_Z;
5123     }
5124
5125   if (orig_point >= from)
5126     {
5127       if (orig_point >= from + orig_len)
5128         orig_point += inserted - orig_len;
5129       else
5130         orig_point = from;
5131       TEMP_SET_PT (orig_point);
5132     }
5133
5134   if (replace)
5135     {
5136       signal_after_change (from, to - from, inserted);
5137       update_compositions (from, from + inserted, CHECK_BORDER);
5138     }
5139
5140   {
5141     coding->consumed = to_byte - from_byte;
5142     coding->consumed_char = to - from;
5143     coding->produced = inserted_byte;
5144     coding->produced_char = inserted;
5145   }
5146
5147   return 0;
5148 }
5149
     /* Run the pre-write-conversion function of CODING (if ENCODEP is
        nonzero) or its post-read-conversion function (if ENCODEP is zero)
        on the text of STR, and return the resulting text as a string.

        STR is inserted into the work buffer " *code-converting-work*",
        whose multibyteness is set to that of STR.  The function is called
        with inhibit_pre_post_conversion set to 1.  pre-write-conversion
        receives the positions BEG and Z.  post-read-conversion is called
        with point at BEG and receives the text length Z - BEG.  The
        previously current buffer is made current again through the
        unwind-protect that unbind_to runs on the way out.  */
5150 Lisp_Object
5151 run_pre_post_conversion_on_str (str, coding, encodep)
5152      Lisp_Object str;
5153      struct coding_system *coding;
5154      int encodep;
5155 {
5156   int count = specpdl_ptr - specpdl;
5157   struct gcpro gcpro1;
5159   int multibyte = STRING_MULTIBYTE (str);
5160
5161   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5162   record_unwind_protect (code_convert_region_unwind, Qnil);
5163   GCPRO1 (str);
5164   temp_output_buffer_setup (" *code-converting-work*");
5165   set_buffer_internal (XBUFFER (Vstandard_output));
5166   /* We must insert the contents of STR as is without
5167      unibyte<->multibyte conversion.  For that, we adjust the
5168      multibyteness of the working buffer to that of STR.  */
5169   Ferase_buffer ();
5170   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5171   insert_from_string (str, 0, 0,
5172                       XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
5173   UNGCPRO;
5174   inhibit_pre_post_conversion = 1;
5175   if (encodep)
5176     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5177   else
5178     {
5179       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5180       call1 (coding->post_read_conversion, make_number (Z - BEG));
5181     }
5182   inhibit_pre_post_conversion = 0;
5183   str = make_buffer_string (BEG, Z, 0);
5184   return unbind_to (count, str);
5185 }
5186
     /* Decode the string STR by the coding system CODING and return the
        decoded text as a string.

        When no conversion turns out to be necessary, STR itself is
        returned if NOCOPY is nonzero, otherwise a copy of it.  In that
        case a unibyte STR is first converted with Fstring_as_multibyte
        when CODING requires no decoding at all.

        The coding system and the end-of-line format are detected first
        when CODING asks for it.  If decoding meets an inconsistent
        end-of-line format, the whole string is decoded again without
        end-of-line conversion.  If CODING->post_read_conversion is a
        symbol with a function definition, it is run on the decoded
        string.  Composition data allocated for CODING is released on
        every return path.  */
5187 Lisp_Object
5188 decode_coding_string (str, coding, nocopy)
5189      Lisp_Object str;
5190      struct coding_system *coding;
5191      int nocopy;
5192 {
5193   int len;
5194   char *buf;
5195   int from, to, to_byte;
5196   struct gcpro gcpro1;
5197   Lisp_Object saved_coding_symbol;
5198   int result;
5199
5200   from = 0;
5201   to = XSTRING (str)->size;
5202   to_byte = STRING_BYTES (XSTRING (str));
5203
5204   saved_coding_symbol = Qnil;
5205   if (CODING_REQUIRE_DETECTION (coding))
5206     {
5207       /* See the comments in code_convert_region.  */
5208       if (coding->type == coding_type_undecided)
5209         {
5210           detect_coding (coding, XSTRING (str)->data, to_byte);
5211           if (coding->type == coding_type_undecided)
5212             coding->type = coding_type_emacs_mule;
5213         }
5214       if (coding->eol_type == CODING_EOL_UNDECIDED
5215           && coding->type != coding_type_ccl)
5216         {
5217           saved_coding_symbol = coding->symbol;
5218           detect_eol (coding, XSTRING (str)->data, to_byte);
5219           if (coding->eol_type == CODING_EOL_UNDECIDED)
5220             coding->eol_type = CODING_EOL_LF;
5221           /* We had better recover the original eol format if we
5222              encounter an inconsistent eol format while decoding.  */
5223           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5224         }
5225     }
5226
5227   if (! CODING_REQUIRE_DECODING (coding))
5228     {
5229       if (!STRING_MULTIBYTE (str))
5230         {
5231           str = Fstring_as_multibyte (str);
5232           nocopy = 1;
5233         }
5234       return (nocopy ? str : Fcopy_sequence (str));
5235     }
5236
5237   if (STRING_MULTIBYTE (str))
5238     {
5239       /* Decoding routines expect the source text to be unibyte.  */
5240       str = Fstring_as_unibyte (str);
5241       to_byte = STRING_BYTES (XSTRING (str));
5242       nocopy = 1;
5243       coding->src_multibyte = 0;
5244     }
5245   coding->dst_multibyte = 1;
5246
5247   if (coding->composing != COMPOSITION_DISABLED)
5248     coding_allocate_composition_data (coding, from);
5249
5250   /* Try to skip the heading and tailing ASCIIs.  */
5251   if (coding->type != coding_type_ccl)
5252     {
5255       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5256                                 0);
5257       if (from == to_byte)
             {
               /* Nothing is left to decode.  Release the composition
                  data allocated above before leaving, otherwise it
                  would be leaked.  */
               coding_free_composition_data (coding);
5258           return (nocopy ? str : Fcopy_sequence (str));
             }
5259     }
5260
5261   len = decoding_buffer_size (coding, to_byte - from);
5262   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
5263   GCPRO1 (str);
5264   buf = get_conversion_buffer (len);
5265   UNGCPRO;
5266
5267   if (from > 0)
5268     bcopy (XSTRING (str)->data, buf, from);
5269   result = decode_coding (coding, XSTRING (str)->data + from,
5270                          buf + from, to_byte - from, len);
5271   if (result == CODING_FINISH_INCONSISTENT_EOL)
5272     {
5273       /* We simply try to decode the whole string again but without
5274          eol-conversion this time.  */
5275       coding->eol_type = CODING_EOL_LF;
5276       coding->symbol = saved_coding_symbol;
5277       coding_free_composition_data (coding);
5278       return decode_coding_string (str, coding, nocopy);
5279     }
5280
5281   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
5282          STRING_BYTES (XSTRING (str)) - to_byte);
5283
5284   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
5285   str = make_multibyte_string (buf, len + coding->produced_char,
5286                                len + coding->produced);
5287
5288   if (coding->cmp_data && coding->cmp_data->used)
5289     coding_restore_composition (coding, str);
5290   coding_free_composition_data (coding);
5291
5292   if (SYMBOLP (coding->post_read_conversion)
5293       && !NILP (Ffboundp (coding->post_read_conversion)))
5294     str = run_pre_post_conversion_on_str (str, coding, 0);
5295
5296   return str;
5297 }
5298
     /* Encode the string STR by the coding system CODING and return the
        encoded text as a unibyte string.

        If CODING->pre_write_conversion is a symbol with a function
        definition, it is run on STR first.  When no conversion turns out
        to be necessary, the string itself is returned if NOCOPY is
        nonzero, otherwise a copy of it.  A multibyte string is first
        converted with Fstring_as_unibyte when CODING requires no encoding
        at all.  Composition data allocated for CODING is released on
        every return path that follows its allocation.  */
5299 Lisp_Object
5300 encode_coding_string (str, coding, nocopy)
5301      Lisp_Object str;
5302      struct coding_system *coding;
5303      int nocopy;
5304 {
5305   int len;
5306   char *buf;
5307   int from, to, to_byte;
5308   struct gcpro gcpro1;
5309   Lisp_Object saved_coding_symbol;
5310   int result;
5311
5312   if (SYMBOLP (coding->pre_write_conversion)
5313       && !NILP (Ffboundp (coding->pre_write_conversion)))
5314     str = run_pre_post_conversion_on_str (str, coding, 1);
5315
5316   from = 0;
5317   to = XSTRING (str)->size;
5318   to_byte = STRING_BYTES (XSTRING (str));
5319
5320   saved_coding_symbol = Qnil;
5321   if (! CODING_REQUIRE_ENCODING (coding))
5322     {
5323       if (STRING_MULTIBYTE (str))
5324         {
5325           str = Fstring_as_unibyte (str);
5326           nocopy = 1;
5327         }
5328       return (nocopy ? str : Fcopy_sequence (str));
5329     }
5330
5331   /* Encoding routines determine the multibyteness of the source text
5332      by coding->src_multibyte.  */
5333   coding->src_multibyte = STRING_MULTIBYTE (str);
5334   coding->dst_multibyte = 0;
5335
5336   if (coding->composing != COMPOSITION_DISABLED)
5337     coding_save_composition (coding, from, to, str);
5338
5339   /* Try to skip the heading and tailing ASCIIs.  */
5340   if (coding->type != coding_type_ccl)
5341     {
5344       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5345                                 1);
5346       if (from == to_byte)
             {
               /* Nothing is left to encode.  Release the composition
                  data that coding_save_composition may have allocated
                  above, otherwise it would be leaked.  */
               coding_free_composition_data (coding);
5347           return (nocopy ? str : Fcopy_sequence (str));
             }
5348     }
5349
5350   len = encoding_buffer_size (coding, to_byte - from);
5351   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
5352   GCPRO1 (str);
5353   buf = get_conversion_buffer (len);
5354   UNGCPRO;
5355
5356   if (from > 0)
5357     bcopy (XSTRING (str)->data, buf, from);
5358   result = encode_coding (coding, XSTRING (str)->data + from,
5359                           buf + from, to_byte - from, len);
5360   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
5361          STRING_BYTES (XSTRING (str)) - to_byte);
5362
5363   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
5364   str = make_unibyte_string (buf, len + coding->produced);
5365   coding_free_composition_data (coding);
5366
5367   return str;
5368 }
5369
5370 \f
5371 #ifdef emacs
5372 /*** 8. Emacs Lisp library functions ***/
5373
5374 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
5375   "Return t if OBJECT is nil or a coding-system.\n\
5376 See the documentation of `make-coding-system' for information\n\
5377 about coding-system objects.")
5378   (obj)
5379      Lisp_Object obj;
5380 {
5381   if (NILP (obj))
5382     return Qt;
5383   if (!SYMBOLP (obj))
5384     return Qnil;
5385   /* Get coding-spec vector for OBJ.  */
5386   obj = Fget (obj, Qcoding_system);
5387   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
5388           ? Qt : Qnil);
5389 }
5390
5391 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
5392        Sread_non_nil_coding_system, 1, 1, 0,
5393   "Read a coding system from the minibuffer, prompting with string PROMPT.")
5394   (prompt)
5395      Lisp_Object prompt;
5396 {
5397   Lisp_Object val;
5398   do
5399     {
5400       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5401                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
5402     }
5403   while (XSTRING (val)->size == 0);
5404   return (Fintern (val, Qnil));
5405 }
5406
5407 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
5408   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
5409 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
5410   (prompt, default_coding_system)
5411      Lisp_Object prompt, default_coding_system;
5412 {
5413   Lisp_Object val;
5414   if (SYMBOLP (default_coding_system))
5415     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
5416   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5417                           Qt, Qnil, Qcoding_system_history,
5418                           default_coding_system, Qnil);
5419   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
5420 }
5421
5422 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
5423        1, 1, 0,
5424   "Check validity of CODING-SYSTEM.\n\
5425 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
5426 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
5427 The value of property should be a vector of length 5.")
5428   (coding_system)
5429      Lisp_Object coding_system;
5430 {
5431   CHECK_SYMBOL (coding_system, 0);
5432   if (!NILP (Fcoding_system_p (coding_system)))
5433     return coding_system;
5434   while (1)
5435     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
5436 }
5437 \f
5438 Lisp_Object
5439 detect_coding_system (src, src_bytes, highest)
5440      unsigned char *src;
5441      int src_bytes, highest;
5442 {
5443   int coding_mask, eol_type;
5444   Lisp_Object val, tmp;
5445   int dummy;
5446
5447   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
5448   eol_type  = detect_eol_type (src, src_bytes, &dummy);
5449   if (eol_type == CODING_EOL_INCONSISTENT)
5450     eol_type = CODING_EOL_UNDECIDED;
5451
5452   if (!coding_mask)
5453     {
5454       val = Qundecided;
5455       if (eol_type != CODING_EOL_UNDECIDED)
5456         {
5457           Lisp_Object val2;
5458           val2 = Fget (Qundecided, Qeol_type);
5459           if (VECTORP (val2))
5460             val = XVECTOR (val2)->contents[eol_type];
5461         }
5462       return (highest ? val : Fcons (val, Qnil));
5463     }
5464
5465   /* At first, gather possible coding systems in VAL.  */
5466   val = Qnil;
5467   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
5468     {
5469       Lisp_Object category_val, category_index;
5470
5471       category_index = Fget (XCAR (tmp), Qcoding_category_index);
5472       category_val = Fsymbol_value (XCAR (tmp));
5473       if (!NILP (category_val)
5474           && NATNUMP (category_index)
5475           && (coding_mask & (1 << XFASTINT (category_index))))
5476         {
5477           val = Fcons (category_val, val);
5478           if (highest)
5479             break;
5480         }
5481     }
5482   if (!highest)
5483     val = Fnreverse (val);
5484
5485   /* Then, replace the elements with subsidiary coding systems.  */
5486   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
5487     {
5488       if (eol_type != CODING_EOL_UNDECIDED
5489           && eol_type != CODING_EOL_INCONSISTENT)
5490         {
5491           Lisp_Object eol;
5492           eol = Fget (XCAR (tmp), Qeol_type);
5493           if (VECTORP (eol))
5494             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
5495         }
5496     }
5497   return (highest ? XCAR (val) : val);
5498 }
5499
5500 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
5501        2, 3, 0,
5502   "Detect coding system of the text in the region between START and END.\n\
5503 Return a list of possible coding systems ordered by priority.\n\
5504 \n\
5505 If only ASCII characters are found, it returns a list of single element\n\
5506 `undecided' or its subsidiary coding system according to a detected\n\
5507 end-of-line format.\n\
5508 \n\
5509 If optional argument HIGHEST is non-nil, return the coding system of\n\
5510 highest priority.")
5511   (start, end, highest)
5512      Lisp_Object start, end, highest;
5513 {
5514   int from, to;
5515   int from_byte, to_byte;
5516
5517   CHECK_NUMBER_COERCE_MARKER (start, 0);
5518   CHECK_NUMBER_COERCE_MARKER (end, 1);
5519
5520   validate_region (&start, &end);
5521   from = XINT (start), to = XINT (end);
5522   from_byte = CHAR_TO_BYTE (from);
5523   to_byte = CHAR_TO_BYTE (to);
5524
5525   if (from < GPT && to >= GPT)
5526     move_gap_both (to, to_byte);
5527
5528   return detect_coding_system (BYTE_POS_ADDR (from_byte),
5529                                to_byte - from_byte,
5530                                !NILP (highest));
5531 }
5532
5533 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
5534        1, 2, 0,
5535   "Detect coding system of the text in STRING.\n\
5536 Return a list of possible coding systems ordered by priority.\n\
5537 \n\
5538 If only ASCII characters are found, it returns a list of single element\n\
5539 `undecided' or its subsidiary coding system according to a detected\n\
5540 end-of-line format.\n\
5541 \n\
5542 If optional argument HIGHEST is non-nil, return the coding system of\n\
5543 highest priority.")
5544   (string, highest)
5545      Lisp_Object string, highest;
5546 {
5547   CHECK_STRING (string, 0);
5548
5549   return detect_coding_system (XSTRING (string)->data,
5550                                STRING_BYTES (XSTRING (string)),
5551                                !NILP (highest));
5552 }
5553
5554 /* Return an intersection of lists L1 and L2.  */
5555
5556 static Lisp_Object
5557 intersection (l1, l2)
5558      Lisp_Object l1, l2;
5559 {
5560   Lisp_Object val;
5561
5562   for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
5563     {
5564       if (!NILP (Fmemq (XCAR (l1), l2)))
5565         val = Fcons (XCAR (l1), val);
5566     }
5567   return val;
5568 }
5569
5570
5571 /*  Subroutine for Fsafe_coding_systems_region_internal.
5572
5573     Return a list of coding systems that safely encode the multibyte
5574     text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
5575     possible coding systems.  If it is nil, it means that we have not
5576     yet found any coding systems.
5577
5578     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
5579     element of WORK_TABLE is set to t once the element is looked up.
5580
5581     If a non-ASCII single byte char is found, set
5582     *single_byte_char_found to 1.  */
5583
5584 static Lisp_Object
5585 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
5586      unsigned char *p, *pend;
5587      Lisp_Object safe_codings, work_table;
5588      int *single_byte_char_found;
5589 {
5590   int c, len, idx;
5591   Lisp_Object val;
5592
5593   while (p < pend)
5594     {
5595       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
5596       p += len;
5597       if (ASCII_BYTE_P (c))
5598         /* We can ignore ASCII characters here.  */
5599         continue;
5600       if (SINGLE_BYTE_CHAR_P (c))
5601         *single_byte_char_found = 1;
5602       if (NILP (safe_codings))
5603         continue;
5604       /* Check the safe coding systems for C.  */
5605       val = char_table_ref_and_index (work_table, c, &idx);
5606       if (EQ (val, Qt))
5607         /* This element was already checked.  Ignore it.  */
5608         continue;
5609       /* Remember that we checked this element.  */
5610       CHAR_TABLE_SET (work_table, idx, Qt);
5611
5612       /* If there are some safe coding systems for C and we have
5613          already found the other set of coding systems for the
5614          different characters, get the intersection of them.  */
5615       if (!EQ (safe_codings, Qt) && !NILP (val))
5616         val = intersection (safe_codings, val);
5617       safe_codings = val;
5618     }
5619   return safe_codings;
5620 }
5621
5622
5623 /* Return a list of coding systems that safely encode the text between
5624    START and END.  If the text contains only ASCII or is unibyte,
5625    return t.  */
5626
5627 DEFUN ("find-coding-systems-region-internal",
5628        Ffind_coding_systems_region_internal,
5629        Sfind_coding_systems_region_internal, 2, 2, 0,
5630   "Internal use only.")
5631   (start, end)
5632      Lisp_Object start, end;
5633 {
5634   Lisp_Object work_table, safe_codings;
5635   int non_ascii_p = 0;
5636   int single_byte_char_found = 0;
5637   unsigned char *p1, *p1end, *p2, *p2end, *p;
5638   Lisp_Object args[2];
5639
5640   if (STRINGP (start))
5641     {
5642       if (!STRING_MULTIBYTE (start))
5643         return Qt;
5644       p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
5645       p2 = p2end = p1end;
5646       if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
5647         non_ascii_p = 1;
5648     }
5649   else
5650     {
5651       int from, to, stop;
5652
5653       CHECK_NUMBER_COERCE_MARKER (start, 0);
5654       CHECK_NUMBER_COERCE_MARKER (end, 1);
5655       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
5656         args_out_of_range (start, end);
5657       if (NILP (current_buffer->enable_multibyte_characters))
5658         return Qt;
5659       from = CHAR_TO_BYTE (XINT (start));
5660       to = CHAR_TO_BYTE (XINT (end));
5661       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
5662       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
5663       if (stop == to)
5664         p2 = p2end = p1end;
5665       else
5666         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
5667       if (XINT (end) - XINT (start) != to - from)
5668         non_ascii_p = 1;
5669     }
5670
5671   if (!non_ascii_p)
5672     {
5673       /* We are sure that the text contains no multibyte character.
5674          Check if it contains eight-bit-graphic.  */
5675       p = p1;
5676       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
5677       if (p == p1end)
5678         {
5679           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
5680           if (p == p2end)
5681             return Qt;
5682         }
5683     }
5684
5685   /* The text contains non-ASCII characters.  */
5686   work_table = Fcopy_sequence (Vchar_coding_system_table);
5687   safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
5688                                     &single_byte_char_found);
5689   if (p2 < p2end)
5690     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
5691                                       &single_byte_char_found);
5692
5693   if (!single_byte_char_found)
5694     {
5695       /* Append generic coding systems.  */
5696       Lisp_Object args[2];
5697       args[0] = safe_codings;
5698       args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
5699                                         make_number (0));
5700       safe_codings = Fappend (make_number (2), args);
5701     }
5702   else
5703     safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings));
5704   return safe_codings;
5705 }
5706
5707
5708 Lisp_Object
5709 code_convert_region1 (start, end, coding_system, encodep)
5710      Lisp_Object start, end, coding_system;
5711      int encodep;
5712 {
5713   struct coding_system coding;
5714   int from, to, len;
5715
5716   CHECK_NUMBER_COERCE_MARKER (start, 0);
5717   CHECK_NUMBER_COERCE_MARKER (end, 1);
5718   CHECK_SYMBOL (coding_system, 2);
5719
5720   validate_region (&start, &end);
5721   from = XFASTINT (start);
5722   to = XFASTINT (end);
5723
5724   if (NILP (coding_system))
5725     return make_number (to - from);
5726
5727   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5728     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5729
5730   coding.mode |= CODING_MODE_LAST_BLOCK;
5731   coding.src_multibyte = coding.dst_multibyte
5732     = !NILP (current_buffer->enable_multibyte_characters);
5733   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
5734                        &coding, encodep, 1);
5735   Vlast_coding_system_used = coding.symbol;
5736   return make_number (coding.produced_char);
5737 }
5738
5739 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
5740        3, 3, "r\nzCoding system: ",
5741   "Decode the current region by specified coding system.\n\
5742 When called from a program, takes three arguments:\n\
5743 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5744 This function sets `last-coding-system-used' to the precise coding system\n\
5745 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5746 not fully specified.)\n\
5747 It returns the length of the decoded text.")
5748   (start, end, coding_system)
5749      Lisp_Object start, end, coding_system;
5750 {
5751   return code_convert_region1 (start, end, coding_system, 0);
5752 }
5753
5754 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
5755        3, 3, "r\nzCoding system: ",
5756   "Encode the current region by specified coding system.\n\
5757 When called from a program, takes three arguments:\n\
5758 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5759 This function sets `last-coding-system-used' to the precise coding system\n\
5760 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5761 not fully specified.)\n\
5762 It returns the length of the encoded text.")
5763   (start, end, coding_system)
5764      Lisp_Object start, end, coding_system;
5765 {
5766   return code_convert_region1 (start, end, coding_system, 1);
5767 }
5768
5769 Lisp_Object
5770 code_convert_string1 (string, coding_system, nocopy, encodep)
5771      Lisp_Object string, coding_system, nocopy;
5772      int encodep;
5773 {
5774   struct coding_system coding;
5775
5776   CHECK_STRING (string, 0);
5777   CHECK_SYMBOL (coding_system, 1);
5778
5779   if (NILP (coding_system))
5780     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
5781
5782   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5783     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5784
5785   coding.mode |= CODING_MODE_LAST_BLOCK;
5786   string = (encodep
5787             ? encode_coding_string (string, &coding, !NILP (nocopy))
5788             : decode_coding_string (string, &coding, !NILP (nocopy)));
5789   Vlast_coding_system_used = coding.symbol;
5790
5791   return string;
5792 }
5793
5794 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
5795        2, 3, 0,
5796   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5797 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5798 if the decoding operation is trivial.\n\
5799 This function sets `last-coding-system-used' to the precise coding system\n\
5800 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5801 not fully specified.)")
5802   (string, coding_system, nocopy)
5803      Lisp_Object string, coding_system, nocopy;
5804 {
5805   return code_convert_string1 (string, coding_system, nocopy, 0);
5806 }
5807
5808 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
5809        2, 3, 0,
5810   "Encode STRING to CODING-SYSTEM, and return the result.\n\
5811 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5812 if the encoding operation is trivial.\n\
5813 This function sets `last-coding-system-used' to the precise coding system\n\
5814 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5815 not fully specified.)")
5816   (string, coding_system, nocopy)
5817      Lisp_Object string, coding_system, nocopy;
5818 {
5819   return code_convert_string1 (string, coding_system, nocopy, 1);
5820 }
5821
5822 /* Encode or decode STRING according to CODING_SYSTEM.
5823    Do not set Vlast_coding_system_used.
5824
5825    This function is called only from macros DECODE_FILE and
5826    ENCODE_FILE, thus we ignore character composition.  */
5827
5828 Lisp_Object
5829 code_convert_string_norecord (string, coding_system, encodep)
5830      Lisp_Object string, coding_system;
5831      int encodep;
5832 {
5833   struct coding_system coding;
5834
5835   CHECK_STRING (string, 0);
5836   CHECK_SYMBOL (coding_system, 1);
5837
5838   if (NILP (coding_system))
5839     return string;
5840
5841   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5842     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5843
5844   coding.composing = COMPOSITION_DISABLED;
5845   coding.mode |= CODING_MODE_LAST_BLOCK;
5846   return (encodep
5847           ? encode_coding_string (string, &coding, 1)
5848           : decode_coding_string (string, &coding, 1));
5849 }
5850 \f
5851 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
5852   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
5853 Return the corresponding character.")
5854   (code)
5855      Lisp_Object code;
5856 {
5857   unsigned char c1, c2, s1, s2;
5858   Lisp_Object val;
5859
5860   CHECK_NUMBER (code, 0);
5861   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
5862   if (s1 == 0)
5863     {
5864       if (s2 < 0x80)
5865         XSETFASTINT (val, s2);
5866       else if (s2 >= 0xA0 || s2 <= 0xDF)
5867         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
5868       else
5869         error ("Invalid Shift JIS code: %x", XFASTINT (code));
5870     }
5871   else
5872     {
5873       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
5874           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
5875         error ("Invalid Shift JIS code: %x", XFASTINT (code));
5876       DECODE_SJIS (s1, s2, c1, c2);
5877       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
5878     }
5879   return val;
5880 }
5881
5882 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
5883   "Encode a Japanese character CHAR to shift_jis encoding.\n\
5884 Return the corresponding code in SJIS.")
5885   (ch)
5886      Lisp_Object ch;
5887 {
5888   int charset, c1, c2, s1, s2;
5889   Lisp_Object val;
5890
5891   CHECK_NUMBER (ch, 0);
5892   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
5893   if (charset == CHARSET_ASCII)
5894     {
5895       val = ch;
5896     }
5897   else if (charset == charset_jisx0208
5898            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
5899     {
5900       ENCODE_SJIS (c1, c2, s1, s2);
5901       XSETFASTINT (val, (s1 << 8) | s2);
5902     }
5903   else if (charset == charset_katakana_jisx0201
5904            && c1 > 0x20 && c2 < 0xE0)
5905     {
5906       XSETFASTINT (val, c1 | 0x80);
5907     }
5908   else
5909     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
5910   return val;
5911 }
5912
5913 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
5914   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
5915 Return the corresponding character.")
5916   (code)
5917      Lisp_Object code;
5918 {
5919   int charset;
5920   unsigned char b1, b2, c1, c2;
5921   Lisp_Object val;
5922
5923   CHECK_NUMBER (code, 0);
5924   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
5925   if (b1 == 0)
5926     {
5927       if (b2 >= 0x80)
5928         error ("Invalid BIG5 code: %x", XFASTINT (code));
5929       val = code;
5930     }
5931   else
5932     {
5933       if ((b1 < 0xA1 || b1 > 0xFE)
5934           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
5935         error ("Invalid BIG5 code: %x", XFASTINT (code));
5936       DECODE_BIG5 (b1, b2, charset, c1, c2);
5937       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
5938     }
5939   return val;
5940 }
5941
5942 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
5943   "Encode the Big5 character CHAR to BIG5 coding system.\n\
5944 Return the corresponding character code in Big5.")
5945   (ch)
5946      Lisp_Object ch;
5947 {
5948   int charset, c1, c2, b1, b2;
5949   Lisp_Object val;
5950
5951   CHECK_NUMBER (ch, 0);
5952   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
5953   if (charset == CHARSET_ASCII)
5954     {
5955       val = ch;
5956     }
5957   else if ((charset == charset_big5_1
5958             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
5959            || (charset == charset_big5_2
5960                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
5961     {
5962       ENCODE_BIG5 (charset, c1, c2, b1, b2);
5963       XSETFASTINT (val, (b1 << 8) | b2);
5964     }
5965   else
5966     error ("Can't encode to Big5: %d", XFASTINT (ch));
5967   return val;
5968 }
5969 \f
5970 DEFUN ("set-terminal-coding-system-internal",
5971        Fset_terminal_coding_system_internal,
5972        Sset_terminal_coding_system_internal, 1, 1, 0, "")
5973   (coding_system)
5974      Lisp_Object coding_system;
5975 {
5976   CHECK_SYMBOL (coding_system, 0);
5977   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
5978   /* We had better not send unsafe characters to terminal.  */
5979   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
5980   /* Characer composition should be disabled.  */
5981   terminal_coding.composing = COMPOSITION_DISABLED;
5982   terminal_coding.src_multibyte = 1;
5983   terminal_coding.dst_multibyte = 0;
5984   return Qnil;
5985 }
5986
5987 DEFUN ("set-safe-terminal-coding-system-internal",
5988        Fset_safe_terminal_coding_system_internal,
5989        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
5990   (coding_system)
5991      Lisp_Object coding_system;
5992 {
5993   CHECK_SYMBOL (coding_system, 0);
5994   setup_coding_system (Fcheck_coding_system (coding_system),
5995                        &safe_terminal_coding);
5996   /* Characer composition should be disabled.  */
5997   safe_terminal_coding.composing = COMPOSITION_DISABLED;
5998   safe_terminal_coding.src_multibyte = 1;
5999   safe_terminal_coding.dst_multibyte = 0;
6000   return Qnil;
6001 }
6002
6003 DEFUN ("terminal-coding-system",
6004        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6005   "Return coding system specified for terminal output.")
6006   ()
6007 {
6008   return terminal_coding.symbol;
6009 }
6010
6011 DEFUN ("set-keyboard-coding-system-internal",
6012        Fset_keyboard_coding_system_internal,
6013        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
6014   (coding_system)
6015      Lisp_Object coding_system;
6016 {
6017   CHECK_SYMBOL (coding_system, 0);
6018   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
6019   /* Characer composition should be disabled.  */
6020   keyboard_coding.composing = COMPOSITION_DISABLED;
6021   return Qnil;
6022 }
6023
6024 DEFUN ("keyboard-coding-system",
6025        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6026   "Return coding system specified for decoding keyboard input.")
6027   ()
6028 {
6029   return keyboard_coding.symbol;
6030 }
6031
6032 \f
6033 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
6034        Sfind_operation_coding_system,  1, MANY, 0,
6035   "Choose a coding system for an operation based on the target name.\n\
6036 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
6037 DECODING-SYSTEM is the coding system to use for decoding\n\
6038 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
6039 for encoding (in case OPERATION does encoding).\n\
6040 \n\
6041 The first argument OPERATION specifies an I/O primitive:\n\
6042   For file I/O, `insert-file-contents' or `write-region'.\n\
6043   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
6044   For network I/O, `open-network-stream'.\n\
6045 \n\
6046 The remaining arguments should be the same arguments that were passed\n\
6047 to the primitive.  Depending on which primitive, one of those arguments\n\
6048 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
6049 whichever argument specifies the file name is TARGET.\n\
6050 \n\
6051 TARGET has a meaning which depends on OPERATION:\n\
6052   For file I/O, TARGET is a file name.\n\
6053   For process I/O, TARGET is a process name.\n\
6054   For network I/O, TARGET is a service name or a port number\n\
6055 \n\
6056 This function looks up what specified for TARGET in,\n\
6057 `file-coding-system-alist', `process-coding-system-alist',\n\
6058 or `network-coding-system-alist' depending on OPERATION.\n\
6059 They may specify a coding system, a cons of coding systems,\n\
6060 or a function symbol to call.\n\
6061 In the last case, we call the function with one argument,\n\
6062 which is a list of all the arguments given to this function.")
6063   (nargs, args)
6064      int nargs;
6065      Lisp_Object *args;
6066 {
6067   Lisp_Object operation, target_idx, target, val;
6068   register Lisp_Object chain;
6069
6070   if (nargs < 2)
6071     error ("Too few arguments");
6072   operation = args[0];
6073   if (!SYMBOLP (operation)
6074       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
6075     error ("Invalid first arguement");
6076   if (nargs < 1 + XINT (target_idx))
6077     error ("Too few arguments for operation: %s",
6078            XSYMBOL (operation)->name->data);
6079   target = args[XINT (target_idx) + 1];
6080   if (!(STRINGP (target)
6081         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
6082     error ("Invalid %dth argument", XINT (target_idx) + 1);
6083
6084   chain = ((EQ (operation, Qinsert_file_contents)
6085             || EQ (operation, Qwrite_region))
6086            ? Vfile_coding_system_alist
6087            : (EQ (operation, Qopen_network_stream)
6088               ? Vnetwork_coding_system_alist
6089               : Vprocess_coding_system_alist));
6090   if (NILP (chain))
6091     return Qnil;
6092
6093   for (; CONSP (chain); chain = XCDR (chain))
6094     {
6095       Lisp_Object elt;
6096       elt = XCAR (chain);
6097
6098       if (CONSP (elt)
6099           && ((STRINGP (target)
6100                && STRINGP (XCAR (elt))
6101                && fast_string_match (XCAR (elt), target) >= 0)
6102               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6103         {
6104           val = XCDR (elt);
6105           /* Here, if VAL is both a valid coding system and a valid
6106              function symbol, we return VAL as a coding system.  */
6107           if (CONSP (val))
6108             return val;
6109           if (! SYMBOLP (val))
6110             return Qnil;
6111           if (! NILP (Fcoding_system_p (val)))
6112             return Fcons (val, val);
6113           if (! NILP (Ffboundp (val)))
6114             {
6115               val = call1 (val, Flist (nargs, args));
6116               if (CONSP (val))
6117                 return val;
6118               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
6119                 return Fcons (val, val);
6120             }
6121           return Qnil;
6122         }
6123     }
6124   return Qnil;
6125 }
6126
6127 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
6128        Supdate_coding_systems_internal, 0, 0, 0,
6129   "Update internal database for ISO2022 and CCL based coding systems.\n\
6130 When values of any coding categories are changed, you must\n\
6131 call this function")
6132   ()
6133 {
6134   int i;
6135
6136   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
6137     {
6138       Lisp_Object val;
6139
6140       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
6141       if (!NILP (val))
6142         {
6143           if (! coding_system_table[i])
6144             coding_system_table[i] = ((struct coding_system *)
6145                                       xmalloc (sizeof (struct coding_system)));
6146           setup_coding_system (val, coding_system_table[i]);
6147         }
6148       else if (coding_system_table[i])
6149         {
6150           xfree (coding_system_table[i]);
6151           coding_system_table[i] = NULL;
6152         }
6153     }
6154
6155   return Qnil;
6156 }
6157
6158 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
6159        Sset_coding_priority_internal, 0, 0, 0,
6160   "Update internal database for the current value of `coding-category-list'.\n\
6161 This function is internal use only.")
6162   ()
6163 {
6164   int i = 0, idx;
6165   Lisp_Object val;
6166
6167   val = Vcoding_category_list;
6168
6169   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
6170     {
6171       if (! SYMBOLP (XCAR (val)))
6172         break;
6173       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
6174       if (idx >= CODING_CATEGORY_IDX_MAX)
6175         break;
6176       coding_priorities[i++] = (1 << idx);
6177       val = XCDR (val);
6178     }
6179   /* If coding-category-list is valid and contains all coding
6180      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
6181      the following code saves Emacs from crashing.  */
6182   while (i < CODING_CATEGORY_IDX_MAX)
6183     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
6184
6185   return Qnil;
6186 }
6187
6188 #endif /* emacs */
6189
6190 \f
6191 /*** 9. Post-amble ***/
6192
6193 void
6194 init_coding ()
6195 {
6196   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
6197 }
6198
6199 void
6200 init_coding_once ()
6201 {
6202   int i;
6203
6204   /* Emacs' internal format specific initialize routine.  */
6205   for (i = 0; i <= 0x20; i++)
6206     emacs_code_class[i] = EMACS_control_code;
6207   emacs_code_class[0x0A] = EMACS_linefeed_code;
6208   emacs_code_class[0x0D] = EMACS_carriage_return_code;
6209   for (i = 0x21 ; i < 0x7F; i++)
6210     emacs_code_class[i] = EMACS_ascii_code;
6211   emacs_code_class[0x7F] = EMACS_control_code;
6212   for (i = 0x80; i < 0xFF; i++)
6213     emacs_code_class[i] = EMACS_invalid_code;
6214   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
6215   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
6216   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6217   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6218
6219   /* ISO2022 specific initialize routine.  */
6220   for (i = 0; i < 0x20; i++)
6221     iso_code_class[i] = ISO_control_0;
6222   for (i = 0x21; i < 0x7F; i++)
6223     iso_code_class[i] = ISO_graphic_plane_0;
6224   for (i = 0x80; i < 0xA0; i++)
6225     iso_code_class[i] = ISO_control_1;
6226   for (i = 0xA1; i < 0xFF; i++)
6227     iso_code_class[i] = ISO_graphic_plane_1;
6228   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6229   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6230   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
6231   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
6232   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
6233   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6234   iso_code_class[ISO_CODE_ESC] = ISO_escape;
6235   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6236   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6237   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6238
6239   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
6240
6241   setup_coding_system (Qnil, &keyboard_coding);
6242   setup_coding_system (Qnil, &terminal_coding);
6243   setup_coding_system (Qnil, &safe_terminal_coding);
6244   setup_coding_system (Qnil, &default_buffer_file_coding);
6245
6246   bzero (coding_system_table, sizeof coding_system_table);
6247
6248   bzero (ascii_skip_code, sizeof ascii_skip_code);
6249   for (i = 0; i < 128; i++)
6250     ascii_skip_code[i] = 1;
6251
6252 #if defined (MSDOS) || defined (WINDOWSNT)
6253   system_eol_type = CODING_EOL_CRLF;
6254 #else
6255   system_eol_type = CODING_EOL_LF;
6256 #endif
6257
6258   inhibit_pre_post_conversion = 0;
6259 }
6260
6261 #ifdef emacs
6262
6263 void
6264 syms_of_coding ()
6265 {
6266   Qtarget_idx = intern ("target-idx");
6267   staticpro (&Qtarget_idx);
6268
6269   Qcoding_system_history = intern ("coding-system-history");
6270   staticpro (&Qcoding_system_history);
6271   Fset (Qcoding_system_history, Qnil);
6272
6273   /* Target FILENAME is the first argument.  */
6274   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
6275   /* Target FILENAME is the third argument.  */
6276   Fput (Qwrite_region, Qtarget_idx, make_number (2));
6277
6278   Qcall_process = intern ("call-process");
6279   staticpro (&Qcall_process);
6280   /* Target PROGRAM is the first argument.  */
6281   Fput (Qcall_process, Qtarget_idx, make_number (0));
6282
6283   Qcall_process_region = intern ("call-process-region");
6284   staticpro (&Qcall_process_region);
6285   /* Target PROGRAM is the third argument.  */
6286   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
6287
6288   Qstart_process = intern ("start-process");
6289   staticpro (&Qstart_process);
6290   /* Target PROGRAM is the third argument.  */
6291   Fput (Qstart_process, Qtarget_idx, make_number (2));
6292
6293   Qopen_network_stream = intern ("open-network-stream");
6294   staticpro (&Qopen_network_stream);
6295   /* Target SERVICE is the fourth argument.  */
6296   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
6297
6298   Qcoding_system = intern ("coding-system");
6299   staticpro (&Qcoding_system);
6300
6301   Qeol_type = intern ("eol-type");
6302   staticpro (&Qeol_type);
6303
6304   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
6305   staticpro (&Qbuffer_file_coding_system);
6306
6307   Qpost_read_conversion = intern ("post-read-conversion");
6308   staticpro (&Qpost_read_conversion);
6309
6310   Qpre_write_conversion = intern ("pre-write-conversion");
6311   staticpro (&Qpre_write_conversion);
6312
6313   Qno_conversion = intern ("no-conversion");
6314   staticpro (&Qno_conversion);
6315
6316   Qundecided = intern ("undecided");
6317   staticpro (&Qundecided);
6318
6319   Qcoding_system_p = intern ("coding-system-p");
6320   staticpro (&Qcoding_system_p);
6321
6322   Qcoding_system_error = intern ("coding-system-error");
6323   staticpro (&Qcoding_system_error);
6324
6325   Fput (Qcoding_system_error, Qerror_conditions,
6326         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
6327   Fput (Qcoding_system_error, Qerror_message,
6328         build_string ("Invalid coding system"));
6329
6330   Qcoding_category = intern ("coding-category");
6331   staticpro (&Qcoding_category);
6332   Qcoding_category_index = intern ("coding-category-index");
6333   staticpro (&Qcoding_category_index);
6334
6335   Vcoding_category_table
6336     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
6337   staticpro (&Vcoding_category_table);
6338   {
6339     int i;
6340     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
6341       {
6342         XVECTOR (Vcoding_category_table)->contents[i]
6343           = intern (coding_category_name[i]);
6344         Fput (XVECTOR (Vcoding_category_table)->contents[i],
6345               Qcoding_category_index, make_number (i));
6346       }
6347   }
6348
6349   Qtranslation_table = intern ("translation-table");
6350   staticpro (&Qtranslation_table);
6351   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
6352
6353   Qtranslation_table_id = intern ("translation-table-id");
6354   staticpro (&Qtranslation_table_id);
6355
6356   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
6357   staticpro (&Qtranslation_table_for_decode);
6358
6359   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
6360   staticpro (&Qtranslation_table_for_encode);
6361
6362   Qsafe_chars = intern ("safe-chars");
6363   staticpro (&Qsafe_chars);
6364
6365   Qchar_coding_system = intern ("char-coding-system");
6366   staticpro (&Qchar_coding_system);
6367
6368   /* Intern this now in case it isn't already done.
6369      Setting this variable twice is harmless.
6370      But don't staticpro it here--that is done in alloc.c.  */
6371   Qchar_table_extra_slots = intern ("char-table-extra-slots");
6372   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
6373   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
6374
6375   Qvalid_codes = intern ("valid-codes");
6376   staticpro (&Qvalid_codes);
6377
6378   Qemacs_mule = intern ("emacs-mule");
6379   staticpro (&Qemacs_mule);
6380
6381   Qraw_text = intern ("raw-text");
6382   staticpro (&Qraw_text);
6383
6384   defsubr (&Scoding_system_p);
6385   defsubr (&Sread_coding_system);
6386   defsubr (&Sread_non_nil_coding_system);
6387   defsubr (&Scheck_coding_system);
6388   defsubr (&Sdetect_coding_region);
6389   defsubr (&Sdetect_coding_string);
6390   defsubr (&Sfind_coding_systems_region_internal);
6391   defsubr (&Sdecode_coding_region);
6392   defsubr (&Sencode_coding_region);
6393   defsubr (&Sdecode_coding_string);
6394   defsubr (&Sencode_coding_string);
6395   defsubr (&Sdecode_sjis_char);
6396   defsubr (&Sencode_sjis_char);
6397   defsubr (&Sdecode_big5_char);
6398   defsubr (&Sencode_big5_char);
6399   defsubr (&Sset_terminal_coding_system_internal);
6400   defsubr (&Sset_safe_terminal_coding_system_internal);
6401   defsubr (&Sterminal_coding_system);
6402   defsubr (&Sset_keyboard_coding_system_internal);
6403   defsubr (&Skeyboard_coding_system);
6404   defsubr (&Sfind_operation_coding_system);
6405   defsubr (&Supdate_coding_systems_internal);
6406   defsubr (&Sset_coding_priority_internal);
6407
6408   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
6409     "List of coding systems.\n\
6410 \n\
6411 Do not alter the value of this variable manually.  This variable should be\n\
6412 updated by the functions `make-coding-system' and\n\
6413 `define-coding-system-alias'.");
6414   Vcoding_system_list = Qnil;
6415
6416   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
6417     "Alist of coding system names.\n\
6418 Each element is one element list of coding system name.\n\
6419 This variable is given to `completing-read' as TABLE argument.\n\
6420 \n\
6421 Do not alter the value of this variable manually.  This variable should be\n\
6422 updated by the functions `make-coding-system' and\n\
6423 `define-coding-system-alias'.");
6424   Vcoding_system_alist = Qnil;
6425
6426   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
6427     "List of coding-categories (symbols) ordered by priority.");
6428   {
6429     int i;
6430
6431     Vcoding_category_list = Qnil;
6432     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
6433       Vcoding_category_list
6434         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
6435                  Vcoding_category_list);
6436   }
6437
6438   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
6439     "Specify the coding system for read operations.\n\
6440 It is useful to bind this variable with `let', but do not set it globally.\n\
6441 If the value is a coding system, it is used for decoding on read operation.\n\
6442 If not, an appropriate element is used from one of the coding system alists:\n\
6443 There are three such tables, `file-coding-system-alist',\n\
6444 `process-coding-system-alist', and `network-coding-system-alist'.");
6445   Vcoding_system_for_read = Qnil;
6446
6447   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
6448     "Specify the coding system for write operations.\n\
6449 Programs bind this variable with `let', but you should not set it globally.\n\
6450 If the value is a coding system, it is used for encoding of output,\n\
6451 when writing it to a file and when sending it to a file or subprocess.\n\
6452 \n\
6453 If this does not specify a coding system, an appropriate element\n\
6454 is used from one of the coding system alists:\n\
6455 There are three such tables, `file-coding-system-alist',\n\
6456 `process-coding-system-alist', and `network-coding-system-alist'.\n\
6457 For output to files, if the above procedure does not specify a coding system,\n\
6458 the value of `buffer-file-coding-system' is used.");
6459   Vcoding_system_for_write = Qnil;
6460
6461   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
6462     "Coding system used in the latest file or process I/O.");
6463   Vlast_coding_system_used = Qnil;
6464
6465   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
6466     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
6467 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
6468 such conversion.");
6469   inhibit_eol_conversion = 0;
6470
6471   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
6472     "Non-nil means process buffer inherits coding system of process output.\n\
6473 Bind it to t if the process output is to be treated as if it were a file\n\
6474 read from some filesystem.");
6475   inherit_process_coding_system = 0;
6476
6477   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
6478     "Alist to decide a coding system to use for a file I/O operation.\n\
6479 The format is ((PATTERN . VAL) ...),\n\
6480 where PATTERN is a regular expression matching a file name,\n\
6481 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6482 If VAL is a coding system, it is used for both decoding and encoding\n\
6483 the file contents.\n\
6484 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6485 and the cdr part is used for encoding.\n\
6486 If VAL is a function symbol, the function must return a coding system\n\
6487 or a cons of coding systems which are used as above.\n\
6488 \n\
6489 See also the function `find-operation-coding-system'\n\
6490 and the variable `auto-coding-alist'.");
6491   Vfile_coding_system_alist = Qnil;
6492
6493   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
6494     "Alist to decide a coding system to use for a process I/O operation.\n\
6495 The format is ((PATTERN . VAL) ...),\n\
6496 where PATTERN is a regular expression matching a program name,\n\
6497 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6498 If VAL is a coding system, it is used for both decoding what received\n\
6499 from the program and encoding what sent to the program.\n\
6500 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6501 and the cdr part is used for encoding.\n\
6502 If VAL is a function symbol, the function must return a coding system\n\
6503 or a cons of coding systems which are used as above.\n\
6504 \n\
6505 See also the function `find-operation-coding-system'.");
6506   Vprocess_coding_system_alist = Qnil;
6507
6508   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
6509     "Alist to decide a coding system to use for a network I/O operation.\n\
6510 The format is ((PATTERN . VAL) ...),\n\
6511 where PATTERN is a regular expression matching a network service name\n\
6512 or is a port number to connect to,\n\
6513 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6514 If VAL is a coding system, it is used for both decoding what received\n\
6515 from the network stream and encoding what sent to the network stream.\n\
6516 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6517 and the cdr part is used for encoding.\n\
6518 If VAL is a function symbol, the function must return a coding system\n\
6519 or a cons of coding systems which are used as above.\n\
6520 \n\
6521 See also the function `find-operation-coding-system'.");
6522   Vnetwork_coding_system_alist = Qnil;
6523
6524   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
6525     "Coding system to use with system messages.");
6526   Vlocale_coding_system = Qnil;
6527
6528   /* The eol mnemonics are reset in startup.el system-dependently.  */
6529   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
6530     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
6531   eol_mnemonic_unix = build_string (":");
6532
6533   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
6534     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
6535   eol_mnemonic_dos = build_string ("\\");
6536
6537   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
6538     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
6539   eol_mnemonic_mac = build_string ("/");
6540
6541   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
6542     "*String displayed in mode line when end-of-line format is not yet determined.");
6543   eol_mnemonic_undecided = build_string (":");
6544
6545   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
6546     "*Non-nil enables character translation while encoding and decoding.");
6547   Venable_character_translation = Qt;
6548
6549   DEFVAR_LISP ("standard-translation-table-for-decode",
6550     &Vstandard_translation_table_for_decode,
6551     "Table for translating characters while decoding.");
6552   Vstandard_translation_table_for_decode = Qnil;
6553
6554   DEFVAR_LISP ("standard-translation-table-for-encode",
6555     &Vstandard_translation_table_for_encode,
6556     "Table for translationg characters while encoding.");
6557   Vstandard_translation_table_for_encode = Qnil;
6558
6559   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
6560     "Alist of charsets vs revision numbers.\n\
6561 While encoding, if a charset (car part of an element) is found,\n\
6562 designate it with the escape sequence identifing revision (cdr part of the element).");
6563   Vcharset_revision_alist = Qnil;
6564
6565   DEFVAR_LISP ("default-process-coding-system",
6566                &Vdefault_process_coding_system,
6567     "Cons of coding systems used for process I/O by default.\n\
6568 The car part is used for decoding a process output,\n\
6569 the cdr part is used for encoding a text to be sent to a process.");
6570   Vdefault_process_coding_system = Qnil;
6571
6572   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
6573     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
6574 This is a vector of length 256.\n\
6575 If Nth element is non-nil, the existence of code N in a file\n\
6576 \(or output of subprocess) doesn't prevent it to be detected as\n\
6577 a coding system of ISO 2022 variant which has a flag\n\
6578 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
6579 or reading output of a subprocess.\n\
6580 Only 128th through 159th elements has a meaning.");
6581   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
6582
6583   DEFVAR_LISP ("select-safe-coding-system-function",
6584                &Vselect_safe_coding_system_function,
6585     "Function to call to select safe coding system for encoding a text.\n\
6586 \n\
6587 If set, this function is called to force a user to select a proper\n\
6588 coding system which can encode the text in the case that a default\n\
6589 coding system used in each operation can't encode the text.\n\
6590 \n\
6591 The default value is `select-safe-coding-system' (which see).");
6592   Vselect_safe_coding_system_function = Qnil;
6593
6594   DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
6595     "Char-table containing safe coding systems of each characters.\n\
6596 Each element doesn't include such generic coding systems that can\n\
6597 encode any characters.   They are in the first extra slot.");
6598   Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
6599
6600   DEFVAR_BOOL ("inhibit-iso-escape-detection",
6601                &inhibit_iso_escape_detection,
6602     "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
6603 \n\
6604 By default, on reading a file, Emacs tries to detect how the text is\n\
6605 encoded.  This code detection is sensitive to escape sequences.  If\n\
6606 the sequence is valid as ISO2022, the code is determined as one of\n\
6607 the ISO2022 encodings, and the file is decoded by the corresponding\n\
6608 coding system (e.g. `iso-2022-7bit').\n\
6609 \n\
6610 However, there may be a case that you want to read escape sequences in\n\
6611 a file as is.  In such a case, you can set this variable to non-nil.\n\
6612 Then, as the code detection ignores any escape sequences, no file is\n\
6613 detected as encoded in some ISO2022 encoding.  The result is that all\n\
6614 escape sequences become visible in a buffer.\n\
6615 \n\
6616 The default value is nil, and it is strongly recommended not to change\n\
6617 it.  That is because many Emacs Lisp source files that contain\n\
6618 non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
6619 in Emacs's distribution, and they won't be decoded correctly on\n\
6620 reading if you suppress escape sequence detection.\n\
6621 \n\
6622 The other way to read escape sequences in a file without decoding is\n\
6623 to explicitly specify some coding system that doesn't use ISO2022's\n\
6624 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
6625   inhibit_iso_escape_detection = 0;
6626 }
6627
6628 char *
6629 emacs_strerror (error_number)
6630      int error_number;
6631 {
6632   char *str;
6633
6634   synchronize_system_messages_locale ();
6635   str = strerror (error_number);
6636
6637   if (! NILP (Vlocale_coding_system))
6638     {
6639       Lisp_Object dec = code_convert_string_norecord (build_string (str),
6640                                                       Vlocale_coding_system,
6641                                                       0);
6642       str = (char *) XSTRING (dec)->data;
6643     }
6644
6645   return str;
6646 }
6647
6648 #endif /* emacs */
6649