src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   0. General comments
  25   1. Preamble
  26   2. Emacs' internal format (emacs-mule) handlers
  27   3. ISO2022 handlers
  28   4. Shift-JIS and BIG5 handlers
  29   5. CCL handlers
  30   6. End-of-line handlers
  31   7. C library functions
  32   8. Emacs Lisp library functions
  33   9. Post-amble
  34
  35 */
  36
  37 /*** 0. General comments ***/
  38
  39
  40 /*** GENERAL NOTE on CODING SYSTEM ***
  41
  42   Coding system is an encoding mechanism of one or more character
  43   sets.  Here's a list of coding systems which Emacs can handle.  When
  44   we say "decode", it means converting some other coding system to
  45   Emacs' internal format (emacs-mule), and when we say "encode",
  46   it means converting the coding system emacs-mule to some other
  47   coding system.
  48
  49   0. Emacs' internal format (emacs-mule)
  50
  51   Emacs itself holds a multi-lingual character in a buffer and a string
  52   in a special format.  Details are described in section 2.
  53
  54   1. ISO2022
  55
  56   The most famous coding system for multiple character sets.  X's
  57   Compound Text, various EUCs (Extended Unix Code), and coding
  58   systems used in Internet communication such as ISO-2022-JP are
  59   all variants of ISO2022.  Details are described in section 3.
  60
  61   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  62
  63   A coding system to encode character sets: ASCII, JISX0201, and
  64   JISX0208.  Widely used for PC's in Japan.  Details are described in
  65   section 4.
  66
  67   3. BIG5
  68
  69   A coding system to encode character sets: ASCII and Big5.  Widely
  70   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  71   described in section 4.  In this file, when we write "BIG5"
  72   (all uppercase), we mean the coding system, and when we write
  73   "Big5" (capitalized), we mean the character set.
  74
  75   4. Raw text
  76
  77   A coding system for a text containing random 8-bit code.  Emacs does
  78   no code conversion on such a text except for end-of-line format.
  79
  80   5. Other
  81
  82   If a user wants to read/write a text encoded in a coding system not
  83   listed above, he can supply a decoder and an encoder for it in CCL
  84   (Code Conversion Language) programs.  Emacs executes the CCL program
  85   while reading/writing.
  86
  87   Emacs represents a coding system by a Lisp symbol that has a property
  88   `coding-system'.  But, before actually using the coding system, the
  89   information about it is set in a structure of type `struct
  90   coding_system' for rapid processing.  See section 6 for more details.
  91
  92 */
  93
  94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  95
  96   How end-of-line of a text is encoded depends on a system.  For
  97   instance, Unix's format is just one byte of `line-feed' code,
  98   whereas DOS's format is two-byte sequence of `carriage-return' and
  99   `line-feed' codes.  MacOS's format is usually one byte of
 100   `carriage-return'.
 101
 102   Since text characters encoding and end-of-line encoding are
 103   independent, any coding system described above can take
 104   any format of end-of-line.  So, Emacs has information of format of
 105   end-of-line in each coding-system.  See section 6 for more details.
 106
 107 */
 108
 109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 110
 111   These functions check if a text between SRC and SRC_END is encoded
 112   in the coding system category XXX.  Each returns an integer value in
 113   which appropriate flag bits for the category XXX are set.  The flag
 114   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 115   template of these functions.  */
 116 #if 0
 117 int
 118 detect_coding_emacs_mule (src, src_end)
 119      unsigned char *src, *src_end;
 120 {
 121   ...
 122 }
 123 #endif
 124
 125 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 126
 127   These functions decode SRC_BYTES length of unibyte text at SOURCE
 128   encoded in CODING to Emacs' internal format.  The resulting
 129   multibyte text goes to a place pointed to by DESTINATION, the length
 130   of which should not exceed DST_BYTES.
 131
 132   These functions set the information of original and decoded texts in
 133   the members produced, produced_char, consumed, and consumed_char of
 134   the structure *CODING.  They also set the member result to one of
 135   CODING_FINISH_XXX indicating how the decoding finished.
 136
 137   DST_BYTES zero means that source area and destination area are
 138   overlapped, which means that we can produce a decoded text until it
 139   reaches the head of not-yet-decoded source text.
 140
 141   Below is a template of these functions.  */
 142 #if 0
 143 static void
 144 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 145      struct coding_system *coding;
 146      unsigned char *source, *destination;
 147      int src_bytes, dst_bytes;
 148 {
 149   ...
 150 }
 151 #endif
 152
 153 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 154
 155   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 156   internal multibyte format to CODING.  The resulting unibyte text
 157   goes to a place pointed to by DESTINATION, the length of which
 158   should not exceed DST_BYTES.
 159
 160   These functions set the information of original and encoded texts in
 161   the members produced, produced_char, consumed, and consumed_char of
 162   the structure *CODING.  They also set the member result to one of
 163   CODING_FINISH_XXX indicating how the encoding finished.
 164
 165   DST_BYTES zero means that source area and destination area are
 166   overlapped, which means that we can produce an encoded text until it
 167   reaches the head of not-yet-encoded source text.
 168
 169   Below is a template of these functions.  */
 170 #if 0
 171 static void
 172 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 173      struct coding_system *coding;
 174      unsigned char *source, *destination;
 175      int src_bytes, dst_bytes;
 176 {
 177   ...
 178 }
 179 #endif
 180
 181 /*** COMMONLY USED MACROS ***/
 182
 183 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 184    get one and two bytes from the source text respectively.
 185    If there are not enough bytes in the source, they jump to
 186    `label_end_of_loop'.  The caller should set variables `coding',
 187    `src' and `src_end' to appropriate pointer in advance.  These
 188    macros are called from decoding routines `decode_coding_XXX', thus
 189    it is assumed that the source text is unibyte.  */
 190
 191 #define ONE_MORE_BYTE(c1)                                       \
 192   do {                                                          \
 193     if (src >= src_end)  /* source exhausted */                 \
 194       {                                                         \
 195         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 196         goto label_end_of_loop;                                 \
 197       }                                                         \
 198     c1 = *src++;  /* take the byte and step past it */          \
 199   } while (0)
 200
 201 #define TWO_MORE_BYTES(c1, c2)                                  \
 202   do {                                                          \
 203     if (src + 1 >= src_end)  /* fewer than two bytes left */    \
 204       {  /* neither byte is consumed */                         \
 205         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 206         goto label_end_of_loop;                                 \
 207       }                                                         \
 208     c1 = *src++;                                                \
 209     c2 = *src++;                                                \
 210   } while (0)
 211
 212
 213 /* Set C to the next character at the source text pointed by `src'.
 214    If there are not enough characters in the source, jump to
 215    `label_end_of_loop'.  The caller should set variables `coding',
 216    `src', `src_end', and `translation_table' to appropriate pointers
 217    in advance.  This macro is used in encoding routines
 218    `encode_coding_XXX', thus it assumes that the source text is in
 219    multibyte form except for 8-bit characters.  8-bit characters are
 220    in multibyte form if coding->src_multibyte is nonzero, else they
 221    are represented by a single byte.  */
 222
 223 #define ONE_MORE_CHAR(c)                                        \
 224   do {                                                          \
 225     int len = src_end - src;  /* bytes left in the source */    \
 226     int bytes;                /* bytes to consume for C */      \
 227     if (len <= 0)                                               \
 228       {                                                         \
 229         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 230         goto label_end_of_loop;                                 \
 231       }                                                         \
 232     if (coding->src_multibyte                                   \
 233         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 234       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 235     else                                                        \
 236       c = *src, bytes = 1;  /* taken as one raw byte */         \
 237     if (!NILP (translation_table))                              \
 238       c = translate_char (translation_table, c, -1, 0, 0);      \
 239     src += bytes;                                               \
 240   } while (0)
 241
 242
 243 /* Produce a multibyte form of character C to `dst'.  Jump to
 244    `label_end_of_loop' if there's not enough space at `dst'.
 245
 246    If we are now in the middle of composition sequence, the decoded
 247    character may be ALTCHAR (for the current composition).  In that
 248    case, the character goes to coding->cmp_data->data instead of
 249    `dst'.
 250
 251    This macro is used in decoding routines.  */
 252
 253 #define EMIT_CHAR(c)                                                    \
 254   do {                                                                  \
 255     if (! COMPOSING_P (coding)                                          \
 256         || coding->composing == COMPOSITION_RELATIVE                    \
 257         || coding->composing == COMPOSITION_WITH_RULE)                  \
 258       {  /* write C's multibyte form at `dst' */                        \
 259         int bytes = CHAR_BYTES (c);                                     \
 260         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 261           {  /* no room for C */                                        \
 262             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 263             goto label_end_of_loop;                                     \
 264           }                                                             \
 265         dst += CHAR_STRING (c, dst);                                    \
 266         coding->produced_char++;                                        \
 267       }                                                                 \
 268     /* When composing non-relatively, record C as a component.  */      \
 269     if (COMPOSING_P (coding)                                            \
 270         && coding->composing != COMPOSITION_RELATIVE)                   \
 271       {                                                                 \
 272         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 273         coding->composition_rule_follows                                \
 274           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 275       }                                                                 \
 276   } while (0)
 277
 278 /* Store the byte C at `dst' and advance `dst', if there is room.  */
 279 #define EMIT_ONE_BYTE(c)                                        \
 280   do {                                                          \
 281     if (dst >= (dst_bytes ? dst_end : src))                     \
 282       {  /* full (limit is `src' if dst_bytes is 0) */          \
 283         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 284         goto label_end_of_loop;                                 \
 285       }                                                         \
 286     *dst++ = c;                                                 \
 287   } while (0)
 288 /* Store the bytes C1 and C2 at `dst', or neither if they do not fit.  */
 289 #define EMIT_TWO_BYTES(c1, c2)                                  \
 290   do {                                                          \
 291     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 292       {                                                         \
 293         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 294         goto label_end_of_loop;                                 \
 295       }                                                         \
 296     *dst++ = c1, *dst++ = c2;                                   \
 297   } while (0)
 298 /* Copy bytes [FROM, TO) to `dst'; FROM (an lvalue) ends up equal to TO.  */
 299 #define EMIT_BYTES(from, to)                                    \
 300   do {                                                          \
 301     if (dst + ((to) - (from)) > (dst_bytes ? dst_end : src))    \
 302       {  /* not enough room: nothing is copied */               \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     while ((from) < (to))                                       \
 307       *dst++ = *(from)++;                                       \
 308   } while (0)
 309
 310 \f
 311 /*** 1. Preamble ***/
 312
 313 #ifdef emacs
 314 #include <config.h>
 315 #endif
 316
 317 #include <stdio.h>
 318
 319 #ifdef emacs
 320
 321 #include "lisp.h"
 322 #include "buffer.h"
 323 #include "charset.h"
 324 #include "composite.h"
 325 #include "ccl.h"
 326 #include "coding.h"
 327 #include "window.h"
 328
 329 #else  /* not emacs */
 330
 331 #include "mulelib.h"
 332
 333 #endif /* not emacs */
 334
 335 Lisp_Object Qcoding_system, Qeol_type;
 336 Lisp_Object Qbuffer_file_coding_system;
 337 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 338 Lisp_Object Qno_conversion, Qundecided;
 339 Lisp_Object Qcoding_system_history;
 340 Lisp_Object Qsafe_chars;
 341 Lisp_Object Qvalid_codes;
 342
 343 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 344 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 345 Lisp_Object Qstart_process, Qopen_network_stream;
 346 Lisp_Object Qtarget_idx;
 347
 348 Lisp_Object Vselect_safe_coding_system_function;
 349
 350 /* Mnemonic string for each format of end-of-line.  */
 351 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 352 /* Mnemonic string to indicate format of end-of-line is not yet
 353    decided.  */
 354 Lisp_Object eol_mnemonic_undecided;
 355
 356 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 357    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 358 int system_eol_type;
 359
 360 #ifdef emacs
 361
 362 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 363
 364 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 365
 366 /* Coding system emacs-mule and raw-text are for converting only
 367    end-of-line format.  */
 368 Lisp_Object Qemacs_mule, Qraw_text;
 369
 370 /* Coding-systems are handed between Emacs Lisp programs and C internal
 371    routines by the following three variables.  */
 372 /* Coding-system for reading files and receiving data from process.  */
 373 Lisp_Object Vcoding_system_for_read;
 374 /* Coding-system for writing files and sending data to process.  */
 375 Lisp_Object Vcoding_system_for_write;
 376 /* Coding-system actually used in the latest I/O.  */
 377 Lisp_Object Vlast_coding_system_used;
 378
 379 /* A vector of length 256 which contains information about special
 380    Latin codes (especially for dealing with Microsoft codes).  */
 381 Lisp_Object Vlatin_extra_code_table;
 382
 383 /* Flag to inhibit code conversion of end-of-line format.  */
 384 int inhibit_eol_conversion;
 385
 386 /* Flag to inhibit ISO2022 escape sequence detection.  */
 387 int inhibit_iso_escape_detection;
 388
 389 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 390 int inherit_process_coding_system;
 391
 392 /* Coding system to be used to encode text for terminal display.  */
 393 struct coding_system terminal_coding;
 394
 395 /* Coding system to be used to encode text for terminal display when
 396    terminal coding system is nil.  */
 397 struct coding_system safe_terminal_coding;
 398
 399 /* Coding system of what is sent from terminal keyboard.  */
 400 struct coding_system keyboard_coding;
 401
 402 /* Default coding system to be used to write a file.  */
 403 struct coding_system default_buffer_file_coding;
 404
 405 Lisp_Object Vfile_coding_system_alist;
 406 Lisp_Object Vprocess_coding_system_alist;
 407 Lisp_Object Vnetwork_coding_system_alist;
 408
 409 Lisp_Object Vlocale_coding_system;
 410
 411 #endif /* emacs */
 412
 413 Lisp_Object Qcoding_category, Qcoding_category_index;
 414
 415 /* List of symbols `coding-category-xxx' ordered by priority.  */
 416 Lisp_Object Vcoding_category_list;
 417
 418 /* Table of coding categories (Lisp symbols).  */
 419 Lisp_Object Vcoding_category_table;
 420
 421 /* Names of the symbols for the coding categories (`coding-category-xxx').  */
 422 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 423   "coding-category-emacs-mule",
 424   "coding-category-sjis",
 425   "coding-category-iso-7",
 426   "coding-category-iso-7-tight",
 427   "coding-category-iso-8-1",
 428   "coding-category-iso-8-2",
 429   "coding-category-iso-7-else",
 430   "coding-category-iso-8-else",
 431   "coding-category-ccl",
 432   "coding-category-big5",
 433   "coding-category-utf-8",
 434   "coding-category-utf-16-be",
 435   "coding-category-utf-16-le",
 436   "coding-category-raw-text",
 437   "coding-category-binary"
 438 };
 439
 440 /* Table of pointers to coding systems corresponding to each coding
 441    categories.  */
 442 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 443
 444 /* Table of coding category masks.  Nth element is a mask for a coding
 445    category of which priority is Nth.  */
 446 static
 447 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 448
 449 /* Flag to tell if we look up translation table on character code
 450    conversion.  */
 451 Lisp_Object Venable_character_translation;
 452 /* Standard translation table to look up on decoding (reading).  */
 453 Lisp_Object Vstandard_translation_table_for_decode;
 454 /* Standard translation table to look up on encoding (writing).  */
 455 Lisp_Object Vstandard_translation_table_for_encode;
 456
 457 Lisp_Object Qtranslation_table;
 458 Lisp_Object Qtranslation_table_id;
 459 Lisp_Object Qtranslation_table_for_decode;
 460 Lisp_Object Qtranslation_table_for_encode;
 461
 462 /* Alist of charsets vs revision number.  */
 463 Lisp_Object Vcharset_revision_alist;
 464
 465 /* Default coding systems used for process I/O.  */
 466 Lisp_Object Vdefault_process_coding_system;
 467
 468 /* Global flag to tell that we can't call post-read-conversion and
 469    pre-write-conversion functions.  Usually the value is zero, but it
 470    is set to 1 temporarily while such functions are running.  This is
 471    to avoid infinite recursive call.  */
 472 static int inhibit_pre_post_conversion;
 473
 474 /* Char-table containing safe coding systems of each character.  */
 475 Lisp_Object Vchar_coding_system_table;
 476 Lisp_Object Qchar_coding_system;
 477
 478 /* Return the `safe-chars' property of coding system CODING if it is a
 479    char-table, else Qt.  Don't check validity of CODING.  */
 480
 481 Lisp_Object
 482 coding_safe_chars (coding)
 483      struct coding_system *coding;
 484 {
 485   Lisp_Object coding_spec, plist, safe_chars;
 486
 487   coding_spec = Fget (coding->symbol, Qcoding_system);
 488   plist = XVECTOR (coding_spec)->contents[3];  /* plist kept in slot 3 */
 489   safe_chars = Fplist_get (plist, Qsafe_chars);
 490   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 491 }
 492 /* Nonzero if SAFE_CHARS is Qt or has a non-nil entry for character C.  */
 493 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 494   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 495
 496 \f
 497 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 498
 499 /* Emacs' internal format for encoding multiple character sets is a
 500    kind of multi-byte encoding, i.e. characters are encoded by
 501    variable-length sequences of one-byte codes.
 502
 503    ASCII characters and control characters (e.g. `tab', `newline') are
 504    represented by one-byte sequences which are their ASCII codes, in
 505    the range 0x00 through 0x7F.
 506
 507    8-bit characters of the range 0x80..0x9F are represented by
 508    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 509    code + 0x20).
 510
 511    8-bit characters of the range 0xA0..0xFF are represented by
 512    one-byte sequences which are their 8-bit code.
 513
 514    The other characters are represented by a sequence of `base
 515    leading-code', optional `extended leading-code', and one or two
 516    `position-code's.  The length of the sequence is determined by the
 517    base leading-code.  Leading-code takes the range 0x80 through 0x9F,
 518    whereas extended leading-code and position-code take the range 0xA0
 519    through 0xFF.  See `charset.h' for more details about leading-code
 520    and position-code.
 521
 522    --- CODE RANGE of Emacs' internal format ---
 523    character set        range
 524    -------------        -----
 525    ascii                0x00..0x7F
 526    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
 527    eight-bit-graphic    0xA0..0xFF
 528    ELSE                 0x81..0x9F + [0xA0..0xFF]+
 529    ---------------------------------------------
 530
 531   */
 532
 533 enum emacs_code_class_type emacs_code_class[256];
 534
 535 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 536    Check if the text from SRC up to SRC_END is in Emacs' internal
 537    format.  Return 0 on ESC, SI, or SO, or when a byte in 0x81..0x9F
 538    fails UNIBYTE_STR_AS_MULTIBYTE_P; else CODING_CATEGORY_MASK_EMACS_MULE.  */
 539 int
 540 detect_coding_emacs_mule (src, src_end)
 541       unsigned char *src, *src_end;
 542 {
 543   unsigned char c;
 544   int composing = 0;
 545   /* Dummy for ONE_MORE_BYTE, which writes its status to coding->result.  */
 546   struct coding_system dummy_coding;
 547   struct coding_system *coding = &dummy_coding;
 548
 549   while (1)
 550     {
 551       ONE_MORE_BYTE (c);  /* leaves the loop at end of source */
 552       /* While composing, bytes >= 0xA0 are rewritten before the checks.  */
 553       if (composing)
 554         {
 555           if (c < 0xA0)
 556             composing = 0;  /* composition is over */
 557           else if (c == 0xA0)
 558             {
 559               ONE_MORE_BYTE (c);
 560               c &= 0x7F;
 561             }
 562           else
 563             c -= 0x20;
 564         }
 565       /* ESC, SI, and SO are ISO2022 control codes: not emacs-mule text.  */
 566       if (c < 0x20)
 567         {
 568           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 569             return 0;
 570         }
 571       else if (c >= 0x80 && c < 0xA0)
 572         {
 573           if (c == 0x80)
 574             /* Old leading code for a composite character.  */
 575             composing = 1;
 576           else
 577             {
 578               unsigned char *src_base = src - 1;  /* where C was read */
 579               int bytes;
 580
 581               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 582                                                bytes))
 583                 return 0;
 584               src = src_base + bytes;  /* resume after the whole form */
 585             }
 586         }
 587     }
 588  label_end_of_loop:
 589   return CODING_CATEGORY_MASK_EMACS_MULE;
 590 }
 591
 592
 593 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
 594    A byte that starts no valid multibyte form is expanded by CHAR_STRING.  */
 595 static void
 596 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 597      struct coding_system *coding;
 598      unsigned char *source, *destination;
 599      int src_bytes, dst_bytes;
 600 {
 601   unsigned char *src = source;
 602   unsigned char *src_end = source + src_bytes;
 603   unsigned char *dst = destination;
 604   unsigned char *dst_end = destination + dst_bytes;
 605   /* SRC_BASE remembers the start position in source in each loop.
 606      The loop will be exited when there's not enough source code, or
 607      when there's not enough destination area to produce a
 608      character.  */
 609   unsigned char *src_base;
 610
 611   coding->produced_char = 0;
 612   while ((src_base = src) < src_end)
 613     {
 614       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 615       int bytes;
 616
 617       if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 618         {
 619           p = src;  /* already a multibyte form: copy it as is */
 620           src += bytes;
 621         }
 622       else
 623         {
 624           bytes = CHAR_STRING (*src, tmp);
 625           p = tmp;
 626           src++;
 627         }
 628       if (dst + bytes > (dst_bytes ? dst_end : src))
 629         {  /* No room.  An exact fit is accepted, as in EMIT_CHAR.  */
 630           coding->result = CODING_FINISH_INSUFFICIENT_DST;
 631           break;
 632         }
 633       while (bytes--) *dst++ = *p++;
 634       coding->produced_char++;
 635     }
 636   /* SRC_BASE excludes a character that did not fit; source is unibyte.  */
 637   coding->consumed = coding->consumed_char = src_base - source;
 638   coding->produced = dst - destination;
 639 }
 639 /* Encoding to emacs-mule is done by encode_eol alone.  */
 640 #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \
 641   encode_eol (coding, source, destination, src_bytes, dst_bytes)
 642
 643
 644 \f
 645 /*** 3. ISO2022 handlers ***/
 646
 647 /* The following note describes the coding system ISO2022 briefly.
 648    Since the intention of this note is to help understand the
 649    functions in this file, some parts are NOT ACCURATE or OVERLY
 650    SIMPLIFIED.  For thorough understanding, please refer to the
 651    original document of ISO2022.
 652
 653    ISO2022 provides many mechanisms to encode several character sets
 654    in 7-bit and 8-bit environments.  For 7-bit environments, all text
 655    is encoded using bytes less than 128.  This may make the encoded
 656    text a little bit longer, but the text passes more easily through
 657    several gateways, some of which strip off MSB (Most Significant Bit).
 658
 659    There are two kinds of character sets: control character set and
 660    graphic character set.  The former contains control characters such
 661    as `newline' and `escape' to provide control functions (control
 662    functions are also provided by escape sequences).  The latter
 663    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 664    two control character sets and many graphic character sets.
 665
 666    Graphic character sets are classified into one of the following
 667    four classes, according to the number of bytes (DIMENSION) and
 668    number of characters in one dimension (CHARS) of the set:
 669    - DIMENSION1_CHARS94
 670    - DIMENSION1_CHARS96
 671    - DIMENSION2_CHARS94
 672    - DIMENSION2_CHARS96
 673
 674    In addition, each character set is assigned an identification tag,
 675    unique for each set, called "final character" (denoted as <F>
 676    hereafter).  The <F> of each character set is decided by ECMA(*)
 677    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
 678    (0x30..0x3F are for private use only).
 679
 680    Note (*): ECMA = European Computer Manufacturers Association
 681
 682    Here are examples of graphic character set [NAME(<F>)]:
 683         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 684         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 685         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 686         o DIMENSION2_CHARS96 -- none for the moment
 687
 688    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
 689         C0 [0x00..0x1F] -- control character plane 0
 690         GL [0x20..0x7F] -- graphic character plane 0
 691         C1 [0x80..0x9F] -- control character plane 1
 692         GR [0xA0..0xFF] -- graphic character plane 1
 693
 694    A control character set is directly designated and invoked to C0 or
 695    C1 by an escape sequence.  The most common case is that:
 696    - ISO646's  control character set is designated/invoked to C0, and
 697    - ISO6429's control character set is designated/invoked to C1,
 698    and usually these designations/invocations are omitted in encoded
 699    text.  In a 7-bit environment, only C0 can be used, and a control
 700    character for C1 is encoded by an appropriate escape sequence to
 701    fit into the environment.  All control characters for C1 are
 702    defined to have corresponding escape sequences.
 703
 704    A graphic character set is at first designated to one of four
 705    graphic registers (G0 through G3), then these graphic registers are
 706    invoked to GL or GR.  These designations and invocations can be
 707    done independently.  The most common case is that G0 is invoked to
 708    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
 709    these invocations and designations are omitted in encoded text.
 710    In a 7-bit environment, only GL can be used.
 711
 712    When a graphic character set of CHARS94 is invoked to GL, codes
 713    0x20 and 0x7F of the GL area work as control characters SPACE and
 714    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
 715    be used.
 716
 717    There are two ways of invocation: locking-shift and single-shift.
 718    With locking-shift, the invocation lasts until the next different
 719    invocation, whereas with single-shift, the invocation affects the
 720    following character only and doesn't affect the locking-shift
 721    state.  Invocations are done by the following control characters or
 722    escape sequences:
 723
 724    ----------------------------------------------------------------------
 725    abbrev  function                  cntrl escape seq   description
 726    ----------------------------------------------------------------------
 727    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
 728    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
 729    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
 730    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
 731    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
 732    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
 733    LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
 734    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
 735    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
 736    ----------------------------------------------------------------------
 737    (*) These are not used by any known coding system.
 738
 739    Control characters for these functions are defined by macros
 740    ISO_CODE_XXX in `coding.h'.
 741
 742    Designations are done by the following escape sequences:
 743    ----------------------------------------------------------------------
 744    escape sequence      description
 745    ----------------------------------------------------------------------
 746    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 747    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 748    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 749    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 750    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 751    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 752    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 753    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 754    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 755    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 756    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 757    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 758    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 759    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 760    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 761    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 762    ----------------------------------------------------------------------
 763
 764    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 765    of dimension 1, chars 94, and final character <F>, etc...
 766
 767    Note (*): Although these designations are not allowed in ISO2022,
 768    Emacs accepts them on decoding, and produces them on encoding
 769    CHARS96 character sets in a coding system which is characterized as
 770    7-bit environment, non-locking-shift, and non-single-shift.
 771
 772    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 773    '(' can be omitted.  We refer to this as "short-form" hereafter.
 774
 775    Now you may notice that there are a lot of ways for encoding the
 776    same multilingual text in ISO2022.  Actually, there exist many
 777    coding systems such as Compound Text (used in X11's inter-client
 778    communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
 779    (used in Korean internet), EUC (Extended UNIX Code, used in Asian
 780    localized platforms), and all of these are variants of ISO2022.
 781
 782    In addition to the above, Emacs handles two more kinds of escape
 783    sequences: ISO6429's direction specification and Emacs' private
 784    sequence for specifying character composition.
 785
 786    ISO6429's direction specification takes the following form:
 787         o CSI ']'      -- end of the current direction
 788         o CSI '0' ']'  -- end of the current direction
 789         o CSI '1' ']'  -- start of left-to-right text
 790         o CSI '2' ']'  -- start of right-to-left text
 791    The control character CSI (0x9B: control sequence introducer) is
 792    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
 793
 794    Character composition specification takes the following form:
 795         o ESC '0' -- start relative composition
 796         o ESC '1' -- end composition
 797         o ESC '2' -- start rule-base composition (*)
 798         o ESC '3' -- start relative composition with alternate chars  (**)
 799         o ESC '4' -- start rule-base composition with alternate chars  (**)
 800   Since these are not standard escape sequences of any ISO standard,
 801   their use with these meanings is restricted to Emacs only.
 802
 803   (*) This form is used only in Emacs 20.5 and the older versions,
 804   but the newer versions can safely decode it.
 805   (**) This form is used only in Emacs 21.1 and the newer versions,
 806   and the older versions can't decode it.
 807
 808   Here's a list of example usages of these composition escape
 809   sequences (categorized by `enum composition_method').
 810
 811   COMPOSITION_RELATIVE:
 812         ESC 0 CHAR [ CHAR ] ESC 1
 813   COMPOSITION_WITH_RULE:
 814         ESC 2 CHAR [ RULE CHAR ] ESC 1
 815   COMPOSITION_WITH_ALTCHARS:
 816         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
 817   COMPOSITION_WITH_RULE_ALTCHARS:
 818         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
 819 /* Code class of each byte value; decode_coding_iso2022 indexes this table with a source byte.  */
 820 enum iso_code_class_type iso_code_class[256];
 821 /* Nonzero if all of the following hold: coding_system_table[IDX] is
        non-null; CHARSET is CHARSET_ASCII or the character C passes
        CODING_SAFE_CHAR_P for that coding system; and the requested
        designation of CHARSET in that coding system is not
        CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.
        As a side effect, unless CHARSET is CHARSET_ASCII, this assigns
        to the variable `safe_chars', which must be declared where the
        macro is used.  IDX and CHARSET are evaluated more than once.  */
 822 #define CHARSET_OK(idx, charset, c)                                     \
 823   (coding_system_table[idx]                                             \
 824    && (charset == CHARSET_ASCII                                         \
 825        || (safe_chars = coding_safe_chars (coding_system_table[idx]),   \
 826            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
 827    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
 828                                               charset)                  \
 829        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 830 /* Nonzero if the initial designation that coding_system_table[IDX]
        records for register 1 (presumably G1 -- confirm in `coding.h')
        is non-negative.  Unlike CHARSET_OK, this does not test the
        table entry for null first.  */
 831 #define SHIFT_OUT_OK(idx) \
 832   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 833
 834 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 835    Check if the text in SRC .. SRC_END is encoded in ISO2022.  If it
 836    may be, return an integer in which the appropriate flag bits among:
 837         CODING_CATEGORY_MASK_ISO_7
 838         CODING_CATEGORY_MASK_ISO_7_TIGHT
 839         CODING_CATEGORY_MASK_ISO_8_1
 840         CODING_CATEGORY_MASK_ISO_8_2
 841         CODING_CATEGORY_MASK_ISO_7_ELSE
 842         CODING_CATEGORY_MASK_ISO_8_ELSE
 843    are set.  If a code which should never appear in ISO2022 is found,
 844    return 0.
        The value returned is MASK & MASK_FOUND: MASK starts as
        CODING_CATEGORY_MASK_ISO and only ever has bits cleared (categories
        ruled out), MASK_FOUND starts as 0 and only ever has bits set
        (categories for which some evidence was seen).  When
        `inhibit_iso_escape_detection' is non-zero, the ESC, SO, SI, CSI,
        SS2 and SS3 cases leave MASK and MASK_FOUND untouched.  */
 845
 846 int
 847 detect_coding_iso2022 (src, src_end)
 848      unsigned char *src, *src_end;
 849 {
 850   int mask = CODING_CATEGORY_MASK_ISO;
 851   int mask_found = 0;
       /* REG[n] is the charset last seen designated to register Gn, or
          -1 if none.  NOTE(review): SHIFT_OUT is never assigned after
          this initialization, so the `shift_out == 1' test in the
          ISO_CODE_SI case below can never succeed.  */
 852   int reg[4], shift_out = 0, single_shifting = 0;
       /* NOTE(review): this function-scope `i' is never used; the inner
          block in the default case declares its own `i'.  */
 853   int c, c1, i, charset;
 854   /* Dummy for ONE_MORE_BYTE.  */
 855   struct coding_system dummy_coding;
 856   struct coding_system *coding = &dummy_coding;
       /* Assigned by the CHARSET_OK macro.  */
 857   Lisp_Object safe_chars;
 858
 859   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
       /* Stop early once every category has been ruled out.  */
 860   while (mask && src < src_end)
 861     {
 862       ONE_MORE_BYTE (c);
 863       switch (c)
 864         {
 865         case ISO_CODE_ESC:
 866           if (inhibit_iso_escape_detection)
 867             break;
 868           single_shifting = 0;
 869           ONE_MORE_BYTE (c);
 870           if (c >= '(' && c <= '/')
 871             {
 872               /* Designation sequence for a charset of dimension 1.  */
 873               ONE_MORE_BYTE (c1);
 874               if (c1 < ' ' || c1 >= 0x80
 875                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 876                 /* Invalid designation sequence.  Just ignore.  */
 877                 break;
                   /* '(' ')' '*' '+' and ',' '-' '.' '/' both map onto
                      registers 0..3 in that order.  */
 878               reg[(c - '(') % 4] = charset;
 879             }
 880           else if (c == '$')
 881             {
 882               /* Designation sequence for a charset of dimension 2.  */
 883               ONE_MORE_BYTE (c);
 884               if (c >= '@' && c <= 'B')
 885                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 886                 reg[0] = charset = iso_charset_table[1][0][c];
 887               else if (c >= '(' && c <= '/')
 888                 {
 889                   ONE_MORE_BYTE (c1);
 890                   if (c1 < ' ' || c1 >= 0x80
 891                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 892                     /* Invalid designation sequence.  Just ignore.  */
 893                     break;
 894                   reg[(c - '(') % 4] = charset;
 895                 }
 896               else
 897                 /* Invalid designation sequence.  Just ignore.  */
 898                 break;
 899             }
 900           else if (c == 'N' || c == 'O')
 901             {
 902               /* ESC <Fe> for SS2 or SS3.  */
 903               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
 904               break;
 905             }
 906           else if (c >= '0' && c <= '4')
 907             {
 908               /* ESC <Fp> for start/end composition.  */
 909               mask_found |= CODING_CATEGORY_MASK_ISO;
 910               break;
 911             }
 912           else
 913             /* Invalid escape sequence.  Just ignore.  */
 914             break;
 915
 916           /* We found a valid designation sequence for CHARSET.  */
 917           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 918           c = MAKE_CHAR (charset, 0, 0);
               /* For each category below, either count the designation as
                  evidence for it or rule the category out.  */
 919           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
 920             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 921           else
 922             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 923           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
 924             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 925           else
 926             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 927           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
 928             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
 929           else
 930             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 931           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
 932             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
 933           else
 934             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 935           break;
 936
 937         case ISO_CODE_SO:
 938           if (inhibit_iso_escape_detection)
 939             break;
 940           single_shifting = 0;
 941           if (shift_out == 0
 942               && (reg[1] >= 0
 943                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 944                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 945             {
 946               /* Locking shift out.  */
 947               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 948               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 949             }
 950           break;
 951
 952         case ISO_CODE_SI:
 953           if (inhibit_iso_escape_detection)
 954             break;
 955           single_shifting = 0;
               /* NOTE(review): unreachable body, because SHIFT_OUT is
                  never set to 1 in this function.  */
 956           if (shift_out == 1)
 957             {
 958               /* Locking shift in.  */
 959               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 960               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 961             }
 962           break;
 963
 964         case ISO_CODE_CSI:
 965           single_shifting = 0;
               /* Fall through: CSI shares the handling below with SS2/SS3.  */
 966         case ISO_CODE_SS2:
 967         case ISO_CODE_SS3:
 968           {
                 /* Categories that remain possible after this byte.  */
 969             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 970
 971             if (inhibit_iso_escape_detection)
 972               break;
 973             if (c != ISO_CODE_CSI)
 974               {
                     /* SS2 or SS3: keep ISO_8_1 / ISO_8_2 only if the
                        corresponding coding system uses single-shift.  */
 975                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 976                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 977                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 978                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 979                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 980                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 981                 single_shifting = 1;
 982               }
                 /* The byte is also acceptable where it is listed in
                    Vlatin_extra_code_table and the coding system has
                    CODING_FLAG_ISO_LATIN_EXTRA set.  */
 983             if (VECTORP (Vlatin_extra_code_table)
 984                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 985               {
 986                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 987                     & CODING_FLAG_ISO_LATIN_EXTRA)
 988                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 989                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 990                     & CODING_FLAG_ISO_LATIN_EXTRA)
 991                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 992               }
 993             mask &= newmask;
 994             mask_found |= newmask;
 995           }
 996           break;
 997
 998         default:
 999           if (c < 0x80)
1000             {
1001               single_shifting = 0;
1002               break;
1003             }
1004           else if (c < 0xA0)
1005             {
                   /* A byte in 0x80..0x9F other than CSI, SS2 and SS3: it
                      is tolerated only as a Latin extra code; otherwise
                      the text is not ISO2022 at all.  */
1006               single_shifting = 0;
1007               if (VECTORP (Vlatin_extra_code_table)
1008                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1009                 {
1010                   int newmask = 0;
1011
1012                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1013                       & CODING_FLAG_ISO_LATIN_EXTRA)
1014                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1015                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1016                       & CODING_FLAG_ISO_LATIN_EXTRA)
1017                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1018                   mask &= newmask;
1019                   mask_found |= newmask;
1020                 }
1021               else
1022                 return 0;
1023             }
1024           else
1025             {
1026               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1027                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1028               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1029               /* Check the length of succeeding codes of the range
1030                  0xA0..0xFF.  If the byte length is odd, we exclude
1031                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1032                  when we are not single shifting.  */
1033               if (!single_shifting
1034                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1035                 {
                       /* Counts the bytes of this run, starting with the
                          one already read.  The byte below 0xA0 that ends
                          the run is read here and is not examined by the
                          outer switch (presumably ONE_MORE_BYTE advances
                          SRC -- confirm against the macro).  */
1036                   int i = 1;
1037                   while (src < src_end)
1038                     {
1039                       ONE_MORE_BYTE (c);
1040                       if (c < 0xA0)
1041                         break;
1042                       i++;
1043                     }
1044
                       /* An odd count is held against ISO_8_2 only when
                          SRC has not reached SRC_END.  */
1045                   if (i & 1 && src < src_end)
1046                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1047                   else
1048                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1049                 }
1050             }
1051           break;
1052         }
1053     }
      /* Not referenced explicitly in this function; presumably the target
         ONE_MORE_BYTE jumps to when the source runs out -- confirm against
         the macro definition.  */
1054  label_end_of_loop:
1055   return (mask & mask_found);
1056 }
1057
1058 /* Decode a character of which charset is CHARSET, the 1st position
1059    code is C1, the 2nd position code is C2, and return the decoded
1060    character code.  If the variable `translation_table' is non-nil,
1061    return the code produced by translate_char with that table instead.
        `translation_table' is taken from the scope where the macro is
        used.  */
1062
1063 #define DECODE_ISO_CHARACTER(charset, c1, c2)   \
1064   (NILP (translation_table)                     \
1065    ? MAKE_CHAR (charset, c1, c2)                \
1066    : translate_char (translation_table, -1, charset, c1, c2))
1067
1068 /* Set designation state into CODING.  Look up the charset given by
        DIMENSION, CHARS and FINAL_CHAR in ISO_CHARSET_TABLE and, if it
        is acceptable, store it as the charset designated to register REG.
        A charset is acceptable when it is non-negative and either its
        requested designation in CODING equals REG or its character
        MAKE_CHAR (charset, 0, 0) passes CODING_SAFE_CHAR_P.  If
        CODING_MODE_DIRECTION is set and the charset has a reverse
        charset, the reverse charset is stored instead.
        Control goes to `label_invalid_code' when FINAL_CHAR is below '0'
        or not below 128, when the charset is not acceptable (REG is then
        remembered in last_invalid_designation_register), and when
        CHARSET_ASCII is designated to register 0 while
        last_invalid_designation_register is 0.
        `coding', `safe_chars' and the label come from the scope where
        the macro is used.
        NOTE(review): MAKE_CHAR is evaluated before CHARSET is tested
        for being non-negative.  */
1069 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1070   do {                                                                     \
1071     int charset, c;                                                        \
1072                                                                            \
1073     if (final_char < '0' || final_char >= 128)                             \
1074       goto label_invalid_code;                                             \
1075     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1076                                  make_number (chars),                      \
1077                                  make_number (final_char));                \
1078     c = MAKE_CHAR (charset, 0, 0);                                         \
1079     if (charset >= 0                                                       \
1080         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1081             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1082       {                                                                    \
1083         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1084             && reg == 0                                                    \
1085             && charset == CHARSET_ASCII)                                   \
1086           {                                                                \
1087             /* We should insert this designation sequence as is so         \
1088                that it is surely written back to a file.  */               \
1089             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1090             goto label_invalid_code;                                       \
1091           }                                                                \
1092         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1093         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1094             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1095           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1096         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1097       }                                                                    \
1098     else                                                                   \
1099       {                                                                    \
1100         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1101         goto label_invalid_code;                                           \
1102       }                                                                    \
1103   } while (0)
1104
1105 /* Allocate one more composition_data block for CODING and link it after
1106    the block currently in use.  CHAR_OFFSET is stored in the new block.  */
1107
1108 void
1109 coding_allocate_composition_data (coding, char_offset)
1110      struct coding_system *coding;
1111      int char_offset;
1112 {
1113   struct composition_data *last = coding->cmp_data;
1114   struct composition_data *fresh
1115     = (struct composition_data *) xmalloc (sizeof (struct composition_data));
1116
1117   fresh->used = 0;
1118   fresh->char_offset = char_offset;
1119   fresh->prev = last, fresh->next = NULL;
1120   if (last != NULL)
1121     last->next = fresh;
1122   coding->cmp_data_start = 0;
1123   coding->cmp_data = fresh;
1124 }
1125
1126 /* Record the starting position START and METHOD of one composition.
        A record is opened at index CODING->cmp_data->used of
        CODING->cmp_data->data and that index is remembered in
        CODING->cmp_data_start.  Of its four ints, [0] is set to -1 here
        and overwritten by CODING_ADD_COMPOSITION_END, [1] is START plus
        the block's char_offset, [2] is not written here, and [3] is
        METHOD.  */
1127
1128 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
1129   do {                                                          \
1130     struct composition_data *cmp_data = coding->cmp_data;       \
1131     int *data = cmp_data->data + cmp_data->used;                \
1132     coding->cmp_data_start = cmp_data->used;                    \
1133     data[0] = -1;                                               \
1134     data[1] = cmp_data->char_offset + start;                    \
1135     data[3] = (int) method;                                     \
1136     cmp_data->used += 4;                                        \
1137   } while (0)
1138
1139 /* Record the ending position END of the current composition, i.e. of
        the record that starts at index CODING->cmp_data_start: its [0]
        becomes the number of ints stored since that index, and its [2]
        becomes END plus the block's char_offset.  */
1140
1141 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
1142   do {                                                          \
1143     struct composition_data *cmp_data = coding->cmp_data;       \
1144     int *data = cmp_data->data + coding->cmp_data_start;        \
1145     data[0] = cmp_data->used - coding->cmp_data_start;          \
1146     data[2] = cmp_data->char_offset + end;                      \
1147   } while (0)
1148
1149 /* Record one COMPONENT (alternate character or composition rule) by
        storing it at index CODING->cmp_data->used of
        CODING->cmp_data->data and incrementing that index.  No bounds
        check is done here.  */
1150
1151 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)     \
1152   (coding->cmp_data->data[coding->cmp_data->used++] = component)
1153
1154 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
        C1 is the character that follows ESC.
        If composition is disabled, ESC and C1 (masked to 7 bits) are
        stored to DST and coding->produced_char is advanced by 2.
        If no composition is in progress, one is started with the method
        selected by C1 ('0', '2', '3', anything else), unless
        coding->cmp_data is missing or short of room; then coding->result
        is set to CODING_FINISH_INSUFFICIENT_CMP and control goes to
        `label_end_of_loop'.
        If a composition is already in progress with one of the two
        ALTCHARS methods, the method is switched to COMPOSITION_RELATIVE.
        `coding', `dst' and the label come from the scope where the macro
        is used.  */
1155
1156 #define DECODE_COMPOSITION_START(c1)                                       \
1157   do {                                                                     \
1158     if (coding->composing == COMPOSITION_DISABLED)                         \
1159       {                                                                    \
1160         *dst++ = ISO_CODE_ESC;                                             \
1161         *dst++ = c1 & 0x7f;                                                \
1162         coding->produced_char += 2;                                        \
1163       }                                                                    \
1164     else if (!COMPOSING_P (coding))                                        \
1165       {                                                                    \
1166         /* This is surely the start of a composition.  We must be sure     \
1167            that coding->cmp_data has enough space to store the             \
1168            information about the composition.  If not, terminate the       \
1169            current decoding loop, allocate one more memory block for       \
1170            coding->cmp_data in the caller, then start the decoding         \
1171            loop again.  We can't allocate memory here directly because     \
1172            it may cause buffer/string relocation.  */                      \
1173         if (!coding->cmp_data                                              \
1174             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1175                 >= COMPOSITION_DATA_SIZE))                                 \
1176           {                                                                \
1177             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1178             goto label_end_of_loop;                                        \
1179           }                                                                \
1180         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1181                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1182                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1183                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1184         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1185                                       coding->composing);                  \
1186         coding->composition_rule_follows = 0;                              \
1187       }                                                                    \
1188     else                                                                   \
1189       {                                                                    \
1190         /* We are already handling a composition.  If the method is        \
1191            the following two, the codes following the current escape       \
1192            sequence are actual characters stored in a buffer.  */          \
1193         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1194             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1195           {                                                                \
1196             coding->composing = COMPOSITION_RELATIVE;                      \
1197             coding->composition_rule_follows = 0;                          \
1198           }                                                                \
1199       }                                                                    \
1200   } while (0)
1201
1202 /* Handle composition end sequence ESC 1.  C1 is the character that
        follows ESC.  If composition is disabled, ESC and C1 are stored
        to DST and coding->produced_char is advanced by 2.  Otherwise
        the current composition record is closed at
        coding->produced_char and coding->composing is set to
        COMPOSITION_NO.  `coding' and `dst' come from the scope where the
        macro is used.  */
1203
1204 #define DECODE_COMPOSITION_END(c1)                                      \
1205   do {                                                                  \
1206     if (coding->composing == COMPOSITION_DISABLED)                      \
1207       {                                                                 \
1208         *dst++ = ISO_CODE_ESC;                                          \
1209         *dst++ = c1;                                                    \
1210         coding->produced_char += 2;                                     \
1211       }                                                                 \
1212     else                                                                \
1213       {                                                                 \
1214         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1215         coding->composing = COMPOSITION_NO;                             \
1216       }                                                                 \
1217   } while (0)
1218
1219 /* Decode a composition rule from the byte C1 (and maybe one more byte
1220    from SRC) and store one encoded composition rule in
1221    coding->cmp_data.  C1 must be a variable: 32 is subtracted from it
        in place.  The extra byte of the new format is read into the
        variable `c2', which must be declared where the macro is used.
        If C1 - 32 is 93 or more, a rule of 0 is stored.  Afterwards
        coding->composition_rule_follows is reset to 0.  */
1222
1223 #define DECODE_COMPOSITION_RULE(c1)                                     \
1224   do {                                                                  \
1225     int rule = 0;                                                       \
1226     (c1) -= 32;                                                         \
1227     if (c1 < 81)                /* old format (before ver.21) */        \
1228       {                                                                 \
1229         int gref = (c1) / 9;                                            \
1230         int nref = (c1) % 9;                                            \
1231         if (gref == 4) gref = 10;                                       \
1232         if (nref == 4) nref = 10;                                       \
1233         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1234       }                                                                 \
1235     else if (c1 < 93)           /* new format (after ver.21) */         \
1236       {                                                                 \
1237         ONE_MORE_BYTE (c2);                                             \
1238         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1239       }                                                                 \
1240     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1241     coding->composition_rule_follows = 0;                               \
1242   } while (0)
1243
1244
1245 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1246
1247 static void
1248 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1249      struct coding_system *coding;
1250      unsigned char *source, *destination;
1251      int src_bytes, dst_bytes;
1252 {
1253   unsigned char *src = source;
1254   unsigned char *src_end = source + src_bytes;
1255   unsigned char *dst = destination;
1256   unsigned char *dst_end = destination + dst_bytes;
1257   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1258   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1259   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1260   /* SRC_BASE remembers the start position in source in each loop.
1261      The loop will be exited when there's not enough source code
1262      (within macro ONE_MORE_BYTE), or when there's not enough
1263      destination area to produce a character (within macro
1264      EMIT_CHAR).  */
1265   unsigned char *src_base;
1266   int c, charset;
1267   Lisp_Object translation_table;
1268   Lisp_Object safe_chars;
1269
1270   safe_chars = coding_safe_chars (coding);
1271
1272   if (NILP (Venable_character_translation))
1273     translation_table = Qnil;
1274   else
1275     {
1276       translation_table = coding->translation_table_for_decode;
1277       if (NILP (translation_table))
1278         translation_table = Vstandard_translation_table_for_decode;
1279     }
1280
1281   coding->result = CODING_FINISH_NORMAL;
1282
1283   while (1)
1284     {
1285       int c1, c2;
1286
1287       src_base = src;
1288       ONE_MORE_BYTE (c1);
1289
1290       /* We produce no character or one character.  */
1291       switch (iso_code_class [c1])
1292         {
1293         case ISO_0x20_or_0x7F:
1294           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1295             {
1296               DECODE_COMPOSITION_RULE (c1);
1297               continue;
1298             }
1299           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1300             {
1301               /* This is SPACE or DEL.  */
1302               charset = CHARSET_ASCII;
1303               break;
1304             }
1305           /* This is a graphic character, we fall down ...  */
1306
1307         case ISO_graphic_plane_0:
1308           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1309             {
1310               DECODE_COMPOSITION_RULE (c1);
1311               continue;
1312             }
1313           charset = charset0;
1314           break;
1315
1316         case ISO_0xA0_or_0xFF:
1317           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1318               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1319             goto label_invalid_code;
1320           /* This is a graphic character, we fall down ... */
1321
1322         case ISO_graphic_plane_1:
1323           if (charset1 < 0)
1324             goto label_invalid_code;
1325           charset = charset1;
1326           break;
1327
1328         case ISO_control_0:
1329           if (COMPOSING_P (coding))
1330             DECODE_COMPOSITION_END ('1');
1331
1332           /* All ISO2022 control characters in this class have the
1333              same representation in Emacs internal format.  */
1334           if (c1 == '\n'
1335               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1336               && (coding->eol_type == CODING_EOL_CR
1337                   || coding->eol_type == CODING_EOL_CRLF))
1338             {
1339               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1340               goto label_end_of_loop;
1341             }
1342           charset = CHARSET_ASCII;
1343           break;
1344
1345         case ISO_control_1:
1346           if (COMPOSING_P (coding))
1347             DECODE_COMPOSITION_END ('1');
1348           goto label_invalid_code;
1349
1350         case ISO_carriage_return:
1351           if (COMPOSING_P (coding))
1352             DECODE_COMPOSITION_END ('1');
1353
1354           if (coding->eol_type == CODING_EOL_CR)
1355             c1 = '\n';
1356           else if (coding->eol_type == CODING_EOL_CRLF)
1357             {
1358               ONE_MORE_BYTE (c1);
1359               if (c1 != ISO_CODE_LF)
1360                 {
1361                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1362                     {
1363                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
1364                       goto label_end_of_loop;
1365                     }
1366                   src--;
1367                   c1 = '\r';
1368                 }
1369             }
1370           charset = CHARSET_ASCII;
1371           break;
1372
1373         case ISO_shift_out:
1374           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1375               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1376             goto label_invalid_code;
1377           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1378           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1379           continue;
1380
1381         case ISO_shift_in:
1382           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1383             goto label_invalid_code;
1384           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1385           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1386           continue;
1387
1388         case ISO_single_shift_2_7:
1389         case ISO_single_shift_2:
1390           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1391             goto label_invalid_code;
1392           /* SS2 is handled as an escape sequence of ESC 'N' */
1393           c1 = 'N';
1394           goto label_escape_sequence;
1395
1396         case ISO_single_shift_3:
1397           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1398             goto label_invalid_code;
1399           /* SS3 is handled as an escape sequence of ESC 'O' */
1400           c1 = 'O';
1401           goto label_escape_sequence;
1402
1403         case ISO_control_sequence_introducer:
1404           /* CSI is handled as an escape sequence of ESC '[' ...  */
1405           c1 = '[';
1406           goto label_escape_sequence;
1407
1408         case ISO_escape:
1409           ONE_MORE_BYTE (c1);
1410         label_escape_sequence:
1411           /* Escape sequences handled by Emacs are invocation,
1412              designation, direction specification, and character
1413              composition specification.  */
1414           switch (c1)
1415             {
1416             case '&':           /* revision of following character set */
1417               ONE_MORE_BYTE (c1);
1418               if (!(c1 >= '@' && c1 <= '~'))
1419                 goto label_invalid_code;
1420               ONE_MORE_BYTE (c1);
1421               if (c1 != ISO_CODE_ESC)
1422                 goto label_invalid_code;
1423               ONE_MORE_BYTE (c1);
1424               goto label_escape_sequence;
1425
1426             case '$':           /* designation of 2-byte character set */
1427               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1428                 goto label_invalid_code;
1429               ONE_MORE_BYTE (c1);
1430               if (c1 >= '@' && c1 <= 'B')
1431                 {       /* designation of JISX0208.1978, GB2312.1980,
1432                            or JISX0208.1980 */
1433                   DECODE_DESIGNATION (0, 2, 94, c1);
1434                 }
1435               else if (c1 >= 0x28 && c1 <= 0x2B)
1436                 {       /* designation of DIMENSION2_CHARS94 character set */
1437                   ONE_MORE_BYTE (c2);
1438                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1439                 }
1440               else if (c1 >= 0x2C && c1 <= 0x2F)
1441                 {       /* designation of DIMENSION2_CHARS96 character set */
1442                   ONE_MORE_BYTE (c2);
1443                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1444                 }
1445               else
1446                 goto label_invalid_code;
1447               /* We must update these variables now.  */
1448               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1449               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1450               continue;
1451
1452             case 'n':           /* invocation of locking-shift-2 */
1453               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1454                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1455                 goto label_invalid_code;
1456               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1457               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1458               continue;
1459
1460             case 'o':           /* invocation of locking-shift-3 */
1461               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1462                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1463                 goto label_invalid_code;
1464               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1465               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1466               continue;
1467
1468             case 'N':           /* invocation of single-shift-2 */
1469               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1470                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1471                 goto label_invalid_code;
1472               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1473               ONE_MORE_BYTE (c1);
1474               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1475                 goto label_invalid_code;
1476               break;
1477
1478             case 'O':           /* invocation of single-shift-3 */
1479               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1480                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1481                 goto label_invalid_code;
1482               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1483               ONE_MORE_BYTE (c1);
1484               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1485                 goto label_invalid_code;
1486               break;
1487
1488             case '0': case '2': case '3': case '4': /* start composition */
1489               DECODE_COMPOSITION_START (c1);
1490               continue;
1491
1492             case '1':           /* end composition */
1493               DECODE_COMPOSITION_END (c1);
1494               continue;
1495
1496             case '[':           /* specification of direction */
1497               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1498                 goto label_invalid_code;
1499               /* For the moment, nested direction is not supported.
1500                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1501                  left-to-right, and nonzero means right-to-left.  */
1502               ONE_MORE_BYTE (c1);
1503               switch (c1)
1504                 {
1505                 case ']':       /* end of the current direction */
1506                   coding->mode &= ~CODING_MODE_DIRECTION;
1507
1508                 case '0':       /* end of the current direction */
1509                 case '1':       /* start of left-to-right direction */
1510                   ONE_MORE_BYTE (c1);
1511                   if (c1 == ']')
1512                     coding->mode &= ~CODING_MODE_DIRECTION;
1513                   else
1514                     goto label_invalid_code;
1515                   break;
1516
1517                 case '2':       /* start of right-to-left direction */
1518                   ONE_MORE_BYTE (c1);
1519                   if (c1 == ']')
1520                     coding->mode |= CODING_MODE_DIRECTION;
1521                   else
1522                     goto label_invalid_code;
1523                   break;
1524
1525                 default:
1526                   goto label_invalid_code;
1527                 }
1528               continue;
1529
1530             default:
1531               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1532                 goto label_invalid_code;
1533               if (c1 >= 0x28 && c1 <= 0x2B)
1534                 {       /* designation of DIMENSION1_CHARS94 character set */
1535                   ONE_MORE_BYTE (c2);
1536                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1537                 }
1538               else if (c1 >= 0x2C && c1 <= 0x2F)
1539                 {       /* designation of DIMENSION1_CHARS96 character set */
1540                   ONE_MORE_BYTE (c2);
1541                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1542                 }
1543               else
1544                 goto label_invalid_code;
1545               /* We must update these variables now.  */
1546               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1547               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1548               continue;
1549             }
1550         }
1551
1552       /* Now we know CHARSET and 1st position code C1 of a character.
1553          Produce a multibyte sequence for that character while getting
1554          2nd position code C2 if necessary.  */
1555       if (CHARSET_DIMENSION (charset) == 2)
1556         {
1557           ONE_MORE_BYTE (c2);
1558           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
1559             /* C2 is not in a valid range.  */
1560             goto label_invalid_code;
1561         }
1562       c = DECODE_ISO_CHARACTER (charset, c1, c2);
1563       EMIT_CHAR (c);
1564       continue;
1565
1566     label_invalid_code:
1567       coding->errors++;
1568       if (COMPOSING_P (coding))
1569         DECODE_COMPOSITION_END ('1');
1570       src = src_base;
1571       c = *src++;
1572       EMIT_CHAR (c);
1573     }
1574
1575  label_end_of_loop:
1576   coding->consumed = coding->consumed_char = src_base - source;
1577   coding->produced = dst - destination;
1578   return;
1579 }
1580
1581
1582 /* ISO2022 encoding stuff.  */
1583
1584 /*
1585    It is not enough to say just "ISO2022" on encoding, we have to
1586    specify more details.  In Emacs, each coding system of ISO2022
1587    variant has the following specifications:
1588         1. Initial designation to G0 thru G3.
1589         2. Allows short-form designation?
1590         3. ASCII should be designated to G0 before control characters?
1591         4. ASCII should be designated to G0 at end of line?
1592         5. 7-bit environment or 8-bit environment?
1593         6. Use locking-shift?
1594         7. Use Single-shift?
1595    And the following two are only for Japanese:
1596         8. Use ASCII in place of JIS0201-1976-Roman?
1597         9. Use JISX0208-1983 in place of JISX0208-1978?
1598    These specifications are encoded in `coding->flags' as flag bits
1599    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1600    details.
1601 */
1602
/* Emit at DST the escape sequence which designates CHARSET to graphic
   register REG, advance DST past the bytes written, and record the
   new designation in CODING.

   The bytes produced are, in order:
     - ESC '&' <'@' + revision>, only when the revision number CODING
       keeps for CHARSET is less than 255;
     - ESC;
     - '$', only when CHARSET is not a one-dimensional charset;
     - one intermediate byte selected by REG: one of "()*+" for a
       94-char set, one of ",-./" otherwise.  This byte is left out
       (short form) only for a multi-dimensional 94-char set
       designated to register 0 whose final char is '@', 'A', or 'B',
       and only when CODING has CODING_FLAG_ISO_SHORT_FORM set;
     - the final char of CHARSET.

   The macro writes through the variable `dst' visible at the point of
   use.  Its arguments are evaluated more than once.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
                                                                        \
    /* Optional revision prefix.  */                                    \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
                                                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      *dst++ = '$';                                                     \
                                                                        \
    if (CHARSET_CHARS (charset) != 94)                                  \
      *dst++ = (unsigned char) (intermediate_char_96[reg]);             \
    else if (CHARSET_DIMENSION (charset) == 1                           \
             || ! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
             || reg != 0                                                \
             || final_char < '@' || final_char > 'B')                   \
      /* Every case but the short form needs the intermediate.  */      \
      *dst++ = (unsigned char) (intermediate_char_94[reg]);             \
                                                                        \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
1645
/* ENCODE_SINGLE_SHIFT_2 and ENCODE_SINGLE_SHIFT_3 emit the ISO2022
   single-shift-2 and single-shift-3 functions at `dst'.  When `coding'
   has CODING_FLAG_ISO_SEVEN_BITS set, the two-byte escape sequence
   (ESC 'N' or ESC 'O') is produced; otherwise the single control code
   (ISO_CODE_SS2 or ISO_CODE_SS3) is produced.  In both cases the
   single-shifting state of `coding' is set to 1 afterwards.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1667
/* The four macros below emit the ISO2022 locking-shift functions at
   `dst' and record in `coding' which graphic register is now invoked
   to graphic plane 0:

     ENCODE_SHIFT_IN          ISO_CODE_SI   register 0
     ENCODE_SHIFT_OUT         ISO_CODE_SO   register 1
     ENCODE_LOCKING_SHIFT_2   ESC 'n'       register 2
     ENCODE_LOCKING_SHIFT_3   ESC 'o'       register 3  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    *dst++ = ISO_CODE_SI;                               \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
  } while (0)

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    *dst++ = ISO_CODE_SO;                               \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
  } while (0)
1695
/* Emit at `dst' the single byte for a DIMENSION1 character of CHARSET
   whose position code is C1.

   The loop runs until a byte has been produced:
     - right after a single-shift, C1 is emitted with the 8th bit
       cleared (7-bit environment) or set (otherwise), and the
       single-shifting state of `coding' is cleared;
     - if CHARSET is on graphic plane 0, C1 is emitted with the 8th
       bit cleared;
     - if CHARSET is on graphic plane 1, C1 is emitted with the 8th
       bit set;
     - otherwise encode_invocation_designation is called to emit the
       designation and/or invocation codes, and the tests are
       repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? (c1 & 0x7F)                                         \
                  : (c1 | 0x80));                                       \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is on neither graphic plane yet: designate and/or        \
       invoke it, then go around again to emit the character.  */       \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1728
/* Emit at `dst' the two bytes for a DIMENSION2 character of CHARSET
   whose position codes are C1 and C2.

   The loop runs until the bytes have been produced:
     - right after a single-shift, both codes are emitted with the 8th
       bit cleared (7-bit environment) or set (otherwise), and the
       single-shifting state of `coding' is cleared;
     - if CHARSET is on graphic plane 0, both codes are emitted with
       the 8th bit cleared;
     - if CHARSET is on graphic plane 1, both codes are emitted with
       the 8th bit set;
     - otherwise encode_invocation_designation is called to emit the
       designation and/or invocation codes, and the tests are
       repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is on neither graphic plane yet: designate and/or        \
       invoke it, then go around again to emit the character.  */       \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1761
/* Encode character C at `dst'.

   C is split into its charset and position codes with SPLIT_CHAR.
   If the charset is not defined, the position codes are copied to
   `dst' as they are (the second one only when it is non-negative).
   Otherwise the character is handed to the DIMENSION1 or DIMENSION2
   encoder, after two optional charset substitutions selected by
   `coding->flags':
     CODING_FLAG_ISO_USE_ROMAN   ASCII is encoded as
                                 charset_latin_jisx0201;
     CODING_FLAG_ISO_USE_OLDJIS  charset_jisx0208 is encoded as
                                 charset_jisx0208_1978.  */

#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int charset, c1, c2;                                                \
                                                                        \
    SPLIT_CHAR (c, charset, c1, c2);                                    \
    if (! CHARSET_DEFINED_P (charset))                                  \
      {                                                                 \
        *dst++ = c1;                                                    \
        if (c2 >= 0)                                                    \
          *dst++ = c2;                                                  \
      }                                                                 \
    else if (CHARSET_DIMENSION (charset) != 1)                          \
      {                                                                 \
        if (charset == charset_jisx0208                                 \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))            \
          charset = charset_jisx0208_1978;                              \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);              \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (charset == CHARSET_ASCII                                    \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))             \
          charset = charset_latin_jisx0201;                             \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);                  \
      }                                                                 \
  } while (0)
1791
1792
/* Instead of encoding character C itself, encode the substitution
   character CODING_INHIBIT_CHARACTER_SUBSTITUTION: once, and then a
   second time when the width of C's charset is greater than 1.  */

#define ENCODE_UNSAFE_CHARACTER(c)                                      \
  do {                                                                  \
    ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
    if (CHARSET_WIDTH (CHAR_CHARSET (c)) >= 2)                          \
      {                                                                 \
        ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);   \
      }                                                                 \
  } while (0)
1801
1802
1803 /* Produce designation and invocation codes at a place pointed by DST
1804    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1805    Return new DST.  */
1806
1807 unsigned char *
1808 encode_invocation_designation (charset, coding, dst)
1809      int charset;
1810      struct coding_system *coding;
1811      unsigned char *dst;
1812 {
1813   int reg;                      /* graphic register number */
1814
1815   /* At first, check designations.  */
1816   for (reg = 0; reg < 4; reg++)
1817     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1818       break;
1819
1820   if (reg >= 4)
1821     {
1822       /* CHARSET is not yet designated to any graphic registers.  */
1823       /* At first check the requested designation.  */
1824       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1825       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1826         /* Since CHARSET requests no special designation, designate it
1827            to graphic register 0.  */
1828         reg = 0;
1829
1830       ENCODE_DESIGNATION (charset, reg, coding);
1831     }
1832
1833   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1834       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1835     {
1836       /* Since the graphic register REG is not invoked to any graphic
1837          planes, invoke it to graphic plane 0.  */
1838       switch (reg)
1839         {
1840         case 0:                 /* graphic register 0 */
1841           ENCODE_SHIFT_IN;
1842           break;
1843
1844         case 1:                 /* graphic register 1 */
1845           ENCODE_SHIFT_OUT;
1846           break;
1847
1848         case 2:                 /* graphic register 2 */
1849           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1850             ENCODE_SINGLE_SHIFT_2;
1851           else
1852             ENCODE_LOCKING_SHIFT_2;
1853           break;
1854
1855         case 3:                 /* graphic register 3 */
1856           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1857             ENCODE_SINGLE_SHIFT_3;
1858           else
1859             ENCODE_LOCKING_SHIFT_3;
1860           break;
1861         }
1862     }
1863
1864   return dst;
1865 }
1866
/* Emit at `dst' the two bytes which encode composition rule RULE.
   RULE is decoded into two reference values with
   COMPOSITION_DECODE_RULE; the first byte is the first value plus
   32 + 81, the second byte is the second value plus 32.  */

#define ENCODE_COMPOSITION_RULE(rule)                   \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    COMPOSITION_DECODE_RULE (rule, gref, nref);         \
    *dst++ = (32 + 81) + gref;                          \
    *dst++ = 32 + nref;                                 \
  } while (0)
1876
/* Emit at `dst' the escape sequence which starts a composition.
   DATA points to an array of integers describing the composition (see
   the comment in coding.h for its format); DATA[3] is stored in
   `coding->composing' and selects the sequence:

     COMPOSITION_RELATIVE        ESC '0'
     COMPOSITION_WITH_ALTCHARS   ESC '3'
     anything else               ESC '4'

   For the latter two, `coding->cmp_data_index' is set to 4 past
   `coding->cmp_data_start' and `coding->composition_rule_follows' is
   cleared.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
        coding->composition_rule_follows = 0;                           \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
1896
/* Emit at `dst' the sequence ESC '1' which ends the current
   composition, and update CODING: `cmp_data_start' is advanced by
   DATA[0], and `composing' becomes COMPOSITION_NO.  When that leaves
   `cmp_data_start' equal to the `used' count of the current
   composition data block and a next block exists, CODING moves on to
   the next block with `cmp_data_start' reset to 0.  */

#define ENCODE_COMPOSITION_END(coding, data)                            \
  do {                                                                  \
    *dst++ = ISO_CODE_ESC;                                              \
    *dst++ = '1';                                                       \
    coding->cmp_data_start += data[0];                                  \
    coding->composing = COMPOSITION_NO;                                 \
    if (coding->cmp_data->next                                          \
        && coding->cmp_data_start == coding->cmp_data->used)            \
      {                                                                 \
        /* Current block is used up; continue with the next one.  */    \
        coding->cmp_data_start = 0;                                     \
        coding->cmp_data = coding->cmp_data->next;                      \
      }                                                                 \
  } while (0)
1912
/* Emit at `dst' the composition start sequence ESC '0' and mark
   CODING as composing with COMPOSITION_RELATIVE.  This does not
   start a new composition: it tells the reader that the components
   (alternate chars and composition rules) of the current composition
   have just been produced and that the actual text follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)           \
  do {                                                  \
    coding->composing = COMPOSITION_RELATIVE;           \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = '0';                                       \
  } while (0)
1924
/* The following three macros produce codes for indicating direction
   of text.  Each one writes through `dst' and reads `coding->flags',
   both taken from the point of use, and each expands to a single
   statement.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the control sequence
   introducer: ESC '[' when CODING_FLAG_ISO_SEVEN_BITS is set in
   `coding->flags', the single control code ISO_CODE_CSI otherwise.
   The flag is tested with `&', like every other test of this flag in
   this file; comparing the whole flag word with `==' would select the
   8-bit code as soon as any other flag is set as well.

   ENCODE_DIRECTION_R2L emits the introducer followed by '2' ']'
   (start of right-to-left direction); ENCODE_DIRECTION_L2R emits the
   introducer followed by '0' ']' (end of the current direction).
   Because the introducer macro is a `do ... while (0)' statement that
   takes no argument, it is used here as a statement of its own rather
   than as an operand of the comma operator.  */

#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = '[';                                   \
      }                                                 \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)

#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
1940
/* Emit at `dst' the codes which bring the graphic planes and
   registers of `coding' back to their initial state: a shift-in if
   graphic plane 0 does not currently invoke register 0, then, for
   each of the four graphic registers which has an initial designation
   (a non-negative value) different from its current one, the sequence
   designating that initial charset again.  */

#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg;                                                                \
                                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
                                                                            \
    for (reg = 0; reg < 4; reg++)                                           \
      {                                                                     \
        /* No initial designation for this register.  */                    \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) < 0)          \
          continue;                                                         \
        /* Already in its initial state.  */                                \
        if (CODING_SPEC_ISO_DESIGNATION (coding, reg)                       \
            == CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))           \
          continue;                                                         \
        ENCODE_DESIGNATION                                                  \
          (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
      }                                                                     \
  } while (0)
1955
1956 /* Produce designation sequences of charsets in the line started from
1957    SRC to a place pointed by DST, and return updated DST.
1958
1959    If the current block ends before any end-of-line, we may fail to
1960    find all the necessary designations.  */
1961
1962 static unsigned char *
1963 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
1964      struct coding_system *coding;
1965      Lisp_Object translation_table;
1966      unsigned char *src, *src_end, *dst;
1967 {
1968   int charset, c, found = 0, reg;
1969   /* Table of charsets to be designated to each graphic register.  */
1970   int r[4];
1971
1972   for (reg = 0; reg < 4; reg++)
1973     r[reg] = -1;
1974
1975   while (found < 4)
1976     {
1977       ONE_MORE_CHAR (c);
1978       if (c == '\n')
1979         break;
1980
1981       charset = CHAR_CHARSET (c);
1982       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1983       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1984         {
1985           found++;
1986           r[reg] = charset;
1987         }
1988     }
1989
1990  label_end_of_loop:
1991   if (found)
1992     {
1993       for (reg = 0; reg < 4; reg++)
1994         if (r[reg] >= 0
1995             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1996           ENCODE_DESIGNATION (r[reg], reg, coding);
1997     }
1998
1999   return dst;
2000 }
2001
2002 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
        Encode the SRC_BYTES bytes of text at SOURCE in an ISO 2022 based
        coding system described by CODING, storing the result at
        DESTINATION.

        On return, `coding->consumed' is the number of source bytes
        processed, `coding->consumed_char' the number of characters read
        from the source, and `coding->produced' and
        `coding->produced_char' the number of bytes written.
        `coding->errors' counts non-ASCII single-byte characters that were
        copied as is.  `coding->result' is set to
        CODING_FINISH_INSUFFICIENT_DST here when the destination runs
        short.  */
2003
2004 static void
2005 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2006      struct coding_system *coding;
2007      unsigned char *source, *destination;
2008      int src_bytes, dst_bytes;
2009 {
2010   unsigned char *src = source;
2011   unsigned char *src_end = source + src_bytes;
2012   unsigned char *dst = destination;
2013   unsigned char *dst_end = destination + dst_bytes;
2014   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2015      from DST_END to assure overflow checking is necessary only at the
2016      head of loop.  */
2017   unsigned char *adjusted_dst_end = dst_end - 19;
2018   /* SRC_BASE remembers the start position in source in each loop.
2019      The loop will be exited when there's not enough source text to
2020      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2021      there's not enough destination area to produce encoded codes
2022      (within macro EMIT_BYTES).  */
2023   unsigned char *src_base;
2024   int c;
2025   Lisp_Object translation_table;
2026   Lisp_Object safe_chars;
2027
2028   safe_chars = coding_safe_chars (coding);
2029
       /* Use the coding system's own translation table, falling back to
          the standard one, unless character translation is disabled.  */
2030   if (NILP (Venable_character_translation))
2031     translation_table = Qnil;
2032   else
2033     {
2034       translation_table = coding->translation_table_for_encode;
2035       if (NILP (translation_table))
2036         translation_table = Vstandard_translation_table_for_encode;
2037     }
2038
2039   coding->consumed_char = 0;
2040   coding->errors = 0;
2041   while (1)
2042     {
2043       src_base = src;
2044
           /* Leave room for the worst case of one iteration.
              NOTE(review): when DST_BYTES is 0 the limit is taken
              relative to SRC; presumably source and destination then
              share one buffer -- confirm against the callers.  */
2045       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2046         {
2047           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2048           break;
2049         }
2050
2051       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2052           && CODING_SPEC_ISO_BOL (coding))
2053         {
2054           /* We have to produce designation sequences if any now.  */
2055           dst = encode_designation_at_bol (coding, translation_table,
2056                                            src, src_end, dst);
2057           CODING_SPEC_ISO_BOL (coding) = 0;
2058         }
2059
2060       /* Check composition start and end.  */
2061       if (coding->composing != COMPOSITION_DISABLED
2062           && coding->cmp_data_start < coding->cmp_data->used)
2063         {
2064           struct composition_data *cmp_data = coding->cmp_data;
2065           int *data = cmp_data->data + coding->cmp_data_start;
               /* Character position of the next source character, in the
                  same units as the positions recorded in DATA.  */
2066           int this_pos = cmp_data->char_offset + coding->consumed_char;
2067
2068           if (coding->composing == COMPOSITION_RELATIVE)
2069             {
2070               if (this_pos == data[2])
2071                 {
2072                   ENCODE_COMPOSITION_END (coding, data);
                       /* Reload; the macro above may have moved on to the
                          next composition record (TODO confirm).  */
2073                   cmp_data = coding->cmp_data;
2074                   data = cmp_data->data + coding->cmp_data_start;
2075                 }
2076             }
2077           else if (COMPOSING_P (coding))
2078             {
2079               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2080               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2081                 /* We have consumed components of the composition.
2082                    What follows in SRC is the compositions's base
2083                    text.  */
2084                 ENCODE_COMPOSITION_FAKE_START (coding);
2085               else
2086                 {
                       /* This C shadows the function-level C: it is a
                          component (or rule) taken from the composition
                          data, not a character read from SRC.  */
2087                   int c = cmp_data->data[coding->cmp_data_index++];
2088                   if (coding->composition_rule_follows)
2089                     {
2090                       ENCODE_COMPOSITION_RULE (c);
2091                       coding->composition_rule_follows = 0;
2092                     }
2093                   else
2094                     {
2095                       if (coding->flags & CODING_FLAG_ISO_SAFE
2096                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2097                         ENCODE_UNSAFE_CHARACTER (c);
2098                       else
2099                         ENCODE_ISO_CHARACTER (c);
2100                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2101                         coding->composition_rule_follows = 1;
2102                     }
                       /* Nothing was read from SRC in this iteration, so
                          skip the `consumed_char' increment below.  */
2103                   continue;
2104                 }
2105             }
2106           if (!COMPOSING_P (coding))
2107             {
2108               if (this_pos == data[1])
2109                 {
2110                   ENCODE_COMPOSITION_START (coding, data);
2111                   continue;
2112                 }
2113             }
2114         }
2115
2116       ONE_MORE_CHAR (c);
2117
2118       /* Now encode the character C.  */
2119       if (c < 0x20 || c == 0x7F)
2120         {
2121           if (c == '\r')
2122             {
2123               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2124                 {
2125                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2126                     ENCODE_RESET_PLANE_AND_REGISTER;
2127                   *dst++ = c;
                       /* NOTE(review): this path skips the
                          `consumed_char' increment at the bottom of the
                          loop although a character was read -- confirm
                          that this is intended.  */
2128                   continue;
2129                 }
2130               /* fall down to treat '\r' as '\n' ...  */
2131               c = '\n';
2132             }
2133           if (c == '\n')
2134             {
2135               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2136                 ENCODE_RESET_PLANE_AND_REGISTER;
2137               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2138                 bcopy (coding->spec.iso2022.initial_designation,
2139                        coding->spec.iso2022.current_designation,
2140                        sizeof coding->spec.iso2022.initial_designation);
                   /* Emit the end-of-line in the requested format; an
                      undecided format is written as LF.  */
2141               if (coding->eol_type == CODING_EOL_LF
2142                   || coding->eol_type == CODING_EOL_UNDECIDED)
2143                 *dst++ = ISO_CODE_LF;
2144               else if (coding->eol_type == CODING_EOL_CRLF)
2145                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2146               else
2147                 *dst++ = ISO_CODE_CR;
                   /* The next iteration starts a new line.  */
2148               CODING_SPEC_ISO_BOL (coding) = 1;
2149             }
2150           else
2151             {
2152               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2153                 ENCODE_RESET_PLANE_AND_REGISTER;
2154               *dst++ = c;
2155             }
2156         }
2157       else if (ASCII_BYTE_P (c))
2158         ENCODE_ISO_CHARACTER (c);
2159       else if (SINGLE_BYTE_CHAR_P (c))
2160         {
               /* A non-ASCII single-byte character is copied as is and
                  counted as an error.  */
2161           *dst++ = c;
2162           coding->errors++;
2163         }
2164       else if (coding->flags & CODING_FLAG_ISO_SAFE
2165                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2166         ENCODE_UNSAFE_CHARACTER (c);
2167       else
2168         ENCODE_ISO_CHARACTER (c);
2169
2170       coding->consumed_char++;
2171     }
2172
2173  label_end_of_loop:
       /* Report SRC_BASE rather than SRC: whatever the interrupted
          iteration had already read is not counted as consumed.  */
2174   coding->consumed = src_base - source;
2175   coding->produced = coding->produced_char = dst - destination;
2176 }
2177
2178 \f
2179 /*** 4. SJIS and BIG5 handlers ***/
2180
2181 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2182    quite widely.  So, for the moment, Emacs supports them in the bare
2183    C code.  But, in the future, they may be supported only by CCL.  */
2184
2185 /* SJIS is a coding system encoding three character sets: ASCII, right
2186    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2187    as is.  A character of charset katakana-jisx0201 is encoded by
2188    "position-code + 0x80".  A character of charset japanese-jisx0208
2189    is encoded in two bytes, but its two position-codes are divided and
2190    shifted so that they fit in the ranges below.
2191
2192    --- CODE RANGE of SJIS ---
2193    (character set)      (range)
2194    ASCII                0x00 .. 0x7F
2195    KATAKANA-JISX0201    0xA0 .. 0xDF
2196    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2197             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2198    -------------------------------
2199
2200 */
2201
2202 /* BIG5 is a coding system encoding two character sets: ASCII and
2203    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2204    character set and is encoded in two-byte.
2205
2206    --- CODE RANGE of BIG5 ---
2207    (character set)      (range)
2208    ASCII                0x00 .. 0x7F
2209    Big5 (1st byte)      0xA1 .. 0xFE
2210         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2211    --------------------------
2212
2213    Since the number of characters in Big5 is larger than maximum
2214    characters in Emacs' charset (96x96), it can't be handled as one
2215    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2216    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2217    contains frequently used characters and the latter contains less
2218    frequently used characters.  */
2219
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every argument is parenthesized in the expansion, so arbitrary
   expressions may be passed for the input arguments.  Input arguments
   may be evaluated more than once, so they must be free of side
   effects.  The output arguments must be lvalues; they may be the
   same variables as the inputs, because all inputs are read into a
   temporary before any output is stored.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

#define DECODE_BIG5(b1, b2, charset, c1, c2)				\
  do {									\
    /* Linear index of the character within the whole BIG5 table.  */	\
    unsigned int big5_temp_						\
      = (((b1) - 0xA1) * BIG5_SAME_ROW					\
	 + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));				\
    if ((b1) < 0xC9)							\
      (charset) = charset_big5_1;					\
    else								\
      {									\
	(charset) = charset_big5_2;					\
	big5_temp_ -= (0xC9 - 0xA1) * BIG5_SAME_ROW;			\
      }									\
    (c1) = big5_temp_ / (0xFF - 0xA1) + 0x21;				\
    (c2) = big5_temp_ % (0xFF - 0xA1) + 0x21;				\
  } while (0)

#define ENCODE_BIG5(charset, c1, c2, b1, b2)				\
  do {									\
    /* Linear index of the character within its Emacs charset.  */	\
    unsigned int big5_temp_						\
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);			\
    if ((charset) == charset_big5_2)					\
      big5_temp_ += BIG5_SAME_ROW * (0xC9 - 0xA1);			\
    (b1) = big5_temp_ / BIG5_SAME_ROW + 0xA1;				\
    (b2) = big5_temp_ % BIG5_SAME_ROW;					\
    /* Skip the gap between 0x7E and 0xA1 in the 2nd byte.  */		\
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;					\
  } while (0)
2252
2253 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2254    Check if a text is encoded in SJIS.  If it is, return
2255    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2256
2257 int
2258 detect_coding_sjis (src, src_end)
2259      unsigned char *src, *src_end;
2260 {
2261   int c;
2262   /* Dummy for ONE_MORE_BYTE.  */
2263   struct coding_system dummy_coding;
2264   struct coding_system *coding = &dummy_coding;
2265
2266   while (1)
2267     {
2268       ONE_MORE_BYTE (c);
2269       if (c >= 0x81)
2270         {
2271           if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF))
2272             {
2273               ONE_MORE_BYTE (c);
2274               if (c < 0x40 || c == 0x7F || c > 0xFC)
2275                 return 0;
2276             }
2277           else if (c > 0xDF)
2278             return 0;
2279         }
2280     }
2281  label_end_of_loop:
2282   return CODING_CATEGORY_MASK_SJIS;
2283 }
2284
2285 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2286    Check if a text is encoded in BIG5.  If it is, return
2287    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2288
2289 int
2290 detect_coding_big5 (src, src_end)
2291      unsigned char *src, *src_end;
2292 {
2293   int c;
2294   /* Dummy for ONE_MORE_BYTE.  */
2295   struct coding_system dummy_coding;
2296   struct coding_system *coding = &dummy_coding;
2297
2298   while (1)
2299     {
2300       ONE_MORE_BYTE (c);
2301       if (c >= 0xA1)
2302         {
2303           ONE_MORE_BYTE (c);
2304           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2305             return 0;
2306         }
2307     }
2308  label_end_of_loop:
2309   return CODING_CATEGORY_MASK_BIG5;
2310 }
2311
2312 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2313    Check if a text is encoded in UTF-8.  If it is, return
2314    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2315
2316 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
2317 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
2318 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2319 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2320 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2321 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2322 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2323
2324 int
2325 detect_coding_utf_8 (src, src_end)
2326      unsigned char *src, *src_end;
2327 {
2328   unsigned char c;
2329   int seq_maybe_bytes;
2330   /* Dummy for ONE_MORE_BYTE.  */
2331   struct coding_system dummy_coding;
2332   struct coding_system *coding = &dummy_coding;
2333
2334   while (1)
2335     {
2336       ONE_MORE_BYTE (c);
2337       if (UTF_8_1_OCTET_P (c))
2338         continue;
2339       else if (UTF_8_2_OCTET_LEADING_P (c))
2340         seq_maybe_bytes = 1;
2341       else if (UTF_8_3_OCTET_LEADING_P (c))
2342         seq_maybe_bytes = 2;
2343       else if (UTF_8_4_OCTET_LEADING_P (c))
2344         seq_maybe_bytes = 3;
2345       else if (UTF_8_5_OCTET_LEADING_P (c))
2346         seq_maybe_bytes = 4;
2347       else if (UTF_8_6_OCTET_LEADING_P (c))
2348         seq_maybe_bytes = 5;
2349       else
2350         return 0;
2351
2352       do
2353         {
2354           ONE_MORE_BYTE (c);
2355           if (!UTF_8_EXTRA_OCTET_P (c))
2356             return 0;
2357           seq_maybe_bytes--;
2358         }
2359       while (seq_maybe_bytes > 0);
2360     }
2361
2362  label_end_of_loop:
2363   return CODING_CATEGORY_MASK_UTF_8;
2364 }
2365
2366 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2367    Check if a text starts with a UTF-16 byte order mark.  If the
2368    first two bytes are FE FF, return CODING_CATEGORY_MASK_UTF_16_BE;
2369    if they are FF FE, return CODING_CATEGORY_MASK_UTF_16_LE;
2370    else return 0.  */
2371
/* Nonzero if the 16-bit code unit VAL is one of the two values that
   are never valid in UTF-16 text.  */
#define UTF_16_INVALID_P(val)	\
  (((val) == 0xFFFE)		\
   || ((val) == 0xFFFF))

/* Nonzero if the 16-bit code unit VAL is a high (leading) surrogate,
   i.e. in the range 0xD800..0xDBFF.  The top six bits identify the
   range, hence the mask 0xFC00.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit code unit VAL is a low (trailing) surrogate,
   i.e. in the range 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2381
2382 int
2383 detect_coding_utf_16 (src, src_end)
2384      unsigned char *src, *src_end;
2385 {
2386   unsigned char c1, c2;
2387   /* Dummy for TWO_MORE_BYTES.  */
2388   struct coding_system dummy_coding;
2389   struct coding_system *coding = &dummy_coding;
2390
2391   TWO_MORE_BYTES (c1, c2);
2392
2393   if ((c1 == 0xFF) && (c2 == 0xFE))
2394     return CODING_CATEGORY_MASK_UTF_16_LE;
2395   else if ((c1 == 0xFE) && (c2 == 0xFF))
2396     return CODING_CATEGORY_MASK_UTF_16_BE;
2397
2398  label_end_of_loop:
2399   return 0;
2400 }
2401
2402 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2403    If SJIS_P is 1, decode SJIS text, else decode BIG5 text.

        Decode the SRC_BYTES bytes at SOURCE and emit the resulting
        characters at DESTINATION.  End-of-line conversion is done here
        too, according to `coding->eol_type'.  A byte sequence that is not
        valid in the coding system is counted in `coding->errors' and its
        first byte is emitted as is.

        On return, `coding->consumed' and `coding->consumed_char' are both
        the number of source bytes processed, and `coding->produced' is
        the number of bytes written.  */
2404
2405 static void
2406 decode_coding_sjis_big5 (coding, source, destination,
2407                          src_bytes, dst_bytes, sjis_p)
2408      struct coding_system *coding;
2409      unsigned char *source, *destination;
2410      int src_bytes, dst_bytes;
2411      int sjis_p;
2412 {
2413   unsigned char *src = source;
2414   unsigned char *src_end = source + src_bytes;
2415   unsigned char *dst = destination;
2416   unsigned char *dst_end = destination + dst_bytes;
2417   /* SRC_BASE remembers the start position in source in each loop.
2418      The loop will be exited when there's not enough source code
2419      (within macro ONE_MORE_BYTE), or when there's not enough
2420      destination area to produce a character (within macro
2421      EMIT_CHAR).  */
2422   unsigned char *src_base;
2423   Lisp_Object translation_table;
2424
       /* Use the coding system's own translation table, falling back to
          the standard one, unless character translation is disabled.  */
2425   if (NILP (Venable_character_translation))
2426     translation_table = Qnil;
2427   else
2428     {
2429       translation_table = coding->translation_table_for_decode;
2430       if (NILP (translation_table))
2431         translation_table = Vstandard_translation_table_for_decode;
2432     }
2433
2434   coding->produced_char = 0;
2435   while (1)
2436     {
2437       int c, charset, c1, c2;
2438
2439       src_base = src;
2440       ONE_MORE_BYTE (c1);
2441
2442       if (c1 < 0x80)
2443         {
2444           charset = CHARSET_ASCII;
2445           if (c1 < 0x20)
2446             {
2447               if (c1 == '\r')
2448                 {
2449                   if (coding->eol_type == CODING_EOL_CRLF)
2450                     {
2451                       ONE_MORE_BYTE (c2);
2452                       if (c2 == '\n')
                             /* CR LF collapses to a single newline.  */
2453                         c1 = c2;
2454                       else if (coding->mode
2455                                & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2456                         {
2457                           coding->result = CODING_FINISH_INCONSISTENT_EOL;
2458                           goto label_end_of_loop;
2459                         }
2460                       else
2461                         /* To process C2 again, SRC is subtracted by 1.  */
2462                         src--;
2463                     }
2464                   else if (coding->eol_type == CODING_EOL_CR)
2465                     c1 = '\n';
2466                 }
2467               else if (c1 == '\n'
2468                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2469                        && (coding->eol_type == CODING_EOL_CR
2470                            || coding->eol_type == CODING_EOL_CRLF))
2471                 {
                       /* A bare LF where CR or CR LF line endings are
                          expected.  */
2472                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2473                   goto label_end_of_loop;
2474                 }
2475             }
2476         }
2477       else
2478         {
2479           if (sjis_p)
2480             {
2481               if (c1 >= 0xF0)
2482                 goto label_invalid_code;
2483               if (c1 < 0xA0 || c1 >= 0xE0)
2484                 {
2485                   /* SJIS -> JISX0208 */
2486                   ONE_MORE_BYTE (c2);
2487                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2488                     goto label_invalid_code;
2489                   DECODE_SJIS (c1, c2, c1, c2);
2490                   charset = charset_jisx0208;
2491                 }
2492               else
2493                 /* SJIS -> JISX0201-Kana */
                     /* Here C1 is in the range 0xA0..0xDF.  */
2494                 charset = charset_katakana_jisx0201;
2495             }
2496           else
2497             {
2498               /* BIG5 -> Big5 */
2499               if (c1 < 0xA1 || c1 > 0xFE)
2500                 goto label_invalid_code;
2501               ONE_MORE_BYTE (c2);
2502               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2503                 goto label_invalid_code;
2504               DECODE_BIG5 (c1, c2, charset, c1, c2);
2505             }
2506         }
2507
           /* NOTE(review): C2 has not been assigned on the one-byte
              paths (ASCII and JISX0201-Kana); presumably
              DECODE_ISO_CHARACTER ignores it for one-byte charsets --
              confirm against the macro.  */
2508       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2509       EMIT_CHAR (c);
2510       continue;
2511
2512     label_invalid_code:
           /* Rewind to the start of the offending sequence, emit its
              first byte as is, and resume decoding with the byte after
              it.  */
2513       coding->errors++;
2514       src = src_base;
2515       c = *src++;
2516       EMIT_CHAR (c);
2517     }
2518
2519  label_end_of_loop:
       /* Report SRC_BASE rather than SRC: whatever the interrupted
          iteration had already read is not counted as consumed.  */
2520   coding->consumed = coding->consumed_char = src_base - source;
2521   coding->produced = dst - destination;
2522   return;
2523 }
2524
2525 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2526    This function can encode charsets `ascii', `katakana-jisx0201',
2527    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
2528    are sure that all these charsets are registered as official charset
2529    (i.e. do not have extended leading-codes).  Characters of other
2530    charsets are produced without any encoding.  If SJIS_P is 1, encode
2531    SJIS text, else encode BIG5 text.  */
2532
2533 static void
2534 encode_coding_sjis_big5 (coding, source, destination,
2535                          src_bytes, dst_bytes, sjis_p)
2536      struct coding_system *coding;
2537      unsigned char *source, *destination;
2538      int src_bytes, dst_bytes;
2539      int sjis_p;
2540 {
2541   unsigned char *src = source;
2542   unsigned char *src_end = source + src_bytes;
2543   unsigned char *dst = destination;
2544   unsigned char *dst_end = destination + dst_bytes;
2545   /* SRC_BASE remembers the start position in source in each loop.
2546      The loop will be exited when there's not enough source text to
2547      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2548      there's not enough destination area to produce encoded codes
2549      (within macro EMIT_BYTES).  */
2550   unsigned char *src_base;
2551   Lisp_Object translation_table;
2552
2553   if (NILP (Venable_character_translation))
2554     translation_table = Qnil;
2555   else
2556     {
2557       translation_table = coding->translation_table_for_encode;
2558       if (NILP (translation_table))
2559         translation_table = Vstandard_translation_table_for_encode;
2560     }
2561
2562   while (1)
2563     {
2564       int c, charset, c1, c2;
2565
2566       src_base = src;
2567       ONE_MORE_CHAR (c);
2568
2569       /* Now encode the character C.  */
2570       if (SINGLE_BYTE_CHAR_P (c))
2571         {
2572           switch (c)
2573             {
2574             case '\r':
2575               if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2576                 {
2577                   EMIT_ONE_BYTE (c);
2578                   break;
2579                 }
2580               c = '\n';
2581             case '\n':
2582               if (coding->eol_type == CODING_EOL_CRLF)
2583                 {
2584                   EMIT_TWO_BYTES ('\r', c);
2585                   break;
2586                 }
2587               else if (coding->eol_type == CODING_EOL_CR)
2588                 c = '\r';
2589             default:
2590               EMIT_ONE_BYTE (c);
2591             }
2592         }
2593       else
2594         {
2595           SPLIT_CHAR (c, charset, c1, c2);
2596           if (sjis_p)
2597             {
2598               if (charset == charset_jisx0208
2599                   || charset == charset_jisx0208_1978)
2600                 {
2601                   ENCODE_SJIS (c1, c2, c1, c2);
2602                   EMIT_TWO_BYTES (c1, c2);
2603                 }
2604               else if (charset == charset_katakana_jisx0201)
2605                 EMIT_ONE_BYTE (c1 | 0x80);
2606               else if (charset == charset_latin_jisx0201)
2607                 EMIT_ONE_BYTE (c1);
2608               else
2609                 /* There's no way other than producing the internal
2610                    codes as is.  */
2611                 EMIT_BYTES (src_base, src);
2612             }
2613           else
2614             {
2615               if (charset == charset_big5_1 || charset == charset_big5_2)
2616                 {
2617                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
2618                   EMIT_TWO_BYTES (c1, c2);
2619                 }
2620               else
2621                 /* There's no way other than producing the internal
2622                    codes as is.  */
2623                 EMIT_BYTES (src_base, src);
2624             }
2625         }
2626       coding->consumed_char++;
2627     }
2628
2629  label_end_of_loop:
2630   coding->consumed = src_base - source;
2631   coding->produced = coding->produced_char = dst - destination;
2632 }
2633
2634 \f
2635 /*** 5. CCL handlers ***/
2636
2637 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2638    Check if a text is encoded in a coding system of which
2639    encoder/decoder are written in CCL program.  If it is, return
2640    CODING_CATEGORY_MASK_CCL, else return 0.  */
2641
2642 int
2643 detect_coding_ccl (src, src_end)
2644      unsigned char *src, *src_end;
2645 {
2646   unsigned char *valid;
2647   int c;
2648   /* Dummy for ONE_MORE_BYTE.  */
2649   struct coding_system dummy_coding;
2650   struct coding_system *coding = &dummy_coding;
2651
2652   /* No coding system is assigned to coding-category-ccl.  */
2653   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2654     return 0;
2655
2656   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2657   while (1)
2658     {
2659       ONE_MORE_BYTE (c);
2660       if (! valid[c])
2661         return 0;
2662     }
2663  label_end_of_loop:
2664   return CODING_CATEGORY_MASK_CCL;
2665 }
2666
2667 \f
2668 /*** 6. End-of-line handlers ***/
2669
2670 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Decode the end-of-line format of the SRC_BYTES bytes at SOURCE
        according to `coding->eol_type', emitting the result at
        DESTINATION.  For CODING_EOL_CRLF a CR LF pair becomes LF; for
        CODING_EOL_CR each CR becomes LF; for any other type the bytes
        are emitted unchanged.

        If `coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL' is
        nonzero, a line ending that does not match the expected format
        stops the conversion with `coding->result' set to
        CODING_FINISH_INCONSISTENT_EOL.

        On return, `coding->consumed' and `coding->consumed_char' are
        both the number of source bytes processed, and
        `coding->produced' is the number of bytes written.  */
2671
2672 static void
2673 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2674      struct coding_system *coding;
2675      unsigned char *source, *destination;
2676      int src_bytes, dst_bytes;
2677 {
2678   unsigned char *src = source;
2679   unsigned char *dst = destination;
2680   unsigned char *src_end = src + src_bytes;
2681   unsigned char *dst_end = dst + dst_bytes;
2682   Lisp_Object translation_table;
2683   /* SRC_BASE remembers the start position in source in each loop.
2684      The loop will be exited when there's not enough source code
2685      (within macro ONE_MORE_BYTE), or when there's not enough
2686      destination area to produce a character (within macro
2687      EMIT_CHAR).  */
2688   unsigned char *src_base;
2689   int c;
2690
       /* No character translation is done here.  NOTE(review): the
          variable is presumably kept because a macro used below refers
          to it -- confirm.  */
2691   translation_table = Qnil;
2692   switch (coding->eol_type)
2693     {
2694     case CODING_EOL_CRLF:
2695       while (1)
2696         {
2697           src_base = src;
2698           ONE_MORE_BYTE (c);
2699           if (c == '\r')
2700             {
                   /* If the source ends right after this CR, the macro
                      below leaves the loop with SRC_BASE still at the
                      CR, so the CR is not counted as consumed.  */
2701               ONE_MORE_BYTE (c);
2702               if (c != '\n')
2703                 {
2704                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2705                     {
2706                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
2707                       goto label_end_of_loop;
2708                     }
                       /* A lone CR: put back the byte that followed it
                          and emit the CR itself.  */
2709                   src--;
2710                   c = '\r';
2711                 }
2712             }
2713           else if (c == '\n'
2714                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2715             {
                   /* A bare LF where CR LF is expected.  */
2716               coding->result = CODING_FINISH_INCONSISTENT_EOL;
2717               goto label_end_of_loop;
2718             }
2719           EMIT_CHAR (c);
2720         }
2721       break;
2722
2723     case CODING_EOL_CR:
2724       while (1)
2725         {
2726           src_base = src;
2727           ONE_MORE_BYTE (c);
2728           if (c == '\n')
2729             {
                   /* An LF where CR is expected; it is emitted as is
                      unless inconsistency is inhibited.  */
2730               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2731                 {
2732                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2733                   goto label_end_of_loop;
2734                 }
2735             }
2736           else if (c == '\r')
2737             c = '\n';
2738           EMIT_CHAR (c);
2739         }
2740       break;
2741
2742     default:                    /* no need for EOL handling */
2743       while (1)
2744         {
2745           src_base = src;
2746           ONE_MORE_BYTE (c);
2747           EMIT_CHAR (c);
2748         }
2749     }
2750
2751  label_end_of_loop:
       /* Report SRC_BASE rather than SRC: whatever the interrupted
          iteration had already read is not counted as consumed.  */
2752   coding->consumed = coding->consumed_char = src_base - source;
2753   coding->produced = dst - destination;
2754   return;
2755 }
2756
2757 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2758    format of end-of-line according to `coding->eol_type'.  It also
2759    convert multibyte form 8-bit characers to unibyte if
2760    CODING->src_multibyte is nonzero.  If `coding->mode &
2761    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
2762    also means end-of-line.  */
2763
2764 static void
2765 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2766      struct coding_system *coding;
2767      unsigned char *source, *destination;
2768      int src_bytes, dst_bytes;
2769 {
2770   unsigned char *src = source;
2771   unsigned char *dst = destination;
2772   unsigned char *src_end = src + src_bytes;
2773   unsigned char *dst_end = dst + dst_bytes;
2774   Lisp_Object translation_table;
2775   /* SRC_BASE remembers the start position in source in each loop.
2776      The loop will be exited when there's not enough source text to
2777      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2778      there's not enough destination area to produce encoded codes
2779      (within macro EMIT_BYTES).  */
2780   unsigned char *src_base;
2781   int c;
2782   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
2783
2784   translation_table = Qnil;
2785   if (coding->src_multibyte
2786       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
2787     {
2788       src_end--;
2789       src_bytes--;
2790       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
2791     }
2792
2793   if (coding->eol_type == CODING_EOL_CRLF)
2794     {
2795       while (src < src_end)
2796         {
2797           src_base = src;
2798           c = *src++;
2799           if (c >= 0x20)
2800             EMIT_ONE_BYTE (c);
2801           else if (c == '\n' || (c == '\r' && selective_display))
2802             EMIT_TWO_BYTES ('\r', '\n');
2803           else
2804             EMIT_ONE_BYTE (c);
2805         }
2806       src_base = src;
2807     label_end_of_loop:
2808       ;
2809     }
2810   else
2811     {
2812       if (!dst_bytes || src_bytes <= dst_bytes)
2813         {
2814           safe_bcopy (src, dst, src_bytes);
2815           src_base = src_end;
2816           dst += src_bytes;
2817         }
2818       else
2819         {
2820           if (coding->src_multibyte
2821               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
2822             dst_bytes--;
2823           safe_bcopy (src, dst, dst_bytes);
2824           src_base = src + dst_bytes;
2825           dst = destination + dst_bytes;
2826           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2827         }
2828       if (coding->eol_type == CODING_EOL_CR)
2829         {
2830           for (src = destination; src < dst; src++)
2831             if (*src == '\n') *src = '\r';
2832         }
2833       else if (selective_display)
2834         {
2835           for (src = destination; src < dst; src++)
2836             if (*src == '\r') *src = '\n';
2837         }
2838     }
2839   if (coding->src_multibyte)
2840     dst = destination + str_as_unibyte (destination, dst - destination);
2841
2842   coding->consumed = src_base - source;
2843   coding->produced = dst - destination;
2844   coding->produced_char = coding->produced;
2845 }
2846
2847 \f
2848 /*** 7. C library functions ***/
2849
2850 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2851    has a property `coding-system'.  The value of this property is a
2852    vector of length 5 (called as coding-vector).  Among elements of
2853    this vector, the first (element[0]) and the fifth (element[4])
2854    carry important information for decoding/encoding.  Before
2855    decoding/encoding, this information should be set in fields of a
2856    structure of type `coding_system'.
2857
2858    A value of property `coding-system' can be a symbol of another
2859    subsidiary coding-system.  In that case, Emacs gets coding-vector
2860    from that symbol.
2861
2862    `element[0]' contains information to be set in `coding->type'.  The
2863    possible values and their meanings are as follows:
2864
2865    0 -- coding_type_emacs_mule
2866    1 -- coding_type_sjis
2867    2 -- coding_type_iso2022
2868    3 -- coding_type_big5
2869    4 -- coding_type_ccl (encoder/decoder written in CCL)
2870    5 -- coding_type_raw_text
2871    nil -- coding_type_no_conversion
2872    t -- coding_type_undecided (detection on decoding, no-conversion on encoding)
2873
2874    `element[4]' contains information to be set in `coding->flags' and
2875    `coding->spec'.  The meaning varies by `coding->type'.
2876
2877    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2878    of length 32 (of which the first 17 sub-elements are used now).
2879    The meanings of these sub-elements are:
2880
2881    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2882         If the value is an integer of valid charset, the charset is
2883         assumed to be designated to graphic register N initially.
2884
2885         If the value is minus, it is a minus value of charset which
2886         reserves graphic register N, which means that the charset is
2887         not designated initially but should be designated to graphic
2888         register N just before encoding a character in that charset.
2889
2890         If the value is nil, graphic register N is never used on
2891         encoding.
2892
2893    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2894         Each value is either t or nil.  See the section ISO2022 of
2895         `coding.h' for more information.
2896
2897    If `coding->type' is `coding_type_big5', element[4] is t to denote
2898    BIG5-ETen or nil to denote BIG5-HKU.
2899
2900    If `coding->type' takes the other value, element[4] is ignored.
2901
2902    Emacs Lisp's coding system also carries information about format of
2903    end-of-line in a value of property `eol-type'.  If the value is
2904    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2905    means CODING_EOL_CR.  If it is not integer, it should be a vector
2906    of subsidiary coding systems of which property `eol-type' has one
2907    of above values.
2908
2909 */
2910
2911 /* Extract information for decoding/encoding from the coding system
2912    symbol CODING_SYSTEM and set it in CODING.  Return 0 on success.  If
2913    CODING_SYSTEM is nil or its specification is invalid, CODING is
2914    setup so that no conversion is necessary and -1 is returned.  */
2915
2916 int
2917 setup_coding_system (coding_system, coding)
2918      Lisp_Object coding_system;
2919      struct coding_system *coding;
2920 {
2921   Lisp_Object coding_spec, coding_type, eol_type, plist;
2922   Lisp_Object val;
2923   int i;	/* NOTE(review): looks unused; case 2 declares its own I.  */
2924
2925   /* Initialize some fields required for all kinds of coding systems.  */
2926   coding->symbol = coding_system;
2927   coding->common_flags = 0;
2928   coding->mode = 0;
2929   coding->heading_ascii = -1;
2930   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2931   coding->composing = COMPOSITION_DISABLED;
2932   coding->cmp_data = NULL;
2933
2934   if (NILP (coding_system))
2935     goto label_invalid_coding_system;
2936
2937   coding_spec = Fget (coding_system, Qcoding_system);
2938   /* The spec must be a vector of 5 elements whose element 3 is a cons.  */
2939   if (!VECTORP (coding_spec)
2940       || XVECTOR (coding_spec)->size != 5
2941       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2942     goto label_invalid_coding_system;
2943   /* NOTE(review): EOL_TYPE may be nil below, yet XFASTINT is applied; confirm.  */
2944   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2945   if (VECTORP (eol_type))
2946     {
2947       coding->eol_type = CODING_EOL_UNDECIDED;
2948       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2949     }
2950   else if (XFASTINT (eol_type) == 1)
2951     {
2952       coding->eol_type = CODING_EOL_CRLF;
2953       coding->common_flags
2954         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2955     }
2956   else if (XFASTINT (eol_type) == 2)
2957     {
2958       coding->eol_type = CODING_EOL_CR;
2959       coding->common_flags
2960         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2961     }
2962   else
2963     coding->eol_type = CODING_EOL_LF;
2964
2965   coding_type = XVECTOR (coding_spec)->contents[0];
2966   /* Try short cut: a symbol means undecided (t) or no conversion.  */
2967   if (SYMBOLP (coding_type))
2968     {
2969       if (EQ (coding_type, Qt))
2970         {
2971           coding->type = coding_type_undecided;
2972           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2973         }
2974       else
2975         coding->type = coding_type_no_conversion;
2976       return 0;
2977     }
2978
2979   /* Get values of coding system properties:
2980      `post-read-conversion', `pre-write-conversion',
2981      `translation-table-for-decode', `translation-table-for-encode'.  */
2982   plist = XVECTOR (coding_spec)->contents[3];
2983   /* Pre & post conversion functions should be disabled if
2984      inhibit_pre_post_conversion is nonzero.  This is the case that a code
2985      conversion function is called while those functions are running.  */
2986   if (! inhibit_pre_post_conversion)
2987     {
2988       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2989       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2990     }
2991   val = Fplist_get (plist, Qtranslation_table_for_decode);
2992   if (SYMBOLP (val))
2993     val = Fget (val, Qtranslation_table_for_decode);
2994   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2995   val = Fplist_get (plist, Qtranslation_table_for_encode);
2996   if (SYMBOLP (val))
2997     val = Fget (val, Qtranslation_table_for_encode);
2998   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
2999   val = Fplist_get (plist, Qcoding_category);	/* Required property.  */
3000   if (!NILP (val))
3001     {
3002       val = Fget (val, Qcoding_category_index);
3003       if (INTEGERP (val))
3004         coding->category_idx = XINT (val);
3005       else
3006         goto label_invalid_coding_system;
3007     }
3008   else
3009     goto label_invalid_coding_system;
3010
3011   /* If the coding system has non-nil `composition' property, enable
3012      composition handling.  */
3013   val = Fplist_get (plist, Qcomposition);
3014   if (!NILP (val))
3015     coding->composing = COMPOSITION_NO;
3016
3017   switch (XFASTINT (coding_type))
3018     {
3019     case 0:
3020       coding->type = coding_type_emacs_mule;
3021       if (!NILP (coding->post_read_conversion))
3022         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3023       if (!NILP (coding->pre_write_conversion))
3024         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3025       break;
3026
3027     case 1:
3028       coding->type = coding_type_sjis;
3029       coding->common_flags
3030         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3031       break;
3032
3033     case 2:
3034       coding->type = coding_type_iso2022;
3035       coding->common_flags
3036         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3037       {
3038         Lisp_Object val, temp;	/* This VAL shadows the outer VAL.  */
3039         Lisp_Object *flags;
3040         int i, charset, reg_bits = 0;	/* Bit N of REG_BITS: FLAGS[N] has t.  */
3041
3042         val = XVECTOR (coding_spec)->contents[4];
3043         /* Element 4 must be a vector of exactly 32 sub-elements.  */
3044         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3045           goto label_invalid_coding_system;
3046
3047         flags = XVECTOR (val)->contents;
3048         coding->flags
3049           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3050              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3051              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3052              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3053              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3054              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3055              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3056              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3057              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3058              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3059              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3060              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3061              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3062              );
3063
3064         /* Invoke graphic register 0 to plane 0.  */
3065         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3066         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3067         CODING_SPEC_ISO_INVOCATION (coding, 1)
3068           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3069         /* Not single shifting at first.  */
3070         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3071         /* Beginning of buffer should also be regarded as bol. */
3072         CODING_SPEC_ISO_BOL (coding) = 1;
3073         /* Set all revision numbers to 255, then apply the alist entries.  */
3074         for (charset = 0; charset <= MAX_CHARSET; charset++)
3075           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3076         val = Vcharset_revision_alist;
3077         while (CONSP (val))
3078           {
3079             charset = get_charset_id (Fcar_safe (XCAR (val)));
3080             if (charset >= 0
3081                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3082                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3083               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3084             val = XCDR (val);
3085           }
3086
3087         /* Check FLAGS[REG] (REG = 0, 1, 2, 3) and decide designations.
3088            FLAGS[REG] can be one of below:
3089                 integer CHARSET: CHARSET occupies register REG,
3090                 t: designate nothing to REG initially, but can be used
3091                   by any charsets,
3092                 list of integer, nil, or t: designate the first
3093                   element (if integer) to REG initially, the remaining
3094                   elements (if integer) are designated to REG on request,
3095                   if an element is t, REG can be used by any charsets,
3096                 nil: REG is never used.  */
3097         for (charset = 0; charset <= MAX_CHARSET; charset++)
3098           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3099             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3100         for (i = 0; i < 4; i++)
3101           {
3102             if (INTEGERP (flags[i])
3103                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3104                 || (charset = get_charset_id (flags[i])) >= 0)
3105               {
3106                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3107                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3108               }
3109             else if (EQ (flags[i], Qt))
3110               {
3111                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3112                 reg_bits |= 1 << i;
3113                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3114               }
3115             else if (CONSP (flags[i]))
3116               {
3117                 Lisp_Object tail;
3118                 tail = flags[i];
3119
3120                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3121                 if (INTEGERP (XCAR (tail))
3122                     && (charset = XINT (XCAR (tail)),
3123                         CHARSET_VALID_P (charset))
3124                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3125                   {
3126                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3127                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3128                   }
3129                 else
3130                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3131                 tail = XCDR (tail);
3132                 while (CONSP (tail))
3133                   {
3134                     if (INTEGERP (XCAR (tail))
3135                         && (charset = XINT (XCAR (tail)),
3136                             CHARSET_VALID_P (charset))
3137                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3138                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3139                         = i;
3140                     else if (EQ (XCAR (tail), Qt))
3141                       reg_bits |= 1 << i;
3142                     tail = XCDR (tail);
3143                   }
3144               }
3145             else
3146               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3147
3148             CODING_SPEC_ISO_DESIGNATION (coding, i)
3149               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3150           }
3151
3152         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3153           {
3154             /* REG 1 can be used only by locking shift in 7-bit env.  */
3155             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3156               reg_bits &= ~2;
3157             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3158               /* Without any shifting, only REG 0 and 1 can be used.  */
3159               reg_bits &= 3;
3160           }
3161         /* Give each valid charset without a request a default register.  */
3162         if (reg_bits)
3163           for (charset = 0; charset <= MAX_CHARSET; charset++)
3164             {
3165               if (CHARSET_VALID_P (charset)
3166                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3167                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3168                 {
3169                   /* There exist some default graphic registers to be
3170                      used by CHARSET.  */
3171
3172                   /* We had better avoid designating a charset of
3173                      CHARS96 to REG 0 as far as possible.  */
3174                   if (CHARSET_CHARS (charset) == 96)
3175                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3176                       = (reg_bits & 2
3177                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3178                   else
3179                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3180                       = (reg_bits & 1
3181                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3182                 }
3183             }
3184       }
3185       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3186       coding->spec.iso2022.last_invalid_designation_register = -1;
3187       break;
3188
3189     case 3:
3190       coding->type = coding_type_big5;
3191       coding->common_flags
3192         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3193       coding->flags
3194         = (NILP (XVECTOR (coding_spec)->contents[4])
3195            ? CODING_FLAG_BIG5_HKU
3196            : CODING_FLAG_BIG5_ETEN);
3197       break;
3198
3199     case 4:
3200       coding->type = coding_type_ccl;
3201       coding->common_flags
3202         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3203       {
3204         val = XVECTOR (coding_spec)->contents[4];	/* (DECODER . ENCODER) */
3205         if (! CONSP (val)
3206             || setup_ccl_program (&(coding->spec.ccl.decoder),
3207                                   XCAR (val)) < 0
3208             || setup_ccl_program (&(coding->spec.ccl.encoder),
3209                                   XCDR (val)) < 0)
3210           goto label_invalid_coding_system;
3211         /* Record valid bytes given as integers or (START . END) ranges.  */
3212         bzero (coding->spec.ccl.valid_codes, 256);
3213         val = Fplist_get (plist, Qvalid_codes);
3214         if (CONSP (val))
3215           {
3216             Lisp_Object this;
3217
3218             for (; CONSP (val); val = XCDR (val))
3219               {
3220                 this = XCAR (val);
3221                 if (INTEGERP (this)
3222                     && XINT (this) >= 0 && XINT (this) < 256)
3223                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3224                 else if (CONSP (this)
3225                          && INTEGERP (XCAR (this))
3226                          && INTEGERP (XCDR (this)))
3227                   {
3228                     int start = XINT (XCAR (this));
3229                     int end = XINT (XCDR (this));
3230
3231                     if (start >= 0 && start <= end && end < 256)
3232                       while (start <= end)
3233                         coding->spec.ccl.valid_codes[start++] = 1;
3234                   }
3235               }
3236           }
3237       }
3238       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3239       coding->spec.ccl.cr_carryover = 0;
3240       break;
3241
3242     case 5:
3243       coding->type = coding_type_raw_text;
3244       break;
3245
3246     default:
3247       goto label_invalid_coding_system;
3248     }
3249   return 0;
3250   /* Failure path: fall back to a no-conversion, LF-only setup.  */
3251  label_invalid_coding_system:
3252   coding->type = coding_type_no_conversion;
3253   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3254   coding->common_flags = 0;
3255   coding->eol_type = CODING_EOL_LF;
3256   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3257   return -1;
3258 }
3259
3260 /* Release the whole chain of composition data blocks that CODING
3261    refers to, and reset CODING->cmp_data to NULL.  */
3262 void
3263 coding_free_composition_data (coding)
3264      struct coding_system *coding;
3265 {
3266   struct composition_data *block, *following;
3267
3268   if (coding->cmp_data == NULL)
3269     return;
3270   /* The blocks are chained in both directions.  Rewind along the
3271      `prev' links to the first block of the chain.  */
3272   for (block = coding->cmp_data; block->prev; block = block->prev)
3273     ;
3274   /* Free front to back, fetching each successor before its block goes.  */
3275   for (; block; block = following)
3276     {
3277       following = block->next;
3278       xfree (block);
3279     }
3280   coding->cmp_data = NULL;
3281 }
3282
3283 /* Set `char_offset' of every block from CODING->cmp_data onward to POS.  */
3284 void
3285 coding_adjust_composition_offset (coding, pos)
3286      struct coding_system *coding;
3287      int pos;
3288 {
3289   struct composition_data *block = coding->cmp_data;
3290   while (block)
3291     {
3292       block->char_offset = pos;
3293       block = block->next;
3294     }
3295 }
3296
3297 /* Switch CODING to raw-text unless it already is raw-text.  If the
3298    end-of-line type already set in CODING is decided, the matching
3299    subsidiary from the `eol-type' property of `raw-text' is used
3300    instead of `raw-text' itself.  CODING must have been set up for
3301    some coding system beforehand.  */
3302 void
3303 setup_raw_text_coding_system (coding)
3304      struct coding_system *coding;
3305 {
3306   Lisp_Object eol_variants;
3307
3308   /* Nothing to do when CODING is raw-text already.  */
3309   if (coding->type == coding_type_raw_text)
3310     return;
3311
3312   coding->symbol = Qraw_text;
3313   coding->type = coding_type_raw_text;
3314   if (coding->eol_type != CODING_EOL_UNDECIDED)
3315     {
3316       /* Pick the subsidiary whose slot matches the known eol type.  */
3317       eol_variants = Fget (Qraw_text, Qeol_type);
3318       if (VECTORP (eol_variants) && XVECTOR (eol_variants)->size == 3)
3319         coding->symbol = XVECTOR (eol_variants)->contents[coding->eol_type];
3320     }
3321   /* Re-run the generic setup with the chosen symbol.  */
3322   setup_coding_system (coding->symbol, coding);
3323 }
3324
3325 /* Emacs has a mechanism to automatically detect a coding system if it
3326    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3327    it's impossible to distinguish some coding systems accurately
3328    because they use the same range of codes.  So, at first, coding
3329    systems are classified into the categories listed below:
3330
3331    o coding-category-emacs-mule
3332
3333         The category for a coding system which has the same code range
3334         as Emacs' internal format.  Assigned the coding-system (Lisp
3335         symbol) `emacs-mule' by default.
3336
3337    o coding-category-sjis
3338
3339         The category for a coding system which has the same code range
3340         as SJIS.  Assigned the coding-system (Lisp
3341         symbol) `japanese-shift-jis' by default.
3342
3343    o coding-category-iso-7
3344
3345         The category for a coding system which has the same code range
3346         as ISO2022 of 7-bit environment.  This doesn't use any locking
3347         shift and single shift functions.  This can encode/decode all
3348         charsets.  Assigned the coding-system (Lisp symbol)
3349         `iso-2022-7bit' by default.
3350
3351    o coding-category-iso-7-tight
3352
3353         Same as coding-category-iso-7 except that this can
3354         encode/decode only the specified charsets.
3355
3356    o coding-category-iso-8-1
3357
3358         The category for a coding system which has the same code range
3359         as ISO2022 of 8-bit environment and graphic plane 1 used only
3360         for DIMENSION1 charset.  This doesn't use any locking shift
3361         and single shift functions.  Assigned the coding-system (Lisp
3362         symbol) `iso-latin-1' by default.
3363
3364    o coding-category-iso-8-2
3365
3366         The category for a coding system which has the same code range
3367         as ISO2022 of 8-bit environment and graphic plane 1 used only
3368         for DIMENSION2 charset.  This doesn't use any locking shift
3369         and single shift functions.  Assigned the coding-system (Lisp
3370         symbol) `japanese-iso-8bit' by default.
3371
3372    o coding-category-iso-7-else
3373
3374         The category for a coding system which has the same code range
3375         as ISO2022 of 7-bit environment but uses locking shift or
3376         single shift functions.  Assigned the coding-system (Lisp
3377         symbol) `iso-2022-7bit-lock' by default.
3378
3379    o coding-category-iso-8-else
3380
3381         The category for a coding system which has the same code range
3382         as ISO2022 of 8-bit environment but uses locking shift or
3383         single shift functions.  Assigned the coding-system (Lisp
3384         symbol) `iso-2022-8bit-ss2' by default.
3385
3386    o coding-category-big5
3387
3388         The category for a coding system which has the same code range
3389         as BIG5.  Assigned the coding-system (Lisp symbol)
3390         `cn-big5' by default.
3391
3392    o coding-category-utf-8
3393
3394         The category for a coding system which has the same code range
3395         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3396         symbol) `utf-8' by default.
3397
3398    o coding-category-utf-16-be
3399
3400         The category for a coding system in which a text has an
3401         Unicode signature (cf. Unicode Standard) in the order of BIG
3402         endian at the head.  Assigned the coding-system (Lisp symbol)
3403         `utf-16-be' by default.
3404
3405    o coding-category-utf-16-le
3406
3407         The category for a coding system in which a text has an
3408         Unicode signature (cf. Unicode Standard) in the order of
3409         LITTLE endian at the head.  Assigned the coding-system (Lisp
3410         symbol) `utf-16-le' by default.
3411
3412    o coding-category-ccl
3413
3414         The category for a coding system of which encoder/decoder is
3415         written in CCL programs.  The default value is nil, i.e., no
3416         coding system is assigned.
3417
3418    o coding-category-binary
3419
3420         The category for a coding system not categorized in any of the
3421         above.  Assigned the coding-system (Lisp symbol)
3422         `no-conversion' by default.
3423
3424    Each of them is a Lisp symbol and the value is an actual
3425    `coding-system's (this is also a Lisp symbol) assigned by a user.
3426    What Emacs does actually is to detect a category of coding system.
3427    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3428    decide only one possible category, it selects a category of the
3429    highest priority.  Priorities of categories are also specified by a
3430    user in a Lisp variable `coding-category-list'.
3431
3432 */
3433 /* ascii_skip_code[B] != 0 means detect_coding_mask skips leading byte B.  */
3434 static
3435 int ascii_skip_code[256];
3436
3437 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3438    Return an integer in which the flag bits (CODING_CATEGORY_MASK_XXX,
3439    defined in `coding.h') of the possible coding categories are set, or
3440    0 if every byte of the text was skipped as ASCII.  If PRIORITIES is
3441    non-NULL, it should point the table `coding_priorities'.  In that
3442    case, only the flag bit for a coding system of the highest priority
3443    is set in the returned value.
3444
3445    How many ASCII characters are at the head is returned as *SKIP.
3446    As a side effect, some entries of `ascii_skip_code' are updated.  */
3447 static int
3448 detect_coding_mask (source, src_bytes, priorities, skip)
3449      unsigned char *source;
3450      int src_bytes, *priorities, *skip;
3451 {
3452   register unsigned char c;
3453   unsigned char *src = source, *src_end = source + src_bytes;
3454   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3455   int i;
3456
3457   /* At first, skip all ASCII characters and control characters except
3458      for three ISO2022 specific control characters.  */
3459   ascii_skip_code[ISO_CODE_SO] = 0;
3460   ascii_skip_code[ISO_CODE_SI] = 0;
3461   ascii_skip_code[ISO_CODE_ESC] = 0;
3462
3463  label_loop_detect_coding:
3464   while (src < src_end && ascii_skip_code[*src]) src++;
3465   *skip = src - source;
3466
3467   if (src >= src_end)
3468     /* We found nothing other than ASCII.  There's nothing to do.  */
3469     return 0;
3470
3471   c = *src;
3472   /* The text seems to be encoded in some multilingual coding system.
3473      Now, try to find in which coding system the text is encoded.  */
3474   if (c < 0x80)
3475     {
3476       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3477       /* C is an ISO2022 specific control code of C0.  */
3478       mask = detect_coding_iso2022 (src, src_end);
3479       if (mask == 0)
3480         {
3481           /* No valid ISO2022 code follows C.  Skip such codes from now
3482              on and try again.  */
3483           src++;
3484           if (c == ISO_CODE_ESC)
3485             ascii_skip_code[ISO_CODE_ESC] = 1;
3486           else
3487             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3488           goto label_loop_detect_coding;
3489         }
3490       if (priorities)
3491         {
3492           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3493             {
3494               if (mask & priorities[i])
3495                 return priorities[i];
3496             }
3497           return CODING_CATEGORY_MASK_RAW_TEXT;
3498         }
3499     }
3500   else
3501     {
3502       int try;
3503
3504       if (c < 0xA0)
3505         {
3506           /* C is the first byte of SJIS character code,
3507              or a leading-code of Emacs' internal format (emacs-mule),
3508              or the first byte of UTF-16.  */
3509           try = (CODING_CATEGORY_MASK_SJIS
3510                   | CODING_CATEGORY_MASK_EMACS_MULE
3511                   | CODING_CATEGORY_MASK_UTF_16_BE
3512                   | CODING_CATEGORY_MASK_UTF_16_LE);
3513
3514           /* Or, if C is a latin extra code, a C1 control code of ISO2022
3515              (SS2 or SS3), or a CSI followed by "]", "0]", "1]" or "2]",
3516              we should also consider the possibility of ISO2022 codings.
3517              SRC still points at C, so the look-ahead starts at SRC[1].  */
3518           if ((VECTORP (Vlatin_extra_code_table)
3519                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3520               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3521               || (c == ISO_CODE_CSI
3522                   && (src + 1 < src_end
3523                       && (src[1] == ']'
3524                           || ((src[1] == '0' || src[1] == '1' || src[1] == '2')
3525                               && src + 2 < src_end
3526                               && src[2] == ']')))))
3527             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3528                      | CODING_CATEGORY_MASK_ISO_8BIT);
3529         }
3530       else
3531         /* C is a character of ISO2022 in graphic plane right,
3532            or a SJIS's 1-byte character code (i.e. JISX0201),
3533            or the first byte of BIG5's 2-byte code,
3534            or the first byte of UTF-8/16.  */
3535         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3536                 | CODING_CATEGORY_MASK_ISO_8BIT
3537                 | CODING_CATEGORY_MASK_SJIS
3538                 | CODING_CATEGORY_MASK_BIG5
3539                 | CODING_CATEGORY_MASK_UTF_8
3540                 | CODING_CATEGORY_MASK_UTF_16_BE
3541                 | CODING_CATEGORY_MASK_UTF_16_LE);
3542
3543       /* Or, we may have to consider the possibility of CCL.  */
3544       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3545           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3546               ->spec.ccl.valid_codes)[c])
3547         try |= CODING_CATEGORY_MASK_CCL;
3548
3549       mask = 0;
3550       utf16_examined_p = iso2022_examined_p = 0;
3551       if (priorities)
3552         {
3553           /* Run the detectors in priority order and stop at the first
3554              category that is confirmed.  */
3555           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3556             {
3557               if (!iso2022_examined_p
3558                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
3559                 {
3560                   mask |= detect_coding_iso2022 (src, src_end);
3561                   iso2022_examined_p = 1;
3562                 }
3563               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3564                 mask |= detect_coding_sjis (src, src_end);
3565               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
3566                 mask |= detect_coding_utf_8 (src, src_end);
3567               else if (!utf16_examined_p
3568                        && (priorities[i] & try &
3569                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
3570                 {
3571                   mask |= detect_coding_utf_16 (src, src_end);
3572                   utf16_examined_p = 1;
3573                 }
3574               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3575                 mask |= detect_coding_big5 (src, src_end);
3576               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3577                 mask |= detect_coding_emacs_mule (src, src_end);
3578               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3579                 mask |= detect_coding_ccl (src, src_end);
3580               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3581                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
3582               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3583                 mask |= CODING_CATEGORY_MASK_BINARY;
3584               if (mask & priorities[i])
3585                 return priorities[i];
3586             }
3587           return CODING_CATEGORY_MASK_RAW_TEXT;
3588         }
3589       if (try & CODING_CATEGORY_MASK_ISO)
3590         mask |= detect_coding_iso2022 (src, src_end);
3591       if (try & CODING_CATEGORY_MASK_SJIS)
3592         mask |= detect_coding_sjis (src, src_end);
3593       if (try & CODING_CATEGORY_MASK_BIG5)
3594         mask |= detect_coding_big5 (src, src_end);
3595       if (try & CODING_CATEGORY_MASK_UTF_8)
3596         mask |= detect_coding_utf_8 (src, src_end);
3597       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
3598         mask |= detect_coding_utf_16 (src, src_end);
3599       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3600         mask |= detect_coding_emacs_mule (src, src_end);
3601       if (try & CODING_CATEGORY_MASK_CCL)
3602         mask |= detect_coding_ccl (src, src_end);
3603     }
3604   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3605 }
3603
3604 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3605    The information of the detected coding system is set in CODING;
3606    if nothing is detected, only CODING->heading_ascii is updated.  */
3607 void
3608 detect_coding (coding, src, src_bytes)
3609      struct coding_system *coding;
3610      unsigned char *src;
3611      int src_bytes;
3612 {
3613   unsigned int idx;
3614   int skip, mask;
3615   Lisp_Object val;
3616
3617   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3618   coding->heading_ascii = skip;
3619
3620   if (!mask) return;
3621
3622   /* We found a single coding system of the highest priority in MASK.  */
3623   idx = 0;
3624   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3625   if (! mask)
3626     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3627
3628   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3629
3630   if (coding->eol_type != CODING_EOL_UNDECIDED)
3631     {
3632       Lisp_Object tmp;
3633       tmp = Fget (val, Qeol_type);
3634       /* Use the subsidiary only if the vector really has that slot.  */
3635       if (VECTORP (tmp) && coding->eol_type >= 0
3636           && coding->eol_type < XVECTOR (tmp)->size)
3637         val = XVECTOR (tmp)->contents[coding->eol_type];
3638     }
3639
3640   /* Setup this new coding system while preserving some slots.  */
3641   {
3642     int src_multibyte = coding->src_multibyte;
3643     int dst_multibyte = coding->dst_multibyte;
3644
3645     setup_coding_system (val, coding);
3646     coding->src_multibyte = src_multibyte;
3647     coding->dst_multibyte = dst_multibyte;
3648     coding->heading_ascii = skip;
3649   }
3650 }
3651
3652 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3653    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3654    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3655
3656    How many non-eol characters are at the head is returned as *SKIP.  */
3657
3658 #define MAX_EOL_CHECK_COUNT 3
3659
3660 static int
3661 detect_eol_type (source, src_bytes, skip)
3662      unsigned char *source;
3663      int src_bytes, *skip;
3664 {
3665   unsigned char *src = source, *src_end = src + src_bytes;
3666   unsigned char c;
3667   int total = 0;                /* How many end-of-lines are found so far.  */
3668   int eol_type = CODING_EOL_UNDECIDED;
3669   int this_eol_type;
3670
3671   *skip = 0;
3672
3673   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3674     {
3675       c = *src++;
3676       if (c == '\n' || c == '\r')
3677         {
3678           if (*skip == 0)
3679             *skip = src - 1 - source;
3680           total++;
3681           if (c == '\n')
3682             this_eol_type = CODING_EOL_LF;
3683           else if (src >= src_end || *src != '\n')
3684             this_eol_type = CODING_EOL_CR;
3685           else
3686             this_eol_type = CODING_EOL_CRLF, src++;
3687
3688           if (eol_type == CODING_EOL_UNDECIDED)
3689             /* This is the first end-of-line.  */
3690             eol_type = this_eol_type;
3691           else if (eol_type != this_eol_type)
3692             {
3693               /* The found type is different from what found before.  */
3694               eol_type = CODING_EOL_INCONSISTENT;
3695               break;
3696             }
3697         }
3698     }
3699
3700   if (*skip == 0)
3701     *skip = src_end - source;
3702   return eol_type;
3703 }
3704
3705 /* Like detect_eol_type, but detect EOL type in 2-octet
3706    big-endian/little-endian format for coding systems utf-16-be and
3707    utf-16-le.  */
3708
3709 static int
3710 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
3711      unsigned char *source;
3712      int src_bytes, *skip;
3713 {
3714   unsigned char *src = source, *src_end = src + src_bytes;
3715   unsigned int c1, c2;
3716   int total = 0;                /* How many end-of-lines are found so far.  */
3717   int eol_type = CODING_EOL_UNDECIDED;
3718   int this_eol_type;
3719   int msb, lsb;
3720
3721   if (big_endian_p)
3722     msb = 0, lsb = 1;
3723   else
3724     msb = 1, lsb = 0;
3725
3726   *skip = 0;
3727
3728   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
3729     {
3730       c1 = (src[msb] << 8) | (src[lsb]);
3731       src += 2;
3732
3733       if (c1 == '\n' || c1 == '\r')
3734         {
3735           if (*skip == 0)
3736             *skip = src - 2 - source;
3737           total++;
3738           if (c1 == '\n')
3739             {
3740               this_eol_type = CODING_EOL_LF;
3741             }
3742           else
3743             {
3744               if ((src + 1) >= src_end)
3745                 {
3746                   this_eol_type = CODING_EOL_CR;
3747                 }
3748               else
3749                 {
3750                   c2 = (src[msb] << 8) | (src[lsb]);
3751                   if (c2 == '\n')
3752                     this_eol_type = CODING_EOL_CRLF, src += 2;
3753                   else
3754                     this_eol_type = CODING_EOL_CR;
3755                 }
3756             }
3757
3758           if (eol_type == CODING_EOL_UNDECIDED)
3759             /* This is the first end-of-line.  */
3760             eol_type = this_eol_type;
3761           else if (eol_type != this_eol_type)
3762             {
3763               /* The found type is different from what found before.  */
3764               eol_type = CODING_EOL_INCONSISTENT;
3765               break;
3766             }
3767         }
3768     }
3769
3770   if (*skip == 0)
3771     *skip = src_end - source;
3772   return eol_type;
3773 }
3774
3775 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3776    is encoded.  If it detects an appropriate format of end-of-line, it
3777    sets the information in *CODING.  */
3778
3779 void
3780 detect_eol (coding, src, src_bytes)
3781      struct coding_system *coding;
3782      unsigned char *src;
3783      int src_bytes;
3784 {
3785   Lisp_Object val;
3786   int skip;
3787   int eol_type;
3788
3789   switch (coding->category_idx)
3790     {
3791     case CODING_CATEGORY_IDX_UTF_16_BE:
3792       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
3793       break;
3794     case CODING_CATEGORY_IDX_UTF_16_LE:
3795       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
3796       break;
3797     default:
3798       eol_type = detect_eol_type (src, src_bytes, &skip);
3799       break;
3800     }
3801
3802   if (coding->heading_ascii > skip)
3803     coding->heading_ascii = skip;
3804   else
3805     skip = coding->heading_ascii;
3806
3807   if (eol_type == CODING_EOL_UNDECIDED)
3808     return;
3809   if (eol_type == CODING_EOL_INCONSISTENT)
3810     {
3811 #if 0
3812       /* This code is suppressed until we find a better way to
3813          distinguish raw text file and binary file.  */
3814
3815       /* If we have already detected that the coding is raw-text, the
3816          coding should actually be no-conversion.  */
3817       if (coding->type == coding_type_raw_text)
3818         {
3819           setup_coding_system (Qno_conversion, coding);
3820           return;
3821         }
3822       /* Else, let's decode only text code anyway.  */
3823 #endif /* 0 */
3824       eol_type = CODING_EOL_LF;
3825     }
3826
3827   val = Fget (coding->symbol, Qeol_type);
3828   if (VECTORP (val) && XVECTOR (val)->size == 3)
3829     {
3830       int src_multibyte = coding->src_multibyte;
3831       int dst_multibyte = coding->dst_multibyte;
3832
3833       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3834       coding->src_multibyte = src_multibyte;
3835       coding->dst_multibyte = dst_multibyte;
3836       coding->heading_ascii = skip;
3837     }
3838 }
3839
/* Number of bytes added on top of the estimated size of a conversion
   buffer.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the byte length of a text may grow when it is
   decoded by CODING (a pointer to struct coding_system): 3 for
   ISO2022, the magnification recorded in the decoder program for CCL,
   and 2 otherwise.  CODING is parenthesized so that any pointer
   expression (e.g. `&cs') can be passed; it is evaluated more than
   once.  */
#define DECODING_BUFFER_MAG(coding)                             \
  ((coding)->type == coding_type_iso2022                        \
   ? 3                                                          \
   : ((coding)->type == coding_type_ccl                         \
      ? (coding)->spec.ccl.decoder.buf_magnification            \
      : 2))
3848
3849 /* Return maximum size (bytes) of a buffer enough for decoding
3850    SRC_BYTES of text encoded in CODING.  */
3851
3852 int
3853 decoding_buffer_size (coding, src_bytes)
3854      struct coding_system *coding;
3855      int src_bytes;
3856 {
3857   return (src_bytes * DECODING_BUFFER_MAG (coding)
3858           + CONVERSION_BUFFER_EXTRA_ROOM);
3859 }
3860
3861 /* Return maximum size (bytes) of a buffer enough for encoding
3862    SRC_BYTES of text to CODING.  */
3863
3864 int
3865 encoding_buffer_size (coding, src_bytes)
3866      struct coding_system *coding;
3867      int src_bytes;
3868 {
3869   int magnification;
3870
3871   if (coding->type == coding_type_ccl)
3872     magnification = coding->spec.ccl.encoder.buf_magnification;
3873   else if (CODING_REQUIRE_ENCODING (coding))
3874     magnification = 3;
3875   else
3876     magnification = 1;
3877
3878   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3879 }
3880
/* Scratch buffer used while converting text between coding systems.
   It is filled in by allocate_conversion_buffer, grown by
   extend_conversion_buffer and released by free_conversion_buffer.  */
struct conversion_buffer
{
  /* Number of bytes currently allocated at DATA.  */
  int size;

  /* 1 if DATA was obtained from alloca, 0 if it is heap memory that
     has to be freed.  */
  int on_stack;

  /* Start of the buffer memory.  */
  unsigned char *data;
};
3888
/* Don't use alloca for allocating memory space larger than this, lest
   we overflow their stack.  The value is parenthesized so that the
   macro can be used inside any expression.  */
#define MAX_ALLOCA (16 * 1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer,
   passed as an lvalue, not as a pointer).  Requests smaller than
   MAX_ALLOCA are served by alloca in the calling function's frame and
   flagged with on_stack = 1; larger ones come from xmalloc.

   BUF and LEN are parenthesized in the expansion, but both are still
   evaluated more than once, so they must not have side effects.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < MAX_ALLOCA)                             \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
3908
3909 /* Double the allocated memory for *BUF.  */
3910 static void
3911 extend_conversion_buffer (buf)
3912      struct conversion_buffer *buf;
3913 {
3914   if (buf->on_stack)
3915     {
3916       unsigned char *save = buf->data;
3917       buf->data = (unsigned char *) xmalloc (buf->size * 2);
3918       bcopy (save, buf->data, buf->size);
3919       buf->on_stack = 0;
3920     }
3921   else
3922     {
3923       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
3924     }
3925   buf->size *= 2;
3926 }
3927
3928 /* Free the allocated memory for BUF if it is not on stack.  */
3929 static void
3930 free_conversion_buffer (buf)
3931      struct conversion_buffer *buf;
3932 {
3933   if (!buf->on_stack)
3934     xfree (buf->data);
3935 }
3936
3937 int
3938 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3939      struct coding_system *coding;
3940      unsigned char *source, *destination;
3941      int src_bytes, dst_bytes, encodep;
3942 {
3943   struct ccl_program *ccl
3944     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3945   int result;
3946
3947   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3948   if (encodep)
3949     ccl->eol_type = coding->eol_type;
3950   ccl->multibyte = coding->src_multibyte;
3951   coding->produced = ccl_driver (ccl, source, destination,
3952                                  src_bytes, dst_bytes, &(coding->consumed));
3953   if (encodep)
3954     coding->produced_char = coding->produced;
3955   else
3956     {
3957       int bytes
3958         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
3959       coding->produced = str_as_multibyte (destination, bytes,
3960                                            coding->produced,
3961                                            &(coding->produced_char));
3962     }
3963
3964   switch (ccl->status)
3965     {
3966     case CCL_STAT_SUSPEND_BY_SRC:
3967       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3968       break;
3969     case CCL_STAT_SUSPEND_BY_DST:
3970       coding->result = CODING_FINISH_INSUFFICIENT_DST;
3971       break;
3972     case CCL_STAT_QUIT:
3973     case CCL_STAT_INVALID_CMD:
3974       coding->result = CODING_FINISH_INTERRUPT;
3975       break;
3976     default:
3977       coding->result = CODING_FINISH_NORMAL;
3978       break;
3979     }
3980   return coding->result;
3981 }
3982
3983 /* Decode EOL format of the text at PTR of BYTES length destructively
3984    according to CODING->eol_type.  This is called after the CCL
3985    program produced a decoded text at PTR.  If we do CRLF->LF
3986    conversion, update CODING->produced and CODING->produced_char.  */
3987
3988 static void
3989 decode_eol_post_ccl (coding, ptr, bytes)
3990      struct coding_system *coding;
3991      unsigned char *ptr;
3992      int bytes;
3993 {
3994   Lisp_Object val, saved_coding_symbol;
3995   unsigned char *pend = ptr + bytes;
3996   int dummy;
3997
3998   /* Remember the current coding system symbol.  We set it back when
3999      an inconsistent EOL is found so that `last-coding-system-used' is
4000      set to the coding system that doesn't specify EOL conversion.  */
4001   saved_coding_symbol = coding->symbol;
4002
4003   coding->spec.ccl.cr_carryover = 0;
4004   if (coding->eol_type == CODING_EOL_UNDECIDED)
4005     {
4006       /* Here, to avoid the call of setup_coding_system, we directly
4007          call detect_eol_type.  */
4008       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4009       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4010         coding->eol_type = CODING_EOL_LF;
4011       if (coding->eol_type != CODING_EOL_UNDECIDED)
4012         {
4013           val = Fget (coding->symbol, Qeol_type);
4014           if (VECTORP (val) && XVECTOR (val)->size == 3)
4015             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4016         }
4017       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4018     }
4019
4020   if (coding->eol_type == CODING_EOL_LF
4021       || coding->eol_type == CODING_EOL_UNDECIDED)
4022     {
4023       /* We have nothing to do.  */
4024       ptr = pend;
4025     }
4026   else if (coding->eol_type == CODING_EOL_CRLF)
4027     {
4028       unsigned char *pstart = ptr, *p = ptr;
4029
4030       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4031           && *(pend - 1) == '\r')
4032         {
4033           /* If the last character is CR, we can't handle it here
4034              because LF will be in the not-yet-decoded source text.
4035              Recorded that the CR is not yet processed.  */
4036           coding->spec.ccl.cr_carryover = 1;
4037           coding->produced--;
4038           coding->produced_char--;
4039           pend--;
4040         }
4041       while (ptr < pend)
4042         {
4043           if (*ptr == '\r')
4044             {
4045               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4046                 {
4047                   *p++ = '\n';
4048                   ptr += 2;
4049                 }
4050               else
4051                 {
4052                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4053                     goto undo_eol_conversion;
4054                   *p++ = *ptr++;
4055                 }
4056             }
4057           else if (*ptr == '\n'
4058                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4059             goto undo_eol_conversion;
4060           else
4061             *p++ = *ptr++;
4062           continue;
4063
4064         undo_eol_conversion:
4065           /* We have faced with inconsistent EOL format at PTR.
4066              Convert all LFs before PTR back to CRLFs.  */
4067           for (p--, ptr--; p >= pstart; p--)
4068             {
4069               if (*p == '\n')
4070                 *ptr-- = '\n', *ptr-- = '\r';
4071               else
4072                 *ptr-- = *p;
4073             }
4074           /*  If carryover is recorded, cancel it because we don't
4075               convert CRLF anymore.  */
4076           if (coding->spec.ccl.cr_carryover)
4077             {
4078               coding->spec.ccl.cr_carryover = 0;
4079               coding->produced++;
4080               coding->produced_char++;
4081               pend++;
4082             }
4083           p = ptr = pend;
4084           coding->eol_type = CODING_EOL_LF;
4085           coding->symbol = saved_coding_symbol;
4086         }
4087       if (p < pend)
4088         {
4089           /* As each two-byte sequence CRLF was converted to LF, (PEND
4090              - P) is the number of deleted characters.  */
4091           coding->produced -= pend - p;
4092           coding->produced_char -= pend - p;
4093         }
4094     }
4095   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4096     {
4097       unsigned char *p = ptr;
4098
4099       for (; ptr < pend; ptr++)
4100         {
4101           if (*ptr == '\r')
4102             *ptr = '\n';
4103           else if (*ptr == '\n'
4104                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4105             {
4106               for (; p < ptr; p++)
4107                 {
4108                   if (*p == '\n')
4109                     *p = '\r';
4110                 }
4111               ptr = pend;
4112               coding->eol_type = CODING_EOL_LF;
4113               coding->symbol = saved_coding_symbol;
4114             }
4115         }
4116     }
4117 }
4118
4119 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4120    decoding, it may detect coding system and format of end-of-line if
4121    those are not yet decided.  The source should be unibyte, the
4122    result is multibyte if CODING->dst_multibyte is nonzero, else
4123    unibyte.  */
4124
4125 int
4126 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4127      struct coding_system *coding;
4128      unsigned char *source, *destination;
4129      int src_bytes, dst_bytes;
4130 {
4131   if (coding->type == coding_type_undecided)
4132     detect_coding (coding, source, src_bytes);
4133
4134   if (coding->eol_type == CODING_EOL_UNDECIDED
4135       && coding->type != coding_type_ccl)
4136     detect_eol (coding, source, src_bytes);
4137
4138   coding->produced = coding->produced_char = 0;
4139   coding->consumed = coding->consumed_char = 0;
4140   coding->errors = 0;
4141   coding->result = CODING_FINISH_NORMAL;
4142
4143   switch (coding->type)
4144     {
4145     case coding_type_sjis:
4146       decode_coding_sjis_big5 (coding, source, destination,
4147                                src_bytes, dst_bytes, 1);
4148       break;
4149
4150     case coding_type_iso2022:
4151       decode_coding_iso2022 (coding, source, destination,
4152                              src_bytes, dst_bytes);
4153       break;
4154
4155     case coding_type_big5:
4156       decode_coding_sjis_big5 (coding, source, destination,
4157                                src_bytes, dst_bytes, 0);
4158       break;
4159
4160     case coding_type_emacs_mule:
4161       decode_coding_emacs_mule (coding, source, destination,
4162                                 src_bytes, dst_bytes);
4163       break;
4164
4165     case coding_type_ccl:
4166       if (coding->spec.ccl.cr_carryover)
4167         {
4168           /* Set the CR which is not processed by the previous call of
4169              decode_eol_post_ccl in DESTINATION.  */
4170           *destination = '\r';
4171           coding->produced++;
4172           coding->produced_char++;
4173           dst_bytes--;
4174         }
4175       ccl_coding_driver (coding, source,
4176                          destination + coding->spec.ccl.cr_carryover,
4177                          src_bytes, dst_bytes, 0);
4178       if (coding->eol_type != CODING_EOL_LF)
4179         decode_eol_post_ccl (coding, destination, coding->produced);
4180       break;
4181
4182     default:
4183       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4184     }
4185
4186   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4187       && coding->consumed == src_bytes)
4188     coding->result = CODING_FINISH_NORMAL;
4189
4190   if (coding->mode & CODING_MODE_LAST_BLOCK
4191       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4192     {
4193       unsigned char *src = source + coding->consumed;
4194       unsigned char *dst = destination + coding->produced;
4195
4196       src_bytes -= coding->consumed;
4197       coding->errors++;
4198       if (COMPOSING_P (coding))
4199         DECODE_COMPOSITION_END ('1');
4200       while (src_bytes--)
4201         {
4202           int c = *src++;
4203           dst += CHAR_STRING (c, dst);
4204           coding->produced_char++;
4205         }
4206       coding->consumed = coding->consumed_char = src - source;
4207       coding->produced = dst - destination;
4208       coding->result = CODING_FINISH_NORMAL;
4209     }
4210
4211   if (!coding->dst_multibyte)
4212     {
4213       coding->produced = str_as_unibyte (destination, coding->produced);
4214       coding->produced_char = coding->produced;
4215     }
4216
4217   return coding->result;
4218 }
4219
4220 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4221    multibyteness of the source is CODING->src_multibyte, the
4222    multibyteness of the result is always unibyte.  */
4223
4224 int
4225 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4226      struct coding_system *coding;
4227      unsigned char *source, *destination;
4228      int src_bytes, dst_bytes;
4229 {
4230   coding->produced = coding->produced_char = 0;
4231   coding->consumed = coding->consumed_char = 0;
4232   coding->errors = 0;
4233   coding->result = CODING_FINISH_NORMAL;
4234
4235   switch (coding->type)
4236     {
4237     case coding_type_sjis:
4238       encode_coding_sjis_big5 (coding, source, destination,
4239                                src_bytes, dst_bytes, 1);
4240       break;
4241
4242     case coding_type_iso2022:
4243       encode_coding_iso2022 (coding, source, destination,
4244                              src_bytes, dst_bytes);
4245       break;
4246
4247     case coding_type_big5:
4248       encode_coding_sjis_big5 (coding, source, destination,
4249                                src_bytes, dst_bytes, 0);
4250       break;
4251
4252     case coding_type_emacs_mule:
4253       encode_coding_emacs_mule (coding, source, destination,
4254                                 src_bytes, dst_bytes);
4255       break;
4256
4257     case coding_type_ccl:
4258       ccl_coding_driver (coding, source, destination,
4259                          src_bytes, dst_bytes, 1);
4260       break;
4261
4262     default:
4263       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4264     }
4265
4266   if (coding->mode & CODING_MODE_LAST_BLOCK
4267       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4268     {
4269       unsigned char *src = source + coding->consumed;
4270       unsigned char *src_end = src + src_bytes;
4271       unsigned char *dst = destination + coding->produced;
4272
4273       if (coding->type == coding_type_iso2022)
4274         ENCODE_RESET_PLANE_AND_REGISTER;
4275       if (COMPOSING_P (coding))
4276         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4277       if (coding->consumed < src_bytes)
4278         {
4279           int len = src_bytes - coding->consumed;
4280
4281           BCOPY_SHORT (source + coding->consumed, dst, len);
4282           if (coding->src_multibyte)
4283             len = str_as_unibyte (dst, len);
4284           dst += len;
4285           coding->consumed = src_bytes;
4286         }
4287       coding->produced = coding->produced_char = dst - destination;
4288       coding->result = CODING_FINISH_NORMAL;
4289     }
4290
4291   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4292       && coding->consumed == src_bytes)
4293     coding->result = CODING_FINISH_NORMAL;
4294
4295   return coding->result;
4296 }
4297
4298 /* Scan text in the region between *BEG and *END (byte positions),
4299    skip characters which we don't have to decode by coding system
4300    CODING at the head and tail, then set *BEG and *END to the region
4301    of the text we actually have to convert.  The caller should move
4302    the gap out of the region in advance if the region is from a
4303    buffer.
4304
4305    If STR is not NULL, *BEG and *END are indices into STR.  */
4306
4307 static void
4308 shrink_decoding_region (beg, end, coding, str)
4309      int *beg, *end;
4310      struct coding_system *coding;
4311      unsigned char *str;
4312 {
4313   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4314   int eol_conversion;
4315   Lisp_Object translation_table;
4316
4317   if (coding->type == coding_type_ccl
4318       || coding->type == coding_type_undecided
4319       || coding->eol_type != CODING_EOL_LF
4320       || !NILP (coding->post_read_conversion)
4321       || coding->composing != COMPOSITION_DISABLED)
4322     {
4323       /* We can't skip any data.  */
4324       return;
4325     }
4326   if (coding->type == coding_type_no_conversion
4327       || coding->type == coding_type_raw_text
4328       || coding->type == coding_type_emacs_mule)
4329     {
4330       /* We need no conversion, but don't have to skip any data here.
4331          Decoding routine handles them effectively anyway.  */
4332       return;
4333     }
4334
4335   translation_table = coding->translation_table_for_decode;
4336   if (NILP (translation_table) && !NILP (Venable_character_translation))
4337     translation_table = Vstandard_translation_table_for_decode;
4338   if (CHAR_TABLE_P (translation_table))
4339     {
4340       int i;
4341       for (i = 0; i < 128; i++)
4342         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4343           break;
4344       if (i < 128)
4345         /* Some ASCII character should be translated.  We give up
4346            shrinking.  */
4347         return;
4348     }
4349
4350   if (coding->heading_ascii >= 0)
4351     /* Detection routine has already found how much we can skip at the
4352        head.  */
4353     *beg += coding->heading_ascii;
4354
4355   if (str)
4356     {
4357       begp_orig = begp = str + *beg;
4358       endp_orig = endp = str + *end;
4359     }
4360   else
4361     {
4362       begp_orig = begp = BYTE_POS_ADDR (*beg);
4363       endp_orig = endp = begp + *end - *beg;
4364     }
4365
4366   eol_conversion = (coding->eol_type == CODING_EOL_CR
4367                     || coding->eol_type == CODING_EOL_CRLF);
4368
4369   switch (coding->type)
4370     {
4371     case coding_type_sjis:
4372     case coding_type_big5:
4373       /* We can skip all ASCII characters at the head.  */
4374       if (coding->heading_ascii < 0)
4375         {
4376           if (eol_conversion)
4377             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4378           else
4379             while (begp < endp && *begp < 0x80) begp++;
4380         }
4381       /* We can skip all ASCII characters at the tail except for the
4382          second byte of SJIS or BIG5 code.  */
4383       if (eol_conversion)
4384         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4385       else
4386         while (begp < endp && endp[-1] < 0x80) endp--;
4387       /* Do not consider LF as ascii if preceded by CR, since that
4388          confuses eol decoding. */
4389       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4390         endp++;
4391       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4392         endp++;
4393       break;
4394
4395     case coding_type_iso2022:
4396       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4397         /* We can't skip any data.  */
4398         break;
4399       if (coding->heading_ascii < 0)
4400         {
4401           /* We can skip all ASCII characters at the head except for a
4402              few control codes.  */
4403           while (begp < endp && (c = *begp) < 0x80
4404                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4405                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4406                  && (!eol_conversion || c != ISO_CODE_LF))
4407             begp++;
4408         }
4409       switch (coding->category_idx)
4410         {
4411         case CODING_CATEGORY_IDX_ISO_8_1:
4412         case CODING_CATEGORY_IDX_ISO_8_2:
4413           /* We can skip all ASCII characters at the tail.  */
4414           if (eol_conversion)
4415             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4416           else
4417             while (begp < endp && endp[-1] < 0x80) endp--;
4418           /* Do not consider LF as ascii if preceded by CR, since that
4419              confuses eol decoding. */
4420           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4421             endp++;
4422           break;
4423
4424         case CODING_CATEGORY_IDX_ISO_7:
4425         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4426           {
4427             /* We can skip all charactes at the tail except for 8-bit
4428                codes and ESC and the following 2-byte at the tail.  */
4429             unsigned char *eight_bit = NULL;
4430
4431             if (eol_conversion)
4432               while (begp < endp
4433                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4434                 {
4435                   if (!eight_bit && c & 0x80) eight_bit = endp;
4436                   endp--;
4437                 }
4438             else
4439               while (begp < endp
4440                      && (c = endp[-1]) != ISO_CODE_ESC)
4441                 {
4442                   if (!eight_bit && c & 0x80) eight_bit = endp;
4443                   endp--;
4444                 }
4445             /* Do not consider LF as ascii if preceded by CR, since that
4446                confuses eol decoding. */
4447             if (begp < endp && endp < endp_orig
4448                 && endp[-1] == '\r' && endp[0] == '\n')
4449               endp++;
4450             if (begp < endp && endp[-1] == ISO_CODE_ESC)
4451               {
4452                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
4453                   /* This is an ASCII designation sequence.  We can
4454                      surely skip the tail.  But, if we have
4455                      encountered an 8-bit code, skip only the codes
4456                      after that.  */
4457                   endp = eight_bit ? eight_bit : endp + 2;
4458                 else
4459                   /* Hmmm, we can't skip the tail.  */
4460                   endp = endp_orig;
4461               }
4462             else if (eight_bit)
4463               endp = eight_bit;
4464           }
4465         }
4466       break;
4467
4468     default:
4469       abort ();
4470     }
4471   *beg += begp - begp_orig;
4472   *end += endp - endp_orig;
4473   return;
4474 }
4475
4476 /* Like shrink_decoding_region but for encoding.  */
4477
4478 static void
4479 shrink_encoding_region (beg, end, coding, str)
4480      int *beg, *end;
4481      struct coding_system *coding;
4482      unsigned char *str;
4483 {
4484   unsigned char *begp_orig, *begp, *endp_orig, *endp;
4485   int eol_conversion;
4486   Lisp_Object translation_table;
4487
4488   if (coding->type == coding_type_ccl
4489       || coding->eol_type == CODING_EOL_CRLF
4490       || coding->eol_type == CODING_EOL_CR
4491       || coding->cmp_data && coding->cmp_data->used > 0)
4492     {
4493       /* We can't skip any data.  */
4494       return;
4495     }
4496   if (coding->type == coding_type_no_conversion
4497       || coding->type == coding_type_raw_text
4498       || coding->type == coding_type_emacs_mule
4499       || coding->type == coding_type_undecided)
4500     {
4501       /* We need no conversion, but don't have to skip any data here.
4502          Encoding routine handles them effectively anyway.  */
4503       return;
4504     }
4505
4506   translation_table = coding->translation_table_for_encode;
4507   if (NILP (translation_table) && !NILP (Venable_character_translation))
4508     translation_table = Vstandard_translation_table_for_encode;
4509   if (CHAR_TABLE_P (translation_table))
4510     {
4511       int i;
4512       for (i = 0; i < 128; i++)
4513         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4514           break;
4515       if (i < 128)
4516         /* Some ASCII character should be tranlsated.  We give up
4517            shrinking.  */
4518         return;
4519     }
4520
4521   if (str)
4522     {
4523       begp_orig = begp = str + *beg;
4524       endp_orig = endp = str + *end;
4525     }
4526   else
4527     {
4528       begp_orig = begp = BYTE_POS_ADDR (*beg);
4529       endp_orig = endp = begp + *end - *beg;
4530     }
4531
4532   eol_conversion = (coding->eol_type == CODING_EOL_CR
4533                     || coding->eol_type == CODING_EOL_CRLF);
4534
4535   /* Here, we don't have to check coding->pre_write_conversion because
4536      the caller is expected to have handled it already.  */
4537   switch (coding->type)
4538     {
4539     case coding_type_iso2022:
4540       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4541         /* We can't skip any data.  */
4542         break;
4543       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4544         {
4545           unsigned char *bol = begp;
4546           while (begp < endp && *begp < 0x80)
4547             {
4548               begp++;
4549               if (begp[-1] == '\n')
4550                 bol = begp;
4551             }
4552           begp = bol;
4553           goto label_skip_tail;
4554         }
4555       /* fall down ... */
4556
4557     case coding_type_sjis:
4558     case coding_type_big5:
4559       /* We can skip all ASCII characters at the head and tail.  */
4560       if (eol_conversion)
4561         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4562       else
4563         while (begp < endp && *begp < 0x80) begp++;
4564     label_skip_tail:
4565       if (eol_conversion)
4566         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4567       else
4568         while (begp < endp && *(endp - 1) < 0x80) endp--;
4569       break;
4570
4571     default:
4572       abort ();
4573     }
4574
4575   *beg += begp - begp_orig;
4576   *end += endp - endp_orig;
4577   return;
4578 }
4579
/* Shrinking a conversion region has a cost of its own, so a region
   is shrunk only when it is longer than this value.  */
static int shrink_conversion_region_threshhold = 1024;

/* Narrow the region delimited by *BEG and *END (pointers to int) by
   calling shrink_encoding_region if ENCODEP is nonzero, else
   shrink_decoding_region, with CODING and STR passed through.  A
   region no longer than shrink_conversion_region_threshhold is left
   as it is.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) <= shrink_conversion_region_threshhold)         \
      break;                                                            \
    if (encodep)                                                        \
      shrink_encoding_region (beg, end, coding, str);                   \
    else                                                                \
      shrink_decoding_region (beg, end, coding, str);                   \
  } while (0)
4593
4594 static Lisp_Object
4595 code_convert_region_unwind (dummy)
4596      Lisp_Object dummy;
4597 {
4598   inhibit_pre_post_conversion = 0;
4599   return Qnil;
4600 }
4601
4602 /* Store information about all compositions in the range FROM and TO
4603    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
4604    buffer or a string, defaults to the current buffer.  */
4605
4606 void
4607 coding_save_composition (coding, from, to, obj)
4608      struct coding_system *coding;
4609      int from, to;
4610      Lisp_Object obj;
4611 {
4612   Lisp_Object prop;
4613   int start, end;
4614
4615   if (coding->composing == COMPOSITION_DISABLED)
4616     return;
4617   if (!coding->cmp_data)
4618     coding_allocate_composition_data (coding, from);
4619   if (!find_composition (from, to, &start, &end, &prop, obj)
4620       || end > to)
4621     return;
4622   if (start < from
4623       && (!find_composition (end, to, &start, &end, &prop, obj)
4624           || end > to))
4625     return;
4626   coding->composing = COMPOSITION_NO;
4627   do
4628     {
4629       if (COMPOSITION_VALID_P (start, end, prop))
4630         {
4631           enum composition_method method = COMPOSITION_METHOD (prop);
4632           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
4633               >= COMPOSITION_DATA_SIZE)
4634             coding_allocate_composition_data (coding, from);
4635           /* For relative composition, we remember start and end
4636              positions, for the other compositions, we also remember
4637              components.  */
4638           CODING_ADD_COMPOSITION_START (coding, start - from, method);
4639           if (method != COMPOSITION_RELATIVE)
4640             {
4641               /* We must store a*/
4642               Lisp_Object val, ch;
4643
4644               val = COMPOSITION_COMPONENTS (prop);
4645               if (CONSP (val))
4646                 while (CONSP (val))
4647                   {
4648                     ch = XCAR (val), val = XCDR (val);
4649                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4650                   }
4651               else if (VECTORP (val) || STRINGP (val))
4652                 {
4653                   int len = (VECTORP (val)
4654                              ? XVECTOR (val)->size : XSTRING (val)->size);
4655                   int i;
4656                   for (i = 0; i < len; i++)
4657                     {
4658                       ch = (STRINGP (val)
4659                             ? Faref (val, make_number (i))
4660                             : XVECTOR (val)->contents[i]);
4661                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4662                     }
4663                 }
4664               else              /* INTEGERP (val) */
4665                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
4666             }
4667           CODING_ADD_COMPOSITION_END (coding, end - from);
4668         }
4669       start = end;
4670     }
4671   while (start < to
4672          && find_composition (start, to, &start, &end, &prop, obj)
4673          && end <= to);
4674
4675   /* Make coding->cmp_data point to the first memory block.  */
4676   while (coding->cmp_data->prev)
4677     coding->cmp_data = coding->cmp_data->prev;
4678   coding->cmp_data_start = 0;
4679 }
4680
4681 /* Reflect the saved information about compositions to OBJ.
4682    CODING->cmp_data points to a memory block for the informaiton.  OBJ
4683    is a buffer or a string, defaults to the current buffer.  */
4684
4685 void
4686 coding_restore_composition (coding, obj)
4687      struct coding_system *coding;
4688      Lisp_Object obj;
4689 {
4690   struct composition_data *cmp_data = coding->cmp_data;
4691
4692   if (!cmp_data)
4693     return;
4694
4695   while (cmp_data->prev)
4696     cmp_data = cmp_data->prev;
4697
4698   while (cmp_data)
4699     {
4700       int i;
4701
4702       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
4703            i += cmp_data->data[i])
4704         {
4705           int *data = cmp_data->data + i;
4706           enum composition_method method = (enum composition_method) data[3];
4707           Lisp_Object components;
4708
4709           if (method == COMPOSITION_RELATIVE)
4710             components = Qnil;
4711           else
4712             {
4713               int len = data[0] - 4, j;
4714               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
4715
4716               for (j = 0; j < len; j++)
4717                 args[j] = make_number (data[4 + j]);
4718               components = (method == COMPOSITION_WITH_ALTCHARS
4719                             ? Fstring (len, args) : Fvector (len, args));
4720             }
4721           compose_text (data[1], data[2], components, Qnil, obj);
4722         }
4723       cmp_data = cmp_data->next;
4724     }
4725 }
4726
4727 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4728    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4729    coding system CODING, and return the status code of code conversion
4730    (currently, this value has no meaning).
4731
4732    How many characters (and bytes) are converted to how many
4733    characters (and bytes) are recorded in members of the structure
4734    CODING.
4735
4736    If REPLACE is nonzero, we do various things as if the original text
4737    is deleted and a new text is inserted.  See the comments in
4738    replace_range (insdel.c) to know what we are doing.
4739
4740    If REPLACE is zero, it is assumed that the source text is unibyte.
4741    Otherwize, it is assumed that the source text is multibyte.  */
4742
4743 int
4744 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4745      int from, from_byte, to, to_byte, encodep, replace;
4746      struct coding_system *coding;
4747 {
4748   int len = to - from, len_byte = to_byte - from_byte;
4749   int require, inserted, inserted_byte;
4750   int head_skip, tail_skip, total_skip = 0;
4751   Lisp_Object saved_coding_symbol;
4752   int first = 1;
4753   unsigned char *src, *dst;
4754   Lisp_Object deletion;
4755   int orig_point = PT, orig_len = len;
4756   int prev_Z;
4757   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
4758
4759   coding->src_multibyte = replace && multibyte_p;
4760   coding->dst_multibyte = multibyte_p;
4761
4762   deletion = Qnil;
4763   saved_coding_symbol = Qnil;
4764
4765   if (from < PT && PT < to)
4766     {
4767       TEMP_SET_PT_BOTH (from, from_byte);
4768       orig_point = from;
4769     }
4770
4771   if (replace)
4772     {
4773       int saved_from = from;
4774       int saved_inhibit_modification_hooks;
4775
4776       prepare_to_modify_buffer (from, to, &from);
4777       if (saved_from != from)
4778         {
4779           to = from + len;
4780           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4781           len_byte = to_byte - from_byte;
4782         }
4783
4784       /* The code conversion routine can not preserve text properties
4785          for now.  So, we must remove all text properties in the
4786          region.  Here, we must suppress all modification hooks.  */
4787       saved_inhibit_modification_hooks = inhibit_modification_hooks;
4788       inhibit_modification_hooks = 1;
4789       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
4790       inhibit_modification_hooks = saved_inhibit_modification_hooks;
4791     }
4792
4793   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4794     {
4795       /* We must detect encoding of text and eol format.  */
4796
4797       if (from < GPT && to > GPT)
4798         move_gap_both (from, from_byte);
4799       if (coding->type == coding_type_undecided)
4800         {
4801           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4802           if (coding->type == coding_type_undecided)
4803             {
4804               /* It seems that the text contains only ASCII, but we
4805                  should not left it undecided because the deeper
4806                  decoding routine (decode_coding) tries to detect the
4807                  encodings again in vain.  */
4808               coding->type = coding_type_emacs_mule;
4809               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
4810             }
4811         }
4812       if (coding->eol_type == CODING_EOL_UNDECIDED
4813           && coding->type != coding_type_ccl)
4814         {
4815           saved_coding_symbol = coding->symbol;
4816           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4817           if (coding->eol_type == CODING_EOL_UNDECIDED)
4818             coding->eol_type = CODING_EOL_LF;
4819           /* We had better recover the original eol format if we
4820              encounter an inconsitent eol format while decoding.  */
4821           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4822         }
4823     }
4824
4825   /* Now we convert the text.  */
4826
4827   /* For encoding, we must process pre-write-conversion in advance.  */
4828   if (! inhibit_pre_post_conversion
4829       && encodep
4830       && SYMBOLP (coding->pre_write_conversion)
4831       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4832     {
4833       /* The function in pre-write-conversion may put a new text in a
4834          new buffer.  */
4835       struct buffer *prev = current_buffer;
4836       Lisp_Object new;
4837       int count = specpdl_ptr - specpdl;
4838
4839       record_unwind_protect (code_convert_region_unwind, Qnil);
4840       /* We should not call any more pre-write/post-read-conversion
4841          functions while this pre-write-conversion is running.  */
4842       inhibit_pre_post_conversion = 1;
4843       call2 (coding->pre_write_conversion,
4844              make_number (from), make_number (to));
4845       inhibit_pre_post_conversion = 0;
4846       /* Discard the unwind protect.  */
4847       specpdl_ptr--;
4848
4849       if (current_buffer != prev)
4850         {
4851           len = ZV - BEGV;
4852           new = Fcurrent_buffer ();
4853           set_buffer_internal_1 (prev);
4854           del_range_2 (from, from_byte, to, to_byte, 0);
4855           TEMP_SET_PT_BOTH (from, from_byte);
4856           insert_from_buffer (XBUFFER (new), 1, len, 0);
4857           Fkill_buffer (new);
4858           if (orig_point >= to)
4859             orig_point += len - orig_len;
4860           else if (orig_point > from)
4861             orig_point = from;
4862           orig_len = len;
4863           to = from + len;
4864           from_byte = CHAR_TO_BYTE (from);
4865           to_byte = CHAR_TO_BYTE (to);
4866           len_byte = to_byte - from_byte;
4867           TEMP_SET_PT_BOTH (from, from_byte);
4868         }
4869     }
4870
4871   if (replace)
4872     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4873
4874   if (coding->composing != COMPOSITION_DISABLED)
4875     {
4876       if (encodep)
4877         coding_save_composition (coding, from, to, Fcurrent_buffer ());
4878       else
4879         coding_allocate_composition_data (coding, from);
4880     }
4881
4882   /* Try to skip the heading and tailing ASCIIs.  */
4883   if (coding->type != coding_type_ccl)
4884     {
4885       int from_byte_orig = from_byte, to_byte_orig = to_byte;
4886
4887       if (from < GPT && GPT < to)
4888         move_gap_both (from, from_byte);
4889       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
4890       if (from_byte == to_byte
4891           && (encodep || NILP (coding->post_read_conversion))
4892           && ! CODING_REQUIRE_FLUSHING (coding))
4893         {
4894           coding->produced = len_byte;
4895           coding->produced_char = len;
4896           if (!replace)
4897             /* We must record and adjust for this new text now.  */
4898             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4899           return 0;
4900         }
4901
4902       head_skip = from_byte - from_byte_orig;
4903       tail_skip = to_byte_orig - to_byte;
4904       total_skip = head_skip + tail_skip;
4905       from += head_skip;
4906       to -= tail_skip;
4907       len -= total_skip; len_byte -= total_skip;
4908     }
4909
4910   /* For converion, we must put the gap before the text in addition to
4911      making the gap larger for efficient decoding.  The required gap
4912      size starts from 2000 which is the magic number used in make_gap.
4913      But, after one batch of conversion, it will be incremented if we
4914      find that it is not enough .  */
4915   require = 2000;
4916
4917   if (GAP_SIZE  < require)
4918     make_gap (require - GAP_SIZE);
4919   move_gap_both (from, from_byte);
4920
4921   inserted = inserted_byte = 0;
4922
4923   GAP_SIZE += len_byte;
4924   ZV -= len;
4925   Z -= len;
4926   ZV_BYTE -= len_byte;
4927   Z_BYTE -= len_byte;
4928
4929   if (GPT - BEG < BEG_UNCHANGED)
4930     BEG_UNCHANGED = GPT - BEG;
4931   if (Z - GPT < END_UNCHANGED)
4932     END_UNCHANGED = Z - GPT;
4933
4934   if (!encodep && coding->src_multibyte)
4935     {
4936       /* Decoding routines expects that the source text is unibyte.
4937          We must convert 8-bit characters of multibyte form to
4938          unibyte.  */
4939       int len_byte_orig = len_byte;
4940       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
4941       if (len_byte < len_byte_orig)
4942         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
4943                     len_byte);
4944       coding->src_multibyte = 0;
4945     }
4946
4947   for (;;)
4948     {
4949       int result;
4950
4951       /* The buffer memory is now:
4952          +--------+converted-text+---------+-------original-text-------+---+
4953          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
4954                   |<---------------------- GAP ----------------------->|  */
4955       src = GAP_END_ADDR - len_byte;
4956       dst = GPT_ADDR + inserted_byte;
4957
4958       if (encodep)
4959         result = encode_coding (coding, src, dst, len_byte, 0);
4960       else
4961         result = decode_coding (coding, src, dst, len_byte, 0);
4962
4963       /* The buffer memory is now:
4964          +--------+-------converted-text----+--+------original-text----+---+
4965          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
4966                   |<---------------------- GAP ----------------------->|  */
4967
4968       inserted += coding->produced_char;
4969       inserted_byte += coding->produced;
4970       len_byte -= coding->consumed;
4971
4972       if (result == CODING_FINISH_INSUFFICIENT_CMP)
4973         {
4974           coding_allocate_composition_data (coding, from + inserted);
4975           continue;
4976         }
4977
4978       src += coding->consumed;
4979       dst += coding->produced;
4980
4981       if (result == CODING_FINISH_NORMAL)
4982         {
4983           src += len_byte;
4984           break;
4985         }
4986       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4987         {
4988           unsigned char *pend = dst, *p = pend - inserted_byte;
4989           Lisp_Object eol_type;
4990
4991           /* Encode LFs back to the original eol format (CR or CRLF).  */
4992           if (coding->eol_type == CODING_EOL_CR)
4993             {
4994               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4995             }
4996           else
4997             {
4998               int count = 0;
4999
5000               while (p < pend) if (*p++ == '\n') count++;
5001               if (src - dst < count)
5002                 {
5003                   /* We don't have sufficient room for encoding LFs
5004                      back to CRLF.  We must record converted and
5005                      not-yet-converted text back to the buffer
5006                      content, enlarge the gap, then record them out of
5007                      the buffer contents again.  */
5008                   int add = len_byte + inserted_byte;
5009
5010                   GAP_SIZE -= add;
5011                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5012                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5013                   make_gap (count - GAP_SIZE);
5014                   GAP_SIZE += add;
5015                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5016                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5017                   /* Don't forget to update SRC, DST, and PEND.  */
5018                   src = GAP_END_ADDR - len_byte;
5019                   dst = GPT_ADDR + inserted_byte;
5020                   pend = dst;
5021                 }
5022               inserted += count;
5023               inserted_byte += count;
5024               coding->produced += count;
5025               p = dst = pend + count;
5026               while (count)
5027                 {
5028                   *--p = *--pend;
5029                   if (*p == '\n') count--, *--p = '\r';
5030                 }
5031             }
5032
5033           /* Suppress eol-format conversion in the further conversion.  */
5034           coding->eol_type = CODING_EOL_LF;
5035
5036           /* Set the coding system symbol to that for Unix-like EOL.  */
5037           eol_type = Fget (saved_coding_symbol, Qeol_type);
5038           if (VECTORP (eol_type)
5039               && XVECTOR (eol_type)->size == 3
5040               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5041             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5042           else
5043             coding->symbol = saved_coding_symbol;
5044
5045           continue;
5046         }
5047       if (len_byte <= 0)
5048         {
5049           if (coding->type != coding_type_ccl
5050               || coding->mode & CODING_MODE_LAST_BLOCK)
5051             break;
5052           coding->mode |= CODING_MODE_LAST_BLOCK;
5053           continue;
5054         }
5055       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5056         {
5057           /* The source text ends in invalid codes.  Let's just
5058              make them valid buffer contents, and finish conversion.  */
5059           inserted += len_byte;
5060           inserted_byte += len_byte;
5061           while (len_byte--)
5062             *dst++ = *src++;
5063           break;
5064         }
5065       if (result == CODING_FINISH_INTERRUPT)
5066         {
5067           /* The conversion procedure was interrupted by a user.  */
5068           break;
5069         }
5070       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5071       if (coding->consumed < 1)
5072         {
5073           /* It's quite strange to require more memory without
5074              consuming any bytes.  Perhaps CCL program bug.  */
5075           break;
5076         }
5077       if (first)
5078         {
5079           /* We have just done the first batch of conversion which was
5080              stoped because of insufficient gap.  Let's reconsider the
5081              required gap size (i.e. SRT - DST) now.
5082
5083              We have converted ORIG bytes (== coding->consumed) into
5084              NEW bytes (coding->produced).  To convert the remaining
5085              LEN bytes, we may need REQUIRE bytes of gap, where:
5086                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5087                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5088              Here, we are sure that NEW >= ORIG.  */
5089           float ratio = coding->produced - coding->consumed;
5090           ratio /= coding->consumed;
5091           require = len_byte * ratio;
5092           first = 0;
5093         }
5094       if ((src - dst) < (require + 2000))
5095         {
5096           /* See the comment above the previous call of make_gap.  */
5097           int add = len_byte + inserted_byte;
5098
5099           GAP_SIZE -= add;
5100           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5101           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5102           make_gap (require + 2000);
5103           GAP_SIZE += add;
5104           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5105           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5106         }
5107     }
5108   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5109
5110   if (encodep && coding->dst_multibyte)
5111     {
5112       /* The output is unibyte.  We must convert 8-bit characters to
5113          multibyte form.  */
5114       if (inserted_byte * 2 > GAP_SIZE)
5115         {
5116           GAP_SIZE -= inserted_byte;
5117           ZV += inserted_byte; Z += inserted_byte;
5118           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5119           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5120           make_gap (inserted_byte - GAP_SIZE);
5121           GAP_SIZE += inserted_byte;
5122           ZV -= inserted_byte; Z -= inserted_byte;
5123           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5124           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5125         }
5126       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5127     }
5128
5129   /* If we have shrinked the conversion area, adjust it now.  */
5130   if (total_skip > 0)
5131     {
5132       if (tail_skip > 0)
5133         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5134       inserted += total_skip; inserted_byte += total_skip;
5135       GAP_SIZE += total_skip;
5136       GPT -= head_skip; GPT_BYTE -= head_skip;
5137       ZV -= total_skip; ZV_BYTE -= total_skip;
5138       Z -= total_skip; Z_BYTE -= total_skip;
5139       from -= head_skip; from_byte -= head_skip;
5140       to += tail_skip; to_byte += tail_skip;
5141     }
5142
5143   prev_Z = Z;
5144   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5145   inserted = Z - prev_Z;
5146
5147   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5148     coding_restore_composition (coding, Fcurrent_buffer ());
5149   coding_free_composition_data (coding);
5150
5151   if (! inhibit_pre_post_conversion
5152       && ! encodep && ! NILP (coding->post_read_conversion))
5153     {
5154       Lisp_Object val;
5155       int count = specpdl_ptr - specpdl;
5156
5157       if (from != PT)
5158         TEMP_SET_PT_BOTH (from, from_byte);
5159       prev_Z = Z;
5160       record_unwind_protect (code_convert_region_unwind, Qnil);
5161       /* We should not call any more pre-write/post-read-conversion
5162          functions while this post-read-conversion is running.  */
5163       inhibit_pre_post_conversion = 1;
5164       val = call1 (coding->post_read_conversion, make_number (inserted));
5165       inhibit_pre_post_conversion = 0;
5166       /* Discard the unwind protect.  */
5167       specpdl_ptr--;
5168       CHECK_NUMBER (val, 0);
5169       inserted += Z - prev_Z;
5170     }
5171
5172   if (orig_point >= from)
5173     {
5174       if (orig_point >= from + orig_len)
5175         orig_point += inserted - orig_len;
5176       else
5177         orig_point = from;
5178       TEMP_SET_PT (orig_point);
5179     }
5180
5181   if (replace)
5182     {
5183       signal_after_change (from, to - from, inserted);
5184       update_compositions (from, from + inserted, CHECK_BORDER);
5185     }
5186
5187   {
5188     coding->consumed = to_byte - from_byte;
5189     coding->consumed_char = to - from;
5190     coding->produced = inserted_byte;
5191     coding->produced_char = inserted;
5192   }
5193
5194   return 0;
5195 }
5196
5197 Lisp_Object
5198 run_pre_post_conversion_on_str (str, coding, encodep)
5199      Lisp_Object str;
5200      struct coding_system *coding;
5201      int encodep;
5202 {
5203   int count = specpdl_ptr - specpdl;
5204   struct gcpro gcpro1;
5205   struct buffer *prev = current_buffer;
5206   int multibyte = STRING_MULTIBYTE (str);
5207
5208   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5209   record_unwind_protect (code_convert_region_unwind, Qnil);
5210   GCPRO1 (str);
5211   temp_output_buffer_setup (" *code-converting-work*");
5212   set_buffer_internal (XBUFFER (Vstandard_output));
5213   /* We must insert the contents of STR as is without
5214      unibyte<->multibyte conversion.  For that, we adjust the
5215      multibyteness of the working buffer to that of STR.  */
5216   Ferase_buffer ();
5217   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5218   insert_from_string (str, 0, 0,
5219                       XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
5220   UNGCPRO;
5221   inhibit_pre_post_conversion = 1;
5222   if (encodep)
5223     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5224   else
5225     {
5226       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5227       call1 (coding->post_read_conversion, make_number (Z - BEG));
5228     }
5229   inhibit_pre_post_conversion = 0;
5230   str = make_buffer_string (BEG, Z, 1);
5231   return unbind_to (count, str);
5232 }
5233
5234 Lisp_Object
5235 decode_coding_string (str, coding, nocopy)
5236      Lisp_Object str;
5237      struct coding_system *coding;
5238      int nocopy;
5239 {
5240   int len;
5241   struct conversion_buffer buf;
5242   int from, to, to_byte;
5243   struct gcpro gcpro1;
5244   Lisp_Object saved_coding_symbol;
5245   int result;
5246   int require_decoding;
5247   int shrinked_bytes = 0;
5248   Lisp_Object newstr;
5249   int consumed, consumed_char, produced, produced_char;
5250
5251   from = 0;
5252   to = XSTRING (str)->size;
5253   to_byte = STRING_BYTES (XSTRING (str));
5254
5255   saved_coding_symbol = Qnil;
5256   if (CODING_REQUIRE_DETECTION (coding))
5257     {
5258       /* See the comments in code_convert_region.  */
5259       if (coding->type == coding_type_undecided)
5260         {
5261           detect_coding (coding, XSTRING (str)->data, to_byte);
5262           if (coding->type == coding_type_undecided)
5263             coding->type = coding_type_emacs_mule;
5264         }
5265       if (coding->eol_type == CODING_EOL_UNDECIDED
5266           && coding->type != coding_type_ccl)
5267         {
5268           saved_coding_symbol = coding->symbol;
5269           detect_eol (coding, XSTRING (str)->data, to_byte);
5270           if (coding->eol_type == CODING_EOL_UNDECIDED)
5271             coding->eol_type = CODING_EOL_LF;
5272           /* We had better recover the original eol format if we
5273              encounter an inconsitent eol format while decoding.  */
5274           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5275         }
5276     }
5277
5278   coding->src_multibyte = 0;
5279   coding->dst_multibyte = (coding->type != coding_type_no_conversion
5280                            && coding->type != coding_type_raw_text);
5281   require_decoding = CODING_REQUIRE_DECODING (coding);
5282
5283   if (STRING_MULTIBYTE (str))
5284     {
5285       /* Decoding routines expect the source text to be unibyte.  */
5286       str = Fstring_as_unibyte (str);
5287       to_byte = STRING_BYTES (XSTRING (str));
5288       nocopy = 1;
5289     }
5290
5291   /* Try to skip the heading and tailing ASCIIs.  */
5292   if (require_decoding && coding->type != coding_type_ccl)
5293     {
5294       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5295                                 0);
5296       if (from == to_byte)
5297         require_decoding = 0;
5298       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5299     }
5300
5301   if (!require_decoding)
5302     {
5303       coding->consumed = STRING_BYTES (XSTRING (str));
5304       coding->consumed_char = XSTRING (str)->size;
5305       if (coding->dst_multibyte)
5306         {
5307           str = Fstring_as_multibyte (str);
5308           nocopy = 1;
5309         }
5310       coding->produced = STRING_BYTES (XSTRING (str));
5311       coding->produced_char = XSTRING (str)->size;
5312       return (nocopy ? str : Fcopy_sequence (str));
5313     }
5314
5315   if (coding->composing != COMPOSITION_DISABLED)
5316     coding_allocate_composition_data (coding, from);
5317   len = decoding_buffer_size (coding, to_byte - from);
5318   allocate_conversion_buffer (buf, len);
5319
5320   consumed = consumed_char = produced = produced_char = 0;
5321   while (1)
5322     {
5323       result = decode_coding (coding, XSTRING (str)->data + from + consumed,
5324                               buf.data + produced, to_byte - from - consumed,
5325                               buf.size - produced);
5326       consumed += coding->consumed;
5327       consumed_char += coding->consumed_char;
5328       produced += coding->produced;
5329       produced_char += coding->produced_char;
5330       if (result == CODING_FINISH_NORMAL
5331           || (result == CODING_FINISH_INSUFFICIENT_SRC
5332               && coding->consumed == 0))
5333         break;
5334       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5335         coding_allocate_composition_data (coding, from + produced_char);
5336       else if (result == CODING_FINISH_INSUFFICIENT_DST)
5337         extend_conversion_buffer (&buf);
5338       else if (result == CODING_FINISH_INCONSISTENT_EOL)
5339         {
5340           /* Recover the original EOL format.  */
5341           if (coding->eol_type == CODING_EOL_CR)
5342             {
5343               unsigned char *p;
5344               for (p = buf.data; p < buf.data + produced; p++)
5345                 if (*p == '\n') *p = '\r';
5346             }
5347           else if (coding->eol_type == CODING_EOL_CRLF)
5348             {
5349               int num_eol = 0;
5350               unsigned char *p0, *p1;
5351               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
5352                 if (*p0 == '\n') num_eol++;
5353               if (produced + num_eol >= buf.size)
5354                 extend_conversion_buffer (&buf);
5355               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
5356                 {
5357                   *--p1 = *--p0;
5358                   if (*p0 == '\n') *--p1 = '\r';
5359                 }
5360               produced += num_eol;
5361               produced_char += num_eol;
5362             }
5363           coding->eol_type = CODING_EOL_LF;
5364           coding->symbol = saved_coding_symbol;
5365         }
5366     }
5367
5368   coding->consumed = consumed;
5369   coding->consumed_char = consumed_char;
5370   coding->produced = produced;
5371   coding->produced_char = produced_char;
5372
5373   if (coding->dst_multibyte)
5374     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
5375                                            produced + shrinked_bytes);
5376   else
5377     newstr = make_uninit_string (produced + shrinked_bytes);
5378   if (from > 0)
5379     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5380   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5381   if (shrinked_bytes > from)
5382     bcopy (XSTRING (str)->data + to_byte,
5383            XSTRING (newstr)->data + from + produced,
5384            shrinked_bytes - from);
5385   free_conversion_buffer (&buf);
5386
5387   if (coding->cmp_data && coding->cmp_data->used)
5388     coding_restore_composition (coding, newstr);
5389   coding_free_composition_data (coding);
5390
5391   if (SYMBOLP (coding->post_read_conversion)
5392       && !NILP (Ffboundp (coding->post_read_conversion)))
5393     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
5394
5395   return newstr;
5396 }
5397
5398 Lisp_Object
5399 encode_coding_string (str, coding, nocopy)
5400      Lisp_Object str;
5401      struct coding_system *coding;
5402      int nocopy;
5403 {
5404   int len;
5405   struct conversion_buffer buf;
5406   int from, to, to_byte;
5407   struct gcpro gcpro1;
5408   Lisp_Object saved_coding_symbol;
5409   int result;
5410   int shrinked_bytes = 0;
5411   Lisp_Object newstr;
5412   int consumed, consumed_char, produced, produced_char;
5413
5414   if (SYMBOLP (coding->pre_write_conversion)
5415       && !NILP (Ffboundp (coding->pre_write_conversion)))
5416     str = run_pre_post_conversion_on_str (str, coding, 1);
5417
5418   from = 0;
5419   to = XSTRING (str)->size;
5420   to_byte = STRING_BYTES (XSTRING (str));
5421
5422   saved_coding_symbol = Qnil;
5423
5424   /* Encoding routines determine the multibyteness of the source text
5425      by coding->src_multibyte.  */
5426   coding->src_multibyte = STRING_MULTIBYTE (str);
5427   coding->dst_multibyte = 0;
5428   if (! CODING_REQUIRE_ENCODING (coding))
5429     {
5430       coding->consumed = STRING_BYTES (XSTRING (str));
5431       coding->consumed_char = XSTRING (str)->size;
5432       if (STRING_MULTIBYTE (str))
5433         {
5434           str = Fstring_as_unibyte (str);
5435           nocopy = 1;
5436         }
5437       coding->produced = STRING_BYTES (XSTRING (str));
5438       coding->produced_char = XSTRING (str)->size;
5439       return (nocopy ? str : Fcopy_sequence (str));
5440     }
5441
5442   if (coding->composing != COMPOSITION_DISABLED)
5443     coding_save_composition (coding, from, to, str);
5444
5445   /* Try to skip the heading and tailing ASCIIs.  */
5446   if (coding->type != coding_type_ccl)
5447     {
5448       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5449                                 1);
5450       if (from == to_byte)
5451         return (nocopy ? str : Fcopy_sequence (str));
5452       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5453     }
5454
5455   len = encoding_buffer_size (coding, to_byte - from);
5456   allocate_conversion_buffer (buf, len);
5457
5458   consumed = consumed_char = produced = produced_char = 0;
5459   while (1)
5460     {
5461       result = encode_coding (coding, XSTRING (str)->data + from + consumed,
5462                               buf.data + produced, to_byte - from - consumed,
5463                               buf.size - produced);
5464       consumed += coding->consumed;
5465       consumed_char += coding->consumed_char;
5466       produced += coding->produced;
5467       produced_char += coding->produced_char;
5468       if (result == CODING_FINISH_NORMAL
5469           || (result == CODING_FINISH_INSUFFICIENT_SRC
5470               && coding->consumed == 0))
5471         break;
5472       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
5473       extend_conversion_buffer (&buf);
5474     }
5475
5476   coding->consumed = consumed;
5477   coding->consumed_char = consumed_char;
5478   coding->produced = produced;
5479   coding->produced_char = produced_char;
5480
5481   newstr = make_uninit_string (produced + shrinked_bytes);
5482   if (from > 0)
5483     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5484   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5485   if (shrinked_bytes > from)
5486     bcopy (XSTRING (str)->data + to_byte,
5487            XSTRING (newstr)->data + from + produced,
5488            shrinked_bytes - from);
5489
5490   free_conversion_buffer (&buf);
5491   coding_free_composition_data (coding);
5492
5493   return newstr;
5494 }
5495
5496 \f
5497 #ifdef emacs
5498 /*** 8. Emacs Lisp library functions ***/
5499
5500 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
5501   "Return t if OBJECT is nil or a coding-system.\n\
5502 See the documentation of `make-coding-system' for information\n\
5503 about coding-system objects.")
5504   (obj)
5505      Lisp_Object obj;
5506 {
5507   if (NILP (obj))
5508     return Qt;
5509   if (!SYMBOLP (obj))
5510     return Qnil;
5511   /* Get coding-spec vector for OBJ.  */
5512   obj = Fget (obj, Qcoding_system);
5513   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
5514           ? Qt : Qnil);
5515 }
5516
5517 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
5518        Sread_non_nil_coding_system, 1, 1, 0,
5519   "Read a coding system from the minibuffer, prompting with string PROMPT.")
5520   (prompt)
5521      Lisp_Object prompt;
5522 {
5523   Lisp_Object val;
5524   do
5525     {
5526       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5527                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
5528     }
5529   while (XSTRING (val)->size == 0);
5530   return (Fintern (val, Qnil));
5531 }
5532
5533 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
5534   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
5535 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
5536   (prompt, default_coding_system)
5537      Lisp_Object prompt, default_coding_system;
5538 {
5539   Lisp_Object val;
5540   if (SYMBOLP (default_coding_system))
5541     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
5542   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5543                           Qt, Qnil, Qcoding_system_history,
5544                           default_coding_system, Qnil);
5545   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
5546 }
5547
5548 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
5549        1, 1, 0,
5550   "Check validity of CODING-SYSTEM.\n\
5551 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
5552 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
5553 The value of property should be a vector of length 5.")
5554   (coding_system)
5555      Lisp_Object coding_system;
5556 {
5557   CHECK_SYMBOL (coding_system, 0);
5558   if (!NILP (Fcoding_system_p (coding_system)))
5559     return coding_system;
5560   while (1)
5561     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
5562 }
5563 \f
5564 Lisp_Object
5565 detect_coding_system (src, src_bytes, highest)
5566      unsigned char *src;
5567      int src_bytes, highest;
5568 {
5569   int coding_mask, eol_type;
5570   Lisp_Object val, tmp;
5571   int dummy;
5572
5573   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
5574   eol_type  = detect_eol_type (src, src_bytes, &dummy);
5575   if (eol_type == CODING_EOL_INCONSISTENT)
5576     eol_type = CODING_EOL_UNDECIDED;
5577
5578   if (!coding_mask)
5579     {
5580       val = Qundecided;
5581       if (eol_type != CODING_EOL_UNDECIDED)
5582         {
5583           Lisp_Object val2;
5584           val2 = Fget (Qundecided, Qeol_type);
5585           if (VECTORP (val2))
5586             val = XVECTOR (val2)->contents[eol_type];
5587         }
5588       return (highest ? val : Fcons (val, Qnil));
5589     }
5590
5591   /* At first, gather possible coding systems in VAL.  */
5592   val = Qnil;
5593   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
5594     {
5595       Lisp_Object category_val, category_index;
5596
5597       category_index = Fget (XCAR (tmp), Qcoding_category_index);
5598       category_val = Fsymbol_value (XCAR (tmp));
5599       if (!NILP (category_val)
5600           && NATNUMP (category_index)
5601           && (coding_mask & (1 << XFASTINT (category_index))))
5602         {
5603           val = Fcons (category_val, val);
5604           if (highest)
5605             break;
5606         }
5607     }
5608   if (!highest)
5609     val = Fnreverse (val);
5610
5611   /* Then, replace the elements with subsidiary coding systems.  */
5612   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
5613     {
5614       if (eol_type != CODING_EOL_UNDECIDED
5615           && eol_type != CODING_EOL_INCONSISTENT)
5616         {
5617           Lisp_Object eol;
5618           eol = Fget (XCAR (tmp), Qeol_type);
5619           if (VECTORP (eol))
5620             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
5621         }
5622     }
5623   return (highest ? XCAR (val) : val);
5624 }
5625
5626 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
5627        2, 3, 0,
5628   "Detect coding system of the text in the region between START and END.\n\
5629 Return a list of possible coding systems ordered by priority.\n\
5630 \n\
5631 If only ASCII characters are found, it returns a list of single element\n\
5632 `undecided' or its subsidiary coding system according to a detected\n\
5633 end-of-line format.\n\
5634 \n\
5635 If optional argument HIGHEST is non-nil, return the coding system of\n\
5636 highest priority.")
5637   (start, end, highest)
5638      Lisp_Object start, end, highest;
5639 {
5640   int from, to;
5641   int from_byte, to_byte;
5642
5643   CHECK_NUMBER_COERCE_MARKER (start, 0);
5644   CHECK_NUMBER_COERCE_MARKER (end, 1);
5645
5646   validate_region (&start, &end);
5647   from = XINT (start), to = XINT (end);
5648   from_byte = CHAR_TO_BYTE (from);
5649   to_byte = CHAR_TO_BYTE (to);
5650
5651   if (from < GPT && to >= GPT)
5652     move_gap_both (to, to_byte);
5653
5654   return detect_coding_system (BYTE_POS_ADDR (from_byte),
5655                                to_byte - from_byte,
5656                                !NILP (highest));
5657 }
5658
5659 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
5660        1, 2, 0,
5661   "Detect coding system of the text in STRING.\n\
5662 Return a list of possible coding systems ordered by priority.\n\
5663 \n\
5664 If only ASCII characters are found, it returns a list of single element\n\
5665 `undecided' or its subsidiary coding system according to a detected\n\
5666 end-of-line format.\n\
5667 \n\
5668 If optional argument HIGHEST is non-nil, return the coding system of\n\
5669 highest priority.")
5670   (string, highest)
5671      Lisp_Object string, highest;
5672 {
5673   CHECK_STRING (string, 0);
5674
5675   return detect_coding_system (XSTRING (string)->data,
5676                                STRING_BYTES (XSTRING (string)),
5677                                !NILP (highest));
5678 }
5679
5680 /* Return an intersection of lists L1 and L2.  */
5681
5682 static Lisp_Object
5683 intersection (l1, l2)
5684      Lisp_Object l1, l2;
5685 {
5686   Lisp_Object val;
5687
5688   for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
5689     {
5690       if (!NILP (Fmemq (XCAR (l1), l2)))
5691         val = Fcons (XCAR (l1), val);
5692     }
5693   return val;
5694 }
5695
5696
5697 /*  Subroutine for Fsafe_coding_systems_region_internal.
5698
5699     Return a list of coding systems that safely encode the multibyte
5700     text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
5701     possible coding systems.  If it is nil, it means that we have not
5702     yet found any coding systems.
5703
5704     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
5705     element of WORK_TABLE is set to t once the element is looked up.
5706
5707     If a non-ASCII single byte char is found, set
5708     *single_byte_char_found to 1.  */
5709
5710 static Lisp_Object
5711 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
5712      unsigned char *p, *pend;
5713      Lisp_Object safe_codings, work_table;
5714      int *single_byte_char_found;
5715 {
5716   int c, len, idx;
5717   Lisp_Object val;
5718
5719   while (p < pend)
5720     {
5721       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
5722       p += len;
5723       if (ASCII_BYTE_P (c))
5724         /* We can ignore ASCII characters here.  */
5725         continue;
5726       if (SINGLE_BYTE_CHAR_P (c))
5727         *single_byte_char_found = 1;
5728       if (NILP (safe_codings))
5729         continue;
5730       /* Check the safe coding systems for C.  */
5731       val = char_table_ref_and_index (work_table, c, &idx);
5732       if (EQ (val, Qt))
5733         /* This element was already checked.  Ignore it.  */
5734         continue;
5735       /* Remember that we checked this element.  */
5736       CHAR_TABLE_SET (work_table, make_number (idx), Qt);
5737
5738       /* If there are some safe coding systems for C and we have
5739          already found the other set of coding systems for the
5740          different characters, get the intersection of them.  */
5741       if (!EQ (safe_codings, Qt) && !NILP (val))
5742         val = intersection (safe_codings, val);
5743       safe_codings = val;
5744     }
5745   return safe_codings;
5746 }
5747
5748
5749 /* Return a list of coding systems that safely encode the text between
5750    START and END.  If the text contains only ASCII or is unibyte,
5751    return t.  */
5752
5753 DEFUN ("find-coding-systems-region-internal",
5754        Ffind_coding_systems_region_internal,
5755        Sfind_coding_systems_region_internal, 2, 2, 0,
5756   "Internal use only.")
5757   (start, end)
5758      Lisp_Object start, end;
5759 {
5760   Lisp_Object work_table, safe_codings;
5761   int non_ascii_p = 0;
5762   int single_byte_char_found = 0;
5763   unsigned char *p1, *p1end, *p2, *p2end, *p;
5764   Lisp_Object args[2];
5765
5766   if (STRINGP (start))
5767     {
5768       if (!STRING_MULTIBYTE (start))
5769         return Qt;
5770       p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
5771       p2 = p2end = p1end;
5772       if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
5773         non_ascii_p = 1;
5774     }
5775   else
5776     {
5777       int from, to, stop;
5778
5779       CHECK_NUMBER_COERCE_MARKER (start, 0);
5780       CHECK_NUMBER_COERCE_MARKER (end, 1);
5781       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
5782         args_out_of_range (start, end);
5783       if (NILP (current_buffer->enable_multibyte_characters))
5784         return Qt;
5785       from = CHAR_TO_BYTE (XINT (start));
5786       to = CHAR_TO_BYTE (XINT (end));
5787       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
5788       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
5789       if (stop == to)
5790         p2 = p2end = p1end;
5791       else
5792         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
5793       if (XINT (end) - XINT (start) != to - from)
5794         non_ascii_p = 1;
5795     }
5796
5797   if (!non_ascii_p)
5798     {
5799       /* We are sure that the text contains no multibyte character.
5800          Check if it contains eight-bit-graphic.  */
5801       p = p1;
5802       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
5803       if (p == p1end)
5804         {
5805           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
5806           if (p == p2end)
5807             return Qt;
5808         }
5809     }
5810
5811   /* The text contains non-ASCII characters.  */
5812   work_table = Fcopy_sequence (Vchar_coding_system_table);
5813   safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
5814                                     &single_byte_char_found);
5815   if (p2 < p2end)
5816     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
5817                                       &single_byte_char_found);
5818
5819   if (!single_byte_char_found)
5820     {
5821       /* Append generic coding systems.  */
5822       Lisp_Object args[2];
5823       args[0] = safe_codings;
5824       args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
5825                                         make_number (0));
5826       safe_codings = Fappend (2, args);
5827     }
5828   else
5829     safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings));
5830   return safe_codings;
5831 }
5832
5833
5834 Lisp_Object
5835 code_convert_region1 (start, end, coding_system, encodep)
5836      Lisp_Object start, end, coding_system;
5837      int encodep;
5838 {
5839   struct coding_system coding;
5840   int from, to, len;
5841
5842   CHECK_NUMBER_COERCE_MARKER (start, 0);
5843   CHECK_NUMBER_COERCE_MARKER (end, 1);
5844   CHECK_SYMBOL (coding_system, 2);
5845
5846   validate_region (&start, &end);
5847   from = XFASTINT (start);
5848   to = XFASTINT (end);
5849
5850   if (NILP (coding_system))
5851     return make_number (to - from);
5852
5853   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5854     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5855
5856   coding.mode |= CODING_MODE_LAST_BLOCK;
5857   coding.src_multibyte = coding.dst_multibyte
5858     = !NILP (current_buffer->enable_multibyte_characters);
5859   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
5860                        &coding, encodep, 1);
5861   Vlast_coding_system_used = coding.symbol;
5862   return make_number (coding.produced_char);
5863 }
5864
5865 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
5866        3, 3, "r\nzCoding system: ",
5867   "Decode the current region by specified coding system.\n\
5868 When called from a program, takes three arguments:\n\
5869 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5870 This function sets `last-coding-system-used' to the precise coding system\n\
5871 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5872 not fully specified.)\n\
5873 It returns the length of the decoded text.")
5874   (start, end, coding_system)
5875      Lisp_Object start, end, coding_system;
5876 {
5877   return code_convert_region1 (start, end, coding_system, 0);
5878 }
5879
5880 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
5881        3, 3, "r\nzCoding system: ",
5882   "Encode the current region by specified coding system.\n\
5883 When called from a program, takes three arguments:\n\
5884 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5885 This function sets `last-coding-system-used' to the precise coding system\n\
5886 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5887 not fully specified.)\n\
5888 It returns the length of the encoded text.")
5889   (start, end, coding_system)
5890      Lisp_Object start, end, coding_system;
5891 {
5892   return code_convert_region1 (start, end, coding_system, 1);
5893 }
5894
5895 Lisp_Object
5896 code_convert_string1 (string, coding_system, nocopy, encodep)
5897      Lisp_Object string, coding_system, nocopy;
5898      int encodep;
5899 {
5900   struct coding_system coding;
5901
5902   CHECK_STRING (string, 0);
5903   CHECK_SYMBOL (coding_system, 1);
5904
5905   if (NILP (coding_system))
5906     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
5907
5908   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5909     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5910
5911   coding.mode |= CODING_MODE_LAST_BLOCK;
5912   string = (encodep
5913             ? encode_coding_string (string, &coding, !NILP (nocopy))
5914             : decode_coding_string (string, &coding, !NILP (nocopy)));
5915   Vlast_coding_system_used = coding.symbol;
5916
5917   return string;
5918 }
5919
5920 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
5921        2, 3, 0,
5922   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5923 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5924 if the decoding operation is trivial.\n\
5925 This function sets `last-coding-system-used' to the precise coding system\n\
5926 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5927 not fully specified.)")
5928   (string, coding_system, nocopy)
5929      Lisp_Object string, coding_system, nocopy;
5930 {
5931   return code_convert_string1 (string, coding_system, nocopy, 0);
5932 }
5933
5934 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
5935        2, 3, 0,
5936   "Encode STRING to CODING-SYSTEM, and return the result.\n\
5937 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5938 if the encoding operation is trivial.\n\
5939 This function sets `last-coding-system-used' to the precise coding system\n\
5940 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5941 not fully specified.)")
5942   (string, coding_system, nocopy)
5943      Lisp_Object string, coding_system, nocopy;
5944 {
5945   return code_convert_string1 (string, coding_system, nocopy, 1);
5946 }
5947
5948 /* Encode or decode STRING according to CODING_SYSTEM.
5949    Do not set Vlast_coding_system_used.
5950
5951    This function is called only from macros DECODE_FILE and
5952    ENCODE_FILE, thus we ignore character composition.  */
5953
5954 Lisp_Object
5955 code_convert_string_norecord (string, coding_system, encodep)
5956      Lisp_Object string, coding_system;
5957      int encodep;
5958 {
5959   struct coding_system coding;
5960
5961   CHECK_STRING (string, 0);
5962   CHECK_SYMBOL (coding_system, 1);
5963
5964   if (NILP (coding_system))
5965     return string;
5966
5967   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5968     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5969
5970   coding.composing = COMPOSITION_DISABLED;
5971   coding.mode |= CODING_MODE_LAST_BLOCK;
5972   return (encodep
5973           ? encode_coding_string (string, &coding, 1)
5974           : decode_coding_string (string, &coding, 1));
5975 }
5976 \f
5977 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
5978   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
5979 Return the corresponding character.")
5980   (code)
5981      Lisp_Object code;
5982 {
5983   unsigned char c1, c2, s1, s2;
5984   Lisp_Object val;
5985
5986   CHECK_NUMBER (code, 0);
5987   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
5988   if (s1 == 0)
5989     {
5990       if (s2 < 0x80)
5991         XSETFASTINT (val, s2);
5992       else if (s2 >= 0xA0 || s2 <= 0xDF)
5993         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
5994       else
5995         error ("Invalid Shift JIS code: %x", XFASTINT (code));
5996     }
5997   else
5998     {
5999       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
6000           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
6001         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6002       DECODE_SJIS (s1, s2, c1, c2);
6003       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
6004     }
6005   return val;
6006 }
6007
6008 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6009   "Encode a Japanese character CHAR to shift_jis encoding.\n\
6010 Return the corresponding code in SJIS.")
6011   (ch)
6012      Lisp_Object ch;
6013 {
6014   int charset, c1, c2, s1, s2;
6015   Lisp_Object val;
6016
6017   CHECK_NUMBER (ch, 0);
6018   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6019   if (charset == CHARSET_ASCII)
6020     {
6021       val = ch;
6022     }
6023   else if (charset == charset_jisx0208
6024            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6025     {
6026       ENCODE_SJIS (c1, c2, s1, s2);
6027       XSETFASTINT (val, (s1 << 8) | s2);
6028     }
6029   else if (charset == charset_katakana_jisx0201
6030            && c1 > 0x20 && c2 < 0xE0)
6031     {
6032       XSETFASTINT (val, c1 | 0x80);
6033     }
6034   else
6035     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6036   return val;
6037 }
6038
6039 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6040   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
6041 Return the corresponding character.")
6042   (code)
6043      Lisp_Object code;
6044 {
6045   int charset;
6046   unsigned char b1, b2, c1, c2;
6047   Lisp_Object val;
6048
6049   CHECK_NUMBER (code, 0);
6050   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6051   if (b1 == 0)
6052     {
6053       if (b2 >= 0x80)
6054         error ("Invalid BIG5 code: %x", XFASTINT (code));
6055       val = code;
6056     }
6057   else
6058     {
6059       if ((b1 < 0xA1 || b1 > 0xFE)
6060           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6061         error ("Invalid BIG5 code: %x", XFASTINT (code));
6062       DECODE_BIG5 (b1, b2, charset, c1, c2);
6063       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
6064     }
6065   return val;
6066 }
6067
6068 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6069   "Encode the Big5 character CHAR to BIG5 coding system.\n\
6070 Return the corresponding character code in Big5.")
6071   (ch)
6072      Lisp_Object ch;
6073 {
6074   int charset, c1, c2, b1, b2;
6075   Lisp_Object val;
6076
6077   CHECK_NUMBER (ch, 0);
6078   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6079   if (charset == CHARSET_ASCII)
6080     {
6081       val = ch;
6082     }
6083   else if ((charset == charset_big5_1
6084             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
6085            || (charset == charset_big5_2
6086                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
6087     {
6088       ENCODE_BIG5 (charset, c1, c2, b1, b2);
6089       XSETFASTINT (val, (b1 << 8) | b2);
6090     }
6091   else
6092     error ("Can't encode to Big5: %d", XFASTINT (ch));
6093   return val;
6094 }
6095 \f
6096 DEFUN ("set-terminal-coding-system-internal",
6097        Fset_terminal_coding_system_internal,
6098        Sset_terminal_coding_system_internal, 1, 1, 0, "")
6099   (coding_system)
6100      Lisp_Object coding_system;
6101 {
6102   CHECK_SYMBOL (coding_system, 0);
6103   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
6104   /* We had better not send unsafe characters to terminal.  */
6105   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
6106   /* Characer composition should be disabled.  */
6107   terminal_coding.composing = COMPOSITION_DISABLED;
6108   terminal_coding.src_multibyte = 1;
6109   terminal_coding.dst_multibyte = 0;
6110   return Qnil;
6111 }
6112
6113 DEFUN ("set-safe-terminal-coding-system-internal",
6114        Fset_safe_terminal_coding_system_internal,
6115        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
6116   (coding_system)
6117      Lisp_Object coding_system;
6118 {
6119   CHECK_SYMBOL (coding_system, 0);
6120   setup_coding_system (Fcheck_coding_system (coding_system),
6121                        &safe_terminal_coding);
6122   /* Characer composition should be disabled.  */
6123   safe_terminal_coding.composing = COMPOSITION_DISABLED;
6124   safe_terminal_coding.src_multibyte = 1;
6125   safe_terminal_coding.dst_multibyte = 0;
6126   return Qnil;
6127 }
6128
6129 DEFUN ("terminal-coding-system",
6130        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6131   "Return coding system specified for terminal output.")
6132   ()
6133 {
6134   return terminal_coding.symbol;
6135 }
6136
6137 DEFUN ("set-keyboard-coding-system-internal",
6138        Fset_keyboard_coding_system_internal,
6139        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
6140   (coding_system)
6141      Lisp_Object coding_system;
6142 {
6143   CHECK_SYMBOL (coding_system, 0);
6144   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
6145   /* Characer composition should be disabled.  */
6146   keyboard_coding.composing = COMPOSITION_DISABLED;
6147   return Qnil;
6148 }
6149
6150 DEFUN ("keyboard-coding-system",
6151        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6152   "Return coding system specified for decoding keyboard input.")
6153   ()
6154 {
6155   return keyboard_coding.symbol;
6156 }
6157
6158 \f
6159 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
6160        Sfind_operation_coding_system,  1, MANY, 0,
6161   "Choose a coding system for an operation based on the target name.\n\
6162 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
6163 DECODING-SYSTEM is the coding system to use for decoding\n\
6164 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
6165 for encoding (in case OPERATION does encoding).\n\
6166 \n\
6167 The first argument OPERATION specifies an I/O primitive:\n\
6168   For file I/O, `insert-file-contents' or `write-region'.\n\
6169   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
6170   For network I/O, `open-network-stream'.\n\
6171 \n\
6172 The remaining arguments should be the same arguments that were passed\n\
6173 to the primitive.  Depending on which primitive, one of those arguments\n\
6174 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
6175 whichever argument specifies the file name is TARGET.\n\
6176 \n\
6177 TARGET has a meaning which depends on OPERATION:\n\
6178   For file I/O, TARGET is a file name.\n\
6179   For process I/O, TARGET is a process name.\n\
6180   For network I/O, TARGET is a service name or a port number\n\
6181 \n\
6182 This function looks up what specified for TARGET in,\n\
6183 `file-coding-system-alist', `process-coding-system-alist',\n\
6184 or `network-coding-system-alist' depending on OPERATION.\n\
6185 They may specify a coding system, a cons of coding systems,\n\
6186 or a function symbol to call.\n\
6187 In the last case, we call the function with one argument,\n\
6188 which is a list of all the arguments given to this function.")
6189   (nargs, args)
6190      int nargs;
6191      Lisp_Object *args;
6192 {
6193   Lisp_Object operation, target_idx, target, val;
6194   register Lisp_Object chain;
6195
       /* ARGS[0] is OPERATION; ARGS[1] .. ARGS[NARGS - 1] are the arguments
          that were given to the primitive OPERATION names.  The value is
          nil or a cons (DECODING-SYSTEM . ENCODING-SYSTEM); malformed
          arguments are reported through `error'.  */
6196   if (nargs < 2)
6197     error ("Too few arguments");
6198   operation = args[0];
       /* OPERATION's `target-idx' property is the zero-based position of
          TARGET among the primitive's own arguments.  Reject a negative
          value: it would make the index computed below fall before
          ARGS[1].  */
6199   if (!SYMBOLP (operation)
6200       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx))
           || XINT (target_idx) < 0)
6201     error ("Invalid first argument");
       /* TARGET lives at ARGS[target_idx + 1], so ARGS must hold at least
          target_idx + 2 elements.  (The test used to be `<', which let the
          access below read one element past the end of ARGS.)  */
6202   if (nargs <= 1 + XINT (target_idx))
6203     error ("Too few arguments for operation: %s",
6204            XSYMBOL (operation)->name->data);
6205   target = args[XINT (target_idx) + 1];
       /* TARGET must be a string; only `open-network-stream' may also
          pass an integer (a port number).  */
6206   if (!(STRINGP (target)
6207         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
6208     error ("Invalid %dth argument", XINT (target_idx) + 1);
6209
       /* Pick the alist to search: the file alist for the two file
          primitives, the network alist for `open-network-stream', and the
          process alist for everything else.  */
6210   chain = ((EQ (operation, Qinsert_file_contents)
6211             || EQ (operation, Qwrite_region))
6212            ? Vfile_coding_system_alist
6213            : (EQ (operation, Qopen_network_stream)
6214               ? Vnetwork_coding_system_alist
6215               : Vprocess_coding_system_alist));
6216   if (NILP (chain))
6217     return Qnil;
6218
       /* The first alist element whose car matches TARGET decides the
          result; later elements are never consulted.  */
6219   for (; CONSP (chain); chain = XCDR (chain))
6220     {
6221       Lisp_Object elt;
6222       elt = XCAR (chain);
6223
           /* A string TARGET is matched against a string car with
              fast_string_match; an integer TARGET must be EQ to the car.  */
6224       if (CONSP (elt)
6225           && ((STRINGP (target)
6226                && STRINGP (XCAR (elt))
6227                && fast_string_match (XCAR (elt), target) >= 0)
6228               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6229         {
6230           val = XCDR (elt);
6231           /* Here, if VAL is both a valid coding system and a valid
6232              function symbol, we return VAL as a coding system.  */
6233           if (CONSP (val))
6234             return val;
6235           if (! SYMBOLP (val))
6236             return Qnil;
6237           if (! NILP (Fcoding_system_p (val)))
6238             return Fcons (val, val);
6239           if (! NILP (Ffboundp (val)))
6240             {
                   /* Call the function with one argument: the list of all
                      arguments given to us, OPERATION included.  Its value
                      is accepted only if it is a cons or a coding system.  */
6241               val = call1 (val, Flist (nargs, args));
6242               if (CONSP (val))
6243                 return val;
6244               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
6245                 return Fcons (val, val);
6246             }
6247           return Qnil;
6248         }
6249     }
6250   return Qnil;
6251 }
6252
6253 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
6254        Supdate_coding_systems_internal, 0, 0, 0,
6255   "Update internal database for ISO2022 and CCL based coding systems.\n\
6256 When values of any coding categories are changed, you must\n\
6257 call this function")
6258   ()
6259 {
6260   int slot = CODING_CATEGORY_IDX_EMACS_MULE;
6261
6262   /* Bring coding_system_table[] in line with the value of each category symbol.  */
6263   for (; slot < CODING_CATEGORY_IDX_MAX; slot++)
6264     {
6265       Lisp_Object bound;
6266       bound = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[slot])->value;
6267       if (NILP (bound))
6268         {
6269           /* Nothing assigned to this category: release a stale entry, if any.  */
6270           if (coding_system_table[slot])
6271             xfree (coding_system_table[slot]);
6272           coding_system_table[slot] = NULL;
6273           continue;
6274         }
6275       /* Allocate the entry on first use, then (re)initialize it from BOUND.  */
6276       if (coding_system_table[slot] == NULL)
6277         coding_system_table[slot]
6278           = (struct coding_system *) xmalloc (sizeof (struct coding_system));
6279       setup_coding_system (bound, coding_system_table[slot]);
6280     }
6281   return Qnil;
6282 }
6283
6284 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
6285        Sset_coding_priority_internal, 0, 0, 0,
6286   "Update internal database for the current value of `coding-category-list'.\n\
6287 This function is internal use only.")
6288   ()
6289 {
6290   int i = 0, idx;
6291   Lisp_Object val, prop;
6292
       /* Rebuild coding_priorities[] from `coding-category-list': entry I
          receives the bit mask (1 << index) of the I-th category in the
          list.  Always returns nil.  */
6293   val = Vcoding_category_list;
6294
6295   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
6296     {
6297       if (! SYMBOLP (XCAR (val)))
6298         break;
           /* A symbol that is not a coding category has no integer
              `coding-category-index' property (Fget yields nil).  Stop at
              such an element instead of extracting an integer from a
              non-integer object.  */
           prop = Fget (XCAR (val), Qcoding_category_index);
           if (! INTEGERP (prop))
             break;
           /* The index is used as a shift count below, so it must also be
              non-negative as well as below the category count.  */
           if (XINT (prop) < 0 || XINT (prop) >= CODING_CATEGORY_IDX_MAX)
6301         break;
6299       idx = XINT (prop);
6302       coding_priorities[i++] = (1 << idx);
6303       val = XCDR (val);
6304     }
6305   /* If coding-category-list is valid and contains all coding
6306      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
6307      the following code saves Emacs from crashing.  */
6308   while (i < CODING_CATEGORY_IDX_MAX)
6309     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
6310
6311   return Qnil;
6312 }
6313
6314 #endif /* emacs */
6315
6316 \f
6317 /*** 9. Post-amble ***/
6318
6319 void
6320 init_coding_once ()
6321 {
6322   int i;
6323
6324   /* Emacs' internal format specific initialize routine.  */
       /* Classify every byte value in emacs_code_class[].  The range loops
          come first and the single-code assignments after them, so the
          later assignments override the class given by a loop.  Bytes
          0x00..0x20 (space included) start out as control codes.  */
6325   for (i = 0; i <= 0x20; i++)
6326     emacs_code_class[i] = EMACS_control_code;
6327   emacs_code_class[0x0A] = EMACS_linefeed_code;
6328   emacs_code_class[0x0D] = EMACS_carriage_return_code;
6329   for (i = 0x21 ; i < 0x7F; i++)
6330     emacs_code_class[i] = EMACS_ascii_code;
6331   emacs_code_class[0x7F] = EMACS_control_code;
       /* NOTE(review): this loop stops at 0xFE, so emacs_code_class[0xFF]
          is not assigned anywhere in this function -- confirm that is
          intended.  */
6332   for (i = 0x80; i < 0xFF; i++)
6333     emacs_code_class[i] = EMACS_invalid_code;
       /* The four private leading codes replace the `invalid' class that
          the loop above gave them.  */
6334   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
6335   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
6336   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6337   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6338
6339   /* ISO2022 specific initialize routine.  */
       /* Classify every byte value in iso_code_class[].  The four range
          loops skip 0x20, 0x7F, 0xA0 and 0xFF, which are assigned
          explicitly right after them; the individual control codes that
          follow then override the class given by a loop.  */
6340   for (i = 0; i < 0x20; i++)
6341     iso_code_class[i] = ISO_control_0;
6342   for (i = 0x21; i < 0x7F; i++)
6343     iso_code_class[i] = ISO_graphic_plane_0;
6344   for (i = 0x80; i < 0xA0; i++)
6345     iso_code_class[i] = ISO_control_1;
6346   for (i = 0xA1; i < 0xFF; i++)
6347     iso_code_class[i] = ISO_graphic_plane_1;
6348   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6349   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6350   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
6351   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
6352   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
6353   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6354   iso_code_class[ISO_CODE_ESC] = ISO_escape;
6355   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6356   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6357   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6358
       /* Initialize the four global coding structures by calling
          setup_coding_system with nil as the coding system.  */
6359   setup_coding_system (Qnil, &keyboard_coding);
6360   setup_coding_system (Qnil, &terminal_coding);
6361   setup_coding_system (Qnil, &safe_terminal_coding);
6362   setup_coding_system (Qnil, &default_buffer_file_coding);
6363
       /* Zero every entry of coding_system_table.  */
6364   bzero (coding_system_table, sizeof coding_system_table);
6365
       /* ascii_skip_code[] is 1 for byte values 0..127; the entries above
          that keep the 0 written by bzero.  */
6366   bzero (ascii_skip_code, sizeof ascii_skip_code);
6367   for (i = 0; i < 128; i++)
6368     ascii_skip_code[i] = 1;
6369
       /* The end-of-line type of the system is chosen at compile time:
          CRLF when MSDOS or WINDOWSNT is defined, LF otherwise.  */
6370 #if defined (MSDOS) || defined (WINDOWSNT)
6371   system_eol_type = CODING_EOL_CRLF;
6372 #else
6373   system_eol_type = CODING_EOL_LF;
6374 #endif
6375
6376   inhibit_pre_post_conversion = 0;
6377 }
6378
6379 #ifdef emacs
6380
6381 void
6382 syms_of_coding ()
6383 {
       /* Intern and staticpro the symbols this file uses, attach the
          properties they need, register the primitives with defsubr, and
          define the Lisp variables together with their initial values.  */
6384   Qtarget_idx = intern ("target-idx");
6385   staticpro (&Qtarget_idx);
6386
6387   Qcoding_system_history = intern ("coding-system-history");
6388   staticpro (&Qcoding_system_history);
6389   Fset (Qcoding_system_history, Qnil);
6390
       /* The `target-idx' values below are zero-based positions among the
          arguments of the primitive itself; `find-operation-coding-system'
          reads the property and picks ARGS[target-idx + 1].
          Qinsert_file_contents and Qwrite_region are not interned in this
          function -- presumably that is done in another file; verify.  */
6391   /* Target FILENAME is the first argument.  */
6392   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
6393   /* Target FILENAME is the third argument.  */
6394   Fput (Qwrite_region, Qtarget_idx, make_number (2));
6395
6396   Qcall_process = intern ("call-process");
6397   staticpro (&Qcall_process);
6398   /* Target PROGRAM is the first argument.  */
6399   Fput (Qcall_process, Qtarget_idx, make_number (0));
6400
6401   Qcall_process_region = intern ("call-process-region");
6402   staticpro (&Qcall_process_region);
6403   /* Target PROGRAM is the third argument.  */
6404   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
6405
6406   Qstart_process = intern ("start-process");
6407   staticpro (&Qstart_process);
6408   /* Target PROGRAM is the third argument.  */
6409   Fput (Qstart_process, Qtarget_idx, make_number (2));
6410
6411   Qopen_network_stream = intern ("open-network-stream");
6412   staticpro (&Qopen_network_stream);
6413   /* Target SERVICE is the fourth argument.  */
6414   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
6415
6416   Qcoding_system = intern ("coding-system");
6417   staticpro (&Qcoding_system);
6418
6419   Qeol_type = intern ("eol-type");
6420   staticpro (&Qeol_type);
6421
6422   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
6423   staticpro (&Qbuffer_file_coding_system);
6424
6425   Qpost_read_conversion = intern ("post-read-conversion");
6426   staticpro (&Qpost_read_conversion);
6427
6428   Qpre_write_conversion = intern ("pre-write-conversion");
6429   staticpro (&Qpre_write_conversion);
6430
6431   Qno_conversion = intern ("no-conversion");
6432   staticpro (&Qno_conversion);
6433
6434   Qundecided = intern ("undecided");
6435   staticpro (&Qundecided);
6436
6437   Qcoding_system_p = intern ("coding-system-p");
6438   staticpro (&Qcoding_system_p);
6439
6440   Qcoding_system_error = intern ("coding-system-error");
6441   staticpro (&Qcoding_system_error);
6442
       /* Make `coding-system-error' an error condition: its conditions are
          (coding-system-error error), with the message given below.  */
6443   Fput (Qcoding_system_error, Qerror_conditions,
6444         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
6445   Fput (Qcoding_system_error, Qerror_message,
6446         build_string ("Invalid coding system"));
6447
6448   Qcoding_category = intern ("coding-category");
6449   staticpro (&Qcoding_category);
6450   Qcoding_category_index = intern ("coding-category-index");
6451   staticpro (&Qcoding_category_index);
6452
       /* Vcoding_category_table[I] is the symbol named
          coding_category_name[I]; each such symbol records its own index I
          in its `coding-category-index' property.  */
6453   Vcoding_category_table
6454     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
6455   staticpro (&Vcoding_category_table);
6456   {
6457     int i;
6458     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
6459       {
6460         XVECTOR (Vcoding_category_table)->contents[i]
6461           = intern (coding_category_name[i]);
6462         Fput (XVECTOR (Vcoding_category_table)->contents[i],
6463               Qcoding_category_index, make_number (i));
6464       }
6465   }
6466
6467   Qtranslation_table = intern ("translation-table");
6468   staticpro (&Qtranslation_table);
       /* NOTE(review): Qchar_table_extra_slots is used here although this
          function assigns it only further down -- this relies on it having
          been set before syms_of_coding runs; confirm.  */
6469   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
6470
6471   Qtranslation_table_id = intern ("translation-table-id");
6472   staticpro (&Qtranslation_table_id);
6473
6474   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
6475   staticpro (&Qtranslation_table_for_decode);
6476
6477   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
6478   staticpro (&Qtranslation_table_for_encode);
6479
6480   Qsafe_chars = intern ("safe-chars");
6481   staticpro (&Qsafe_chars);
6482
6483   Qchar_coding_system = intern ("char-coding-system");
6484   staticpro (&Qchar_coding_system);
6485
6486   /* Intern this now in case it isn't already done.
6487      Setting this variable twice is harmless.
6488      But don't staticpro it here--that is done in alloc.c.  */
6489   Qchar_table_extra_slots = intern ("char-table-extra-slots");
6490   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
6491   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
6492
6493   Qvalid_codes = intern ("valid-codes");
6494   staticpro (&Qvalid_codes);
6495
6496   Qemacs_mule = intern ("emacs-mule");
6497   staticpro (&Qemacs_mule);
6498
6499   Qraw_text = intern ("raw-text");
6500   staticpro (&Qraw_text);
6501
       /* Register the primitives with defsubr.  */
6502   defsubr (&Scoding_system_p);
6503   defsubr (&Sread_coding_system);
6504   defsubr (&Sread_non_nil_coding_system);
6505   defsubr (&Scheck_coding_system);
6506   defsubr (&Sdetect_coding_region);
6507   defsubr (&Sdetect_coding_string);
6508   defsubr (&Sfind_coding_systems_region_internal);
6509   defsubr (&Sdecode_coding_region);
6510   defsubr (&Sencode_coding_region);
6511   defsubr (&Sdecode_coding_string);
6512   defsubr (&Sencode_coding_string);
6513   defsubr (&Sdecode_sjis_char);
6514   defsubr (&Sencode_sjis_char);
6515   defsubr (&Sdecode_big5_char);
6516   defsubr (&Sencode_big5_char);
6517   defsubr (&Sset_terminal_coding_system_internal);
6518   defsubr (&Sset_safe_terminal_coding_system_internal);
6519   defsubr (&Sterminal_coding_system);
6520   defsubr (&Sset_keyboard_coding_system_internal);
6521   defsubr (&Skeyboard_coding_system);
6522   defsubr (&Sfind_operation_coding_system);
6523   defsubr (&Supdate_coding_systems_internal);
6524   defsubr (&Sset_coding_priority_internal);
6525
       /* Lisp variables.  Each DEFVAR is followed by the assignment of the
          variable's initial value.  */
6526   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
6527     "List of coding systems.\n\
6528 \n\
6529 Do not alter the value of this variable manually.  This variable should be\n\
6530 updated by the functions `make-coding-system' and\n\
6531 `define-coding-system-alias'.");
6532   Vcoding_system_list = Qnil;
6533
6534   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
6535     "Alist of coding system names.\n\
6536 Each element is one element list of coding system name.\n\
6537 This variable is given to `completing-read' as TABLE argument.\n\
6538 \n\
6539 Do not alter the value of this variable manually.  This variable should be\n\
6540 updated by the functions `make-coding-system' and\n\
6541 `define-coding-system-alias'.");
6542   Vcoding_system_alist = Qnil;
6543
6544   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
6545     "List of coding-categories (symbols) ordered by priority.");
6546   {
6547     int i;
6548
         /* Consing from the last index down to 0 leaves the list in the
            same order as Vcoding_category_table.  */
6549     Vcoding_category_list = Qnil;
6550     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
6551       Vcoding_category_list
6552         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
6553                  Vcoding_category_list);
6554   }
6555
6556   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
6557     "Specify the coding system for read operations.\n\
6558 It is useful to bind this variable with `let', but do not set it globally.\n\
6559 If the value is a coding system, it is used for decoding on read operation.\n\
6560 If not, an appropriate element is used from one of the coding system alists:\n\
6561 There are three such tables, `file-coding-system-alist',\n\
6562 `process-coding-system-alist', and `network-coding-system-alist'.");
6563   Vcoding_system_for_read = Qnil;
6564
6565   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
6566     "Specify the coding system for write operations.\n\
6567 Programs bind this variable with `let', but you should not set it globally.\n\
6568 If the value is a coding system, it is used for encoding of output,\n\
6569 when writing it to a file and when sending it to a file or subprocess.\n\
6570 \n\
6571 If this does not specify a coding system, an appropriate element\n\
6572 is used from one of the coding system alists:\n\
6573 There are three such tables, `file-coding-system-alist',\n\
6574 `process-coding-system-alist', and `network-coding-system-alist'.\n\
6575 For output to files, if the above procedure does not specify a coding system,\n\
6576 the value of `buffer-file-coding-system' is used.");
6577   Vcoding_system_for_write = Qnil;
6578
6579   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
6580     "Coding system used in the latest file or process I/O.");
6581   Vlast_coding_system_used = Qnil;
6582
6583   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
6584     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
6585 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
6586 such conversion.");
6587   inhibit_eol_conversion = 0;
6588
6589   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
6590     "Non-nil means process buffer inherits coding system of process output.\n\
6591 Bind it to t if the process output is to be treated as if it were a file\n\
6592 read from some filesystem.");
6593   inherit_process_coding_system = 0;
6594
6595   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
6596     "Alist to decide a coding system to use for a file I/O operation.\n\
6597 The format is ((PATTERN . VAL) ...),\n\
6598 where PATTERN is a regular expression matching a file name,\n\
6599 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6600 If VAL is a coding system, it is used for both decoding and encoding\n\
6601 the file contents.\n\
6602 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6603 and the cdr part is used for encoding.\n\
6604 If VAL is a function symbol, the function must return a coding system\n\
6605 or a cons of coding systems which are used as above.\n\
6606 \n\
6607 See also the function `find-operation-coding-system'\n\
6608 and the variable `auto-coding-alist'.");
6609   Vfile_coding_system_alist = Qnil;
6610
6611   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
6612     "Alist to decide a coding system to use for a process I/O operation.\n\
6613 The format is ((PATTERN . VAL) ...),\n\
6614 where PATTERN is a regular expression matching a program name,\n\
6615 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6616 If VAL is a coding system, it is used for both decoding what received\n\
6617 from the program and encoding what sent to the program.\n\
6618 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6619 and the cdr part is used for encoding.\n\
6620 If VAL is a function symbol, the function must return a coding system\n\
6621 or a cons of coding systems which are used as above.\n\
6622 \n\
6623 See also the function `find-operation-coding-system'.");
6624   Vprocess_coding_system_alist = Qnil;
6625
6626   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
6627     "Alist to decide a coding system to use for a network I/O operation.\n\
6628 The format is ((PATTERN . VAL) ...),\n\
6629 where PATTERN is a regular expression matching a network service name\n\
6630 or is a port number to connect to,\n\
6631 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6632 If VAL is a coding system, it is used for both decoding what received\n\
6633 from the network stream and encoding what sent to the network stream.\n\
6634 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6635 and the cdr part is used for encoding.\n\
6636 If VAL is a function symbol, the function must return a coding system\n\
6637 or a cons of coding systems which are used as above.\n\
6638 \n\
6639 See also the function `find-operation-coding-system'.");
6640   Vnetwork_coding_system_alist = Qnil;
6641
6642   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
6643     "Coding system to use with system messages.");
6644   Vlocale_coding_system = Qnil;
6645
6646   /* The eol mnemonics are reset in startup.el system-dependently.  */
6647   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
6648     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
6649   eol_mnemonic_unix = build_string (":");
6650
6651   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
6652     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
6653   eol_mnemonic_dos = build_string ("\\");
6654
6655   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
6656     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
6657   eol_mnemonic_mac = build_string ("/");
6658
6659   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
6660     "*String displayed in mode line when end-of-line format is not yet determined.");
6661   eol_mnemonic_undecided = build_string (":");
6662
6663   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
6664     "*Non-nil enables character translation while encoding and decoding.");
6665   Venable_character_translation = Qt;
6666
6667   DEFVAR_LISP ("standard-translation-table-for-decode",
6668     &Vstandard_translation_table_for_decode,
6669     "Table for translating characters while decoding.");
6670   Vstandard_translation_table_for_decode = Qnil;
6671
6672   DEFVAR_LISP ("standard-translation-table-for-encode",
6673     &Vstandard_translation_table_for_encode,
6674     "Table for translationg characters while encoding.");
6675   Vstandard_translation_table_for_encode = Qnil;
6676
       /* NOTE(review): the Lisp name ends in `-table' while the C variable
          is called Vcharset_revision_alist and the docstring says "Alist"
          -- the name mismatch is in the original; confirm before renaming
          either side.  */
6677   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
6678     "Alist of charsets vs revision numbers.\n\
6679 While encoding, if a charset (car part of an element) is found,\n\
6680 designate it with the escape sequence identifing revision (cdr part of the element).");
6681   Vcharset_revision_alist = Qnil;
6682
6683   DEFVAR_LISP ("default-process-coding-system",
6684                &Vdefault_process_coding_system,
6685     "Cons of coding systems used for process I/O by default.\n\
6686 The car part is used for decoding a process output,\n\
6687 the cdr part is used for encoding a text to be sent to a process.");
6688   Vdefault_process_coding_system = Qnil;
6689
6690   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
6691     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
6692 This is a vector of length 256.\n\
6693 If Nth element is non-nil, the existence of code N in a file\n\
6694 \(or output of subprocess) doesn't prevent it to be detected as\n\
6695 a coding system of ISO 2022 variant which has a flag\n\
6696 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
6697 or reading output of a subprocess.\n\
6698 Only 128th through 159th elements has a meaning.");
6699   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
6700
       /* NOTE(review): the docstring names `select-safe-coding-system' as
          the default, but the value assigned here is nil -- presumably the
          function is installed later from Lisp; verify.  */
6701   DEFVAR_LISP ("select-safe-coding-system-function",
6702                &Vselect_safe_coding_system_function,
6703     "Function to call to select safe coding system for encoding a text.\n\
6704 \n\
6705 If set, this function is called to force a user to select a proper\n\
6706 coding system which can encode the text in the case that a default\n\
6707 coding system used in each operation can't encode the text.\n\
6708 \n\
6709 The default value is `select-safe-coding-system' (which see).");
6710   Vselect_safe_coding_system_function = Qnil;
6711
6712   DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
6713     "Char-table containing safe coding systems of each characters.\n\
6714 Each element doesn't include such generic coding systems that can\n\
6715 encode any characters.   They are in the first extra slot.");
6716   Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
6717
6718   DEFVAR_BOOL ("inhibit-iso-escape-detection",
6719                &inhibit_iso_escape_detection,
6720     "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
6721 \n\
6722 By default, on reading a file, Emacs tries to detect how the text is\n\
6723 encoded.  This code detection is sensitive to escape sequences.  If\n\
6724 the sequence is valid as ISO2022, the code is determined as one of\n\
6725 the ISO2022 encodings, and the file is decoded by the corresponding\n\
6726 coding system (e.g. `iso-2022-7bit').\n\
6727 \n\
6728 However, there may be a case that you want to read escape sequences in\n\
6729 a file as is.  In such a case, you can set this variable to non-nil.\n\
6730 Then, as the code detection ignores any escape sequences, no file is\n\
6731 detected as encoded in some ISO2022 encoding.  The result is that all\n\
6732 escape sequences become visible in a buffer.\n\
6733 \n\
6734 The default value is nil, and it is strongly recommended not to change\n\
6735 it.  That is because many Emacs Lisp source files that contain\n\
6736 non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
6737 in Emacs's distribution, and they won't be decoded correctly on\n\
6738 reading if you suppress escape sequence detection.\n\
6739 \n\
6740 The other way to read escape sequences in a file without decoding is\n\
6741 to explicitly specify some coding system that doesn't use ISO2022's\n\
6742 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
6743   inhibit_iso_escape_detection = 0;
6744 }
6745
6746 char *
6747 emacs_strerror (error_number)
6748      int error_number;
6749 {
6750   Lisp_Object decoded;
6751   char *message;
6752
6753   /* Look up the C library's text once the messages locale is synchronized.  */
6754   synchronize_system_messages_locale ();
6755   message = strerror (error_number);
6756   if (NILP (Vlocale_coding_system))
6757     return message;
6758
6759   /* `locale-coding-system' is set: hand back the text as converted with it.  */
6760   decoded = code_convert_string_norecord (build_string (message),
6761                                           Vlocale_coding_system, 0);
6762   /* NOTE(review): the result points into DECODED, a local -- confirm lifetime.  */
6763   return (char *) XSTRING (decoded)->data;
6764 }
6765
6766 #endif /* emacs */
6767