src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   0. General comments
  25   1. Preamble
  26   2. Emacs' internal format (emacs-mule) handlers
  27   3. ISO2022 handlers
  28   4. Shift-JIS and BIG5 handlers
  29   5. CCL handlers
  30   6. End-of-line handlers
  31   7. C library functions
  32   8. Emacs Lisp library functions
  33   9. Post-amble
  34
  35 */
  36
  37 /*** 0. General comments ***/
  38
  39
  40 /*** GENERAL NOTE on CODING SYSTEM ***
  41
  42   Coding system is an encoding mechanism of one or more character
  43   sets.  Here's a list of coding systems which Emacs can handle.  When
  44   we say "decode", it means converting some other coding system to
  45   Emacs' internal format (emacs-mule), and when we say "encode",
  46   it means converting the coding system emacs-mule to some other
  47   coding system.
  48
  49   0. Emacs' internal format (emacs-mule)
  50
  51   Emacs itself holds a multi-lingual character in a buffer and a string
  52   in a special format.  Details are described in section 2.
  53
  54   1. ISO2022
  55
  56   The most famous coding system for multiple character sets.  X's
  57   Compound Text, various EUCs (Extended Unix Code), and coding
  58   systems used in Internet communication such as ISO-2022-JP are
  59   all variants of ISO2022.  Details are described in section 3.
  60
  61   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  62
  63   A coding system to encode character sets: ASCII, JISX0201, and
  64   JISX0208.  Widely used for PC's in Japan.  Details are described in
  65   section 4.
  66
  67   3. BIG5
  68
  69   A coding system to encode character sets: ASCII and Big5.  Widely
  70   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  71   described in section 4.  In this file, when we write "BIG5"
  72   (all uppercase), we mean the coding system, and when we write
  73   "Big5" (capitalized), we mean the character set.
  74
  75   4. Raw text
  76
  77   A coding system for a text containing random 8-bit code.  Emacs does
  78   no code conversion on such a text except for end-of-line format.
  79
  80   5. Other
  81
  82   If a user wants to read/write a text encoded in a coding system not
  83   listed above, he can supply a decoder and an encoder for it in CCL
  84   (Code Conversion Language) programs.  Emacs executes the CCL program
  85   while reading/writing.
  86
  87   Emacs represents a coding system by a Lisp symbol that has a property
  88   `coding-system'.  But, before actually using the coding system, the
  89   information about it is set in a structure of type `struct
  90   coding_system' for rapid processing.  See section 6 for more details.
  91
  92 */
  93
  94 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  95
  96   How end-of-line of a text is encoded depends on a system.  For
  97   instance, Unix's format is just one byte of `line-feed' code,
  98   whereas DOS's format is two-byte sequence of `carriage-return' and
  99   `line-feed' codes.  MacOS's format is usually one byte of
 100   `carriage-return'.
 101
 102   Since text characters encoding and end-of-line encoding are
 103   independent, any coding system described above can take
 104   any format of end-of-line.  So, Emacs has information of format of
 105   end-of-line in each coding-system.  See section 6 for more details.
 106
 107 */
 108
 109 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 110
 111   These functions check if a text between SRC and SRC_END is encoded
 112   in the coding system category XXX.  Each returns an integer value in
 113   which appropriate flag bits for the category XXX are set.  The flag
 114   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 115   template of these functions.  */
 116 #if 0
 117 int
 118 detect_coding_emacs_mule (src, src_end)
 119      unsigned char *src, *src_end;
 120 {
 121   ...
 122 }
 123 #endif
 124
 125 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 126
 127   These functions decode SRC_BYTES length of unibyte text at SOURCE
 128   encoded in CODING to Emacs' internal format.  The resulting
 129   multibyte text goes to a place pointed to by DESTINATION, the length
 130   of which should not exceed DST_BYTES.
 131
 132   These functions set the information of original and decoded texts in
 133   the members produced, produced_char, consumed, and consumed_char of
 134   the structure *CODING.  They also set the member result to one of
 135   CODING_FINISH_XXX indicating how the decoding finished.
 136
 137   DST_BYTES zero means that source area and destination area are
 138   overlapped, which means that we can produce decoded text until it
 139   reaches the head of the not-yet-decoded source text.
 140
 141   Below is a template of these functions.  */
 142 #if 0
 143 static void
 144 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 145      struct coding_system *coding;
 146      unsigned char *source, *destination;
 147      int src_bytes, dst_bytes;
 148 {
 149   ...
 150 }
 151 #endif
 152
 153 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 154
 155   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 156   internal multibyte format to CODING.  The resulting unibyte text
 157   goes to a place pointed to by DESTINATION, the length of which
 158   should not exceed DST_BYTES.
 159
 160   These functions set the information of original and encoded texts in
 161   the members produced, produced_char, consumed, and consumed_char of
 162   the structure *CODING.  They also set the member result to one of
 163   CODING_FINISH_XXX indicating how the encoding finished.
 164
 165   DST_BYTES zero means that source area and destination area are
 166   overlapped, which means that we can produce encoded text until it
 167   reaches the head of the not-yet-encoded source text.
 168
 169   Below is a template of these functions.  */
 170 #if 0
 171 static void
 172 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 173      struct coding_system *coding;
 174      unsigned char *source, *destination;
 175      int src_bytes, dst_bytes;
 176 {
 177   ...
 178 }
 179 #endif
 180
 181 /*** COMMONLY USED MACROS ***/
 182
 183 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 184    get one and two bytes from the source text respectively.
 185    If there are not enough bytes in the source, they jump to
 186    `label_end_of_loop'.  The caller should set variables `coding',
 187    `src' and `src_end' to appropriate pointer in advance.  These
 188    macros are called from decoding routines `decode_coding_XXX', thus
 189    it is assumed that the source text is unibyte.  */
 190
 191 #define ONE_MORE_BYTE(c1)                                       \
 192   do {                                                          \
 193     if (src >= src_end)                                         \
 194       {                                                         \
 195         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 196         goto label_end_of_loop;                                 \
 197       }                                                         \
 198     c1 = *src++;                                                \
 199   } while (0)
 200
 201 #define TWO_MORE_BYTES(c1, c2)                                  \
 202   do {                                                          \
 203     if (src + 1 >= src_end)                                     \
 204       {                                                         \
 205         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 206         goto label_end_of_loop;                                 \
 207       }                                                         \
 208     c1 = *src++;                                                \
 209     c2 = *src++;                                                \
 210   } while (0)
 211
 212
 213 /* Set C to the next character at the source text pointed by `src'.
 214    If there are not enough characters in the source, jump to
 215    `label_end_of_loop'.  The caller should set variables `coding',
 216    `src', `src_end', and `translation_table' to appropriate pointers
 217    in advance.  This macro is used in encoding routines
 218    `encode_coding_XXX', thus it assumes that the source text is in
 219    multibyte form except for 8-bit characters.  8-bit characters are
 220    in multibyte form if coding->src_multibyte is nonzero, else they
 221    are represented by a single byte.  */
 222
 223 #define ONE_MORE_CHAR(c)                                        \
 224   do {                                                          \
 225     int len = src_end - src;                                    \
 226     int bytes;                                                  \
 227     if (len <= 0)                                               \
 228       {                                                         \
 229         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 230         goto label_end_of_loop;                                 \
 231       }                                                         \
 232     if (coding->src_multibyte                                   \
 233         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 234       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 235     else                                                        \
 236       c = *src, bytes = 1;                                      \
 237     if (!NILP (translation_table))                              \
 238       c = translate_char (translation_table, c, -1, 0, 0);      \
 239     src += bytes;                                               \
 240   } while (0)
 241
 242
 243 /* Produce a multibyte form of character C to `dst'.  Jump to
 244    `label_end_of_loop' if there's not enough space at `dst'.
 245
 246    If we are now in the middle of composition sequence, the decoded
 247    character may be ALTCHAR (for the current composition).  In that
 248    case, the character goes to coding->cmp_data->data instead of
 249    `dst'.
 250
 251    This macro is used in decoding routines.  */
 252
 253 #define EMIT_CHAR(c)                                                    \
 254   do {                                                                  \
 255     if (! COMPOSING_P (coding)                                          \
 256         || coding->composing == COMPOSITION_RELATIVE                    \
 257         || coding->composing == COMPOSITION_WITH_RULE)                  \
 258       {                                                                 \
 259         int bytes = CHAR_BYTES (c);                                     \
 260         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 261           {                                                             \
 262             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 263             goto label_end_of_loop;                                     \
 264           }                                                             \
 265         dst += CHAR_STRING (c, dst);                                    \
 266         coding->produced_char++;                                        \
 267       }                                                                 \
 268                                                                         \
 269     if (COMPOSING_P (coding)                                            \
 270         && coding->composing != COMPOSITION_RELATIVE)                   \
 271       {                                                                 \
 272         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 273         coding->composition_rule_follows                                \
 274           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 275       }                                                                 \
 276   } while (0)
 277
 278
 279 #define EMIT_ONE_BYTE(c)                                        \
 280   do {                                                          \
 281     if (dst >= (dst_bytes ? dst_end : src))                     \
 282       {                                                         \
 283         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 284         goto label_end_of_loop;                                 \
 285       }                                                         \
 286     *dst++ = c;                                                 \
 287   } while (0)
 288
 289 #define EMIT_TWO_BYTES(c1, c2)                                  \
 290   do {                                                          \
 291     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 292       {                                                         \
 293         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 294         goto label_end_of_loop;                                 \
 295       }                                                         \
 296     *dst++ = c1, *dst++ = c2;                                   \
 297   } while (0)
 298
 299 #define EMIT_BYTES(from, to)                                    \
 300   do {                                                          \
 301     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 302       {                                                         \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     while (from < to)                                           \
 307       *dst++ = *from++;                                         \
 308   } while (0)
 309
 310 \f
 311 /*** 1. Preamble ***/
 312
 313 #ifdef emacs
 314 #include <config.h>
 315 #endif
 316
 317 #include <stdio.h>
 318
 319 #ifdef emacs
 320
 321 #include "lisp.h"
 322 #include "buffer.h"
 323 #include "charset.h"
 324 #include "composite.h"
 325 #include "ccl.h"
 326 #include "coding.h"
 327 #include "window.h"
 328
 329 #else  /* not emacs */
 330
 331 #include "mulelib.h"
 332
 333 #endif /* not emacs */
 334
 335 Lisp_Object Qcoding_system, Qeol_type;
 336 Lisp_Object Qbuffer_file_coding_system;
 337 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 338 Lisp_Object Qno_conversion, Qundecided;
 339 Lisp_Object Qcoding_system_history;
 340 Lisp_Object Qsafe_chars;
 341 Lisp_Object Qvalid_codes;
 342
 343 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 344 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 345 Lisp_Object Qstart_process, Qopen_network_stream;
 346 Lisp_Object Qtarget_idx;
 347
 348 Lisp_Object Vselect_safe_coding_system_function;
 349
 350 /* Mnemonic string for each format of end-of-line.  */
 351 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 352 /* Mnemonic string to indicate format of end-of-line is not yet
 353    decided.  */
 354 Lisp_Object eol_mnemonic_undecided;
 355
 356 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 357    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 358 int system_eol_type;
 359
 360 #ifdef emacs
 361
 362 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 363
 364 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 365
 366 /* Coding system emacs-mule and raw-text are for converting only
 367    end-of-line format.  */
 368 Lisp_Object Qemacs_mule, Qraw_text;
 369
 370 /* Coding-systems are handed between Emacs Lisp programs and C internal
 371    routines by the following three variables.  */
 372 /* Coding-system for reading files and receiving data from process.  */
 373 Lisp_Object Vcoding_system_for_read;
 374 /* Coding-system for writing files and sending data to process.  */
 375 Lisp_Object Vcoding_system_for_write;
 376 /* Coding-system actually used in the latest I/O.  */
 377 Lisp_Object Vlast_coding_system_used;
 378
 379 /* A vector of length 256 which contains information about special
 380    Latin codes (especially for dealing with Microsoft codes).  */
 381 Lisp_Object Vlatin_extra_code_table;
 382
 383 /* Flag to inhibit code conversion of end-of-line format.  */
 384 int inhibit_eol_conversion;
 385
 386 /* Flag to inhibit ISO2022 escape sequence detection.  */
 387 int inhibit_iso_escape_detection;
 388
 389 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 390 int inherit_process_coding_system;
 391
 392 /* Coding system to be used to encode text for terminal display.  */
 393 struct coding_system terminal_coding;
 394
 395 /* Coding system to be used to encode text for terminal display when
 396    terminal coding system is nil.  */
 397 struct coding_system safe_terminal_coding;
 398
 399 /* Coding system of what is sent from terminal keyboard.  */
 400 struct coding_system keyboard_coding;
 401
 402 /* Default coding system to be used to write a file.  */
 403 struct coding_system default_buffer_file_coding;
 404
 405 Lisp_Object Vfile_coding_system_alist;
 406 Lisp_Object Vprocess_coding_system_alist;
 407 Lisp_Object Vnetwork_coding_system_alist;
 408
 409 Lisp_Object Vlocale_coding_system;
 410
 411 #endif /* emacs */
 412
 413 Lisp_Object Qcoding_category, Qcoding_category_index;
 414
 415 /* List of symbols `coding-category-xxx' ordered by priority.  */
 416 Lisp_Object Vcoding_category_list;
 417
 418 /* Table of coding categories (Lisp symbols).  */
 419 Lisp_Object Vcoding_category_table;
 420
 421 /* Table of names of symbol for each coding-category.  */
 422 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 423   "coding-category-emacs-mule",
 424   "coding-category-sjis",
 425   "coding-category-iso-7",
 426   "coding-category-iso-7-tight",
 427   "coding-category-iso-8-1",
 428   "coding-category-iso-8-2",
 429   "coding-category-iso-7-else",
 430   "coding-category-iso-8-else",
 431   "coding-category-ccl",
 432   "coding-category-big5",
 433   "coding-category-utf-8",
 434   "coding-category-utf-16-be",
 435   "coding-category-utf-16-le",
 436   "coding-category-raw-text",
 437   "coding-category-binary"
 438 };
 439
 440 /* Table of pointers to coding systems corresponding to each coding
 441    categories.  */
 442 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 443
 444 /* Table of coding category masks.  Nth element is a mask for a coding
 445    cateogry of which priority is Nth.  */
 446 static
 447 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 448
 449 /* Flag to tell if we look up translation table on character code
 450    conversion.  */
 451 Lisp_Object Venable_character_translation;
 452 /* Standard translation table to look up on decoding (reading).  */
 453 Lisp_Object Vstandard_translation_table_for_decode;
 454 /* Standard translation table to look up on encoding (writing).  */
 455 Lisp_Object Vstandard_translation_table_for_encode;
 456
 457 Lisp_Object Qtranslation_table;
 458 Lisp_Object Qtranslation_table_id;
 459 Lisp_Object Qtranslation_table_for_decode;
 460 Lisp_Object Qtranslation_table_for_encode;
 461
 462 /* Alist of charsets vs revision number.  */
 463 Lisp_Object Vcharset_revision_alist;
 464
 465 /* Default coding systems used for process I/O.  */
 466 Lisp_Object Vdefault_process_coding_system;
 467
 468 /* Global flag to tell that we can't call post-read-conversion and
 469    pre-write-conversion functions.  Usually the value is zero, but it
 470    is set to 1 temporarily while such functions are running.  This is
 471    to avoid infinite recursive call.  */
 472 static int inhibit_pre_post_conversion;
 473
 474 /* Char-table containing safe coding systems of each character.  */
 475 Lisp_Object Vchar_coding_system_table;
 476 Lisp_Object Qchar_coding_system;
 477
 478 /* Return `safe-chars' property of coding system CODING.  Don't check
 479    validity of CODING.  */
 480
 481 Lisp_Object
 482 coding_safe_chars (coding)
 483      struct coding_system *coding;
 484 {
 485   Lisp_Object coding_spec, plist, safe_chars;
 486
 487   coding_spec = Fget (coding->symbol, Qcoding_system);
 488   plist = XVECTOR (coding_spec)->contents[3];
 489   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
 490   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 491 }
 492
 493 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 494   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 495
 496 \f
 497 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 498
 499 /* Emacs' internal format for encoding multiple character sets is a
 500    kind of multi-byte encoding, i.e. characters are encoded by
 501    variable-length sequences of one-byte codes.
 502
 503    ASCII characters and control characters (e.g. `tab', `newline') are
 504    represented by one-byte sequences which are their ASCII codes, in
 505    the range 0x00 through 0x7F.
 506
 507    8-bit characters of the range 0x80..0x9F are represented by
 508    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 509    code + 0x20).
 510
 511    8-bit characters of the range 0xA0..0xFF are represented by
 512    one-byte sequences which are their 8-bit code.
 513
 514    The other characters are represented by a sequence of `base
 515    leading-code', optional `extended leading-code', and one or two
 516    `position-code's.  The length of the sequence is determined by the
 517    base leading-code.  Leading-code takes the range 0x80 through 0x9F,
 518    whereas extended leading-code and position-code take the range 0xA0
 519    through 0xFF.  See `charset.h' for more details about leading-code
 520    and position-code.
 521
 522    --- CODE RANGE of Emacs' internal format ---
 523    character set        range
 524    -------------        -----
 525    ascii                0x00..0x7F
 526    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
 527    eight-bit-graphic    0xA0..0xFF
 528    ELSE                 0x81..0x9F + [0xA0..0xFF]+
 529    ---------------------------------------------
 530
 531   */
 532
 533 enum emacs_code_class_type emacs_code_class[256];
 534
 535 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 536    Check if a text is encoded in Emacs' internal format.  If it is,
 537    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 538
 539 int
 540 detect_coding_emacs_mule (src, src_end)
 541       unsigned char *src, *src_end;
 542 {
 543   unsigned char c;
 544   int composing = 0;
 545   /* Dummy for ONE_MORE_BYTE.  */
 546   struct coding_system dummy_coding;
 547   struct coding_system *coding = &dummy_coding;
 548
 549   while (1)
 550     {
 551       ONE_MORE_BYTE (c);
 552
 553       if (composing)
 554         {
 555           if (c < 0xA0)
 556             composing = 0;
 557           else if (c == 0xA0)
 558             {
 559               ONE_MORE_BYTE (c);
 560               c &= 0x7F;
 561             }
 562           else
 563             c -= 0x20;
 564         }
 565
 566       if (c < 0x20)
 567         {
 568           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 569             return 0;
 570         }
 571       else if (c >= 0x80 && c < 0xA0)
 572         {
 573           if (c == 0x80)
 574             /* Old leading code for a composite character.  */
 575             composing = 1;
 576           else
 577             {
 578               unsigned char *src_base = src - 1;
 579               int bytes;
 580
 581               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 582                                                bytes))
 583                 return 0;
 584               src = src_base + bytes;
 585             }
 586         }
 587     }
 588  label_end_of_loop:
 589   return CODING_CATEGORY_MASK_EMACS_MULE;
 590 }
 591
 592
 593 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 594
 595 static void
 596 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 597      struct coding_system *coding;
 598      unsigned char *source, *destination;
 599      int src_bytes, dst_bytes;
 600 {
 601   unsigned char *src = source;
 602   unsigned char *src_end = source + src_bytes;
 603   unsigned char *dst = destination;
 604   unsigned char *dst_end = destination + dst_bytes;
 605   /* SRC_BASE remembers the start position in source in each loop.
 606      The loop will be exited when there's not enough source code, or
 607      when there's not enough destination area to produce a
 608      character.  */
 609   unsigned char *src_base;
 610
 611   coding->produced_char = 0;
 612   while ((src_base = src) < src_end)
 613     {
 614       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 615       int bytes;
 616
 617       if (*src == '\r')
 618         {
 619           int c = *src++;
 620
 621           if (coding->eol_type == CODING_EOL_CR)
 622             c = '\n';
 623           else if (coding->eol_type == CODING_EOL_CRLF)
 624             {
 625               ONE_MORE_BYTE (c);
 626               if (c != '\n')
 627                 {
 628                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 629                     {
 630                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
 631                       goto label_end_of_loop;
 632                     }
 633                   src--;
 634                   c = '\r';
 635                 }
 636             }
 637           *dst++ = c;
 638           coding->produced_char++;
 639           continue;
 640         }
 641       else if (*src == '\n')
 642         {
 643           if ((coding->eol_type == CODING_EOL_CR
 644                || coding->eol_type == CODING_EOL_CRLF)
 645               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 646             {
 647               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 648               goto label_end_of_loop;
 649             }
 650           *dst++ = *src++;
 651           coding->produced_char++;
 652           continue;
 653         }
 654       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 655         {
 656           p = src;
 657           src += bytes;
 658         }
 659       else
 660         {
 661           bytes = CHAR_STRING (*src, tmp);
 662           p = tmp;
 663           src++;
 664         }
 665       if (dst + bytes >= (dst_bytes ? dst_end : src))
 666         {
 667           coding->result = CODING_FINISH_INSUFFICIENT_DST;
 668           break;
 669         }
 670       while (bytes--) *dst++ = *p++;
 671       coding->produced_char++;
 672     }
 673  label_end_of_loop:
 674   coding->consumed = coding->consumed_char = src_base - source;
 675   coding->produced = dst - destination;
 676 }
 677
 678 #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \
 679   encode_eol (coding, source, destination, src_bytes, dst_bytes)
 680
 681
 682 \f
 683 /*** 3. ISO2022 handlers ***/
 684
 685 /* The following note describes the coding system ISO2022 briefly.
 686    Since the intention of this note is to help understand the
 687    functions in this file, some parts are NOT ACCURATE or OVERLY
 688    SIMPLIFIED.  For thorough understanding, please refer to the
 689    original document of ISO2022.
 690
 691    ISO2022 provides many mechanisms to encode several character sets
 692    in 7-bit and 8-bit environments.  For 7-bit environments, all text
 693    is encoded using bytes less than 128.  This may make the encoded
 694    text a little bit longer, but the text passes more easily through
 695    several gateways, some of which strip off MSB (Most Significant Bit).
 696
 697    There are two kinds of character sets: control character set and
 698    graphic character set.  The former contains control characters such
 699    as `newline' and `escape' to provide control functions (control
 700    functions are also provided by escape sequences).  The latter
 701    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 702    two control character sets and many graphic character sets.
 703
 704    Graphic character sets are classified into one of the following
 705    four classes, according to the number of bytes (DIMENSION) and
 706    number of characters in one dimension (CHARS) of the set:
 707    - DIMENSION1_CHARS94
 708    - DIMENSION1_CHARS96
 709    - DIMENSION2_CHARS94
 710    - DIMENSION2_CHARS96
 711
 712    In addition, each character set is assigned an identification tag,
 713    unique for each set, called "final character" (denoted as <F>
 714    hereafter).  The <F> of each character set is decided by ECMA(*)
 715    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
 716    (0x30..0x3F are for private use only).
 717
 718    Note (*): ECMA = European Computer Manufacturers Association
 719
 720    Here are examples of graphic character set [NAME(<F>)]:
 721         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 722         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 723         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 724         o DIMENSION2_CHARS96 -- none for the moment
 725
 726    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
 727         C0 [0x00..0x1F] -- control character plane 0
 728         GL [0x20..0x7F] -- graphic character plane 0
 729         C1 [0x80..0x9F] -- control character plane 1
 730         GR [0xA0..0xFF] -- graphic character plane 1
 731
 732    A control character set is directly designated and invoked to C0 or
 733    C1 by an escape sequence.  The most common case is that:
 734    - ISO646's  control character set is designated/invoked to C0, and
 735    - ISO6429's control character set is designated/invoked to C1,
 736    and usually these designations/invocations are omitted in encoded
 737    text.  In a 7-bit environment, only C0 can be used, and a control
 738    character for C1 is encoded by an appropriate escape sequence to
 739    fit into the environment.  All control characters for C1 are
 740    defined to have corresponding escape sequences.
 741
 742    A graphic character set is at first designated to one of four
 743    graphic registers (G0 through G3), then these graphic registers are
 744    invoked to GL or GR.  These designations and invocations can be
 745    done independently.  The most common case is that G0 is invoked to
 746    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
 747    these invocations and designations are omitted in encoded text.
 748    In a 7-bit environment, only GL can be used.
 749
 750    When a graphic character set of CHARS94 is invoked to GL, codes
 751    0x20 and 0x7F of the GL area work as control characters SPACE and
 752    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
 753    be used.
 754
 755    There are two ways of invocation: locking-shift and single-shift.
 756    With locking-shift, the invocation lasts until the next different
 757    invocation, whereas with single-shift, the invocation affects the
 758    following character only and doesn't affect the locking-shift
 759    state.  Invocations are done by the following control characters or
 760    escape sequences:
 761
 762    ----------------------------------------------------------------------
 763    abbrev  function                  cntrl escape seq   description
 764    ----------------------------------------------------------------------
 765    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
 766    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
 767    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
 768    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
 769    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
 770    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
 771    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
 772    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
 773    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
 774    ----------------------------------------------------------------------
 775    (*) These are not used by any known coding system.
 776
 777    Control characters for these functions are defined by macros
 778    ISO_CODE_XXX in `coding.h'.
 779
 780    Designations are done by the following escape sequences:
 781    ----------------------------------------------------------------------
 782    escape sequence      description
 783    ----------------------------------------------------------------------
 784    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 785    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 786    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 787    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 788    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 789    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 790    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 791    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 792    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 793    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 794    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 795    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 796    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 797    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 798    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 799    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 800    ----------------------------------------------------------------------
 801
 802    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 803    of dimension 1, chars 94, and final character <F>, etc...
 804
 805    Note (*): Although these designations are not allowed in ISO2022,
 806    Emacs accepts them on decoding, and produces them on encoding
 807    CHARS96 character sets in a coding system which is characterized as
 808    7-bit environment, non-locking-shift, and non-single-shift.
 809
 810    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 811    '(' can be omitted.  We refer to this as "short-form" hereafter.
 812
 813    Now you may notice that there are a lot of ways for encoding the
 814    same multilingual text in ISO2022.  Actually, there exist many
 815    coding systems such as Compound Text (used in X11's inter client
 816    communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
 817    (used in Korean internet), EUC (Extended UNIX Code, used in Asian
 818    localized platforms), and all of these are variants of ISO2022.
 819
 820    In addition to the above, Emacs handles two more kinds of escape
 821    sequences: ISO6429's direction specification and Emacs' private
 822    sequence for specifying character composition.
 823
 824    ISO6429's direction specification takes the following form:
 825         o CSI ']'      -- end of the current direction
 826         o CSI '0' ']'  -- end of the current direction
 827         o CSI '1' ']'  -- start of left-to-right text
 828         o CSI '2' ']'  -- start of right-to-left text
 829    The control character CSI (0x9B: control sequence introducer) is
 830    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
 831
 832    Character composition specification takes the following form:
 833         o ESC '0' -- start relative composition
 834         o ESC '1' -- end composition
 835         o ESC '2' -- start rule-base composition (*)
 836         o ESC '3' -- start relative composition with alternate chars  (**)
 837         o ESC '4' -- start rule-base composition with alternate chars  (**)
 838   Since these are not standard escape sequences of any ISO standard,
 839   their use with these meanings is restricted to Emacs only.
 840
 841   (*) This form is used only in Emacs 20.5 and the older versions,
 842   but the newer versions can safely decode it.
 843   (**) This form is used only in Emacs 21.1 and the newer versions,
 844   and the older versions can't decode it.
 845
 846   Here's a list of example usages of these composition escape
 847   sequences (categorized by `enum composition_method').
 848
 849   COMPOSITION_RELATIVE:
 850         ESC 0 CHAR [ CHAR ] ESC 1
 851   COMPOSITION_WITH_RULE:
 852         ESC 2 CHAR [ RULE CHAR ] ESC 1
 853   COMPOSITION_WITH_ALTCHARS:
 854         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
 855   COMPOSITION_WITH_RULE_ALTCHARS:
 856         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
 857
 /* Code class of each of the 256 possible byte values.  The ISO2022
    decoder dispatches on iso_code_class[BYTE] for every source byte.  */
 858 enum iso_code_class_type iso_code_class[256];
 859
 860 #define CHARSET_OK(idx, charset, c)                                     \
 861   (coding_system_table[idx]                                             \
 862    && (charset == CHARSET_ASCII                                         \
 863        || (safe_chars = coding_safe_chars (coding_system_table[idx]),   \
 864            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
 865    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
 866                                               charset)                  \
 867        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 868
 869 #define SHIFT_OUT_OK(idx) \
 870   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 871
 872 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 873    Check if a text is encoded in ISO2022.  If it is, returns an
 874    integer in which appropriate flag bits any of:
 875         CODING_CATEGORY_MASK_ISO_7
 876         CODING_CATEGORY_MASK_ISO_7_TIGHT
 877         CODING_CATEGORY_MASK_ISO_8_1
 878         CODING_CATEGORY_MASK_ISO_8_2
 879         CODING_CATEGORY_MASK_ISO_7_ELSE
 880         CODING_CATEGORY_MASK_ISO_8_ELSE
 881    are set.  If a code which should never appear in ISO2022 is found,
 882    returns 0.  */
 883
 884 int
 885 detect_coding_iso2022 (src, src_end)
 886      unsigned char *src, *src_end;
 887 {
 888   int mask = CODING_CATEGORY_MASK_ISO;
 889   int mask_found = 0;
 890   int reg[4], shift_out = 0, single_shifting = 0;
 891   int c, c1, i, charset;
 892   /* Dummy for ONE_MORE_BYTE.  */
 893   struct coding_system dummy_coding;
 894   struct coding_system *coding = &dummy_coding;
 895   Lisp_Object safe_chars;
 896
 897   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 898   while (mask && src < src_end)
 899     {
 900       ONE_MORE_BYTE (c);
 901       switch (c)
 902         {
 903         case ISO_CODE_ESC:
 904           if (inhibit_iso_escape_detection)
 905             break;
 906           single_shifting = 0;
 907           ONE_MORE_BYTE (c);
 908           if (c >= '(' && c <= '/')
 909             {
 910               /* Designation sequence for a charset of dimension 1.  */
 911               ONE_MORE_BYTE (c1);
 912               if (c1 < ' ' || c1 >= 0x80
 913                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 914                 /* Invalid designation sequence.  Just ignore.  */
 915                 break;
 916               reg[(c - '(') % 4] = charset;
 917             }
 918           else if (c == '$')
 919             {
 920               /* Designation sequence for a charset of dimension 2.  */
 921               ONE_MORE_BYTE (c);
 922               if (c >= '@' && c <= 'B')
 923                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 924                 reg[0] = charset = iso_charset_table[1][0][c];
 925               else if (c >= '(' && c <= '/')
 926                 {
 927                   ONE_MORE_BYTE (c1);
 928                   if (c1 < ' ' || c1 >= 0x80
 929                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 930                     /* Invalid designation sequence.  Just ignore.  */
 931                     break;
 932                   reg[(c - '(') % 4] = charset;
 933                 }
 934               else
 935                 /* Invalid designation sequence.  Just ignore.  */
 936                 break;
 937             }
 938           else if (c == 'N' || c == 'O')
 939             {
 940               /* ESC <Fe> for SS2 or SS3.  */
 941               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
 942               break;
 943             }
 944           else if (c >= '0' && c <= '4')
 945             {
 946               /* ESC <Fp> for start/end composition.  */
 947               mask_found |= CODING_CATEGORY_MASK_ISO;
 948               break;
 949             }
 950           else
 951             /* Invalid escape sequence.  Just ignore.  */
 952             break;
 953
 954           /* We found a valid designation sequence for CHARSET.  */
 955           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 956           c = MAKE_CHAR (charset, 0, 0);
 957           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
 958             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 959           else
 960             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 961           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
 962             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 963           else
 964             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 965           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
 966             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
 967           else
 968             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 969           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
 970             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
 971           else
 972             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 973           break;
 974
 975         case ISO_CODE_SO:
 976           if (inhibit_iso_escape_detection)
 977             break;
 978           single_shifting = 0;
 979           if (shift_out == 0
 980               && (reg[1] >= 0
 981                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 982                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 983             {
 984               /* Locking shift out.  */
 985               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 986               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 987             }
 988           break;
 989
 990         case ISO_CODE_SI:
 991           if (inhibit_iso_escape_detection)
 992             break;
 993           single_shifting = 0;
 994           if (shift_out == 1)
 995             {
 996               /* Locking shift in.  */
 997               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 998               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 999             }
1000           break;
1001
1002         case ISO_CODE_CSI:
1003           single_shifting = 0;
1004         case ISO_CODE_SS2:
1005         case ISO_CODE_SS3:
1006           {
1007             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1008
1009             if (inhibit_iso_escape_detection)
1010               break;
1011             if (c != ISO_CODE_CSI)
1012               {
1013                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1014                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1015                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1016                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1017                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1018                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1019                 single_shifting = 1;
1020               }
1021             if (VECTORP (Vlatin_extra_code_table)
1022                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1023               {
1024                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1025                     & CODING_FLAG_ISO_LATIN_EXTRA)
1026                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1027                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1028                     & CODING_FLAG_ISO_LATIN_EXTRA)
1029                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1030               }
1031             mask &= newmask;
1032             mask_found |= newmask;
1033           }
1034           break;
1035
1036         default:
1037           if (c < 0x80)
1038             {
1039               single_shifting = 0;
1040               break;
1041             }
1042           else if (c < 0xA0)
1043             {
1044               single_shifting = 0;
1045               if (VECTORP (Vlatin_extra_code_table)
1046                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1047                 {
1048                   int newmask = 0;
1049
1050                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1051                       & CODING_FLAG_ISO_LATIN_EXTRA)
1052                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1053                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1054                       & CODING_FLAG_ISO_LATIN_EXTRA)
1055                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1056                   mask &= newmask;
1057                   mask_found |= newmask;
1058                 }
1059               else
1060                 return 0;
1061             }
1062           else
1063             {
1064               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1065                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1066               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1067               /* Check the length of succeeding codes of the range
1068                  0xA0..0FF.  If the byte length is odd, we exclude
1069                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1070                  when we are not single shifting.  */
1071               if (!single_shifting
1072                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1073                 {
1074                   int i = 1;
1075                   while (src < src_end)
1076                     {
1077                       ONE_MORE_BYTE (c);
1078                       if (c < 0xA0)
1079                         break;
1080                       i++;
1081                     }
1082
1083                   if (i & 1 && src < src_end)
1084                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1085                   else
1086                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1087                 }
1088             }
1089           break;
1090         }
1091     }
1092  label_end_of_loop:
1093   return (mask & mask_found);
1094 }
1095
/* Make the character of charset CHARSET whose 1st position code is C1
   and whose 2nd position code is C2, and return its character code.
   When the variable `translation_table' of the enclosing function is
   non-nil, the code comes from translate_char instead, i.e. the
   translated code is returned.  */

#define DECODE_ISO_CHARACTER(charset, c1, c2)                   \
  (! NILP (translation_table)                                   \
   ? translate_char (translation_table, -1, charset, c1, c2)    \
   : MAKE_CHAR (charset, c1, c2))
1105
/* Set designation state into CODING: designate the charset identified
   by DIMENSION, CHARS and FINAL_CHAR to graphic register REG.

   The designation is accepted when the charset is known and either
   CODING requests it for REG or its character is one of the safe
   characters.  On acceptance the designation is stored (replaced by
   the reverse charset when CODING_MODE_DIRECTION is set and a reverse
   charset exists) and the record of the last invalid designation is
   cleared.

   Control jumps to `label_invalid_code' of the enclosing function
   when:
     - FINAL_CHAR is outside '0'..127,
     - the designation is not accepted (REG is then remembered as the
       last invalid designation register), or
     - ASCII is designated to register 0 right after an invalid
       designation to register 0 (see the comment in the body).

   Uses the variables `coding' and `safe_chars' of the enclosing
   function.  The arguments are evaluated more than once, and must not
   mention variables named `charset' or `c', which this macro
   declares locally.  REG and FINAL_CHAR are parenthesized so that
   expression arguments keep the grouping the caller wrote.  */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
  do {                                                                     \
    int charset, c;                                                        \
                                                                           \
    if ((final_char) < '0' || (final_char) >= 128)                         \
      goto label_invalid_code;                                             \
    charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
                                 make_number (chars),                      \
                                 make_number (final_char));                \
    c = MAKE_CHAR (charset, 0, 0);                                         \
    if (charset >= 0                                                       \
        && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)        \
            == (reg)                                                       \
            || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
      {                                                                    \
        if (coding->spec.iso2022.last_invalid_designation_register == 0    \
            && (reg) == 0                                                  \
            && charset == CHARSET_ASCII)                                   \
          {                                                                \
            /* We should insert this designation sequence as is so         \
               that it is surely written back to a file.  */               \
            coding->spec.iso2022.last_invalid_designation_register = -1;   \
            goto label_invalid_code;                                       \
          }                                                                \
        coding->spec.iso2022.last_invalid_designation_register = -1;       \
        if ((coding->mode & CODING_MODE_DIRECTION)                         \
            && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
          charset = CHARSET_REVERSE_CHARSET (charset);                     \
        CODING_SPEC_ISO_DESIGNATION (coding, (reg)) = charset;             \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        coding->spec.iso2022.last_invalid_designation_register = (reg);    \
        goto label_invalid_code;                                           \
      }                                                                    \
  } while (0)
1142
1143 /* Allocate a memory block for storing information about compositions.
1144    The block is chained to the already allocated blocks.  */
1145
1146 void
1147 coding_allocate_composition_data (coding, char_offset)
1148      struct coding_system *coding;
1149      int char_offset;
1150 {
1151   struct composition_data *cmp_data
1152     = (struct composition_data *) xmalloc (sizeof *cmp_data);
1153
1154   cmp_data->char_offset = char_offset;
1155   cmp_data->used = 0;
1156   cmp_data->prev = coding->cmp_data;
1157   cmp_data->next = NULL;
1158   if (coding->cmp_data)
1159     coding->cmp_data->next = cmp_data;
1160   coding->cmp_data = cmp_data;
1161   coding->cmp_data_start = 0;
1162 }
1163
/* Record the starting position START and METHOD of one composition.

   A new record is opened at the end of the current composition data
   block of CODING, and CODING->cmp_data_start is set to its index.
   The record begins with four ints:
     [0] -1 for now (CODING_ADD_COMPOSITION_END stores the record
         length here)
     [1] the block's `char_offset' plus START
     [2] left untouched here (CODING_ADD_COMPOSITION_END stores the
         end position)
     [3] METHOD, converted to int
   The block's `used' count is advanced by 4.  No check is made here
   that the block has room for the record.

   All parameters are parenthesized so that expression arguments keep
   the grouping the caller wrote.  CODING is evaluated more than
   once.  */

#define CODING_ADD_COMPOSITION_START(coding, start, method)     \
  do {                                                          \
    struct composition_data *cmp_data = (coding)->cmp_data;     \
    int *data = cmp_data->data + cmp_data->used;                \
    (coding)->cmp_data_start = cmp_data->used;                  \
    data[0] = -1;                                               \
    data[1] = cmp_data->char_offset + (start);                  \
    data[3] = (int) (method);                                   \
    cmp_data->used += 4;                                        \
  } while (0)
1176
/* Record the ending position END of the current composition.

   The record that starts at index CODING->cmp_data_start of the
   current composition data block is completed: slot [0] receives the
   number of ints used since the record start, and slot [2] receives
   the block's `char_offset' plus END.

   Both parameters are parenthesized so that expression arguments keep
   the grouping the caller wrote.  CODING is evaluated more than
   once.  */

#define CODING_ADD_COMPOSITION_END(coding, end)                 \
  do {                                                          \
    struct composition_data *cmp_data = (coding)->cmp_data;     \
    int *data = cmp_data->data + (coding)->cmp_data_start;      \
    data[0] = cmp_data->used - (coding)->cmp_data_start;        \
    data[2] = cmp_data->char_offset + (end);                    \
  } while (0)
1186
/* Record one COMPONENT (alternate character or composition rule).

   COMPONENT is stored at the end of the current composition data
   block of CODING and the block's `used' count is advanced by one.
   The value of the expression is the stored component.  No check is
   made here that the block has room for it.

   Both parameters are parenthesized so that expression arguments keep
   the grouping the caller wrote.  CODING is evaluated twice.  */

#define CODING_ADD_COMPOSITION_COMPONENT(coding, component)     \
  ((coding)->cmp_data->data[(coding)->cmp_data->used++] = (component))
1191
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   C1 is the character that followed ESC.

   Three cases, depending on the composition state of `coding':
     - composition handling is disabled: the escape sequence is copied
       to the destination as two characters;
     - no composition is open: a new composition is started with the
       method selected by C1, provided the composition data block has
       room; otherwise control jumps to `label_end_of_loop' with
       CODING_FINISH_INSUFFICIENT_CMP as the result;
     - a composition is already open: see the comment in the body.

   Uses the variables `coding' and `dst' of the enclosing function.
   C1 is evaluated more than once; it is parenthesized so that an
   expression argument keeps the grouping the caller wrote.  */

#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    if (coding->composing == COMPOSITION_DISABLED)                         \
      {                                                                    \
        *dst++ = ISO_CODE_ESC;                                             \
        *dst++ = (c1) & 0x7f;                                              \
        coding->produced_char += 2;                                        \
      }                                                                    \
    else if (!COMPOSING_P (coding))                                        \
      {                                                                    \
        /* This is surely the start of a composition.  We must be sure     \
           that coding->cmp_data has enough space to store the             \
           information about the composition.  If not, terminate the       \
           current decoding loop, allocate one more memory block for       \
           coding->cmp_data in the caller, then start the decoding         \
           loop again.  We can't allocate memory here directly because     \
           it may cause buffer/string relocation.  */                      \
        if (!coding->cmp_data                                              \
            || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
                >= COMPOSITION_DATA_SIZE))                                 \
          {                                                                \
            coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
            goto label_end_of_loop;                                        \
          }                                                                \
        coding->composing = ((c1) == '0' ? COMPOSITION_RELATIVE            \
                             : (c1) == '2' ? COMPOSITION_WITH_RULE         \
                             : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS     \
                             : COMPOSITION_WITH_RULE_ALTCHARS);            \
        CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
                                      coding->composing);                  \
        coding->composition_rule_follows = 0;                              \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        /* We are already handling a composition.  If the method is        \
           one of the following two, the codes following the current       \
           escape sequence are actual characters stored in a buffer.  */   \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
            || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
          {                                                                \
            coding->composing = COMPOSITION_RELATIVE;                      \
            coding->composition_rule_follows = 0;                          \
          }                                                                \
      }                                                                    \
  } while (0)
1239
/* Handle composition end sequence ESC 1.  C1 is the character that
   followed ESC.

   If composition handling is disabled, or if no composition is
   currently open, the escape sequence is copied to the destination as
   two characters.  Otherwise the end position of the open composition
   is recorded and the composition state returns to COMPOSITION_NO.

   The "no composition is open" test keeps a stray ESC 1 from writing
   an end record through coding->cmp_data, which is not guaranteed to
   be set while nothing is being composed (the start macro checks it
   for null in that state).
   NOTE(review): this assumes some caller can reach this macro without
   an open composition, e.g. an ESC '1' handler -- confirm against the
   callers.

   Uses the variables `coding' and `dst' of the enclosing function.  */

#define DECODE_COMPOSITION_END(c1)                                      \
  do {                                                                  \
    if (coding->composing == COMPOSITION_DISABLED                       \
        || ! COMPOSING_P (coding))                                      \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = (c1);                                                  \
        coding->produced_char += 2;                                     \
      }                                                                 \
    else                                                                \
      {                                                                 \
        CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
        coding->composing = COMPOSITION_NO;                             \
      }                                                                 \
  } while (0)
1256
/* Decode one composition rule that starts with the byte C1 and store
   the encoded rule as a component in coding->cmp_data.

   C1 must be a modifiable variable: 32 is subtracted from it in
   place.  After that,
     - a value below 81 is a complete rule in the old format: it is
       split into two references by dividing by 9, and a reference of
       4 is mapped to 10;
     - a value in 81..92 is the first half of a rule in the new
       format: one more byte is read from the source into the
       enclosing function's variable `c2', and the rule is encoded
       from C1 - 81 and C2 - 32;
     - any other value records the rule 0.
   Finally coding->composition_rule_follows is cleared.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    int rule = 0;                                                       \
    (c1) -= 32;                                                         \
    if (c1 >= 81)                                                       \
      {                                                                 \
        if (c1 < 93)            /* new format (after ver.21) */         \
          {                                                             \
            ONE_MORE_BYTE (c2);                                         \
            rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);          \
          }                                                             \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (c1) / 9;                                            \
        int nref = (c1) % 9;                                            \
        if (gref == 4)                                                  \
          gref = 10;                                                    \
        if (nref == 4)                                                  \
          nref = 10;                                                    \
        rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
      }                                                                 \
    CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
    coding->composition_rule_follows = 0;                               \
  } while (0)
1281
1282
1283 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1284
1285 static void
1286 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1287      struct coding_system *coding;
1288      unsigned char *source, *destination;
1289      int src_bytes, dst_bytes;
1290 {
1291   unsigned char *src = source;
1292   unsigned char *src_end = source + src_bytes;
1293   unsigned char *dst = destination;
1294   unsigned char *dst_end = destination + dst_bytes;
1295   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1296   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1297   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1298   /* SRC_BASE remembers the start position in source in each loop.
1299      The loop will be exited when there's not enough source code
1300      (within macro ONE_MORE_BYTE), or when there's not enough
1301      destination area to produce a character (within macro
1302      EMIT_CHAR).  */
1303   unsigned char *src_base;
1304   int c, charset;
1305   Lisp_Object translation_table;
1306   Lisp_Object safe_chars;
1307
1308   safe_chars = coding_safe_chars (coding);
1309
1310   if (NILP (Venable_character_translation))
1311     translation_table = Qnil;
1312   else
1313     {
1314       translation_table = coding->translation_table_for_decode;
1315       if (NILP (translation_table))
1316         translation_table = Vstandard_translation_table_for_decode;
1317     }
1318
1319   coding->result = CODING_FINISH_NORMAL;
1320
1321   while (1)
1322     {
1323       int c1, c2;
1324
1325       src_base = src;
1326       ONE_MORE_BYTE (c1);
1327
1328       /* We produce no character or one character.  */
1329       switch (iso_code_class [c1])
1330         {
1331         case ISO_0x20_or_0x7F:
1332           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1333             {
1334               DECODE_COMPOSITION_RULE (c1);
1335               continue;
1336             }
1337           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1338             {
1339               /* This is SPACE or DEL.  */
1340               charset = CHARSET_ASCII;
1341               break;
1342             }
1343           /* This is a graphic character, we fall down ...  */
1344
1345         case ISO_graphic_plane_0:
1346           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1347             {
1348               DECODE_COMPOSITION_RULE (c1);
1349               continue;
1350             }
1351           charset = charset0;
1352           break;
1353
1354         case ISO_0xA0_or_0xFF:
1355           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1356               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1357             goto label_invalid_code;
1358           /* This is a graphic character, we fall down ... */
1359
1360         case ISO_graphic_plane_1:
1361           if (charset1 < 0)
1362             goto label_invalid_code;
1363           charset = charset1;
1364           break;
1365
1366         case ISO_control_0:
1367           if (COMPOSING_P (coding))
1368             DECODE_COMPOSITION_END ('1');
1369
1370           /* All ISO2022 control characters in this class have the
1371              same representation in Emacs internal format.  */
1372           if (c1 == '\n'
1373               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1374               && (coding->eol_type == CODING_EOL_CR
1375                   || coding->eol_type == CODING_EOL_CRLF))
1376             {
1377               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1378               goto label_end_of_loop;
1379             }
1380           charset = CHARSET_ASCII;
1381           break;
1382
1383         case ISO_control_1:
1384           if (COMPOSING_P (coding))
1385             DECODE_COMPOSITION_END ('1');
1386           goto label_invalid_code;
1387
1388         case ISO_carriage_return:
1389           if (COMPOSING_P (coding))
1390             DECODE_COMPOSITION_END ('1');
1391
1392           if (coding->eol_type == CODING_EOL_CR)
1393             c1 = '\n';
1394           else if (coding->eol_type == CODING_EOL_CRLF)
1395             {
1396               ONE_MORE_BYTE (c1);
1397               if (c1 != ISO_CODE_LF)
1398                 {
1399                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1400                     {
1401                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
1402                       goto label_end_of_loop;
1403                     }
1404                   src--;
1405                   c1 = '\r';
1406                 }
1407             }
1408           charset = CHARSET_ASCII;
1409           break;
1410
1411         case ISO_shift_out:
1412           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1413               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1414             goto label_invalid_code;
1415           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1416           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1417           continue;
1418
1419         case ISO_shift_in:
1420           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1421             goto label_invalid_code;
1422           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1423           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1424           continue;
1425
1426         case ISO_single_shift_2_7:
1427         case ISO_single_shift_2:
1428           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1429             goto label_invalid_code;
1430           /* SS2 is handled as an escape sequence of ESC 'N' */
1431           c1 = 'N';
1432           goto label_escape_sequence;
1433
1434         case ISO_single_shift_3:
1435           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1436             goto label_invalid_code;
1437           /* SS2 is handled as an escape sequence of ESC 'O' */
1438           c1 = 'O';
1439           goto label_escape_sequence;
1440
1441         case ISO_control_sequence_introducer:
1442           /* CSI is handled as an escape sequence of ESC '[' ...  */
1443           c1 = '[';
1444           goto label_escape_sequence;
1445
1446         case ISO_escape:
1447           ONE_MORE_BYTE (c1);
1448         label_escape_sequence:
1449           /* Escape sequences handled by Emacs are invocation,
1450              designation, direction specification, and character
1451              composition specification.  */
1452           switch (c1)
1453             {
1454             case '&':           /* revision of following character set */
1455               ONE_MORE_BYTE (c1);
1456               if (!(c1 >= '@' && c1 <= '~'))
1457                 goto label_invalid_code;
1458               ONE_MORE_BYTE (c1);
1459               if (c1 != ISO_CODE_ESC)
1460                 goto label_invalid_code;
1461               ONE_MORE_BYTE (c1);
1462               goto label_escape_sequence;
1463
1464             case '$':           /* designation of 2-byte character set */
1465               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1466                 goto label_invalid_code;
1467               ONE_MORE_BYTE (c1);
1468               if (c1 >= '@' && c1 <= 'B')
1469                 {       /* designation of JISX0208.1978, GB2312.1980,
1470                            or JISX0208.1980 */
1471                   DECODE_DESIGNATION (0, 2, 94, c1);
1472                 }
1473               else if (c1 >= 0x28 && c1 <= 0x2B)
1474                 {       /* designation of DIMENSION2_CHARS94 character set */
1475                   ONE_MORE_BYTE (c2);
1476                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1477                 }
1478               else if (c1 >= 0x2C && c1 <= 0x2F)
1479                 {       /* designation of DIMENSION2_CHARS96 character set */
1480                   ONE_MORE_BYTE (c2);
1481                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1482                 }
1483               else
1484                 goto label_invalid_code;
1485               /* We must update these variables now.  */
1486               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1487               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1488               continue;
1489
1490             case 'n':           /* invocation of locking-shift-2 */
1491               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1492                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1493                 goto label_invalid_code;
1494               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1495               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1496               continue;
1497
1498             case 'o':           /* invocation of locking-shift-3 */
1499               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1500                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1501                 goto label_invalid_code;
1502               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1503               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1504               continue;
1505
1506             case 'N':           /* invocation of single-shift-2 */
1507               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1508                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1509                 goto label_invalid_code;
1510               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1511               ONE_MORE_BYTE (c1);
1512               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1513                 goto label_invalid_code;
1514               break;
1515
1516             case 'O':           /* invocation of single-shift-3 */
1517               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1518                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1519                 goto label_invalid_code;
1520               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1521               ONE_MORE_BYTE (c1);
1522               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1523                 goto label_invalid_code;
1524               break;
1525
1526             case '0': case '2': case '3': case '4': /* start composition */
1527               DECODE_COMPOSITION_START (c1);
1528               continue;
1529
1530             case '1':           /* end composition */
1531               DECODE_COMPOSITION_END (c1);
1532               continue;
1533
1534             case '[':           /* specification of direction */
1535               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1536                 goto label_invalid_code;
1537               /* For the moment, nested direction is not supported.
1538                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1539                  left-to-right, and nozero means right-to-left.  */
1540               ONE_MORE_BYTE (c1);
1541               switch (c1)
1542                 {
1543                 case ']':       /* end of the current direction */
1544                   coding->mode &= ~CODING_MODE_DIRECTION;
1545
1546                 case '0':       /* end of the current direction */
1547                 case '1':       /* start of left-to-right direction */
1548                   ONE_MORE_BYTE (c1);
1549                   if (c1 == ']')
1550                     coding->mode &= ~CODING_MODE_DIRECTION;
1551                   else
1552                     goto label_invalid_code;
1553                   break;
1554
1555                 case '2':       /* start of right-to-left direction */
1556                   ONE_MORE_BYTE (c1);
1557                   if (c1 == ']')
1558                     coding->mode |= CODING_MODE_DIRECTION;
1559                   else
1560                     goto label_invalid_code;
1561                   break;
1562
1563                 default:
1564                   goto label_invalid_code;
1565                 }
1566               continue;
1567
1568             default:
1569               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1570                 goto label_invalid_code;
1571               if (c1 >= 0x28 && c1 <= 0x2B)
1572                 {       /* designation of DIMENSION1_CHARS94 character set */
1573                   ONE_MORE_BYTE (c2);
1574                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1575                 }
1576               else if (c1 >= 0x2C && c1 <= 0x2F)
1577                 {       /* designation of DIMENSION1_CHARS96 character set */
1578                   ONE_MORE_BYTE (c2);
1579                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1580                 }
1581               else
1582                 goto label_invalid_code;
1583               /* We must update these variables now.  */
1584               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1585               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1586               continue;
1587             }
1588         }
1589
1590       /* Now we know CHARSET and 1st position code C1 of a character.
1591          Produce a multibyte sequence for that character while getting
1592          2nd position code C2 if necessary.  */
1593       if (CHARSET_DIMENSION (charset) == 2)
1594         {
1595           ONE_MORE_BYTE (c2);
1596           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
1597             /* C2 is not in a valid range.  */
1598             goto label_invalid_code;
1599         }
1600       c = DECODE_ISO_CHARACTER (charset, c1, c2);
1601       EMIT_CHAR (c);
1602       continue;
1603
1604     label_invalid_code:
1605       coding->errors++;
1606       if (COMPOSING_P (coding))
1607         DECODE_COMPOSITION_END ('1');
1608       src = src_base;
1609       c = *src++;
1610       EMIT_CHAR (c);
1611     }
1612
1613  label_end_of_loop:
1614   coding->consumed = coding->consumed_char = src_base - source;
1615   coding->produced = dst - destination;
1616   return;
1617 }
1618
1619
1620 /* ISO2022 encoding stuff.  */
1621
/*
   Saying just "ISO2022" is not enough to specify an encoding; more
   details must be given.  In Emacs, each coding system of the ISO2022
   variant has the following specifications:
        1. Initial designation to G0 through G3.
        2. Is short-form designation allowed?
        3. Should ASCII be designated to G0 before control characters?
        4. Should ASCII be designated to G0 at end of line?
        5. 7-bit environment or 8-bit environment?
        6. Use locking-shift?
        7. Use single-shift?
   And the following two are only for Japanese:
        8. Use ASCII in place of JISX0201-1976-Roman?
        9. Use JISX0208-1983 in place of JISX0208-1978?
   These specifications are encoded in `coding->flags' as flag bits
   defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
   details.
*/
1640
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advance DST past the bytes written, and record the
   new designation in CODING.

   The sequence is: an optional revision prefix `ESC & <'@' + revision>'
   (only when the revision number recorded for CHARSET is below 255),
   then ESC, `$' if CHARSET is not 1-dimensional, an intermediate byte
   selecting REG (one of `()*+' for 94-char sets, one of `,-./'
   otherwise), and the <final-char> of CHARSET.  The intermediate byte
   is left out (short form) only for a multi-dimensional 94-char set
   designated to register 0 whose <final-char> is '@', 'A', or 'B', and
   only when CODING has CODING_FLAG_ISO_SHORT_FORM set.

   This macro writes through the variable `dst' of the enclosing scope
   and evaluates its arguments more than once.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_byte_ = CHARSET_ISO_FINAL_CHAR (charset);       \
    int rev_ = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);       \
    /* Intermediate bytes, indexed by graphic register number.  */      \
    char *inter_94_ = "()*+";                                           \
    char *inter_96_ = ",-./";                                           \
                                                                        \
    if (rev_ < 255)                                                     \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + rev_;                                            \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      *dst++ = '$';                                                     \
    if (CHARSET_CHARS (charset) != 94)                                  \
      *dst++ = (unsigned char) (inter_96_[reg]);                        \
    else if (CHARSET_DIMENSION (charset) == 1                           \
             || ! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
             || reg != 0                                                \
             || final_byte_ < '@' || final_byte_ > 'B')                 \
      /* Long form: the short form applies only to the case excluded    \
         by the condition above.  */                                    \
      *dst++ = (unsigned char) (inter_94_[reg]);                        \
    *dst++ = final_byte_;                                               \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
1683
/* The following two macros emit the ISO2022 single-shift functions
   (single-shift-2 and single-shift-3) at `dst' and advance it.  When
   CODING_FLAG_ISO_SEVEN_BITS is set in `coding->flags' the function is
   written as an escape sequence (ESC N or ESC O); otherwise it is
   written as the single control byte ISO_CODE_SS2 or ISO_CODE_SS3.
   Either way, the single-shifting state of `coding' is set to 1
   afterwards.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1705
/* The following four macros emit the ISO2022 locking-shift functions
   at `dst' and advance it: shift-in (ISO_CODE_SI), shift-out
   (ISO_CODE_SO), locking-shift-2 (ESC n), and locking-shift-3 (ESC o).
   Each one also records in `coding' that graphic plane 0 now invokes
   graphic register 0, 1, 2, or 3 respectively.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
  } while (0)
1733
/* Emit at `dst' the single byte for a DIMENSION1 character of CHARSET
   whose position-code is C1, advancing `dst'.

   If a single-shift is pending in `coding', the byte is written with
   the high bit cleared (7-bit environment) or set (8-bit environment)
   and the pending state is cleared.  Otherwise the byte is written
   with the high bit cleared when CHARSET is invoked to graphic plane 0
   and with the high bit set when it is invoked to graphic plane 1.  If
   CHARSET is on neither plane, encode_invocation_designation is called
   to produce the necessary designation and invocation codes, and the
   checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? (c1 & 0x7F) : (c1 | 0x80));                         \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to either graphic plane yet.  Invoke      \
       it (designating it to a graphic register first if needed),       \
       then go around again to produce the character itself.  */        \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1766
/* Emit at `dst' the two bytes for a DIMENSION2 character of CHARSET
   whose position-codes are C1 and C2, advancing `dst'.

   If a single-shift is pending in `coding', both bytes are written
   with the high bit cleared (7-bit environment) or set (8-bit
   environment) and the pending state is cleared.  Otherwise both bytes
   are written with the high bit cleared when CHARSET is invoked to
   graphic plane 0 and with the high bit set when it is invoked to
   graphic plane 1.  If CHARSET is on neither plane,
   encode_invocation_designation is called to produce the necessary
   designation and invocation codes, and the checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to either graphic plane yet.  Invoke      \
       it (designating it to a graphic register first if needed),       \
       then go around again to produce the character itself.  */        \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1799
/* Produce codes for character C at `dst', advancing `dst'.

   C is split by SPLIT_CHAR into a charset and position codes.  If the
   charset is not defined, the first position code is written as is,
   followed by the second one when it is not negative.  Otherwise the
   character goes through ENCODE_ISO_CHARACTER_DIMENSION1 or
   ENCODE_ISO_CHARACTER_DIMENSION2 according to the dimension of its
   charset, after these substitutions requested by `coding->flags':
   charset_latin_jisx0201 replaces ASCII under
   CODING_FLAG_ISO_USE_ROMAN, and charset_jisx0208_1978 replaces
   charset_jisx0208 under CODING_FLAG_ISO_USE_OLDJIS.  */

#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int cs_, pos1_, pos2_;                                              \
                                                                        \
    SPLIT_CHAR (c, cs_, pos1_, pos2_);                                  \
    if (! CHARSET_DEFINED_P (cs_))                                      \
      {                                                                 \
        /* Unknown charset: pass the position codes through.  */        \
        *dst++ = pos1_;                                                 \
        if (pos2_ >= 0)                                                 \
          *dst++ = pos2_;                                               \
      }                                                                 \
    else if (CHARSET_DIMENSION (cs_) == 1)                              \
      {                                                                 \
        if (cs_ == CHARSET_ASCII                                        \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))             \
          cs_ = charset_latin_jisx0201;                                 \
        ENCODE_ISO_CHARACTER_DIMENSION1 (cs_, pos1_);                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (cs_ == charset_jisx0208                                     \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))            \
          cs_ = charset_jisx0208_1978;                                  \
        ENCODE_ISO_CHARACTER_DIMENSION2 (cs_, pos1_, pos2_);            \
      }                                                                 \
  } while (0)
1829
1830
/* Instead of encoding character C, encode the substitution character
   CODING_INHIBIT_CHARACTER_SUBSTITUTION in its place: once, or twice
   when the width of C's charset is greater than 1.  */

#define ENCODE_UNSAFE_CHARACTER(c)                                      \
  do {                                                                  \
    ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
    if (CHARSET_WIDTH (CHAR_CHARSET (c)) <= 1)                          \
      break;                                                            \
    /* C is wider than one column; produce a second substitute.  */     \
    ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
  } while (0)
1839
1840
1841 /* Produce designation and invocation codes at a place pointed by DST
1842    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1843    Return new DST.  */
1844
1845 unsigned char *
1846 encode_invocation_designation (charset, coding, dst)
1847      int charset;
1848      struct coding_system *coding;
1849      unsigned char *dst;
1850 {
1851   int reg;                      /* graphic register number */
1852
1853   /* At first, check designations.  */
1854   for (reg = 0; reg < 4; reg++)
1855     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1856       break;
1857
1858   if (reg >= 4)
1859     {
1860       /* CHARSET is not yet designated to any graphic registers.  */
1861       /* At first check the requested designation.  */
1862       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1863       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1864         /* Since CHARSET requests no special designation, designate it
1865            to graphic register 0.  */
1866         reg = 0;
1867
1868       ENCODE_DESIGNATION (charset, reg, coding);
1869     }
1870
1871   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1872       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1873     {
1874       /* Since the graphic register REG is not invoked to any graphic
1875          planes, invoke it to graphic plane 0.  */
1876       switch (reg)
1877         {
1878         case 0:                 /* graphic register 0 */
1879           ENCODE_SHIFT_IN;
1880           break;
1881
1882         case 1:                 /* graphic register 1 */
1883           ENCODE_SHIFT_OUT;
1884           break;
1885
1886         case 2:                 /* graphic register 2 */
1887           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1888             ENCODE_SINGLE_SHIFT_2;
1889           else
1890             ENCODE_LOCKING_SHIFT_2;
1891           break;
1892
1893         case 3:                 /* graphic register 3 */
1894           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1895             ENCODE_SINGLE_SHIFT_3;
1896           else
1897             ENCODE_LOCKING_SHIFT_3;
1898           break;
1899         }
1900     }
1901
1902   return dst;
1903 }
1904
/* Produce the 2-byte code for encoded composition rule RULE at `dst'
   and advance `dst' by two.  RULE is unpacked by
   COMPOSITION_DECODE_RULE into two values; the first byte is
   32 + 81 + the first value, the second byte is 32 + the second.  */

#define ENCODE_COMPOSITION_RULE(rule)                           \
  do {                                                          \
    int gref_, nref_;                                           \
                                                                \
    COMPOSITION_DECODE_RULE (rule, gref_, nref_);               \
    dst[0] = 32 + 81 + gref_;                                   \
    dst[1] = 32 + nref_;                                        \
    dst += 2;                                                   \
  } while (0)
1914
/* Produce codes for indicating the start of a composition sequence
   (ESC 0, ESC 3, or ESC 4) at `dst', advancing `dst'.  DATA points to
   an array of integers which specify information about the
   composition; see the comment in coding.h for its format.

   DATA[3] is stored in CODING->composing.  ESC 0 is produced when it
   is COMPOSITION_RELATIVE.  Otherwise ESC 3 (for
   COMPOSITION_WITH_ALTCHARS) or ESC 4 (anything else) is produced,
   CODING->cmp_data_index is set to CODING->cmp_data_start + 4, and
   CODING->composition_rule_follows is cleared.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
        coding->composition_rule_follows = 0;                           \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
1934
/* Produce codes for indicating the end of the current composition
   (ESC 1) at `dst', advancing `dst' by two.

   CODING->cmp_data_start is advanced by DATA[0] and CODING->composing
   is reset to COMPOSITION_NO.  If that leaves cmp_data_start equal to
   the `used' count of the current composition data block and the block
   has a successor, CODING moves on to the successor with
   cmp_data_start reset to 0.  */

#define ENCODE_COMPOSITION_END(coding, data)                    \
  do {                                                          \
    dst[0] = ISO_CODE_ESC;                                      \
    dst[1] = '1';                                               \
    dst += 2;                                                   \
    coding->composing = COMPOSITION_NO;                         \
    coding->cmp_data_start += data[0];                          \
    if (coding->cmp_data_start == coding->cmp_data->used        \
        && coding->cmp_data->next)                              \
      {                                                         \
        coding->cmp_data_start = 0;                             \
        coding->cmp_data = coding->cmp_data->next;              \
      }                                                         \
  } while (0)
1950
/* Produce the composition start sequence ESC 0 at `dst', advance
   `dst' by two, and set CODING->composing to COMPOSITION_RELATIVE.
   Here the sequence does not open a new composition; it marks that
   the components (alternate chars and composition rules) of the
   composition have just been produced and that the actual text
   follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)           \
  do {                                                  \
    coding->composing = COMPOSITION_RELATIVE;           \
    dst[0] = ISO_CODE_ESC;                              \
    dst[1] = '0';                                       \
    dst += 2;                                           \
  } while (0)
1962
/* The following three macros produce codes for indicating direction
   of text.  Each is a statement that writes through the variable
   `dst' of the enclosing scope, advances it, and reads `coding'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER produces CSI: as the two bytes
   ESC [ when CODING_FLAG_ISO_SEVEN_BITS is set in `coding->flags',
   otherwise as the single byte ISO_CODE_CSI.  ENCODE_DIRECTION_R2L
   and ENCODE_DIRECTION_L2R produce CSI followed by `2 ]' and `0 ]'
   respectively.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    /* `flags' is a bit mask; test the bit rather than  \
       comparing the whole word for equality.  */       \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = '[';                                   \
      }                                                 \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)

#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
1978
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to their initial state: a shift-in when
   graphic plane 0 does not invoke graphic register 0, then, for each
   graphic register that has an initial designation (>= 0) differing
   from its current one, the sequence designating the initial charset
   to it.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int greg_;                                                              \
                                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    for (greg_ = 0; greg_ < 4; greg_++)                                     \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, greg_) < 0)        \
          continue;                                                         \
        if (CODING_SPEC_ISO_DESIGNATION (coding, greg_)                     \
            == CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, greg_))         \
          continue;                                                         \
        ENCODE_DESIGNATION                                                  \
          (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, greg_), greg_,      \
           coding);                                                         \
      }                                                                     \
  } while (0)
1993
1994 /* Produce designation sequences of charsets in the line started from
1995    SRC to a place pointed by DST, and return updated DST.
1996
1997    If the current block ends before any end-of-line, we may fail to
1998    find all the necessary designations.

        The line is scanned only up to the first newline, the end of the
        source, or until all four graphic registers have a charset to
        designate, whichever comes first.  Each register takes the first
        charset in the line that requests it; a designation is produced
        only when it differs from what CODING currently has designated to
        that register.

        TRANSLATION_TABLE and SRC_END are not referenced by name in the
        body; presumably ONE_MORE_CHAR uses them -- confirm against the
        macro.  */
1999
2000 static unsigned char *
2001 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2002      struct coding_system *coding;
2003      Lisp_Object translation_table;
2004      unsigned char *src, *src_end, *dst;
2005 {
2006   int charset, c, found = 0, reg;
2007   /* Table of charsets to be designated to each graphic register.  */
2008   int r[4];
2009
       /* -1 means that no charset has been found for the register yet.  */
2010   for (reg = 0; reg < 4; reg++)
2011     r[reg] = -1;
2012
       /* FOUND counts the registers filled in so far.  */
2013   while (found < 4)
2014     {
           /* Read the next character.  When the source is used up, control
              is expected to leave the loop through label_end_of_loop
              below.  */
2015       ONE_MORE_CHAR (c);
2016       if (c == '\n')
2017         break;
2018
2019       charset = CHAR_CHARSET (c);
2020       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
           /* Only the first charset requesting a register is recorded.
              NOTE(review): REG indexes R directly, so the requested
              designation is assumed to be in 0..3 -- confirm.  */
2021       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2022         {
2023           found++;
2024           r[reg] = charset;
2025         }
2026     }
2027
2028  label_end_of_loop:
       /* Designate what was found, skipping registers that already hold
          the wanted charset.  */
2029   if (found)
2030     {
2031       for (reg = 0; reg < 4; reg++)
2032         if (r[reg] >= 0
2033             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2034           ENCODE_DESIGNATION (r[reg], reg, coding);
2035     }
2036
2037   return dst;
2038 }
2039
2040 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".

        Encode the SRC_BYTES bytes at SOURCE according to the ISO2022
        settings in CODING and store the result at DESTINATION, whose size
        is DST_BYTES.

        On return, CODING->consumed is the number of source bytes that
        were fully processed, and CODING->produced and
        CODING->produced_char are both the number of bytes stored.
        CODING->consumed_char and CODING->errors are reset to zero on
        entry; the former is advanced at the bottom of the main loop, the
        latter counts the non-ASCII single-byte characters that are copied
        as they are.  CODING->result is set to
        CODING_FINISH_INSUFFICIENT_DST when the destination runs short.  */
2041
2042 static void
2043 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2044      struct coding_system *coding;
2045      unsigned char *source, *destination;
2046      int src_bytes, dst_bytes;
2047 {
2048   unsigned char *src = source;
2049   unsigned char *src_end = source + src_bytes;
2050   unsigned char *dst = destination;
2051   unsigned char *dst_end = destination + dst_bytes;
2052   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2053      from DST_END to assure overflow checking is necessary only at the
2054      head of loop.  */
2055   unsigned char *adjusted_dst_end = dst_end - 19;
2056   /* SRC_BASE remembers the start position in source in each loop.
2057      The loop will be exited when there's not enough source text to
2058      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2059      there's not enough destination area to produce encoded codes
2060      (within macro EMIT_BYTES).  */
2061   unsigned char *src_base;
2062   int c;
2063   Lisp_Object translation_table;
2064   Lisp_Object safe_chars;
2065
2066   safe_chars = coding_safe_chars (coding);
2067
       /* Use the translation table of CODING, or the standard one if CODING
          has none, unless character translation is disabled.  */
2068   if (NILP (Venable_character_translation))
2069     translation_table = Qnil;
2070   else
2071     {
2072       translation_table = coding->translation_table_for_encode;
2073       if (NILP (translation_table))
2074         translation_table = Vstandard_translation_table_for_encode;
2075     }
2076
2077   coding->consumed_char = 0;
2078   coding->errors = 0;
2079   while (1)
2080     {
2081       src_base = src;
2082
           /* When DST_BYTES is zero the limit is derived from SRC instead of
              DST_END.  NOTE(review): presumably source and destination
              share one buffer in that case -- confirm against the
              callers.  */
2083       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2084         {
2085           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2086           break;
2087         }
2088
2089       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2090           && CODING_SPEC_ISO_BOL (coding))
2091         {
2092           /* We have to produce designation sequences if any now.  */
2093           dst = encode_designation_at_bol (coding, translation_table,
2094                                            src, src_end, dst);
2095           CODING_SPEC_ISO_BOL (coding) = 0;
2096         }
2097
2098       /* Check composition start and end.  */
2099       if (coding->composing != COMPOSITION_DISABLED
2100           && coding->cmp_data_start < coding->cmp_data->used)
2101         {
2102           struct composition_data *cmp_data = coding->cmp_data;
2103           int *data = cmp_data->data + coding->cmp_data_start;
               /* Position of the character about to be read, in the same
                  coordinates as the positions stored in DATA.  */
2104           int this_pos = cmp_data->char_offset + coding->consumed_char;
2105
2106           if (coding->composing == COMPOSITION_RELATIVE)
2107             {
2108               if (this_pos == data[2])
2109                 {
2110                   ENCODE_COMPOSITION_END (coding, data);
                       /* Reload both pointers from CODING after the macro.
                          NOTE(review): presumably it advances to the next
                          composition record -- confirm.  */
2111                   cmp_data = coding->cmp_data;
2112                   data = cmp_data->data + coding->cmp_data_start;
2113                 }
2114             }
2115           else if (COMPOSING_P (coding))
2116             {
2117               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS  */
2118               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2119                 /* We have consumed components of the composition.
2120                    What follows in SRC is the composition's base
2121                    text.  */
2122                 ENCODE_COMPOSITION_FAKE_START (coding);
2123               else
2124                 {
                       /* Encode the next component (a character or a rule)
                          taken from the composition data, not from SRC.
                          This C hides the function-level C.  */
2125                   int c = cmp_data->data[coding->cmp_data_index++];
2126                   if (coding->composition_rule_follows)
2127                     {
2128                       ENCODE_COMPOSITION_RULE (c);
2129                       coding->composition_rule_follows = 0;
2130                     }
2131                   else
2132                     {
2133                       if (coding->flags & CODING_FLAG_ISO_SAFE
2134                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2135                         ENCODE_UNSAFE_CHARACTER (c);
2136                       else
2137                         ENCODE_ISO_CHARACTER (c);
2138                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2139                         coding->composition_rule_follows = 1;
2140                     }
                       /* Nothing was read from SRC in this iteration.  */
2141                   continue;
2142                 }
2143             }
2144           if (!COMPOSING_P (coding))
2145             {
2146               if (this_pos == data[1])
2147                 {
2148                   ENCODE_COMPOSITION_START (coding, data);
2149                   continue;
2150                 }
2151             }
2152         }
2153
2154       ONE_MORE_CHAR (c);
2155
2156       /* Now encode the character C.  */
2157       if (c < 0x20 || c == 0x7F)
2158         {
2159           if (c == '\r')
2160             {
2161               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2162                 {
2163                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2164                     ENCODE_RESET_PLANE_AND_REGISTER;
2165                   *dst++ = c;
                       /* NOTE(review): this `continue' skips the increment
                          of coding->consumed_char at the bottom of the loop
                          although a character was read from SRC -- confirm
                          that this is intended.  */
2166                   continue;
2167                 }
2168               /* fall down to treat '\r' as '\n' ...  */
2169               c = '\n';
2170             }
2171           if (c == '\n')
2172             {
2173               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2174                 ENCODE_RESET_PLANE_AND_REGISTER;
                   /* Make the recorded current designations equal to the
                      initial ones for the next line.  */
2175               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2176                 bcopy (coding->spec.iso2022.initial_designation,
2177                        coding->spec.iso2022.current_designation,
2178                        sizeof coding->spec.iso2022.initial_designation);
                   /* Produce the end-of-line in the format CODING asks
                      for.  */
2179               if (coding->eol_type == CODING_EOL_LF
2180                   || coding->eol_type == CODING_EOL_UNDECIDED)
2181                 *dst++ = ISO_CODE_LF;
2182               else if (coding->eol_type == CODING_EOL_CRLF)
2183                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2184               else
2185                 *dst++ = ISO_CODE_CR;
2186               CODING_SPEC_ISO_BOL (coding) = 1;
2187             }
2188           else
2189             {
2190               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2191                 ENCODE_RESET_PLANE_AND_REGISTER;
2192               *dst++ = c;
2193             }
2194         }
2195       else if (ASCII_BYTE_P (c))
2196         ENCODE_ISO_CHARACTER (c);
2197       else if (SINGLE_BYTE_CHAR_P (c))
2198         {
               /* A non-ASCII single-byte character is copied as it is and
                  counted as an error.  */
2199           *dst++ = c;
2200           coding->errors++;
2201         }
2202       else if (coding->flags & CODING_FLAG_ISO_SAFE
2203                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2204         ENCODE_UNSAFE_CHARACTER (c);
2205       else
2206         ENCODE_ISO_CHARACTER (c);
2207
2208       coding->consumed_char++;
2209     }
2210
2211  label_end_of_loop:
       /* SRC_BASE, not SRC, is reported: a character that was only partly
          available is left for the next call.  */
2212   coding->consumed = src_base - source;
2213   coding->produced = coding->produced_char = dst - destination;
2214 }
2215
2216 \f
2217 /*** 4. SJIS and BIG5 handlers ***/
2218
2219 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2220    quite widely.  So, for the moment, Emacs supports them in the bare
2221    C code.  But, in the future, they may be supported only by CCL.  */
2222
2223 /* SJIS is a coding system encoding three character sets: ASCII, right
2224    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2225    as is.  A character of charset katakana-jisx0201 is encoded by
2226    "position-code + 0x80".  A character of charset japanese-jisx0208
2227    is encoded in two bytes; its two position-codes are divided and
2228    shifted so that they fit in the ranges below.
2229
2230    --- CODE RANGE of SJIS ---
2231    (character set)      (range)
2232    ASCII                0x00 .. 0x7F
2233    KATAKANA-JISX0201    0xA0 .. 0xDF
2234    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2235             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2236    -------------------------------
2237
2238 */
2239
2240 /* BIG5 is a coding system encoding two character sets: ASCII and
2241    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2242    character set and is encoded in two bytes.
2243
2244    --- CODE RANGE of BIG5 ---
2245    (character set)      (range)
2246    ASCII                0x00 .. 0x7F
2247    Big5 (1st byte)      0xA1 .. 0xFE
2248         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2249    --------------------------
2250
2251    Since the number of characters in Big5 is larger than maximum
2252    characters in Emacs' charset (96x96), it can't be handled as one
2253    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2254    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2255    contains frequently used characters and the latter contains less
2256    frequently used characters.  */
2257
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   The arguments are evaluated more than once, so they must not have
   side effects.  Both macros read all of their inputs before they
   store any output, so the same variables may be given for input and
   output, as in DECODE_BIG5 (c1, c2, charset, c1, c2).  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Convert the BIG5 code B1 B2 to CHARSET and the position-codes C1 and
   C2.  A first byte below 0xC9 belongs to `charset_big5_1', any other
   one to `charset_big5_2'.  Every argument is parenthesized in the
   expansion so that an expression may be given as well as a plain
   variable.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Convert CHARSET and the position-codes C1 and C2 to the BIG5 code B1
   B2.  This is the inverse of DECODE_BIG5.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2290
2291 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2292    Check if a text is encoded in SJIS.  If it is, return
2293    CODING_CATEGORY_MASK_SJIS, else return 0.

        A byte in 0x81..0x9F or 0xE0..0xEF is taken as the first byte of a
        two-byte code and must be followed by a byte in 0x40..0xFC other
        than 0x7F.  A byte above 0xEF is rejected.  All other bytes are
        accepted as they are.  If the text ends in the middle of a
        two-byte code, it is still reported as SJIS.  */
2294
2295 int
2296 detect_coding_sjis (src, src_end)
2297      unsigned char *src, *src_end;
2298 {
2299   int c;
2300   /* Dummy for ONE_MORE_BYTE.  */
2301   struct coding_system dummy_coding;
2302   struct coding_system *coding = &dummy_coding;
2303
2304   while (1)
2305     {
           /* When the source is used up, control is expected to go to
              label_end_of_loop below.  */
2306       ONE_MORE_BYTE (c);
2307       if (c >= 0x81)
2308         {
2309           if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF))
2310             {
                   /* First byte of a two-byte code; check the second.  */
2311               ONE_MORE_BYTE (c);
2312               if (c < 0x40 || c == 0x7F || c > 0xFC)
2313                 return 0;
2314             }
               /* Only 0xA0..0xDF and 0xF0..0xFF get here; the latter are not
                  used by SJIS.  */
2315           else if (c > 0xDF)
2316             return 0;
2317         }
2318     }
2319  label_end_of_loop:
2320   return CODING_CATEGORY_MASK_SJIS;
2321 }
2322
2323 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2324    Check if a text is encoded in BIG5.  If it is, return
2325    CODING_CATEGORY_MASK_BIG5, else return 0.

        The bytes are checked against the same code ranges that
        decode_coding_sjis_big5 accepts: a byte below 0x80 is ASCII, any
        other byte must be a first byte in 0xA1..0xFE, and it must be
        followed by a second byte in 0x40..0x7E or 0xA1..0xFE.  If the
        text ends in the middle of a two-byte code, it is still reported
        as BIG5.  */
2326
2327 int
2328 detect_coding_big5 (src, src_end)
2329      unsigned char *src, *src_end;
2330 {
2331   int c;
2332   /* Dummy for ONE_MORE_BYTE.  */
2333   struct coding_system dummy_coding;
2334   struct coding_system *coding = &dummy_coding;
2335
2336   while (1)
2337     {
           /* When the source is used up, control is expected to go to
              label_end_of_loop below.  */
2338       ONE_MORE_BYTE (c);
           /* ASCII is encoded as is.  */
2339       if (c < 0x80)
2340         continue;
           /* Neither 0x80..0xA0 nor 0xFF can start a Big5 character.  */
2341       if (c < 0xA1 || c > 0xFE)
2342         return 0;
2343       ONE_MORE_BYTE (c);
           /* The second byte must be in 0x40..0x7E or 0xA1..0xFE.  */
2344       if (c < 0x40 || (c > 0x7E && c < 0xA1) || c > 0xFE)
             return 0;
2345     }
2346  label_end_of_loop:
2347   return CODING_CATEGORY_MASK_BIG5;
2348 }
2349
2350 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2351    Check if a text is encoded in UTF-8.  If it is, return
2352    CODING_CATEGORY_MASK_UTF_8, else return 0.

        Only the structure of the byte sequences is checked: each leading
        octet must be followed by the number of extra octets it announces.
        Sequences of up to six octets are accepted.  If the text ends in
        the middle of a sequence, it is still reported as UTF-8.  */
2353
     /* Predicates classifying an octet C by its high-order bits: a
        one-octet character (0xxxxxxx), an extra (trailing) octet
        (10xxxxxx), or the leading octet of a sequence of two to six
        octets (110xxxxx up to 1111110x).  */
2354 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
2355 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
2356 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2357 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2358 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2359 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2360 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2361
2362 int
2363 detect_coding_utf_8 (src, src_end)
2364      unsigned char *src, *src_end;
2365 {
2366   unsigned char c;
       /* Number of extra octets still expected in the current sequence.  */
2367   int seq_maybe_bytes;
2368   /* Dummy for ONE_MORE_BYTE.  */
2369   struct coding_system dummy_coding;
2370   struct coding_system *coding = &dummy_coding;
2371
2372   while (1)
2373     {
           /* When the source is used up, control is expected to go to
              label_end_of_loop below.  */
2374       ONE_MORE_BYTE (c);
2375       if (UTF_8_1_OCTET_P (c))
2376         continue;
2377       else if (UTF_8_2_OCTET_LEADING_P (c))
2378         seq_maybe_bytes = 1;
2379       else if (UTF_8_3_OCTET_LEADING_P (c))
2380         seq_maybe_bytes = 2;
2381       else if (UTF_8_4_OCTET_LEADING_P (c))
2382         seq_maybe_bytes = 3;
2383       else if (UTF_8_5_OCTET_LEADING_P (c))
2384         seq_maybe_bytes = 4;
2385       else if (UTF_8_6_OCTET_LEADING_P (c))
2386         seq_maybe_bytes = 5;
2387       else
             /* An extra octet without a leading one, or 0xFE/0xFF.  */
2388         return 0;
2389
           /* Every announced extra octet must be of the form 10xxxxxx.  */
2390       do
2391         {
2392           ONE_MORE_BYTE (c);
2393           if (!UTF_8_EXTRA_OCTET_P (c))
2394             return 0;
2395           seq_maybe_bytes--;
2396         }
2397       while (seq_maybe_bytes > 0);
2398     }
2399
2400  label_end_of_loop:
2401   return CODING_CATEGORY_MASK_UTF_8;
2402 }
2403
2404 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2405    Check if a text starts with a UTF-16 byte order mark.  If its first
2406    two bytes are 0xFE 0xFF (big endian), return
2407    CODING_CATEGORY_MASK_UTF_16_BE; if they are 0xFF 0xFE (little
2408    endian), return CODING_CATEGORY_MASK_UTF_16_LE; else return 0.  */
2409
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if the 16-bit value VAL is a high surrogate, i.e. is in the
   range 0xD800..0xDBFF.  The upper six bits identify the range, so
   all of them have to be compared.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit value VAL is a low surrogate, i.e. is in the
   range 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2419
2420 int
2421 detect_coding_utf_16 (src, src_end)
2422      unsigned char *src, *src_end;
2423 {
2424   unsigned char c1, c2;
2425   /* Dummy for TWO_MORE_BYTES.  */
2426   struct coding_system dummy_coding;
2427   struct coding_system *coding = &dummy_coding;
2428
       /* Only the first two bytes are examined.  If fewer than two bytes
          are available, control is expected to go to label_end_of_loop
          below, so the result is 0.  */
2429   TWO_MORE_BYTES (c1, c2);
2430
       /* 0xFF 0xFE is taken as the little endian mark, 0xFE 0xFF as the
          big endian one.  */
2431   if ((c1 == 0xFF) && (c2 == 0xFE))
2432     return CODING_CATEGORY_MASK_UTF_16_LE;
2433   else if ((c1 == 0xFE) && (c2 == 0xFF))
2434     return CODING_CATEGORY_MASK_UTF_16_BE;
2435
2436  label_end_of_loop:
2437   return 0;
2438 }
2439
2440 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2441    If SJIS_P is nonzero, decode SJIS text, else decode BIG5 text.

        End-of-line conversion is done here as well, according to
        CODING->eol_type.  A byte sequence that is not valid in the
        selected coding system is counted in CODING->errors; its first
        byte is produced as it is and decoding resumes at the byte after
        it.

        On return, CODING->consumed and CODING->consumed_char are both the
        number of source bytes that were fully processed, and
        CODING->produced is the number of bytes stored at DESTINATION.
        CODING->produced_char is reset to zero on entry; it is not updated
        by name in this function, presumably EMIT_CHAR does that --
        confirm against the macro.  */
2442
2443 static void
2444 decode_coding_sjis_big5 (coding, source, destination,
2445                          src_bytes, dst_bytes, sjis_p)
2446      struct coding_system *coding;
2447      unsigned char *source, *destination;
2448      int src_bytes, dst_bytes;
2449      int sjis_p;
2450 {
2451   unsigned char *src = source;
2452   unsigned char *src_end = source + src_bytes;
2453   unsigned char *dst = destination;
2454   unsigned char *dst_end = destination + dst_bytes;
2455   /* SRC_BASE remembers the start position in source in each loop.
2456      The loop will be exited when there's not enough source code
2457      (within macro ONE_MORE_BYTE), or when there's not enough
2458      destination area to produce a character (within macro
2459      EMIT_CHAR).  */
2460   unsigned char *src_base;
2461   Lisp_Object translation_table;
2462
       /* Use the translation table of CODING, or the standard one if CODING
          has none, unless character translation is disabled.  */
2463   if (NILP (Venable_character_translation))
2464     translation_table = Qnil;
2465   else
2466     {
2467       translation_table = coding->translation_table_for_decode;
2468       if (NILP (translation_table))
2469         translation_table = Vstandard_translation_table_for_decode;
2470     }
2471
2472   coding->produced_char = 0;
2473   while (1)
2474     {
2475       int c, charset, c1, c2;
2476
2477       src_base = src;
2478       ONE_MORE_BYTE (c1);
2479
2480       if (c1 < 0x80)
2481         {
2482           charset = CHARSET_ASCII;
2483           if (c1 < 0x20)
2484             {
2485               if (c1 == '\r')
2486                 {
2487                   if (coding->eol_type == CODING_EOL_CRLF)
2488                     {
2489                       ONE_MORE_BYTE (c2);
                           /* CR LF is decoded as a single newline.  */
2490                       if (c2 == '\n')
2491                         c1 = c2;
2492                       else if (coding->mode
2493                                & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2494                         {
2495                           coding->result = CODING_FINISH_INCONSISTENT_EOL;
2496                           goto label_end_of_loop;
2497                         }
2498                       else
2499                         /* To process C2 again, SRC is subtracted by 1.  */
2500                         src--;
2501                     }
2502                   else if (coding->eol_type == CODING_EOL_CR)
2503                     c1 = '\n';
2504                 }
                   /* A bare newline is inconsistent with the CR and CRLF
                      formats.  */
2505               else if (c1 == '\n'
2506                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2507                        && (coding->eol_type == CODING_EOL_CR
2508                            || coding->eol_type == CODING_EOL_CRLF))
2509                 {
2510                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2511                   goto label_end_of_loop;
2512                 }
2513             }
2514         }
2515       else
2516         {
2517           if (sjis_p)
2518             {
2519               if (c1 >= 0xF0)
2520                 goto label_invalid_code;
2521               if (c1 < 0xA0 || c1 >= 0xE0)
2522                 {
2523                   /* SJIS -> JISX0208 */
2524                   ONE_MORE_BYTE (c2);
2525                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2526                     goto label_invalid_code;
2527                   DECODE_SJIS (c1, c2, c1, c2);
2528                   charset = charset_jisx0208;
2529                 }
2530               else
2531                 /* SJIS -> JISX0201-Kana */
2532                 charset = charset_katakana_jisx0201;
2533             }
2534           else
2535             {
2536               /* BIG5 -> Big5 */
2537               if (c1 < 0xA1 || c1 > 0xFE)
2538                 goto label_invalid_code;
2539               ONE_MORE_BYTE (c2);
2540               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2541                 goto label_invalid_code;
2542               DECODE_BIG5 (c1, c2, charset, c1, c2);
2543             }
2544         }
2545
           /* NOTE(review): C2 has no value here for ASCII and
              JISX0201-Kana; presumably DECODE_ISO_CHARACTER does not use it
              for such charsets -- confirm against the macro.  */
2546       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2547       EMIT_CHAR (c);
2548       continue;
2549
2550     label_invalid_code:
           /* Count the error, go back to the first byte of the offending
              sequence, and produce that byte alone as it is.  */
2551       coding->errors++;
2552       src = src_base;
2553       c = *src++;
2554       EMIT_CHAR (c);
2555     }
2556
2557  label_end_of_loop:
2558   coding->consumed = coding->consumed_char = src_base - source;
2559   coding->produced = dst - destination;
2560   return;
2561 }
2562
2563 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2564    This function can encode charsets `ascii', `katakana-jisx0201',
2565    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
2566    are sure that all these charsets are registered as official charset
2567    (i.e. do not have extended leading-codes).  Characters of other
2568    charsets are produced without any encoding.  If SJIS_P is nonzero,
2569    encode SJIS text, else encode BIG5 text.

        A newline is produced in the format given by CODING->eol_type.
        A carriage return is handled like a newline only when
        CODING_MODE_SELECTIVE_DISPLAY is set in CODING->mode; otherwise it
        is produced as it is.

        On return, CODING->consumed is the number of source bytes that
        were fully processed, and CODING->produced and
        CODING->produced_char are both the number of bytes stored at
        DESTINATION.  CODING->consumed_char is incremented for each
        character encoded; it is not reset here.  */
2570
2571 static void
2572 encode_coding_sjis_big5 (coding, source, destination,
2573                          src_bytes, dst_bytes, sjis_p)
2574      struct coding_system *coding;
2575      unsigned char *source, *destination;
2576      int src_bytes, dst_bytes;
2577      int sjis_p;
2578 {
2579   unsigned char *src = source;
2580   unsigned char *src_end = source + src_bytes;
2581   unsigned char *dst = destination;
2582   unsigned char *dst_end = destination + dst_bytes;
2583   /* SRC_BASE remembers the start position in source in each loop.
2584      The loop will be exited when there's not enough source text to
2585      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2586      there's not enough destination area to produce encoded codes
2587      (within macro EMIT_BYTES).  */
2588   unsigned char *src_base;
2589   Lisp_Object translation_table;
2590
       /* Use the translation table of CODING, or the standard one if CODING
          has none, unless character translation is disabled.  */
2591   if (NILP (Venable_character_translation))
2592     translation_table = Qnil;
2593   else
2594     {
2595       translation_table = coding->translation_table_for_encode;
2596       if (NILP (translation_table))
2597         translation_table = Vstandard_translation_table_for_encode;
2598     }
2599
2600   while (1)
2601     {
2602       int c, charset, c1, c2;
2603
2604       src_base = src;
2605       ONE_MORE_CHAR (c);
2606
2607       /* Now encode the character C.  */
2608       if (SINGLE_BYTE_CHAR_P (c))
2609         {
2610           switch (c)
2611             {
2612             case '\r':
                   /* The parentheses are necessary: `!' binds tighter than
                      `&', so without them the flag would never be
                      tested.  */
2613               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2614                 {
2615                   EMIT_ONE_BYTE (c);
2616                   break;
2617                 }
2618               c = '\n';
                   /* Fall through to treat '\r' as '\n'.  */
2619             case '\n':
2620               if (coding->eol_type == CODING_EOL_CRLF)
2621                 {
2622                   EMIT_TWO_BYTES ('\r', c);
2623                   break;
2624                 }
2625               else if (coding->eol_type == CODING_EOL_CR)
2626                 c = '\r';
                   /* Fall through to produce the end-of-line byte.  */
2627             default:
2628               EMIT_ONE_BYTE (c);
2629             }
2630         }
2631       else
2632         {
2633           SPLIT_CHAR (c, charset, c1, c2);
2634           if (sjis_p)
2635             {
2636               if (charset == charset_jisx0208
2637                   || charset == charset_jisx0208_1978)
2638                 {
2639                   ENCODE_SJIS (c1, c2, c1, c2);
2640                   EMIT_TWO_BYTES (c1, c2);
2641                 }
2642               else if (charset == charset_katakana_jisx0201)
2643                 EMIT_ONE_BYTE (c1 | 0x80);
2644               else if (charset == charset_latin_jisx0201)
2645                 EMIT_ONE_BYTE (c1);
2646               else
2647                 /* There's no way other than producing the internal
2648                    codes as is.  */
2649                 EMIT_BYTES (src_base, src);
2650             }
2651           else
2652             {
2653               if (charset == charset_big5_1 || charset == charset_big5_2)
2654                 {
2655                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
2656                   EMIT_TWO_BYTES (c1, c2);
2657                 }
2658               else
2659                 /* There's no way other than producing the internal
2660                    codes as is.  */
2661                 EMIT_BYTES (src_base, src);
2662             }
2663         }
2664       coding->consumed_char++;
2665     }
2666
2667  label_end_of_loop:
       /* SRC_BASE, not SRC, is reported: a character that could not be
          encoded completely is left for the next call.  */
2668   coding->consumed = src_base - source;
2669   coding->produced = coding->produced_char = dst - destination;
2670 }
2671
2672 \f
2673 /*** 5. CCL handlers ***/
2674
2675 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2676    Check if a text is encoded in a coding system of which
2677    encoder/decoder are written in CCL program.  If it is, return
2678    CODING_CATEGORY_MASK_CCL, else return 0.

        The text qualifies when every byte of it has a nonzero entry in
        the `valid_codes' table of the coding system registered for
        CODING_CATEGORY_IDX_CCL.  If no coding system is registered there,
        0 is returned without looking at the text.  */
2679
2680 int
2681 detect_coding_ccl (src, src_end)
2682      unsigned char *src, *src_end;
2683 {
       /* Table indexed by byte value; nonzero means the byte is valid.  */
2684   unsigned char *valid;
2685   int c;
2686   /* Dummy for ONE_MORE_BYTE.  */
2687   struct coding_system dummy_coding;
2688   struct coding_system *coding = &dummy_coding;
2689
2690   /* No coding system is assigned to coding-category-ccl.  */
2691   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2692     return 0;
2693
2694   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2695   while (1)
2696     {
           /* When the source is used up, control is expected to go to
              label_end_of_loop below.  */
2697       ONE_MORE_BYTE (c);
2698       if (! valid[c])
2699         return 0;
2700     }
2701  label_end_of_loop:
2702   return CODING_CATEGORY_MASK_CCL;
2703 }
2704
2705 \f
2706 /*** 6. End-of-line handlers ***/
2707
2708 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Copy the SRC_BYTES bytes at SOURCE to DESTINATION, converting the
        end-of-line format given by CODING->eol_type to a newline: CR LF
        for CODING_EOL_CRLF, CR for CODING_EOL_CR.  For any other type
        the bytes are passed through unchanged.

        If CODING_MODE_INHIBIT_INCONSISTENT_EOL is set in CODING->mode and
        an end-of-line that does not match the expected format is found,
        decoding stops before it and CODING->result is set to
        CODING_FINISH_INCONSISTENT_EOL.

        On return, CODING->consumed and CODING->consumed_char are both the
        number of source bytes that were fully processed, and
        CODING->produced is the number of bytes stored.  */
2709
2710 static void
2711 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2712      struct coding_system *coding;
2713      unsigned char *source, *destination;
2714      int src_bytes, dst_bytes;
2715 {
2716   unsigned char *src = source;
2717   unsigned char *dst = destination;
2718   unsigned char *src_end = src + src_bytes;
2719   unsigned char *dst_end = dst + dst_bytes;
2720   Lisp_Object translation_table;
2721   /* SRC_BASE remembers the start position in source in each loop.
2722      The loop will be exited when there's not enough source code
2723      (within macro ONE_MORE_BYTE), or when there's not enough
2724      destination area to produce a character (within macro
2725      EMIT_CHAR).  */
2726   unsigned char *src_base;
2727   int c;
2728
       /* No character translation is done here.  NOTE(review): the
          variable is not used by name below; presumably one of the macros
          refers to it -- confirm.  */
2729   translation_table = Qnil;
2730   switch (coding->eol_type)
2731     {
2732     case CODING_EOL_CRLF:
2733       while (1)
2734         {
2735           src_base = src;
2736           ONE_MORE_BYTE (c);
2737           if (c == '\r')
2738             {
2739               ONE_MORE_BYTE (c);
                   /* CR LF leaves the newline in C.  A CR not followed by LF
                      is either an inconsistency or produced as it is.  */
2740               if (c != '\n')
2741                 {
2742                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2743                     {
2744                       coding->result = CODING_FINISH_INCONSISTENT_EOL;
2745                       goto label_end_of_loop;
2746                     }
                       /* Step back so that the byte after the CR is read
                          again in the next iteration.  */
2747                   src--;
2748                   c = '\r';
2749                 }
2750             }
2751           else if (c == '\n'
2752                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2753             {
2754               coding->result = CODING_FINISH_INCONSISTENT_EOL;
2755               goto label_end_of_loop;
2756             }
2757           EMIT_CHAR (c);
2758         }
2759       break;
2760
2761     case CODING_EOL_CR:
2762       while (1)
2763         {
2764           src_base = src;
2765           ONE_MORE_BYTE (c);
2766           if (c == '\n')
2767             {
2768               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2769                 {
2770                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2771                   goto label_end_of_loop;
2772                 }
2773             }
2774           else if (c == '\r')
2775             c = '\n';
2776           EMIT_CHAR (c);
2777         }
2778       break;
2779
2780     default:                    /* no need for EOL handling */
2781       while (1)
2782         {
2783           src_base = src;
2784           ONE_MORE_BYTE (c);
2785           EMIT_CHAR (c);
2786         }
2787     }
2788
2789  label_end_of_loop:
2790   coding->consumed = coding->consumed_char = src_base - source;
2791   coding->produced = dst - destination;
2792   return;
2793 }
2794
2795 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2796    format of end-of-line according to `coding->eol_type'.  It also
2797    converts multibyte form 8-bit characters to unibyte if
2798    CODING->src_multibyte is nonzero.  If `coding->mode &
2799    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
2800    also means end-of-line.

        If CODING->src_multibyte is nonzero and the source ends with
        LEADING_CODE_8_BIT_CONTROL, that last byte is left unconsumed and
        CODING->result is set to CODING_FINISH_INSUFFICIENT_SRC.  An empty
        source is handled without looking at any byte.

        Except for the CRLF format, the text is copied in one step.  If
        DST_BYTES is zero the whole source is copied; otherwise at most
        DST_BYTES bytes are copied and CODING->result is set to
        CODING_FINISH_INSUFFICIENT_DST when the source does not fit.

        On return, CODING->consumed is the number of source bytes
        processed, and CODING->produced and CODING->produced_char are both
        the number of bytes stored at DESTINATION.  */
2801
2802 static void
2803 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2804      struct coding_system *coding;
2805      unsigned char *source, *destination;
2806      int src_bytes, dst_bytes;
2807 {
2808   unsigned char *src = source;
2809   unsigned char *dst = destination;
2810   unsigned char *src_end = src + src_bytes;
2811   unsigned char *dst_end = dst + dst_bytes;
2812   Lisp_Object translation_table;
2813   /* SRC_BASE remembers the start position in source in each loop.
2814      The loop will be exited when there's not enough source text to
2815      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2816      there's not enough destination area to produce encoded codes
2817      (within macro EMIT_BYTES).  */
2818   unsigned char *src_base;
2819   int c;
2820   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
2821
2822   translation_table = Qnil;
       /* Don't split a two-byte form of an 8-bit character.  SRC_BYTES is
          checked first so that no byte in front of an empty source is
          read.  */
2823   if (coding->src_multibyte && src_bytes > 0
2824       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
2825     {
2826       src_end--;
2827       src_bytes--;
2828       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
2829     }
2830
2831   if (coding->eol_type == CODING_EOL_CRLF)
2832     {
           /* The output may be longer than the input, so the bytes are
              produced one by one with a check of the destination size.  */
2833       while (src < src_end)
2834         {
2835           src_base = src;
2836           c = *src++;
2837           if (c >= 0x20)
2838             EMIT_ONE_BYTE (c);
2839           else if (c == '\n' || (c == '\r' && selective_display))
2840             EMIT_TWO_BYTES ('\r', '\n');
2841           else
2842             EMIT_ONE_BYTE (c);
2843         }
           /* The whole source has been consumed.  */
2844       src_base = src;
2845     label_end_of_loop:
2846       ;
2847     }
2848   else
2849     {
           /* The output has the same length as the input, so copy first and
              then fix the end-of-line bytes in the destination.  */
2850       if (!dst_bytes || src_bytes <= dst_bytes)
2851         {
2852           safe_bcopy (src, dst, src_bytes);
2853           src_base = src_end;
2854           dst += src_bytes;
2855         }
2856       else
2857         {
               /* Copy only what fits, again without splitting a two-byte
                  form of an 8-bit character.  DST_BYTES is positive
                  here.  */
2858           if (coding->src_multibyte
2859               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
2860             dst_bytes--;
2861           safe_bcopy (src, dst, dst_bytes);
2862           src_base = src + dst_bytes;
2863           dst = destination + dst_bytes;
2864           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2865         }
2866       if (coding->eol_type == CODING_EOL_CR)
2867         {
2868           for (src = destination; src < dst; src++)
2869             if (*src == '\n') *src = '\r';
2870         }
2871       else if (selective_display)
2872         {
2873           for (src = destination; src < dst; src++)
2874             if (*src == '\r') *src = '\n';
2875         }
2876     }
2877   if (coding->src_multibyte)
2878     dst = destination + str_as_unibyte (destination, dst - destination);
2879
2880   coding->consumed = src_base - source;
2881   coding->produced = dst - destination;
2882   coding->produced_char = coding->produced;
2883 }
2884
2885 \f
2886 /*** 7. C library functions ***/
2887
2888 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2889    has a property `coding-system'.  The value of this property is a
2890    vector of length 5 (called as coding-vector).  Among elements of
2891    this vector, the first (element[0]) and the fifth (element[4])
2892    carry important information for decoding/encoding.  Before
2893    decoding/encoding, this information should be set in fields of a
2894    structure of type `coding_system'.
2895
2896    A value of property `coding-system' can be a symbol of another
2897    subsidiary coding-system.  In that case, Emacs gets coding-vector
2898    from that symbol.
2899
2900    `element[0]' contains information to be set in `coding->type'.  The
2901    value and its meaning is as follows:
2902
2903    0 -- coding_type_emacs_mule
2904    1 -- coding_type_sjis
2905    2 -- coding_type_iso2022
2906    3 -- coding_type_big5
2907    4 -- coding_type_ccl encoder/decoder written in CCL
2908    nil -- coding_type_no_conversion
2909    t -- coding_type_undecided (automatic conversion on decoding,
2910                                no-conversion on encoding)
2911
2912    `element[4]' contains information to be set in `coding->flags' and
2913    `coding->spec'.  The meaning varies by `coding->type'.
2914
2915    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2916    of length 32 (of which the first 17 sub-elements are used now).
2917    Meanings of these sub-elements are:
2918
2919    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2920         If the value is an integer of valid charset, the charset is
2921         assumed to be designated to graphic register N initially.
2922
2923         If the value is minus, it is a minus value of charset which
2924         reserves graphic register N, which means that the charset is
2925         not designated initially but should be designated to graphic
2926         register N just before encoding a character in that charset.
2927
2928         If the value is nil, graphic register N is never used on
2929         encoding.
2930
2931    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2932         Each value takes t or nil.  See the section ISO2022 of
2933         `coding.h' for more information.
2934
2935    If `coding->type' is `coding_type_big5', element[4] is t to denote
2936    BIG5-ETen or nil to denote BIG5-HKU.
2937
2938    If `coding->type' takes the other value, element[4] is ignored.
2939
2940    Emacs Lisp's coding system also carries information about format of
2941    end-of-line in a value of property `eol-type'.  If the value is
2942    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2943    means CODING_EOL_CR.  If it is not integer, it should be a vector
2944    of subsidiary coding systems of which property `eol-type' has one
2945    of above values.
2946
2947 */
2948
2949 /* Extract information for decoding/encoding from the Lisp symbol
2950    CODING_SYSTEM and set it in CODING.  Return 0 on success.  If
2951    CODING_SYSTEM is invalid, set up CODING so that no conversion is
2952    done (LF end-of-line, no pre/post conversion) and return -1.  */
2953
2954 int
2955 setup_coding_system (coding_system, coding)
2956      Lisp_Object coding_system;
2957      struct coding_system *coding;
2958 {
2959   Lisp_Object coding_spec, coding_type, eol_type, plist;
2960   Lisp_Object val;
2961   int i;
2962
2963   /* Initialize some fields required for all kinds of coding systems.  */
2964   coding->symbol = coding_system;
2965   coding->common_flags = 0;
2966   coding->mode = 0;
2967   coding->heading_ascii = -1;
2968   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2969   coding->composing = COMPOSITION_DISABLED;
2970   coding->cmp_data = NULL;
2971
2972   if (NILP (coding_system))
2973     goto label_invalid_coding_system;
2974
2975   coding_spec = Fget (coding_system, Qcoding_system);
2976   /* The spec must be a 5-element vector whose element[3] is a cons.  */
2977   if (!VECTORP (coding_spec)
2978       || XVECTOR (coding_spec)->size != 5
2979       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2980     goto label_invalid_coding_system;
2981   /* A vector eol-type means the EOL format is still to be detected.  */
2982   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2983   if (VECTORP (eol_type))
2984     {
2985       coding->eol_type = CODING_EOL_UNDECIDED;
2986       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2987     }
2988   else if (XFASTINT (eol_type) == 1)
2989     {
2990       coding->eol_type = CODING_EOL_CRLF;
2991       coding->common_flags
2992         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2993     }
2994   else if (XFASTINT (eol_type) == 2)
2995     {
2996       coding->eol_type = CODING_EOL_CR;
2997       coding->common_flags
2998         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2999     }
3000   else
3001     coding->eol_type = CODING_EOL_LF;
3002
3003   coding_type = XVECTOR (coding_spec)->contents[0];
3004   /* Try short cut: t is undecided, any other symbol no conversion.  */
3005   if (SYMBOLP (coding_type))
3006     {
3007       if (EQ (coding_type, Qt))
3008         {
3009           coding->type = coding_type_undecided;
3010           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3011         }
3012       else
3013         coding->type = coding_type_no_conversion;
3014       return 0;
3015     }
3016
3017   /* Get values of coding system properties:
3018      `post-read-conversion', `pre-write-conversion',
3019      `translation-table-for-decode', `translation-table-for-encode'.  */
3020   plist = XVECTOR (coding_spec)->contents[3];
3021   /* Pre & post conversion functions should be disabled if
3022      inhibit_pre_post_conversion is nonzero.  This is the case that a
3023      code conversion function is called while those functions are running.  */
3024   if (! inhibit_pre_post_conversion)
3025     {
3026       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3027       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3028     }
3029   val = Fplist_get (plist, Qtranslation_table_for_decode);
3030   if (SYMBOLP (val))
3031     val = Fget (val, Qtranslation_table_for_decode);
3032   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3033   val = Fplist_get (plist, Qtranslation_table_for_encode);
3034   if (SYMBOLP (val))
3035     val = Fget (val, Qtranslation_table_for_encode);
3036   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3037   val = Fplist_get (plist, Qcoding_category);  /* Must yield an integer index.  */
3038   if (!NILP (val))
3039     {
3040       val = Fget (val, Qcoding_category_index);
3041       if (INTEGERP (val))
3042         coding->category_idx = XINT (val);
3043       else
3044         goto label_invalid_coding_system;
3045     }
3046   else
3047     goto label_invalid_coding_system;
3048
3049   /* If the coding system has non-nil `composition' property, enable
3050      composition handling.  */
3051   val = Fplist_get (plist, Qcomposition);
3052   if (!NILP (val))
3053     coding->composing = COMPOSITION_NO;
3054
3055   switch (XFASTINT (coding_type))
3056     {
3057     case 0:
3058       coding->type = coding_type_emacs_mule;
3059       if (!NILP (coding->post_read_conversion))
3060         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3061       if (!NILP (coding->pre_write_conversion))
3062         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3063       break;
3064
3065     case 1:
3066       coding->type = coding_type_sjis;
3067       coding->common_flags
3068         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3069       break;
3070
3071     case 2:
3072       coding->type = coding_type_iso2022;
3073       coding->common_flags
3074         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3075       {
3076         Lisp_Object val, temp;
3077         Lisp_Object *flags;
3078         int i, charset, reg_bits = 0;
3079
3080         val = XVECTOR (coding_spec)->contents[4];
3081
3082         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3083           goto label_invalid_coding_system;
3084
3085         flags = XVECTOR (val)->contents;
3086         coding->flags
3087           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3088              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3089              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3090              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3091              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3092              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3093              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3094              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3095              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3096              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3097              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3098              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3099              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3100              );
3101
3102         /* Invoke graphic register 0 to plane 0.  */
3103         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3104         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3105         CODING_SPEC_ISO_INVOCATION (coding, 1)
3106           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3107         /* Not single shifting at first.  */
3108         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3109         /* Beginning of buffer should also be regarded as bol. */
3110         CODING_SPEC_ISO_BOL (coding) = 1;
3111
3112         for (charset = 0; charset <= MAX_CHARSET; charset++)
3113           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3114         val = Vcharset_revision_alist;
3115         while (CONSP (val))
3116           {
3117             charset = get_charset_id (Fcar_safe (XCAR (val)));
3118             if (charset >= 0
3119                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3120                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3121               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3122             val = XCDR (val);
3123           }
3124
3125         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3126            FLAGS[REG] can be one of below:
3127                 integer CHARSET: CHARSET occupies register REG,
3128                 t: designate nothing to REG initially, but can be used
3129                   by any charsets,
3130                 list of integer, nil, or t: designate the first
3131                   element (if integer) to REG initially, the remaining
3132                   elements (if integer) is designated to REG on request,
3133                   if an element is t, REG can be used by any charsets,
3134                 nil: REG is never used.  */
3135         for (charset = 0; charset <= MAX_CHARSET; charset++)
3136           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3137             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3138         for (i = 0; i < 4; i++)
3139           {
3140             if (INTEGERP (flags[i])
3141                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3142                 || (charset = get_charset_id (flags[i])) >= 0)
3143               {
3144                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3145                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3146               }
3147             else if (EQ (flags[i], Qt))
3148               {
3149                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3150                 reg_bits |= 1 << i;
3151                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3152               }
3153             else if (CONSP (flags[i]))
3154               {
3155                 Lisp_Object tail;
3156                 tail = flags[i];
3157
3158                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3159                 if (INTEGERP (XCAR (tail))
3160                     && (charset = XINT (XCAR (tail)),
3161                         CHARSET_VALID_P (charset))
3162                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3163                   {
3164                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3165                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3166                   }
3167                 else
3168                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3169                 tail = XCDR (tail);
3170                 while (CONSP (tail))
3171                   {
3172                     if (INTEGERP (XCAR (tail))
3173                         && (charset = XINT (XCAR (tail)),
3174                             CHARSET_VALID_P (charset))
3175                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3176                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3177                         = i;
3178                     else if (EQ (XCAR (tail), Qt))
3179                       reg_bits |= 1 << i;
3180                     tail = XCDR (tail);
3181                   }
3182               }
3183             else
3184               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3185
3186             CODING_SPEC_ISO_DESIGNATION (coding, i)
3187               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3188           }
3189
3190         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3191           {
3192             /* REG 1 can be used only by locking shift in 7-bit env.  */
3193             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3194               reg_bits &= ~2;
3195             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3196               /* Without any shifting, only REG 0 and 1 can be used.  */
3197               reg_bits &= 3;
3198           }
3199
3200         if (reg_bits)
3201           for (charset = 0; charset <= MAX_CHARSET; charset++)
3202             {
3203               if (CHARSET_VALID_P (charset)
3204                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3205                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3206                 {
3207                   /* There exist some default graphic registers to be
3208                      used by CHARSET.  */
3209
3210                   /* We had better avoid designating a charset of
3211                      CHARS96 to REG 0 as far as possible.  */
3212                   if (CHARSET_CHARS (charset) == 96)
3213                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3214                       = (reg_bits & 2
3215                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3216                   else
3217                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3218                       = (reg_bits & 1
3219                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3220                 }
3221             }
3222       }
3223       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3224       coding->spec.iso2022.last_invalid_designation_register = -1;
3225       break;
3226
3227     case 3:
3228       coding->type = coding_type_big5;
3229       coding->common_flags
3230         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3231       coding->flags
3232         = (NILP (XVECTOR (coding_spec)->contents[4])
3233            ? CODING_FLAG_BIG5_HKU
3234            : CODING_FLAG_BIG5_ETEN);
3235       break;
3236
3237     case 4:
3238       coding->type = coding_type_ccl;
3239       coding->common_flags
3240         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3241       {
3242         val = XVECTOR (coding_spec)->contents[4];
3243         if (! CONSP (val)
3244             || setup_ccl_program (&(coding->spec.ccl.decoder),
3245                                   XCAR (val)) < 0
3246             || setup_ccl_program (&(coding->spec.ccl.encoder),
3247                                   XCDR (val)) < 0)
3248           goto label_invalid_coding_system;
3249         /* Mark valid codes: integers 0..255 or (START . END) ranges.  */
3250         bzero (coding->spec.ccl.valid_codes, 256);
3251         val = Fplist_get (plist, Qvalid_codes);
3252         if (CONSP (val))
3253           {
3254             Lisp_Object this;
3255
3256             for (; CONSP (val); val = XCDR (val))
3257               {
3258                 this = XCAR (val);
3259                 if (INTEGERP (this)
3260                     && XINT (this) >= 0 && XINT (this) < 256)
3261                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3262                 else if (CONSP (this)
3263                          && INTEGERP (XCAR (this))
3264                          && INTEGERP (XCDR (this)))
3265                   {
3266                     int start = XINT (XCAR (this));
3267                     int end = XINT (XCDR (this));
3268
3269                     if (start >= 0 && start <= end && end < 256)
3270                       while (start <= end)
3271                         coding->spec.ccl.valid_codes[start++] = 1;
3272                   }
3273               }
3274           }
3275       }
3276       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3277       coding->spec.ccl.cr_carryover = 0;
3278       break;
3279
3280     case 5:
3281       coding->type = coding_type_raw_text;
3282       break;
3283
3284     default:
3285       goto label_invalid_coding_system;
3286     }
3287   return 0;
3288   /* Failure path: leave CODING in a harmless no-conversion state.  */
3289  label_invalid_coding_system:
3290   coding->type = coding_type_no_conversion;
3291   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3292   coding->common_flags = 0;
3293   coding->eol_type = CODING_EOL_LF;
3294   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3295   return -1;
3296 }
3297
3298 /* Release the whole chain of composition data blocks of CODING.  */
3299
3300 void
3301 coding_free_composition_data (coding)
3302      struct coding_system *coding;
3303 {
3304   struct composition_data *block = coding->cmp_data, *following;
3305
3306   if (block == NULL)
3307     return;
3308   /* The blocks are chained through `prev' and `next'.  Rewind to
3309      the first block of the chain before releasing anything.  */
3310   for (; block->prev != NULL; block = block->prev)
3311     ;
3312   /* Free each block, fetching its successor before it goes away.  */
3313   for (; block != NULL; block = following)
3314     {
3315       following = block->next;
3316       xfree (block);
3317     }
3318   coding->cmp_data = NULL;
3319 }
3320
3321 /* Store POS in the `char_offset' member of every composition data
3322    block reachable from coding->cmp_data through `next' links.  */
3323
3324 void
3325 coding_adjust_composition_offset (coding, pos)
3326      struct coding_system *coding;
3327      int pos;
3328 {
3329   struct composition_data *block = coding->cmp_data;
3330
3331   for (; block != NULL; block = block->next)
3332     block->char_offset = pos;
3333 }
3334
3335 /* Turn CODING into raw-text, or into one of raw-text's subsidiaries
3336    when the eol_type already stored in CODING is decided.  CODING
3337    must have been set up for some coding system beforehand.  Nothing
3338    is done if CODING is already of type raw-text.  */
3339
3340 void
3341 setup_raw_text_coding_system (coding)
3342      struct coding_system *coding;
3343 {
3344   Lisp_Object eol_variants;
3345
3346   if (coding->type == coding_type_raw_text)
3347     return;
3348
3349   coding->symbol = Qraw_text;
3350   coding->type = coding_type_raw_text;
3351   if (coding->eol_type != CODING_EOL_UNDECIDED)
3352     {
3353       /* Pick the subsidiary matching the known end-of-line format.  */
3354       eol_variants = Fget (Qraw_text, Qeol_type);
3355       if (VECTORP (eol_variants) && XVECTOR (eol_variants)->size == 3)
3356         coding->symbol
3357           = XVECTOR (eol_variants)->contents[coding->eol_type];
3358     }
3359   /* Redo the full setup for the symbol chosen above.  */
3360   setup_coding_system (coding->symbol, coding);
3361 }
3362
3363 /* Emacs has a mechanism to automatically detect a coding system if it
3364    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3365    it's impossible to distinguish some coding systems accurately
3366    because they use the same range of codes.  So, at first, coding
3367    systems are categorized into the following categories:
3368
3369    o coding-category-emacs-mule
3370
3371         The category for a coding system which has the same code range
3372         as Emacs' internal format.  Assigned the coding-system (Lisp
3373         symbol) `emacs-mule' by default.
3374
3375    o coding-category-sjis
3376
3377         The category for a coding system which has the same code range
3378         as SJIS.  Assigned the coding-system (Lisp
3379         symbol) `japanese-shift-jis' by default.
3380
3381    o coding-category-iso-7
3382
3383         The category for a coding system which has the same code range
3384         as ISO2022 of 7-bit environment.  This doesn't use any locking
3385         shift and single shift functions.  This can encode/decode all
3386         charsets.  Assigned the coding-system (Lisp symbol)
3387         `iso-2022-7bit' by default.
3388
3389    o coding-category-iso-7-tight
3390
3391         Same as coding-category-iso-7 except that this can
3392         encode/decode only the specified charsets.
3393
3394    o coding-category-iso-8-1
3395
3396         The category for a coding system which has the same code range
3397         as ISO2022 of 8-bit environment and graphic plane 1 used only
3398         for DIMENSION1 charset.  This doesn't use any locking shift
3399         and single shift functions.  Assigned the coding-system (Lisp
3400         symbol) `iso-latin-1' by default.
3401
3402    o coding-category-iso-8-2
3403
3404         The category for a coding system which has the same code range
3405         as ISO2022 of 8-bit environment and graphic plane 1 used only
3406         for DIMENSION2 charset.  This doesn't use any locking shift
3407         and single shift functions.  Assigned the coding-system (Lisp
3408         symbol) `japanese-iso-8bit' by default.
3409
3410    o coding-category-iso-7-else
3411
3412         The category for a coding system which has the same code range
3413         as ISO2022 of 7-bit environment but uses locking shift or
3414         single shift functions.  Assigned the coding-system (Lisp
3415         symbol) `iso-2022-7bit-lock' by default.
3416
3417    o coding-category-iso-8-else
3418
3419         The category for a coding system which has the same code range
3420         as ISO2022 of 8-bit environment but uses locking shift or
3421         single shift functions.  Assigned the coding-system (Lisp
3422         symbol) `iso-2022-8bit-ss2' by default.
3423
3424    o coding-category-big5
3425
3426         The category for a coding system which has the same code range
3427         as BIG5.  Assigned the coding-system (Lisp symbol)
3428         `cn-big5' by default.
3429
3430    o coding-category-utf-8
3431
3432         The category for a coding system which has the same code range
3433         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3434         symbol) `utf-8' by default.
3435
3436    o coding-category-utf-16-be
3437
3438         The category for a coding system in which a text has an
3439         Unicode signature (cf. Unicode Standard) in the order of BIG
3440         endian at the head.  Assigned the coding-system (Lisp symbol)
3441         `utf-16-be' by default.
3442
3443    o coding-category-utf-16-le
3444
3445         The category for a coding system in which a text has an
3446         Unicode signature (cf. Unicode Standard) in the order of
3447         LITTLE endian at the head.  Assigned the coding-system (Lisp
3448         symbol) `utf-16-le' by default.
3449
3450    o coding-category-ccl
3451
3452         The category for a coding system of which encoder/decoder is
3453         written in CCL programs.  The default value is nil, i.e., no
3454         coding system is assigned.
3455
3456    o coding-category-binary
3457
3458         The category for a coding system not categorized in any of the
3459         above.  Assigned the coding-system (Lisp symbol)
3460         `no-conversion' by default.
3461
3462    Each of them is a Lisp symbol and the value is an actual
3463    `coding-system' (this is also a Lisp symbol) assigned by a user.
3464    What Emacs does actually is to detect a category of coding system.
3465    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3466    decide only one possible category, it selects a category of the
3467    highest priority.  Priorities of categories are also specified by a
3468    user in a Lisp variable `coding-category-list'.
3469
3470 */
3471
3472 static
3473 int ascii_skip_code[256];  /* Indexed by byte; nonzero entries are skipped by detect_coding_mask.  */
3474
3475 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3476    Return an integer in which the flag bits (macros
3477    CODING_CATEGORY_MASK_XXX in `coding.h') of the possible coding
3478    categories are set, or 0 if the text holds nothing but bytes marked
3479    in ascii_skip_code.  If PRIORITIES is non-NULL, it should point the
3480    table `coding_priorities'; in that case only the flag bit of the
3481    first matching entry (or the raw-text bit if none) is returned.
3482
3483    The number of leading bytes skipped is returned as *SKIP.  */
3484
3485 static int
3486 detect_coding_mask (source, src_bytes, priorities, skip)
3487      unsigned char *source;
3488      int src_bytes, *priorities, *skip;
3489 {
3490   register unsigned char c;
3491   unsigned char *src = source, *src_end = source + src_bytes;
3492   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3493   int i;
3494
3495   /* At first, skip all ASCII characters and control characters except
3496      for three ISO2022 specific control characters.  */
3497   ascii_skip_code[ISO_CODE_SO] = 0;
3498   ascii_skip_code[ISO_CODE_SI] = 0;
3499   ascii_skip_code[ISO_CODE_ESC] = 0;
3500
3501  label_loop_detect_coding:
3502   while (src < src_end && ascii_skip_code[*src]) src++;
3503   *skip = src - source;
3504
3505   if (src >= src_end)
3506     /* We found nothing other than ASCII.  There's nothing to do.  */
3507     return 0;
3508
3509   c = *src;
3510   /* The text seems to be encoded in some multilingual coding system.
3511      Now, try to find in which coding system the text is encoded.  */
3512   if (c < 0x80)
3513     {
3514       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3515       /* C is an ISO2022 specific control code of C0.  */
3516       mask = detect_coding_iso2022 (src, src_end);
3517       if (mask == 0)
3518         {
3519           /* No valid ISO2022 code follows C.  Try again.  */
3520           src++;
3521           if (c == ISO_CODE_ESC)
3522             ascii_skip_code[ISO_CODE_ESC] = 1;
3523           else
3524             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3525           goto label_loop_detect_coding;
3526         }
3527       if (priorities)
3528         {
3529           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3530             {
3531               if (mask & priorities[i])
3532                 return priorities[i];
3533             }
3534           return CODING_CATEGORY_MASK_RAW_TEXT;
3535         }
3536     }
3537   else
3538     {
3539       int try;
3540
3541       if (c < 0xA0)
3542         {
3543           /* C is the first byte of SJIS character code,
3544              or a leading-code of Emacs' internal format (emacs-mule),
3545              or the first byte of UTF-16.  */
3546           try = (CODING_CATEGORY_MASK_SJIS
3547                   | CODING_CATEGORY_MASK_EMACS_MULE
3548                   | CODING_CATEGORY_MASK_UTF_16_BE
3549                   | CODING_CATEGORY_MASK_UTF_16_LE);
3550
3551           /* Or, if C is a special latin extra code, or an ISO2022
3552              control code of C1 (SS2 or SS3), or a CSI followed by `]',
3553              `0]', `1]' or `2]' (SRC still points at C itself, so those
3554              bytes are src[1] and src[2]), also consider ISO2022 codings.  */
3555           if ((VECTORP (Vlatin_extra_code_table)
3556                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3557               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3558               || (c == ISO_CODE_CSI
3559                   && (src + 1 < src_end
3560                       && (src[1] == ']'
3561                           || ((src[1] == '0' || src[1] == '1' || src[1] == '2')
3562                               && src + 2 < src_end
3563                               && src[2] == ']')))))
3564             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3565                      | CODING_CATEGORY_MASK_ISO_8BIT);
3566         }
3567       else
3568         /* C is a character of ISO2022 in graphic plane right,
3569            or a SJIS's 1-byte character code (i.e. JISX0201),
3570            or the first byte of BIG5's 2-byte code,
3571            or the first byte of UTF-8/16.  */
3572         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3573                 | CODING_CATEGORY_MASK_ISO_8BIT
3574                 | CODING_CATEGORY_MASK_SJIS
3575                 | CODING_CATEGORY_MASK_BIG5
3576                 | CODING_CATEGORY_MASK_UTF_8
3577                 | CODING_CATEGORY_MASK_UTF_16_BE
3578                 | CODING_CATEGORY_MASK_UTF_16_LE);
3579
3580       /* Or, we may have to consider the possibility of CCL.  */
3581       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3582           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3583               ->spec.ccl.valid_codes)[c])
3584         try |= CODING_CATEGORY_MASK_CCL;
3585
3586       mask = 0;
3587       utf16_examined_p = iso2022_examined_p = 0;
3588       if (priorities)
3589         {
3590           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3591             {
3592               if (!iso2022_examined_p
3593                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
3594                 {
3595                   mask |= detect_coding_iso2022 (src, src_end);
3596                   iso2022_examined_p = 1;
3597                 }
3598               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3599                 mask |= detect_coding_sjis (src, src_end);
3600               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
3601                 mask |= detect_coding_utf_8 (src, src_end);
3602               else if (!utf16_examined_p
3603                        && (priorities[i] & try &
3604                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
3605                 {
3606                   mask |= detect_coding_utf_16 (src, src_end);
3607                   utf16_examined_p = 1;
3608                 }
3609               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3610                 mask |= detect_coding_big5 (src, src_end);
3611               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3612                 mask |= detect_coding_emacs_mule (src, src_end);
3613               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3614                 mask |= detect_coding_ccl (src, src_end);
3615               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3616                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
3617               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3618                 mask |= CODING_CATEGORY_MASK_BINARY;
3619               if (mask & priorities[i])
3620                 return priorities[i];
3621             }
3622           return CODING_CATEGORY_MASK_RAW_TEXT;
3623         }
3624       if (try & CODING_CATEGORY_MASK_ISO)
3625         mask |= detect_coding_iso2022 (src, src_end);
3626       if (try & CODING_CATEGORY_MASK_SJIS)
3627         mask |= detect_coding_sjis (src, src_end);
3628       if (try & CODING_CATEGORY_MASK_BIG5)
3629         mask |= detect_coding_big5 (src, src_end);
3630       if (try & CODING_CATEGORY_MASK_UTF_8)
3631         mask |= detect_coding_utf_8 (src, src_end);
3632       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
3633         mask |= detect_coding_utf_16 (src, src_end);
3634       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3635         mask |= detect_coding_emacs_mule (src, src_end);
3636       if (try & CODING_CATEGORY_MASK_CCL)
3637         mask |= detect_coding_ccl (src, src_end);
3638     }
3639   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3640 }
3641
3642 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3643    The information of the detected coding system is set in CODING.  */
3644
3645 void
3646 detect_coding (coding, src, src_bytes)
3647      struct coding_system *coding;
3648      unsigned char *src;
3649      int src_bytes;
3650 {
3651   unsigned int idx;
3652   int skip, mask, i;
3653   Lisp_Object val;
3654
3655   val = Vcoding_category_list;
3656   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3657   coding->heading_ascii = skip;
3658
3659   if (!mask) return;
3660
3661   /* We found a single coding system of the highest priority in MASK.  */
3662   idx = 0;
3663   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3664   if (! mask)
3665     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3666
3667   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3668
3669   if (coding->eol_type != CODING_EOL_UNDECIDED)
3670     {
3671       Lisp_Object tmp;
3672
3673       tmp = Fget (val, Qeol_type);
3674       if (VECTORP (tmp))
3675         val = XVECTOR (tmp)->contents[coding->eol_type];
3676     }
3677
3678   /* Setup this new coding system while preserving some slots.  */
3679   {
3680     int src_multibyte = coding->src_multibyte;
3681     int dst_multibyte = coding->dst_multibyte;
3682
3683     setup_coding_system (val, coding);
3684     coding->src_multibyte = src_multibyte;
3685     coding->dst_multibyte = dst_multibyte;
3686     coding->heading_ascii = skip;
3687   }
3688 }
3689
3690 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3691    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3692    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3693
3694    How many non-eol characters are at the head is returned as *SKIP.  */
3695
3696 #define MAX_EOL_CHECK_COUNT 3
3697
3698 static int
3699 detect_eol_type (source, src_bytes, skip)
3700      unsigned char *source;
3701      int src_bytes, *skip;
3702 {
3703   unsigned char *src = source, *src_end = src + src_bytes;
3704   unsigned char c;
3705   int total = 0;                /* How many end-of-lines are found so far.  */
3706   int eol_type = CODING_EOL_UNDECIDED;
3707   int this_eol_type;
3708
3709   *skip = 0;
3710
3711   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3712     {
3713       c = *src++;
3714       if (c == '\n' || c == '\r')
3715         {
3716           if (*skip == 0)
3717             *skip = src - 1 - source;
3718           total++;
3719           if (c == '\n')
3720             this_eol_type = CODING_EOL_LF;
3721           else if (src >= src_end || *src != '\n')
3722             this_eol_type = CODING_EOL_CR;
3723           else
3724             this_eol_type = CODING_EOL_CRLF, src++;
3725
3726           if (eol_type == CODING_EOL_UNDECIDED)
3727             /* This is the first end-of-line.  */
3728             eol_type = this_eol_type;
3729           else if (eol_type != this_eol_type)
3730             {
3731               /* The found type is different from what found before.  */
3732               eol_type = CODING_EOL_INCONSISTENT;
3733               break;
3734             }
3735         }
3736     }
3737
3738   if (*skip == 0)
3739     *skip = src_end - source;
3740   return eol_type;
3741 }
3742
3743 /* Like detect_eol_type, but detect EOL type in 2-octet
3744    big-endian/little-endian format for coding systems utf-16-be and
3745    utf-16-le.  */
3746
3747 static int
3748 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
3749      unsigned char *source;
3750      int src_bytes, *skip;
3751 {
3752   unsigned char *src = source, *src_end = src + src_bytes;
3753   unsigned int c1, c2;
3754   int total = 0;                /* How many end-of-lines are found so far.  */
3755   int eol_type = CODING_EOL_UNDECIDED;
3756   int this_eol_type;
3757   int msb, lsb;
3758
3759   if (big_endian_p)
3760     msb = 0, lsb = 1;
3761   else
3762     msb = 1, lsb = 0;
3763
3764   *skip = 0;
3765
3766   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
3767     {
3768       c1 = (src[msb] << 8) | (src[lsb]);
3769       src += 2;
3770
3771       if (c1 == '\n' || c1 == '\r')
3772         {
3773           if (*skip == 0)
3774             *skip = src - 2 - source;
3775           total++;
3776           if (c1 == '\n')
3777             {
3778               this_eol_type = CODING_EOL_LF;
3779             }
3780           else
3781             {
3782               if ((src + 1) >= src_end)
3783                 {
3784                   this_eol_type = CODING_EOL_CR;
3785                 }
3786               else
3787                 {
3788                   c2 = (src[msb] << 8) | (src[lsb]);
3789                   if (c2 == '\n')
3790                     this_eol_type = CODING_EOL_CRLF, src += 2;
3791                   else
3792                     this_eol_type = CODING_EOL_CR;
3793                 }
3794             }
3795
3796           if (eol_type == CODING_EOL_UNDECIDED)
3797             /* This is the first end-of-line.  */
3798             eol_type = this_eol_type;
3799           else if (eol_type != this_eol_type)
3800             {
3801               /* The found type is different from what found before.  */
3802               eol_type = CODING_EOL_INCONSISTENT;
3803               break;
3804             }
3805         }
3806     }
3807
3808   if (*skip == 0)
3809     *skip = src_end - source;
3810   return eol_type;
3811 }
3812
3813 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3814    is encoded.  If it detects an appropriate format of end-of-line, it
3815    sets the information in *CODING.  */
3816
3817 void
3818 detect_eol (coding, src, src_bytes)
3819      struct coding_system *coding;
3820      unsigned char *src;
3821      int src_bytes;
3822 {
3823   Lisp_Object val;
3824   int skip;
3825   int eol_type;
3826
3827   switch (coding->category_idx)
3828     {
3829     case CODING_CATEGORY_IDX_UTF_16_BE:
3830       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
3831       break;
3832     case CODING_CATEGORY_IDX_UTF_16_LE:
3833       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
3834       break;
3835     default:
3836       eol_type = detect_eol_type (src, src_bytes, &skip);
3837       break;
3838     }
3839
3840   if (coding->heading_ascii > skip)
3841     coding->heading_ascii = skip;
3842   else
3843     skip = coding->heading_ascii;
3844
3845   if (eol_type == CODING_EOL_UNDECIDED)
3846     return;
3847   if (eol_type == CODING_EOL_INCONSISTENT)
3848     {
3849 #if 0
3850       /* This code is suppressed until we find a better way to
3851          distinguish raw text file and binary file.  */
3852
3853       /* If we have already detected that the coding is raw-text, the
3854          coding should actually be no-conversion.  */
3855       if (coding->type == coding_type_raw_text)
3856         {
3857           setup_coding_system (Qno_conversion, coding);
3858           return;
3859         }
3860       /* Else, let's decode only text code anyway.  */
3861 #endif /* 0 */
3862       eol_type = CODING_EOL_LF;
3863     }
3864
3865   val = Fget (coding->symbol, Qeol_type);
3866   if (VECTORP (val) && XVECTOR (val)->size == 3)
3867     {
3868       int src_multibyte = coding->src_multibyte;
3869       int dst_multibyte = coding->dst_multibyte;
3870
3871       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3872       coding->src_multibyte = src_multibyte;
3873       coding->dst_multibyte = dst_multibyte;
3874       coding->heading_ascii = skip;
3875     }
3876 }
3877
/* Number of bytes added on top of the magnified source size by
   decoding_buffer_size and encoding_buffer_size.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the source size is multiplied to bound the size of
   the decoded text: 3 for ISO2022, the CCL decoder's own
   buf_magnification for CCL, and 2 otherwise.  CODING is a pointer to
   struct coding_system; it is parenthesized so that any pointer
   expression may be passed, and it is evaluated more than once.  */
#define DECODING_BUFFER_MAG(coding)                     \
  ((coding)->type == coding_type_iso2022                \
   ? 3                                                  \
   : ((coding)->type == coding_type_ccl                 \
      ? (coding)->spec.ccl.decoder.buf_magnification    \
      : 2))
3886
3887 /* Return maximum size (bytes) of a buffer enough for decoding
3888    SRC_BYTES of text encoded in CODING.  */
3889
3890 int
3891 decoding_buffer_size (coding, src_bytes)
3892      struct coding_system *coding;
3893      int src_bytes;
3894 {
3895   return (src_bytes * DECODING_BUFFER_MAG (coding)
3896           + CONVERSION_BUFFER_EXTRA_ROOM);
3897 }
3898
3899 /* Return maximum size (bytes) of a buffer enough for encoding
3900    SRC_BYTES of text to CODING.  */
3901
3902 int
3903 encoding_buffer_size (coding, src_bytes)
3904      struct coding_system *coding;
3905      int src_bytes;
3906 {
3907   int magnification;
3908
3909   if (coding->type == coding_type_ccl)
3910     magnification = coding->spec.ccl.encoder.buf_magnification;
3911   else if (CODING_REQUIRE_ENCODING (coding))
3912     magnification = 3;
3913   else
3914     magnification = 1;
3915
3916   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3917 }
3918
/* Working buffer for code conversion.  It is set up by
   allocate_conversion_buffer, grown by extend_conversion_buffer, and
   released by free_conversion_buffer.  */
struct conversion_buffer
{
  int size;                     /* Number of bytes allocated at DATA.  */
  int on_stack;                 /* 1 if DATA was allocated by alloca, 0 if
                                   it came from xmalloc/xrealloc and must
                                   be freed.  */
  unsigned char *data;          /* The storage itself.  */
};
3926
/* Don't use alloca for allocating memory space larger than this, lest
   we overflow the stack.  Note that the expansion is not
   parenthesized; wrap it in parentheses wherever it is used inside a
   larger expression.  */
#define MAX_ALLOCA 16*1024

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer, not
   a pointer to it).  Requests smaller than MAX_ALLOCA are served by
   alloca, so the storage lives only as long as the calling function's
   frame; larger ones come from xmalloc.  BUF.on_stack records which,
   so that free_conversion_buffer knows whether to free the data.

   BUF and LEN are evaluated more than once, so they must not have
   side effects.  Both are parenthesized so that arbitrary
   expressions may be passed.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < (MAX_ALLOCA))                           \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
3946
3947 /* Double the allocated memory for *BUF.  */
3948 static void
3949 extend_conversion_buffer (buf)
3950      struct conversion_buffer *buf;
3951 {
3952   if (buf->on_stack)
3953     {
3954       unsigned char *save = buf->data;
3955       buf->data = (unsigned char *) xmalloc (buf->size * 2);
3956       bcopy (save, buf->data, buf->size);
3957       buf->on_stack = 0;
3958     }
3959   else
3960     {
3961       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
3962     }
3963   buf->size *= 2;
3964 }
3965
3966 /* Free the allocated memory for BUF if it is not on stack.  */
3967 static void
3968 free_conversion_buffer (buf)
3969      struct conversion_buffer *buf;
3970 {
3971   if (!buf->on_stack)
3972     xfree (buf->data);
3973 }
3974
3975 int
3976 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3977      struct coding_system *coding;
3978      unsigned char *source, *destination;
3979      int src_bytes, dst_bytes, encodep;
3980 {
3981   struct ccl_program *ccl
3982     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3983   int result;
3984
3985   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3986   if (encodep)
3987     ccl->eol_type = coding->eol_type;
3988   ccl->multibyte = coding->src_multibyte;
3989   coding->produced = ccl_driver (ccl, source, destination,
3990                                  src_bytes, dst_bytes, &(coding->consumed));
3991   if (encodep)
3992     coding->produced_char = coding->produced;
3993   else
3994     {
3995       int bytes
3996         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
3997       coding->produced = str_as_multibyte (destination, bytes,
3998                                            coding->produced,
3999                                            &(coding->produced_char));
4000     }
4001
4002   switch (ccl->status)
4003     {
4004     case CCL_STAT_SUSPEND_BY_SRC:
4005       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4006       break;
4007     case CCL_STAT_SUSPEND_BY_DST:
4008       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4009       break;
4010     case CCL_STAT_QUIT:
4011     case CCL_STAT_INVALID_CMD:
4012       coding->result = CODING_FINISH_INTERRUPT;
4013       break;
4014     default:
4015       coding->result = CODING_FINISH_NORMAL;
4016       break;
4017     }
4018   return coding->result;
4019 }
4020
4021 /* Decode EOL format of the text at PTR of BYTES length destructively
4022    according to CODING->eol_type.  This is called after the CCL
4023    program produced a decoded text at PTR.  If we do CRLF->LF
4024    conversion, update CODING->produced and CODING->produced_char.  */
4025
4026 static void
4027 decode_eol_post_ccl (coding, ptr, bytes)
4028      struct coding_system *coding;
4029      unsigned char *ptr;
4030      int bytes;
4031 {
4032   Lisp_Object val, saved_coding_symbol;
4033   unsigned char *pend = ptr + bytes;
4034   int dummy;
4035
4036   /* Remember the current coding system symbol.  We set it back when
4037      an inconsistent EOL is found so that `last-coding-system-used' is
4038      set to the coding system that doesn't specify EOL conversion.  */
4039   saved_coding_symbol = coding->symbol;
4040
4041   coding->spec.ccl.cr_carryover = 0;
4042   if (coding->eol_type == CODING_EOL_UNDECIDED)
4043     {
4044       /* Here, to avoid the call of setup_coding_system, we directly
4045          call detect_eol_type.  */
4046       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4047       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4048         coding->eol_type = CODING_EOL_LF;
4049       if (coding->eol_type != CODING_EOL_UNDECIDED)
4050         {
4051           val = Fget (coding->symbol, Qeol_type);
4052           if (VECTORP (val) && XVECTOR (val)->size == 3)
4053             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4054         }
4055       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4056     }
4057
4058   if (coding->eol_type == CODING_EOL_LF
4059       || coding->eol_type == CODING_EOL_UNDECIDED)
4060     {
4061       /* We have nothing to do.  */
4062       ptr = pend;
4063     }
4064   else if (coding->eol_type == CODING_EOL_CRLF)
4065     {
4066       unsigned char *pstart = ptr, *p = ptr;
4067
4068       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4069           && *(pend - 1) == '\r')
4070         {
4071           /* If the last character is CR, we can't handle it here
4072              because LF will be in the not-yet-decoded source text.
4073              Recorded that the CR is not yet processed.  */
4074           coding->spec.ccl.cr_carryover = 1;
4075           coding->produced--;
4076           coding->produced_char--;
4077           pend--;
4078         }
4079       while (ptr < pend)
4080         {
4081           if (*ptr == '\r')
4082             {
4083               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4084                 {
4085                   *p++ = '\n';
4086                   ptr += 2;
4087                 }
4088               else
4089                 {
4090                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4091                     goto undo_eol_conversion;
4092                   *p++ = *ptr++;
4093                 }
4094             }
4095           else if (*ptr == '\n'
4096                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4097             goto undo_eol_conversion;
4098           else
4099             *p++ = *ptr++;
4100           continue;
4101
4102         undo_eol_conversion:
4103           /* We have faced with inconsistent EOL format at PTR.
4104              Convert all LFs before PTR back to CRLFs.  */
4105           for (p--, ptr--; p >= pstart; p--)
4106             {
4107               if (*p == '\n')
4108                 *ptr-- = '\n', *ptr-- = '\r';
4109               else
4110                 *ptr-- = *p;
4111             }
4112           /*  If carryover is recorded, cancel it because we don't
4113               convert CRLF anymore.  */
4114           if (coding->spec.ccl.cr_carryover)
4115             {
4116               coding->spec.ccl.cr_carryover = 0;
4117               coding->produced++;
4118               coding->produced_char++;
4119               pend++;
4120             }
4121           p = ptr = pend;
4122           coding->eol_type = CODING_EOL_LF;
4123           coding->symbol = saved_coding_symbol;
4124         }
4125       if (p < pend)
4126         {
4127           /* As each two-byte sequence CRLF was converted to LF, (PEND
4128              - P) is the number of deleted characters.  */
4129           coding->produced -= pend - p;
4130           coding->produced_char -= pend - p;
4131         }
4132     }
4133   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4134     {
4135       unsigned char *p = ptr;
4136
4137       for (; ptr < pend; ptr++)
4138         {
4139           if (*ptr == '\r')
4140             *ptr = '\n';
4141           else if (*ptr == '\n'
4142                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4143             {
4144               for (; p < ptr; p++)
4145                 {
4146                   if (*p == '\n')
4147                     *p = '\r';
4148                 }
4149               ptr = pend;
4150               coding->eol_type = CODING_EOL_LF;
4151               coding->symbol = saved_coding_symbol;
4152             }
4153         }
4154     }
4155 }
4156
4157 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4158    decoding, it may detect coding system and format of end-of-line if
4159    those are not yet decided.  The source should be unibyte, the
4160    result is multibyte if CODING->dst_multibyte is nonzero, else
4161    unibyte.  */
4162
4163 int
4164 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4165      struct coding_system *coding;
4166      unsigned char *source, *destination;
4167      int src_bytes, dst_bytes;
4168 {
4169   if (coding->type == coding_type_undecided)
4170     detect_coding (coding, source, src_bytes);
4171
4172   if (coding->eol_type == CODING_EOL_UNDECIDED
4173       && coding->type != coding_type_ccl)
4174     detect_eol (coding, source, src_bytes);
4175
4176   coding->produced = coding->produced_char = 0;
4177   coding->consumed = coding->consumed_char = 0;
4178   coding->errors = 0;
4179   coding->result = CODING_FINISH_NORMAL;
4180
4181   switch (coding->type)
4182     {
4183     case coding_type_sjis:
4184       decode_coding_sjis_big5 (coding, source, destination,
4185                                src_bytes, dst_bytes, 1);
4186       break;
4187
4188     case coding_type_iso2022:
4189       decode_coding_iso2022 (coding, source, destination,
4190                              src_bytes, dst_bytes);
4191       break;
4192
4193     case coding_type_big5:
4194       decode_coding_sjis_big5 (coding, source, destination,
4195                                src_bytes, dst_bytes, 0);
4196       break;
4197
4198     case coding_type_emacs_mule:
4199       decode_coding_emacs_mule (coding, source, destination,
4200                                 src_bytes, dst_bytes);
4201       break;
4202
4203     case coding_type_ccl:
4204       if (coding->spec.ccl.cr_carryover)
4205         {
4206           /* Set the CR which is not processed by the previous call of
4207              decode_eol_post_ccl in DESTINATION.  */
4208           *destination = '\r';
4209           coding->produced++;
4210           coding->produced_char++;
4211           dst_bytes--;
4212         }
4213       ccl_coding_driver (coding, source,
4214                          destination + coding->spec.ccl.cr_carryover,
4215                          src_bytes, dst_bytes, 0);
4216       if (coding->eol_type != CODING_EOL_LF)
4217         decode_eol_post_ccl (coding, destination, coding->produced);
4218       break;
4219
4220     default:
4221       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4222     }
4223
4224   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4225       && coding->mode & CODING_MODE_LAST_BLOCK
4226       && coding->consumed == src_bytes)
4227     coding->result = CODING_FINISH_NORMAL;
4228
4229   if (coding->mode & CODING_MODE_LAST_BLOCK
4230       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4231     {
4232       unsigned char *src = source + coding->consumed;
4233       unsigned char *dst = destination + coding->produced;
4234
4235       src_bytes -= coding->consumed;
4236       coding->errors++;
4237       if (COMPOSING_P (coding))
4238         DECODE_COMPOSITION_END ('1');
4239       while (src_bytes--)
4240         {
4241           int c = *src++;
4242           dst += CHAR_STRING (c, dst);
4243           coding->produced_char++;
4244         }
4245       coding->consumed = coding->consumed_char = src - source;
4246       coding->produced = dst - destination;
4247       coding->result = CODING_FINISH_NORMAL;
4248     }
4249
4250   if (!coding->dst_multibyte)
4251     {
4252       coding->produced = str_as_unibyte (destination, coding->produced);
4253       coding->produced_char = coding->produced;
4254     }
4255
4256   return coding->result;
4257 }
4258
4259 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4260    multibyteness of the source is CODING->src_multibyte, the
4261    multibyteness of the result is always unibyte.  */
4262
4263 int
4264 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4265      struct coding_system *coding;
4266      unsigned char *source, *destination;
4267      int src_bytes, dst_bytes;
4268 {
4269   coding->produced = coding->produced_char = 0;
4270   coding->consumed = coding->consumed_char = 0;
4271   coding->errors = 0;
4272   coding->result = CODING_FINISH_NORMAL;
4273
4274   switch (coding->type)
4275     {
4276     case coding_type_sjis:
4277       encode_coding_sjis_big5 (coding, source, destination,
4278                                src_bytes, dst_bytes, 1);
4279       break;
4280
4281     case coding_type_iso2022:
4282       encode_coding_iso2022 (coding, source, destination,
4283                              src_bytes, dst_bytes);
4284       break;
4285
4286     case coding_type_big5:
4287       encode_coding_sjis_big5 (coding, source, destination,
4288                                src_bytes, dst_bytes, 0);
4289       break;
4290
4291     case coding_type_emacs_mule:
4292       encode_coding_emacs_mule (coding, source, destination,
4293                                 src_bytes, dst_bytes);
4294       break;
4295
4296     case coding_type_ccl:
4297       ccl_coding_driver (coding, source, destination,
4298                          src_bytes, dst_bytes, 1);
4299       break;
4300
4301     default:
4302       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4303     }
4304
4305   if (coding->mode & CODING_MODE_LAST_BLOCK
4306       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4307     {
4308       unsigned char *src = source + coding->consumed;
4309       unsigned char *src_end = src + src_bytes;
4310       unsigned char *dst = destination + coding->produced;
4311
4312       if (coding->type == coding_type_iso2022)
4313         ENCODE_RESET_PLANE_AND_REGISTER;
4314       if (COMPOSING_P (coding))
4315         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4316       if (coding->consumed < src_bytes)
4317         {
4318           int len = src_bytes - coding->consumed;
4319
4320           BCOPY_SHORT (source + coding->consumed, dst, len);
4321           if (coding->src_multibyte)
4322             len = str_as_unibyte (dst, len);
4323           dst += len;
4324           coding->consumed = src_bytes;
4325         }
4326       coding->produced = coding->produced_char = dst - destination;
4327       coding->result = CODING_FINISH_NORMAL;
4328     }
4329
4330   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4331       && coding->consumed == src_bytes)
4332     coding->result = CODING_FINISH_NORMAL;
4333
4334   return coding->result;
4335 }
4336
4337 /* Scan text in the region between *BEG and *END (byte positions),
4338    skip characters which we don't have to decode by coding system
4339    CODING at the head and tail, then set *BEG and *END to the region
4340    of the text we actually have to convert.  The caller should move
4341    the gap out of the region in advance if the region is from a
4342    buffer.
4343
4344    If STR is not NULL, *BEG and *END are indices into STR.  */
4345
4346 static void
4347 shrink_decoding_region (beg, end, coding, str)
4348      int *beg, *end;
4349      struct coding_system *coding;
4350      unsigned char *str;
4351 {
4352   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4353   int eol_conversion;
4354   Lisp_Object translation_table;
4355
4356   if (coding->type == coding_type_ccl
4357       || coding->type == coding_type_undecided
4358       || coding->eol_type != CODING_EOL_LF
4359       || !NILP (coding->post_read_conversion)
4360       || coding->composing != COMPOSITION_DISABLED)
4361     {
4362       /* We can't skip any data.  */
4363       return;
4364     }
4365   if (coding->type == coding_type_no_conversion
4366       || coding->type == coding_type_raw_text
4367       || coding->type == coding_type_emacs_mule)
4368     {
4369       /* We need no conversion, but don't have to skip any data here.
4370          Decoding routine handles them effectively anyway.  */
4371       return;
4372     }
4373
4374   translation_table = coding->translation_table_for_decode;
4375   if (NILP (translation_table) && !NILP (Venable_character_translation))
4376     translation_table = Vstandard_translation_table_for_decode;
4377   if (CHAR_TABLE_P (translation_table))
4378     {
4379       int i;
4380       for (i = 0; i < 128; i++)
4381         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4382           break;
4383       if (i < 128)
4384         /* Some ASCII character should be translated.  We give up
4385            shrinking.  */
4386         return;
4387     }
4388
4389   if (coding->heading_ascii >= 0)
4390     /* Detection routine has already found how much we can skip at the
4391        head.  */
4392     *beg += coding->heading_ascii;
4393
4394   if (str)
4395     {
4396       begp_orig = begp = str + *beg;
4397       endp_orig = endp = str + *end;
4398     }
4399   else
4400     {
4401       begp_orig = begp = BYTE_POS_ADDR (*beg);
4402       endp_orig = endp = begp + *end - *beg;
4403     }
4404
4405   eol_conversion = (coding->eol_type == CODING_EOL_CR
4406                     || coding->eol_type == CODING_EOL_CRLF);
4407
4408   switch (coding->type)
4409     {
4410     case coding_type_sjis:
4411     case coding_type_big5:
4412       /* We can skip all ASCII characters at the head.  */
4413       if (coding->heading_ascii < 0)
4414         {
4415           if (eol_conversion)
4416             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4417           else
4418             while (begp < endp && *begp < 0x80) begp++;
4419         }
4420       /* We can skip all ASCII characters at the tail except for the
4421          second byte of SJIS or BIG5 code.  */
4422       if (eol_conversion)
4423         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4424       else
4425         while (begp < endp && endp[-1] < 0x80) endp--;
4426       /* Do not consider LF as ascii if preceded by CR, since that
4427          confuses eol decoding. */
4428       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4429         endp++;
4430       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4431         endp++;
4432       break;
4433
4434     case coding_type_iso2022:
4435       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4436         /* We can't skip any data.  */
4437         break;
4438       if (coding->heading_ascii < 0)
4439         {
4440           /* We can skip all ASCII characters at the head except for a
4441              few control codes.  */
4442           while (begp < endp && (c = *begp) < 0x80
4443                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4444                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4445                  && (!eol_conversion || c != ISO_CODE_LF))
4446             begp++;
4447         }
4448       switch (coding->category_idx)
4449         {
4450         case CODING_CATEGORY_IDX_ISO_8_1:
4451         case CODING_CATEGORY_IDX_ISO_8_2:
4452           /* We can skip all ASCII characters at the tail.  */
4453           if (eol_conversion)
4454             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4455           else
4456             while (begp < endp && endp[-1] < 0x80) endp--;
4457           /* Do not consider LF as ascii if preceded by CR, since that
4458              confuses eol decoding. */
4459           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4460             endp++;
4461           break;
4462
4463         case CODING_CATEGORY_IDX_ISO_7:
4464         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4465           {
4466             /* We can skip all charactes at the tail except for 8-bit
4467                codes and ESC and the following 2-byte at the tail.  */
4468             unsigned char *eight_bit = NULL;
4469
4470             if (eol_conversion)
4471               while (begp < endp
4472                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4473                 {
4474                   if (!eight_bit && c & 0x80) eight_bit = endp;
4475                   endp--;
4476                 }
4477             else
4478               while (begp < endp
4479                      && (c = endp[-1]) != ISO_CODE_ESC)
4480                 {
4481                   if (!eight_bit && c & 0x80) eight_bit = endp;
4482                   endp--;
4483                 }
4484             /* Do not consider LF as ascii if preceded by CR, since that
4485                confuses eol decoding. */
4486             if (begp < endp && endp < endp_orig
4487                 && endp[-1] == '\r' && endp[0] == '\n')
4488               endp++;
4489             if (begp < endp && endp[-1] == ISO_CODE_ESC)
4490               {
4491                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
4492                   /* This is an ASCII designation sequence.  We can
4493                      surely skip the tail.  But, if we have
4494                      encountered an 8-bit code, skip only the codes
4495                      after that.  */
4496                   endp = eight_bit ? eight_bit : endp + 2;
4497                 else
4498                   /* Hmmm, we can't skip the tail.  */
4499                   endp = endp_orig;
4500               }
4501             else if (eight_bit)
4502               endp = eight_bit;
4503           }
4504         }
4505       break;
4506
4507     default:
4508       abort ();
4509     }
4510   *beg += begp - begp_orig;
4511   *end += endp - endp_orig;
4512   return;
4513 }
4514
4515 /* Like shrink_decoding_region but for encoding.
        Narrow the byte range *BEG .. *END that must be encoded by CODING
        by skipping bytes below 0x80 at the head and at the tail.  If STR
        is non-NULL, *BEG and *END are offsets into STR; otherwise *BEG is
        resolved with BYTE_POS_ADDR and the end address is taken as that
        address plus (*END - *BEG).  *BEG and *END are updated in place
        and are left unchanged when nothing can be skipped.  */
4516
4517 static void
4518 shrink_encoding_region (beg, end, coding, str)
4519      int *beg, *end;
4520      struct coding_system *coding;
4521      unsigned char *str;
4522 {
4523   unsigned char *begp_orig, *begp, *endp_orig, *endp;
4524   int eol_conversion;
4525   Lisp_Object translation_table;
4526
4527   if (coding->type == coding_type_ccl
4528       || coding->eol_type == CODING_EOL_CRLF
4529       || coding->eol_type == CODING_EOL_CR
4530       || coding->cmp_data && coding->cmp_data->used > 0)
4531     {
4532       /* We can't skip any data for CCL, for the CR and CRLF eol
                formats, or when composition data has been recorded.  */
4533       return;
4534     }
4535   if (coding->type == coding_type_no_conversion
4536       || coding->type == coding_type_raw_text
4537       || coding->type == coding_type_emacs_mule
4538       || coding->type == coding_type_undecided)
4539     {
4540       /* We need no conversion, but don't have to skip any data here.
4541          Encoding routine handles them effectively anyway.  */
4542       return;
4543     }
4544
       /* Pick the translation table: the one of CODING if any, else the
          standard one when character translation is enabled.  */
4545   translation_table = coding->translation_table_for_encode;
4546   if (NILP (translation_table) && !NILP (Venable_character_translation))
4547     translation_table = Vstandard_translation_table_for_encode;
4548   if (CHAR_TABLE_P (translation_table))
4549     {
4550       int i;
           /* Look for a non-nil entry among the first 128 codes.  */
4551       for (i = 0; i < 128; i++)
4552         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4553           break;
4554       if (i < 128)
4555         /* Some ASCII character should be translated.  We give up
4556            shrinking.  */
4557         return;
4558     }
4559
       /* Set up the scan pointers; the *_ORIG copies are kept to compute
          how far each end has moved.  */
4560   if (str)
4561     {
4562       begp_orig = begp = str + *beg;
4563       endp_orig = endp = str + *end;
4564     }
4565   else
4566     {
4567       begp_orig = begp = BYTE_POS_ADDR (*beg);
4568       endp_orig = endp = begp + *end - *beg;
4569     }
4570
       /* NOTE(review): CODING_EOL_CR and CODING_EOL_CRLF already caused
          an early return above, so EOL_CONVERSION is always zero at this
          point and the `eol_conversion' branches below are not taken.  */
4571   eol_conversion = (coding->eol_type == CODING_EOL_CR
4572                     || coding->eol_type == CODING_EOL_CRLF);
4573
4574   /* Here, we don't have to check coding->pre_write_conversion because
4575      the caller is expected to have handled it already.  */
4576   switch (coding->type)
4577     {
4578     case coding_type_iso2022:
4579       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4580         /* We can't skip any data.  */
4581         break;
4582       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4583         {
               /* Scan the leading run of bytes below 0x80, but resume
                  conversion at the last beginning of line found in it
                  (or at the original start if no newline was seen).  */
4584           unsigned char *bol = begp;
4585           while (begp < endp && *begp < 0x80)
4586             {
4587               begp++;
4588               if (begp[-1] == '\n')
4589                 bol = begp;
4590             }
4591           begp = bol;
4592           goto label_skip_tail;
4593         }
4594       /* Fall through.  */
4595
4596     case coding_type_sjis:
4597     case coding_type_big5:
4598       /* We can skip all ASCII characters at the head and tail.  */
4599       if (eol_conversion)
4600         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4601       else
4602         while (begp < endp && *begp < 0x80) begp++;
4603     label_skip_tail:
4604       if (eol_conversion)
4605         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4606       else
4607         while (begp < endp && *(endp - 1) < 0x80) endp--;
4608       break;
4609
4610     default:
           /* Every other coding type returned early above.  */
4611       abort ();
4612     }
4613
       /* Report back how far each end has moved.  */
4614   *beg += begp - begp_orig;
4615   *end += endp - endp_orig;
4616   return;
4617 }
4618
4619 /* As shrinking conversion region requires some overhead, we don't try
4620    shrinking unless the length of conversion region is greater than
4621    this value.  */
4622 static int shrink_conversion_region_threshhold = 1024;
4623
     /* Shrink the conversion region *BEG .. *END when it is longer than
        shrink_conversion_region_threshhold, using shrink_encoding_region
        if ENCODEP is nonzero and shrink_decoding_region otherwise.  BEG
        and END are pointers to int and are evaluated more than once.  */
4624 #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
4625   do {                                                                  \
4626     if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
4627       {                                                                 \
4628         if (encodep) shrink_encoding_region (beg, end, coding, str);    \
4629         else shrink_decoding_region (beg, end, coding, str);            \
4630       }                                                                 \
4631   } while (0)
4632
/* Unwind-protect handler used by code_convert_region and
        run_pre_post_conversion_on_str: reset inhibit_pre_post_conversion
        to 0.  DUMMY is ignored.  Always returns Qnil.  */
4633 static Lisp_Object
4634 code_convert_region_unwind (dummy)
4635      Lisp_Object dummy;
4636 {
4637   inhibit_pre_post_conversion = 0;
4638   return Qnil;
4639 }
4640
4641 /* Store information about all compositions in the range FROM and TO
4642    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
4643    buffer or a string, defaults to the current buffer.
        Nothing is done when CODING->composing is COMPOSITION_DISABLED.
        Recorded start and end positions are relative to FROM.  When at
        least one composition is processed, CODING->cmp_data is rewound
        to the first memory block and CODING->cmp_data_start is reset
        to 0 before returning.  */
4644
4645 void
4646 coding_save_composition (coding, from, to, obj)
4647      struct coding_system *coding;
4648      int from, to;
4649      Lisp_Object obj;
4650 {
4651   Lisp_Object prop;
4652   int start, end;
4653
4654   if (coding->composing == COMPOSITION_DISABLED)
4655     return;
4656   if (!coding->cmp_data)
4657     coding_allocate_composition_data (coding, from);
       /* Nothing to record if no composition is found, or if the one
          found extends beyond TO.  */
4658   if (!find_composition (from, to, &start, &end, &prop, obj)
4659       || end > to)
4660     return;
       /* If the composition found starts before FROM, skip it and look
          for the next one from its end.  */
4661   if (start < from
4662       && (!find_composition (end, to, &start, &end, &prop, obj)
4663           || end > to))
4664     return;
4665   coding->composing = COMPOSITION_NO;
4666   do
4667     {
4668       if (COMPOSITION_VALID_P (start, end, prop))
4669         {
4670           enum composition_method method = COMPOSITION_METHOD (prop);
               /* Get a new memory block when the current one may not
                  have room for one more bunch of data.  */
4671           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
4672               >= COMPOSITION_DATA_SIZE)
4673             coding_allocate_composition_data (coding, from);
4674           /* For relative composition, we remember start and end
4675              positions, for the other compositions, we also remember
4676              components.  */
4677           CODING_ADD_COMPOSITION_START (coding, start - from, method);
4678           if (method != COMPOSITION_RELATIVE)
4679             {
4680               /* We must also store all the components.  */
4681               Lisp_Object val, ch;
4682
4683               val = COMPOSITION_COMPONENTS (prop);
                   /* The components may be a list, a vector, a string,
                      or a single integer.  */
4684               if (CONSP (val))
4685                 while (CONSP (val))
4686                   {
4687                     ch = XCAR (val), val = XCDR (val);
4688                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4689                   }
4690               else if (VECTORP (val) || STRINGP (val))
4691                 {
4692                   int len = (VECTORP (val)
4693                              ? XVECTOR (val)->size : XSTRING (val)->size);
4694                   int i;
4695                   for (i = 0; i < len; i++)
4696                     {
4697                       ch = (STRINGP (val)
4698                             ? Faref (val, make_number (i))
4699                             : XVECTOR (val)->contents[i]);
4700                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4701                     }
4702                 }
4703               else              /* INTEGERP (val) */
4704                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
4705             }
4706           CODING_ADD_COMPOSITION_END (coding, end - from);
4707         }
4708       start = end;
4709     }
       /* Continue with the next composition as long as it lies entirely
          before TO.  */
4710   while (start < to
4711          && find_composition (start, to, &start, &end, &prop, obj)
4712          && end <= to);
4713
4714   /* Make coding->cmp_data point to the first memory block.  */
4715   while (coding->cmp_data->prev)
4716     coding->cmp_data = coding->cmp_data->prev;
4717   coding->cmp_data_start = 0;
4718 }
4719
4720 /* Reflect the saved information about compositions to OBJ.
4721    CODING->cmp_data points to a memory block for the information.  OBJ
4722    is a buffer or a string, defaults to the current buffer.
        Each record in a block is read as follows: data[0] is the length
        of the record (also the stride to the next one), data[1] and
        data[2] are the two positions passed to compose_text, data[3] is
        the composition method, and data[4] onward are the components.
        Nothing is done when CODING->cmp_data is NULL.  */
4723
4724 void
4725 coding_restore_composition (coding, obj)
4726      struct coding_system *coding;
4727      Lisp_Object obj;
4728 {
4729   struct composition_data *cmp_data = coding->cmp_data;
4730
4731   if (!cmp_data)
4732     return;
4733
       /* Start from the first memory block of the chain.  */
4734   while (cmp_data->prev)
4735     cmp_data = cmp_data->prev;
4736
4737   while (cmp_data)
4738     {
4739       int i;
4740
           /* Walk the records of this block; stop at the used size or at
              a record whose length is not positive.  */
4741       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
4742            i += cmp_data->data[i])
4743         {
4744           int *data = cmp_data->data + i;
4745           enum composition_method method = (enum composition_method) data[3];
4746           Lisp_Object components;
4747
4748           if (method == COMPOSITION_RELATIVE)
4749             components = Qnil;
4750           else
4751             {
                   /* NOTE(review): LEN is not checked here against the
                      size of ARGS; presumably the recording side bounds
                      it -- confirm.  */
4752               int len = data[0] - 4, j;
4753               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
4754
4755               for (j = 0; j < len; j++)
4756                 args[j] = make_number (data[4 + j]);
                   /* Components become a string for
                      COMPOSITION_WITH_ALTCHARS, a vector otherwise.  */
4757               components = (method == COMPOSITION_WITH_ALTCHARS
4758                             ? Fstring (len, args) : Fvector (len, args));
4759             }
4760           compose_text (data[1], data[2], components, Qnil, obj);
4761         }
4762       cmp_data = cmp_data->next;
4763     }
4764 }
4765
4766 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4767    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4768    coding system CODING, and return the status code of code conversion
4769    (currently, this value has no meaning; the function returns 0).
4770
4771    How many characters (and bytes) are converted to how many
4772    characters (and bytes) are recorded in members of the structure
4773    CODING.
4774
4775    If REPLACE is nonzero, we do various things as if the original text
4776    is deleted and a new text is inserted.  See the comments in
4777    replace_range (insdel.c) to know what we are doing.
4778
4779    If REPLACE is zero, it is assumed that the source text is unibyte.
4780    Otherwise, it is assumed that the source text is multibyte.  */
4781
4782 int
4783 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4784      int from, from_byte, to, to_byte, encodep, replace;
4785      struct coding_system *coding;
4786 {
4787   int len = to - from, len_byte = to_byte - from_byte;
4788   int require, inserted, inserted_byte;
       /* HEAD_SKIP and TAIL_SKIP are assigned only in the shrinking step
          below; they are read later only when TOTAL_SKIP > 0.  */
4789   int head_skip, tail_skip, total_skip = 0;
4790   Lisp_Object saved_coding_symbol;
4791   int first = 1;
4792   unsigned char *src, *dst;
4793   Lisp_Object deletion;
4794   int orig_point = PT, orig_len = len;
4795   int prev_Z;
4796   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
4797
4798   coding->src_multibyte = replace && multibyte_p;
4799   coding->dst_multibyte = multibyte_p;
4800
4801   deletion = Qnil;
4802   saved_coding_symbol = Qnil;
4803
       /* If point is strictly inside the region, move it to FROM.  */
4804   if (from < PT && PT < to)
4805     {
4806       TEMP_SET_PT_BOTH (from, from_byte);
4807       orig_point = from;
4808     }
4809
4810   if (replace)
4811     {
4812       int saved_from = from;
4813       int saved_inhibit_modification_hooks;
4814
           /* prepare_to_modify_buffer may change FROM; if so, recompute
              the positions that depend on it.  */
4815       prepare_to_modify_buffer (from, to, &from);
4816       if (saved_from != from)
4817         {
4818           to = from + len;
4819           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4820           len_byte = to_byte - from_byte;
4821         }
4822
4823       /* The code conversion routine can not preserve text properties
4824          for now.  So, we must remove all text properties in the
4825          region.  Here, we must suppress all modification hooks.  */
4826       saved_inhibit_modification_hooks = inhibit_modification_hooks;
4827       inhibit_modification_hooks = 1;
4828       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
4829       inhibit_modification_hooks = saved_inhibit_modification_hooks;
4830     }
4831
4832   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4833     {
4834       /* We must detect encoding of text and eol format.  */
4835
4836       if (from < GPT && to > GPT)
4837         move_gap_both (from, from_byte);
4838       if (coding->type == coding_type_undecided)
4839         {
4840           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4841           if (coding->type == coding_type_undecided)
4842             {
4843               /* It seems that the text contains only ASCII, but we
4844                  should not leave it undecided because the deeper
4845                  decoding routine (decode_coding) tries to detect the
4846                  encodings again in vain.  */
4847               coding->type = coding_type_emacs_mule;
4848               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
4849             }
4850         }
4851       if (coding->eol_type == CODING_EOL_UNDECIDED
4852           && coding->type != coding_type_ccl)
4853         {
4854           saved_coding_symbol = coding->symbol;
4855           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4856           if (coding->eol_type == CODING_EOL_UNDECIDED)
4857             coding->eol_type = CODING_EOL_LF;
4858           /* We had better recover the original eol format if we
4859              encounter an inconsistent eol format while decoding.  */
4860           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4861         }
4862     }
4863
4864   /* Now we convert the text.  */
4865
4866   /* For encoding, we must process pre-write-conversion in advance.  */
4867   if (! inhibit_pre_post_conversion
4868       && encodep
4869       && SYMBOLP (coding->pre_write_conversion)
4870       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4871     {
4872       /* The function in pre-write-conversion may put a new text in a
4873          new buffer.  */
4874       struct buffer *prev = current_buffer;
4875       Lisp_Object new;
4876       int count = specpdl_ptr - specpdl;
4877
4878       record_unwind_protect (code_convert_region_unwind, Qnil);
4879       /* We should not call any more pre-write/post-read-conversion
4880          functions while this pre-write-conversion is running.  */
4881       inhibit_pre_post_conversion = 1;
4882       call2 (coding->pre_write_conversion,
4883              make_number (from), make_number (to));
4884       inhibit_pre_post_conversion = 0;
4885       /* Discard the unwind protect.  */
4886       specpdl_ptr--;
4887
4888       if (current_buffer != prev)
4889         {
               /* The function switched buffers: replace the region in
                  the original buffer with the accessible text of the new
                  buffer, kill the new buffer, and recompute the
                  positions.  */
4890           len = ZV - BEGV;
4891           new = Fcurrent_buffer ();
4892           set_buffer_internal_1 (prev);
4893           del_range_2 (from, from_byte, to, to_byte, 0);
4894           TEMP_SET_PT_BOTH (from, from_byte);
4895           insert_from_buffer (XBUFFER (new), 1, len, 0);
4896           Fkill_buffer (new);
4897           if (orig_point >= to)
4898             orig_point += len - orig_len;
4899           else if (orig_point > from)
4900             orig_point = from;
4901           orig_len = len;
4902           to = from + len;
4903           from_byte = CHAR_TO_BYTE (from);
4904           to_byte = CHAR_TO_BYTE (to);
4905           len_byte = to_byte - from_byte;
4906           TEMP_SET_PT_BOTH (from, from_byte);
4907         }
4908     }
4909
       /* Keep a copy of the original text; it is handed to
          adjust_after_replace below.  */
4910   if (replace)
4911     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4912
4913   if (coding->composing != COMPOSITION_DISABLED)
4914     {
4915       if (encodep)
4916         coding_save_composition (coding, from, to, Fcurrent_buffer ());
4917       else
4918         coding_allocate_composition_data (coding, from);
4919     }
4920
4921   /* Try to skip the heading and tailing ASCIIs.  */
4922   if (coding->type != coding_type_ccl)
4923     {
4924       int from_byte_orig = from_byte, to_byte_orig = to_byte;
4925
4926       if (from < GPT && GPT < to)
4927         move_gap_both (from, from_byte);
4928       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
           /* If the whole region was skipped and nothing else remains to
              be done, report the text as converted unchanged.  */
4929       if (from_byte == to_byte
4930           && (encodep || NILP (coding->post_read_conversion))
4931           && ! CODING_REQUIRE_FLUSHING (coding))
4932         {
4933           coding->produced = len_byte;
4934           coding->produced_char = len;
4935           if (!replace)
4936             /* We must record and adjust for this new text now.  */
4937             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4938           return 0;
4939         }
4940
           /* The skipped bytes are also subtracted from the character
              counts below, i.e. each skipped byte is counted as one
              character.  */
4941       head_skip = from_byte - from_byte_orig;
4942       tail_skip = to_byte_orig - to_byte;
4943       total_skip = head_skip + tail_skip;
4944       from += head_skip;
4945       to -= tail_skip;
4946       len -= total_skip; len_byte -= total_skip;
4947     }
4948
4949   /* For conversion, we must put the gap before the text in addition to
4950      making the gap larger for efficient decoding.  The required gap
4951      size starts from 2000 which is the magic number used in make_gap.
4952      But, after one batch of conversion, it will be incremented if we
4953      find that it is not enough.  */
4954   require = 2000;
4955
4956   if (GAP_SIZE  < require)
4957     make_gap (require - GAP_SIZE);
4958   move_gap_both (from, from_byte);
4959
4960   inserted = inserted_byte = 0;
4961
       /* From here on the source text is counted as part of the gap
          (see the memory diagrams in the loop below).  */
4962   GAP_SIZE += len_byte;
4963   ZV -= len;
4964   Z -= len;
4965   ZV_BYTE -= len_byte;
4966   Z_BYTE -= len_byte;
4967
4968   if (GPT - BEG < BEG_UNCHANGED)
4969     BEG_UNCHANGED = GPT - BEG;
4970   if (Z - GPT < END_UNCHANGED)
4971     END_UNCHANGED = Z - GPT;
4972
4973   if (!encodep && coding->src_multibyte)
4974     {
4975       /* Decoding routines expects that the source text is unibyte.
4976          We must convert 8-bit characters of multibyte form to
4977          unibyte.  */
4978       int len_byte_orig = len_byte;
4979       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
           /* If the text got shorter, move it so that it again ends at
              GAP_END_ADDR.  */
4980       if (len_byte < len_byte_orig)
4981         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
4982                     len_byte);
4983       coding->src_multibyte = 0;
4984     }
4985
4986   for (;;)
4987     {
4988       int result;
4989
4990       /* The buffer memory is now:
4991          +--------+converted-text+---------+-------original-text-------+---+
4992          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
4993                   |<---------------------- GAP ----------------------->|  */
4994       src = GAP_END_ADDR - len_byte;
4995       dst = GPT_ADDR + inserted_byte;
4996
4997       if (encodep)
4998         result = encode_coding (coding, src, dst, len_byte, 0);
4999       else
5000         result = decode_coding (coding, src, dst, len_byte, 0);
5001
5002       /* The buffer memory is now:
5003          +--------+-------converted-text----+--+------original-text----+---+
5004          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5005                   |<---------------------- GAP ----------------------->|  */
5006
5007       inserted += coding->produced_char;
5008       inserted_byte += coding->produced;
5009       len_byte -= coding->consumed;
5010
5011       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5012         {
               /* Get more room for composition data and go on.  */
5013           coding_allocate_composition_data (coding, from + inserted);
5014           continue;
5015         }
5016
5017       src += coding->consumed;
5018       dst += coding->produced;
5019
5020       if (result == CODING_FINISH_NORMAL)
5021         {
5022           src += len_byte;
5023           break;
5024         }
5025       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5026         {
5027           unsigned char *pend = dst, *p = pend - inserted_byte;
5028           Lisp_Object eol_type;
5029
5030           /* Encode LFs back to the original eol format (CR or CRLF).  */
5031           if (coding->eol_type == CODING_EOL_CR)
5032             {
5033               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5034             }
5035           else
5036             {
5037               int count = 0;
5038
                   /* Count the LFs; each one needs one more byte for
                      the CR.  */
5039               while (p < pend) if (*p++ == '\n') count++;
5040               if (src - dst < count)
5041                 {
5042                   /* We don't have sufficient room for encoding LFs
5043                      back to CRLF.  We must record converted and
5044                      not-yet-converted text back to the buffer
5045                      content, enlarge the gap, then record them out of
5046                      the buffer contents again.  */
5047                   int add = len_byte + inserted_byte;
5048
5049                   GAP_SIZE -= add;
5050                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5051                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5052                   make_gap (count - GAP_SIZE);
5053                   GAP_SIZE += add;
5054                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5055                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5056                   /* Don't forget to update SRC, DST, and PEND.  */
5057                   src = GAP_END_ADDR - len_byte;
5058                   dst = GPT_ADDR + inserted_byte;
5059                   pend = dst;
5060                 }
5061               inserted += count;
5062               inserted_byte += count;
5063               coding->produced += count;
                   /* Copy the converted text backward to its new place,
                      putting a CR before each LF.  */
5064               p = dst = pend + count;
5065               while (count)
5066                 {
5067                   *--p = *--pend;
5068                   if (*p == '\n') count--, *--p = '\r';
5069                 }
5070             }
5071
5072           /* Suppress eol-format conversion in the further conversion.  */
5073           coding->eol_type = CODING_EOL_LF;
5074
5075           /* Set the coding system symbol to that for Unix-like EOL.  */
5076           eol_type = Fget (saved_coding_symbol, Qeol_type);
5077           if (VECTORP (eol_type)
5078               && XVECTOR (eol_type)->size == 3
5079               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5080             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5081           else
5082             coding->symbol = saved_coding_symbol;
5083
5084           continue;
5085         }
5086       if (len_byte <= 0)
5087         {
               /* No source is left.  For CCL, run once more with
                  CODING_MODE_LAST_BLOCK set before finishing.  */
5088           if (coding->type != coding_type_ccl
5089               || coding->mode & CODING_MODE_LAST_BLOCK)
5090             break;
5091           coding->mode |= CODING_MODE_LAST_BLOCK;
5092           continue;
5093         }
5094       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5095         {
5096           /* The source text ends in invalid codes.  Let's just
5097              make them valid buffer contents, and finish conversion.  */
5098           inserted += len_byte;
5099           inserted_byte += len_byte;
5100           while (len_byte--)
5101             *dst++ = *src++;
5102           break;
5103         }
5104       if (result == CODING_FINISH_INTERRUPT)
5105         {
5106           /* The conversion procedure was interrupted by a user.  */
5107           break;
5108         }
5109       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5110       if (coding->consumed < 1)
5111         {
5112           /* It's quite strange to require more memory without
5113              consuming any bytes.  Perhaps CCL program bug.  */
5114           break;
5115         }
5116       if (first)
5117         {
5118           /* We have just done the first batch of conversion which was
5119              stopped because of insufficient gap.  Let's reconsider the
5120              required gap size (i.e. SRC - DST) now.
5121
5122              We have converted ORIG bytes (== coding->consumed) into
5123              NEW bytes (coding->produced).  To convert the remaining
5124              LEN bytes, we may need REQUIRE bytes of gap, where:
5125                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5126                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5127              Here, we are sure that NEW >= ORIG.  */
5128           float ratio = coding->produced - coding->consumed;
5129           ratio /= coding->consumed;
5130           require = len_byte * ratio;
5131           first = 0;
5132         }
5133       if ((src - dst) < (require + 2000))
5134         {
5135           /* See the comment above the previous call of make_gap.  */
5136           int add = len_byte + inserted_byte;
5137
5138           GAP_SIZE -= add;
5139           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5140           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5141           make_gap (require + 2000);
5142           GAP_SIZE += add;
5143           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5144           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5145         }
5146     }
5147   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5148
5149   if (encodep && coding->dst_multibyte)
5150     {
5151       /* The output is unibyte.  We must convert 8-bit characters to
5152          multibyte form.  */
5153       if (inserted_byte * 2 > GAP_SIZE)
5154         {
               /* Enlarge the gap with the same temporary bookkeeping as
                  in the loop above.  */
5155           GAP_SIZE -= inserted_byte;
5156           ZV += inserted_byte; Z += inserted_byte;
5157           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5158           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5159           make_gap (inserted_byte - GAP_SIZE);
5160           GAP_SIZE += inserted_byte;
5161           ZV -= inserted_byte; Z -= inserted_byte;
5162           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5163           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5164         }
5165       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5166     }
5167
5168   /* If we have shrunk the conversion area, adjust it now.  */
5169   if (total_skip > 0)
5170     {
           /* Move the skipped tail right after the converted text, then
              count the skipped head and tail as inserted text too.  */
5171       if (tail_skip > 0)
5172         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5173       inserted += total_skip; inserted_byte += total_skip;
5174       GAP_SIZE += total_skip;
5175       GPT -= head_skip; GPT_BYTE -= head_skip;
5176       ZV -= total_skip; ZV_BYTE -= total_skip;
5177       Z -= total_skip; Z_BYTE -= total_skip;
5178       from -= head_skip; from_byte -= head_skip;
5179       to += tail_skip; to_byte += tail_skip;
5180     }
5181
       /* INSERTED is recomputed from the change of Z made by
          adjust_after_replace.  */
5182   prev_Z = Z;
5183   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5184   inserted = Z - prev_Z;
5185
5186   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5187     coding_restore_composition (coding, Fcurrent_buffer ());
5188   coding_free_composition_data (coding);
5189
5190   if (! inhibit_pre_post_conversion
5191       && ! encodep && ! NILP (coding->post_read_conversion))
5192     {
5193       Lisp_Object val;
5194       int count = specpdl_ptr - specpdl;
5195
5196       if (from != PT)
5197         TEMP_SET_PT_BOTH (from, from_byte);
5198       prev_Z = Z;
5199       record_unwind_protect (code_convert_region_unwind, Qnil);
5200       /* We should not call any more pre-write/post-read-conversion
5201          functions while this post-read-conversion is running.  */
5202       inhibit_pre_post_conversion = 1;
5203       val = call1 (coding->post_read_conversion, make_number (inserted));
5204       inhibit_pre_post_conversion = 0;
5205       /* Discard the unwind protect.  */
5206       specpdl_ptr--;
           /* VAL is only type-checked; the size change is taken from
              the change of Z.  */
5207       CHECK_NUMBER (val, 0);
5208       inserted += Z - prev_Z;
5209     }
5210
       /* Restore point: shift it by the size change if it was at or
          after the end of the original text, else put it at FROM.  */
5211   if (orig_point >= from)
5212     {
5213       if (orig_point >= from + orig_len)
5214         orig_point += inserted - orig_len;
5215       else
5216         orig_point = from;
5217       TEMP_SET_PT (orig_point);
5218     }
5219
5220   if (replace)
5221     {
5222       signal_after_change (from, to - from, inserted);
5223       update_compositions (from, from + inserted, CHECK_BORDER);
5224     }
5225
       /* Record the totals for the whole region in CODING.  */
5226   {
5227     coding->consumed = to_byte - from_byte;
5228     coding->consumed_char = to - from;
5229     coding->produced = inserted_byte;
5230     coding->produced_char = inserted;
5231   }
5232
5233   return 0;
5234 }
5235
/* Run the pre-write-conversion function (if ENCODEP is nonzero) or
        the post-read-conversion function (otherwise) of CODING on the
        contents of STR, using the working buffer
        " *code-converting-work*", and return the resulting buffer
        contents as a string.  The buffer that was current on entry is
        restored through unbind_to.  */
5236 Lisp_Object
5237 run_pre_post_conversion_on_str (str, coding, encodep)
5238      Lisp_Object str;
5239      struct coding_system *coding;
5240      int encodep;
5241 {
5242   int count = specpdl_ptr - specpdl;
5243   struct gcpro gcpro1;
       /* NOTE(review): PREV is not used in this function.  */
5244   struct buffer *prev = current_buffer;
5245   int multibyte = STRING_MULTIBYTE (str);
5246
       /* On unwinding, go back to the current buffer and reset
          inhibit_pre_post_conversion.  */
5247   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5248   record_unwind_protect (code_convert_region_unwind, Qnil);
5249   GCPRO1 (str);
5250   temp_output_buffer_setup (" *code-converting-work*");
5251   set_buffer_internal (XBUFFER (Vstandard_output));
5252   /* We must insert the contents of STR as is without
5253      unibyte<->multibyte conversion.  For that, we adjust the
5254      multibyteness of the working buffer to that of STR.  */
5255   Ferase_buffer ();
5256   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5257   insert_from_string (str, 0, 0,
5258                       XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
5259   UNGCPRO;
       /* Keep the conversion function from triggering further
          pre-write/post-read conversions while it runs.  */
5260   inhibit_pre_post_conversion = 1;
5261   if (encodep)
5262     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5263   else
5264     {
5265       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5266       call1 (coding->post_read_conversion, make_number (Z - BEG));
5267     }
5268   inhibit_pre_post_conversion = 0;
5269   str = make_buffer_string (BEG, Z, 1);
5270   return unbind_to (count, str);
5271 }
5272
5273 Lisp_Object
5274 decode_coding_string (str, coding, nocopy)
5275      Lisp_Object str;
5276      struct coding_system *coding;
5277      int nocopy;
5278 {
5279   int len;
5280   struct conversion_buffer buf;
5281   int from, to, to_byte;
5282   struct gcpro gcpro1;
5283   Lisp_Object saved_coding_symbol;
5284   int result;
5285   int require_decoding;
5286   int shrinked_bytes = 0;
5287   Lisp_Object newstr;
5288   int consumed, consumed_char, produced, produced_char;
5289
5290   from = 0;
5291   to = XSTRING (str)->size;
5292   to_byte = STRING_BYTES (XSTRING (str));
5293
5294   saved_coding_symbol = Qnil;
5295   if (CODING_REQUIRE_DETECTION (coding))
5296     {
5297       /* See the comments in code_convert_region.  */
5298       if (coding->type == coding_type_undecided)
5299         {
5300           detect_coding (coding, XSTRING (str)->data, to_byte);
5301           if (coding->type == coding_type_undecided)
5302             coding->type = coding_type_emacs_mule;
5303         }
5304       if (coding->eol_type == CODING_EOL_UNDECIDED
5305           && coding->type != coding_type_ccl)
5306         {
5307           saved_coding_symbol = coding->symbol;
5308           detect_eol (coding, XSTRING (str)->data, to_byte);
5309           if (coding->eol_type == CODING_EOL_UNDECIDED)
5310             coding->eol_type = CODING_EOL_LF;
5311           /* We had better recover the original eol format if we
5312              encounter an inconsistent eol format while decoding.  */
5313           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5314         }
5315     }
5316
5317   coding->src_multibyte = 0;
5318   coding->dst_multibyte = (coding->type != coding_type_no_conversion
5319                            && coding->type != coding_type_raw_text);
5320   require_decoding = CODING_REQUIRE_DECODING (coding);
5321
5322   if (STRING_MULTIBYTE (str))
5323     {
5324       /* Decoding routines expect the source text to be unibyte.  */
5325       str = Fstring_as_unibyte (str);
5326       to_byte = STRING_BYTES (XSTRING (str));
5327       nocopy = 1;
5328     }
5329
5330   /* Try to skip the heading and tailing ASCIIs.  */
5331   if (require_decoding && coding->type != coding_type_ccl)
5332     {
5333       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5334                                 0);
5335       if (from == to_byte)
5336         require_decoding = 0;
5337       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5338     }
5339
5340   if (!require_decoding)
5341     {
5342       coding->consumed = STRING_BYTES (XSTRING (str));
5343       coding->consumed_char = XSTRING (str)->size;
5344       if (coding->dst_multibyte)
5345         {
5346           str = Fstring_as_multibyte (str);
5347           nocopy = 1;
5348         }
5349       coding->produced = STRING_BYTES (XSTRING (str));
5350       coding->produced_char = XSTRING (str)->size;
5351       return (nocopy ? str : Fcopy_sequence (str));
5352     }
5353
5354   if (coding->composing != COMPOSITION_DISABLED)
5355     coding_allocate_composition_data (coding, from);
5356   len = decoding_buffer_size (coding, to_byte - from);
5357   allocate_conversion_buffer (buf, len);
5358
5359   consumed = consumed_char = produced = produced_char = 0;
5360   while (1)
5361     {
5362       result = decode_coding (coding, XSTRING (str)->data + from + consumed,
5363                               buf.data + produced, to_byte - from - consumed,
5364                               buf.size - produced);
5365       consumed += coding->consumed;
5366       consumed_char += coding->consumed_char;
5367       produced += coding->produced;
5368       produced_char += coding->produced_char;
5369       if (result == CODING_FINISH_NORMAL
5370           || (result == CODING_FINISH_INSUFFICIENT_SRC
5371               && coding->consumed == 0))
5372         break;
5373       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5374         coding_allocate_composition_data (coding, from + produced_char);
5375       else if (result == CODING_FINISH_INSUFFICIENT_DST)
5376         extend_conversion_buffer (&buf);
5377       else if (result == CODING_FINISH_INCONSISTENT_EOL)
5378         {
5379           /* Recover the original EOL format.  */
5380           if (coding->eol_type == CODING_EOL_CR)
5381             {
5382               unsigned char *p;
5383               for (p = buf.data; p < buf.data + produced; p++)
5384                 if (*p == '\n') *p = '\r';
5385             }
5386           else if (coding->eol_type == CODING_EOL_CRLF)
5387             {
5388               int num_eol = 0;
5389               unsigned char *p0, *p1;
5390               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
5391                 if (*p0 == '\n') num_eol++;
5392               if (produced + num_eol >= buf.size)
5393                 extend_conversion_buffer (&buf);
5394               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
5395                 {
5396                   *--p1 = *--p0;
5397                   if (*p0 == '\n') *--p1 = '\r';
5398                 }
5399               produced += num_eol;
5400               produced_char += num_eol;
5401             }
5402           coding->eol_type = CODING_EOL_LF;
5403           coding->symbol = saved_coding_symbol;
5404         }
5405     }
5406
5407   coding->consumed = consumed;
5408   coding->consumed_char = consumed_char;
5409   coding->produced = produced;
5410   coding->produced_char = produced_char;
5411
5412   if (coding->dst_multibyte)
5413     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
5414                                            produced + shrinked_bytes);
5415   else
5416     newstr = make_uninit_string (produced + shrinked_bytes);
5417   if (from > 0)
5418     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5419   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5420   if (shrinked_bytes > from)
5421     bcopy (XSTRING (str)->data + to_byte,
5422            XSTRING (newstr)->data + from + produced,
5423            shrinked_bytes - from);
5424   free_conversion_buffer (&buf);
5425
5426   if (coding->cmp_data && coding->cmp_data->used)
5427     coding_restore_composition (coding, newstr);
5428   coding_free_composition_data (coding);
5429
5430   if (SYMBOLP (coding->post_read_conversion)
5431       && !NILP (Ffboundp (coding->post_read_conversion)))
5432     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
5433
5434   return newstr;
5435 }
5436
5437 Lisp_Object
5438 encode_coding_string (str, coding, nocopy)
5439      Lisp_Object str;
5440      struct coding_system *coding;
5441      int nocopy;
5442 {
5443   int len;
5444   struct conversion_buffer buf;
5445   int from, to, to_byte;
5446   struct gcpro gcpro1;
5447   Lisp_Object saved_coding_symbol;
5448   int result;
5449   int shrinked_bytes = 0;
5450   Lisp_Object newstr;
5451   int consumed, consumed_char, produced, produced_char;
5452
5453   if (SYMBOLP (coding->pre_write_conversion)
5454       && !NILP (Ffboundp (coding->pre_write_conversion)))
5455     str = run_pre_post_conversion_on_str (str, coding, 1);
5456
5457   from = 0;
5458   to = XSTRING (str)->size;
5459   to_byte = STRING_BYTES (XSTRING (str));
5460
5461   saved_coding_symbol = Qnil;
5462
5463   /* Encoding routines determine the multibyteness of the source text
5464      by coding->src_multibyte.  */
5465   coding->src_multibyte = STRING_MULTIBYTE (str);
5466   coding->dst_multibyte = 0;
5467   if (! CODING_REQUIRE_ENCODING (coding))
5468     {
5469       coding->consumed = STRING_BYTES (XSTRING (str));
5470       coding->consumed_char = XSTRING (str)->size;
5471       if (STRING_MULTIBYTE (str))
5472         {
5473           str = Fstring_as_unibyte (str);
5474           nocopy = 1;
5475         }
5476       coding->produced = STRING_BYTES (XSTRING (str));
5477       coding->produced_char = XSTRING (str)->size;
5478       return (nocopy ? str : Fcopy_sequence (str));
5479     }
5480
5481   if (coding->composing != COMPOSITION_DISABLED)
5482     coding_save_composition (coding, from, to, str);
5483
5484   /* Try to skip the heading and tailing ASCIIs.  */
5485   if (coding->type != coding_type_ccl)
5486     {
5487       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5488                                 1);
5489       if (from == to_byte)
5490         return (nocopy ? str : Fcopy_sequence (str));
5491       shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
5492     }
5493
5494   len = encoding_buffer_size (coding, to_byte - from);
5495   allocate_conversion_buffer (buf, len);
5496
5497   consumed = consumed_char = produced = produced_char = 0;
5498   while (1)
5499     {
5500       result = encode_coding (coding, XSTRING (str)->data + from + consumed,
5501                               buf.data + produced, to_byte - from - consumed,
5502                               buf.size - produced);
5503       consumed += coding->consumed;
5504       consumed_char += coding->consumed_char;
5505       produced += coding->produced;
5506       produced_char += coding->produced_char;
5507       if (result == CODING_FINISH_NORMAL
5508           || (result == CODING_FINISH_INSUFFICIENT_SRC
5509               && coding->consumed == 0))
5510         break;
5511       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
5512       extend_conversion_buffer (&buf);
5513     }
5514
5515   coding->consumed = consumed;
5516   coding->consumed_char = consumed_char;
5517   coding->produced = produced;
5518   coding->produced_char = produced_char;
5519
5520   newstr = make_uninit_string (produced + shrinked_bytes);
5521   if (from > 0)
5522     bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
5523   bcopy (buf.data, XSTRING (newstr)->data + from, produced);
5524   if (shrinked_bytes > from)
5525     bcopy (XSTRING (str)->data + to_byte,
5526            XSTRING (newstr)->data + from + produced,
5527            shrinked_bytes - from);
5528
5529   free_conversion_buffer (&buf);
5530   coding_free_composition_data (coding);
5531
5532   return newstr;
5533 }
5534
5535 \f
5536 #ifdef emacs
5537 /*** 8. Emacs Lisp library functions ***/
5538
5539 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
5540   "Return t if OBJECT is nil or a coding-system.\n\
5541 See the documentation of `make-coding-system' for information\n\
5542 about coding-system objects.")
5543   (obj)
5544      Lisp_Object obj;
5545 {
5546   if (NILP (obj))
5547     return Qt;
5548   if (!SYMBOLP (obj))
5549     return Qnil;
5550   /* Get coding-spec vector for OBJ.  */
5551   obj = Fget (obj, Qcoding_system);
5552   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
5553           ? Qt : Qnil);
5554 }
5555
5556 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
5557        Sread_non_nil_coding_system, 1, 1, 0,
5558   "Read a coding system from the minibuffer, prompting with string PROMPT.")
5559   (prompt)
5560      Lisp_Object prompt;
5561 {
5562   Lisp_Object val;
5563   do
5564     {
5565       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5566                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
5567     }
5568   while (XSTRING (val)->size == 0);
5569   return (Fintern (val, Qnil));
5570 }
5571
5572 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
5573   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
5574 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
5575   (prompt, default_coding_system)
5576      Lisp_Object prompt, default_coding_system;
5577 {
5578   Lisp_Object val;
5579   if (SYMBOLP (default_coding_system))
5580     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
5581   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
5582                           Qt, Qnil, Qcoding_system_history,
5583                           default_coding_system, Qnil);
5584   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
5585 }
5586
5587 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
5588        1, 1, 0,
5589   "Check validity of CODING-SYSTEM.\n\
5590 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
5591 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
5592 The value of property should be a vector of length 5.")
5593   (coding_system)
5594      Lisp_Object coding_system;
5595 {
5596   CHECK_SYMBOL (coding_system, 0);
5597   if (!NILP (Fcoding_system_p (coding_system)))
5598     return coding_system;
5599   while (1)
5600     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
5601 }
5602 \f
5603 Lisp_Object
5604 detect_coding_system (src, src_bytes, highest)
5605      unsigned char *src;
5606      int src_bytes, highest;
5607 {
5608   int coding_mask, eol_type;
5609   Lisp_Object val, tmp;
5610   int dummy;
5611
5612   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
5613   eol_type  = detect_eol_type (src, src_bytes, &dummy);
5614   if (eol_type == CODING_EOL_INCONSISTENT)
5615     eol_type = CODING_EOL_UNDECIDED;
5616
5617   if (!coding_mask)
5618     {
5619       val = Qundecided;
5620       if (eol_type != CODING_EOL_UNDECIDED)
5621         {
5622           Lisp_Object val2;
5623           val2 = Fget (Qundecided, Qeol_type);
5624           if (VECTORP (val2))
5625             val = XVECTOR (val2)->contents[eol_type];
5626         }
5627       return (highest ? val : Fcons (val, Qnil));
5628     }
5629
5630   /* At first, gather possible coding systems in VAL.  */
5631   val = Qnil;
5632   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
5633     {
5634       Lisp_Object category_val, category_index;
5635
5636       category_index = Fget (XCAR (tmp), Qcoding_category_index);
5637       category_val = Fsymbol_value (XCAR (tmp));
5638       if (!NILP (category_val)
5639           && NATNUMP (category_index)
5640           && (coding_mask & (1 << XFASTINT (category_index))))
5641         {
5642           val = Fcons (category_val, val);
5643           if (highest)
5644             break;
5645         }
5646     }
5647   if (!highest)
5648     val = Fnreverse (val);
5649
5650   /* Then, replace the elements with subsidiary coding systems.  */
5651   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
5652     {
5653       if (eol_type != CODING_EOL_UNDECIDED
5654           && eol_type != CODING_EOL_INCONSISTENT)
5655         {
5656           Lisp_Object eol;
5657           eol = Fget (XCAR (tmp), Qeol_type);
5658           if (VECTORP (eol))
5659             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
5660         }
5661     }
5662   return (highest ? XCAR (val) : val);
5663 }
5664
5665 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
5666        2, 3, 0,
5667   "Detect coding system of the text in the region between START and END.\n\
5668 Return a list of possible coding systems ordered by priority.\n\
5669 \n\
5670 If only ASCII characters are found, it returns a list of single element\n\
5671 `undecided' or its subsidiary coding system according to a detected\n\
5672 end-of-line format.\n\
5673 \n\
5674 If optional argument HIGHEST is non-nil, return the coding system of\n\
5675 highest priority.")
5676   (start, end, highest)
5677      Lisp_Object start, end, highest;
5678 {
5679   int from, to;
5680   int from_byte, to_byte;
5681
5682   CHECK_NUMBER_COERCE_MARKER (start, 0);
5683   CHECK_NUMBER_COERCE_MARKER (end, 1);
5684
5685   validate_region (&start, &end);
5686   from = XINT (start), to = XINT (end);
5687   from_byte = CHAR_TO_BYTE (from);
5688   to_byte = CHAR_TO_BYTE (to);
5689
5690   if (from < GPT && to >= GPT)
5691     move_gap_both (to, to_byte);
5692
5693   return detect_coding_system (BYTE_POS_ADDR (from_byte),
5694                                to_byte - from_byte,
5695                                !NILP (highest));
5696 }
5697
5698 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
5699        1, 2, 0,
5700   "Detect coding system of the text in STRING.\n\
5701 Return a list of possible coding systems ordered by priority.\n\
5702 \n\
5703 If only ASCII characters are found, it returns a list of single element\n\
5704 `undecided' or its subsidiary coding system according to a detected\n\
5705 end-of-line format.\n\
5706 \n\
5707 If optional argument HIGHEST is non-nil, return the coding system of\n\
5708 highest priority.")
5709   (string, highest)
5710      Lisp_Object string, highest;
5711 {
5712   CHECK_STRING (string, 0);
5713
5714   return detect_coding_system (XSTRING (string)->data,
5715                                STRING_BYTES (XSTRING (string)),
5716                                !NILP (highest));
5717 }
5718
5719 /* Return an intersection of lists L1 and L2.  */
5720
5721 static Lisp_Object
5722 intersection (l1, l2)
5723      Lisp_Object l1, l2;
5724 {
5725   Lisp_Object val;
5726
5727   for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
5728     {
5729       if (!NILP (Fmemq (XCAR (l1), l2)))
5730         val = Fcons (XCAR (l1), val);
5731     }
5732   return val;
5733 }
5734
5735
5736 /*  Subroutine for Fsafe_coding_systems_region_internal.
5737
5738     Return a list of coding systems that safely encode the multibyte
5739     text between P and PEND.  SAFE_CODINGS, if non-nil, is a list of
5740     possible coding systems.  If it is nil, it means that we have not
5741     yet found any coding systems.
5742
5743     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
5744     element of WORK_TABLE is set to t once the element is looked up.
5745
5746     If a non-ASCII single byte char is found, set
5747     *single_byte_char_found to 1.  */
5748
5749 static Lisp_Object
5750 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
5751      unsigned char *p, *pend;
5752      Lisp_Object safe_codings, work_table;
5753      int *single_byte_char_found;
5754 {
5755   int c, len, idx;
5756   Lisp_Object val;
5757
5758   while (p < pend)
5759     {
5760       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
5761       p += len;
5762       if (ASCII_BYTE_P (c))
5763         /* We can ignore ASCII characters here.  */
5764         continue;
5765       if (SINGLE_BYTE_CHAR_P (c))
5766         *single_byte_char_found = 1;
5767       if (NILP (safe_codings))
5768         continue;
5769       /* Check the safe coding systems for C.  */
5770       val = char_table_ref_and_index (work_table, c, &idx);
5771       if (EQ (val, Qt))
5772         /* This element was already checked.  Ignore it.  */
5773         continue;
5774       /* Remember that we checked this element.  */
5775       CHAR_TABLE_SET (work_table, make_number (idx), Qt);
5776
5777       /* If there are some safe coding systems for C and we have
5778          already found the other set of coding systems for the
5779          different characters, get the intersection of them.  */
5780       if (!EQ (safe_codings, Qt) && !NILP (val))
5781         val = intersection (safe_codings, val);
5782       safe_codings = val;
5783     }
5784   return safe_codings;
5785 }
5786
5787
5788 /* Return a list of coding systems that safely encode the text between
5789    START and END.  If the text contains only ASCII or is unibyte,
5790    return t.  */
5791
5792 DEFUN ("find-coding-systems-region-internal",
5793        Ffind_coding_systems_region_internal,
5794        Sfind_coding_systems_region_internal, 2, 2, 0,
5795   "Internal use only.")
5796   (start, end)
5797      Lisp_Object start, end;
5798 {
5799   Lisp_Object work_table, safe_codings;
5800   int non_ascii_p = 0;
5801   int single_byte_char_found = 0;
5802   unsigned char *p1, *p1end, *p2, *p2end, *p;
5803   Lisp_Object args[2];
5804
5805   if (STRINGP (start))
5806     {
5807       if (!STRING_MULTIBYTE (start))
5808         return Qt;
5809       p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
5810       p2 = p2end = p1end;
5811       if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
5812         non_ascii_p = 1;
5813     }
5814   else
5815     {
5816       int from, to, stop;
5817
5818       CHECK_NUMBER_COERCE_MARKER (start, 0);
5819       CHECK_NUMBER_COERCE_MARKER (end, 1);
5820       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
5821         args_out_of_range (start, end);
5822       if (NILP (current_buffer->enable_multibyte_characters))
5823         return Qt;
5824       from = CHAR_TO_BYTE (XINT (start));
5825       to = CHAR_TO_BYTE (XINT (end));
5826       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
5827       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
5828       if (stop == to)
5829         p2 = p2end = p1end;
5830       else
5831         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
5832       if (XINT (end) - XINT (start) != to - from)
5833         non_ascii_p = 1;
5834     }
5835
5836   if (!non_ascii_p)
5837     {
5838       /* We are sure that the text contains no multibyte character.
5839          Check if it contains eight-bit-graphic.  */
5840       p = p1;
5841       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
5842       if (p == p1end)
5843         {
5844           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
5845           if (p == p2end)
5846             return Qt;
5847         }
5848     }
5849
5850   /* The text contains non-ASCII characters.  */
5851   work_table = Fcopy_sequence (Vchar_coding_system_table);
5852   safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
5853                                     &single_byte_char_found);
5854   if (p2 < p2end)
5855     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
5856                                       &single_byte_char_found);
5857
5858   if (!single_byte_char_found)
5859     {
5860       /* Append generic coding systems.  */
5861       Lisp_Object args[2];
5862       args[0] = safe_codings;
5863       args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
5864                                         make_number (0));
5865       safe_codings = Fappend (2, args);
5866     }
5867   else
5868     safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings));
5869   return safe_codings;
5870 }
5871
5872
5873 Lisp_Object
5874 code_convert_region1 (start, end, coding_system, encodep)
5875      Lisp_Object start, end, coding_system;
5876      int encodep;
5877 {
5878   struct coding_system coding;
5879   int from, to, len;
5880
5881   CHECK_NUMBER_COERCE_MARKER (start, 0);
5882   CHECK_NUMBER_COERCE_MARKER (end, 1);
5883   CHECK_SYMBOL (coding_system, 2);
5884
5885   validate_region (&start, &end);
5886   from = XFASTINT (start);
5887   to = XFASTINT (end);
5888
5889   if (NILP (coding_system))
5890     return make_number (to - from);
5891
5892   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5893     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5894
5895   coding.mode |= CODING_MODE_LAST_BLOCK;
5896   coding.src_multibyte = coding.dst_multibyte
5897     = !NILP (current_buffer->enable_multibyte_characters);
5898   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
5899                        &coding, encodep, 1);
5900   Vlast_coding_system_used = coding.symbol;
5901   return make_number (coding.produced_char);
5902 }
5903
5904 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
5905        3, 3, "r\nzCoding system: ",
5906   "Decode the current region by specified coding system.\n\
5907 When called from a program, takes three arguments:\n\
5908 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5909 This function sets `last-coding-system-used' to the precise coding system\n\
5910 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5911 not fully specified.)\n\
5912 It returns the length of the decoded text.")
5913   (start, end, coding_system)
5914      Lisp_Object start, end, coding_system;
5915 {
5916   return code_convert_region1 (start, end, coding_system, 0);
5917 }
5918
5919 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
5920        3, 3, "r\nzCoding system: ",
5921   "Encode the current region by specified coding system.\n\
5922 When called from a program, takes three arguments:\n\
5923 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5924 This function sets `last-coding-system-used' to the precise coding system\n\
5925 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5926 not fully specified.)\n\
5927 It returns the length of the encoded text.")
5928   (start, end, coding_system)
5929      Lisp_Object start, end, coding_system;
5930 {
5931   return code_convert_region1 (start, end, coding_system, 1);
5932 }
5933
5934 Lisp_Object
5935 code_convert_string1 (string, coding_system, nocopy, encodep)
5936      Lisp_Object string, coding_system, nocopy;
5937      int encodep;
5938 {
5939   struct coding_system coding;
5940
5941   CHECK_STRING (string, 0);
5942   CHECK_SYMBOL (coding_system, 1);
5943
5944   if (NILP (coding_system))
5945     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
5946
5947   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5948     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5949
5950   coding.mode |= CODING_MODE_LAST_BLOCK;
5951   string = (encodep
5952             ? encode_coding_string (string, &coding, !NILP (nocopy))
5953             : decode_coding_string (string, &coding, !NILP (nocopy)));
5954   Vlast_coding_system_used = coding.symbol;
5955
5956   return string;
5957 }
5958
5959 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
5960        2, 3, 0,
5961   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5962 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5963 if the decoding operation is trivial.\n\
5964 This function sets `last-coding-system-used' to the precise coding system\n\
5965 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5966 not fully specified.)")
5967   (string, coding_system, nocopy)
5968      Lisp_Object string, coding_system, nocopy;
5969 {
5970   return code_convert_string1 (string, coding_system, nocopy, 0);
5971 }
5972
5973 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
5974        2, 3, 0,
5975   "Encode STRING to CODING-SYSTEM, and return the result.\n\
5976 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5977 if the encoding operation is trivial.\n\
5978 This function sets `last-coding-system-used' to the precise coding system\n\
5979 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5980 not fully specified.)")
5981   (string, coding_system, nocopy)
5982      Lisp_Object string, coding_system, nocopy;
5983 {
5984   return code_convert_string1 (string, coding_system, nocopy, 1);
5985 }
5986
5987 /* Encode or decode STRING according to CODING_SYSTEM.
5988    Do not set Vlast_coding_system_used.
5989
5990    This function is called only from macros DECODE_FILE and
5991    ENCODE_FILE, thus we ignore character composition.  */
5992
5993 Lisp_Object
5994 code_convert_string_norecord (string, coding_system, encodep)
5995      Lisp_Object string, coding_system;
5996      int encodep;
5997 {
5998   struct coding_system coding;
5999
6000   CHECK_STRING (string, 0);
6001   CHECK_SYMBOL (coding_system, 1);
6002
6003   if (NILP (coding_system))
6004     return string;
6005
6006   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6007     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
6008
6009   coding.composing = COMPOSITION_DISABLED;
6010   coding.mode |= CODING_MODE_LAST_BLOCK;
6011   return (encodep
6012           ? encode_coding_string (string, &coding, 1)
6013           : decode_coding_string (string, &coding, 1));
6014 }
6015 \f
6016 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6017   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
6018 Return the corresponding character.")
6019   (code)
6020      Lisp_Object code;
6021 {
6022   unsigned char c1, c2, s1, s2;
6023   Lisp_Object val;
6024
6025   CHECK_NUMBER (code, 0);
6026   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
6027   if (s1 == 0)
6028     {
6029       if (s2 < 0x80)
6030         XSETFASTINT (val, s2);
6031       else if (s2 >= 0xA0 || s2 <= 0xDF)
6032         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
6033       else
6034         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6035     }
6036   else
6037     {
6038       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
6039           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
6040         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6041       DECODE_SJIS (s1, s2, c1, c2);
6042       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
6043     }
6044   return val;
6045 }
6046
6047 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6048   "Encode a Japanese character CHAR to shift_jis encoding.\n\
6049 Return the corresponding code in SJIS.")
6050   (ch)
6051      Lisp_Object ch;
6052 {
6053   int charset, c1, c2, s1, s2;
6054   Lisp_Object val;
6055
6056   CHECK_NUMBER (ch, 0);
6057   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6058   if (charset == CHARSET_ASCII)
6059     {
6060       val = ch;
6061     }
6062   else if (charset == charset_jisx0208
6063            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6064     {
6065       ENCODE_SJIS (c1, c2, s1, s2);
6066       XSETFASTINT (val, (s1 << 8) | s2);
6067     }
6068   else if (charset == charset_katakana_jisx0201
6069            && c1 > 0x20 && c2 < 0xE0)
6070     {
6071       XSETFASTINT (val, c1 | 0x80);
6072     }
6073   else
6074     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6075   return val;
6076 }
6077
6078 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6079   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
6080 Return the corresponding character.")
6081   (code)
6082      Lisp_Object code;
6083 {
6084   int charset;
6085   unsigned char b1, b2, c1, c2;
6086   Lisp_Object val;
6087
6088   CHECK_NUMBER (code, 0);
6089   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6090   if (b1 == 0)
6091     {
6092       if (b2 >= 0x80)
6093         error ("Invalid BIG5 code: %x", XFASTINT (code));
6094       val = code;
6095     }
6096   else
6097     {
6098       if ((b1 < 0xA1 || b1 > 0xFE)
6099           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6100         error ("Invalid BIG5 code: %x", XFASTINT (code));
6101       DECODE_BIG5 (b1, b2, charset, c1, c2);
6102       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
6103     }
6104   return val;
6105 }
6106
6107 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6108   "Encode the Big5 character CHAR to BIG5 coding system.\n\
6109 Return the corresponding character code in Big5.")
6110   (ch)
6111      Lisp_Object ch;
6112 {
6113   int charset, c1, c2, b1, b2;
6114   Lisp_Object val;
6115
6116   CHECK_NUMBER (ch, 0);
6117   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6118   if (charset == CHARSET_ASCII)
6119     {
6120       val = ch;
6121     }
6122   else if ((charset == charset_big5_1
6123             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
6124            || (charset == charset_big5_2
6125                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
6126     {
6127       ENCODE_BIG5 (charset, c1, c2, b1, b2);
6128       XSETFASTINT (val, (b1 << 8) | b2);
6129     }
6130   else
6131     error ("Can't encode to Big5: %d", XFASTINT (ch));
6132   return val;
6133 }
6134 \f
6135 DEFUN ("set-terminal-coding-system-internal",
6136        Fset_terminal_coding_system_internal,
6137        Sset_terminal_coding_system_internal, 1, 1, 0, "")
6138   (coding_system)
6139      Lisp_Object coding_system;
6140 {
6141   CHECK_SYMBOL (coding_system, 0);
6142   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
6143   /* We had better not send unsafe characters to terminal.  */
6144   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
6145   /* Characer composition should be disabled.  */
6146   terminal_coding.composing = COMPOSITION_DISABLED;
6147   terminal_coding.src_multibyte = 1;
6148   terminal_coding.dst_multibyte = 0;
6149   return Qnil;
6150 }
6151
6152 DEFUN ("set-safe-terminal-coding-system-internal",
6153        Fset_safe_terminal_coding_system_internal,
6154        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
6155   (coding_system)
6156      Lisp_Object coding_system;
6157 {
6158   CHECK_SYMBOL (coding_system, 0);
6159   setup_coding_system (Fcheck_coding_system (coding_system),
6160                        &safe_terminal_coding);
6161   /* Characer composition should be disabled.  */
6162   safe_terminal_coding.composing = COMPOSITION_DISABLED;
6163   safe_terminal_coding.src_multibyte = 1;
6164   safe_terminal_coding.dst_multibyte = 0;
6165   return Qnil;
6166 }
6167
6168 DEFUN ("terminal-coding-system",
6169        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6170   "Return coding system specified for terminal output.")
6171   ()
6172 {
6173   return terminal_coding.symbol;
6174 }
6175
6176 DEFUN ("set-keyboard-coding-system-internal",
6177        Fset_keyboard_coding_system_internal,
6178        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
6179   (coding_system)
6180      Lisp_Object coding_system;
6181 {
       /* Configure the global `keyboard_coding' from CODING_SYSTEM.
          CODING_SYSTEM must be a symbol; it is passed through
          Fcheck_coding_system before being handed to
          setup_coding_system.  The multibyte flags are left as
          setup_coding_system set them.  Always returns nil.  */
6182   CHECK_SYMBOL (coding_system, 0);
6183   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
6184   /* Character composition should be disabled.  */
6185   keyboard_coding.composing = COMPOSITION_DISABLED;
6186   return Qnil;
6187 }
6188
6189 DEFUN ("keyboard-coding-system",
6190        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
6191   "Return coding system specified for decoding keyboard input.")
6192   ()
6193 {
       /* Plain accessor: hand back the `symbol' member recorded in the
          global `keyboard_coding'.  */
6194   return keyboard_coding.symbol;
6195 }
6196
6197 \f
6198 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
6199        Sfind_operation_coding_system,  1, MANY, 0,
6200   "Choose a coding system for an operation based on the target name.\n\
6201 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
6202 DECODING-SYSTEM is the coding system to use for decoding\n\
6203 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
6204 for encoding (in case OPERATION does encoding).\n\
6205 \n\
6206 The first argument OPERATION specifies an I/O primitive:\n\
6207   For file I/O, `insert-file-contents' or `write-region'.\n\
6208   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
6209   For network I/O, `open-network-stream'.\n\
6210 \n\
6211 The remaining arguments should be the same arguments that were passed\n\
6212 to the primitive.  Depending on which primitive, one of those arguments\n\
6213 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
6214 whichever argument specifies the file name is TARGET.\n\
6215 \n\
6216 TARGET has a meaning which depends on OPERATION:\n\
6217   For file I/O, TARGET is a file name.\n\
6218   For process I/O, TARGET is a process name.\n\
6219   For network I/O, TARGET is a service name or a port number\n\
6220 \n\
6221 This function looks up what specified for TARGET in,\n\
6222 `file-coding-system-alist', `process-coding-system-alist',\n\
6223 or `network-coding-system-alist' depending on OPERATION.\n\
6224 They may specify a coding system, a cons of coding systems,\n\
6225 or a function symbol to call.\n\
6226 In the last case, we call the function with one argument,\n\
6227 which is a list of all the arguments given to this function.")
6228   (nargs, args)
6229      int nargs;
6230      Lisp_Object *args;
6231 {
6232   Lisp_Object operation, target_idx, target, val;
6233   register Lisp_Object chain;
6234
       /* ARGS[0] is OPERATION; ARGS[1..NARGS-1] are the arguments that
          were given to that primitive.  */
6235   if (nargs < 2)
6236     error ("Too few arguments");
6237   operation = args[0];
       /* The `target-idx' property of OPERATION is the zero-based
          position of TARGET among the primitive's own arguments, so it
          must be a non-negative integer.  */
6238   if (!SYMBOLP (operation)
6239       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx))
           || XINT (target_idx) < 0)
6240     error ("Invalid first argument");
       /* TARGET lives at ARGS[target_idx + 1]; that slot exists only
          when NARGS is at least target_idx + 2.  */
6241   if (nargs <= 1 + XINT (target_idx))
6242     error ("Too few arguments for operation: %s",
6243            XSYMBOL (operation)->name->data);
6244   target = args[XINT (target_idx) + 1];
       /* TARGET must be a string; only `open-network-stream' may also
          pass an integer (a port number).  */
6245   if (!(STRINGP (target)
6246         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
6247     error ("Invalid %dth argument", XINT (target_idx) + 1);
6248
       /* Pick the alist matching the kind of I/O: file operations,
          network streams, and everything else (process I/O).  */
6249   chain = ((EQ (operation, Qinsert_file_contents)
6250             || EQ (operation, Qwrite_region))
6251            ? Vfile_coding_system_alist
6252            : (EQ (operation, Qopen_network_stream)
6253               ? Vnetwork_coding_system_alist
6254               : Vprocess_coding_system_alist));
6255   if (NILP (chain))
6256     return Qnil;
6257
       /* The first element whose car matches TARGET decides the result;
          later elements are never consulted.  */
6258   for (; CONSP (chain); chain = XCDR (chain))
6259     {
6260       Lisp_Object elt;
6261       elt = XCAR (chain);
6262
           /* A string TARGET is matched against a string car with
              fast_string_match; an integer TARGET must be EQ to the
              car.  */
6263       if (CONSP (elt)
6264           && ((STRINGP (target)
6265                && STRINGP (XCAR (elt))
6266                && fast_string_match (XCAR (elt), target) >= 0)
6267               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
6268         {
6269           val = XCDR (elt);
6270           /* Here, if VAL is both a valid coding system and a valid
6271              function symbol, we return VAL as a coding system.  */
6272           if (CONSP (val))
6273             return val;
6274           if (! SYMBOLP (val))
6275             return Qnil;
6276           if (! NILP (Fcoding_system_p (val)))
6277             return Fcons (val, val);
6278           if (! NILP (Ffboundp (val)))
6279             {
                   /* VAL names a function: call it with the full
                      argument list and interpret its result the same
                      way (a cons, or a single coding system).  */
6280               val = call1 (val, Flist (nargs, args));
6281               if (CONSP (val))
6282                 return val;
6283               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
6284                 return Fcons (val, val);
6285             }
6286           return Qnil;
6287         }
6288     }
6289   return Qnil;
6290 }
6291
6292 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
6293        Supdate_coding_systems_internal, 0, 0, 0,
6294   "Update internal database for ISO2022 and CCL based coding systems.\n\
6295 When values of any coding categories are changed, you must\n\
6296 call this function")
6297   ()
6298 {
6299   int i;
6300
       /* Walk every coding category index and bring the matching
          `coding_system_table' slot in line with the current value of
          the category symbol.  Always returns nil.  */
6301   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
6302     {
6303       Lisp_Object val;
6304
           /* VAL is the value cell of the I-th category symbol.  */
6305       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
6306       if (!NILP (val))
6307         {
               /* Allocate the slot on first use, then (re)initialize it
                  from VAL.  */
6308           if (! coding_system_table[i])
6309             coding_system_table[i] = ((struct coding_system *)
6310                                       xmalloc (sizeof (struct coding_system)));
6311           setup_coding_system (val, coding_system_table[i]);
6312         }
6313       else if (coding_system_table[i])
6314         {
               /* The category has no value any more: release the slot
                  and clear the pointer so it is not used again.  */
6315           xfree (coding_system_table[i]);
6316           coding_system_table[i] = NULL;
6317         }
6318     }
6319
6320   return Qnil;
6321 }
6322
6323 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
6324        Sset_coding_priority_internal, 0, 0, 0,
6325   "Update internal database for the current value of `coding-category-list'.\n\
6326 This function is internal use only.")
6327   ()
6328 {
6329   int i = 0, idx;
6330   Lisp_Object val, prop;
6331
       /* Rebuild `coding_priorities' from `coding-category-list': the
          I-th entry becomes the bit mask (1 << index) of the I-th
          category in the list.  Always returns nil.  */
6332   val = Vcoding_category_list;
6333
6334   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
6335     {
6336       if (! SYMBOLP (XCAR (val)))
6337         break;
           /* A symbol that is not a coding category has no
              `coding-category-index' property (Fget yields nil), so
              check the type before extracting the integer.  */
6338       prop = Fget (XCAR (val), Qcoding_category_index);
           if (! INTEGERP (prop))
             break;
           idx = XINT (prop);
           /* Reject out-of-range indices, including negative ones,
              before they are used as a shift count.  */
6339       if (idx < 0 || idx >= CODING_CATEGORY_IDX_MAX)
6340         break;
6341       coding_priorities[i++] = (1 << idx);
6342       val = XCDR (val);
6343     }
6344   /* If coding-category-list is valid and contains all coding
6345      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
6346      the following code saves Emacs from crashing.  */
6347   while (i < CODING_CATEGORY_IDX_MAX)
6348     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
6349
6350   return Qnil;
6351 }
6352
6353 #endif /* emacs */
6354
6355 \f
6356 /*** 9. Post-amble ***/
6357
6358 void
6359 init_coding_once ()
6360 {
       /* Fill the static byte-classification tables and put the global
          coding-system state into its initial configuration.
          NOTE(review): the name suggests this runs once at startup --
          confirm against the caller.  */
6361   int i;
6362
6363   /* Emacs' internal format specific initialize routine.  */
       /* 0x00..0x20 (space included) and 0x7F are control codes, with
          LF and CR singled out; 0x21..0x7E are ASCII.  */
6364   for (i = 0; i <= 0x20; i++)
6365     emacs_code_class[i] = EMACS_control_code;
6366   emacs_code_class[0x0A] = EMACS_linefeed_code;
6367   emacs_code_class[0x0D] = EMACS_carriage_return_code;
6368   for (i = 0x21 ; i < 0x7F; i++)
6369     emacs_code_class[i] = EMACS_ascii_code;
6370   emacs_code_class[0x7F] = EMACS_control_code;
       /* 0x80..0xFE start out invalid; the four private leading codes
          are then reclassified below.  Index 0xFF is not assigned in
          this function.  */
6371   for (i = 0x80; i < 0xFF; i++)
6372     emacs_code_class[i] = EMACS_invalid_code;
6373   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
6374   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
6375   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6376   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6377
6378   /* ISO2022 specific initialize routine.  */
       /* The four range loops skip 0x20, 0x7F, 0xA0 and 0xFF, which get
          their own classes right after.  The individual ISO_CODE_*
          assignments come last so that they take precedence over
          whatever range class was stored for those bytes.  */
6379   for (i = 0; i < 0x20; i++)
6380     iso_code_class[i] = ISO_control_0;
6381   for (i = 0x21; i < 0x7F; i++)
6382     iso_code_class[i] = ISO_graphic_plane_0;
6383   for (i = 0x80; i < 0xA0; i++)
6384     iso_code_class[i] = ISO_control_1;
6385   for (i = 0xA1; i < 0xFF; i++)
6386     iso_code_class[i] = ISO_graphic_plane_1;
6387   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6388   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6389   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
6390   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
6391   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
6392   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
6393   iso_code_class[ISO_CODE_ESC] = ISO_escape;
6394   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
6395   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
6396   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
6397
       /* Initialize the global coding structures from nil.  */
6398   setup_coding_system (Qnil, &keyboard_coding);
6399   setup_coding_system (Qnil, &terminal_coding);
6400   setup_coding_system (Qnil, &safe_terminal_coding);
6401   setup_coding_system (Qnil, &default_buffer_file_coding);
6402
       /* Start with every per-category table slot zeroed.  */
6403   bzero (coding_system_table, sizeof coding_system_table);
6404
       /* Flag the 128 ASCII byte values (0..127); every other entry of
          ascii_skip_code stays 0.  */
6405   bzero (ascii_skip_code, sizeof ascii_skip_code);
6406   for (i = 0; i < 128; i++)
6407     ascii_skip_code[i] = 1;
6408
       /* The system end-of-line convention is CRLF on MSDOS/WINDOWSNT
          builds and LF everywhere else.  */
6409 #if defined (MSDOS) || defined (WINDOWSNT)
6410   system_eol_type = CODING_EOL_CRLF;
6411 #else
6412   system_eol_type = CODING_EOL_LF;
6413 #endif
6414
6415   inhibit_pre_post_conversion = 0;
6416 }
6417
6418 #ifdef emacs
6419
6420 void
6421 syms_of_coding ()
6422 {
       /* Register everything this file exports to Lisp: intern and
          staticpro the symbols used from C, record the `target-idx'
          property consulted by `find-operation-coding-system', build
          the coding category table, defsubr the primitives, and declare
          the Lisp-visible variables together with their initial
          values.  */
6423   Qtarget_idx = intern ("target-idx");
6424   staticpro (&Qtarget_idx);
6425
6426   Qcoding_system_history = intern ("coding-system-history");
6427   staticpro (&Qcoding_system_history);
6428   Fset (Qcoding_system_history, Qnil);
6429
       /* `target-idx' values are zero-based positions in the
          primitive's own argument list.  */
6430   /* Target FILENAME is the first argument.  */
6431   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
6432   /* Target FILENAME is the third argument.  */
6433   Fput (Qwrite_region, Qtarget_idx, make_number (2));
6434
6435   Qcall_process = intern ("call-process");
6436   staticpro (&Qcall_process);
6437   /* Target PROGRAM is the first argument.  */
6438   Fput (Qcall_process, Qtarget_idx, make_number (0));
6439
6440   Qcall_process_region = intern ("call-process-region");
6441   staticpro (&Qcall_process_region);
6442   /* Target PROGRAM is the third argument.  */
6443   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
6444
6445   Qstart_process = intern ("start-process");
6446   staticpro (&Qstart_process);
6447   /* Target PROGRAM is the third argument.  */
6448   Fput (Qstart_process, Qtarget_idx, make_number (2));
6449
6450   Qopen_network_stream = intern ("open-network-stream");
6451   staticpro (&Qopen_network_stream);
6452   /* Target SERVICE is the fourth argument.  */
6453   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
6454
6455   Qcoding_system = intern ("coding-system");
6456   staticpro (&Qcoding_system);
6457
6458   Qeol_type = intern ("eol-type");
6459   staticpro (&Qeol_type);
6460
6461   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
6462   staticpro (&Qbuffer_file_coding_system);
6463
6464   Qpost_read_conversion = intern ("post-read-conversion");
6465   staticpro (&Qpost_read_conversion);
6466
6467   Qpre_write_conversion = intern ("pre-write-conversion");
6468   staticpro (&Qpre_write_conversion);
6469
6470   Qno_conversion = intern ("no-conversion");
6471   staticpro (&Qno_conversion);
6472
6473   Qundecided = intern ("undecided");
6474   staticpro (&Qundecided);
6475
6476   Qcoding_system_p = intern ("coding-system-p");
6477   staticpro (&Qcoding_system_p);
6478
6479   Qcoding_system_error = intern ("coding-system-error");
6480   staticpro (&Qcoding_system_error);
6481
       /* Make `coding-system-error' an error condition (a child of
          `error') with its own message.  */
6482   Fput (Qcoding_system_error, Qerror_conditions,
6483         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
6484   Fput (Qcoding_system_error, Qerror_message,
6485         build_string ("Invalid coding system"));
6486
6487   Qcoding_category = intern ("coding-category");
6488   staticpro (&Qcoding_category);
6489   Qcoding_category_index = intern ("coding-category-index");
6490   staticpro (&Qcoding_category_index);
6491
       /* Vcoding_category_table[I] is the symbol named
          coding_category_name[I]; each such symbol records I in its
          `coding-category-index' property.  */
6492   Vcoding_category_table
6493     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
6494   staticpro (&Vcoding_category_table);
6495   {
6496     int i;
6497     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
6498       {
6499         XVECTOR (Vcoding_category_table)->contents[i]
6500           = intern (coding_category_name[i]);
6501         Fput (XVECTOR (Vcoding_category_table)->contents[i],
6502               Qcoding_category_index, make_number (i));
6503       }
6504   }
6505
6506   Qtranslation_table = intern ("translation-table");
6507   staticpro (&Qtranslation_table);
6508   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
6509
6510   Qtranslation_table_id = intern ("translation-table-id");
6511   staticpro (&Qtranslation_table_id);
6512
6513   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
6514   staticpro (&Qtranslation_table_for_decode);
6515
6516   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
6517   staticpro (&Qtranslation_table_for_encode);
6518
6519   Qsafe_chars = intern ("safe-chars");
6520   staticpro (&Qsafe_chars);
6521
6522   Qchar_coding_system = intern ("char-coding-system");
6523   staticpro (&Qchar_coding_system);
6524
6525   /* Intern this now in case it isn't already done.
6526      Setting this variable twice is harmless.
6527      But don't staticpro it here--that is done in alloc.c.  */
6528   Qchar_table_extra_slots = intern ("char-table-extra-slots");
6529   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
6530   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
6531
6532   Qvalid_codes = intern ("valid-codes");
6533   staticpro (&Qvalid_codes);
6534
6535   Qemacs_mule = intern ("emacs-mule");
6536   staticpro (&Qemacs_mule);
6537
6538   Qraw_text = intern ("raw-text");
6539   staticpro (&Qraw_text);
6540
       /* Primitives defined in this file.  */
6541   defsubr (&Scoding_system_p);
6542   defsubr (&Sread_coding_system);
6543   defsubr (&Sread_non_nil_coding_system);
6544   defsubr (&Scheck_coding_system);
6545   defsubr (&Sdetect_coding_region);
6546   defsubr (&Sdetect_coding_string);
6547   defsubr (&Sfind_coding_systems_region_internal);
6548   defsubr (&Sdecode_coding_region);
6549   defsubr (&Sencode_coding_region);
6550   defsubr (&Sdecode_coding_string);
6551   defsubr (&Sencode_coding_string);
6552   defsubr (&Sdecode_sjis_char);
6553   defsubr (&Sencode_sjis_char);
6554   defsubr (&Sdecode_big5_char);
6555   defsubr (&Sencode_big5_char);
6556   defsubr (&Sset_terminal_coding_system_internal);
6557   defsubr (&Sset_safe_terminal_coding_system_internal);
6558   defsubr (&Sterminal_coding_system);
6559   defsubr (&Sset_keyboard_coding_system_internal);
6560   defsubr (&Skeyboard_coding_system);
6561   defsubr (&Sfind_operation_coding_system);
6562   defsubr (&Supdate_coding_systems_internal);
6563   defsubr (&Sset_coding_priority_internal);
6564
       /* Lisp-visible variables.  Each declaration is followed by the
          assignment of the variable's initial value.  */
6565   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
6566     "List of coding systems.\n\
6567 \n\
6568 Do not alter the value of this variable manually.  This variable should be\n\
6569 updated by the functions `make-coding-system' and\n\
6570 `define-coding-system-alias'.");
6571   Vcoding_system_list = Qnil;
6572
6573   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
6574     "Alist of coding system names.\n\
6575 Each element is one element list of coding system name.\n\
6576 This variable is given to `completing-read' as TABLE argument.\n\
6577 \n\
6578 Do not alter the value of this variable manually.  This variable should be\n\
6579 updated by the functions `make-coding-system' and\n\
6580 `define-coding-system-alias'.");
6581   Vcoding_system_alist = Qnil;
6582
6583   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
6584     "List of coding-categories (symbols) ordered by priority.");
6585   {
6586     int i;
6587
         /* Consing from the highest index downwards leaves the list in
            ascending index order.  */
6588     Vcoding_category_list = Qnil;
6589     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
6590       Vcoding_category_list
6591         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
6592                  Vcoding_category_list);
6593   }
6594
6595   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
6596     "Specify the coding system for read operations.\n\
6597 It is useful to bind this variable with `let', but do not set it globally.\n\
6598 If the value is a coding system, it is used for decoding on read operation.\n\
6599 If not, an appropriate element is used from one of the coding system alists:\n\
6600 There are three such tables, `file-coding-system-alist',\n\
6601 `process-coding-system-alist', and `network-coding-system-alist'.");
6602   Vcoding_system_for_read = Qnil;
6603
6604   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
6605     "Specify the coding system for write operations.\n\
6606 Programs bind this variable with `let', but you should not set it globally.\n\
6607 If the value is a coding system, it is used for encoding of output,\n\
6608 when writing it to a file and when sending it to a file or subprocess.\n\
6609 \n\
6610 If this does not specify a coding system, an appropriate element\n\
6611 is used from one of the coding system alists:\n\
6612 There are three such tables, `file-coding-system-alist',\n\
6613 `process-coding-system-alist', and `network-coding-system-alist'.\n\
6614 For output to files, if the above procedure does not specify a coding system,\n\
6615 the value of `buffer-file-coding-system' is used.");
6616   Vcoding_system_for_write = Qnil;
6617
6618   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
6619     "Coding system used in the latest file or process I/O.");
6620   Vlast_coding_system_used = Qnil;
6621
6622   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
6623     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
6624 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
6625 such conversion.");
6626   inhibit_eol_conversion = 0;
6627
6628   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
6629     "Non-nil means process buffer inherits coding system of process output.\n\
6630 Bind it to t if the process output is to be treated as if it were a file\n\
6631 read from some filesystem.");
6632   inherit_process_coding_system = 0;
6633
6634   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
6635     "Alist to decide a coding system to use for a file I/O operation.\n\
6636 The format is ((PATTERN . VAL) ...),\n\
6637 where PATTERN is a regular expression matching a file name,\n\
6638 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6639 If VAL is a coding system, it is used for both decoding and encoding\n\
6640 the file contents.\n\
6641 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6642 and the cdr part is used for encoding.\n\
6643 If VAL is a function symbol, the function must return a coding system\n\
6644 or a cons of coding systems which are used as above.\n\
6645 \n\
6646 See also the function `find-operation-coding-system'\n\
6647 and the variable `auto-coding-alist'.");
6648   Vfile_coding_system_alist = Qnil;
6649
6650   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
6651     "Alist to decide a coding system to use for a process I/O operation.\n\
6652 The format is ((PATTERN . VAL) ...),\n\
6653 where PATTERN is a regular expression matching a program name,\n\
6654 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6655 If VAL is a coding system, it is used for both decoding what received\n\
6656 from the program and encoding what sent to the program.\n\
6657 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6658 and the cdr part is used for encoding.\n\
6659 If VAL is a function symbol, the function must return a coding system\n\
6660 or a cons of coding systems which are used as above.\n\
6661 \n\
6662 See also the function `find-operation-coding-system'.");
6663   Vprocess_coding_system_alist = Qnil;
6664
6665   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
6666     "Alist to decide a coding system to use for a network I/O operation.\n\
6667 The format is ((PATTERN . VAL) ...),\n\
6668 where PATTERN is a regular expression matching a network service name\n\
6669 or is a port number to connect to,\n\
6670 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
6671 If VAL is a coding system, it is used for both decoding what received\n\
6672 from the network stream and encoding what sent to the network stream.\n\
6673 If VAL is a cons of coding systems, the car part is used for decoding,\n\
6674 and the cdr part is used for encoding.\n\
6675 If VAL is a function symbol, the function must return a coding system\n\
6676 or a cons of coding systems which are used as above.\n\
6677 \n\
6678 See also the function `find-operation-coding-system'.");
6679   Vnetwork_coding_system_alist = Qnil;
6680
6681   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
6682     "Coding system to use with system messages.");
6683   Vlocale_coding_system = Qnil;
6684
6685   /* The eol mnemonics are reset in startup.el system-dependently.  */
6686   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
6687     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
6688   eol_mnemonic_unix = build_string (":");
6689
6690   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
6691     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
6692   eol_mnemonic_dos = build_string ("\\");
6693
6694   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
6695     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
6696   eol_mnemonic_mac = build_string ("/");
6697
6698   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
6699     "*String displayed in mode line when end-of-line format is not yet determined.");
6700   eol_mnemonic_undecided = build_string (":");
6701
6702   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
6703     "*Non-nil enables character translation while encoding and decoding.");
6704   Venable_character_translation = Qt;
6705
6706   DEFVAR_LISP ("standard-translation-table-for-decode",
6707     &Vstandard_translation_table_for_decode,
6708     "Table for translating characters while decoding.");
6709   Vstandard_translation_table_for_decode = Qnil;
6710
6711   DEFVAR_LISP ("standard-translation-table-for-encode",
6712     &Vstandard_translation_table_for_encode,
6713     "Table for translating characters while encoding.");
6714   Vstandard_translation_table_for_encode = Qnil;
6715
       /* Note that the Lisp name ends in `-table' while the C variable
          ends in `_alist'; the Lisp name is left as it is because Lisp
          code refers to it.  */
6716   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
6717     "Alist of charsets vs revision numbers.\n\
6718 While encoding, if a charset (car part of an element) is found,\n\
6719 designate it with the escape sequence identifying revision (cdr part of the element).");
6720   Vcharset_revision_alist = Qnil;
6721
6722   DEFVAR_LISP ("default-process-coding-system",
6723                &Vdefault_process_coding_system,
6724     "Cons of coding systems used for process I/O by default.\n\
6725 The car part is used for decoding a process output,\n\
6726 the cdr part is used for encoding a text to be sent to a process.");
6727   Vdefault_process_coding_system = Qnil;
6728
6729   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
6730     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
6731 This is a vector of length 256.\n\
6732 If Nth element is non-nil, the existence of code N in a file\n\
6733 \(or output of subprocess) doesn't prevent it to be detected as\n\
6734 a coding system of ISO 2022 variant which has a flag\n\
6735 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
6736 or reading output of a subprocess.\n\
6737 Only 128th through 159th elements has a meaning.");
6738   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
6739
       /* The C-level initial value is nil even though the doc string
          names `select-safe-coding-system' as the default; presumably
          Lisp startup code installs that function -- confirm.  */
6740   DEFVAR_LISP ("select-safe-coding-system-function",
6741                &Vselect_safe_coding_system_function,
6742     "Function to call to select safe coding system for encoding a text.\n\
6743 \n\
6744 If set, this function is called to force a user to select a proper\n\
6745 coding system which can encode the text in the case that a default\n\
6746 coding system used in each operation can't encode the text.\n\
6747 \n\
6748 The default value is `select-safe-coding-system' (which see).");
6749   Vselect_safe_coding_system_function = Qnil;
6750
6751   DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
6752     "Char-table containing safe coding systems of each characters.\n\
6753 Each element doesn't include such generic coding systems that can\n\
6754 encode any characters.   They are in the first extra slot.");
6755   Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
6756
6757   DEFVAR_BOOL ("inhibit-iso-escape-detection",
6758                &inhibit_iso_escape_detection,
6759     "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
6760 \n\
6761 By default, on reading a file, Emacs tries to detect how the text is\n\
6762 encoded.  This code detection is sensitive to escape sequences.  If\n\
6763 the sequence is valid as ISO2022, the code is determined as one of\n\
6764 the ISO2022 encodings, and the file is decoded by the corresponding\n\
6765 coding system (e.g. `iso-2022-7bit').\n\
6766 \n\
6767 However, there may be a case that you want to read escape sequences in\n\
6768 a file as is.  In such a case, you can set this variable to non-nil.\n\
6769 Then, as the code detection ignores any escape sequences, no file is\n\
6770 detected as encoded in some ISO2022 encoding.  The result is that all\n\
6771 escape sequences become visible in a buffer.\n\
6772 \n\
6773 The default value is nil, and it is strongly recommended not to change\n\
6774 it.  That is because many Emacs Lisp source files that contain\n\
6775 non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
6776 in Emacs's distribution, and they won't be decoded correctly on\n\
6777 reading if you suppress escape sequence detection.\n\
6778 \n\
6779 The other way to read escape sequences in a file without decoding is\n\
6780 to explicitly specify some coding system that doesn't use ISO2022's\n\
6781 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
6782   inhibit_iso_escape_detection = 0;
6783 }
6784
6785 char *
6786 emacs_strerror (error_number)
6787      int error_number;
6788 {
       /* Return the system's message text for ERROR_NUMBER.  The text
          comes from strerror, called after
          synchronize_system_messages_locale; when
          `locale-coding-system' is non-nil the text is additionally
          converted with code_convert_string_norecord.

          NOTE(review): the result points either into strerror's own
          storage or into the data of a freshly made Lisp string that
          nothing in this function protects from garbage collection.
          Callers presumably must consume it before anything can
          trigger GC or another strerror call -- confirm at the call
          sites.  */
6789   char *str;
6790
6791   synchronize_system_messages_locale ();
6792   str = strerror (error_number);
6793
6794   if (! NILP (Vlocale_coding_system))
6795     {
           /* The trailing 0 is the third argument of
              code_convert_string_norecord; `dec' suggests it selects
              decoding -- confirm against that function's definition.  */
6796       Lisp_Object dec = code_convert_string_norecord (build_string (str),
6797                                                       Vlocale_coding_system,
6798                                                       0);
6799       str = (char *) XSTRING (dec)->data;
6800     }
6801
6802   return str;
6803 }
6804
6805 #endif /* emacs */
6806