src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. CCL handlers
  29   6. End-of-line handlers
  30   7. C library functions
  31   8. Emacs Lisp library functions
  32   9. Post-amble
  33
  34 */
  35
  36 /*** GENERAL NOTE on CODING SYSTEM ***
  37
  38   Coding system is an encoding mechanism of one or more character
  39   sets.  Here's a list of coding systems which Emacs can handle.  When
  40   we say "decode", it means converting some other coding system to
  41   Emacs' internal format (emacs-mule), and when we say "encode",
  42   it means converting the coding system emacs-mule to some other
  43   coding system.
  44
  45   0. Emacs' internal format (emacs-mule)
  46
  47   Emacs itself holds a multi-lingual character in a buffer and a string
  48   in a special format.  Details are described in section 2.
  49
  50   1. ISO2022
  51
  52   The most famous coding system for multiple character sets.  X's
  53   Compound Text, various EUCs (Extended Unix Code), and coding
  54   systems used in Internet communication such as ISO-2022-JP are
  55   all variants of ISO2022.  Details are described in section 3.
  56
  57   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  58
  59   A coding system to encode character sets: ASCII, JISX0201, and
  60   JISX0208.  Widely used for PC's in Japan.  Details are described in
  61   section 4.
  62
  63   3. BIG5
  64
  65   A coding system to encode character sets: ASCII and Big5.  Widely
  66   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  67   described in section 4.  In this file, when we write "BIG5"
  68   (all uppercase), we mean the coding system, and when we write
  69   "Big5" (capitalized), we mean the character set.
  70
  71   4. Raw text
  72
  73   A coding system for a text containing random 8-bit code.  Emacs does
  74   no code conversion on such a text except for end-of-line format.
  75
  76   5. Other
  77
  78   If a user wants to read/write a text encoded in a coding system not
  79   listed above, he can supply a decoder and an encoder for it in CCL
  80   (Code Conversion Language) programs.  Emacs executes the CCL program
  81   while reading/writing.
  82
  83   Emacs represents a coding system by a Lisp symbol that has a property
  84   `coding-system'.  But, before actually using the coding system, the
  85   information about it is set in a structure of type `struct
  86   coding_system' for rapid processing.  See section 6 for more details.
  87
  88 */
  89
  90 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  91
  92   How end-of-line of a text is encoded depends on a system.  For
  93   instance, Unix's format is just one byte of `line-feed' code,
  94   whereas DOS's format is two-byte sequence of `carriage-return' and
  95   `line-feed' codes.  MacOS's format is usually one byte of
  96   `carriage-return'.
  97
  98   Since text characters encoding and end-of-line encoding are
  99   independent, any coding system described above can take
 100   any format of end-of-line.  So, Emacs has information of format of
 101   end-of-line in each coding-system.  See section 6 for more details.
 102
 103 */
 104
 105 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 106
 107   These functions check if a text between SRC and SRC_END is encoded
 108   in the coding system category XXX.  Each returns an integer value in
 109   which appropriate flag bits for the category XXX are set.  The flag
 110   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 111   template of these functions.  */
 112 #if 0
 113 int
 114 detect_coding_emacs_mule (src, src_end)
 115      unsigned char *src, *src_end;
 116 {
 117   ...
 118 }
 119 #endif
 120
 121 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 122
 123   These functions decode SRC_BYTES length text at SOURCE encoded in
 124   CODING to Emacs' internal format (emacs-mule).  The resulting text
 125   goes to a place pointed to by DESTINATION, the length of which
 126   should not exceed DST_BYTES.  These functions set the information of
 127   original and decoded texts in the members produced, produced_char,
 128   consumed, and consumed_char of the structure *CODING.
 129
 130   The return value is an integer (CODING_FINISH_XXX) indicating how
 131   the decoding finished.
 132
 133   DST_BYTES zero means that source area and destination area are
 134   overlapped, which means that we can produce a decoded text until it
 135   reaches at the head of not-yet-decoded source text.
 136
 137   Below is a template of these functions.  */
 138 #if 0
 139 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 140      struct coding_system *coding;
 141      unsigned char *source, *destination;
 142      int src_bytes, dst_bytes;
 143 {
 144   ...
 145 }
 146 #endif
 147
 148 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 149
 150   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 151   internal format (emacs-mule) to CODING.  The resulting text goes to
 152   a place pointed to by DESTINATION, the length of which should not
 153   exceed DST_BYTES.  These functions set the information of
 154   original and encoded texts in the members produced, produced_char,
 155   consumed, and consumed_char of the structure *CODING.
 156
 157   The return value is an integer (CODING_FINISH_XXX) indicating how
 158   the encoding finished.
 159
 160   DST_BYTES zero means that source area and destination area are
 161   overlapped, which means that we can produce an encoded text until it
 162   reaches the head of the not-yet-encoded source text.
 163
 164   Below is a template of these functions.  */
 165 #if 0
 166 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 167      struct coding_system *coding;
 168      unsigned char *source, *destination;
 169      int src_bytes, dst_bytes;
 170 {
 171   ...
 172 }
 173 #endif
 174
 175 /*** COMMONLY USED MACROS ***/
 176
 177 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 178    THREE_MORE_BYTES safely get one, two, and three bytes from the
 179    source text respectively.  If there are not enough bytes in the
 180    source, they jump to `label_end_of_loop'.  The caller should set
 181    variables `src' and `src_end' to appropriate areas in advance.  */
 182
 183 #define ONE_MORE_BYTE(c1)       \
 184   do {                          \
 185     if (src < src_end)          \
 186       c1 = *src++;              \
 187     else                        \
 188       goto label_end_of_loop;   \
 189   } while (0)
 190
 191 #define TWO_MORE_BYTES(c1, c2)  \
 192   do {                          \
 193     if (src + 1 < src_end)      \
 194       c1 = *src++, c2 = *src++; \
 195     else                        \
 196       goto label_end_of_loop;   \
 197   } while (0)
 198
 199 #define THREE_MORE_BYTES(c1, c2, c3)            \
 200   do {                                          \
 201     if (src + 2 < src_end)                      \
 202       c1 = *src++, c2 = *src++, c3 = *src++;    \
 203     else                                        \
 204       goto label_end_of_loop;                   \
 205   } while (0)
 206
 207 /* The following three macros DECODE_CHARACTER_ASCII,
 208    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 209    the multi-byte form of a character of each class at the place
 210    pointed by `dst'.  The caller should set the variable `dst' to
 211    point to an appropriate area and the variable `coding' to point to
 212    the coding-system of the currently decoding text in advance.  */
 213
 214 /* Decode one ASCII character C.  */
 215
 216 #define DECODE_CHARACTER_ASCII(c)                               \
 217   do {                                                          \
 218     if (COMPOSING_P (coding->composing))                        \
 219       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
 220     else                                                        \
 221       {                                                         \
 222         *dst++ = (c);                                           \
 223         coding->produced_char++;                                \
 224       }                                                         \
 225   } while (0)
 226
 227 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 228    position-code is C.  */
 229
 230 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 231   do {                                                                  \
 232     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 233     if (COMPOSING_P (coding->composing))                                \
 234       *dst++ = leading_code + 0x20;                                     \
 235     else                                                                \
 236       {                                                                 \
 237         *dst++ = leading_code;                                          \
 238         coding->produced_char++;                                        \
 239       }                                                                 \
 240     if (leading_code = CHARSET_LEADING_CODE_EXT (charset))              \
 241       *dst++ = leading_code;                                            \
 242     *dst++ = (c) | 0x80;                                                \
 243   } while (0)
 244
 245 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 246    position-codes are C1 and C2.  */
 247
 248 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 249   do {                                                  \
 250     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 251     *dst++ = (c2) | 0x80;                               \
 252   } while (0)
 253
 254 \f
 255 /*** 1. Preamble ***/
 256
 257 #include <stdio.h>
 258
 259 #ifdef emacs
 260
 261 #include <config.h>
 262 #include "lisp.h"
 263 #include "buffer.h"
 264 #include "charset.h"
 265 #include "ccl.h"
 266 #include "coding.h"
 267 #include "window.h"
 268
 269 #else  /* not emacs */
 270
 271 #include "mulelib.h"
 272
 273 #endif /* not emacs */
 274
 275 Lisp_Object Qcoding_system, Qeol_type;
 276 Lisp_Object Qbuffer_file_coding_system;
 277 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 278 Lisp_Object Qno_conversion, Qundecided;
 279 Lisp_Object Qcoding_system_history;
 280 Lisp_Object Qsafe_charsets;
 281 Lisp_Object Qvalid_codes;
 282
 283 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 284 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 285 Lisp_Object Qstart_process, Qopen_network_stream;
 286 Lisp_Object Qtarget_idx;
 287
 288 Lisp_Object Vselect_safe_coding_system_function;
 289
 290 /* Mnemonic character of each format of end-of-line.  */
 291 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 292 /* Mnemonic character to indicate format of end-of-line is not yet
 293    decided.  */
 294 int eol_mnemonic_undecided;
 295
 296 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 297    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 298 int system_eol_type;
 299
 300 #ifdef emacs
 301
 302 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 303
 304 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 305
 306 /* Coding system emacs-mule and raw-text are for converting only
 307    end-of-line format.  */
 308 Lisp_Object Qemacs_mule, Qraw_text;
 309
 310 /* Coding-systems are handed between Emacs Lisp programs and C internal
 311    routines by the following three variables.  */
 312 /* Coding-system for reading files and receiving data from process.  */
 313 Lisp_Object Vcoding_system_for_read;
 314 /* Coding-system for writing files and sending data to process.  */
 315 Lisp_Object Vcoding_system_for_write;
 316 /* Coding-system actually used in the latest I/O.  */
 317 Lisp_Object Vlast_coding_system_used;
 318
 319 /* A vector of length 256 which contains information about special
 320    Latin codes (especially for dealing with Microsoft codes).  */
 321 Lisp_Object Vlatin_extra_code_table;
 322
 323 /* Flag to inhibit code conversion of end-of-line format.  */
 324 int inhibit_eol_conversion;
 325
 326 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 327 int inherit_process_coding_system;
 328
 329 /* Coding system to be used to encode text for terminal display.  */
 330 struct coding_system terminal_coding;
 331
 332 /* Coding system to be used to encode text for terminal display when
 333    terminal coding system is nil.  */
 334 struct coding_system safe_terminal_coding;
 335
 336 /* Coding system of what is sent from terminal keyboard.  */
 337 struct coding_system keyboard_coding;
 338
 339 /* Default coding system to be used to write a file.  */
 340 struct coding_system default_buffer_file_coding;
 341
 342 Lisp_Object Vfile_coding_system_alist;
 343 Lisp_Object Vprocess_coding_system_alist;
 344 Lisp_Object Vnetwork_coding_system_alist;
 345
 346 #endif /* emacs */
 347
 348 Lisp_Object Qcoding_category, Qcoding_category_index;
 349
 350 /* List of symbols `coding-category-xxx' ordered by priority.  */
 351 Lisp_Object Vcoding_category_list;
 352
 353 /* Table of coding categories (Lisp symbols).  */
 354 Lisp_Object Vcoding_category_table;
 355
 356 /* Table of names of symbol for each coding-category.  */
 357 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 358   "coding-category-emacs-mule",
 359   "coding-category-sjis",
 360   "coding-category-iso-7",
 361   "coding-category-iso-7-tight",
 362   "coding-category-iso-8-1",
 363   "coding-category-iso-8-2",
 364   "coding-category-iso-7-else",
 365   "coding-category-iso-8-else",
 366   "coding-category-ccl",
 367   "coding-category-big5",
 368   "coding-category-raw-text",
 369   "coding-category-binary"
 370 };
 371
 372 /* Table of pointers to coding systems corresponding to each coding
 373    categories.  */
 374 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 375
 376 /* Table of coding category masks.  Nth element is a mask for a coding
 377    cateogry of which priority is Nth.  */
 378 static
 379 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 380
 381 /* Flag to tell if we look up translation table on character code
 382    conversion.  */
 383 Lisp_Object Venable_character_translation;
 384 /* Standard translation table to look up on decoding (reading).  */
 385 Lisp_Object Vstandard_translation_table_for_decode;
 386 /* Standard translation table to look up on encoding (writing).  */
 387 Lisp_Object Vstandard_translation_table_for_encode;
 388
 389 Lisp_Object Qtranslation_table;
 390 Lisp_Object Qtranslation_table_id;
 391 Lisp_Object Qtranslation_table_for_decode;
 392 Lisp_Object Qtranslation_table_for_encode;
 393
 394 /* Alist of charsets vs revision number.  */
 395 Lisp_Object Vcharset_revision_alist;
 396
 397 /* Default coding systems used for process I/O.  */
 398 Lisp_Object Vdefault_process_coding_system;
 399
 400 \f
 401 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 402
 403 /* Emacs' internal format for encoding multiple character sets is a
 404    kind of multi-byte encoding, i.e. characters are encoded by
 405    variable-length sequences of one-byte codes.  ASCII characters
 406    and control characters (e.g. `tab', `newline') are represented by
 407    one-byte sequences which are their ASCII codes, in the range 0x00
 408    through 0x7F.  The other characters are represented by a sequence
 409    of `base leading-code', optional `extended leading-code', and one
 410    or two `position-code's.  The length of the sequence is determined
 411    by the base leading-code.  Leading-code takes the range 0x80
 412    through 0x9F, whereas extended leading-code and position-code take
 413    the range 0xA0 through 0xFF.  See `charset.h' for more details
 414    about leading-code and position-code.
 415
 416    There's one exception to this rule.  Special leading-code
 417    `leading-code-composition' denotes that the following several
 418    characters should be composed into one character.  Leading-codes of
 419    components (except for ASCII) are added 0x20.  An ASCII character
 420    component is represented by a 2-byte sequence of `0xA0' and
 421    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 422    details of composite character.  Hence, we can summarize the code
 423    range as follows:
 424
 425    --- CODE RANGE of Emacs' internal format ---
 426    (character set)      (range)
 427    ASCII                0x00 .. 0x7F
 428    ELSE (1st byte)      0x80 .. 0x9F
 429         (rest bytes)    0xA0 .. 0xFF
 430    ---------------------------------------------
 431
 432   */
 433
 434 enum emacs_code_class_type emacs_code_class[256];
 435
 436 /* Go to the next statement only if *SRC is accessible and the code is
 437    greater than 0xA0.  */
 438 #define CHECK_CODE_RANGE_A0_FF  \
 439   do {                          \
 440     if (src >= src_end)         \
 441       goto label_end_of_switch; \
 442     else if (*src++ < 0xA0)     \
 443       return 0;                 \
 444   } while (0)
 445
 446 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 447    Check if a text is encoded in Emacs' internal format.  If it is,
 448    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 449
 450 int
 451 detect_coding_emacs_mule (src, src_end)
 452      unsigned char *src, *src_end;
 453 {
 454   unsigned char c;
 455   int composing = 0;
 456
 457   while (src < src_end)
 458     {
 459       c = *src++;
 460
 461       if (composing)
 462         {
 463           if (c < 0xA0)
 464             composing = 0;
 465           else
 466             c -= 0x20;
 467         }
 468
 469       switch (emacs_code_class[c])
 470         {
 471         case EMACS_ascii_code:
 472         case EMACS_linefeed_code:
 473           break;
 474
 475         case EMACS_control_code:
 476           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 477             return 0;
 478           break;
 479
 480         case EMACS_invalid_code:
 481           return 0;
 482
 483         case EMACS_leading_code_composition: /* c == 0x80 */
 484           if (composing)
 485             CHECK_CODE_RANGE_A0_FF;
 486           else
 487             composing = 1;
 488           break;
 489
 490         case EMACS_leading_code_4:
 491           CHECK_CODE_RANGE_A0_FF;
 492           /* fall down to check it two more times ...  */
 493
 494         case EMACS_leading_code_3:
 495           CHECK_CODE_RANGE_A0_FF;
 496           /* fall down to check it one more time ...  */
 497
 498         case EMACS_leading_code_2:
 499           CHECK_CODE_RANGE_A0_FF;
 500           break;
 501
 502         default:
 503         label_end_of_switch:
 504           break;
 505         }
 506     }
 507   return CODING_CATEGORY_MASK_EMACS_MULE;
 508 }
 509
 510 \f
 511 /*** 3. ISO2022 handlers ***/
 512
 513 /* The following note describes the coding system ISO2022 briefly.
 514    Since the intention of this note is to help in understanding of
 515    the programs in this file, some parts are NOT ACCURATE or OVERLY
 516    SIMPLIFIED.  For the thorough understanding, please refer to the
 517    original document of ISO2022.
 518
 519    ISO2022 provides many mechanisms to encode several character sets
 520    in 7-bit and 8-bit environment.  If one chooses 7-bit environment,
 521    all text is encoded by codes of less than 128.  This may make the
 522    encoded text a little bit longer, but the text gets more stability
 523    to pass through several gateways (some of them strip off the MSB).
 524
 525    There are two kinds of character set: control character set and
 526    graphic character set.  The former contains control characters such
 527    as `newline' and `escape' to provide control functions (control
 528    functions are provided also by escape sequences).  The latter
 529    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 530    two control character sets and many graphic character sets.
 531
 532    Graphic character sets are classified into one of the following
 533    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 534    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 535    bytes (DIMENSION) and the number of characters in one dimension
 536    (CHARS) of the set.  In addition, each character set is assigned an
 537    identification tag (called "final character" and denoted as <F>
 538    here after) which is unique in each class.  <F> of each character
 539    set is decided by ECMA(*) when it is registered in ISO.  Code range
 540    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 541
 542    Note (*): ECMA = European Computer Manufacturers Association
 543
 544    Here are examples of graphic character set [NAME(<F>)]:
 545         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 546         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 547         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 548         o DIMENSION2_CHARS96 -- none for the moment
 549
 550    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 551         C0 [0x00..0x1F] -- control character plane 0
 552         GL [0x20..0x7F] -- graphic character plane 0
 553         C1 [0x80..0x9F] -- control character plane 1
 554         GR [0xA0..0xFF] -- graphic character plane 1
 555
 556    A control character set is directly designated and invoked to C0 or
 557    C1 by an escape sequence.  The most common case is that ISO646's
 558    control character set is designated/invoked to C0 and ISO6429's
 559    control character set is designated/invoked to C1, and usually
 560    these designations/invocations are omitted in a coded text.  With
 561    7-bit environment, only C0 can be used, and a control character for
 562    C1 is encoded by an appropriate escape sequence to fit in the
 563    environment.  All control characters for C1 have
 564    corresponding escape sequences defined.
 565
 566    A graphic character set is at first designated to one of four
 567    graphic registers (G0 through G3), then these graphic registers are
 568    invoked to GL or GR.  These designations and invocations can be
 569    done independently.  The most common case is that G0 is invoked to
 570    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 571    these invocations and designations are omitted in a coded text.
 572    With 7-bit environment, only GL can be used.
 573
 574    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 575    and 0x7F of GL area work as control characters SPACE and DEL
 576    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 577
 578    There are two ways of invocation: locking-shift and single-shift.
 579    With locking-shift, the invocation lasts until the next different
 580    invocation, whereas with single-shift, the invocation works only
 581    for the following character and doesn't affect locking-shift.
 582    Invocations are done by the following control characters or escape
 583    sequences.
 584
 585    ----------------------------------------------------------------------
 586    function             control char    escape sequence description
 587    ----------------------------------------------------------------------
 588    SI  (shift-in)               0x0F    none            invoke G0 to GL
 589    SO  (shift-out)              0x0E    none            invoke G1 to GL
 590    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 591    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 592    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 593    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 594    ----------------------------------------------------------------------
 595    The first four are for locking-shift.  Control characters for these
 596    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 597
 598    Designations are done by the following escape sequences.
 599    ----------------------------------------------------------------------
 600    escape sequence      description
 601    ----------------------------------------------------------------------
 602    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 603    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 604    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 605    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 606    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 607    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 608    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 609    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 610    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 611    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 612    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 613    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 614    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 615    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 616    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 617    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 618    ----------------------------------------------------------------------
 619
 620    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 621    of dimension 1, chars 94, and final character <F>, and etc.
 622
 623    Note (*): Although these designations are not allowed in ISO2022,
 624    Emacs accepts them on decoding, and produces them on encoding
 625    CHARS96 character set in a coding system which is characterized as
 626    7-bit environment, non-locking-shift, and non-single-shift.
 627
 628    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 629    '(' can be omitted.  We call this "short-form" hereafter.
 630
 631    Now you may notice that there are a lot of ways for encoding the
 632    same multilingual text in ISO2022.  Actually, there exists many
 633    coding systems such as Compound Text (used in X's inter client
 634    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 635    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 636    localized platforms), and all of these are variants of ISO2022.
 637
 638    In addition to the above, Emacs handles two more kinds of escape
 639    sequences: ISO6429's direction specification and Emacs' private
 640    sequence for specifying character composition.
 641
 642    ISO6429's direction specification takes the following format:
 643         o CSI ']'      -- end of the current direction
 644         o CSI '0' ']'  -- end of the current direction
 645         o CSI '1' ']'  -- start of left-to-right text
 646         o CSI '2' ']'  -- start of right-to-left text
 647    The control character CSI (0x9B: control sequence introducer) is
 648    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 649
 650    Character composition specification takes the following format:
 651         o ESC '0' -- start character composition
 652         o ESC '1' -- end character composition
 653    Since these are not standard escape sequences of any ISO, the use
 654    of them for these meaning is restricted to Emacs only.  */
 655
 656 enum iso_code_class_type iso_code_class[256];
 657
 658 #define CHARSET_OK(idx, charset)                                \
 659   (coding_system_table[idx]                                     \
 660    && (coding_system_table[idx]->safe_charsets[charset]         \
 661        || (CODING_SPEC_ISO_REQUESTED_DESIGNATION                \
 662             (coding_system_table[idx], charset)                 \
 663            != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
 664
 665 #define SHIFT_OUT_OK(idx) \
 666   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 667
 668 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 669    Check if a text is encoded in ISO2022.  If it is, returns an
 670    integer in which the appropriate flag bits among:
 671         CODING_CATEGORY_MASK_ISO_7
 672         CODING_CATEGORY_MASK_ISO_7_TIGHT
 673         CODING_CATEGORY_MASK_ISO_8_1
 674         CODING_CATEGORY_MASK_ISO_8_2
 675         CODING_CATEGORY_MASK_ISO_7_ELSE
 676         CODING_CATEGORY_MASK_ISO_8_ELSE
 677    are set.  If a code which should never appear in ISO2022 is found,
 678    returns 0.  */
 679
 680 int
 681 detect_coding_iso2022 (src, src_end)
 682      unsigned char *src, *src_end;
 683 {
 684   int mask = CODING_CATEGORY_MASK_ISO;
 685   int mask_found = 0;
 686   int reg[4], shift_out = 0, single_shifting = 0;
 687   int c, c1, i, charset;
 688
 689   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 690   while (mask && src < src_end)
 691     {
 692       c = *src++;
 693       switch (c)
 694         {
 695         case ISO_CODE_ESC:
 696           single_shifting = 0;
 697           if (src >= src_end)
 698             break;
 699           c = *src++;
 700           if (c >= '(' && c <= '/')
 701             {
 702               /* Designation sequence for a charset of dimension 1.  */
 703               if (src >= src_end)
 704                 break;
 705               c1 = *src++;
 706               if (c1 < ' ' || c1 >= 0x80
 707                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 708                 /* Invalid designation sequence.  Just ignore.  */
 709                 break;
 710               reg[(c - '(') % 4] = charset;
 711             }
 712           else if (c == '$')
 713             {
 714               /* Designation sequence for a charset of dimension 2.  */
 715               if (src >= src_end)
 716                 break;
 717               c = *src++;
 718               if (c >= '@' && c <= 'B')
 719                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 720                 reg[0] = charset = iso_charset_table[1][0][c];
 721               else if (c >= '(' && c <= '/')
 722                 {
 723                   if (src >= src_end)
 724                     break;
 725                   c1 = *src++;
 726                   if (c1 < ' ' || c1 >= 0x80
 727                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 728                     /* Invalid designation sequence.  Just ignore.  */
 729                     break;
 730                   reg[(c - '(') % 4] = charset;
 731                 }
 732               else
 733                 /* Invalid designation sequence.  Just ignore.  */
 734                 break;
 735             }
 736           else if (c == 'N' || c == 'O')
 737             {
 738               /* ESC <Fe> for SS2 or SS3.  */
 739               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
 740               break;
 741             }
 742           else if (c == '0' || c == '1' || c == '2')
 743             /* ESC <Fp> for start/end composition.  Just ignore.  */
 744             break;
 745           else
 746             /* Invalid escape sequence.  Just ignore.  */
 747             break;
 748
 749           /* We found a valid designation sequence for CHARSET.  */
 750           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 751           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 752             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 753           else
 754             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 755           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 756             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 757           else
 758             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 759           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 760             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
 761           else
 762             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 763           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 764             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
 765           else
 766             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 767           break;
 768
 769         case ISO_CODE_SO:
 770           single_shifting = 0;
 771           if (shift_out == 0
 772               && (reg[1] >= 0
 773                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 774                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 775             {
 776               /* Locking shift out.  */
 777               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 778               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 779             }
 780           break;
 781
 782         case ISO_CODE_SI:
 783           single_shifting = 0;
 784           if (shift_out == 1)
 785             {
 786               /* Locking shift in.  */
 787               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 788               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 789             }
 790           break;
 791
 792         case ISO_CODE_CSI:
 793           single_shifting = 0;
 794         case ISO_CODE_SS2:
 795         case ISO_CODE_SS3:
 796           {
 797             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 798
 799             if (c != ISO_CODE_CSI)
 800               {
 801                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 802                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 803                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 804                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 805                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 806                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 807                 single_shifting = 1;
 808               }
 809             if (VECTORP (Vlatin_extra_code_table)
 810                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 811               {
 812                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 813                     & CODING_FLAG_ISO_LATIN_EXTRA)
 814                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 815                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 816                     & CODING_FLAG_ISO_LATIN_EXTRA)
 817                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 818               }
 819             mask &= newmask;
 820             mask_found |= newmask;
 821           }
 822           break;
 823
 824         default:
 825           if (c < 0x80)
 826             {
 827               single_shifting = 0;
 828               break;
 829             }
 830           else if (c < 0xA0)
 831             {
 832               single_shifting = 0;
 833               if (VECTORP (Vlatin_extra_code_table)
 834                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 835                 {
 836                   int newmask = 0;
 837
 838                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 839                       & CODING_FLAG_ISO_LATIN_EXTRA)
 840                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 841                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 842                       & CODING_FLAG_ISO_LATIN_EXTRA)
 843                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 844                   mask &= newmask;
 845                   mask_found |= newmask;
 846                 }
 847               else
 848                 return 0;
 849             }
 850           else
 851             {
 852               unsigned char *src_begin = src;
 853
 854               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 855                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 856               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 857               /* Check the length of succeeding codes of the range
 858                  0xA0..0FF.  If the byte length is odd, we exclude
 859                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
 860                  when we are not single shifting.  */
 861               if (!single_shifting)
 862                 {
 863                   while (src < src_end && *src >= 0xA0)
 864                     src++;
 865                   if ((src - src_begin - 1) & 1 && src < src_end)
 866                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 867                   else
 868                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 869                 }
 870             }
 871           break;
 872         }
 873     }
 874
 875   return (mask & mask_found);
 876 }
 877
 878 /* Decode a character of which charset is CHARSET and the 1st position
 879    code is C1.  If dimension of CHARSET is 2, the 2nd position code is
 880    fetched from SRC and set to C2.  If CHARSET is negative, it means
 881    that we are decoding ill formed text, and what we can do is just to
 882    read C1 as is.  */
 883
 884 #define DECODE_ISO_CHARACTER(charset, c1)                               \
 885   do {                                                                  \
 886     int c_alt, charset_alt = (charset);                                 \
 887     if (COMPOSING_HEAD_P (coding->composing))                           \
 888       {                                                                 \
 889         *dst++ = LEADING_CODE_COMPOSITION;                              \
 890         if (COMPOSING_WITH_RULE_P (coding->composing))                  \
 891           /* To tell composition rules are embeded.  */                 \
 892           *dst++ = 0xFF;                                                \
 893         coding->composing += 2;                                         \
 894       }                                                                 \
 895     if (charset_alt >= 0)                                               \
 896       {                                                                 \
 897         if (CHARSET_DIMENSION (charset_alt) == 2)                       \
 898           {                                                             \
 899             ONE_MORE_BYTE (c2);                                         \
 900             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
 901                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
 902               {                                                         \
 903                 src--;                                                  \
 904                 charset_alt = CHARSET_ASCII;                            \
 905               }                                                         \
 906           }                                                             \
 907         if (!NILP (translation_table)                                   \
 908             && ((c_alt = translate_char (translation_table,             \
 909                                          -1, charset_alt, c1, c2)) >= 0)) \
 910           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
 911       }                                                                 \
 912     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
 913       DECODE_CHARACTER_ASCII (c1);                                      \
 914     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
 915       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
 916     else                                                                \
 917       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
 918     if (COMPOSING_WITH_RULE_P (coding->composing))                      \
 919       /* To tell a composition rule follows.  */                        \
 920       coding->composing = COMPOSING_WITH_RULE_RULE;                     \
 921   } while (0)
 922
 923 /* Set designation state into CODING.  */
 924 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 925   do {                                                                     \
 926     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 927                                      make_number (chars),                  \
 928                                      make_number (final_char));            \
 929     if (charset >= 0                                                       \
 930         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 931             || coding->safe_charsets[charset]))                            \
 932       {                                                                    \
 933         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 934             && reg == 0                                                    \
 935             && charset == CHARSET_ASCII)                                   \
 936           {                                                                \
 937             /* We should insert this designation sequence as is so         \
 938                that it is surely written back to a file.  */               \
 939             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 940             goto label_invalid_code;                                       \
 941           }                                                                \
 942         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 943         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 944             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 945           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 946         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 947       }                                                                    \
 948     else                                                                   \
 949       {                                                                    \
 950         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 951         goto label_invalid_code;                                           \
 952       }                                                                    \
 953   } while (0)
 954
 955 /* Check if the current composing sequence contains only valid codes.
 956    If the composing sequence doesn't end before SRC_END, return -1.
 957    Else, if it contains only valid codes, return 0.
 958    Else return the length of the composing sequence.  */
 959
 960 int
 961 check_composing_code (coding, src, src_end)
 962      struct coding_system *coding;
 963      unsigned char *src, *src_end;
 964 {
 965   unsigned char *src_start = src;
 966   int invalid_code_found = 0;
 967   int charset, c, c1, dim;
 968
 969   while (src < src_end)
 970     {
 971       if (*src++ != ISO_CODE_ESC) continue;
 972       if (src >= src_end) break;
 973       if ((c = *src++) == '1') /* end of compsition */
 974         return (invalid_code_found ? src - src_start : 0);
 975       if (src + 2 >= src_end) break;
 976       if (!coding->flags & CODING_FLAG_ISO_DESIGNATION)
 977         invalid_code_found = 1;
 978       else
 979         {
 980           dim = 0;
 981           if (c == '$')
 982             {
 983               dim = 1;
 984               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 985             }
 986           if (c >= '(' && c <= '/')
 987             {
 988               c1 = *src++;
 989               if ((c1 < ' ' || c1 >= 0x80)
 990                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 991                   || ! coding->safe_charsets[charset]
 992                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 993                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 994                 invalid_code_found = 1;
 995             }
 996           else
 997             invalid_code_found = 1;
 998         }
 999     }
1000   return (invalid_code_found
1001           ? src - src_start
1002           : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
1003 }
1004
1005 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1006
1007 int
1008 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1009      struct coding_system *coding;
1010      unsigned char *source, *destination;
1011      int src_bytes, dst_bytes;
1012 {
1013   unsigned char *src = source;
1014   unsigned char *src_end = source + src_bytes;
1015   unsigned char *dst = destination;
1016   unsigned char *dst_end = destination + dst_bytes;
1017   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1018      from DST_END to assure that overflow checking is necessary only
1019      at the head of loop.  */
1020   unsigned char *adjusted_dst_end = dst_end - 6;
1021   int charset;
1022   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1023   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1024   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1025   Lisp_Object translation_table
1026     = coding->translation_table_for_decode;
1027   int result = CODING_FINISH_NORMAL;
1028
1029   if (!NILP (Venable_character_translation) && NILP (translation_table))
1030     translation_table = Vstandard_translation_table_for_decode;
1031
1032   coding->produced_char = 0;
1033   coding->fake_multibyte = 0;
1034   while (src < src_end && (dst_bytes
1035                            ? (dst < adjusted_dst_end)
1036                            : (dst < src - 6)))
1037     {
1038       /* SRC_BASE remembers the start position in source in each loop.
1039          The loop will be exited when there's not enough source text
1040          to analyze long escape sequence or 2-byte code (within macros
1041          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1042          to SRC_BASE before exiting.  */
1043       unsigned char *src_base = src;
1044       int c1 = *src++, c2;
1045
1046       switch (iso_code_class [c1])
1047         {
1048         case ISO_0x20_or_0x7F:
1049           if (!coding->composing
1050               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1051             {
1052               /* This is SPACE or DEL.  */
1053               *dst++ = c1;
1054               coding->produced_char++;
1055               break;
1056             }
1057           /* This is a graphic character, we fall down ...  */
1058
1059         case ISO_graphic_plane_0:
1060           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1061             {
1062               /* This is a composition rule.  */
1063               *dst++ = c1 | 0x80;
1064               coding->composing = COMPOSING_WITH_RULE_TAIL;
1065             }
1066           else
1067             DECODE_ISO_CHARACTER (charset0, c1);
1068           break;
1069
1070         case ISO_0xA0_or_0xFF:
1071           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1072               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1073             goto label_invalid_code;
1074           /* This is a graphic character, we fall down ... */
1075
1076         case ISO_graphic_plane_1:
1077           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1078             goto label_invalid_code;
1079           else
1080             DECODE_ISO_CHARACTER (charset1, c1);
1081           break;
1082
1083         case ISO_control_code:
1084           /* All ISO2022 control characters in this class have the
1085              same representation in Emacs internal format.  */
1086           if (c1 == '\n'
1087               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1088               && (coding->eol_type == CODING_EOL_CR
1089                   || coding->eol_type == CODING_EOL_CRLF))
1090             {
1091               result = CODING_FINISH_INCONSISTENT_EOL;
1092               goto label_end_of_loop_2;
1093             }
1094           *dst++ = c1;
1095           coding->produced_char++;
1096           break;
1097
1098         case ISO_carriage_return:
1099           if (coding->eol_type == CODING_EOL_CR)
1100             *dst++ = '\n';
1101           else if (coding->eol_type == CODING_EOL_CRLF)
1102             {
1103               ONE_MORE_BYTE (c1);
1104               if (c1 == ISO_CODE_LF)
1105                 *dst++ = '\n';
1106               else
1107                 {
1108                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1109                     {
1110                       result = CODING_FINISH_INCONSISTENT_EOL;
1111                       goto label_end_of_loop_2;
1112                     }
1113                   src--;
1114                   *dst++ = '\r';
1115                 }
1116             }
1117           else
1118             *dst++ = c1;
1119           coding->produced_char++;
1120           break;
1121
1122         case ISO_shift_out:
1123           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1124               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1125             goto label_invalid_code;
1126           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1127           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1128           break;
1129
1130         case ISO_shift_in:
1131           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1132             goto label_invalid_code;
1133           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1134           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1135           break;
1136
1137         case ISO_single_shift_2_7:
1138         case ISO_single_shift_2:
1139           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1140             goto label_invalid_code;
1141           /* SS2 is handled as an escape sequence of ESC 'N' */
1142           c1 = 'N';
1143           goto label_escape_sequence;
1144
1145         case ISO_single_shift_3:
1146           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1147             goto label_invalid_code;
1148           /* SS2 is handled as an escape sequence of ESC 'O' */
1149           c1 = 'O';
1150           goto label_escape_sequence;
1151
1152         case ISO_control_sequence_introducer:
1153           /* CSI is handled as an escape sequence of ESC '[' ...  */
1154           c1 = '[';
1155           goto label_escape_sequence;
1156
1157         case ISO_escape:
1158           ONE_MORE_BYTE (c1);
1159         label_escape_sequence:
1160           /* Escape sequences handled by Emacs are invocation,
1161              designation, direction specification, and character
1162              composition specification.  */
1163           switch (c1)
1164             {
1165             case '&':           /* revision of following character set */
1166               ONE_MORE_BYTE (c1);
1167               if (!(c1 >= '@' && c1 <= '~'))
1168                 goto label_invalid_code;
1169               ONE_MORE_BYTE (c1);
1170               if (c1 != ISO_CODE_ESC)
1171                 goto label_invalid_code;
1172               ONE_MORE_BYTE (c1);
1173               goto label_escape_sequence;
1174
1175             case '$':           /* designation of 2-byte character set */
1176               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1177                 goto label_invalid_code;
1178               ONE_MORE_BYTE (c1);
1179               if (c1 >= '@' && c1 <= 'B')
1180                 {       /* designation of JISX0208.1978, GB2312.1980,
1181                                    or JISX0208.1980 */
1182                   DECODE_DESIGNATION (0, 2, 94, c1);
1183                 }
1184               else if (c1 >= 0x28 && c1 <= 0x2B)
1185                 {       /* designation of DIMENSION2_CHARS94 character set */
1186                   ONE_MORE_BYTE (c2);
1187                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1188                 }
1189               else if (c1 >= 0x2C && c1 <= 0x2F)
1190                 {       /* designation of DIMENSION2_CHARS96 character set */
1191                   ONE_MORE_BYTE (c2);
1192                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1193                 }
1194               else
1195                 goto label_invalid_code;
1196               break;
1197
1198             case 'n':           /* invocation of locking-shift-2 */
1199               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1200                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1201                 goto label_invalid_code;
1202               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1203               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1204               break;
1205
1206             case 'o':           /* invocation of locking-shift-3 */
1207               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1208                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1209                 goto label_invalid_code;
1210               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1211               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1212               break;
1213
1214             case 'N':           /* invocation of single-shift-2 */
1215               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1216                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1217                 goto label_invalid_code;
1218               ONE_MORE_BYTE (c1);
1219               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1220               DECODE_ISO_CHARACTER (charset, c1);
1221               break;
1222
1223             case 'O':           /* invocation of single-shift-3 */
1224               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1225                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1226                 goto label_invalid_code;
1227               ONE_MORE_BYTE (c1);
1228               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1229               DECODE_ISO_CHARACTER (charset, c1);
1230               break;
1231
1232             case '0': case '2': /* start composing */
1233               /* Before processing composing, we must be sure that all
1234                  characters being composed are supported by CODING.
1235                  If not, we must give up composing and insert the
1236                  bunch of codes for composing as is without decoding.  */
1237               {
1238                 int result1;
1239
1240                 result1 = check_composing_code (coding, src, src_end);
1241                 if (result1 == 0)
1242                   {
1243                     coding->composing = (c1 == '0'
1244                                          ? COMPOSING_NO_RULE_HEAD
1245                                          : COMPOSING_WITH_RULE_HEAD);
1246                     coding->produced_char++;
1247                   }
1248                 else if (result1 > 0)
1249                   {
1250                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1251                       {
1252                         bcopy (src_base, dst, result1 + 2);
1253                         src += result1;
1254                         dst += result1 + 2;
1255                         coding->produced_char += result1 + 2;
1256                       }
1257                     else
1258                       {
1259                         result = CODING_FINISH_INSUFFICIENT_DST;
1260                         goto label_end_of_loop_2;
1261                       }
1262                   }
1263                 else
1264                   goto label_end_of_loop;
1265               }
1266               break;
1267
1268             case '1':           /* end composing */
1269               coding->composing = COMPOSING_NO;
1270               break;
1271
1272             case '[':           /* specification of direction */
1273               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1274                 goto label_invalid_code;
1275               /* For the moment, nested direction is not supported.
1276                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1277                  left-to-right, and nozero means right-to-left.  */
1278               ONE_MORE_BYTE (c1);
1279               switch (c1)
1280                 {
1281                 case ']':       /* end of the current direction */
1282                   coding->mode &= ~CODING_MODE_DIRECTION;
1283
1284                 case '0':       /* end of the current direction */
1285                 case '1':       /* start of left-to-right direction */
1286                   ONE_MORE_BYTE (c1);
1287                   if (c1 == ']')
1288                     coding->mode &= ~CODING_MODE_DIRECTION;
1289                   else
1290                     goto label_invalid_code;
1291                   break;
1292
1293                 case '2':       /* start of right-to-left direction */
1294                   ONE_MORE_BYTE (c1);
1295                   if (c1 == ']')
1296                     coding->mode |= CODING_MODE_DIRECTION;
1297                   else
1298                     goto label_invalid_code;
1299                   break;
1300
1301                 default:
1302                   goto label_invalid_code;
1303                 }
1304               break;
1305
1306             default:
1307               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1308                 goto label_invalid_code;
1309               if (c1 >= 0x28 && c1 <= 0x2B)
1310                 {       /* designation of DIMENSION1_CHARS94 character set */
1311                   ONE_MORE_BYTE (c2);
1312                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1313                 }
1314               else if (c1 >= 0x2C && c1 <= 0x2F)
1315                 {       /* designation of DIMENSION1_CHARS96 character set */
1316                   ONE_MORE_BYTE (c2);
1317                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1318                 }
1319               else
1320                 {
1321                   goto label_invalid_code;
1322                 }
1323             }
1324           /* We must update these variables now.  */
1325           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1326           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1327           break;
1328
1329         label_invalid_code:
1330           while (src_base < src)
1331             *dst++ = *src_base++;
1332           coding->fake_multibyte = 1;
1333         }
1334       continue;
1335
1336     label_end_of_loop:
1337       result = CODING_FINISH_INSUFFICIENT_SRC;
1338     label_end_of_loop_2:
1339       src = src_base;
1340       break;
1341     }
1342
1343   if (src < src_end)
1344     {
1345       if (result == CODING_FINISH_NORMAL)
1346         result = CODING_FINISH_INSUFFICIENT_DST;
1347       else if (result != CODING_FINISH_INCONSISTENT_EOL
1348                && coding->mode & CODING_MODE_LAST_BLOCK)
1349         {
1350           /* This is the last block of the text to be decoded.  We had
1351              better just flush out all remaining codes in the text
1352              although they are not valid characters.  */
1353           src_bytes = src_end - src;
1354           if (dst_bytes && (dst_end - dst < src_bytes))
1355             src_bytes = dst_end - dst;
1356           bcopy (src, dst, src_bytes);
1357           dst += src_bytes;
1358           src += src_bytes;
1359           coding->fake_multibyte = 1;
1360         }
1361     }
1362
1363   coding->consumed = coding->consumed_char = src - source;
1364   coding->produced = dst - destination;
1365   return result;
1366 }
1367
1368 /* ISO2022 encoding stuff.  */
1369
1370 /*
1371    It is not enough to say just "ISO2022" on encoding, we have to
1372    specify more details.  In Emacs, each coding system of ISO2022
1373    variant has the following specifications:
1374         1. Initial designation to G0 thru G3.
1375         2. Allows short-form designation?
1376         3. ASCII should be designated to G0 before control characters?
1377         4. ASCII should be designated to G0 at end of line?
1378         5. 7-bit environment or 8-bit environment?
1379         6. Use locking-shift?
1380         7. Use Single-shift?
1381    And the following two are only for Japanese:
1382         8. Use ASCII in place of JIS0201-1976-Roman?
1383         9. Use JISX0208-1983 in place of JISX0208-1978?
1384    These specifications are encoded in `coding->flags' as flag bits
1385    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1386    details.
1387 */
1388
/* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG, and record the designation in CODING.  If
   <final-char> of CHARSET is '@', 'A', or 'B' and the coding system
   CODING allows, produce designation sequence of short-form.  If the
   revision number of CHARSET in CODING is less than 255, the
   designation is preceded by ESC '&' and the byte '@' + revision.

   The codes are written through DST of the enclosing scope.  CHARSET,
   REG and CODING are evaluated more than once, so they must not have
   side effects.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) == 1)                               \
      {                                                                 \
        if (CHARSET_CHARS (charset) == 94)                              \
          *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
      }                                                                 \
    else                                                                \
      {                                                                 \
        *dst++ = '$';                                                   \
        if (CHARSET_CHARS (charset) == 94)                              \
          {                                                             \
            /* The intermediate char is omitted only in short-form.  */ \
            if (! ((coding)->flags & CODING_FLAG_ISO_SHORT_FORM)        \
                || (reg) != 0                                           \
                || final_char < '@' || final_char > 'B')                \
              *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
          }                                                             \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
      }                                                                 \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = (charset);              \
  } while (0)
1430
1431 /* Single-shift-2 and single-shift-3.  Each macro below appends the
1432    shift code at DST (a control character, or an escape sequence when
1433    CODING_FLAG_ISO_SEVEN_BITS is set) and marks CODING as single-shifting.  */
1434
1435 #define ENCODE_SINGLE_SHIFT_2                           \
1436   do {                                                  \
1437     if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
1438       {                                                 \
1439         coding->fake_multibyte = 1;                     \
1440         *dst++ = ISO_CODE_SS2;                          \
1441       }                                                 \
1442     else                                                \
1443       *dst++ = ISO_CODE_ESC, *dst++ = 'N';              \
1444     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1445   } while (0)
1446
1447 #define ENCODE_SINGLE_SHIFT_3                           \
1448   do {                                                  \
1449     if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
1450       {                                                 \
1451         coding->fake_multibyte = 1;                     \
1452         *dst++ = ISO_CODE_SS3;                          \
1453       }                                                 \
1454     else                                                \
1455       *dst++ = ISO_CODE_ESC, *dst++ = 'O';              \
1456     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1457   } while (0)
1458
1459 /* Locking shifts.  Each of the four macros below appends one
1460    locking-shift code at DST (SI, SO, ESC n, or ESC o) and records in
1461    CODING which graphic register is now invoked to graphic plane 0.  */
1462
1463 #define ENCODE_SHIFT_IN                         \
1464   do {                                          \
1465     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1466     *dst++ = ISO_CODE_SI;                       \
1467   } while (0)
1468
1469 #define ENCODE_SHIFT_OUT                        \
1470   do {                                          \
1471     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1472     *dst++ = ISO_CODE_SO;                       \
1473   } while (0)
1474
1475 #define ENCODE_LOCKING_SHIFT_2                  \
1476   do {                                          \
1477     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1478     *dst++ = ISO_CODE_ESC, *dst++ = 'n';        \
1479   } while (0)
1480
1481 #define ENCODE_LOCKING_SHIFT_3                  \
1482   do {                                          \
1483     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1484     *dst++ = ISO_CODE_ESC, *dst++ = 'o';        \
1485   } while (0)
1486
1487 /* Produce codes for a DIMENSION1 character whose character set is
1488    CHARSET and whose position-code is C1.  Designation and invocation
1489    sequences are also produced in advance if necessary.
        The macro uses `coding' and `dst' from the enclosing scope and
        advances `dst' past the bytes it writes.  Its body is a
        `do ... while (1)' loop: every `break' below leaves only that
        loop (never a `switch' or loop around the macro call), and the
        final `else' branch falls through to the loop test, so the
        checks are repeated after encode_invocation_designation has
        updated the designation and invocation state.  */
1490
1491
1492 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
1493   do {                                                                  \
         /* The single-shifting flag is set (see ENCODE_SINGLE_SHIFT_2   \
            and ENCODE_SINGLE_SHIFT_3): produce this one character and   \
            clear the flag.  */                                          \
1494     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1495       {                                                                 \
1496         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1497           *dst++ = c1 & 0x7F;                                           \
1498         else                                                            \
1499           *dst++ = c1 | 0x80;                                           \
1500         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1501         break;                                                          \
1502       }                                                                 \
         /* CHARSET is the charset of graphic plane 0: the code is       \
            produced with the 8th bit cleared.  */                       \
1503     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1504       {                                                                 \
1505         *dst++ = c1 & 0x7F;                                             \
1506         break;                                                          \
1507       }                                                                 \
         /* CHARSET is the charset of graphic plane 1: the code is       \
            produced with the 8th bit set.  */                           \
1508     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1509       {                                                                 \
1510         *dst++ = c1 | 0x80;                                             \
1511         break;                                                          \
1512       }                                                                 \
1513     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1514              && !coding->safe_charsets[charset])                        \
1515       {                                                                 \
1516         /* We should not encode this character, instead produce one or  \
1517            two `?'s.  */                                                \
1518         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1519         if (CHARSET_WIDTH (charset) == 2)                               \
1520           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1521         break;                                                          \
1522       }                                                                 \
1523     else                                                                \
1524       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1525          must invoke it, or, at first, designate it to some graphic     \
1526          register.  Then repeat the loop to actually produce the        \
1527          character.  */                                                 \
1528       dst = encode_invocation_designation (charset, coding, dst);       \
1529   } while (1)
1530
1531 /* Produce codes for a DIMENSION2 character whose character set is
1532    CHARSET and whose position-codes are C1 and C2.  Designation and
1533    invocation codes are also produced in advance if necessary.
        The macro uses `coding' and `dst' from the enclosing scope and
        advances `dst' past the bytes it writes.  Its body is a
        `do ... while (1)' loop: every `break' below leaves only that
        loop (never a `switch' or loop around the macro call), and the
        final `else' branch falls through to the loop test, so the
        checks are repeated after encode_invocation_designation has
        updated the designation and invocation state.  */
1534
1535 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
1536   do {                                                                  \
         /* The single-shifting flag is set (see ENCODE_SINGLE_SHIFT_2   \
            and ENCODE_SINGLE_SHIFT_3): produce this one character and   \
            clear the flag.  */                                          \
1537     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1538       {                                                                 \
1539         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1540           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
1541         else                                                            \
1542           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
1543         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1544         break;                                                          \
1545       }                                                                 \
         /* CHARSET is the charset of graphic plane 0: both codes are    \
            produced with the 8th bit cleared.  */                       \
1546     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1547       {                                                                 \
1548         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;                          \
1549         break;                                                          \
1550       }                                                                 \
         /* CHARSET is the charset of graphic plane 1: both codes are    \
            produced with the 8th bit set.  */                           \
1551     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1552       {                                                                 \
1553         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
1554         break;                                                          \
1555       }                                                                 \
1556     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1557              && !coding->safe_charsets[charset])                        \
1558       {                                                                 \
1559         /* We should not encode this character, instead produce one or  \
1560            two `?'s.  */                                                \
1561         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1562         if (CHARSET_WIDTH (charset) == 2)                               \
1563           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1564         break;                                                          \
1565       }                                                                 \
1566     else                                                                \
1567       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1568          must invoke it, or, at first, designate it to some graphic     \
1569          register.  Then repeat the loop to actually produce the        \
1570          character.  */                                                 \
1571       dst = encode_invocation_designation (charset, coding, dst);       \
1572   } while (1)
1573
     /* Produce codes for one character of CHARSET whose position-codes
        are C1 and C2 (C2 is a dummy for a DIMENSION1 charset).  The
        macro uses `translation_table', `coding' and `dst' from the
        enclosing scope.  C1 and C2 must be lvalues: when
        `translation_table' is non-nil and translate_char returns a
        non-negative character, that character is split back into
        CHARSET_ALT, C1 and C2, and the translated charset is the one
        encoded.  `coding->consumed_char' is incremented unless a
        composition is in progress.  */
1574 #define ENCODE_ISO_CHARACTER(charset, c1, c2)                   \
1575   do {                                                          \
1576     int c_alt, charset_alt;                                     \
1577     if (!NILP (translation_table)                               \
1578         && ((c_alt = translate_char (translation_table, -1,     \
1579                                      charset, c1, c2))          \
1580             >= 0))                                              \
1581       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
1582     else                                                        \
1583       charset_alt = charset;                                    \
1584     if (CHARSET_DIMENSION (charset_alt) == 1)                   \
1585       {                                                         \
             /* The test is on the original CHARSET argument, not on \
                the translated CHARSET_ALT.  */                      \
1586         if (charset == CHARSET_ASCII                            \
1587             && coding->flags & CODING_FLAG_ISO_USE_ROMAN)       \
1588           charset_alt = charset_latin_jisx0201;                 \
1589         ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);      \
1590       }                                                         \
1591     else                                                        \
1592       {                                                         \
             /* Likewise tested on the original CHARSET argument.  */ \
1593         if (charset == charset_jisx0208                         \
1594             && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)      \
1595           charset_alt = charset_jisx0208_1978;                  \
1596         ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);  \
1597       }                                                         \
1598     if (! COMPOSING_P (coding->composing))                      \
1599       coding->consumed_char++;                                  \
1600   } while (0)
1601
1602 /* Make CHARSET usable for output.  Write at DST whatever
1603    designation and invocation codes are still needed, update the
1604    element `spec.iso2022' of *CODING to match, and return the new
1605    DST.  */
1606
1607 unsigned char *
1608 encode_invocation_designation (charset, coding, dst)
1609      int charset;
1610      struct coding_system *coding;
1611      unsigned char *dst;
1612 {
1613   /* Graphic register number.  */
1614   int reg = 0;
1615
1616   /* Search the four graphic registers for one that CHARSET is
1617      already designated to.  */
1618   while (reg < 4 && charset != CODING_SPEC_ISO_DESIGNATION (coding, reg))
1619     reg++;
1620
1621   if (reg == 4)
1622     {
1623       /* CHARSET is not yet designated to any graphic register.
1624          Designate it to the register it requests, or to graphic
1625          register 0 when it requests none.  */
1626       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1627       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1628         reg = 0;
1629
1630       ENCODE_DESIGNATION (charset, reg, coding);
1631     }
1632
1633   /* If REG is already invoked to graphic plane 0 or 1, no invocation
1634      code is needed.  */
1635   if (CODING_SPEC_ISO_INVOCATION (coding, 0) == reg
1636       || CODING_SPEC_ISO_INVOCATION (coding, 1) == reg)
1637     return dst;
1638
1639   /* Otherwise invoke REG.  Registers 0 and 1 use shift-in and
1640      shift-out.  Registers 2 and 3 use a single shift when the coding
1641      system has CODING_FLAG_ISO_SINGLE_SHIFT set, and a locking shift
1642      otherwise.  */
1643   if (reg == 0)
1644     ENCODE_SHIFT_IN;
1645   else if (reg == 1)
1646     ENCODE_SHIFT_OUT;
1647   else if (reg == 2)
1648     {
1649       if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1650         ENCODE_SINGLE_SHIFT_2;
1651       else
1652         ENCODE_LOCKING_SHIFT_2;
1653     }
1654   else if (reg == 3)
1655     {
1656       if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1657         ENCODE_SINGLE_SHIFT_3;
1658       else
1659         ENCODE_LOCKING_SHIFT_3;
1660     }
1661
1662   return dst;
1663 }
1664
1665 /* The following three macros produce codes for indicating composition:
        ISO_CODE_ESC followed by '0' starts a composition without rules,
        ISO_CODE_ESC followed by '2' starts a composition with rules, and
        ISO_CODE_ESC followed by '1' ends a composition.  Each macro is a
        comma expression that appends its two bytes through `dst'.  */
1666 #define ENCODE_COMPOSITION_NO_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '0'
1667 #define ENCODE_COMPOSITION_WITH_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '2'
1668 #define ENCODE_COMPOSITION_END    *dst++ = ISO_CODE_ESC, *dst++ = '1'
1669
1670 /* The following three macros produce codes for indicating direction
1671    of text.  All three are expressions that append bytes through
1672    `dst'.  The introducer is `ESC [' when CODING_FLAG_ISO_SEVEN_BITS
1673    is set in `coding->flags' (a bit mask, hence the `&' test), and
1674    the single byte ISO_CODE_CSI otherwise.  */
1675 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
1676   ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)         \
1677    ? (*dst++ = ISO_CODE_ESC, *dst++ = '[')              \
1678    : (*dst++ = ISO_CODE_CSI))
1679
1680 #define ENCODE_DIRECTION_R2L    \
1681   (ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '2', *dst++ = ']')
1682
1683 #define ENCODE_DIRECTION_L2R    \
1684   (ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']')
1685
1686 /* Return the encoder to its initial state: shift graphic register 0
1687    back into plane 0, then redo each initial designation that differs.  */
1688 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
1689   do {                                                                      \
1690     int gr, want;                                                           \
1691     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
1692       ENCODE_SHIFT_IN;                                                      \
1693     for (gr = 0; gr < 4; gr++)                                              \
1694       {                                                                     \
1695         want = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, gr);            \
1696         if (want >= 0 && want != CODING_SPEC_ISO_DESIGNATION (coding, gr))  \
1697           ENCODE_DESIGNATION (want, gr, coding);                            \
1698       }                                                                     \
1699   } while (0)
1700
1701 /* Scan the line of text starting at SRC (up to SRC_END or the first
1702    newline) for charsets that request a particular graphic register,
1703    and produce the designation sequences they need at *DSTP, updating
1704    DSTP.  TABLE, if non-nil, is a translation table applied to each
1705    character before its charset is examined.  If the current block
1706    ends before any end-of-line, we may fail to find all the necessary
1707    designations.  */
1708
1709 void
1710 encode_designation_at_bol (coding, table, src, src_end, dstp)
1711      struct coding_system *coding;
1712      Lisp_Object table;
1713      unsigned char *src, *src_end, **dstp;
1714 {
1715   int charset, c, reg, bytes, found = 0;
1716   /* WANTED[REG] is the charset to designate to graphic register REG,
1717      or -1 while none has been found.  */
1718   int wanted[4];
1719   unsigned char *dst = *dstp;
1720
1721   wanted[0] = wanted[1] = wanted[2] = wanted[3] = -1;
1722
1723   for (; src < src_end && *src != '\n' && found < 4; src += bytes)
1724     {
1725       bytes = BYTES_BY_CHAR_HEAD (*src);
1726       if (! NILP (table))
1727         {
1728           int translated;
1729           unsigned char c1, c2;
1730
1731           SPLIT_STRING(src, bytes, charset, c1, c2);
1732           translated = translate_char (table, -1, charset, c1, c2);
1733           if (translated >= 0)
1734             charset = CHAR_CHARSET (translated);
1735         }
1736       else
1737         charset = CHARSET_AT (src);
1738
1739       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1740       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION
1741           || wanted[reg] >= 0)
1742         continue;
1743       wanted[reg] = charset;
1744       found++;
1745     }
1746
1747   if (! found)
1748     return;
1749
1750   for (reg = 0; reg < 4; reg++)
1751     if (wanted[reg] >= 0
1752         && CODING_SPEC_ISO_DESIGNATION (coding, reg) != wanted[reg])
1753       ENCODE_DESIGNATION (wanted[reg], reg, coding);
1754   *dstp = dst;
1755 }
1756
1757 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
     /* Encode SRC_BYTES bytes of text in Emacs' internal format at SOURCE
        into ISO2022 at DESTINATION, using and updating the state kept in
        CODING.  `coding->consumed_char' and `coding->fake_multibyte' are
        reset to 0 on entry.  On return `coding->consumed' is the number
        of source bytes used, and `coding->produced' and
        `coding->produced_char' are both the number of bytes written.
        The value is CODING_FINISH_NORMAL, CODING_FINISH_INSUFFICIENT_SRC
        (control reached label_end_of_loop), or
        CODING_FINISH_INSUFFICIENT_DST (the loop stopped with source
        bytes left over).
        If DST_BYTES is zero, DST_END is not used as the limit; the loop
        instead runs only while DST is more than 19 bytes behind SRC.  */
1758
1759 int
1760 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1761      struct coding_system *coding;
1762      unsigned char *source, *destination;
1763      int src_bytes, dst_bytes;
1764 {
1765   unsigned char *src = source;
1766   unsigned char *src_end = source + src_bytes;
1767   unsigned char *dst = destination;
1768   unsigned char *dst_end = destination + dst_bytes;
1769   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1770      from DST_END to assure overflow checking is necessary only at the
1771      head of loop.  */
1772   unsigned char *adjusted_dst_end = dst_end - 19;
1773   Lisp_Object translation_table
1774       = coding->translation_table_for_encode;
1775   int result = CODING_FINISH_NORMAL;
1776
       /* Fall back to the standard encoding translation table when
          character translation is enabled and CODING supplies none.  */
1777   if (!NILP (Venable_character_translation) && NILP (translation_table))
1778     translation_table = Vstandard_translation_table_for_encode;
1779
1780   coding->consumed_char = 0;
1781   coding->fake_multibyte = 0;
1782   while (src < src_end && (dst_bytes
1783                            ? (dst < adjusted_dst_end)
1784                            : (dst < src - 19)))
1785     {
1786       /* SRC_BASE remembers the start position in source in each loop.
1787          The loop will be exited when there's not enough source text
1788          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1789          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1790          reset to SRC_BASE before exiting.  */
1791       unsigned char *src_base = src;
1792       int charset, c1, c2, c3, c4;
1793
1794       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1795           && CODING_SPEC_ISO_BOL (coding))
1796         {
1797           /* We have to produce designation sequences if any now.  */
1798           encode_designation_at_bol (coding, translation_table,
1799                                      src, src_end, &dst);
1800           CODING_SPEC_ISO_BOL (coding) = 0;
1801         }
1802
1803       c1 = *src++;
1804       /* If we are seeing a component of a composite character, we are
1805          seeing a leading-code encoded irregularly for composition, or
1806          a composition rule if composing with rule.  We must set C1 to
1807          a normal leading-code or an ASCII code.  If we are not seeing
1808          a composite character, we must reset composition,
1809          designation, and invocation states.  */
1810       if (COMPOSING_P (coding->composing))
1811         {
1812           if (c1 < 0xA0)
1813             {
1814               /* We are not in a composite character any longer.  */
1815               coding->composing = COMPOSING_NO;
1816               ENCODE_RESET_PLANE_AND_REGISTER;
1817               ENCODE_COMPOSITION_END;
1818             }
1819           else
1820             {
1821               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1822                 {
                       /* This byte is a composition rule: produce it with
                          the 8th bit cleared and expect a component next.  */
1823                   *dst++ = c1 & 0x7F;
1824                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1825                   continue;
1826                 }
1827               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1828                 coding->composing = COMPOSING_WITH_RULE_RULE;
1829               if (c1 == 0xA0)
1830                 {
1831                   /* This is an ASCII component.  */
1832                   ONE_MORE_BYTE (c1);
1833                   c1 &= 0x7F;
1834                 }
1835               else
1836                 /* This is a leading-code of non ASCII component.  */
1837                 c1 -= 0x20;
1838             }
1839         }
1840
1841       /* Now encode one character.  C1 is a control character, an
1842          ASCII character, or a leading-code of multi-byte character.  */
1843       switch (emacs_code_class[c1])
1844         {
1845         case EMACS_ascii_code:
1846           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1847           break;
1848
1849         case EMACS_control_code:
1850           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1851             ENCODE_RESET_PLANE_AND_REGISTER;
1852           *dst++ = c1;
1853           coding->consumed_char++;
1854           break;
1855
1856         case EMACS_carriage_return_code:
1857           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1858             {
1859               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1860                 ENCODE_RESET_PLANE_AND_REGISTER;
1861               *dst++ = c1;
1862               coding->consumed_char++;
1863               break;
1864             }
1865           /* fall down to treat '\r' as '\n' ...  */
1866
1867         case EMACS_linefeed_code:
1868           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1869             ENCODE_RESET_PLANE_AND_REGISTER;
               /* With CODING_FLAG_ISO_INIT_AT_BOL, only the recorded
                  designation state is reset here; no bytes are produced.  */
1870           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1871             bcopy (coding->spec.iso2022.initial_designation,
1872                    coding->spec.iso2022.current_designation,
1873                    sizeof coding->spec.iso2022.initial_designation);
               /* Produce the end-of-line in the form selected by
                  `coding->eol_type': LF, CR LF, or CR.  */
1874           if (coding->eol_type == CODING_EOL_LF
1875               || coding->eol_type == CODING_EOL_UNDECIDED)
1876             *dst++ = ISO_CODE_LF;
1877           else if (coding->eol_type == CODING_EOL_CRLF)
1878             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1879           else
1880             *dst++ = ISO_CODE_CR;
1881           CODING_SPEC_ISO_BOL (coding) = 1;
1882           coding->consumed_char++;
1883           break;
1884
1885         case EMACS_leading_code_2:
1886           ONE_MORE_BYTE (c2);
1887           if (c2 < 0xA0)
1888             {
1889               /* invalid sequence */
                   /* Produce the leading code as is and push C2 back so
                      that it is handled as the next character.  */
1890               *dst++ = c1;
1891               src--;
1892               coding->consumed_char++;
1893             }
1894           else
1895             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1896           break;
1897
1898         case EMACS_leading_code_3:
1899           TWO_MORE_BYTES (c2, c3);
1900           if (c2 < 0xA0 || c3 < 0xA0)
1901             {
1902               /* invalid sequence */
1903               *dst++ = c1;
1904               src -= 2;
1905               coding->consumed_char++;
1906             }
               /* Below LEADING_CODE_PRIVATE_11, C1 itself is passed as the
                  charset with C2 and C3 as position-codes; otherwise C2
                  is passed as the charset and C3 as the position-code.  */
1907           else if (c1 < LEADING_CODE_PRIVATE_11)
1908             ENCODE_ISO_CHARACTER (c1, c2, c3);
1909           else
1910             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1911           break;
1912
1913         case EMACS_leading_code_4:
1914           THREE_MORE_BYTES (c2, c3, c4);
1915           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1916             {
1917               /* invalid sequence */
1918               *dst++ = c1;
1919               src -= 3;
1920               coding->consumed_char++;
1921             }
1922           else
1923             ENCODE_ISO_CHARACTER (c2, c3, c4);
1924           break;
1925
1926         case EMACS_leading_code_composition:
1927           ONE_MORE_BYTE (c2);
1928           if (c2 < 0xA0)
1929             {
1930               /* invalid sequence */
1931               *dst++ = c1;
1932               src--;
1933               coding->consumed_char++;
1934             }
1935           else if (c2 == 0xFF)
1936             {
                   /* A second byte of 0xFF starts a composition with
                      rules.  */
1937               ENCODE_RESET_PLANE_AND_REGISTER;
1938               coding->composing = COMPOSING_WITH_RULE_HEAD;
1939               ENCODE_COMPOSITION_WITH_RULE_START;
1940               coding->consumed_char++;
1941             }
1942           else
1943             {
1944               ENCODE_RESET_PLANE_AND_REGISTER;
1945               /* Rewind one byte because it is a character code of
1946                  composition elements.  */
1947               src--;
1948               coding->composing = COMPOSING_NO_RULE_HEAD;
1949               ENCODE_COMPOSITION_NO_RULE_START;
1950               coding->consumed_char++;
1951             }
1952           break;
1953
1954         case EMACS_invalid_code:
1955           *dst++ = c1;
1956           coding->consumed_char++;
1957           break;
1958         }
1959       continue;
           /* Reached only by a jump to this label.  NOTE(review): the
              jumps are presumably inside ONE_MORE_BYTE, TWO_MORE_BYTES and
              THREE_MORE_BYTES, which are defined elsewhere -- confirm.  */
1960     label_end_of_loop:
1961       result = CODING_FINISH_INSUFFICIENT_SRC;
1962       src = src_base;
1963       break;
1964     }
1965
       /* Source text remains although no multi-byte sequence was cut
          short, so the loop stopped on its destination-space test.  */
1966   if (src < src_end && result == CODING_FINISH_NORMAL)
1967     result = CODING_FINISH_INSUFFICIENT_DST;
1968
1969   /* If this is the last block of the text to be encoded, we must
1970      reset graphic planes and registers to the initial state, and
1971      flush out the carryover if any.  */
       /* NOTE(review): the bytes produced here are not checked against
          ADJUSTED_DST_END or DST_END.  */
1972   if (coding->mode & CODING_MODE_LAST_BLOCK)
1973     {
1974       ENCODE_RESET_PLANE_AND_REGISTER;
1975       if (COMPOSING_P (coding->composing))
1976         ENCODE_COMPOSITION_END;
1977     }
1978   coding->consumed = src - source;
1979   coding->produced = coding->produced_char = dst - destination;
1980   return result;
1981 }
1982
1983 \f
1984 /*** 4. SJIS and BIG5 handlers ***/
1985
1986 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1987    quite widely.  So, for the moment, Emacs supports them in the bare
1988    C code.  But, in the future, they may be supported only by CCL.  */
1989
1990 /* SJIS is a coding system encoding three character sets: ASCII, right
1991    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
1992    as is.  A character of charset katakana-jisx0201 is encoded by
1993    "position-code + 0x80".  A character of charset japanese-jisx0208
1994    is encoded in 2 bytes; its two position-codes are divided and shifted
1995    so that they fit in the ranges below.
1996
1997    --- CODE RANGE of SJIS ---
1998    (character set)      (range)
1999    ASCII                0x00 .. 0x7F
2000    KATAKANA-JISX0201    0xA0 .. 0xDF
2001    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xEF
2002             (2nd byte)  0x40 .. 0xFF
2003    -------------------------------
2004
2005 */
2006
2007 /* BIG5 is a coding system encoding two character sets: ASCII and
2008    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2009    character set and is encoded in two-byte.
2010
2011    --- CODE RANGE of BIG5 ---
2012    (character set)      (range)
2013    ASCII                0x00 .. 0x7F
2014    Big5 (1st byte)      0xA1 .. 0xFE
2015         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2016    --------------------------
2017
2018    Since the number of characters in Big5 is larger than maximum
2019    characters in Emacs' charset (96x96), it can't be handled as one
2020    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2021    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2022    contains frequently used characters and the latter contains less
2023    frequently used characters.  */
2024
2025 /* Macros to convert a Big5 character between its BIG5 bytes B1, B2
2026    (1st and 2nd) and the position-codes C1, C2 of Emacs' internal
2027    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  Arguments
2028    may be evaluated more than once, so they must have no side effects.  */
2029
2030 /* Number of Big5 characters which have the same code in 1st byte.  */
2031 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2032
2033 #define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
2034   do {                                                                  \
2035     unsigned int big5_index                                             \
2036       = ((b1) - 0xA1) * BIG5_SAME_ROW + (b2) - ((b2) < 0x7F ? 0x40 : 0x62); \
2037     if ((b1) < 0xC9)                                                    \
2038       (charset) = charset_big5_1;                                       \
2039     else                                                                \
2040       {                                                                 \
2041         (charset) = charset_big5_2;                                     \
2042         big5_index -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                    \
2043       }                                                                 \
2044     (c1) = big5_index / (0xFF - 0xA1) + 0x21;                           \
2045     (c2) = big5_index % (0xFF - 0xA1) + 0x21;                           \
2046   } while (0)
2047
2048 #define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
2049   do {                                                                  \
2050     unsigned int big5_index = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21); \
2051     if ((charset) == charset_big5_2)                                    \
2052       big5_index += BIG5_SAME_ROW * (0xC9 - 0xA1);                      \
2053     (b1) = big5_index / BIG5_SAME_ROW + 0xA1;                           \
2054     (b2) = big5_index % BIG5_SAME_ROW;                                  \
2055     (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
2056   } while (0)
2057
     /* Produce the decoded form of one character of CHARSET whose
        position-codes are C1 and C2.  The macro uses `translation_table'
        from the enclosing scope: when it is non-nil and translate_char
        returns a non-negative character, that character is split into
        CHARSET_ALT, C1 and C2 (so C1 and C2 must be lvalues) and is
        decoded instead.  The character is then handed to
        DECODE_CHARACTER_ASCII, DECODE_CHARACTER_DIMENSION1 or
        DECODE_CHARACTER_DIMENSION2, which are defined elsewhere.  */
2058 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
2059   do {                                                                  \
2060     int c_alt, charset_alt = (charset);                                 \
2061     if (!NILP (translation_table)                                       \
2062         && ((c_alt = translate_char (translation_table,                 \
2063                                      -1, (charset), c1, c2)) >= 0))     \
2064           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
         /* A negative CHARSET_ALT is treated like ASCII.  */            \
2065     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
2066       DECODE_CHARACTER_ASCII (c1);                                      \
2067     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
2068       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
2069     else                                                                \
2070       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
2071   } while (0)
2072
/* Encode one character of CHARSET with position codes C1 and C2 and
   store the result at `dst', advancing it.  C1 and C2 must be lvalues
   (they are overwritten).  `coding', `dst', `sjis_p' and
   `translation_table' must be in scope at the point of use.

   At most three bytes are stored: one for ASCII and for JISX0201
   katakana under SJIS, two for JISX0208 under SJIS and for the Big5
   charsets under BIG5, and the raw internal codes (charset id plus one
   or two position codes) for anything else.  `coding->consumed_char'
   is incremented once per call.

   The expansion deliberately has no trailing semicolon, so the macro
   behaves as a single statement even in `if (x) MACRO (...); else'.  */
#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)             \
  do {                                                          \
    int c_alt, charset_alt;                                     \
    /* Apply the translation table, if there is one.  */        \
    if (!NILP (translation_table)                               \
        && ((c_alt = translate_char (translation_table, -1,     \
                                     charset, c1, c2))          \
            >= 0))                                              \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
    else                                                        \
      charset_alt = charset;                                    \
    if (charset_alt == charset_ascii)                           \
      *dst++ = c1;                                              \
    else if (CHARSET_DIMENSION (charset_alt) == 1)              \
      {                                                         \
        if (sjis_p && charset_alt == charset_katakana_jisx0201) \
          *dst++ = c1;                                          \
        else                                                    \
          {                                                     \
            /* Not encodable: emit the internal codes.  */      \
            *dst++ = charset_alt, *dst++ = c1;                  \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    else                                                        \
      {                                                         \
        c1 &= 0x7F, c2 &= 0x7F;                                 \
        if (sjis_p && charset_alt == charset_jisx0208)          \
          {                                                     \
            unsigned char s1, s2;                               \
                                                                \
            ENCODE_SJIS (c1, c2, s1, s2);                       \
            *dst++ = s1, *dst++ = s2;                           \
            coding->fake_multibyte = 1;                         \
          }                                                     \
        else if (!sjis_p                                        \
                 && (charset_alt == charset_big5_1              \
                     || charset_alt == charset_big5_2))         \
          {                                                     \
            unsigned char b1, b2;                               \
                                                                \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);          \
            *dst++ = b1, *dst++ = b2;                           \
          }                                                     \
        else                                                    \
          {                                                     \
            /* Not encodable: emit the internal codes.  */      \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;     \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    coding->consumed_char++;                                    \
  } while (0)
2123
2124 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2125    Check if a text is encoded in SJIS.  If it is, return
2126    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2127
2128 int
2129 detect_coding_sjis (src, src_end)
2130      unsigned char *src, *src_end;
2131 {
2132   unsigned char c;
2133
2134   while (src < src_end)
2135     {
2136       c = *src++;
2137       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2138         {
2139           if (src < src_end && *src++ < 0x40)
2140             return 0;
2141         }
2142     }
2143   return CODING_CATEGORY_MASK_SJIS;
2144 }
2145
2146 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2147    Check if a text is encoded in BIG5.  If it is, return
2148    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2149
2150 int
2151 detect_coding_big5 (src, src_end)
2152      unsigned char *src, *src_end;
2153 {
2154   unsigned char c;
2155
2156   while (src < src_end)
2157     {
2158       c = *src++;
2159       if (c >= 0xA1)
2160         {
2161           if (src >= src_end)
2162             break;
2163           c = *src++;
2164           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2165             return 0;
2166         }
2167     }
2168   return CODING_CATEGORY_MASK_BIG5;
2169 }
2170
2171 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2172    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2173
2174 int
2175 decode_coding_sjis_big5 (coding, source, destination,
2176                          src_bytes, dst_bytes, sjis_p)
2177      struct coding_system *coding;
2178      unsigned char *source, *destination;
2179      int src_bytes, dst_bytes;
2180      int sjis_p;
2181 {
2182   unsigned char *src = source;
2183   unsigned char *src_end = source + src_bytes;
2184   unsigned char *dst = destination;
2185   unsigned char *dst_end = destination + dst_bytes;
2186   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2187      from DST_END to assure overflow checking is necessary only at the
2188      head of loop.  */
2189   unsigned char *adjusted_dst_end = dst_end - 3;
2190   Lisp_Object translation_table
2191       = coding->translation_table_for_decode;
2192   int result = CODING_FINISH_NORMAL;
2193
2194   if (!NILP (Venable_character_translation) && NILP (translation_table))
2195     translation_table = Vstandard_translation_table_for_decode;
2196
2197   coding->produced_char = 0;
2198   coding->fake_multibyte = 0;
2199   while (src < src_end && (dst_bytes
2200                            ? (dst < adjusted_dst_end)
2201                            : (dst < src - 3)))
2202     {
2203       /* SRC_BASE remembers the start position in source in each loop.
2204          The loop will be exited when there's not enough source text
2205          to analyze two-byte character (within macro ONE_MORE_BYTE).
2206          In that case, SRC is reset to SRC_BASE before exiting.  */
2207       unsigned char *src_base = src;
2208       unsigned char c1 = *src++, c2, c3, c4;
2209
2210       if (c1 < 0x20)
2211         {
2212           if (c1 == '\r')
2213             {
2214               if (coding->eol_type == CODING_EOL_CRLF)
2215                 {
2216                   ONE_MORE_BYTE (c2);
2217                   if (c2 == '\n')
2218                     *dst++ = c2;
2219                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2220                     {
2221                       result = CODING_FINISH_INCONSISTENT_EOL;
2222                       goto label_end_of_loop_2;
2223                     }
2224                   else
2225                     /* To process C2 again, SRC is subtracted by 1.  */
2226                     *dst++ = c1, src--;
2227                 }
2228               else if (coding->eol_type == CODING_EOL_CR)
2229                 *dst++ = '\n';
2230               else
2231                 *dst++ = c1;
2232             }
2233           else if (c1 == '\n'
2234                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2235                    && (coding->eol_type == CODING_EOL_CR
2236                        || coding->eol_type == CODING_EOL_CRLF))
2237             {
2238               result = CODING_FINISH_INCONSISTENT_EOL;
2239               goto label_end_of_loop_2;
2240             }
2241           else
2242             *dst++ = c1;
2243           coding->produced_char++;
2244         }
2245       else if (c1 < 0x80)
2246         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2247       else
2248         {
2249           if (sjis_p)
2250             {
2251               if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
2252                 {
2253                   /* SJIS -> JISX0208 */
2254                   ONE_MORE_BYTE (c2);
2255                   if (c2 >= 0x40)
2256                     {
2257                       DECODE_SJIS (c1, c2, c3, c4);
2258                       DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2259                     }
2260                   else
2261                     goto label_invalid_code_2;
2262                 }
2263               else if (c1 < 0xE0)
2264                 /* SJIS -> JISX0201-Kana */
2265                 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2266                                             /* dummy */ c2);
2267               else
2268                 goto label_invalid_code_1;
2269             }
2270           else
2271             {
2272               /* BIG5 -> Big5 */
2273               if (c1 >= 0xA1 && c1 <= 0xFE)
2274                 {
2275                   ONE_MORE_BYTE (c2);
2276                   if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2277                     {
2278                       int charset;
2279
2280                       DECODE_BIG5 (c1, c2, charset, c3, c4);
2281                       DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2282                     }
2283                   else
2284                     goto label_invalid_code_2;
2285                 }
2286               else
2287                 goto label_invalid_code_1;
2288             }
2289         }
2290       continue;
2291
2292     label_invalid_code_1:
2293       *dst++ = c1;
2294       coding->produced_char++;
2295       coding->fake_multibyte = 1;
2296       continue;
2297
2298     label_invalid_code_2:
2299       *dst++ = c1; *dst++= c2;
2300       coding->produced_char += 2;
2301       coding->fake_multibyte = 1;
2302       continue;
2303
2304     label_end_of_loop:
2305       result = CODING_FINISH_INSUFFICIENT_SRC;
2306     label_end_of_loop_2:
2307       src = src_base;
2308       break;
2309     }
2310
2311   if (src < src_end)
2312     {
2313       if (result == CODING_FINISH_NORMAL)
2314         result = CODING_FINISH_INSUFFICIENT_DST;
2315       else if (result != CODING_FINISH_INCONSISTENT_EOL
2316                && coding->mode & CODING_MODE_LAST_BLOCK)
2317         {
2318           src_bytes = src_end - src;
2319           if (dst_bytes && (dst_end - dst < src_bytes))
2320             src_bytes = dst_end - dst;
2321           bcopy (dst, src, src_bytes);
2322           src += src_bytes;
2323           dst += src_bytes;
2324           coding->fake_multibyte = 1;
2325         }
2326     }
2327
2328   coding->consumed = coding->consumed_char = src - source;
2329   coding->produced = dst - destination;
2330   return result;
2331 }
2332
2333 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2334    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2335    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
2336    sure that all these charsets are registered as official charset
2337    (i.e. do not have extended leading-codes).  Characters of other
2338    charsets are produced without any encoding.  If SJIS_P is 1, encode
2339    SJIS text, else encode BIG5 text.  */
2340
2341 int
2342 encode_coding_sjis_big5 (coding, source, destination,
2343                          src_bytes, dst_bytes, sjis_p)
2344      struct coding_system *coding;
2345      unsigned char *source, *destination;
2346      int src_bytes, dst_bytes;
2347      int sjis_p;
2348 {
2349   unsigned char *src = source;
2350   unsigned char *src_end = source + src_bytes;
2351   unsigned char *dst = destination;
2352   unsigned char *dst_end = destination + dst_bytes;
2353   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2354      from DST_END to assure overflow checking is necessary only at the
2355      head of loop.  */
2356   unsigned char *adjusted_dst_end = dst_end - 1;
2357   Lisp_Object translation_table
2358       = coding->translation_table_for_encode;
2359   int result = CODING_FINISH_NORMAL;
2360
2361   if (!NILP (Venable_character_translation) && NILP (translation_table))
2362     translation_table = Vstandard_translation_table_for_encode;
2363
2364   coding->consumed_char = 0;
2365   coding->fake_multibyte = 0;
2366   while (src < src_end && (dst_bytes
2367                            ? (dst < adjusted_dst_end)
2368                            : (dst < src - 1)))
2369     {
2370       /* SRC_BASE remembers the start position in source in each loop.
2371          The loop will be exited when there's not enough source text
2372          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2373          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2374          before exiting.  */
2375       unsigned char *src_base = src;
2376       unsigned char c1 = *src++, c2, c3, c4;
2377
2378       if (coding->composing)
2379         {
2380           if (c1 == 0xA0)
2381             {
2382               ONE_MORE_BYTE (c1);
2383               c1 &= 0x7F;
2384             }
2385           else if (c1 >= 0xA0)
2386             c1 -= 0x20;
2387           else
2388             coding->composing = 0;
2389         }
2390
2391       switch (emacs_code_class[c1])
2392         {
2393         case EMACS_ascii_code:
2394           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2395           break;
2396
2397         case EMACS_control_code:
2398           *dst++ = c1;
2399           coding->consumed_char++;
2400           break;
2401
2402         case EMACS_carriage_return_code:
2403           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2404             {
2405               *dst++ = c1;
2406               coding->consumed_char++;
2407               break;
2408             }
2409           /* fall down to treat '\r' as '\n' ...  */
2410
2411         case EMACS_linefeed_code:
2412           if (coding->eol_type == CODING_EOL_LF
2413               || coding->eol_type == CODING_EOL_UNDECIDED)
2414             *dst++ = '\n';
2415           else if (coding->eol_type == CODING_EOL_CRLF)
2416             *dst++ = '\r', *dst++ = '\n';
2417           else
2418             *dst++ = '\r';
2419           coding->consumed_char++;
2420           break;
2421
2422         case EMACS_leading_code_2:
2423           ONE_MORE_BYTE (c2);
2424           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2425           break;
2426
2427         case EMACS_leading_code_3:
2428           TWO_MORE_BYTES (c2, c3);
2429           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2430           break;
2431
2432         case EMACS_leading_code_4:
2433           THREE_MORE_BYTES (c2, c3, c4);
2434           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2435           break;
2436
2437         case EMACS_leading_code_composition:
2438           coding->composing = 1;
2439           break;
2440
2441         default:                /* i.e. case EMACS_invalid_code: */
2442           *dst++ = c1;
2443           coding->consumed_char++;
2444         }
2445       continue;
2446
2447     label_end_of_loop:
2448       result = CODING_FINISH_INSUFFICIENT_SRC;
2449       src = src_base;
2450       break;
2451     }
2452
2453   if (result == CODING_FINISH_NORMAL
2454       && src < src_end)
2455     result = CODING_FINISH_INSUFFICIENT_DST;
2456   coding->consumed = src - source;
2457   coding->produced = coding->produced_char = dst - destination;
2458   return result;
2459 }
2460
2461 \f
2462 /*** 5. CCL handlers ***/
2463
2464 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2465    Check if a text is encoded in a coding system of which
2466    encoder/decoder are written in CCL program.  If it is, return
2467    CODING_CATEGORY_MASK_CCL, else return 0.  */
2468
2469 int
2470 detect_coding_ccl (src, src_end)
2471      unsigned char *src, *src_end;
2472 {
2473   unsigned char *valid;
2474
2475   /* No coding system is assigned to coding-category-ccl.  */
2476   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2477     return 0;
2478
2479   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2480   while (src < src_end)
2481     {
2482       if (! valid[*src]) return 0;
2483       src++;
2484     }
2485   return CODING_CATEGORY_MASK_CCL;
2486 }
2487
2488 \f
2489 /*** 6. End-of-line handlers ***/
2490
2491 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2492    This function is called only when `coding->eol_type' is
2493    CODING_EOL_CRLF or CODING_EOL_CR.  */
2494
2495 int
2496 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2497      struct coding_system *coding;
2498      unsigned char *source, *destination;
2499      int src_bytes, dst_bytes;
2500 {
2501   unsigned char *src = source;
2502   unsigned char *src_end = source + src_bytes;
2503   unsigned char *dst = destination;
2504   unsigned char *dst_end = destination + dst_bytes;
2505   unsigned char c;
2506   int result = CODING_FINISH_NORMAL;
2507
2508   coding->fake_multibyte = 0;
2509
2510   if (src_bytes <= 0)
2511     return result;
2512
2513   switch (coding->eol_type)
2514     {
2515     case CODING_EOL_CRLF:
2516       {
2517         /* Since the maximum bytes produced by each loop is 2, we
2518            subtract 1 from DST_END to assure overflow checking is
2519            necessary only at the head of loop.  */
2520         unsigned char *adjusted_dst_end = dst_end - 1;
2521
2522         while (src < src_end && (dst_bytes
2523                                  ? (dst < adjusted_dst_end)
2524                                  : (dst < src - 1)))
2525           {
2526             unsigned char *src_base = src;
2527
2528             c = *src++;
2529             if (c == '\r')
2530               {
2531                 ONE_MORE_BYTE (c);
2532                 if (c != '\n')
2533                   {
2534                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2535                       {
2536                         result = CODING_FINISH_INCONSISTENT_EOL;
2537                         goto label_end_of_loop_2;
2538                       }
2539                     *dst++ = '\r';
2540                     if (BASE_LEADING_CODE_P (c))
2541                       coding->fake_multibyte = 1;
2542                   }
2543                 *dst++ = c;
2544               }
2545             else if (c == '\n'
2546                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2547               {
2548                 result = CODING_FINISH_INCONSISTENT_EOL;
2549                 goto label_end_of_loop_2;
2550               }
2551             else
2552               {
2553                 *dst++ = c;
2554                 if (BASE_LEADING_CODE_P (c))
2555                   coding->fake_multibyte = 1;
2556               }
2557             continue;
2558
2559           label_end_of_loop:
2560             result = CODING_FINISH_INSUFFICIENT_SRC;
2561           label_end_of_loop_2:
2562             src = src_base;
2563             break;
2564           }
2565         if (result == CODING_FINISH_NORMAL
2566             && src < src_end)
2567           result = CODING_FINISH_INSUFFICIENT_DST;
2568       }
2569       break;
2570
2571     case CODING_EOL_CR:
2572       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2573         {
2574           while (src < src_end)
2575             {
2576               if ((c = *src++) == '\n')
2577                 break;
2578               if (BASE_LEADING_CODE_P (c))
2579                 coding->fake_multibyte = 1;
2580             }
2581           if (*--src == '\n')
2582             {
2583               src_bytes = src - source;
2584               result = CODING_FINISH_INCONSISTENT_EOL;
2585             }
2586         }
2587       if (dst_bytes && src_bytes > dst_bytes)
2588         {
2589           result = CODING_FINISH_INSUFFICIENT_DST;
2590           src_bytes = dst_bytes;
2591         }
2592       if (dst_bytes)
2593         bcopy (source, destination, src_bytes);
2594       else
2595         safe_bcopy (source, destination, src_bytes);
2596       src = source + src_bytes;
2597       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2598       break;
2599
2600     default:                    /* i.e. case: CODING_EOL_LF */
2601       if (dst_bytes && src_bytes > dst_bytes)
2602         {
2603           result = CODING_FINISH_INSUFFICIENT_DST;
2604           src_bytes = dst_bytes;
2605         }
2606       if (dst_bytes)
2607         bcopy (source, destination, src_bytes);
2608       else
2609         safe_bcopy (source, destination, src_bytes);
2610       src += src_bytes;
2611       dst += src_bytes;
2612       coding->fake_multibyte = 1;
2613       break;
2614     }
2615
2616   coding->consumed = coding->consumed_char = src - source;
2617   coding->produced = coding->produced_char = dst - destination;
2618   return result;
2619 }
2620
2621 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2622    format of end-of-line according to `coding->eol_type'.  If
2623    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2624    '\r' in source text also means end-of-line.  */
2625
2626 int
2627 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2628      struct coding_system *coding;
2629      unsigned char *source, *destination;
2630      int src_bytes, dst_bytes;
2631 {
2632   unsigned char *src = source;
2633   unsigned char *dst = destination;
2634   int result = CODING_FINISH_NORMAL;
2635
2636   coding->fake_multibyte = 0;
2637
2638   if (coding->eol_type == CODING_EOL_CRLF)
2639     {
2640       unsigned char c;
2641       unsigned char *src_end = source + src_bytes;
2642       unsigned char *dst_end = destination + dst_bytes;
2643       /* Since the maximum bytes produced by each loop is 2, we
2644          subtract 1 from DST_END to assure overflow checking is
2645          necessary only at the head of loop.  */
2646       unsigned char *adjusted_dst_end = dst_end - 1;
2647
2648       while (src < src_end && (dst_bytes
2649                                ? (dst < adjusted_dst_end)
2650                                : (dst < src - 1)))
2651         {
2652           c = *src++;
2653           if (c == '\n'
2654               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2655             *dst++ = '\r', *dst++ = '\n';
2656           else
2657             {
2658               *dst++ = c;
2659               if (BASE_LEADING_CODE_P (c))
2660                 coding->fake_multibyte = 1;
2661             }
2662         }
2663       if (src < src_end)
2664         result = CODING_FINISH_INSUFFICIENT_DST;
2665     }
2666   else
2667     {
2668       unsigned char c;
2669
2670       if (dst_bytes && src_bytes > dst_bytes)
2671         {
2672           src_bytes = dst_bytes;
2673           result = CODING_FINISH_INSUFFICIENT_DST;
2674         }
2675       if (dst_bytes)
2676         bcopy (source, destination, src_bytes);
2677       else
2678         safe_bcopy (source, destination, src_bytes);
2679       dst_bytes = src_bytes;
2680       if (coding->eol_type == CODING_EOL_CR)
2681         {
2682           while (src_bytes--)
2683             {
2684               if ((c = *dst++) == '\n')
2685                 dst[-1] = '\r';
2686               else if (BASE_LEADING_CODE_P (c))
2687                 coding->fake_multibyte = 1;
2688             }
2689         }
2690       else
2691         {
2692           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2693             {
2694               while (src_bytes--)
2695                 if (*dst++ == '\r') dst[-1] = '\n';
2696             }
2697           coding->fake_multibyte = 1;
2698         }
2699       src = source + dst_bytes;
2700       dst = destination + dst_bytes;
2701     }
2702
2703   coding->consumed = coding->consumed_char = src - source;
2704   coding->produced = coding->produced_char = dst - destination;
2705   return result;
2706 }
2707
2708 \f
2709 /*** 7. C library functions ***/
2710
2711 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2712    has a property `coding-system'.  The value of this property is a
2713    vector of length 5 (called as coding-vector).  Among elements of
2714    this vector, the first (element[0]) and the fifth (element[4])
2715    carry important information for decoding/encoding.  Before
2716    decoding/encoding, this information should be set in fields of a
2717    structure of type `coding_system'.
2718
2719    A value of property `coding-system' can be a symbol of another
2720    subsidiary coding-system.  In that case, Emacs gets coding-vector
2721    from that symbol.
2722
2723    `element[0]' contains information to be set in `coding->type'.  The
2724    value and its meaning is as follows:
2725
2726    0 -- coding_type_emacs_mule
2727    1 -- coding_type_sjis
2728    2 -- coding_type_iso2022
2729    3 -- coding_type_big5
2730    4 -- coding_type_ccl encoder/decoder written in CCL
2731    nil -- coding_type_no_conversion
2732    t -- coding_type_undecided (automatic conversion on decoding,
2733                                no-conversion on encoding)
2734
2735    `element[4]' contains information to be set in `coding->flags' and
2736    `coding->spec'.  The meaning varies by `coding->type'.
2737
2738    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2739    of length 32 (of which the first 17 sub-elements are used now).
2740    Meanings of these sub-elements are:
2741
2742    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2743         If the value is an integer of valid charset, the charset is
2744         assumed to be designated to graphic register N initially.
2745
2746         If the value is minus, it is a minus value of charset which
2747         reserves graphic register N, which means that the charset is
2748         not designated initially but should be designated to graphic
2749         register N just before encoding a character in that charset.
2750
2751         If the value is nil, graphic register N is never used on
2752         encoding.
2753
2754    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2755         Each value takes t or nil.  See the section ISO2022 of
2756         `coding.h' for more information.
2757
2758    If `coding->type' is `coding_type_big5', element[4] is t to denote
2759    BIG5-ETen or nil to denote BIG5-HKU.
2760
2761    If `coding->type' takes the other value, element[4] is ignored.
2762
2763    Emacs Lisp's coding system also carries information about format of
2764    end-of-line in a value of property `eol-type'.  If the value is
2765    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2766    means CODING_EOL_CR.  If it is not integer, it should be a vector
2767    of subsidiary coding systems of which property `eol-type' has one
2768    of above values.
2769
2770 */
2771
2772 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2773    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2774    is setup so that no conversion is necessary and return -1, else
2775    return 0.  */
2776
2777 int
2778 setup_coding_system (coding_system, coding)
2779      Lisp_Object coding_system;
2780      struct coding_system *coding;
2781 {
2782   Lisp_Object coding_spec, coding_type, eol_type, plist;
2783   Lisp_Object val;
2784   int i;
2785
2786   /* Initialize some fields required for all kinds of coding systems.  */
2787   coding->symbol = coding_system;
2788   coding->common_flags = 0;
2789   coding->mode = 0;
2790   coding->heading_ascii = -1;
2791   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2792   coding_spec = Fget (coding_system, Qcoding_system);
2793   if (!VECTORP (coding_spec)
2794       || XVECTOR (coding_spec)->size != 5
2795       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2796     goto label_invalid_coding_system;
2797
2798   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2799   if (VECTORP (eol_type))
2800     {
2801       coding->eol_type = CODING_EOL_UNDECIDED;
2802       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2803     }
2804   else if (XFASTINT (eol_type) == 1)
2805     {
2806       coding->eol_type = CODING_EOL_CRLF;
2807       coding->common_flags
2808         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2809     }
2810   else if (XFASTINT (eol_type) == 2)
2811     {
2812       coding->eol_type = CODING_EOL_CR;
2813       coding->common_flags
2814         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2815     }
2816   else
2817     coding->eol_type = CODING_EOL_LF;
2818
2819   coding_type = XVECTOR (coding_spec)->contents[0];
2820   /* Try short cut.  */
2821   if (SYMBOLP (coding_type))
2822     {
2823       if (EQ (coding_type, Qt))
2824         {
2825           coding->type = coding_type_undecided;
2826           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2827         }
2828       else
2829         coding->type = coding_type_no_conversion;
2830       return 0;
2831     }
2832
2833   /* Initialize remaining fields.  */
2834   coding->composing = 0;
2835   coding->translation_table_for_decode = Qnil;
2836   coding->translation_table_for_encode = Qnil;
2837
2838   /* Get values of coding system properties:
2839      `post-read-conversion', `pre-write-conversion',
2840      `translation-table-for-decode', `translation-table-for-encode'.  */
2841   plist = XVECTOR (coding_spec)->contents[3];
2842   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2843   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2844   val = Fplist_get (plist, Qtranslation_table_for_decode);
2845   if (SYMBOLP (val))
2846     val = Fget (val, Qtranslation_table_for_decode);
2847   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2848   val = Fplist_get (plist, Qtranslation_table_for_encode);
2849   if (SYMBOLP (val))
2850     val = Fget (val, Qtranslation_table_for_encode);
2851   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
2852   val = Fplist_get (plist, Qcoding_category);
2853   if (!NILP (val))
2854     {
2855       val = Fget (val, Qcoding_category_index);
2856       if (INTEGERP (val))
2857         coding->category_idx = XINT (val);
2858       else
2859         goto label_invalid_coding_system;
2860     }
2861   else
2862     goto label_invalid_coding_system;
2863
2864   val = Fplist_get (plist, Qsafe_charsets);
2865   if (EQ (val, Qt))
2866     {
2867       for (i = 0; i <= MAX_CHARSET; i++)
2868         coding->safe_charsets[i] = 1;
2869     }
2870   else
2871     {
2872       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2873       while (CONSP (val))
2874         {
2875           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2876             coding->safe_charsets[i] = 1;
2877           val = XCONS (val)->cdr;
2878         }
2879     }
2880
2881   switch (XFASTINT (coding_type))
2882     {
2883     case 0:
2884       coding->type = coding_type_emacs_mule;
2885       if (!NILP (coding->post_read_conversion))
2886         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2887       if (!NILP (coding->pre_write_conversion))
2888         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2889       break;
2890
2891     case 1:
2892       coding->type = coding_type_sjis;
2893       coding->common_flags
2894         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2895       break;
2896
2897     case 2:
2898       coding->type = coding_type_iso2022;
2899       coding->common_flags
2900         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2901       {
2902         Lisp_Object val, temp;
2903         Lisp_Object *flags;
2904         int i, charset, reg_bits = 0;
2905
2906         val = XVECTOR (coding_spec)->contents[4];
2907
2908         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2909           goto label_invalid_coding_system;
2910
2911         flags = XVECTOR (val)->contents;
2912         coding->flags
2913           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2914              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2915              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2916              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2917              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2918              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2919              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2920              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2921              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2922              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2923              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2924              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2925              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2926              );
2927
2928         /* Invoke graphic register 0 to plane 0.  */
2929         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2930         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2931         CODING_SPEC_ISO_INVOCATION (coding, 1)
2932           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2933         /* Not single shifting at first.  */
2934         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2935         /* Beginning of buffer should also be regarded as bol. */
2936         CODING_SPEC_ISO_BOL (coding) = 1;
2937
2938         for (charset = 0; charset <= MAX_CHARSET; charset++)
2939           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2940         val = Vcharset_revision_alist;
2941         while (CONSP (val))
2942           {
2943             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2944             if (charset >= 0
2945                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2946                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2947               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2948             val = XCONS (val)->cdr;
2949           }
2950
2951         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2952            FLAGS[REG] can be one of below:
2953                 integer CHARSET: CHARSET occupies register I,
2954                 t: designate nothing to REG initially, but can be used
2955                   by any charsets,
2956                 list of integer, nil, or t: designate the first
2957                   element (if integer) to REG initially, the remaining
2958                   elements (if integer) is designated to REG on request,
2959                   if an element is t, REG can be used by any charsets,
2960                 nil: REG is never used.  */
2961         for (charset = 0; charset <= MAX_CHARSET; charset++)
2962           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2963             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2964         for (i = 0; i < 4; i++)
2965           {
2966             if (INTEGERP (flags[i])
2967                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2968                 || (charset = get_charset_id (flags[i])) >= 0)
2969               {
2970                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2971                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2972               }
2973             else if (EQ (flags[i], Qt))
2974               {
2975                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2976                 reg_bits |= 1 << i;
2977                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2978               }
2979             else if (CONSP (flags[i]))
2980               {
2981                 Lisp_Object tail;
2982                 tail = flags[i];
2983
2984                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2985                 if (INTEGERP (XCONS (tail)->car)
2986                     && (charset = XINT (XCONS (tail)->car),
2987                         CHARSET_VALID_P (charset))
2988                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2989                   {
2990                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2991                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2992                   }
2993                 else
2994                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2995                 tail = XCONS (tail)->cdr;
2996                 while (CONSP (tail))
2997                   {
2998                     if (INTEGERP (XCONS (tail)->car)
2999                         && (charset = XINT (XCONS (tail)->car),
3000                             CHARSET_VALID_P (charset))
3001                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
3002                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3003                         = i;
3004                     else if (EQ (XCONS (tail)->car, Qt))
3005                       reg_bits |= 1 << i;
3006                     tail = XCONS (tail)->cdr;
3007                   }
3008               }
3009             else
3010               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3011
3012             CODING_SPEC_ISO_DESIGNATION (coding, i)
3013               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3014           }
3015
3016         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3017           {
3018             /* REG 1 can be used only by locking shift in 7-bit env.  */
3019             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3020               reg_bits &= ~2;
3021             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3022               /* Without any shifting, only REG 0 and 1 can be used.  */
3023               reg_bits &= 3;
3024           }
3025
3026         if (reg_bits)
3027           for (charset = 0; charset <= MAX_CHARSET; charset++)
3028             {
3029               if (CHARSET_VALID_P (charset))
3030                 {
3031                   /* There exist some default graphic registers to be
3032                      used CHARSET.  */
3033
3034                   /* We had better avoid designating a charset of
3035                      CHARS96 to REG 0 as far as possible.  */
3036                   if (CHARSET_CHARS (charset) == 96)
3037                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3038                       = (reg_bits & 2
3039                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3040                   else
3041                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3042                       = (reg_bits & 1
3043                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3044                 }
3045             }
3046       }
3047       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3048       coding->spec.iso2022.last_invalid_designation_register = -1;
3049       break;
3050
3051     case 3:
3052       coding->type = coding_type_big5;
3053       coding->common_flags
3054         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3055       coding->flags
3056         = (NILP (XVECTOR (coding_spec)->contents[4])
3057            ? CODING_FLAG_BIG5_HKU
3058            : CODING_FLAG_BIG5_ETEN);
3059       break;
3060
3061     case 4:
3062       coding->type = coding_type_ccl;
3063       coding->common_flags
3064         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3065       {
3066         Lisp_Object val;
3067         Lisp_Object decoder, encoder;
3068
3069         val = XVECTOR (coding_spec)->contents[4];
3070         if (CONSP  (val)
3071             && SYMBOLP (XCONS (val)->car)
3072             && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
3073             && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
3074             && SYMBOLP (XCONS (val)->cdr)
3075             && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
3076             && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
3077           {
3078             setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
3079             setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
3080           }
3081         else
3082           goto label_invalid_coding_system;
3083
3084         bzero (coding->spec.ccl.valid_codes, 256);
3085         val = Fplist_get (plist, Qvalid_codes);
3086         if (CONSP (val))
3087           {
3088             Lisp_Object this;
3089
3090             for (; CONSP (val); val = XCONS (val)->cdr)
3091               {
3092                 this = XCONS (val)->car;
3093                 if (INTEGERP (this)
3094                     && XINT (this) >= 0 && XINT (this) < 256)
3095                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3096                 else if (CONSP (this)
3097                          && INTEGERP (XCONS (this)->car)
3098                          && INTEGERP (XCONS (this)->cdr))
3099                   {
3100                     int start = XINT (XCONS (this)->car);
3101                     int end = XINT (XCONS (this)->cdr);
3102
3103                     if (start >= 0 && start <= end && end < 256)
3104                       while (start < end)
3105                         coding->spec.ccl.valid_codes[start++] = 1;
3106                   }
3107               }
3108           }
3109       }
3110       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3111       break;
3112
3113     case 5:
3114       coding->type = coding_type_raw_text;
3115       break;
3116
3117     default:
3118       goto label_invalid_coding_system;
3119     }
3120   return 0;
3121
3122  label_invalid_coding_system:
3123   coding->type = coding_type_no_conversion;
3124   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3125   coding->common_flags = 0;
3126   coding->eol_type = CODING_EOL_LF;
3127   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3128   return -1;
3129 }
3130
3131 /* Setup raw-text or one of its subsidiaries in the structure
3132    coding_system CODING according to the already setup value eol_type
3133    in CODING.  CODING should be setup for some coding system in
3134    advance.  */
3135
3136 void
3137 setup_raw_text_coding_system (coding)
3138      struct coding_system *coding;
3139 {
3140   if (coding->type != coding_type_raw_text)
3141     {
3142       coding->symbol = Qraw_text;
3143       coding->type = coding_type_raw_text;
3144       if (coding->eol_type != CODING_EOL_UNDECIDED)
3145         {
3146           Lisp_Object subsidiaries;
3147           subsidiaries = Fget (Qraw_text, Qeol_type);
3148
3149           if (VECTORP (subsidiaries)
3150               && XVECTOR (subsidiaries)->size == 3)
3151             coding->symbol
3152               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3153         }
3154     }
3155   return;
3156 }
3157
/* Emacs has a mechanism to automatically detect a coding system if it
   is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But
   it's impossible to distinguish some coding systems accurately
   because they use the same range of codes.  So, at first, coding
   systems are classified into the following categories:
3163
3164    o coding-category-emacs-mule
3165
3166         The category for a coding system which has the same code range
3167         as Emacs' internal format.  Assigned the coding-system (Lisp
3168         symbol) `emacs-mule' by default.
3169
3170    o coding-category-sjis
3171
3172         The category for a coding system which has the same code range
3173         as SJIS.  Assigned the coding-system (Lisp
3174         symbol) `japanese-shift-jis' by default.
3175
3176    o coding-category-iso-7
3177
3178         The category for a coding system which has the same code range
3179         as ISO2022 of 7-bit environment.  This doesn't use any locking
3180         shift and single shift functions.  This can encode/decode all
3181         charsets.  Assigned the coding-system (Lisp symbol)
3182         `iso-2022-7bit' by default.
3183
3184    o coding-category-iso-7-tight
3185
3186         Same as coding-category-iso-7 except that this can
3187         encode/decode only the specified charsets.
3188
3189    o coding-category-iso-8-1
3190
3191         The category for a coding system which has the same code range
3192         as ISO2022 of 8-bit environment and graphic plane 1 used only
3193         for DIMENSION1 charset.  This doesn't use any locking shift
3194         and single shift functions.  Assigned the coding-system (Lisp
3195         symbol) `iso-latin-1' by default.
3196
3197    o coding-category-iso-8-2
3198
3199         The category for a coding system which has the same code range
3200         as ISO2022 of 8-bit environment and graphic plane 1 used only
3201         for DIMENSION2 charset.  This doesn't use any locking shift
3202         and single shift functions.  Assigned the coding-system (Lisp
3203         symbol) `japanese-iso-8bit' by default.
3204
3205    o coding-category-iso-7-else
3206
3207         The category for a coding system which has the same code range
        as ISO2022 of 7-bit environment but uses locking shift or
3209         single shift functions.  Assigned the coding-system (Lisp
3210         symbol) `iso-2022-7bit-lock' by default.
3211
3212    o coding-category-iso-8-else
3213
3214         The category for a coding system which has the same code range
        as ISO2022 of 8-bit environment but uses locking shift or
3216         single shift functions.  Assigned the coding-system (Lisp
3217         symbol) `iso-2022-8bit-ss2' by default.
3218
3219    o coding-category-big5
3220
3221         The category for a coding system which has the same code range
3222         as BIG5.  Assigned the coding-system (Lisp symbol)
3223         `cn-big5' by default.
3224
3225    o coding-category-ccl
3226
3227         The category for a coding system of which encoder/decoder is
3228         written in CCL programs.  The default value is nil, i.e., no
3229         coding system is assigned.
3230
3231    o coding-category-binary
3232
3233         The category for a coding system not categorized in any of the
3234         above.  Assigned the coding-system (Lisp symbol)
3235         `no-conversion' by default.
3236
   Each of them is a Lisp symbol whose value is the actual
   `coding-system' (also a Lisp symbol) assigned to it by a user.
   What Emacs actually does is to detect the category of a coding
   system and then use the `coding-system' assigned to that category.
   If Emacs can't narrow the detection down to a single category, it
   selects the category of the highest priority.  Priorities of
   categories are also specified by a user in the Lisp variable
   `coding-category-list'.
3244
3245 */
3246
/* Byte-indexed skip table used by detect_coding_mask: a byte whose
   entry is non-zero is stepped over during the leading scan.  The
   entries for ISO_CODE_SO, ISO_CODE_SI and ISO_CODE_ESC are rewritten
   by detect_coding_mask itself while it runs.  */
static int ascii_skip_code[256];
3249
3250 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3251    If it detects possible coding systems, return an integer in which
3252    appropriate flag bits are set.  Flag bits are defined by macros
3253    CODING_CATEGORY_MASK_XXX in `coding.h'.
3254
3255    How many ASCII characters are at the head is returned as *SKIP.  */
3256
3257 static int
3258 detect_coding_mask (source, src_bytes, priorities, skip)
3259      unsigned char *source;
3260      int src_bytes, *priorities, *skip;
3261 {
3262   register unsigned char c;
3263   unsigned char *src = source, *src_end = source + src_bytes;
3264   unsigned int mask;
3265   int i;
3266
3267   /* At first, skip all ASCII characters and control characters except
3268      for three ISO2022 specific control characters.  */
3269   ascii_skip_code[ISO_CODE_SO] = 0;
3270   ascii_skip_code[ISO_CODE_SI] = 0;
3271   ascii_skip_code[ISO_CODE_ESC] = 0;
3272
3273  label_loop_detect_coding:
3274   while (src < src_end && ascii_skip_code[*src]) src++;
3275   *skip = src - source;
3276
3277   if (src >= src_end)
3278     /* We found nothing other than ASCII.  There's nothing to do.  */
3279     return 0;
3280
3281   c = *src;
3282   /* The text seems to be encoded in some multilingual coding system.
3283      Now, try to find in which coding system the text is encoded.  */
3284   if (c < 0x80)
3285     {
3286       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3287       /* C is an ISO2022 specific control code of C0.  */
3288       mask = detect_coding_iso2022 (src, src_end);
3289       if (mask == 0)
3290         {
3291           /* No valid ISO2022 code follows C.  Try again.  */
3292           src++;
3293           if (c == ISO_CODE_ESC)
3294             ascii_skip_code[ISO_CODE_ESC] = 1;
3295           else
3296             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3297           goto label_loop_detect_coding;
3298         }
3299       if (priorities)
3300         goto label_return_highest_only;
3301     }
3302   else
3303     {
3304       int try;
3305
3306       if (c < 0xA0)
3307         {
3308           /* C is the first byte of SJIS character code,
3309              or a leading-code of Emacs' internal format (emacs-mule).  */
3310           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3311
3312           /* Or, if C is a special latin extra code,
3313              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3314              or is an ISO2022 control-sequence-introducer (CSI),
3315              we should also consider the possibility of ISO2022 codings.  */
3316           if ((VECTORP (Vlatin_extra_code_table)
3317                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3318               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3319               || (c == ISO_CODE_CSI
3320                   && (src < src_end
3321                       && (*src == ']'
3322                           || ((*src == '0' || *src == '1' || *src == '2')
3323                               && src + 1 < src_end
3324                               && src[1] == ']')))))
3325             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3326                      | CODING_CATEGORY_MASK_ISO_8BIT);
3327         }
3328       else
3329         /* C is a character of ISO2022 in graphic plane right,
3330            or a SJIS's 1-byte character code (i.e. JISX0201),
3331            or the first byte of BIG5's 2-byte code.  */
3332         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3333                 | CODING_CATEGORY_MASK_ISO_8BIT
3334                 | CODING_CATEGORY_MASK_SJIS
3335                 | CODING_CATEGORY_MASK_BIG5);
3336
3337       /* Or, we may have to consider the possibility of CCL.  */
3338       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3339           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3340               ->spec.ccl.valid_codes)[c])
3341         try |= CODING_CATEGORY_MASK_CCL;
3342
3343       mask = 0;
3344       if (priorities)
3345         {
3346           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3347             {
3348               if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
3349                 mask = detect_coding_iso2022 (src, src_end);
3350               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3351                 mask = detect_coding_sjis (src, src_end);
3352               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3353                 mask = detect_coding_big5 (src, src_end);
3354               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3355                 mask = detect_coding_emacs_mule (src, src_end);
3356               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3357                 mask = detect_coding_ccl (src, src_end);
3358               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3359                 mask = CODING_CATEGORY_MASK_RAW_TEXT;
3360               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3361                 mask = CODING_CATEGORY_MASK_BINARY;
3362               if (mask)
3363                 goto label_return_highest_only;
3364             }
3365           return CODING_CATEGORY_MASK_RAW_TEXT;
3366         }
3367       if (try & CODING_CATEGORY_MASK_ISO)
3368         mask |= detect_coding_iso2022 (src, src_end);
3369       if (try & CODING_CATEGORY_MASK_SJIS)
3370         mask |= detect_coding_sjis (src, src_end);
3371       if (try & CODING_CATEGORY_MASK_BIG5)
3372         mask |= detect_coding_big5 (src, src_end);
3373       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3374         mask |= detect_coding_emacs_mule (src, src_end);
3375       if (try & CODING_CATEGORY_MASK_CCL)
3376         mask |= detect_coding_ccl (src, src_end);
3377     }
3378   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3379
3380  label_return_highest_only:
3381   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3382     {
3383       if (mask & priorities[i])
3384         return priorities[i];
3385     }
3386   return CODING_CATEGORY_MASK_RAW_TEXT;
3387 }
3388
3389 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3390    The information of the detected coding system is set in CODING.  */
3391
3392 void
3393 detect_coding (coding, src, src_bytes)
3394      struct coding_system *coding;
3395      unsigned char *src;
3396      int src_bytes;
3397 {
3398   unsigned int idx;
3399   int skip, mask, i;
3400   Lisp_Object val;
3401
3402   val = Vcoding_category_list;
3403   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3404   coding->heading_ascii = skip;
3405
3406   if (!mask) return;
3407
3408   /* We found a single coding system of the highest priority in MASK.  */
3409   idx = 0;
3410   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3411   if (! mask)
3412     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3413
3414   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3415
3416   if (coding->eol_type != CODING_EOL_UNDECIDED)
3417     {
3418       Lisp_Object tmp;
3419
3420       tmp = Fget (val, Qeol_type);
3421       if (VECTORP (tmp))
3422         val = XVECTOR (tmp)->contents[coding->eol_type];
3423     }
3424   setup_coding_system (val, coding);
3425   /* Set this again because setup_coding_system reset this member.  */
3426   coding->heading_ascii = skip;
3427 }
3428
3429 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3430    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3431    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3432
3433    How many non-eol characters are at the head is returned as *SKIP.  */
3434
3435 #define MAX_EOL_CHECK_COUNT 3
3436
3437 static int
3438 detect_eol_type (source, src_bytes, skip)
3439      unsigned char *source;
3440      int src_bytes, *skip;
3441 {
3442   unsigned char *src = source, *src_end = src + src_bytes;
3443   unsigned char c;
3444   int total = 0;                /* How many end-of-lines are found so far.  */
3445   int eol_type = CODING_EOL_UNDECIDED;
3446   int this_eol_type;
3447
3448   *skip = 0;
3449
3450   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3451     {
3452       c = *src++;
3453       if (c == '\n' || c == '\r')
3454         {
3455           if (*skip == 0)
3456             *skip = src - 1 - source;
3457           total++;
3458           if (c == '\n')
3459             this_eol_type = CODING_EOL_LF;
3460           else if (src >= src_end || *src != '\n')
3461             this_eol_type = CODING_EOL_CR;
3462           else
3463             this_eol_type = CODING_EOL_CRLF, src++;
3464
3465           if (eol_type == CODING_EOL_UNDECIDED)
3466             /* This is the first end-of-line.  */
3467             eol_type = this_eol_type;
3468           else if (eol_type != this_eol_type)
3469             {
3470               /* The found type is different from what found before.  */
3471               eol_type = CODING_EOL_INCONSISTENT;
3472               break;
3473             }
3474         }
3475     }
3476
3477   if (*skip == 0)
3478     *skip = src_end - source;
3479   return eol_type;
3480 }
3481
3482 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3483    is encoded.  If it detects an appropriate format of end-of-line, it
3484    sets the information in *CODING.  */
3485
3486 void
3487 detect_eol (coding, src, src_bytes)
3488      struct coding_system *coding;
3489      unsigned char *src;
3490      int src_bytes;
3491 {
3492   Lisp_Object val;
3493   int skip;
3494   int eol_type = detect_eol_type (src, src_bytes, &skip);
3495
3496   if (coding->heading_ascii > skip)
3497     coding->heading_ascii = skip;
3498   else
3499     skip = coding->heading_ascii;
3500
3501   if (eol_type == CODING_EOL_UNDECIDED)
3502     return;
3503   if (eol_type == CODING_EOL_INCONSISTENT)
3504     {
3505 #if 0
3506       /* This code is suppressed until we find a better way to
3507          distinguish raw text file and binary file.  */
3508
3509       /* If we have already detected that the coding is raw-text, the
3510          coding should actually be no-conversion.  */
3511       if (coding->type == coding_type_raw_text)
3512         {
3513           setup_coding_system (Qno_conversion, coding);
3514           return;
3515         }
3516       /* Else, let's decode only text code anyway.  */
3517 #endif /* 0 */
3518       eol_type = CODING_EOL_LF;
3519     }
3520
3521   val = Fget (coding->symbol, Qeol_type);
3522   if (VECTORP (val) && XVECTOR (val)->size == 3)
3523     {
3524       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3525       coding->heading_ascii = skip;
3526     }
3527 }
3528
/* Extra bytes added to every conversion buffer size estimate.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the byte count of a source text is multiplied to
   estimate the room needed for decoding it with CODING (a pointer to
   struct coding_system).  CODING is evaluated more than once, so it
   must not have side effects.  */
#define DECODING_BUFFER_MAG(coding)                                     \
  ((coding)->type == coding_type_iso2022                                \
   ? 3                                                                  \
   : (((coding)->type == coding_type_sjis                               \
       || (coding)->type == coding_type_big5)                           \
      ? 2                                                               \
      : ((coding)->type == coding_type_raw_text                         \
         ? 1                                                            \
         : ((coding)->type == coding_type_ccl                           \
            ? (coding)->spec.ccl.decoder.buf_magnification              \
            : 2))))
3541
3542 /* Return maximum size (bytes) of a buffer enough for decoding
3543    SRC_BYTES of text encoded in CODING.  */
3544
3545 int
3546 decoding_buffer_size (coding, src_bytes)
3547      struct coding_system *coding;
3548      int src_bytes;
3549 {
3550   return (src_bytes * DECODING_BUFFER_MAG (coding)
3551           + CONVERSION_BUFFER_EXTRA_ROOM);
3552 }
3553
3554 /* Return maximum size (bytes) of a buffer enough for encoding
3555    SRC_BYTES of text to CODING.  */
3556
3557 int
3558 encoding_buffer_size (coding, src_bytes)
3559      struct coding_system *coding;
3560      int src_bytes;
3561 {
3562   int magnification;
3563
3564   if (coding->type == coding_type_ccl)
3565     magnification = coding->spec.ccl.encoder.buf_magnification;
3566   else
3567     magnification = 3;
3568
3569   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3570 }
3571
/* Default for MINIMUM_CONVERSION_BUFFER_SIZE; a definition made
   before this point takes precedence.  */
#if !defined (MINIMUM_CONVERSION_BUFFER_SIZE)
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* The buffer handed out by get_conversion_buffer.  */
char *conversion_buffer;

/* Number of bytes currently allocated for conversion_buffer.  */
int conversion_buffer_size;
3578
3579 /* Return a pointer to a SIZE bytes of buffer to be used for encoding
3580    or decoding.  Sufficient memory is allocated automatically.  If we
3581    run out of memory, return NULL.  */
3582
3583 char *
3584 get_conversion_buffer (size)
3585      int size;
3586 {
3587   if (size > conversion_buffer_size)
3588     {
3589       char *buf;
3590       int real_size = conversion_buffer_size * 2;
3591
3592       while (real_size < size) real_size *= 2;
3593       buf = (char *) xmalloc (real_size);
3594       xfree (conversion_buffer);
3595       conversion_buffer = buf;
3596       conversion_buffer_size = real_size;
3597     }
3598   return conversion_buffer;
3599 }
3600
3601 int
3602 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3603      struct coding_system *coding;
3604      unsigned char *source, *destination;
3605      int src_bytes, dst_bytes, encodep;
3606 {
3607   struct ccl_program *ccl
3608     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3609   int result;
3610
3611   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3612
3613   coding->produced = ccl_driver (ccl, source, destination,
3614                                  src_bytes, dst_bytes, &(coding->consumed));
3615   if (encodep)
3616     {
3617       coding->produced_char = coding->produced;
3618       coding->consumed_char
3619         = multibyte_chars_in_text (source, coding->consumed);
3620     }
3621   else
3622     {
3623       coding->produced_char
3624         = multibyte_chars_in_text (destination, coding->produced);
3625       coding->consumed_char = coding->consumed;
3626     }
3627   switch (ccl->status)
3628     {
3629     case CCL_STAT_SUSPEND_BY_SRC:
3630       result = CODING_FINISH_INSUFFICIENT_SRC;
3631       break;
3632     case CCL_STAT_SUSPEND_BY_DST:
3633       result = CODING_FINISH_INSUFFICIENT_DST;
3634       break;
3635     default:
3636       result = CODING_FINISH_NORMAL;
3637       break;
3638     }
3639   return result;
3640 }
3641
3642 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3643    decoding, it may detect coding system and format of end-of-line if
3644    those are not yet decided.  */
3645
3646 int
3647 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3648      struct coding_system *coding;
3649      unsigned char *source, *destination;
3650      int src_bytes, dst_bytes;
3651 {
3652   int result;
3653
3654   if (src_bytes <= 0)
3655     {
3656       coding->produced = coding->produced_char = 0;
3657       coding->consumed = coding->consumed_char = 0;
3658       coding->fake_multibyte = 0;
3659       return CODING_FINISH_NORMAL;
3660     }
3661
3662   if (coding->type == coding_type_undecided)
3663     detect_coding (coding, source, src_bytes);
3664
3665   if (coding->eol_type == CODING_EOL_UNDECIDED)
3666     detect_eol (coding, source, src_bytes);
3667
3668   switch (coding->type)
3669     {
3670     case coding_type_emacs_mule:
3671     case coding_type_undecided:
3672     case coding_type_raw_text:
3673       if (coding->eol_type == CODING_EOL_LF
3674           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3675         goto label_no_conversion;
3676       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3677       break;
3678
3679     case coding_type_sjis:
3680       result = decode_coding_sjis_big5 (coding, source, destination,
3681                                         src_bytes, dst_bytes, 1);
3682       break;
3683
3684     case coding_type_iso2022:
3685       result = decode_coding_iso2022 (coding, source, destination,
3686                                       src_bytes, dst_bytes);
3687       break;
3688
3689     case coding_type_big5:
3690       result = decode_coding_sjis_big5 (coding, source, destination,
3691                                         src_bytes, dst_bytes, 0);
3692       break;
3693
3694     case coding_type_ccl:
3695       result = ccl_coding_driver (coding, source, destination,
3696                                   src_bytes, dst_bytes, 0);
3697       break;
3698
3699     default:                    /* i.e. case coding_type_no_conversion: */
3700     label_no_conversion:
3701       if (dst_bytes && src_bytes > dst_bytes)
3702         {
3703           coding->produced = dst_bytes;
3704           result = CODING_FINISH_INSUFFICIENT_DST;
3705         }
3706       else
3707         {
3708           coding->produced = src_bytes;
3709           result = CODING_FINISH_NORMAL;
3710         }
3711       if (dst_bytes)
3712         bcopy (source, destination, coding->produced);
3713       else
3714         safe_bcopy (source, destination, coding->produced);
3715       coding->fake_multibyte = 1;
3716       coding->consumed
3717         = coding->consumed_char = coding->produced_char = coding->produced;
3718       break;
3719     }
3720
3721   return result;
3722 }
3723
3724 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3725
3726 int
3727 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3728      struct coding_system *coding;
3729      unsigned char *source, *destination;
3730      int src_bytes, dst_bytes;
3731 {
3732   int result;
3733
3734   if (src_bytes <= 0)
3735     {
3736       coding->produced = coding->produced_char = 0;
3737       coding->consumed = coding->consumed_char = 0;
3738       coding->fake_multibyte = 0;
3739       return CODING_FINISH_NORMAL;
3740     }
3741
3742   switch (coding->type)
3743     {
3744     case coding_type_emacs_mule:
3745     case coding_type_undecided:
3746     case coding_type_raw_text:
3747       if (coding->eol_type == CODING_EOL_LF
3748           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3749         goto label_no_conversion;
3750       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3751       break;
3752
3753     case coding_type_sjis:
3754       result = encode_coding_sjis_big5 (coding, source, destination,
3755                                         src_bytes, dst_bytes, 1);
3756       break;
3757
3758     case coding_type_iso2022:
3759       result = encode_coding_iso2022 (coding, source, destination,
3760                                       src_bytes, dst_bytes);
3761       break;
3762
3763     case coding_type_big5:
3764       result = encode_coding_sjis_big5 (coding, source, destination,
3765                                         src_bytes, dst_bytes, 0);
3766       break;
3767
3768     case coding_type_ccl:
3769       result = ccl_coding_driver (coding, source, destination,
3770                                   src_bytes, dst_bytes, 1);
3771       break;
3772
3773     default:                    /* i.e. case coding_type_no_conversion: */
3774     label_no_conversion:
3775       if (dst_bytes && src_bytes > dst_bytes)
3776         {
3777           coding->produced = dst_bytes;
3778           result = CODING_FINISH_INSUFFICIENT_DST;
3779         }
3780       else
3781         {
3782           coding->produced = src_bytes;
3783           result = CODING_FINISH_NORMAL;
3784         }
3785       if (dst_bytes)
3786         bcopy (source, destination, coding->produced);
3787       else
3788         safe_bcopy (source, destination, coding->produced);
3789       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3790         {
3791           unsigned char *p = destination, *pend = p + coding->produced;
3792           while (p < pend)
3793             if (*p++ == '\015') p[-1] = '\n';
3794         }
3795       coding->fake_multibyte = 1;
3796       coding->consumed
3797         = coding->consumed_char = coding->produced_char = coding->produced;
3798       break;
3799     }
3800
3801   return result;
3802 }
3803
3804 /* Scan text in the region between *BEG and *END (byte positions),
3805    skip characters which we don't have to decode by coding system
3806    CODING at the head and tail, then set *BEG and *END to the region
3807    of the text we actually have to convert.  The caller should move
3808    the gap out of the region in advance.
3809
3810    If STR is not NULL, *BEG and *END are indices into STR.  */
3811
3812 static void
3813 shrink_decoding_region (beg, end, coding, str)
3814      int *beg, *end;
3815      struct coding_system *coding;
3816      unsigned char *str;
3817 {
3818   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3819   int eol_conversion;
3820
3821   if (coding->type == coding_type_ccl
3822       || coding->type == coding_type_undecided
3823       || !NILP (coding->post_read_conversion))
3824     {
3825       /* We can't skip any data.  */
3826       return;
3827     }
3828   else if (coding->type == coding_type_no_conversion)
3829     {
3830       /* We need no conversion, but don't have to skip any data here.
3831          Decoding routine handles them effectively anyway.  */
3832       return;
3833     }
3834
3835   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3836
3837   if ((! eol_conversion) && (coding->heading_ascii >= 0))
3838     /* Detection routine has already found how much we can skip at the
3839        head.  */
3840     *beg += coding->heading_ascii;
3841
3842   if (str)
3843     {
3844       begp_orig = begp = str + *beg;
3845       endp_orig = endp = str + *end;
3846     }
3847   else
3848     {
3849       begp_orig = begp = BYTE_POS_ADDR (*beg);
3850       endp_orig = endp = begp + *end - *beg;
3851     }
3852
3853   switch (coding->type)
3854     {
3855     case coding_type_emacs_mule:
3856     case coding_type_raw_text:
3857       if (eol_conversion)
3858         {
3859           if (coding->heading_ascii < 0)
3860             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3861           while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
3862             endp--;
3863           /* Do not consider LF as ascii if preceded by CR, since that
3864              confuses eol decoding. */
3865           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3866             endp++;
3867         }
3868       else
3869         begp = endp;
3870       break;
3871
3872     case coding_type_sjis:
3873     case coding_type_big5:
3874       /* We can skip all ASCII characters at the head.  */
3875       if (coding->heading_ascii < 0)
3876         {
3877           if (eol_conversion)
3878             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3879           else
3880             while (begp < endp && *begp < 0x80) begp++;
3881         }
3882       /* We can skip all ASCII characters at the tail except for the
3883          second byte of SJIS or BIG5 code.  */
3884       if (eol_conversion)
3885         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3886       else
3887         while (begp < endp && endp[-1] < 0x80) endp--;
3888       /* Do not consider LF as ascii if preceded by CR, since that
3889          confuses eol decoding. */
3890       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3891         endp++;
3892       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3893         endp++;
3894       break;
3895
3896     default:            /* i.e. case coding_type_iso2022: */
3897       if (coding->heading_ascii < 0)
3898         {
3899           /* We can skip all ASCII characters at the head except for a
3900              few control codes.  */
3901           while (begp < endp && (c = *begp) < 0x80
3902                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3903                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3904                  && (!eol_conversion || c != ISO_CODE_LF))
3905             begp++;
3906         }
3907       switch (coding->category_idx)
3908         {
3909         case CODING_CATEGORY_IDX_ISO_8_1:
3910         case CODING_CATEGORY_IDX_ISO_8_2:
3911           /* We can skip all ASCII characters at the tail.  */
3912           if (eol_conversion)
3913             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3914           else
3915             while (begp < endp && endp[-1] < 0x80) endp--;
3916           /* Do not consider LF as ascii if preceded by CR, since that
3917              confuses eol decoding. */
3918           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3919             endp++;
3920           break;
3921
3922         case CODING_CATEGORY_IDX_ISO_7:
3923         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3924           /* We can skip all charactes at the tail except for ESC and
3925              the following 2-byte at the tail.  */
3926           if (eol_conversion)
3927             while (begp < endp
3928                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3929               endp--;
3930           else
3931             while (begp < endp
3932                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3933               endp--;
3934           /* Do not consider LF as ascii if preceded by CR, since that
3935              confuses eol decoding. */
3936           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3937             endp++;
3938           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3939             {
3940               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3941                 /* This is an ASCII designation sequence.  We can
3942                     surely skip the tail.  */
3943                 endp += 2;
3944               else
3945                 /* Hmmm, we can't skip the tail.  */
3946                 endp = endp_orig;
3947             }
3948         }
3949     }
3950   *beg += begp - begp_orig;
3951   *end += endp - endp_orig;
3952   return;
3953 }
3954
3955 /* Like shrink_decoding_region but for encoding.  */
3956
3957 static void
3958 shrink_encoding_region (beg, end, coding, str)
3959      int *beg, *end;
3960      struct coding_system *coding;
3961      unsigned char *str;
3962 {
3963   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3964   int eol_conversion;
3965
3966   if (coding->type == coding_type_ccl)
3967     /* We can't skip any data.  */
3968     return;
3969   else if (coding->type == coding_type_no_conversion)
3970     {
3971       /* We need no conversion.  */
3972       *beg = *end;
3973       return;
3974     }
3975
3976   if (str)
3977     {
3978       begp_orig = begp = str + *beg;
3979       endp_orig = endp = str + *end;
3980     }
3981   else
3982     {
3983       begp_orig = begp = BYTE_POS_ADDR (*beg);
3984       endp_orig = endp = begp + *end - *beg;
3985     }
3986
3987   eol_conversion = (coding->eol_type == CODING_EOL_CR
3988                     || coding->eol_type == CODING_EOL_CRLF);
3989
3990   /* Here, we don't have to check coding->pre_write_conversion because
3991      the caller is expected to have handled it already.  */
3992   switch (coding->type)
3993     {
3994     case coding_type_undecided:
3995     case coding_type_emacs_mule:
3996     case coding_type_raw_text:
3997       if (eol_conversion)
3998         {
3999           while (begp < endp && *begp != '\n') begp++;
4000           while (begp < endp && endp[-1] != '\n') endp--;
4001         }
4002       else
4003         begp = endp;
4004       break;
4005
4006     case coding_type_iso2022:
4007       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4008         {
4009           unsigned char *bol = begp;
4010           while (begp < endp && *begp < 0x80)
4011             {
4012               begp++;
4013               if (begp[-1] == '\n')
4014                 bol = begp;
4015             }
4016           begp = bol;
4017           goto label_skip_tail;
4018         }
4019       /* fall down ... */
4020
4021     default:
4022       /* We can skip all ASCII characters at the head and tail.  */
4023       if (eol_conversion)
4024         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4025       else
4026         while (begp < endp && *begp < 0x80) begp++;
4027     label_skip_tail:
4028       if (eol_conversion)
4029         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4030       else
4031         while (begp < endp && *(endp - 1) < 0x80) endp--;
4032       break;
4033     }
4034
4035   *beg += begp - begp_orig;
4036   *end += endp - endp_orig;
4037   return;
4038 }
4039
4040 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4041    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4042    coding system CODING, and return the status code of code conversion
4043    (currently, this value has no meaning).
4044
4045    How many characters (and bytes) are converted to how many
4046    characters (and bytes) are recorded in members of the structure
4047    CODING.
4048
4049    If REPLACE is nonzero, we do various things as if the original text
4050    is deleted and a new text is inserted.  See the comments in
4051    replace_range (insdel.c) to know what we are doing.  */
4052
4053 int
4054 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4055      int from, from_byte, to, to_byte, encodep, replace;
4056      struct coding_system *coding;
4057 {
4058   int len = to - from, len_byte = to_byte - from_byte;
4059   int require, inserted, inserted_byte;
4060   int head_skip, tail_skip, total_skip;
4061   Lisp_Object saved_coding_symbol;
4062   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
4063   int first = 1;
4064   int fake_multibyte = 0;
4065   unsigned char *src, *dst;
4066   Lisp_Object deletion;
4067
4068   deletion = Qnil;
4069   saved_coding_symbol = Qnil;
4070
4071   if (from < PT && PT < to)
4072     SET_PT_BOTH (from, from_byte);
4073
4074   if (replace)
4075     {
4076       int saved_from = from;
4077
4078       prepare_to_modify_buffer (from, to, &from);
4079       if (saved_from != from)
4080         {
4081           to = from + len;
4082           if (multibyte)
4083             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4084           else
4085             from_byte = from, to_byte = to;
4086           len_byte = to_byte - from_byte;
4087         }
4088     }
4089
4090   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4091     {
4092       /* We must detect encoding of text and eol format.  */
4093
4094       if (from < GPT && to > GPT)
4095         move_gap_both (from, from_byte);
4096       if (coding->type == coding_type_undecided)
4097         {
4098           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4099           if (coding->type == coding_type_undecided)
4100             /* It seems that the text contains only ASCII, but we
4101                should not left it undecided because the deeper
4102                decoding routine (decode_coding) tries to detect the
4103                encodings again in vain.  */
4104             coding->type = coding_type_emacs_mule;
4105         }
4106       if (coding->eol_type == CODING_EOL_UNDECIDED)
4107         {
4108           saved_coding_symbol = coding->symbol;
4109           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4110           if (coding->eol_type == CODING_EOL_UNDECIDED)
4111             coding->eol_type = CODING_EOL_LF;
4112           /* We had better recover the original eol format if we
4113              encounter an inconsitent eol format while decoding.  */
4114           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4115         }
4116     }
4117
4118   coding->consumed_char = len, coding->consumed = len_byte;
4119
4120   if (encodep
4121       ? ! CODING_REQUIRE_ENCODING (coding)
4122       : ! CODING_REQUIRE_DECODING (coding))
4123     {
4124       coding->produced = len_byte;
4125       if (multibyte
4126           && ! replace
4127           /* See the comment of the member heading_ascii in coding.h.  */
4128           && coding->heading_ascii < len_byte)
4129         {
4130           /* We still may have to combine byte at the head and the
4131              tail of the text in the region.  */
4132           if (from < GPT && GPT < to)
4133             move_gap_both (to, to_byte);
4134           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4135           adjust_after_insert (from, from_byte, to, to_byte, len);
4136           coding->produced_char = len;
4137         }
4138       else
4139         {
4140           if (!replace)
4141             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4142           coding->produced_char = len_byte;
4143         }
4144       return 0;
4145     }
4146
4147   /* Now we convert the text.  */
4148
4149   /* For encoding, we must process pre-write-conversion in advance.  */
4150   if (encodep
4151       && ! NILP (coding->pre_write_conversion)
4152       && SYMBOLP (coding->pre_write_conversion)
4153       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4154     {
4155       /* The function in pre-write-conversion may put a new text in a
4156          new buffer.  */
4157       struct buffer *prev = current_buffer, *new;
4158
4159       call2 (coding->pre_write_conversion,
4160              make_number (from), make_number (to));
4161       if (current_buffer != prev)
4162         {
4163           len = ZV - BEGV;
4164           new = current_buffer;
4165           set_buffer_internal_1 (prev);
4166           del_range_2 (from, from_byte, to, to_byte);
4167           insert_from_buffer (new, BEG, len, 0);
4168           to = from + len;
4169           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4170           len_byte = to_byte - from_byte;
4171         }
4172     }
4173
4174   if (replace)
4175     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4176
4177   /* Try to skip the heading and tailing ASCIIs.  */
4178   {
4179     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4180
4181     if (from < GPT && GPT < to)
4182       move_gap_both (from, from_byte);
4183     if (encodep)
4184       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4185     else
4186       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4187     if (from_byte == to_byte)
4188       {
4189         coding->produced = len_byte;
4190         coding->produced_char = multibyte ? len : len_byte;
4191         if (!replace)
4192           /* We must record and adjust for this new text now.  */
4193           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4194         return 0;
4195       }
4196
4197     head_skip = from_byte - from_byte_orig;
4198     tail_skip = to_byte_orig - to_byte;
4199     total_skip = head_skip + tail_skip;
4200     from += head_skip;
4201     to -= tail_skip;
4202     len -= total_skip; len_byte -= total_skip;
4203   }
4204
4205   /* For converion, we must put the gap before the text in addition to
4206      making the gap larger for efficient decoding.  The required gap
4207      size starts from 2000 which is the magic number used in make_gap.
4208      But, after one batch of conversion, it will be incremented if we
4209      find that it is not enough .  */
4210   require = 2000;
4211
4212   if (GAP_SIZE  < require)
4213     make_gap (require - GAP_SIZE);
4214   move_gap_both (from, from_byte);
4215
4216   if (GPT - BEG < beg_unchanged)
4217     beg_unchanged = GPT - BEG;
4218   if (Z - GPT < end_unchanged)
4219     end_unchanged = Z - GPT;
4220
4221   inserted = inserted_byte = 0;
4222   src = GAP_END_ADDR, dst = GPT_ADDR;
4223
4224   GAP_SIZE += len_byte;
4225   ZV -= len;
4226   Z -= len;
4227   ZV_BYTE -= len_byte;
4228   Z_BYTE -= len_byte;
4229
4230   for (;;)
4231     {
4232       int result;
4233
4234       /* The buffer memory is changed from:
4235          +--------+converted-text+---------+-------original-text------+---+
4236          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4237                   |<------------------- GAP_SIZE -------------------->|  */
4238       if (encodep)
4239         result = encode_coding (coding, src, dst, len_byte, 0);
4240       else
4241         result = decode_coding (coding, src, dst, len_byte, 0);
4242       /* to:
4243          +--------+-------converted-text--------+--+---original-text--+---+
4244          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4245                   |<------------------- GAP_SIZE -------------------->|  */
4246       if (coding->fake_multibyte)
4247         fake_multibyte = 1;
4248
4249       if (!encodep && !multibyte)
4250         coding->produced_char = coding->produced;
4251       inserted += coding->produced_char;
4252       inserted_byte += coding->produced;
4253       len_byte -= coding->consumed;
4254       src += coding->consumed;
4255       dst += inserted_byte;
4256
4257       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4258         {
4259           unsigned char *pend = dst, *p = pend - inserted_byte;
4260
4261           /* Encode LFs back to the original eol format (CR or CRLF).  */
4262           if (coding->eol_type == CODING_EOL_CR)
4263             {
4264               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4265             }
4266           else
4267             {
4268               int count = 0;
4269
4270               while (p < pend) if (*p++ == '\n') count++;
4271               if (src - dst < count)
4272                 {
4273                   /* We don't have sufficient room for putting LFs
4274                      back to CRLF.  We must record converted and
4275                      not-yet-converted text back to the buffer
4276                      content, enlarge the gap, then record them out of
4277                      the buffer contents again.  */
4278                   int add = len_byte + inserted_byte;
4279
4280                   GAP_SIZE -= add;
4281                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4282                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4283                   make_gap (count - GAP_SIZE);
4284                   GAP_SIZE += add;
4285                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4286                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4287                   /* Don't forget to update SRC, DST, and PEND.  */
4288                   src = GAP_END_ADDR - len_byte;
4289                   dst = GPT_ADDR + inserted_byte;
4290                   pend = dst;
4291                 }
4292               inserted += count;
4293               inserted_byte += count;
4294               coding->produced += count;
4295               p = dst = pend + count;
4296               while (count)
4297                 {
4298                   *--p = *--pend;
4299                   if (*p == '\n') count--, *--p = '\r';
4300                 }
4301             }
4302
4303           /* Suppress eol-format conversion in the further conversion.  */
4304           coding->eol_type = CODING_EOL_LF;
4305
4306           /* Restore the original symbol.  */
4307           coding->symbol = saved_coding_symbol;
4308
4309           continue;
4310         }
4311       if (len_byte <= 0)
4312         break;
4313       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4314         {
4315           /* The source text ends in invalid codes.  Let's just
4316              make them valid buffer contents, and finish conversion.  */
4317           inserted += len_byte;
4318           inserted_byte += len_byte;
4319           while (len_byte--)
4320             *dst++ = *src++;
4321           fake_multibyte = 1;
4322           break;
4323         }
4324       if (first)
4325         {
4326           /* We have just done the first batch of conversion which was
4327              stoped because of insufficient gap.  Let's reconsider the
4328              required gap size (i.e. SRT - DST) now.
4329
4330              We have converted ORIG bytes (== coding->consumed) into
4331              NEW bytes (coding->produced).  To convert the remaining
4332              LEN bytes, we may need REQUIRE bytes of gap, where:
4333                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4334                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4335              Here, we are sure that NEW >= ORIG.  */
4336           float ratio = coding->produced - coding->consumed;
4337           ratio /= coding->consumed;
4338           require = len_byte * ratio;
4339           first = 0;
4340         }
4341       if ((src - dst) < (require + 2000))
4342         {
4343           /* See the comment above the previous call of make_gap.  */
4344           int add = len_byte + inserted_byte;
4345
4346           GAP_SIZE -= add;
4347           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4348           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4349           make_gap (require + 2000);
4350           GAP_SIZE += add;
4351           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4352           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4353           /* Don't forget to update SRC, DST.  */
4354           src = GAP_END_ADDR - len_byte;
4355           dst = GPT_ADDR + inserted_byte;
4356         }
4357     }
4358   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4359
4360   if (multibyte
4361       && (fake_multibyte
4362           || !encodep && (to - from) != (to_byte - from_byte)))
4363     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4364
4365   /* If we have shrinked the conversion area, adjust it now.  */
4366   if (total_skip > 0)
4367     {
4368       if (tail_skip > 0)
4369         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4370       inserted += total_skip; inserted_byte += total_skip;
4371       GAP_SIZE += total_skip;
4372       GPT -= head_skip; GPT_BYTE -= head_skip;
4373       ZV -= total_skip; ZV_BYTE -= total_skip;
4374       Z -= total_skip; Z_BYTE -= total_skip;
4375       from -= head_skip; from_byte -= head_skip;
4376       to += tail_skip; to_byte += tail_skip;
4377     }
4378
4379   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4380
4381   if (! encodep && ! NILP (coding->post_read_conversion))
4382     {
4383       Lisp_Object val;
4384       int orig_inserted = inserted, pos = PT;
4385
4386       if (from != pos)
4387         temp_set_point_both (current_buffer, from, from_byte);
4388       val = call1 (coding->post_read_conversion, make_number (inserted));
4389       if (! NILP (val))
4390         {
4391           CHECK_NUMBER (val, 0);
4392           inserted = XFASTINT (val);
4393         }
4394       if (pos >= from + orig_inserted)
4395         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4396     }
4397
4398   signal_after_change (from, to - from, inserted);
4399
4400   {
4401     coding->consumed = to_byte - from_byte;
4402     coding->consumed_char = to - from;
4403     coding->produced = inserted_byte;
4404     coding->produced_char = inserted;
4405   }
4406
4407   return 0;
4408 }
4409
4410 Lisp_Object
4411 code_convert_string (str, coding, encodep, nocopy)
4412      Lisp_Object str;
4413      struct coding_system *coding;
4414      int encodep, nocopy;
4415 {
4416   int len;
4417   char *buf;
4418   int from = 0, to = XSTRING (str)->size;
4419   int to_byte = STRING_BYTES (XSTRING (str));
4420   struct gcpro gcpro1;
4421   Lisp_Object saved_coding_symbol;
4422   int result;
4423
4424   saved_coding_symbol = Qnil;
4425   if (encodep && !NILP (coding->pre_write_conversion)
4426       || !encodep && !NILP (coding->post_read_conversion))
4427     {
4428       /* Since we have to call Lisp functions which assume target text
4429          is in a buffer, after setting a temporary buffer, call
4430          code_convert_region.  */
4431       int count = specpdl_ptr - specpdl;
4432       struct buffer *prev = current_buffer;
4433
4434       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4435       temp_output_buffer_setup (" *code-converting-work*");
4436       set_buffer_internal (XBUFFER (Vstandard_output));
4437       if (encodep)
4438         insert_from_string (str, 0, 0, to, to_byte, 0);
4439       else
4440         {
4441           /* We must insert the contents of STR as is without
4442              unibyte<->multibyte conversion.  */
4443           current_buffer->enable_multibyte_characters = Qnil;
4444           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4445           current_buffer->enable_multibyte_characters = Qt;
4446         }
4447       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4448       if (encodep)
4449         /* We must return the buffer contents as unibyte string.  */
4450         current_buffer->enable_multibyte_characters = Qnil;
4451       str = make_buffer_string (BEGV, ZV, 0);
4452       set_buffer_internal (prev);
4453       return unbind_to (count, str);
4454     }
4455
4456   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4457     {
4458       /* See the comments in code_convert_region.  */
4459       if (coding->type == coding_type_undecided)
4460         {
4461           detect_coding (coding, XSTRING (str)->data, to_byte);
4462           if (coding->type == coding_type_undecided)
4463             coding->type = coding_type_emacs_mule;
4464         }
4465       if (coding->eol_type == CODING_EOL_UNDECIDED)
4466         {
4467           saved_coding_symbol = coding->symbol;
4468           detect_eol (coding, XSTRING (str)->data, to_byte);
4469           if (coding->eol_type == CODING_EOL_UNDECIDED)
4470             coding->eol_type = CODING_EOL_LF;
4471           /* We had better recover the original eol format if we
4472              encounter an inconsitent eol format while decoding.  */
4473           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4474         }
4475     }
4476
4477   if (encodep
4478       ? ! CODING_REQUIRE_ENCODING (coding)
4479       : ! CODING_REQUIRE_DECODING (coding))
4480     from = to_byte;
4481   else
4482     {
4483       /* Try to skip the heading and tailing ASCIIs.  */
4484       if (encodep)
4485         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4486       else
4487         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4488     }
4489   if (from == to_byte)
4490     return (nocopy ? str : Fcopy_sequence (str));
4491
4492   if (encodep)
4493     len = encoding_buffer_size (coding, to_byte - from);
4494   else
4495     len = decoding_buffer_size (coding, to_byte - from);
4496   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4497   GCPRO1 (str);
4498   buf = get_conversion_buffer (len);
4499   UNGCPRO;
4500
4501   if (from > 0)
4502     bcopy (XSTRING (str)->data, buf, from);
4503   result = (encodep
4504             ? encode_coding (coding, XSTRING (str)->data + from,
4505                              buf + from, to_byte - from, len)
4506             : decode_coding (coding, XSTRING (str)->data + from,
4507                              buf + from, to_byte - from, len));
4508   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4509     {
4510       /* We simple try to decode the whole string again but without
4511          eol-conversion this time.  */
4512       coding->eol_type = CODING_EOL_LF;
4513       coding->symbol = saved_coding_symbol;
4514       return code_convert_string (str, coding, encodep, nocopy);
4515     }
4516
4517   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4518          STRING_BYTES (XSTRING (str)) - to_byte);
4519
4520   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4521   if (encodep)
4522     str = make_unibyte_string (buf, len + coding->produced);
4523   else
4524     {
4525       int chars= (coding->fake_multibyte
4526                   ? multibyte_chars_in_text (buf + from, coding->produced)
4527                   : coding->produced_char);
4528       str = make_multibyte_string (buf, len + chars, len + coding->produced);
4529     }
4530
4531   return str;
4532 }
4533
4534 \f
4535 #ifdef emacs
4536 /*** 8. Emacs Lisp library functions ***/
4537
4538 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4539   "Return t if OBJECT is nil or a coding-system.\n\
4540 See the documentation of `make-coding-system' for information\n\
4541 about coding-system objects.")
4542   (obj)
4543      Lisp_Object obj;
4544 {
4545   if (NILP (obj))
4546     return Qt;
4547   if (!SYMBOLP (obj))
4548     return Qnil;
4549   /* Get coding-spec vector for OBJ.  */
4550   obj = Fget (obj, Qcoding_system);
4551   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4552           ? Qt : Qnil);
4553 }
4554
4555 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4556        Sread_non_nil_coding_system, 1, 1, 0,
4557   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4558   (prompt)
4559      Lisp_Object prompt;
4560 {
4561   Lisp_Object val;
4562   do
4563     {
4564       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4565                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4566     }
4567   while (XSTRING (val)->size == 0);
4568   return (Fintern (val, Qnil));
4569 }
4570
4571 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4572   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4573 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4574   (prompt, default_coding_system)
4575      Lisp_Object prompt, default_coding_system;
4576 {
4577   Lisp_Object val;
4578   if (SYMBOLP (default_coding_system))
4579     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4580   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4581                           Qt, Qnil, Qcoding_system_history,
4582                           default_coding_system, Qnil);
4583   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4584 }
4585
4586 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4587        1, 1, 0,
4588   "Check validity of CODING-SYSTEM.\n\
4589 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4590 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4591 The value of property should be a vector of length 5.")
4592   (coding_system)
4593      Lisp_Object coding_system;
4594 {
4595   CHECK_SYMBOL (coding_system, 0);
4596   if (!NILP (Fcoding_system_p (coding_system)))
4597     return coding_system;
4598   while (1)
4599     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4600 }
4601 \f
4602 Lisp_Object
4603 detect_coding_system (src, src_bytes, highest)
4604      unsigned char *src;
4605      int src_bytes, highest;
4606 {
4607   int coding_mask, eol_type;
4608   Lisp_Object val, tmp;
4609   int dummy;
4610
4611   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4612   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4613   if (eol_type == CODING_EOL_INCONSISTENT)
4614     eol_type = CODING_EOL_UNDECIDED;
4615
4616   if (!coding_mask)
4617     {
4618       val = Qundecided;
4619       if (eol_type != CODING_EOL_UNDECIDED)
4620         {
4621           Lisp_Object val2;
4622           val2 = Fget (Qundecided, Qeol_type);
4623           if (VECTORP (val2))
4624             val = XVECTOR (val2)->contents[eol_type];
4625         }
4626       return (highest ? val : Fcons (val, Qnil));
4627     }
4628
4629   /* At first, gather possible coding systems in VAL.  */
4630   val = Qnil;
4631   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4632     {
4633       int idx
4634         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4635       if (coding_mask & (1 << idx))
4636         {
4637           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4638           if (highest)
4639             break;
4640         }
4641     }
4642   if (!highest)
4643     val = Fnreverse (val);
4644
4645   /* Then, replace the elements with subsidiary coding systems.  */
4646   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4647     {
4648       if (eol_type != CODING_EOL_UNDECIDED
4649           && eol_type != CODING_EOL_INCONSISTENT)
4650         {
4651           Lisp_Object eol;
4652           eol = Fget (XCONS (tmp)->car, Qeol_type);
4653           if (VECTORP (eol))
4654             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4655         }
4656     }
4657   return (highest ? XCONS (val)->car : val);
4658 }
4659
4660 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4661        2, 3, 0,
4662   "Detect coding system of the text in the region between START and END.\n\
4663 Return a list of possible coding systems ordered by priority.\n\
4664 \n\
4665 If only ASCII characters are found, it returns a list of single element\n\
4666 `undecided' or its subsidiary coding system according to a detected\n\
4667 end-of-line format.\n\
4668 \n\
4669 If optional argument HIGHEST is non-nil, return the coding system of\n\
4670 highest priority.")
4671   (start, end, highest)
4672      Lisp_Object start, end, highest;
4673 {
4674   int from, to;
4675   int from_byte, to_byte;
4676
4677   CHECK_NUMBER_COERCE_MARKER (start, 0);
4678   CHECK_NUMBER_COERCE_MARKER (end, 1);
4679
4680   validate_region (&start, &end);
4681   from = XINT (start), to = XINT (end);
4682   from_byte = CHAR_TO_BYTE (from);
4683   to_byte = CHAR_TO_BYTE (to);
4684
4685   if (from < GPT && to >= GPT)
4686     move_gap_both (to, to_byte);
4687
4688   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4689                                to_byte - from_byte,
4690                                !NILP (highest));
4691 }
4692
4693 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4694        1, 2, 0,
4695   "Detect coding system of the text in STRING.\n\
4696 Return a list of possible coding systems ordered by priority.\n\
4697 \n\
4698 If only ASCII characters are found, it returns a list of single element\n\
4699 `undecided' or its subsidiary coding system according to a detected\n\
4700 end-of-line format.\n\
4701 \n\
4702 If optional argument HIGHEST is non-nil, return the coding system of\n\
4703 highest priority.")
4704   (string, highest)
4705      Lisp_Object string, highest;
4706 {
4707   CHECK_STRING (string, 0);
4708
4709   return detect_coding_system (XSTRING (string)->data,
4710                                STRING_BYTES (XSTRING (string)),
4711                                !NILP (highest));
4712 }
4713
4714 Lisp_Object
4715 code_convert_region1 (start, end, coding_system, encodep)
4716      Lisp_Object start, end, coding_system;
4717      int encodep;
4718 {
4719   struct coding_system coding;
4720   int from, to, len;
4721
4722   CHECK_NUMBER_COERCE_MARKER (start, 0);
4723   CHECK_NUMBER_COERCE_MARKER (end, 1);
4724   CHECK_SYMBOL (coding_system, 2);
4725
4726   validate_region (&start, &end);
4727   from = XFASTINT (start);
4728   to = XFASTINT (end);
4729
4730   if (NILP (coding_system))
4731     return make_number (to - from);
4732
4733   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4734     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4735
4736   coding.mode |= CODING_MODE_LAST_BLOCK;
4737   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4738                        &coding, encodep, 1);
4739   Vlast_coding_system_used = coding.symbol;
4740   return make_number (coding.produced_char);
4741 }
4742
4743 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4744        3, 3, "r\nzCoding system: ",
4745   "Decode the current region by specified coding system.\n\
4746 When called from a program, takes three arguments:\n\
4747 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4748 This function sets `last-coding-system-used' to the precise coding system\n\
4749 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4750 not fully specified.)\n\
4751 It returns the length of the decoded text.")
4752   (start, end, coding_system)
4753      Lisp_Object start, end, coding_system;
4754 {
4755   return code_convert_region1 (start, end, coding_system, 0);
4756 }
4757
4758 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
4759        3, 3, "r\nzCoding system: ",
4760   "Encode the current region by specified coding system.\n\
4761 When called from a program, takes three arguments:\n\
4762 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4763 This function sets `last-coding-system-used' to the precise coding system\n\
4764 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4765 not fully specified.)\n\
4766 It returns the length of the encoded text.")
4767   (start, end, coding_system)
4768      Lisp_Object start, end, coding_system;
4769 {
4770   return code_convert_region1 (start, end, coding_system, 1);
4771 }
4772
4773 Lisp_Object
4774 code_convert_string1 (string, coding_system, nocopy, encodep)
4775      Lisp_Object string, coding_system, nocopy;
4776      int encodep;
4777 {
4778   struct coding_system coding;
4779
4780   CHECK_STRING (string, 0);
4781   CHECK_SYMBOL (coding_system, 1);
4782
4783   if (NILP (coding_system))
4784     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4785
4786   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4787     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4788
4789   coding.mode |= CODING_MODE_LAST_BLOCK;
4790   Vlast_coding_system_used = coding.symbol;
4791   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4792 }
4793
4794 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
4795        2, 3, 0,
4796   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
4797 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4798 if the decoding operation is trivial.\n\
4799 This function sets `last-coding-system-used' to the precise coding system\n\
4800 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4801 not fully specified.)")
4802   (string, coding_system, nocopy)
4803      Lisp_Object string, coding_system, nocopy;
4804 {
4805   return code_convert_string1 (string, coding_system, nocopy, 0);
4806 }
4807
4808 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
4809        2, 3, 0,
4810   "Encode STRING to CODING-SYSTEM, and return the result.\n\
4811 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4812 if the encoding operation is trivial.\n\
4813 This function sets `last-coding-system-used' to the precise coding system\n\
4814 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4815 not fully specified.)")
4816   (string, coding_system, nocopy)
4817      Lisp_Object string, coding_system, nocopy;
4818 {
4819   return code_convert_string1 (string, coding_system, nocopy, 1);
4820 }
4821
4822 /* Encode or decode STRING according to CODING_SYSTEM.
4823    Do not set Vlast_coding_system_used.  */
4824
4825 Lisp_Object
4826 code_convert_string_norecord (string, coding_system, encodep)
4827      Lisp_Object string, coding_system;
4828      int encodep;
4829 {
4830   struct coding_system coding;
4831
4832   CHECK_STRING (string, 0);
4833   CHECK_SYMBOL (coding_system, 1);
4834
4835   if (NILP (coding_system))
4836     return string;
4837
4838   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4839     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4840
4841   coding.mode |= CODING_MODE_LAST_BLOCK;
4842   return code_convert_string (string, &coding, encodep, Qt);
4843 }
4844 \f
4845 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4846   "Decode a JISX0208 character of shift-jis encoding.\n\
4847 CODE is the character code in SJIS.\n\
4848 Return the corresponding character.")
4849   (code)
4850      Lisp_Object code;
4851 {
4852   unsigned char c1, c2, s1, s2;
4853   Lisp_Object val;
4854
4855   CHECK_NUMBER (code, 0);
4856   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4857   DECODE_SJIS (s1, s2, c1, c2);
4858   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4859   return val;
4860 }
4861
4862 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4863   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4864 Return the corresponding character code in SJIS.")
4865   (ch)
4866      Lisp_Object ch;
4867 {
4868   int charset, c1, c2, s1, s2;
4869   Lisp_Object val;
4870
4871   CHECK_NUMBER (ch, 0);
4872   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4873   if (charset == charset_jisx0208)
4874     {
4875       ENCODE_SJIS (c1, c2, s1, s2);
4876       XSETFASTINT (val, (s1 << 8) | s2);
4877     }
4878   else
4879     XSETFASTINT (val, 0);
4880   return val;
4881 }
4882
4883 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4884   "Decode a Big5 character CODE of BIG5 coding system.\n\
4885 CODE is the character code in BIG5.\n\
4886 Return the corresponding character.")
4887   (code)
4888      Lisp_Object code;
4889 {
4890   int charset;
4891   unsigned char b1, b2, c1, c2;
4892   Lisp_Object val;
4893
4894   CHECK_NUMBER (code, 0);
4895   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4896   DECODE_BIG5 (b1, b2, charset, c1, c2);
4897   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4898   return val;
4899 }
4900
4901 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4902   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4903 Return the corresponding character code in Big5.")
4904   (ch)
4905      Lisp_Object ch;
4906 {
4907   int charset, c1, c2, b1, b2;
4908   Lisp_Object val;
4909
4910   CHECK_NUMBER (ch, 0);
4911   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4912   if (charset == charset_big5_1 || charset == charset_big5_2)
4913     {
4914       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4915       XSETFASTINT (val, (b1 << 8) | b2);
4916     }
4917   else
4918     XSETFASTINT (val, 0);
4919   return val;
4920 }
4921 \f
4922 DEFUN ("set-terminal-coding-system-internal",
4923        Fset_terminal_coding_system_internal,
4924        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4925   (coding_system)
4926      Lisp_Object coding_system;
4927 {
4928   CHECK_SYMBOL (coding_system, 0);
4929   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4930   /* We had better not send unsafe characters to terminal.  */
4931   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4932
4933   return Qnil;
4934 }
4935
4936 DEFUN ("set-safe-terminal-coding-system-internal",
4937        Fset_safe_terminal_coding_system_internal,
4938        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4939   (coding_system)
4940      Lisp_Object coding_system;
4941 {
4942   CHECK_SYMBOL (coding_system, 0);
4943   setup_coding_system (Fcheck_coding_system (coding_system),
4944                        &safe_terminal_coding);
4945   return Qnil;
4946 }
4947
4948 DEFUN ("terminal-coding-system",
4949        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
4950   "Return coding system specified for terminal output.")
4951   ()
4952 {
4953   return terminal_coding.symbol;
4954 }
4955
4956 DEFUN ("set-keyboard-coding-system-internal",
4957        Fset_keyboard_coding_system_internal,
4958        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4959   (coding_system)
4960      Lisp_Object coding_system;
4961 {
4962   CHECK_SYMBOL (coding_system, 0);
4963   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4964   return Qnil;
4965 }
4966
4967 DEFUN ("keyboard-coding-system",
4968        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
4969   "Return coding system specified for decoding keyboard input.")
4970   ()
4971 {
4972   return keyboard_coding.symbol;
4973 }
4974
4975 \f
4976 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4977        Sfind_operation_coding_system,  1, MANY, 0,
4978   "Choose a coding system for an operation based on the target name.\n\
4979 The value names a pair of coding systems: (DECODING-SYSTEM ENCODING-SYSTEM).\n\
4980 DECODING-SYSTEM is the coding system to use for decoding\n\
4981 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4982 for encoding (in case OPERATION does encoding).\n\
4983 \n\
4984 The first argument OPERATION specifies an I/O primitive:\n\
4985   For file I/O, `insert-file-contents' or `write-region'.\n\
4986   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4987   For network I/O, `open-network-stream'.\n\
4988 \n\
4989 The remaining arguments should be the same arguments that were passed\n\
4990 to the primitive.  Depending on which primitive, one of those arguments\n\
4991 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4992 whichever argument specifies the file name is TARGET.\n\
4993 \n\
4994 TARGET has a meaning which depends on OPERATION:\n\
4995   For file I/O, TARGET is a file name.\n\
4996   For process I/O, TARGET is a process name.\n\
4997   For network I/O, TARGET is a service name or a port number\n\
4998 \n\
4999 This function looks up what specified for TARGET in,\n\
5000 `file-coding-system-alist', `process-coding-system-alist',\n\
5001 or `network-coding-system-alist' depending on OPERATION.\n\
5002 They may specify a coding system, a cons of coding systems,\n\
5003 or a function symbol to call.\n\
5004 In the last case, we call the function with one argument,\n\
5005 which is a list of all the arguments given to this function.")
5006   (nargs, args)
5007      int nargs;
5008      Lisp_Object *args;
5009 {
5010   Lisp_Object operation, target_idx, target, val;
5011   register Lisp_Object chain;
5012
5013   if (nargs < 2)
5014     error ("Too few arguments");
5015   operation = args[0];
5016   if (!SYMBOLP (operation)
5017       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
5018     error ("Invalid first arguement");
5019   if (nargs < 1 + XINT (target_idx))
5020     error ("Too few arguments for operation: %s",
5021            XSYMBOL (operation)->name->data);
5022   target = args[XINT (target_idx) + 1];
5023   if (!(STRINGP (target)
5024         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
5025     error ("Invalid %dth argument", XINT (target_idx) + 1);
5026
5027   chain = ((EQ (operation, Qinsert_file_contents)
5028             || EQ (operation, Qwrite_region))
5029            ? Vfile_coding_system_alist
5030            : (EQ (operation, Qopen_network_stream)
5031               ? Vnetwork_coding_system_alist
5032               : Vprocess_coding_system_alist));
5033   if (NILP (chain))
5034     return Qnil;
5035
5036   for (; CONSP (chain); chain = XCONS (chain)->cdr)
5037     {
5038       Lisp_Object elt;
5039       elt = XCONS (chain)->car;
5040
5041       if (CONSP (elt)
5042           && ((STRINGP (target)
5043                && STRINGP (XCONS (elt)->car)
5044                && fast_string_match (XCONS (elt)->car, target) >= 0)
5045               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
5046         {
5047           val = XCONS (elt)->cdr;
5048           /* Here, if VAL is both a valid coding system and a valid
5049              function symbol, we return VAL as a coding system.  */
5050           if (CONSP (val))
5051             return val;
5052           if (! SYMBOLP (val))
5053             return Qnil;
5054           if (! NILP (Fcoding_system_p (val)))
5055             return Fcons (val, val);
5056           if (! NILP (Ffboundp (val)))
5057             {
5058               val = call1 (val, Flist (nargs, args));
5059               if (CONSP (val))
5060                 return val;
5061               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
5062                 return Fcons (val, val);
5063             }
5064           return Qnil;
5065         }
5066     }
5067   return Qnil;
5068 }
5069
5070 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
5071        Supdate_coding_systems_internal, 0, 0, 0,
5072   "Update internal database for ISO2022 and CCL based coding systems.\n\
5073 When values of the following coding categories are changed, you must\n\
5074 call this function:\n\
5075   coding-category-iso-7, coding-category-iso-7-tight,\n\
5076   coding-category-iso-8-1, coding-category-iso-8-2,\n\
5077   coding-category-iso-7-else, coding-category-iso-8-else,\n\
5078   coding-category-ccl")
5079   ()
5080 {
5081   int i;
5082
5083   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++)
5084     {
5085       Lisp_Object val;
5086
5087       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
5088       if (!NILP (val))
5089         {
5090           if (! coding_system_table[i])
5091             coding_system_table[i] = ((struct coding_system *)
5092                                       xmalloc (sizeof (struct coding_system)));
5093           setup_coding_system (val, coding_system_table[i]);
5094         }
5095       else if (coding_system_table[i])
5096         {
5097           xfree (coding_system_table[i]);
5098           coding_system_table[i] = NULL;
5099         }
5100     }
5101
5102   return Qnil;
5103 }
5104
5105 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
5106        Sset_coding_priority_internal, 0, 0, 0,
5107   "Update internal database for the current value of `coding-category-list'.\n\
5108 This function is internal use only.")
5109   ()
5110 {
5111   int i = 0, idx;
5112   Lisp_Object val;
5113
5114   val = Vcoding_category_list;
5115
5116   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
5117     {
5118       if (! SYMBOLP (XCONS (val)->car))
5119         break;
5120       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
5121       if (idx >= CODING_CATEGORY_IDX_MAX)
5122         break;
5123       coding_priorities[i++] = (1 << idx);
5124       val = XCONS (val)->cdr;
5125     }
5126   /* If coding-category-list is valid and contains all coding
5127      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
5128      the following code saves Emacs from craching.  */
5129   while (i < CODING_CATEGORY_IDX_MAX)
5130     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
5131
5132   return Qnil;
5133 }
5134
5135 #endif /* emacs */
5136
5137 \f
5138 /*** 9. Post-amble ***/
5139
5140 void
5141 init_coding ()
5142 {
5143   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5144 }
5145
5146 void
5147 init_coding_once ()
5148 {
5149   int i;
5150
5151   /* Emacs' internal format specific initialize routine.  */
5152   for (i = 0; i <= 0x20; i++)
5153     emacs_code_class[i] = EMACS_control_code;
5154   emacs_code_class[0x0A] = EMACS_linefeed_code;
5155   emacs_code_class[0x0D] = EMACS_carriage_return_code;
5156   for (i = 0x21 ; i < 0x7F; i++)
5157     emacs_code_class[i] = EMACS_ascii_code;
5158   emacs_code_class[0x7F] = EMACS_control_code;
5159   emacs_code_class[0x80] = EMACS_leading_code_composition;
5160   for (i = 0x81; i < 0xFF; i++)
5161     emacs_code_class[i] = EMACS_invalid_code;
5162   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5163   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5164   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5165   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
5166
5167   /* ISO2022 specific initialize routine.  */
5168   for (i = 0; i < 0x20; i++)
5169     iso_code_class[i] = ISO_control_code;
5170   for (i = 0x21; i < 0x7F; i++)
5171     iso_code_class[i] = ISO_graphic_plane_0;
5172   for (i = 0x80; i < 0xA0; i++)
5173     iso_code_class[i] = ISO_control_code;
5174   for (i = 0xA1; i < 0xFF; i++)
5175     iso_code_class[i] = ISO_graphic_plane_1;
5176   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5177   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5178   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5179   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5180   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5181   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5182   iso_code_class[ISO_CODE_ESC] = ISO_escape;
5183   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5184   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5185   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5186
5187   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
5188
5189   setup_coding_system (Qnil, &keyboard_coding);
5190   setup_coding_system (Qnil, &terminal_coding);
5191   setup_coding_system (Qnil, &safe_terminal_coding);
5192   setup_coding_system (Qnil, &default_buffer_file_coding);
5193
5194   bzero (coding_system_table, sizeof coding_system_table);
5195
5196   bzero (ascii_skip_code, sizeof ascii_skip_code);
5197   for (i = 0; i < 128; i++)
5198     ascii_skip_code[i] = 1;
5199
5200 #if defined (MSDOS) || defined (WINDOWSNT)
5201   system_eol_type = CODING_EOL_CRLF;
5202 #else
5203   system_eol_type = CODING_EOL_LF;
5204 #endif
5205 }
5206
5207 #ifdef emacs
5208
5209 void
5210 syms_of_coding ()
5211 {
5212   Qtarget_idx = intern ("target-idx");
5213   staticpro (&Qtarget_idx);
5214
5215   Qcoding_system_history = intern ("coding-system-history");
5216   staticpro (&Qcoding_system_history);
5217   Fset (Qcoding_system_history, Qnil);
5218
5219   /* Target FILENAME is the first argument.  */
5220   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5221   /* Target FILENAME is the third argument.  */
5222   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5223
5224   Qcall_process = intern ("call-process");
5225   staticpro (&Qcall_process);
5226   /* Target PROGRAM is the first argument.  */
5227   Fput (Qcall_process, Qtarget_idx, make_number (0));
5228
5229   Qcall_process_region = intern ("call-process-region");
5230   staticpro (&Qcall_process_region);
5231   /* Target PROGRAM is the third argument.  */
5232   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5233
5234   Qstart_process = intern ("start-process");
5235   staticpro (&Qstart_process);
5236   /* Target PROGRAM is the third argument.  */
5237   Fput (Qstart_process, Qtarget_idx, make_number (2));
5238
5239   Qopen_network_stream = intern ("open-network-stream");
5240   staticpro (&Qopen_network_stream);
5241   /* Target SERVICE is the fourth argument.  */
5242   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5243
5244   Qcoding_system = intern ("coding-system");
5245   staticpro (&Qcoding_system);
5246
5247   Qeol_type = intern ("eol-type");
5248   staticpro (&Qeol_type);
5249
5250   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5251   staticpro (&Qbuffer_file_coding_system);
5252
5253   Qpost_read_conversion = intern ("post-read-conversion");
5254   staticpro (&Qpost_read_conversion);
5255
5256   Qpre_write_conversion = intern ("pre-write-conversion");
5257   staticpro (&Qpre_write_conversion);
5258
5259   Qno_conversion = intern ("no-conversion");
5260   staticpro (&Qno_conversion);
5261
5262   Qundecided = intern ("undecided");
5263   staticpro (&Qundecided);
5264
5265   Qcoding_system_p = intern ("coding-system-p");
5266   staticpro (&Qcoding_system_p);
5267
5268   Qcoding_system_error = intern ("coding-system-error");
5269   staticpro (&Qcoding_system_error);
5270
5271   Fput (Qcoding_system_error, Qerror_conditions,
5272         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5273   Fput (Qcoding_system_error, Qerror_message,
5274         build_string ("Invalid coding system"));
5275
5276   Qcoding_category = intern ("coding-category");
5277   staticpro (&Qcoding_category);
5278   Qcoding_category_index = intern ("coding-category-index");
5279   staticpro (&Qcoding_category_index);
5280
5281   Vcoding_category_table
5282     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5283   staticpro (&Vcoding_category_table);
5284   {
5285     int i;
5286     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5287       {
5288         XVECTOR (Vcoding_category_table)->contents[i]
5289           = intern (coding_category_name[i]);
5290         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5291               Qcoding_category_index, make_number (i));
5292       }
5293   }
5294
5295   Qtranslation_table = intern ("translation-table");
5296   staticpro (&Qtranslation_table);
5297   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
5298
5299   Qtranslation_table_id = intern ("translation-table-id");
5300   staticpro (&Qtranslation_table_id);
5301
5302   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
5303   staticpro (&Qtranslation_table_for_decode);
5304
5305   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
5306   staticpro (&Qtranslation_table_for_encode);
5307
5308   Qsafe_charsets = intern ("safe-charsets");
5309   staticpro (&Qsafe_charsets);
5310
5311   Qvalid_codes = intern ("valid-codes");
5312   staticpro (&Qvalid_codes);
5313
5314   Qemacs_mule = intern ("emacs-mule");
5315   staticpro (&Qemacs_mule);
5316
5317   Qraw_text = intern ("raw-text");
5318   staticpro (&Qraw_text);
5319
5320   defsubr (&Scoding_system_p);
5321   defsubr (&Sread_coding_system);
5322   defsubr (&Sread_non_nil_coding_system);
5323   defsubr (&Scheck_coding_system);
5324   defsubr (&Sdetect_coding_region);
5325   defsubr (&Sdetect_coding_string);
5326   defsubr (&Sdecode_coding_region);
5327   defsubr (&Sencode_coding_region);
5328   defsubr (&Sdecode_coding_string);
5329   defsubr (&Sencode_coding_string);
5330   defsubr (&Sdecode_sjis_char);
5331   defsubr (&Sencode_sjis_char);
5332   defsubr (&Sdecode_big5_char);
5333   defsubr (&Sencode_big5_char);
5334   defsubr (&Sset_terminal_coding_system_internal);
5335   defsubr (&Sset_safe_terminal_coding_system_internal);
5336   defsubr (&Sterminal_coding_system);
5337   defsubr (&Sset_keyboard_coding_system_internal);
5338   defsubr (&Skeyboard_coding_system);
5339   defsubr (&Sfind_operation_coding_system);
5340   defsubr (&Supdate_coding_systems_internal);
5341   defsubr (&Sset_coding_priority_internal);
5342
5343   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5344     "List of coding systems.\n\
5345 \n\
5346 Do not alter the value of this variable manually.  This variable should be\n\
5347 updated by the functions `make-coding-system' and\n\
5348 `define-coding-system-alias'.");
5349   Vcoding_system_list = Qnil;
5350
5351   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5352     "Alist of coding system names.\n\
5353 Each element is one element list of coding system name.\n\
5354 This variable is given to `completing-read' as TABLE argument.\n\
5355 \n\
5356 Do not alter the value of this variable manually.  This variable should be\n\
5357 updated by the functions `make-coding-system' and\n\
5358 `define-coding-system-alias'.");
5359   Vcoding_system_alist = Qnil;
5360
5361   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5362     "List of coding-categories (symbols) ordered by priority.");
5363   {
5364     int i;
5365
5366     Vcoding_category_list = Qnil;
5367     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5368       Vcoding_category_list
5369         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5370                  Vcoding_category_list);
5371   }
5372
5373   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5374     "Specify the coding system for read operations.\n\
5375 It is useful to bind this variable with `let', but do not set it globally.\n\
5376 If the value is a coding system, it is used for decoding on read operation.\n\
5377 If not, an appropriate element is used from one of the coding system alists:\n\
5378 There are three such tables, `file-coding-system-alist',\n\
5379 `process-coding-system-alist', and `network-coding-system-alist'.");
5380   Vcoding_system_for_read = Qnil;
5381
5382   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5383     "Specify the coding system for write operations.\n\
5384 It is useful to bind this variable with `let', but do not set it globally.\n\
5385 If the value is a coding system, it is used for encoding on write operation.\n\
5386 If not, an appropriate element is used from one of the coding system alists:\n\
5387 There are three such tables, `file-coding-system-alist',\n\
5388 `process-coding-system-alist', and `network-coding-system-alist'.");
5389   Vcoding_system_for_write = Qnil;
5390
5391   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5392     "Coding system used in the latest file or process I/O.");
5393   Vlast_coding_system_used = Qnil;
5394
5395   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5396     "*Non-nil inhibit code conversion of end-of-line format in any cases.");
5397   inhibit_eol_conversion = 0;
5398
5399   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5400     "Non-nil means process buffer inherits coding system of process output.\n\
5401 Bind it to t if the process output is to be treated as if it were a file\n\
5402 read from some filesystem.");
5403   inherit_process_coding_system = 0;
5404
5405   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5406     "Alist to decide a coding system to use for a file I/O operation.\n\
5407 The format is ((PATTERN . VAL) ...),\n\
5408 where PATTERN is a regular expression matching a file name,\n\
5409 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5410 If VAL is a coding system, it is used for both decoding and encoding\n\
5411 the file contents.\n\
5412 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5413 and the cdr part is used for encoding.\n\
5414 If VAL is a function symbol, the function must return a coding system\n\
5415 or a cons of coding systems which are used as above.\n\
5416 \n\
5417 See also the function `find-operation-coding-system'\n\
5418 and the variable `auto-coding-alist'.");
5419   Vfile_coding_system_alist = Qnil;
5420
5421   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5422     "Alist to decide a coding system to use for a process I/O operation.\n\
5423 The format is ((PATTERN . VAL) ...),\n\
5424 where PATTERN is a regular expression matching a program name,\n\
5425 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5426 If VAL is a coding system, it is used for both decoding what received\n\
5427 from the program and encoding what sent to the program.\n\
5428 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5429 and the cdr part is used for encoding.\n\
5430 If VAL is a function symbol, the function must return a coding system\n\
5431 or a cons of coding systems which are used as above.\n\
5432 \n\
5433 See also the function `find-operation-coding-system'.");
5434   Vprocess_coding_system_alist = Qnil;
5435
5436   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5437     "Alist to decide a coding system to use for a network I/O operation.\n\
5438 The format is ((PATTERN . VAL) ...),\n\
5439 where PATTERN is a regular expression matching a network service name\n\
5440 or is a port number to connect to,\n\
5441 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5442 If VAL is a coding system, it is used for both decoding what received\n\
5443 from the network stream and encoding what sent to the network stream.\n\
5444 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5445 and the cdr part is used for encoding.\n\
5446 If VAL is a function symbol, the function must return a coding system\n\
5447 or a cons of coding systems which are used as above.\n\
5448 \n\
5449 See also the function `find-operation-coding-system'.");
5450   Vnetwork_coding_system_alist = Qnil;
5451
5452   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5453     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF).");
5454   eol_mnemonic_unix = ':';
5455
5456   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5457     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5458   eol_mnemonic_dos = '\\';
5459
5460   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5461     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5462   eol_mnemonic_mac = '/';
5463
5464   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5465     "Mnemonic character indicating end-of-line format is not yet decided.");
5466   eol_mnemonic_undecided = ':';
5467
5468   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
5469     "*Non-nil enables character translation while encoding and decoding.");
5470   Venable_character_translation = Qt;
5471
5472   DEFVAR_LISP ("standard-translation-table-for-decode",
5473     &Vstandard_translation_table_for_decode,
5474     "Table for translating characters while decoding.");
5475   Vstandard_translation_table_for_decode = Qnil;
5476
5477   DEFVAR_LISP ("standard-translation-table-for-encode",
5478     &Vstandard_translation_table_for_encode,
5479     "Table for translating characters while encoding.");
5480   Vstandard_translation_table_for_encode = Qnil;
5481
5482   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5483     "Alist of charsets vs revision numbers.\n\
5484 While encoding, if a charset (car part of an element) is found,\n\
5485 designate it with the escape sequence identifying revision (cdr part of the element).");
5486   Vcharset_revision_alist = Qnil;
5487
5488   DEFVAR_LISP ("default-process-coding-system",
5489                &Vdefault_process_coding_system,
5490     "Cons of coding systems used for process I/O by default.\n\
5491 The car part is used for decoding a process output,\n\
5492 the cdr part is used for encoding a text to be sent to a process.");
5493   Vdefault_process_coding_system = Qnil;
5494
5495   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5496     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5497 This is a vector of length 256.\n\
5498 If Nth element is non-nil, the existence of code N in a file\n\
5499 \(or output of subprocess) doesn't prevent it to be detected as\n\
5500 a coding system of ISO 2022 variant which has a flag\n\
5501 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5502 or reading output of a subprocess.\n\
5503 Only 128th through 159th elements have a meaning.");
5504   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5505
5506   DEFVAR_LISP ("select-safe-coding-system-function",
5507                &Vselect_safe_coding_system_function,
5508     "Function to call to select safe coding system for encoding a text.\n\
5509 \n\
5510 If set, this function is called to force a user to select a proper\n\
5511 coding system which can encode the text in the case that a default\n\
5512 coding system used in each operation can't encode the text.\n\
5513 \n\
5514 The default value is `select-safe-coding-system' (which see).");
5515   Vselect_safe_coding_system_function = Qnil;
5516
5517 }
5518
5519 #endif /* emacs */