src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. CCL handlers
  29   6. End-of-line handlers
  30   7. C library functions
  31   8. Emacs Lisp library functions
  32   9. Post-amble
  33
  34 */
  35
  36 /*** GENERAL NOTE on CODING SYSTEM ***
  37
  38   Coding system is an encoding mechanism of one or more character
  39   sets.  Here's a list of coding systems which Emacs can handle.  When
  40   we say "decode", it means converting some other coding system to
  41   Emacs' internal format (emacs-mule), and when we say "encode",
  42   it means converting the coding system emacs-mule to some other
  43   coding system.
  44
  45   0. Emacs' internal format (emacs-mule)
  46
  47   Emacs itself holds a multi-lingual character in a buffer and a string
  48   in a special format.  Details are described in section 2.
  49
  50   1. ISO2022
  51
  52   The most famous coding system for multiple character sets.  X's
  53   Compound Text, various EUCs (Extended Unix Code), and coding
  54   systems used in Internet communication such as ISO-2022-JP are
  55   all variants of ISO2022.  Details are described in section 3.
  56
  57   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  58
  59   A coding system to encode character sets: ASCII, JISX0201, and
  60   JISX0208.  Widely used for PC's in Japan.  Details are described in
  61   section 4.
  62
  63   3. BIG5
  64
  65   A coding system to encode character sets: ASCII and Big5.  Widely
  66   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  67   described in section 4.  In this file, when we write "BIG5"
  68   (all uppercase), we mean the coding system, and when we write
  69   "Big5" (capitalized), we mean the character set.
  70
  71   4. Raw text
  72
  73   A coding system for a text containing random 8-bit code.  Emacs does
  74   no code conversion on such a text except for end-of-line format.
  75
  76   5. Other
  77
  78   If a user wants to read/write a text encoded in a coding system not
  79   listed above, he can supply a decoder and an encoder for it in CCL
  80   (Code Conversion Language) programs.  Emacs executes the CCL program
  81   while reading/writing.
  82
  83   Emacs represents a coding system by a Lisp symbol that has a property
  84   `coding-system'.  But, before actually using the coding system, the
  85   information about it is set in a structure of type `struct
  86   coding_system' for rapid processing.  See section 6 for more details.
  87
  88 */
  89
  90 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  91
  92   How end-of-line of a text is encoded depends on a system.  For
  93   instance, Unix's format is just one byte of `line-feed' code,
  94   whereas DOS's format is two-byte sequence of `carriage-return' and
  95   `line-feed' codes.  MacOS's format is usually one byte of
  96   `carriage-return'.
  97
  98   Since text characters encoding and end-of-line encoding are
  99   independent, any coding system described above can take
 100   any format of end-of-line.  So, Emacs has information of format of
 101   end-of-line in each coding-system.  See section 6 for more details.
 102
 103 */
 104
 105 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 106
 107   These functions check if a text between SRC and SRC_END is encoded
 108   in the coding system category XXX.  Each returns an integer value in
 109   which appropriate flag bits for the category XXX are set.  The flag
 110   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 111   template of these functions.  */
 112 #if 0
 113 int
 114 detect_coding_emacs_mule (src, src_end)
 115      unsigned char *src, *src_end;
 116 {
 117   ...
 118 }
 119 #endif
 120
 121 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 122
 123   These functions decode SRC_BYTES length text at SOURCE encoded in
 124   CODING to Emacs' internal format (emacs-mule).  The resulting text
 125   goes to a place pointed to by DESTINATION, the length of which
 126   should not exceed DST_BYTES.  These functions set the information of
 127   original and decoded texts in the members produced, produced_char,
 128   consumed, and consumed_char of the structure *CODING.
 129
 130   The return value is an integer (CODING_FINISH_XXX) indicating how
 131   the decoding finished.
 132
 133   DST_BYTES zero means that source area and destination area are
 134   overlapped, which means that we can produce a decoded text until it
 135   reaches at the head of not-yet-decoded source text.
 136
 137   Below is a template of these functions.  */
 138 #if 0
 139 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 140      struct coding_system *coding;
 141      unsigned char *source, *destination;
 142      int src_bytes, dst_bytes;
 143 {
 144   ...
 145 }
 146 #endif
 147
 148 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 149
 150   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 151   internal format (emacs-mule) to CODING.  The resulting text goes to
 152   a place pointed to by DESTINATION, the length of which should not
 153   exceed DST_BYTES.  These functions set the information of
 154   original and encoded texts in the members produced, produced_char,
 155   consumed, and consumed_char of the structure *CODING.
 156
 157   The return value is an integer (CODING_FINISH_XXX) indicating how
 158   the encoding finished.
 159
 160   DST_BYTES zero means that source area and destination area are
 161   overlapped, which means that we can produce an encoded text until it
 162   reaches the head of not-yet-encoded source text.
 163
 164   Below is a template of these functions.  */
 165 #if 0
 166 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 167      struct coding_system *coding;
 168      unsigned char *source, *destination;
 169      int src_bytes, dst_bytes;
 170 {
 171   ...
 172 }
 173 #endif
 174
 175 /*** COMMONLY USED MACROS ***/
 176
 177 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 178    THREE_MORE_BYTES fetch one, two, and three bytes at `src' into their
 179    arguments, advancing `src'.  If fewer bytes remain before `src_end',
 180    they leave `src' untouched and jump to `label_end_of_loop', which the
 181    caller must define along with the variables `src' and `src_end'.  */
 182
 183 #define ONE_MORE_BYTE(c1)       \
 184   do {                          \
 185     if (src < src_end)          \
 186       c1 = *src++;              \
 187     else                        \
 188       goto label_end_of_loop;   \
 189   } while (0)
 190
 191 #define TWO_MORE_BYTES(c1, c2)  \
 192   do {                          \
 193     if (src + 1 < src_end)      \
 194       c1 = *src++, c2 = *src++; \
 195     else                        \
 196       goto label_end_of_loop;   \
 197   } while (0)
 198
 199 #define THREE_MORE_BYTES(c1, c2, c3)            \
 200   do {                                          \
 201     if (src + 2 < src_end)                      \
 202       c1 = *src++, c2 = *src++, c3 = *src++;    \
 203     else                                        \
 204       goto label_end_of_loop;                   \
 205   } while (0)
 206
 207 /* The following three macros DECODE_CHARACTER_ASCII,
 208    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 store
 209    the multi-byte form of a character of each class at `dst' and
 210    advance `dst' past it.  Unless a composition is in progress they also
 211    increment coding->produced_char.  The caller must set up `dst' and
 212    `coding' (the coding-system of the text being decoded) in advance.  */
 213
 214 /* Decode one ASCII character C (0xA0, C | 0x80 while composing).  */
 215
 216 #define DECODE_CHARACTER_ASCII(c)                               \
 217   do {                                                          \
 218     if (COMPOSING_P (coding->composing))                        \
 219       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
 220     else                                                        \
 221       {                                                         \
 222         *dst++ = (c);                                           \
 223         coding->produced_char++;                                \
 224       }                                                         \
 225   } while (0)
 226
 227 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 228    position-code is C (base leading-code + 0x20 while composing).  */
 229
 230 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 231   do {                                                                  \
 232     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 233     if (COMPOSING_P (coding->composing))                                \
 234       *dst++ = leading_code + 0x20;                                     \
 235     else                                                                \
 236       {                                                                 \
 237         *dst++ = leading_code;                                          \
 238         coding->produced_char++;                                        \
 239       }                                                                 \
 240     if (leading_code = CHARSET_LEADING_CODE_EXT (charset))              \
 241       *dst++ = leading_code;                                            \
 242     *dst++ = (c) | 0x80;                                                \
 243   } while (0)
 244
 245 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 246    position-codes are C1 and C2 (C2 | 0x80 follows the DIMENSION1 form).  */
 247
 248 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 249   do {                                                  \
 250     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 251     *dst++ = (c2) | 0x80;                               \
 252   } while (0)
 253
 254 \f
 255 /*** 1. Preamble ***/
 256
 257 #include <stdio.h>
 258
 259 #ifdef emacs
 260
 261 #include <config.h>
 262 #include "lisp.h"
 263 #include "buffer.h"
 264 #include "charset.h"
 265 #include "ccl.h"
 266 #include "coding.h"
 267 #include "window.h"
 268
 269 #else  /* not emacs */
 270
 271 #include "mulelib.h"
 272
 273 #endif /* not emacs */
 274
 275 Lisp_Object Qcoding_system, Qeol_type;
 276 Lisp_Object Qbuffer_file_coding_system;
 277 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 278 Lisp_Object Qno_conversion, Qundecided;
 279 Lisp_Object Qcoding_system_history;
 280 Lisp_Object Qsafe_charsets;
 281 Lisp_Object Qvalid_codes;
 282 /* NOTE(review): the names below suggest symbols for I/O primitives -- confirm.  */
 283 extern Lisp_Object Qinsert_file_contents, Qwrite_region; /* defined elsewhere */
 284 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 285 Lisp_Object Qstart_process, Qopen_network_stream;
 286 Lisp_Object Qtarget_idx;
 287
 288 Lisp_Object Vselect_safe_coding_system_function;
 289
 290 /* Mnemonic character of each format of end-of-line.  */
 291 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 292 /* Mnemonic character to indicate that the format of end-of-line is not
 293    yet decided.  */
 294 int eol_mnemonic_undecided;
 295
 296 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 297    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 298 int system_eol_type;
 299
 300 #ifdef emacs
 301 /* NOTE(review): presumably the registry of known coding systems -- confirm.  */
 302 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 303
 304 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 305
 306 /* Coding system emacs-mule and raw-text are for converting only
 307    end-of-line format.  */
 308 Lisp_Object Qemacs_mule, Qraw_text;
 309
 310 /* Coding-systems are passed between Emacs Lisp programs and C internal
 311    routines by the following three variables.  */
 312 /* Coding-system for reading files and receiving data from process.  */
 313 Lisp_Object Vcoding_system_for_read;
 314 /* Coding-system for writing files and sending data to process.  */
 315 Lisp_Object Vcoding_system_for_write;
 316 /* Coding-system actually used in the latest I/O.  */
 317 Lisp_Object Vlast_coding_system_used;
 318
 319 /* A vector of length 256 which contains information about special
 320    Latin codes (especially for dealing with Microsoft codes).  */
 321 Lisp_Object Vlatin_extra_code_table;
 322
 323 /* Flag to inhibit code conversion of end-of-line format.  */
 324 int inhibit_eol_conversion;
 325
 326 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 327 int inherit_process_coding_system;
 328
 329 /* Coding system to be used to encode text for terminal display.  */
 330 struct coding_system terminal_coding;
 331
 332 /* Coding system to be used to encode text for terminal display when
 333    terminal coding system is nil.  */
 334 struct coding_system safe_terminal_coding;
 335
 336 /* Coding system of what is sent from terminal keyboard.  */
 337 struct coding_system keyboard_coding;
 338
 339 /* Default coding system to be used to write a file.  */
 340 struct coding_system default_buffer_file_coding;
 341 /* NOTE(review): presumably alists for file, process and network I/O -- confirm.  */
 342 Lisp_Object Vfile_coding_system_alist;
 343 Lisp_Object Vprocess_coding_system_alist;
 344 Lisp_Object Vnetwork_coding_system_alist;
 345
 346 #endif /* emacs */
 347
 348 Lisp_Object Qcoding_category, Qcoding_category_index;
 349
 350 /* List of symbols `coding-category-xxx' ordered by priority.  */
 351 Lisp_Object Vcoding_category_list;
 352
 353 /* Table of coding categories (Lisp symbols).  */
 354 Lisp_Object Vcoding_category_table;
 355
 356 /* Table of the symbol name of each coding-category (twelve entries).  */
 357 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 358   "coding-category-emacs-mule",
 359   "coding-category-sjis",
 360   "coding-category-iso-7",
 361   "coding-category-iso-7-tight",
 362   "coding-category-iso-8-1",
 363   "coding-category-iso-8-2",
 364   "coding-category-iso-7-else",
 365   "coding-category-iso-8-else",
 366   "coding-category-ccl",
 367   "coding-category-big5",
 368   "coding-category-raw-text",
 369   "coding-category-binary"
 370 };
 371
 372 /* Table of pointers to the coding system corresponding to each coding
 373    category.  An entry may be null (CHARSET_OK tests for that).  */
 374 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 375
 376 /* Table of coding category masks.  Nth element is a mask for the coding
 377    category whose priority is Nth.  */
 378 static
 379 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 380
 381 /* Flag to tell if we look up translation table on character code
 382    conversion.  */
 383 Lisp_Object Venable_character_translation;
 384 /* Standard translation table to look up on decoding (reading).  */
 385 Lisp_Object Vstandard_translation_table_for_decode;
 386 /* Standard translation table to look up on encoding (writing).  */
 387 Lisp_Object Vstandard_translation_table_for_encode;
 388
 389 Lisp_Object Qtranslation_table;
 390 Lisp_Object Qtranslation_table_id;
 391 Lisp_Object Qtranslation_table_for_decode;
 392 Lisp_Object Qtranslation_table_for_encode;
 393
 394 /* Alist of charsets vs revision number.  */
 395 Lisp_Object Vcharset_revision_alist;
 396
 397 /* Default coding systems used for process I/O.  */
 398 Lisp_Object Vdefault_process_coding_system;
 399
 400 \f
 401 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 402
 403 /* Emacs' internal format for encoding multiple character sets is a
 404    kind of multi-byte encoding, i.e. characters are encoded by
 405    variable-length sequences of one-byte codes.  ASCII characters
 406    and control characters (e.g. `tab', `newline') are represented by
 407    one-byte sequences which are their ASCII codes, in the range 0x00
 408    through 0x7F.  The other characters are represented by a sequence
 409    of `base leading-code', optional `extended leading-code', and one
 410    or two `position-code's.  The length of the sequence is determined
 411    by the base leading-code.  Leading-code takes the range 0x80
 412    through 0x9F, whereas extended leading-code and position-code take
 413    the range 0xA0 through 0xFF.  See `charset.h' for more details
 414    about leading-code and position-code.
 415
 416    There's one exception to this rule.  Special leading-code
 417    `leading-code-composition' denotes that the following several
 418    characters should be composed into one character.  Leading-codes of
 419    components (except for ASCII) are added 0x20.  An ASCII character
 420    component is represented by a 2-byte sequence of `0xA0' and
 421    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 422    details of composite character.  Hence, we can summarize the code
 423    range as follows:
 424
 425    --- CODE RANGE of Emacs' internal format ---
 426    (character set)      (range)
 427    ASCII                0x00 .. 0x7F
 428    ELSE (1st byte)      0x80 .. 0x9F
 429         (rest bytes)    0xA0 .. 0xFF
 430    ---------------------------------------------
 431
 432   */
 433
 434 enum emacs_code_class_type emacs_code_class[256]; /* indexed by byte value */
 435
 436 /* Continue only if *SRC is accessible and is 0xA0 or greater (SRC is
 437    advanced); else jump to `label_end_of_switch' or return 0.  */
 438 #define CHECK_CODE_RANGE_A0_FF  \
 439   do {                          \
 440     if (src >= src_end)         \
 441       goto label_end_of_switch; \
 442     else if (*src++ < 0xA0)     \
 443       return 0;                 \
 444   } while (0)
 445
 446 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 447    Return CODING_CATEGORY_MASK_EMACS_MULE if the text looks like Emacs'
 448    internal format, else 0.  A sequence cut off at SRC_END is accepted.  */
 449
 450 int
 451 detect_coding_emacs_mule (src, src_end)
 452      unsigned char *src, *src_end;
 453 {
 454   unsigned char c;
 455   int composing = 0;          /* nonzero inside a composition sequence */
 456
 457   while (src < src_end)
 458     {
 459       c = *src++;
 460
 461       if (composing)
 462         {
 463           if (c < 0xA0)       /* not a component byte: composition ends */
 464             composing = 0;
 465           else                /* undo the 0x20 added to component codes */
 466             c -= 0x20;
 467         }
 468
 469       switch (emacs_code_class[c])
 470         {
 471         case EMACS_ascii_code:
 472         case EMACS_linefeed_code:
 473           break;
 474
 475         case EMACS_control_code:
 476           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 477             return 0;         /* these controls rule out emacs-mule */
 478           break;
 479
 480         case EMACS_invalid_code:
 481           return 0;
 482
 483         case EMACS_leading_code_composition: /* c == 0x80 */
 484           if (composing)      /* byte was 0xA0: ASCII component prefix */
 485             CHECK_CODE_RANGE_A0_FF;
 486           else
 487             composing = 1;
 488           break;
 489
 490         case EMACS_leading_code_4:
 491           CHECK_CODE_RANGE_A0_FF;
 492           /* fall through to check it two more times ...  */
 493
 494         case EMACS_leading_code_3:
 495           CHECK_CODE_RANGE_A0_FF;
 496           /* fall through to check it one more time ...  */
 497
 498         case EMACS_leading_code_2:
 499           CHECK_CODE_RANGE_A0_FF;
 500           break;
 501
 502         default:
 503         label_end_of_switch:  /* CHECK_CODE_RANGE_A0_FF jumps here at SRC_END */
 504           break;
 505         }
 506     }
 507   return CODING_CATEGORY_MASK_EMACS_MULE;
 508 }
 509
 510 \f
 511 /*** 3. ISO2022 handlers ***/
 512
 513 /* The following note describes the coding system ISO2022 briefly.
 514    Since the intention of this note is to help in understanding of
 515    the programs in this file, some parts are NOT ACCURATE or OVERLY
 516    SIMPLIFIED.  For the thorough understanding, please refer to the
 517    original document of ISO2022.
 518
 519    ISO2022 provides many mechanisms to encode several character sets
 520    in 7-bit and 8-bit environment.  If one chooses 7-bit environment,
 521    all text is encoded by codes of less than 128.  This may make the
 522    encoded text a little bit longer, but the text gets more stability
 523    to pass through several gateways (some of them strip off the MSB).
 524
 525    There are two kinds of character set: control character set and
 526    graphic character set.  The former contains control characters such
 527    as `newline' and `escape' to provide control functions (control
 528    functions are provided also by escape sequences).  The latter
 529    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 530    two control character sets and many graphic character sets.
 531
 532    Graphic character sets are classified into one of the following
 533    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 534    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 535    bytes (DIMENSION) and the number of characters in one dimension
 536    (CHARS) of the set.  In addition, each character set is assigned an
 537    identification tag (called "final character" and denoted as <F>
 538    here after) which is unique in each class.  <F> of each character
 539    set is decided by ECMA(*) when it is registered in ISO.  Code range
 540    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 541
 542    Note (*): ECMA = European Computer Manufacturers Association
 543
 544    Here are examples of graphic character set [NAME(<F>)]:
 545         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 546         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 547         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 548         o DIMENSION2_CHARS96 -- none for the moment
 549
 550    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 551         C0 [0x00..0x1F] -- control character plane 0
 552         GL [0x20..0x7F] -- graphic character plane 0
 553         C1 [0x80..0x9F] -- control character plane 1
 554         GR [0xA0..0xFF] -- graphic character plane 1
 555
 556    A control character set is directly designated and invoked to C0 or
 557    C1 by an escape sequence.  The most common case is that ISO646's
 558    control character set is designated/invoked to C0 and ISO6429's
 559    control character set is designated/invoked to C1, and usually
 560    these designations/invocations are omitted in a coded text.  With
 561    7-bit environment, only C0 can be used, and a control character for
 562    C1 is encoded by an appropriate escape sequence to fit in the
 563    environment.  All control characters for C1 have
 564    corresponding escape sequences defined.
 565
 566    A graphic character set is at first designated to one of four
 567    graphic registers (G0 through G3), then these graphic registers are
 568    invoked to GL or GR.  These designations and invocations can be
 569    done independently.  The most common case is that G0 is invoked to
 570    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 571    these invocations and designations are omitted in a coded text.
 572    With 7-bit environment, only GL can be used.
 573
 574    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 575    and 0x7F of GL area work as control characters SPACE and DEL
 576    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 577
 578    There are two ways of invocation: locking-shift and single-shift.
 579    With locking-shift, the invocation lasts until the next different
 580    invocation, whereas with single-shift, the invocation works only
 581    for the following character and doesn't affect locking-shift.
 582    Invocations are done by the following control characters or escape
 583    sequences.
 584
 585    ----------------------------------------------------------------------
 586    function             control char    escape sequence description
 587    ----------------------------------------------------------------------
 588    SI  (shift-in)               0x0F    none            invoke G0 to GL
 589    SO  (shift-out)              0x0E    none            invoke G1 to GL
 590    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 591    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 592    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 593    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 594    ----------------------------------------------------------------------
 595    The first four are for locking-shift.  Control characters for these
 596    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 597
 598    Designations are done by the following escape sequences.
 599    ----------------------------------------------------------------------
 600    escape sequence      description
 601    ----------------------------------------------------------------------
 602    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 603    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 604    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 605    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 606    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 607    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 608    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 609    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 610    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 611    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 612    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 613    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 614    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 615    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 616    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 617    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 618    ----------------------------------------------------------------------
 619
 620    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 621    of dimension 1, chars 94, and final character <F>, and etc.
 622
 623    Note (*): Although these designations are not allowed in ISO2022,
 624    Emacs accepts them on decoding, and produces them on encoding
 625    CHARS96 character set in a coding system which is characterized as
 626    7-bit environment, non-locking-shift, and non-single-shift.
 627
 628    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 629    '(' can be omitted.  We call this as "short-form" here after.
 630
 631    Now you may notice that there are a lot of ways for encoding the
 632    same multilingual text in ISO2022.  Actually, there exist many
 633    coding systems such as Compound Text (used in X's inter client
 634    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 635    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 636    localized platforms), and all of these are variants of ISO2022.
 637
 638    In addition to the above, Emacs handles two more kinds of escape
 639    sequences: ISO6429's direction specification and Emacs' private
 640    sequence for specifying character composition.
 641
 642    ISO6429's direction specification takes the following format:
 643         o CSI ']'      -- end of the current direction
 644         o CSI '0' ']'  -- end of the current direction
 645         o CSI '1' ']'  -- start of left-to-right text
 646         o CSI '2' ']'  -- start of right-to-left text
 647    The control character CSI (0x9B: control sequence introducer) is
 648    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 649
 650    Character composition specification takes the following format:
 651         o ESC '0' -- start character composition
 652         o ESC '1' -- end character composition
 653    Since these are not standard escape sequences of any ISO, the use
 654    of them for these meaning is restricted to Emacs only.  */
 655
 656 enum iso_code_class_type iso_code_class[256];
 657 /* Nonzero if table entry IDX is set and has CHARSET as safe or requested.  */
 658 #define CHARSET_OK(idx, charset)                                \
 659   (coding_system_table[idx]                                     \
 660    && (coding_system_table[idx]->safe_charsets[charset]         \
 661        || (CODING_SPEC_ISO_REQUESTED_DESIGNATION                \
 662             (coding_system_table[idx], charset)                 \
 663            != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
 664 /* Nonzero if entry IDX's initial designation for register 1 is >= 0.  */
 665 #define SHIFT_OUT_OK(idx) \
 666   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 667
 668 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 669    Check if a text is encoded in ISO2022.  If it is, returns an
 670    integer in which appropriate flag bits any of:
 671         CODING_CATEGORY_MASK_ISO_7
 672         CODING_CATEGORY_MASK_ISO_7_TIGHT
 673         CODING_CATEGORY_MASK_ISO_8_1
 674         CODING_CATEGORY_MASK_ISO_8_2
 675         CODING_CATEGORY_MASK_ISO_7_ELSE
 676         CODING_CATEGORY_MASK_ISO_8_ELSE
 677    are set.  If a code which should never appear in ISO2022 is found,
 678    returns 0.  */
 679
 680 int
 681 detect_coding_iso2022 (src, src_end)
 682      unsigned char *src, *src_end;
 683 {
       /* MASK is the set of ISO2022 categories not ruled out so far.
          MASK_FOUND collects the categories for which some evidence has
          been seen.  The value returned is MASK & MASK_FOUND.  */
 684   int mask = CODING_CATEGORY_MASK_ISO;
 685   int mask_found = 0;
       /* REG[n] is the charset designated to graphic register n so far
          (-1 if none).  SINGLE_SHIFTING is 1 right after SS2 or SS3 and
          is cleared by ESC, SO, SI, CSI, and any byte below 0xA0.
          NOTE(review): SHIFT_OUT is initialized to 0 and never assigned
          in this function, so the `shift_out == 1' test in the
          ISO_CODE_SI case is never true as written.  */
 686   int reg[4], shift_out = 0, single_shifting = 0;
 687   int c, c1, i, charset;
 688
 689   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
       /* Scanning stops early once every category has been ruled out.  */
 690   while (mask && src < src_end)
 691     {
 692       c = *src++;
 693       switch (c)
 694         {
 695         case ISO_CODE_ESC:
 696           single_shifting = 0;
               /* In this case, every `break' before the "valid designation"
                  code below leaves the switch without touching the masks
                  (except the SS2/SS3 branch, which narrows MASK first).  */
 697           if (src >= src_end)
 698             break;
 699           c = *src++;
 700           if (c >= '(' && c <= '/')
 701             {
 702               /* Designation sequence for a charset of dimension 1.  */
 703               if (src >= src_end)
 704                 break;
 705               c1 = *src++;
                   /* The second table index is 1 for the intermediates
                      ',' .. '/' (handled as 96-char designations in
                      decode_coding_iso2022) and 0 for '(' .. '+'.  */
 706               if (c1 < ' ' || c1 >= 0x80
 707                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 708                 /* Invalid designation sequence.  Just ignore.  */
 709                 break;
                   /* Both '(' .. '+' and ',' .. '/' map onto registers 0..3.  */
 710               reg[(c - '(') % 4] = charset;
 711             }
 712           else if (c == '$')
 713             {
 714               /* Designation sequence for a charset of dimension 2.  */
 715               if (src >= src_end)
 716                 break;
 717               c = *src++;
 718               if (c >= '@' && c <= 'B')
 719                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 720                 reg[0] = charset = iso_charset_table[1][0][c];
 721               else if (c >= '(' && c <= '/')
 722                 {
 723                   if (src >= src_end)
 724                     break;
 725                   c1 = *src++;
 726                   if (c1 < ' ' || c1 >= 0x80
 727                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 728                     /* Invalid designation sequence.  Just ignore.  */
 729                     break;
 730                   reg[(c - '(') % 4] = charset;
 731                 }
 732               else
 733                 /* Invalid designation sequence.  Just ignore.  */
 734                 break;
 735             }
 736           else if (c == 'N' || c == 'O')
 737             {
 738               /* ESC <Fe> for SS2 or SS3.  */
                   /* Only the ISO_7_ELSE category remains a candidate.  */
 739               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
 740               break;
 741             }
 742           else if (c == '0' || c == '1' || c == '2')
 743             /* ESC <Fp> for start/end composition.  Just ignore.  */
 744             break;
 745           else
 746             /* Invalid escape sequence.  Just ignore.  */
 747             break;
 748
 749           /* We found a valid designation sequence for CHARSET.  */
               /* Reached only from the two designation branches above.  For
                  each 7-bit/ELSE category, record it as found if CHARSET is
                  acceptable to it (CHARSET_OK), otherwise rule it out.  */
 750           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 751           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 752             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 753           else
 754             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 755           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 756             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 757           else
 758             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 759           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 760             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
 761           else
 762             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 763           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 764             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
 765           else
 766             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 767           break;
 768
 769         case ISO_CODE_SO:
 770           single_shifting = 0;
               /* SO counts as a locking shift only if something has been
                  designated to G1 or one of the ELSE categories accepts
                  shift-out (SHIFT_OUT_OK).  */
 771           if (shift_out == 0
 772               && (reg[1] >= 0
 773                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 774                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 775             {
 776               /* Locking shift out.  */
 777               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 778               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 779             }
 780           break;
 781
 782         case ISO_CODE_SI:
 783           single_shifting = 0;
               /* NOTE(review): never true as written; see the note at the
                  declaration of SHIFT_OUT.  */
 784           if (shift_out == 1)
 785             {
 786               /* Locking shift in.  */
 787               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 788               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 789             }
 790           break;
 791
 792         case ISO_CODE_CSI:
 793           single_shifting = 0;
               /* Fall through: CSI shares the mask handling below, but is
                  excluded from the single-shift part by the test on C.  */
 794         case ISO_CODE_SS2:
 795         case ISO_CODE_SS3:
 796           {
                 /* NEWMASK is the set of categories that can still explain
                    this byte; MASK is narrowed to it below.  */
 797             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 798
 799             if (c != ISO_CODE_CSI)
 800               {
                     /* SS2/SS3: also keep ISO_8_1 / ISO_8_2 if the coding
                        system of that category uses single shift.  */
 801                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 802                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 803                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 804                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 805                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 806                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 807                 single_shifting = 1;
 808               }
                 /* The byte may also be accepted as a Latin extra code.  */
 809             if (VECTORP (Vlatin_extra_code_table)
 810                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 811               {
 812                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 813                     & CODING_FLAG_ISO_LATIN_EXTRA)
 814                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 815                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 816                     & CODING_FLAG_ISO_LATIN_EXTRA)
 817                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 818               }
 819             mask &= newmask;
 820             mask_found |= newmask;
 821           }
 822           break;
 823
 824         default:
 825           if (c < 0x80)
 826             {
                   /* Any other 7-bit byte tells nothing about the category.  */
 827               single_shifting = 0;
 828               break;
 829             }
 830           else if (c < 0xA0)
 831             {
                   /* A byte in 0x80..0x9F not handled by the cases above is
                      acceptable only as a Latin extra code; otherwise the
                      text is judged not to be ISO2022 at all.  */
 832               single_shifting = 0;
 833               if (VECTORP (Vlatin_extra_code_table)
 834                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 835                 {
 836                   int newmask = 0;
 837
 838                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 839                       & CODING_FLAG_ISO_LATIN_EXTRA)
 840                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 841                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 842                       & CODING_FLAG_ISO_LATIN_EXTRA)
 843                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 844                   mask &= newmask;
 845                   mask_found |= newmask;
 846                 }
 847               else
 848                 return 0;
 849             }
 850           else
 851             {
                   /* SRC already points past C, so SRC_BEGIN marks the byte
                      that follows the first high byte of the run.  */
 852               unsigned char *src_begin = src;
 853
                   /* A byte in 0xA0..0xFF rules out the 7-bit categories.  */
 854               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 855                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 856               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 857               /* Check the length of succeeding codes of the range
 858                  0xA0..0xFF.  If the byte length is odd, we exclude
 859                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
 860                  when we are not single shifting.  */
 861               if (!single_shifting)
 862                 {
 863                   while (src < src_end && *src >= 0xA0)
 864                     src++;
                       /* The run length is SRC - SRC_BEGIN + 1, which has the
                          same parity as the expression tested here.  A run
                          that reaches SRC_END is not counted as odd.  */
 865                   if ((src - src_begin - 1) & 1 && src < src_end)
 866                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 867                   else
 868                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 869                 }
 870             }
 871           break;
 872         }
 873     }
 874
 875   return (mask & mask_found);
 876 }
 877
 878 /* Decode a character of which charset is CHARSET and the 1st position
 879    code is C1.  If dimension of CHARSET is 2, the 2nd position code is
 880    fetched from SRC and set to C2.  If CHARSET is negative, it means
 881    that we are decoding ill formed text, and what we can do is just to
 882    read C1 as is.
        This macro expands in the caller's scope and uses the caller's
        variables `coding', `src', `dst', `c2' and `translation_table'.
        At the head of a composition it first emits
        LEADING_CODE_COMPOSITION (followed by 0xFF when composing with
        rules) and advances coding->composing by 2.  For a 2-dimensional
        charset, if the class of the 2nd byte (with the 8th bit masked
        off) is neither ISO_0x20_or_0x7F nor ISO_graphic_plane_0, SRC is
        backed up by one and C1 is decoded as ASCII.  If
        `translation_table' is non-nil and translate_char returns a
        non-negative character, that character replaces the original.
        When composing with rules, coding->composing is finally set to
        COMPOSING_WITH_RULE_RULE so that a rule is expected next.  */
 883
 884 #define DECODE_ISO_CHARACTER(charset, c1)                               \
 885   do {                                                                  \
 886     int c_alt, charset_alt = (charset);                                 \
 887     if (COMPOSING_HEAD_P (coding->composing))                           \
 888       {                                                                 \
 889         *dst++ = LEADING_CODE_COMPOSITION;                              \
 890         if (COMPOSING_WITH_RULE_P (coding->composing))                  \
 891           /* To tell composition rules are embedded.  */                \
 892           *dst++ = 0xFF;                                                \
 893         coding->composing += 2;                                         \
 894       }                                                                 \
 895     if (charset_alt >= 0)                                               \
 896       {                                                                 \
 897         if (CHARSET_DIMENSION (charset_alt) == 2)                       \
 898           {                                                             \
 899             ONE_MORE_BYTE (c2);                                         \
 900             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
 901                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
 902               {                                                         \
 903                 src--;                                                  \
 904                 charset_alt = CHARSET_ASCII;                            \
 905               }                                                         \
 906           }                                                             \
 907         if (!NILP (translation_table)                                   \
 908             && ((c_alt = translate_char (translation_table,             \
 909                                          -1, charset_alt, c1, c2)) >= 0)) \
 910           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
 911       }                                                                 \
 912     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
 913       DECODE_CHARACTER_ASCII (c1);                                      \
 914     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
 915       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
 916     else                                                                \
 917       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
 918     if (COMPOSING_WITH_RULE_P (coding->composing))                      \
 919       /* To tell a composition rule follows.  */                        \
 920       coding->composing = COMPOSING_WITH_RULE_RULE;                     \
 921   } while (0)
 922
 923 /* Set designation state into CODING.
        The charset is looked up with ISO_CHARSET_TABLE from DIMENSION,
        CHARS and FINAL_CHAR.  The designation is accepted when the
        charset is known (non-negative) and either the designation
        requested for it in CODING equals REG or
        coding->safe_charsets[charset] is nonzero; it is then recorded
        as the designation of register REG (the reverse charset is used
        instead when CODING_MODE_DIRECTION is set and a reverse charset
        exists).  Otherwise REG is remembered in
        coding->spec.iso2022.last_invalid_designation_register and
        control jumps to the caller's `label_invalid_code'.
        This macro expands in the caller's scope: it uses the caller's
        `coding' and requires the label `label_invalid_code'.  */
 924 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 925   do {                                                                     \
 926     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 927                                      make_number (chars),                  \
 928                                      make_number (final_char));            \
 929     if (charset >= 0                                                       \
 930         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 931             || coding->safe_charsets[charset]))                            \
 932       {                                                                    \
 933         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 934             && reg == 0                                                    \
 935             && charset == CHARSET_ASCII)                                   \
 936           {                                                                \
 937             /* We should insert this designation sequence as is so         \
 938                that it is surely written back to a file.  */               \
 939             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 940             goto label_invalid_code;                                       \
 941           }                                                                \
 942         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 943         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 944             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 945           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 946         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 947       }                                                                    \
 948     else                                                                   \
 949       {                                                                    \
 950         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 951         goto label_invalid_code;                                           \
 952       }                                                                    \
 953   } while (0)
 954
 955 /* Check whether the composing sequence starting at SRC contains only
 956    escape sequences that CODING can handle.
 957    Return 0 if the terminating `ESC 1' is found and no invalid code
 958    was seen before it.  Return the number of bytes scanned (a
        positive value) if an invalid code was seen.  If scanning stops
        before `ESC 1' without seeing an invalid code, return 0 when
        CODING_MODE_LAST_BLOCK is set in CODING->mode, and -1 otherwise
        (more source text is needed).  */
 959
 960 int
 961 check_composing_code (coding, src, src_end)
 962      struct coding_system *coding;
 963      unsigned char *src, *src_end;
 964 {
 965   unsigned char *src_start = src;
 966   int invalid_code_found = 0;
 967   int charset, c, c1, dim;
 968
 969   while (src < src_end)
 970     {
           /* Only escape sequences are examined; skip everything else.  */
 971       if (*src++ != ISO_CODE_ESC) continue;
 972       if (src >= src_end) break;
 973       if ((c = *src++) == '1') /* end of composition */
 974         return (invalid_code_found ? src - src_start : 0);
           /* The checks below read up to two more bytes; stop scanning if
              they are not available yet.  */
 975       if (src + 2 >= src_end) break;
           /* A coding system without designation support cannot accept
              any escape sequence here.  The parentheses are essential:
              `!' binds tighter than `&', so without them the flag was
              effectively never tested.  */
 976       if (!(coding->flags & CODING_FLAG_ISO_DESIGNATION))
 977         invalid_code_found = 1;
 978       else
 979         {
               /* DIM is the first index into iso_charset_table: 0 for a
                  charset of dimension 1, 1 for dimension 2.  */
 980           dim = 0;
 981           if (c == '$')
 982             {
 983               dim = 1;
                   /* Short form `ESC $ @', `ESC $ A', `ESC $ B': act as if
                      the intermediate were '(' and leave SRC at the final
                      byte.  Otherwise consume the intermediate byte.  */
 984               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 985             }
 986           if (c >= '(' && c <= '/')
 987             {
 988               c1 = *src++;
                   /* The designation is valid only if the final byte names
                      a known charset which is safe for CODING and for
                      which CODING has a requested designation.  */
 989               if ((c1 < ' ' || c1 >= 0x80)
 990                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 991                   || ! coding->safe_charsets[charset]
 992                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 993                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 994                 invalid_code_found = 1;
 995             }
 996           else
               /* Any escape sequence other than a designation is invalid
                  inside a composing sequence.  */
 997             invalid_code_found = 1;
 998         }
 999     }
       /* `ESC 1' was not found in the available text.  */
1000   return (invalid_code_found
1001           ? src - src_start
1002           : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
1003 }
1004
1005 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Decode the ISO2022 text of SRC_BYTES bytes at SOURCE into
        DESTINATION.  On return, coding->consumed and
        coding->consumed_char are set to the number of source bytes
        processed, coding->produced to the number of bytes stored at
        DESTINATION, and the value returned is the CODING_FINISH_XXX
        code left in RESULT.
        NOTE(review): when DST_BYTES is zero the loop is bounded by
        `dst < src - 6' instead of by the destination size; this looks
        like decoding in place with DST trailing SRC -- confirm against
        the callers.  */
1006
1007 int
1008 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1009      struct coding_system *coding;
1010      unsigned char *source, *destination;
1011      int src_bytes, dst_bytes;
1012 {
1013   unsigned char *src = source;
1014   unsigned char *src_end = source + src_bytes;
1015   unsigned char *dst = destination;
1016   unsigned char *dst_end = destination + dst_bytes;
1017   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1018      from DST_END to assure that overflow checking is necessary only
1019      at the head of loop.  */
1020   unsigned char *adjusted_dst_end = dst_end - 6;
1021   int charset;
1022   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1023   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1024   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1025   Lisp_Object translation_table
1026     = coding->translation_table_for_decode;
1027   int result = CODING_FINISH_NORMAL;
1028
       /* Fall back to the standard translation table when character
          translation is enabled and CODING has no table of its own.  */
1029   if (!NILP (Venable_character_translation) && NILP (translation_table))
1030     translation_table = Vstandard_translation_table_for_decode;
1031
1032   coding->produced_char = 0;
1033   coding->fake_multibyte = 0;
1034   while (src < src_end && (dst_bytes
1035                            ? (dst < adjusted_dst_end)
1036                            : (dst < src - 6)))
1037     {
1038       /* SRC_BASE remembers the start position in source in each loop.
1039          The loop will be exited when there's not enough source text
1040          to analyze long escape sequence or 2-byte code (within macros
1041          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1042          to SRC_BASE before exiting.  */
1043       unsigned char *src_base = src;
1044       int c1 = *src++, c2;
1045
1046       switch (iso_code_class [c1])
1047         {
1048         case ISO_0x20_or_0x7F:
1049           if (!coding->composing
1050               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1051             {
1052               /* This is SPACE or DEL.  */
1053               *dst++ = c1;
1054               coding->produced_char++;
1055               break;
1056             }
1057           /* This is a graphic character, we fall down ...  */
1058
1059         case ISO_graphic_plane_0:
1060           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1061             {
1062               /* This is a composition rule.  */
1063               *dst++ = c1 | 0x80;
1064               coding->composing = COMPOSING_WITH_RULE_TAIL;
1065             }
1066           else
1067             DECODE_ISO_CHARACTER (charset0, c1);
1068           break;
1069
1070         case ISO_0xA0_or_0xFF:
1071           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1072               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1073             goto label_invalid_code;
1074           /* This is a graphic character, we fall down ... */
1075
1076         case ISO_graphic_plane_1:
1077           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1078             goto label_invalid_code;
1079           else
1080             DECODE_ISO_CHARACTER (charset1, c1);
1081           break;
1082
1083         case ISO_control_code:
1084           /* All ISO2022 control characters in this class have the
1085              same representation in Emacs internal format.  */
               /* A bare LF is inconsistent with a CR or CRLF eol type; stop
                  here if the caller asked for such text to be rejected.  */
1086           if (c1 == '\n'
1087               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1088               && (coding->eol_type == CODING_EOL_CR
1089                   || coding->eol_type == CODING_EOL_CRLF))
1090             {
1091               result = CODING_FINISH_INCONSISTENT_EOL;
1092               goto label_end_of_loop_2;
1093             }
1094           *dst++ = c1;
1095           coding->produced_char++;
1096           break;
1097
1098         case ISO_carriage_return:
1099           if (coding->eol_type == CODING_EOL_CR)
1100             *dst++ = '\n';
1101           else if (coding->eol_type == CODING_EOL_CRLF)
1102             {
1103               ONE_MORE_BYTE (c1);
1104               if (c1 == ISO_CODE_LF)
1105                 *dst++ = '\n';
1106               else
1107                 {
1108                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1109                     {
1110                       result = CODING_FINISH_INCONSISTENT_EOL;
1111                       goto label_end_of_loop_2;
1112                     }
                       /* A lone CR: put back the byte just read and keep
                          the CR itself.  */
1113                   src--;
1114                   *dst++ = '\r';
1115                 }
1116             }
1117           else
1118             *dst++ = c1;
1119           coding->produced_char++;
1120           break;
1121
1122         case ISO_shift_out:
1123           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1124               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1125             goto label_invalid_code;
               /* Invoke G1 into graphic plane 0.  */
1126           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1127           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1128           break;
1129
1130         case ISO_shift_in:
1131           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1132             goto label_invalid_code;
               /* Invoke G0 into graphic plane 0.  */
1133           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1134           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1135           break;
1136
1137         case ISO_single_shift_2_7:
1138         case ISO_single_shift_2:
1139           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1140             goto label_invalid_code;
1141           /* SS2 is handled as an escape sequence of ESC 'N' */
1142           c1 = 'N';
1143           goto label_escape_sequence;
1144
1145         case ISO_single_shift_3:
1146           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1147             goto label_invalid_code;
1148           /* SS3 is handled as an escape sequence of ESC 'O' */
1149           c1 = 'O';
1150           goto label_escape_sequence;
1151
1152         case ISO_control_sequence_introducer:
1153           /* CSI is handled as an escape sequence of ESC '[' ...  */
1154           c1 = '[';
1155           goto label_escape_sequence;
1156
1157         case ISO_escape:
1158           ONE_MORE_BYTE (c1);
1159         label_escape_sequence:
1160           /* Escape sequences handled by Emacs are invocation,
1161              designation, direction specification, and character
1162              composition specification.  */
1163           switch (c1)
1164             {
1165             case '&':           /* revision of following character set */
                   /* Expect a byte in '@' .. '~' followed by another ESC,
                      then handle the escape sequence that follows.  */
1166               ONE_MORE_BYTE (c1);
1167               if (!(c1 >= '@' && c1 <= '~'))
1168                 goto label_invalid_code;
1169               ONE_MORE_BYTE (c1);
1170               if (c1 != ISO_CODE_ESC)
1171                 goto label_invalid_code;
1172               ONE_MORE_BYTE (c1);
1173               goto label_escape_sequence;
1174
1175             case '$':           /* designation of 2-byte character set */
1176               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1177                 goto label_invalid_code;
1178               ONE_MORE_BYTE (c1);
1179               if (c1 >= '@' && c1 <= 'B')
1180                 {       /* designation of JISX0208.1978, GB2312.1980,
1181                                    or JISX0208.1980 */
1182                   DECODE_DESIGNATION (0, 2, 94, c1);
1183                 }
1184               else if (c1 >= 0x28 && c1 <= 0x2B)
1185                 {       /* designation of DIMENSION2_CHARS94 character set */
1186                   ONE_MORE_BYTE (c2);
1187                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1188                 }
1189               else if (c1 >= 0x2C && c1 <= 0x2F)
1190                 {       /* designation of DIMENSION2_CHARS96 character set */
1191                   ONE_MORE_BYTE (c2);
1192                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1193                 }
1194               else
1195                 goto label_invalid_code;
1196               break;
1197
1198             case 'n':           /* invocation of locking-shift-2 */
1199               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1200                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1201                 goto label_invalid_code;
1202               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1203               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1204               break;
1205
1206             case 'o':           /* invocation of locking-shift-3 */
1207               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1208                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1209                 goto label_invalid_code;
1210               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1211               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1212               break;
1213
1214             case 'N':           /* invocation of single-shift-2 */
1215               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1216                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1217                 goto label_invalid_code;
                   /* Decode just the next character with the charset of G2.  */
1218               ONE_MORE_BYTE (c1);
1219               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1220               DECODE_ISO_CHARACTER (charset, c1);
1221               break;
1222
1223             case 'O':           /* invocation of single-shift-3 */
1224               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1225                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1226                 goto label_invalid_code;
                   /* Decode just the next character with the charset of G3.  */
1227               ONE_MORE_BYTE (c1);
1228               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1229               DECODE_ISO_CHARACTER (charset, c1);
1230               break;
1231
1232             case '0': case '2': /* start composing */
1233               /* Before processing composing, we must be sure that all
1234                  characters being composed are supported by CODING.
1235                  If not, we must give up composing and insert the
1236                  bunch of codes for composing as is without decoding.  */
1237               {
1238                 int result1;
1239
                     /* RESULT1 is 0 if the sequence is valid, positive (a
                        byte count) if it contains an invalid code, and
                        negative if more source text is needed.  */
1240                 result1 = check_composing_code (coding, src, src_end);
1241                 if (result1 == 0)
1242                   {
1243                     coding->composing = (c1 == '0'
1244                                          ? COMPOSING_NO_RULE_HEAD
1245                                          : COMPOSING_WITH_RULE_HEAD);
1246                     coding->produced_char++;
1247                   }
1248                 else if (result1 > 0)
1249                   {
                         /* Copy the sequence as is, starting at SRC_BASE.
                            The 2 extra bytes are presumably the ESC and
                            the '0' or '2' that introduced it.  */
1250                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1251                       {
1252                         bcopy (src_base, dst, result1 + 2);
1253                         src += result1;
1254                         dst += result1 + 2;
1255                         coding->produced_char += result1 + 2;
1256                       }
1257                     else
1258                       {
1259                         result = CODING_FINISH_INSUFFICIENT_DST;
1260                         goto label_end_of_loop_2;
1261                       }
1262                   }
1263                 else
1264                   goto label_end_of_loop;
1265               }
1266               break;
1267
1268             case '1':           /* end composing */
1269               coding->composing = COMPOSING_NO;
1270               break;
1271
1272             case '[':           /* specification of direction */
1273               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1274                 goto label_invalid_code;
1275               /* For the moment, nested direction is not supported.
1276                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1277                  left-to-right, and nonzero means right-to-left.  */
1278               ONE_MORE_BYTE (c1);
1279               switch (c1)
1280                 {
1281                 case ']':       /* end of the current direction */
1282                   coding->mode &= ~CODING_MODE_DIRECTION;
                       /* NOTE(review): there is no `break' here, so control
                          falls through into the '0'/'1' case, which reads
                          one more byte and requires it to be ']'.  Confirm
                          that this is intended.  */
1283
1284                 case '0':       /* end of the current direction */
1285                 case '1':       /* start of left-to-right direction */
1286                   ONE_MORE_BYTE (c1);
1287                   if (c1 == ']')
1288                     coding->mode &= ~CODING_MODE_DIRECTION;
1289                   else
1290                     goto label_invalid_code;
1291                   break;
1292
1293                 case '2':       /* start of right-to-left direction */
1294                   ONE_MORE_BYTE (c1);
1295                   if (c1 == ']')
1296                     coding->mode |= CODING_MODE_DIRECTION;
1297                   else
1298                     goto label_invalid_code;
1299                   break;
1300
1301                 default:
1302                   goto label_invalid_code;
1303                 }
1304               break;
1305
1306             default:
1307               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1308                 goto label_invalid_code;
1309               if (c1 >= 0x28 && c1 <= 0x2B)
1310                 {       /* designation of DIMENSION1_CHARS94 character set */
1311                   ONE_MORE_BYTE (c2);
1312                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1313                 }
1314               else if (c1 >= 0x2C && c1 <= 0x2F)
1315                 {       /* designation of DIMENSION1_CHARS96 character set */
1316                   ONE_MORE_BYTE (c2);
1317                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1318                 }
1319               else
1320                 {
1321                   goto label_invalid_code;
1322                 }
1323             }
1324           /* We must update these variables now.  */
1325           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1326           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1327           break;
1328
1329         label_invalid_code:
               /* Copy the bytes read in this iteration (from SRC_BASE up
                  to SRC) to the destination without decoding them.  */
1330           while (src_base < src)
1331             *dst++ = *src_base++;
1332           coding->fake_multibyte = 1;
1333         }
1334       continue;
1335
           /* The two labels below are reached only by `goto'.  Both rewind
              SRC to the start of the current iteration and leave the
              loop; the first one also reports insufficient source.  */
1336     label_end_of_loop:
1337       result = CODING_FINISH_INSUFFICIENT_SRC;
1338     label_end_of_loop_2:
1339       src = src_base;
1340       break;
1341     }
1342
1343   if (src < src_end)
1344     {
           /* Source text remains.  If nothing else was reported, the loop
              must have stopped for lack of destination space.  */
1345       if (result == CODING_FINISH_NORMAL)
1346         result = CODING_FINISH_INSUFFICIENT_DST;
1347       else if (result != CODING_FINISH_INCONSISTENT_EOL
1348                && coding->mode & CODING_MODE_LAST_BLOCK)
1349         {
1350           /* This is the last block of the text to be decoded.  We had
1351              better just flush out all remaining codes in the text
1352              although they are not valid characters.  */
1353           src_bytes = src_end - src;
               /* Copy no more than the destination can hold.  */
1354           if (dst_bytes && (dst_end - dst < src_bytes))
1355             src_bytes = dst_end - dst;
1356           bcopy (src, dst, src_bytes);
1357           dst += src_bytes;
1358           src += src_bytes;
1359           coding->fake_multibyte = 1;
1360         }
1361     }
1362
1363   coding->consumed = coding->consumed_char = src - source;
1364   coding->produced = dst - destination;
1365   return result;
1366 }
1367
1368 /* ISO2022 encoding stuff.  */
1369
1370 /*
1371    It is not enough to say just "ISO2022" on encoding, we have to
1372    specify more details.  In Emacs, each coding system of ISO2022
1373    variant has the following specifications:
1374         1. Initial designation to G0 thru G3.
1375         2. Allows short-form designation?
1376         3. ASCII should be designated to G0 before control characters?
1377         4. ASCII should be designated to G0 at end of line?
1378         5. 7-bit environment or 8-bit environment?
1379         6. Use locking-shift?
1380         7. Use Single-shift?
1381    And the following two are only for Japanese:
1382         8. Use ASCII in place of JISX0201-1976-Roman?
1383         9. Use JISX0208-1983 in place of JISX0208-1978?
1384    These specifications are encoded in `coding->flags' as flag bits
1385    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1386    details.
1387 */
1388
1389 /* Produce codes (escape sequence) for designating CHARSET to graphic
1390    register REG.  If <final-char> of CHARSET is '@', 'A', or 'B' and
1391    the coding system CODING allows, produce designation sequence of
1392    short-form.
        The bytes are stored through the caller's DST pointer, which is
        advanced; no check for remaining space is made here.  If the
        revision number of CHARSET in CODING is less than 255, the
        sequence ESC '&' ('@' + revision) is produced first.  The short
        form (no intermediate character after '$') is used only for a
        2-dimensional 94-character set designated to register 0 whose
        final character is '@', 'A' or 'B', and only when
        CODING_FLAG_ISO_SHORT_FORM is set in CODING->flags.  Finally,
        CHARSET is recorded as the designation of REG in CODING.  */
1393
1394 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
1395   do {                                                                  \
1396     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
1397     char *intermediate_char_94 = "()*+";                                \
1398     char *intermediate_char_96 = ",-./";                                \
1399     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
1400     if (revision < 255)                                                 \
1401       {                                                                 \
1402         *dst++ = ISO_CODE_ESC;                                          \
1403         *dst++ = '&';                                                   \
1404         *dst++ = '@' + revision;                                        \
1405       }                                                                 \
1406     *dst++ = ISO_CODE_ESC;                                              \
1407     if (CHARSET_DIMENSION (charset) == 1)                               \
1408       {                                                                 \
1409         if (CHARSET_CHARS (charset) == 94)                              \
1410           *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
1411         else                                                            \
1412           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1413       }                                                                 \
1414     else                                                                \
1415       {                                                                 \
1416         *dst++ = '$';                                                   \
1417         if (CHARSET_CHARS (charset) == 94)                              \
1418           {                                                             \
1419             if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
1420                 || reg != 0                                             \
1421                 || final_char < '@' || final_char > 'B')                \
1422               *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
1423           }                                                             \
1424         else                                                            \
1425           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1426       }                                                                 \
1427     *dst++ = final_char;                                                \
1428     CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
1429   } while (0)
1430
1431 /* The following two macros produce codes (control character or escape
1432    sequence) for ISO2022 single-shift functions (single-shift-2 and
1433    single-shift-3).

        ENCODE_SINGLE_SHIFT_2 produces the two bytes ESC 'N' when
        CODING_FLAG_ISO_SEVEN_BITS is set in the flags of `coding',
        otherwise the single byte ISO_CODE_SS2, in which case
        `coding->fake_multibyte' is also set.  In both cases the
        single-shifting state of `coding' is set to 1; the macros
        ENCODE_ISO_CHARACTER_DIMENSION1 and ENCODE_ISO_CHARACTER_DIMENSION2
        test and clear that state when they produce the next character.
        The variables `coding' and DST are taken from the expanding
        scope.  */
1434
1435 #define ENCODE_SINGLE_SHIFT_2                           \
1436   do {                                                  \
1437     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1438       *dst++ = ISO_CODE_ESC, *dst++ = 'N';              \
1439     else                                                \
1440       {                                                 \
1441         *dst++ = ISO_CODE_SS2;                          \
1442         coding->fake_multibyte = 1;                     \
1443       }                                                 \
1444     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1445   } while (0)
1446
/* Produce the single-shift-3 function: the two bytes ESC 'O' when
   CODING_FLAG_ISO_SEVEN_BITS is set in the flags of `coding',
   otherwise the single byte ISO_CODE_SS3 (and then
   `coding->fake_multibyte' is set).  The single-shifting state of
   `coding' is set to 1 in both cases.  The variables `coding' and DST
   are taken from the expanding scope.  */
1447 #define ENCODE_SINGLE_SHIFT_3                           \
1448   do {                                                  \
1449     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1450       *dst++ = ISO_CODE_ESC, *dst++ = 'O';              \
1451     else                                                \
1452       {                                                 \
1453         *dst++ = ISO_CODE_SS3;                          \
1454         coding->fake_multibyte = 1;                     \
1455       }                                                 \
1456     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1457   } while (0)
1458
1459 /* The following four macros produce codes (control character or
1460    escape sequence) for ISO2022 locking-shift functions (shift-in,
1461    shift-out, locking-shift-2, and locking-shift-3).

        Each macro stores its code through DST of the expanding scope and
        then records in `coding' that graphic plane 0 now invokes graphic
        register 0, 1, 2 or 3 respectively.  Shift-in and shift-out are
        the single bytes ISO_CODE_SI and ISO_CODE_SO; locking-shift-2 and
        locking-shift-3 are the two-byte sequences ESC 'n' and ESC 'o'.  */
1462
1463 #define ENCODE_SHIFT_IN                         \
1464   do {                                          \
1465     *dst++ = ISO_CODE_SI;                       \
1466     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1467   } while (0)
1468
1469 #define ENCODE_SHIFT_OUT                        \
1470   do {                                          \
1471     *dst++ = ISO_CODE_SO;                       \
1472     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1473   } while (0)
1474
1475 #define ENCODE_LOCKING_SHIFT_2                  \
1476   do {                                          \
1477     *dst++ = ISO_CODE_ESC, *dst++ = 'n';        \
1478     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1479   } while (0)
1480
1481 #define ENCODE_LOCKING_SHIFT_3                  \
1482   do {                                          \
1483     *dst++ = ISO_CODE_ESC, *dst++ = 'o';        \
1484     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1485   } while (0)
1486
1487 /* Produce codes for a DIMENSION1 character whose character set is
1488    CHARSET and whose position-code is C1.  Designation and invocation
1489    sequences are also produced in advance if necessary.

        The macro is a loop that is left by `break' as soon as the
        character (or its substitute) has been stored through DST:
          - right after a single-shift, C1 is produced with the 8th bit
            cleared or set according to CODING_FLAG_ISO_SEVEN_BITS, and
            the single-shifting state is cleared;
          - if CHARSET is the charset of graphic plane 0, C1 is produced
            with the 8th bit cleared; if it is the charset of graphic
            plane 1, with the 8th bit set;
          - if CODING_FLAG_ISO_SAFE is set and CHARSET is not marked in
            `coding->safe_charsets', the substitution character is
            produced once, or twice when the width of CHARSET is 2;
          - otherwise encode_invocation_designation is called and the
            loop is repeated.
        The variables `coding' and DST are taken from the expanding
        scope; CHARSET and C1 are evaluated more than once.  */
1490
1491
1492 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
1493   do {                                                                  \
1494     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1495       {                                                                 \
1496         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1497           *dst++ = c1 & 0x7F;                                           \
1498         else                                                            \
1499           *dst++ = c1 | 0x80;                                           \
1500         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1501         break;                                                          \
1502       }                                                                 \
1503     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1504       {                                                                 \
1505         *dst++ = c1 & 0x7F;                                             \
1506         break;                                                          \
1507       }                                                                 \
1508     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1509       {                                                                 \
1510         *dst++ = c1 | 0x80;                                             \
1511         break;                                                          \
1512       }                                                                 \
1513     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1514              && !coding->safe_charsets[charset])                        \
1515       {                                                                 \
1516         /* We should not encode this character, instead produce one or  \
1517            two substitution characters, matching the width.  */         \
1518         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1519         if (CHARSET_WIDTH (charset) == 2)                               \
1520           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1521         break;                                                          \
1522       }                                                                 \
1523     else                                                                \
1524       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1525          must invoke it, or, at first, designate it to some graphic     \
1526          register.  Then repeat the loop to actually produce the        \
1527          character.  */                                                 \
1528       dst = encode_invocation_designation (charset, coding, dst);       \
1529   } while (1)
1530
1531 /* Produce codes for a DIMENSION2 character whose character set is
1532    CHARSET and whose position-codes are C1 and C2.  Designation and
1533    invocation codes are also produced in advance if necessary.

        The macro is a loop that is left by `break' as soon as the
        character (or its substitute) has been stored through DST:
          - right after a single-shift, C1 and C2 are produced with the
            8th bit cleared or set according to
            CODING_FLAG_ISO_SEVEN_BITS, and the single-shifting state is
            cleared;
          - if CHARSET is the charset of graphic plane 0, both codes are
            produced with the 8th bit cleared; if it is the charset of
            graphic plane 1, with the 8th bit set;
          - if CODING_FLAG_ISO_SAFE is set and CHARSET is not marked in
            `coding->safe_charsets', the substitution character is
            produced once, or twice when the width of CHARSET is 2;
          - otherwise encode_invocation_designation is called and the
            loop is repeated.
        The variables `coding' and DST are taken from the expanding
        scope; CHARSET, C1 and C2 are evaluated more than once.  */
1534
1535 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
1536   do {                                                                  \
1537     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1538       {                                                                 \
1539         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1540           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
1541         else                                                            \
1542           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
1543         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1544         break;                                                          \
1545       }                                                                 \
1546     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1547       {                                                                 \
1548         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;                          \
1549         break;                                                          \
1550       }                                                                 \
1551     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1552       {                                                                 \
1553         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
1554         break;                                                          \
1555       }                                                                 \
1556     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1557              && !coding->safe_charsets[charset])                        \
1558       {                                                                 \
1559         /* We should not encode this character, instead produce one or  \
1560            two substitution characters, matching the width.  */         \
1561         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1562         if (CHARSET_WIDTH (charset) == 2)                               \
1563           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1564         break;                                                          \
1565       }                                                                 \
1566     else                                                                \
1567       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1568          must invoke it, or, at first, designate it to some graphic     \
1569          register.  Then repeat the loop to actually produce the        \
1570          character.  */                                                 \
1571       dst = encode_invocation_designation (charset, coding, dst);       \
1572   } while (1)
1573
/* Produce codes for one character of CHARSET whose position-codes are
   C1 and C2 (C2 is a dummy for a DIMENSION1 charset).

   If the variable `translation_table' of the expanding scope is
   non-nil and translate_char returns a non-negative character for
   (CHARSET, C1, C2), that character is split into a new charset and
   new position-codes which are stored back into C1 and C2; therefore
   C1 and C2 must be lvalues.  The character is then produced by
   ENCODE_ISO_CHARACTER_DIMENSION1 or ENCODE_ISO_CHARACTER_DIMENSION2
   according to the dimension of the resulting charset, after these
   substitutions:
     - ASCII is replaced by `charset_latin_jisx0201' when
       CODING_FLAG_ISO_USE_ROMAN is set;
     - `charset_jisx0208' is replaced by `charset_jisx0208_1978' when
       CODING_FLAG_ISO_USE_OLDJIS is set.
   NOTE(review): both substitutions test the original CHARSET, not the
   translated one, so they override the result of the translation --
   confirm that this is intended.

   Unless a composition is in progress, `coding->consumed_char' is
   incremented.  The variables `coding' and DST are taken from the
   expanding scope.  */
1574 #define ENCODE_ISO_CHARACTER(charset, c1, c2)                   \
1575   do {                                                          \
1576     int c_alt, charset_alt;                                     \
1577     if (!NILP (translation_table)                               \
1578         && ((c_alt = translate_char (translation_table, -1,     \
1579                                      charset, c1, c2))          \
1580             >= 0))                                              \
1581       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
1582     else                                                        \
1583       charset_alt = charset;                                    \
1584     if (CHARSET_DIMENSION (charset_alt) == 1)                   \
1585       {                                                         \
1586         if (charset == CHARSET_ASCII                            \
1587             && coding->flags & CODING_FLAG_ISO_USE_ROMAN)       \
1588           charset_alt = charset_latin_jisx0201;                 \
1589         ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);      \
1590       }                                                         \
1591     else                                                        \
1592       {                                                         \
1593         if (charset == charset_jisx0208                         \
1594             && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)      \
1595           charset_alt = charset_jisx0208_1978;                  \
1596         ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);  \
1597       }                                                         \
1598     if (! COMPOSING_P (coding->composing))                      \
1599       coding->consumed_char++;                                  \
1600   } while (0)
1601
1602 /* Produce designation and invocation codes at a place pointed by DST
1603    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1604    Return new DST.

        If CHARSET is not designated to any of the four graphic
        registers, it is designated to the register requested for it in
        CODING, or to register 0 when no register is requested.  Then, if
        that register is invoked to neither graphic plane 0 nor graphic
        plane 1, a shift function is produced for it: shift-in or
        shift-out for register 0 or 1, and for register 2 or 3 either a
        single-shift (when CODING_FLAG_ISO_SINGLE_SHIFT is set) or a
        locking-shift.  */
1605
1606 unsigned char *
1607 encode_invocation_designation (charset, coding, dst)
1608      int charset;
1609      struct coding_system *coding;
1610      unsigned char *dst;
1611 {
1612   int reg;                      /* graphic register number */
1613
1614   /* At first, check designations.  */
1615   for (reg = 0; reg < 4; reg++)
1616     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1617       break;
1618
1619   if (reg >= 4)
1620     {
1621       /* CHARSET is not yet designated to any graphic registers.  */
1622       /* At first check the requested designation.  */
1623       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1624       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1625         /* Since CHARSET requests no special designation, designate it
1626            to graphic register 0.  */
1627         reg = 0;
1628
       /* This stores the designation sequence through DST and records
          CHARSET as the current designation of REG.  */
1629       ENCODE_DESIGNATION (charset, reg, coding);
1630     }
1631
1632   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1633       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1634     {
1635       /* Since the graphic register REG is not invoked to any graphic
1636          planes, invoke it to graphic plane 0.  */
1637       switch (reg)
1638         {
1639         case 0:                 /* graphic register 0 */
1640           ENCODE_SHIFT_IN;
1641           break;
1642
1643         case 1:                 /* graphic register 1 */
1644           ENCODE_SHIFT_OUT;
1645           break;
1646
1647         case 2:                 /* graphic register 2 */
           /* A single-shift does not change the recorded invocation;
              it only sets the single-shifting state of CODING.  */
1648           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1649             ENCODE_SINGLE_SHIFT_2;
1650           else
1651             ENCODE_LOCKING_SHIFT_2;
1652           break;
1653
1654         case 3:                 /* graphic register 3 */
1655           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1656             ENCODE_SINGLE_SHIFT_3;
1657           else
1658             ENCODE_LOCKING_SHIFT_3;
1659           break;
1660         }
1661     }
1662   return dst;
1663 }
1664
1665 /* The following three macros produce codes for indicating composition:
        ESC '0' starts a composition without rule, ESC '2' starts a
        composition with rule, and ESC '1' ends a composition.  Each
        macro expands to a comma expression that stores two bytes through
        DST of the expanding scope.  */
1666 #define ENCODE_COMPOSITION_NO_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '0'
1667 #define ENCODE_COMPOSITION_WITH_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '2'
1668 #define ENCODE_COMPOSITION_END    *dst++ = ISO_CODE_ESC, *dst++ = '1'
1669
1670 /* The following three macros produce codes for indicating direction
1671    of text.

        ENCODE_CONTROL_SEQUENCE_INTRODUCER produces the two bytes ESC '['
        when CODING_FLAG_ISO_SEVEN_BITS is set in `coding->flags',
        otherwise the single byte ISO_CODE_CSI.  The flag is tested as a
        bit (as the single-shift macros do) because other flag bits may
        be set at the same time.

        ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R produce the
        introducer followed by "2]" and "0]" respectively.  They wrap the
        introducer in their own block because it is a statement and
        cannot be an operand of the comma operator.

        Each macro is a single statement and uses the variables `coding'
        and DST of the expanding scope.  */
1672 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
1673   do {                                                  \
1674     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1675       *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
1676     else                                                \
1677       *dst++ = ISO_CODE_CSI;                            \
1678   } while (0)
1679
1680 #define ENCODE_DIRECTION_R2L    \
1681   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '2'; *dst++ = ']'; } while (0)
1682
1683 #define ENCODE_DIRECTION_L2R    \
1684   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '0'; *dst++ = ']'; } while (0)
1685
1686 /* Produce codes for designation and invocation to reset the graphic
1687    planes and registers to initial state.

        If graphic plane 0 does not invoke graphic register 0, a shift-in
        is produced.  Then, for each of the four graphic registers whose
        initial designation in `coding' is non-negative and differs from
        its current designation, the initial charset is designated again.
        The variables `coding' and DST are taken from the expanding
        scope.  */
1688 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
1689   do {                                                                      \
1690     int reg;                                                                \
1691     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
1692       ENCODE_SHIFT_IN;                                                      \
1693     for (reg = 0; reg < 4; reg++)                                           \
1694       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0            \
1695           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
1696               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
1697         ENCODE_DESIGNATION                                                  \
1698           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1699   } while (0)
1700
1701 /* Produce designation sequences of charsets in the line started from
1702    SRC to a place pointed by *DSTP, and update DSTP.
1703
1704    If the current block ends before any end-of-line, we may fail to
1705    find all the necessary designations.

        TABLE is a translation table or nil; when it is non-nil, each
        character is translated by it before its charset is looked up.
        Only charsets for which CODING requests a designation register
        are considered, and for each register the first such charset
        found on the line is the one designated.  The scan stops at
        SRC_END, at a newline, or as soon as all four registers have a
        candidate.  A register whose current designation already is the
        candidate produces nothing.  *DSTP is left untouched when no
        candidate was found.  */
1706
1707 void
1708 encode_designation_at_bol (coding, table, src, src_end, dstp)
1709      struct coding_system *coding;
1710      Lisp_Object table;
1711      unsigned char *src, *src_end, **dstp;
1712 {
     /* FOUND counts the registers that have a candidate; C is not used
        in this function.  */
1713   int charset, c, found = 0, reg;
1714   /* Table of charsets to be designated to each graphic register.  */
1715   int r[4];
     /* ENCODE_DESIGNATION stores through a variable named DST.  */
1716   unsigned char *dst = *dstp;
1717
1718   for (reg = 0; reg < 4; reg++)
1719     r[reg] = -1;
1720
1721   while (src < src_end && *src != '\n' && found < 4)
1722     {
1723       int bytes = BYTES_BY_CHAR_HEAD (*src);
1724
1725       if (NILP (table))
1726         charset = CHARSET_AT (src);
1727       else
1728         {
1729           int c_alt;
1730           unsigned char c1, c2;
1731
1732           SPLIT_STRING(src, bytes, charset, c1, c2);
1733           if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
1734             charset = CHAR_CHARSET (c_alt);
1735         }
1736
       /* Remember only the first charset requesting each register.
          REG indexes R, so a requested register is expected to be
          0..3.  */
1737       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1738       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1739         {
1740           found++;
1741           r[reg] = charset;
1742         }
1743
1744       src += bytes;
1745     }
1746
1747   if (found)
1748     {
1749       for (reg = 0; reg < 4; reg++)
1750         if (r[reg] >= 0
1751             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1752           ENCODE_DESIGNATION (r[reg], reg, coding);
1753       *dstp = dst;
1754     }
1755 }
1756
1757 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".

        Encode the SRC_BYTES bytes of Emacs' internal format at SOURCE
        into ISO2022 at DESTINATION according to CODING.  On return,
        `coding->consumed' is the number of source bytes used,
        `coding->consumed_char' the number of characters counted while
        encoding, and `coding->produced' and `coding->produced_char' are
        both the number of bytes stored.  The value is
        CODING_FINISH_NORMAL, CODING_FINISH_INSUFFICIENT_SRC (the source
        ends in the middle of a multi-byte sequence) or
        CODING_FINISH_INSUFFICIENT_DST (the loop stopped before the end
        of the source for lack of room).  When CODING_MODE_LAST_BLOCK is
        set in `coding->mode', the graphic planes and registers are reset
        and a pending composition is closed before returning.  */
1758
1759 int
1760 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1761      struct coding_system *coding;
1762      unsigned char *source, *destination;
1763      int src_bytes, dst_bytes;
1764 {
1765   unsigned char *src = source;
1766   unsigned char *src_end = source + src_bytes;
1767   unsigned char *dst = destination;
1768   unsigned char *dst_end = destination + dst_bytes;
1769   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1770      from DST_END to assure overflow checking is necessary only at the
1771      head of loop.  */
1772   unsigned char *adjusted_dst_end = dst_end - 19;
1773   Lisp_Object translation_table
1774       = coding->translation_table_for_encode;
1775   int result = CODING_FINISH_NORMAL;
1776
     /* Fall back to the standard table when CODING has none and
        character translation is enabled.  */
1777   if (!NILP (Venable_character_translation) && NILP (translation_table))
1778     translation_table = Vstandard_translation_table_for_encode;
1779
1780   coding->consumed_char = 0;
1781   coding->fake_multibyte = 0;
     /* When DST_BYTES is zero the room is measured against SRC instead
        of the end of the destination.  NOTE(review): presumably the
        source and destination then share one buffer -- confirm against
        the callers.  */
1782   while (src < src_end && (dst_bytes
1783                            ? (dst < adjusted_dst_end)
1784                            : (dst < src - 19)))
1785     {
1786       /* SRC_BASE remembers the start position in source in each loop.
1787          The loop will be exited when there's not enough source text
1788          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1789          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1790          reset to SRC_BASE before exiting.  */
1791       unsigned char *src_base = src;
1792       int charset, c1, c2, c3, c4;
1793
1794       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1795           && CODING_SPEC_ISO_BOL (coding))
1796         {
1797           /* We have to produce designation sequences if any now.  */
1798           encode_designation_at_bol (coding, translation_table,
1799                                      src, src_end, &dst);
1800           CODING_SPEC_ISO_BOL (coding) = 0;
1801         }
1802
1803       c1 = *src++;
1804       /* If we are seeing a component of a composite character, we are
1805          seeing a leading-code encoded irregularly for composition, or
1806          a composition rule if composing with rule.  We must set C1 to
1807          a normal leading-code or an ASCII code.  If we are not seeing
1808          a composite character, we must reset composition,
1809          designation, and invocation states.  */
1810       if (COMPOSING_P (coding->composing))
1811         {
1812           if (c1 < 0xA0)
1813             {
1814               /* We are not in a composite character any longer.  */
1815               coding->composing = COMPOSING_NO;
1816               ENCODE_RESET_PLANE_AND_REGISTER;
1817               ENCODE_COMPOSITION_END;
1818             }
1819           else
1820             {
1821               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1822                 {
                   /* C1 is a composition rule; produce it with the 8th
                      bit cleared and expect a component next.  */
1823                   *dst++ = c1 & 0x7F;
1824                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1825                   continue;
1826                 }
1827               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1828                 coding->composing = COMPOSING_WITH_RULE_RULE;
1829               if (c1 == 0xA0)
1830                 {
1831                   /* This is an ASCII component.  */
1832                   ONE_MORE_BYTE (c1);
1833                   c1 &= 0x7F;
1834                 }
1835               else
1836                 /* This is a leading-code of non ASCII component.  */
1837                 c1 -= 0x20;
1838             }
1839         }
1840
1841       /* Now encode one character.  C1 is a control character, an
1842          ASCII character, or a leading-code of multi-byte character.  */
1843       switch (emacs_code_class[c1])
1844         {
1845         case EMACS_ascii_code:
1846           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1847           break;
1848
1849         case EMACS_control_code:
1850           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1851             ENCODE_RESET_PLANE_AND_REGISTER;
1852           *dst++ = c1;
1853           coding->consumed_char++;
1854           break;
1855
1856         case EMACS_carriage_return_code:
1857           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1858             {
1859               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1860                 ENCODE_RESET_PLANE_AND_REGISTER;
1861               *dst++ = c1;
1862               coding->consumed_char++;
1863               break;
1864             }
1865           /* fall down to treat '\r' as '\n' ...  */
1866
1867         case EMACS_linefeed_code:
1868           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1869             ENCODE_RESET_PLANE_AND_REGISTER;
           /* Only the recorded designations are reset here; no
              designation sequence is produced.  */
1870           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1871             bcopy (coding->spec.iso2022.initial_designation,
1872                    coding->spec.iso2022.current_designation,
1873                    sizeof coding->spec.iso2022.initial_designation);
           /* Produce the end-of-line in the format of CODING.  */
1874           if (coding->eol_type == CODING_EOL_LF
1875               || coding->eol_type == CODING_EOL_UNDECIDED)
1876             *dst++ = ISO_CODE_LF;
1877           else if (coding->eol_type == CODING_EOL_CRLF)
1878             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1879           else
1880             *dst++ = ISO_CODE_CR;
1881           CODING_SPEC_ISO_BOL (coding) = 1;
1882           coding->consumed_char++;
1883           break;
1884
1885         case EMACS_leading_code_2:
1886           ONE_MORE_BYTE (c2);
1887           if (c2 < 0xA0)
1888             {
1889               /* invalid sequence */
               /* Copy the leading-code alone and rescan the byte that
                  followed it.  */
1890               *dst++ = c1;
1891               src--;
1892               coding->consumed_char++;
1893             }
1894           else
1895             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1896           break;
1897
1898         case EMACS_leading_code_3:
1899           TWO_MORE_BYTES (c2, c3);
1900           if (c2 < 0xA0 || c3 < 0xA0)
1901             {
1902               /* invalid sequence */
1903               *dst++ = c1;
1904               src -= 2;
1905               coding->consumed_char++;
1906             }
           /* Below LEADING_CODE_PRIVATE_11, C1 itself is the charset
              and C2, C3 are the position-codes; otherwise C2 is the
              charset and C3 the only position-code.  */
1907           else if (c1 < LEADING_CODE_PRIVATE_11)
1908             ENCODE_ISO_CHARACTER (c1, c2, c3);
1909           else
1910             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1911           break;
1912
1913         case EMACS_leading_code_4:
1914           THREE_MORE_BYTES (c2, c3, c4);
1915           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1916             {
1917               /* invalid sequence */
1918               *dst++ = c1;
1919               src -= 3;
1920               coding->consumed_char++;
1921             }
1922           else
1923             ENCODE_ISO_CHARACTER (c2, c3, c4);
1924           break;
1925
1926         case EMACS_leading_code_composition:
1927           ONE_MORE_BYTE (c2);
1928           if (c2 < 0xA0)
1929             {
1930               /* invalid sequence */
1931               *dst++ = c1;
1932               src--;
1933               coding->consumed_char++;
1934             }
1935           else if (c2 == 0xFF)
1936             {
               /* 0xFF after the leading-code starts a composition with
                  rule.  */
1937               ENCODE_RESET_PLANE_AND_REGISTER;
1938               coding->composing = COMPOSING_WITH_RULE_HEAD;
1939               ENCODE_COMPOSITION_WITH_RULE_START;
1940               coding->consumed_char++;
1941             }
1942           else
1943             {
1944               ENCODE_RESET_PLANE_AND_REGISTER;
1945               /* Rewind one byte because it is a character code of
1946                  composition elements.  */
1947               src--;
1948               coding->composing = COMPOSING_NO_RULE_HEAD;
1949               ENCODE_COMPOSITION_NO_RULE_START;
1950               coding->consumed_char++;
1951             }
1952           break;
1953
1954         case EMACS_invalid_code:
1955           *dst++ = c1;
1956           coding->consumed_char++;
1957           break;
1958         }
1959       continue;
       /* Reached only by a jump when the source ends inside a
          multi-byte sequence (see the comment on SRC_BASE above).  */
1960     label_end_of_loop:
1961       result = CODING_FINISH_INSUFFICIENT_SRC;
1962       src = src_base;
1963       break;
1964     }
1965
     /* Source is left although it was not truncated: the loop stopped
        for lack of room in the destination.  */
1966   if (src < src_end && result == CODING_FINISH_NORMAL)
1967     result = CODING_FINISH_INSUFFICIENT_DST;
1968
1969   /* If this is the last block of the text to be encoded, we must
1970      reset graphic planes and registers to the initial state, and
1971      flush out the carryover if any.  */
1972   if (coding->mode & CODING_MODE_LAST_BLOCK)
1973     {
1974       ENCODE_RESET_PLANE_AND_REGISTER;
1975       if (COMPOSING_P (coding->composing))
1976         ENCODE_COMPOSITION_END;
1977     }
1978   coding->consumed = src - source;
1979   coding->produced = coding->produced_char = dst - destination;
1980   return result;
1981 }
1982
1983 \f
1984 /*** 4. SJIS and BIG5 handlers ***/
1985
1986 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1987    quite widely.  So, for the moment, Emacs supports them in the bare
1988    C code.  But, in the future, they may be supported only by CCL.  */
1989
1990 /* SJIS is a coding system encoding three character sets: ASCII, right
1991    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
1992    as is.  A character of charset katakana-jisx0201 is encoded by
1993    "position-code + 0x80".  A character of charset japanese-jisx0208
1994    is encoded in two bytes, but its two position-codes are divided and
1995    shifted so that they fit in the ranges below.
1996
1997    --- CODE RANGE of SJIS ---
1998    (character set)      (range)
1999    ASCII                0x00 .. 0x7F
2000    KATAKANA-JISX0201    0xA0 .. 0xDF
2001    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xEF
2002             (2nd byte)  0x40 .. 0xFF
2003    -------------------------------
2004
2005 */
2006
2007 /* BIG5 is a coding system encoding two character sets: ASCII and
2008    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2009    character set and is encoded in two bytes.
2010
2011    --- CODE RANGE of BIG5 ---
2012    (character set)      (range)
2013    ASCII                0x00 .. 0x7F
2014    Big5 (1st byte)      0xA1 .. 0xFE
2015         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2016    --------------------------
2017
2018    Since the number of characters in Big5 is larger than maximum
2019    characters in Emacs' charset (96x96), it can't be handled as one
2020    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2021    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2022    contains frequently used characters and the latter contains less
2023    frequently used characters.  */
2024
2025 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2026    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2027    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2028    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2029
2030 /* Number of Big5 characters which have the same code in 1st byte.
        It is the sum of the sizes of the two ranges of the 2nd byte:
        0xFF - 0xA1 = 94 codes in 0xA1..0xFE and 0x7F - 0x40 = 63 codes
        in 0x40..0x7E, 157 in total.  */
2031 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2032
/* Convert the two bytes B1 and B2 of a Big5 code into CHARSET and the
   position-codes C1 and C2 of Emacs' internal format.

   B1 and B2 are first turned into a linear index, counting
   BIG5_SAME_ROW characters per value of B1; the gap between the two
   ranges of B2 is closed by subtracting 0x40 from a B2 below 0x7F and
   0x62 from any other B2.  A code whose B1 is below 0xC9 belongs to
   `charset_big5_1'; any other code belongs to `charset_big5_2' and its
   index is counted from B1 == 0xC9.  The index is then split into a
   row and a column of 0xFF - 0xA1 = 94 positions each, both based at
   0x21.

   CHARSET, C1 and C2 must be lvalues.  B1 and B2 are not parenthesized
   in the expansion and B2 is evaluated more than once.  */
2033 #define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
2034   do {                                                                  \
2035     unsigned int temp                                                   \
2036       = (b1 - 0xA1) * BIG5_SAME_ROW + b2 - (b2 < 0x7F ? 0x40 : 0x62);   \
2037     if (b1 < 0xC9)                                                      \
2038       charset = charset_big5_1;                                         \
2039     else                                                                \
2040       {                                                                 \
2041         charset = charset_big5_2;                                       \
2042         temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
2043       }                                                                 \
2044     c1 = temp / (0xFF - 0xA1) + 0x21;                                   \
2045     c2 = temp % (0xFF - 0xA1) + 0x21;                                   \
2046   } while (0)
2047
/* Convert CHARSET and the position-codes C1 and C2 of Emacs' internal
   format into the two bytes B1 and B2 of a Big5 code.  This is the
   inverse of DECODE_BIG5.

   C1 and C2 are turned into a linear index over rows of
   0xFF - 0xA1 = 94 positions based at 0x21; for `charset_big5_2' the
   index is advanced by the number of codes whose 1st byte is below
   0xC9.  B1 is the index divided by BIG5_SAME_ROW, based at 0xA1.
   The remainder selects B2: a remainder below 0x3F is based at 0x40,
   any other remainder has 0x62 added, which places it at 0xA1 and
   above.

   B1 and B2 must be lvalues.  C1 and C2 are not parenthesized in the
   expansion.  */
2048 #define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
2049   do {                                                                  \
2050     unsigned int temp = (c1 - 0x21) * (0xFF - 0xA1) + (c2 - 0x21);      \
2051     if (charset == charset_big5_2)                                      \
2052       temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
2053     b1 = temp / BIG5_SAME_ROW + 0xA1;                                   \
2054     b2 = temp % BIG5_SAME_ROW;                                          \
2055     b2 += b2 < 0x3F ? 0x40 : 0x62;                                      \
2056   } while (0)
2057
/* Produce a decoded character of CHARSET whose position-codes are C1
   and C2.

   If the variable `translation_table' of the expanding scope is
   non-nil and translate_char returns a non-negative character for
   (CHARSET, C1, C2), that character is split into a new charset and
   new position-codes which are stored back into C1 and C2; therefore
   C1 and C2 must be lvalues.  The character is then produced by
   DECODE_CHARACTER_ASCII when the resulting charset is ASCII or
   negative, and otherwise by DECODE_CHARACTER_DIMENSION1 or
   DECODE_CHARACTER_DIMENSION2 according to its dimension.  */
2058 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
2059   do {                                                                  \
2060     int c_alt, charset_alt = (charset);                                 \
2061     if (!NILP (translation_table)                                       \
2062         && ((c_alt = translate_char (translation_table,                 \
2063                                      -1, (charset), c1, c2)) >= 0))     \
2064           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
2065     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
2066       DECODE_CHARACTER_ASCII (c1);                                      \
2067     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
2068       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
2069     else                                                                \
2070       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
2071   } while (0)
2072
/* Encode the character (CHARSET, C1, C2) and store the result at DST.
   This macro relies on `coding', `dst', `sjis_p' and
   `translation_table' of the enclosing function.  C1 and C2 may be
   overwritten.

   If `translation_table' is non-nil and translate_char returns a
   non-negative character, that character is encoded instead.  Then:

     - an ASCII character is stored as the single byte C1;
     - a dimension-1 character is stored as C1 alone if we are
       encoding SJIS and the charset is charset_katakana_jisx0201,
       otherwise as the two bytes CHARSET, C1;
     - a dimension-2 character is stored as a two-byte SJIS code
       (charset_jisx0208 when SJIS_P) or a two-byte BIG5 code
       (charset_big5_1 or charset_big5_2 when not SJIS_P), otherwise
       as the three bytes CHARSET, C1, C2.

   At most three bytes are stored.  `coding->consumed_char' is
   incremented once per expansion.

   The definition must not end with a semicolon; the caller supplies
   it, so that the macro behaves as a single statement.  */

#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)             \
  do {                                                          \
    int c_alt, charset_alt;                                     \
    if (!NILP (translation_table)                               \
        && ((c_alt = translate_char (translation_table, -1,     \
                                     charset, c1, c2))          \
            >= 0))                                              \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
    else                                                        \
      charset_alt = charset;                                    \
    if (charset_alt == charset_ascii)                           \
      *dst++ = c1;                                              \
    else if (CHARSET_DIMENSION (charset_alt) == 1)              \
      {                                                         \
        if (sjis_p && charset_alt == charset_katakana_jisx0201) \
          *dst++ = c1;                                          \
        else                                                    \
          {                                                     \
            *dst++ = charset_alt, *dst++ = c1;                  \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    else                                                        \
      {                                                         \
        c1 &= 0x7F, c2 &= 0x7F;                                 \
        if (sjis_p && charset_alt == charset_jisx0208)          \
          {                                                     \
            unsigned char s1, s2;                               \
                                                                \
            ENCODE_SJIS (c1, c2, s1, s2);                       \
            *dst++ = s1, *dst++ = s2;                           \
            coding->fake_multibyte = 1;                         \
          }                                                     \
        else if (!sjis_p                                        \
                 && (charset_alt == charset_big5_1              \
                     || charset_alt == charset_big5_2))         \
          {                                                     \
            unsigned char b1, b2;                               \
                                                                \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);          \
            *dst++ = b1, *dst++ = b2;                           \
          }                                                     \
        else                                                    \
          {                                                     \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;     \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    coding->consumed_char++;                                    \
  } while (0)
2123
2124 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2125    Check if a text is encoded in SJIS.  If it is, return
2126    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2127
2128 int
2129 detect_coding_sjis (src, src_end)
2130      unsigned char *src, *src_end;
2131 {
2132   unsigned char c;
2133
2134   while (src < src_end)
2135     {
2136       c = *src++;
2137       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2138         {
2139           if (src < src_end && *src++ < 0x40)
2140             return 0;
2141         }
2142     }
2143   return CODING_CATEGORY_MASK_SJIS;
2144 }
2145
2146 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2147    Check if a text is encoded in BIG5.  If it is, return
2148    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2149
2150 int
2151 detect_coding_big5 (src, src_end)
2152      unsigned char *src, *src_end;
2153 {
2154   unsigned char c;
2155
2156   while (src < src_end)
2157     {
2158       c = *src++;
2159       if (c >= 0xA1)
2160         {
2161           if (src >= src_end)
2162             break;
2163           c = *src++;
2164           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2165             return 0;
2166         }
2167     }
2168   return CODING_CATEGORY_MASK_BIG5;
2169 }
2170
2171 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2172    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2173
2174 int
2175 decode_coding_sjis_big5 (coding, source, destination,
2176                          src_bytes, dst_bytes, sjis_p)
2177      struct coding_system *coding;
2178      unsigned char *source, *destination;
2179      int src_bytes, dst_bytes;
2180      int sjis_p;
2181 {
2182   unsigned char *src = source;
2183   unsigned char *src_end = source + src_bytes;
2184   unsigned char *dst = destination;
2185   unsigned char *dst_end = destination + dst_bytes;
2186   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2187      from DST_END to assure overflow checking is necessary only at the
2188      head of loop.  */
2189   unsigned char *adjusted_dst_end = dst_end - 3;
2190   Lisp_Object translation_table
2191       = coding->translation_table_for_decode;
2192   int result = CODING_FINISH_NORMAL;
2193
2194   if (!NILP (Venable_character_translation) && NILP (translation_table))
2195     translation_table = Vstandard_translation_table_for_decode;
2196
2197   coding->produced_char = 0;
2198   coding->fake_multibyte = 0;
2199   while (src < src_end && (dst_bytes
2200                            ? (dst < adjusted_dst_end)
2201                            : (dst < src - 3)))
2202     {
2203       /* SRC_BASE remembers the start position in source in each loop.
2204          The loop will be exited when there's not enough source text
2205          to analyze two-byte character (within macro ONE_MORE_BYTE).
2206          In that case, SRC is reset to SRC_BASE before exiting.  */
2207       unsigned char *src_base = src;
2208       unsigned char c1 = *src++, c2, c3, c4;
2209
2210       if (c1 < 0x20)
2211         {
2212           if (c1 == '\r')
2213             {
2214               if (coding->eol_type == CODING_EOL_CRLF)
2215                 {
2216                   ONE_MORE_BYTE (c2);
2217                   if (c2 == '\n')
2218                     *dst++ = c2;
2219                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2220                     {
2221                       result = CODING_FINISH_INCONSISTENT_EOL;
2222                       goto label_end_of_loop_2;
2223                     }
2224                   else
2225                     /* To process C2 again, SRC is subtracted by 1.  */
2226                     *dst++ = c1, src--;
2227                 }
2228               else if (coding->eol_type == CODING_EOL_CR)
2229                 *dst++ = '\n';
2230               else
2231                 *dst++ = c1;
2232             }
2233           else if (c1 == '\n'
2234                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2235                    && (coding->eol_type == CODING_EOL_CR
2236                        || coding->eol_type == CODING_EOL_CRLF))
2237             {
2238               result = CODING_FINISH_INCONSISTENT_EOL;
2239               goto label_end_of_loop_2;
2240             }
2241           else
2242             *dst++ = c1;
2243           coding->produced_char++;
2244         }
2245       else if (c1 < 0x80)
2246         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2247       else
2248         {
2249           if (sjis_p)
2250             {
2251               if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
2252                 {
2253                   /* SJIS -> JISX0208 */
2254                   ONE_MORE_BYTE (c2);
2255                   if (c2 >= 0x40)
2256                     {
2257                       DECODE_SJIS (c1, c2, c3, c4);
2258                       DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2259                     }
2260                   else
2261                     goto label_invalid_code_2;
2262                 }
2263               else if (c1 < 0xE0)
2264                 /* SJIS -> JISX0201-Kana */
2265                 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2266                                             /* dummy */ c2);
2267               else
2268                 goto label_invalid_code_1;
2269             }
2270           else
2271             {
2272               /* BIG5 -> Big5 */
2273               if (c1 >= 0xA1 && c1 <= 0xFE)
2274                 {
2275                   ONE_MORE_BYTE (c2);
2276                   if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2277                     {
2278                       int charset;
2279
2280                       DECODE_BIG5 (c1, c2, charset, c3, c4);
2281                       DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2282                     }
2283                   else
2284                     goto label_invalid_code_2;
2285                 }
2286               else
2287                 goto label_invalid_code_1;
2288             }
2289         }
2290       continue;
2291
2292     label_invalid_code_1:
2293       *dst++ = c1;
2294       coding->produced_char++;
2295       coding->fake_multibyte = 1;
2296       continue;
2297
2298     label_invalid_code_2:
2299       *dst++ = c1; *dst++= c2;
2300       coding->produced_char += 2;
2301       coding->fake_multibyte = 1;
2302       continue;
2303
2304     label_end_of_loop:
2305       result = CODING_FINISH_INSUFFICIENT_SRC;
2306     label_end_of_loop_2:
2307       src = src_base;
2308       break;
2309     }
2310
2311   if (src < src_end)
2312     {
2313       if (result == CODING_FINISH_NORMAL)
2314         result = CODING_FINISH_INSUFFICIENT_DST;
2315       else if (result != CODING_FINISH_INCONSISTENT_EOL
2316                && coding->mode & CODING_MODE_LAST_BLOCK)
2317         {
2318           src_bytes = src_end - src;
2319           if (dst_bytes && (dst_end - dst < src_bytes))
2320             src_bytes = dst_end - dst;
2321           bcopy (dst, src, src_bytes);
2322           src += src_bytes;
2323           dst += src_bytes;
2324           coding->fake_multibyte = 1;
2325         }
2326     }
2327
2328   coding->consumed = coding->consumed_char = src - source;
2329   coding->produced = dst - destination;
2330   return result;
2331 }
2332
2333 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2334    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2335    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
2336    sure that all these charsets are registered as official charset
2337    (i.e. do not have extended leading-codes).  Characters of other
2338    charsets are produced without any encoding.  If SJIS_P is 1, encode
2339    SJIS text, else encode BIG5 text.  */
2340
2341 int
2342 encode_coding_sjis_big5 (coding, source, destination,
2343                          src_bytes, dst_bytes, sjis_p)
2344      struct coding_system *coding;
2345      unsigned char *source, *destination;
2346      int src_bytes, dst_bytes;
2347      int sjis_p;
2348 {
2349   unsigned char *src = source;
2350   unsigned char *src_end = source + src_bytes;
2351   unsigned char *dst = destination;
2352   unsigned char *dst_end = destination + dst_bytes;
2353   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2354      from DST_END to assure overflow checking is necessary only at the
2355      head of loop.  */
2356   unsigned char *adjusted_dst_end = dst_end - 1;
2357   Lisp_Object translation_table
2358       = coding->translation_table_for_encode;
2359   int result = CODING_FINISH_NORMAL;
2360
2361   if (!NILP (Venable_character_translation) && NILP (translation_table))
2362     translation_table = Vstandard_translation_table_for_encode;
2363
2364   coding->consumed_char = 0;
2365   coding->fake_multibyte = 0;
2366   while (src < src_end && (dst_bytes
2367                            ? (dst < adjusted_dst_end)
2368                            : (dst < src - 1)))
2369     {
2370       /* SRC_BASE remembers the start position in source in each loop.
2371          The loop will be exited when there's not enough source text
2372          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2373          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2374          before exiting.  */
2375       unsigned char *src_base = src;
2376       unsigned char c1 = *src++, c2, c3, c4;
2377
2378       if (coding->composing)
2379         {
2380           if (c1 == 0xA0)
2381             {
2382               ONE_MORE_BYTE (c1);
2383               c1 &= 0x7F;
2384             }
2385           else if (c1 >= 0xA0)
2386             c1 -= 0x20;
2387           else
2388             coding->composing = 0;
2389         }
2390
2391       switch (emacs_code_class[c1])
2392         {
2393         case EMACS_ascii_code:
2394           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2395           break;
2396
2397         case EMACS_control_code:
2398           *dst++ = c1;
2399           coding->consumed_char++;
2400           break;
2401
2402         case EMACS_carriage_return_code:
2403           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2404             {
2405               *dst++ = c1;
2406               coding->consumed_char++;
2407               break;
2408             }
2409           /* fall down to treat '\r' as '\n' ...  */
2410
2411         case EMACS_linefeed_code:
2412           if (coding->eol_type == CODING_EOL_LF
2413               || coding->eol_type == CODING_EOL_UNDECIDED)
2414             *dst++ = '\n';
2415           else if (coding->eol_type == CODING_EOL_CRLF)
2416             *dst++ = '\r', *dst++ = '\n';
2417           else
2418             *dst++ = '\r';
2419           coding->consumed_char++;
2420           break;
2421
2422         case EMACS_leading_code_2:
2423           ONE_MORE_BYTE (c2);
2424           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2425           break;
2426
2427         case EMACS_leading_code_3:
2428           TWO_MORE_BYTES (c2, c3);
2429           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2430           break;
2431
2432         case EMACS_leading_code_4:
2433           THREE_MORE_BYTES (c2, c3, c4);
2434           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2435           break;
2436
2437         case EMACS_leading_code_composition:
2438           coding->composing = 1;
2439           break;
2440
2441         default:                /* i.e. case EMACS_invalid_code: */
2442           *dst++ = c1;
2443           coding->consumed_char++;
2444         }
2445       continue;
2446
2447     label_end_of_loop:
2448       result = CODING_FINISH_INSUFFICIENT_SRC;
2449       src = src_base;
2450       break;
2451     }
2452
2453   if (result == CODING_FINISH_NORMAL
2454       && src < src_end)
2455     result = CODING_FINISH_INSUFFICIENT_DST;
2456   coding->consumed = src - source;
2457   coding->produced = coding->produced_char = dst - destination;
2458   return result;
2459 }
2460
2461 \f
2462 /*** 5. CCL handlers ***/
2463
2464 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2465    Check if a text is encoded in a coding system of which
2466    encoder/decoder are written in CCL program.  If it is, return
2467    CODING_CATEGORY_MASK_CCL, else return 0.  */
2468
2469 int
2470 detect_coding_ccl (src, src_end)
2471      unsigned char *src, *src_end;
2472 {
2473   unsigned char *valid;
2474
2475   /* No coding system is assigned to coding-category-ccl.  */
2476   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2477     return 0;
2478
2479   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2480   while (src < src_end)
2481     {
2482       if (! valid[*src]) return 0;
2483       src++;
2484     }
2485   return CODING_CATEGORY_MASK_CCL;
2486 }
2487
2488 \f
2489 /*** 6. End-of-line handlers ***/
2490
2491 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2492    This function is called only when `coding->eol_type' is
2493    CODING_EOL_CRLF or CODING_EOL_CR.  */
2494
2495 int
2496 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2497      struct coding_system *coding;
2498      unsigned char *source, *destination;
2499      int src_bytes, dst_bytes;
2500 {
2501   unsigned char *src = source;
2502   unsigned char *src_end = source + src_bytes;
2503   unsigned char *dst = destination;
2504   unsigned char *dst_end = destination + dst_bytes;
2505   unsigned char c;
2506   int result = CODING_FINISH_NORMAL;
2507
2508   coding->fake_multibyte = 0;
2509
2510   if (src_bytes <= 0)
2511     return result;
2512
2513   switch (coding->eol_type)
2514     {
2515     case CODING_EOL_CRLF:
2516       {
2517         /* Since the maximum bytes produced by each loop is 2, we
2518            subtract 1 from DST_END to assure overflow checking is
2519            necessary only at the head of loop.  */
2520         unsigned char *adjusted_dst_end = dst_end - 1;
2521
2522         while (src < src_end && (dst_bytes
2523                                  ? (dst < adjusted_dst_end)
2524                                  : (dst < src - 1)))
2525           {
2526             unsigned char *src_base = src;
2527
2528             c = *src++;
2529             if (c == '\r')
2530               {
2531                 ONE_MORE_BYTE (c);
2532                 if (c != '\n')
2533                   {
2534                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2535                       {
2536                         result = CODING_FINISH_INCONSISTENT_EOL;
2537                         goto label_end_of_loop_2;
2538                       }
2539                     *dst++ = '\r';
2540                     if (BASE_LEADING_CODE_P (c))
2541                       coding->fake_multibyte = 1;
2542                   }
2543                 *dst++ = c;
2544               }
2545             else if (c == '\n'
2546                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2547               {
2548                 result = CODING_FINISH_INCONSISTENT_EOL;
2549                 goto label_end_of_loop_2;
2550               }
2551             else
2552               {
2553                 *dst++ = c;
2554                 if (BASE_LEADING_CODE_P (c))
2555                   coding->fake_multibyte = 1;
2556               }
2557             continue;
2558
2559           label_end_of_loop:
2560             result = CODING_FINISH_INSUFFICIENT_SRC;
2561           label_end_of_loop_2:
2562             src = src_base;
2563             break;
2564           }
2565         if (result == CODING_FINISH_NORMAL
2566             && src < src_end)
2567           result = CODING_FINISH_INSUFFICIENT_DST;
2568       }
2569       break;
2570
2571     case CODING_EOL_CR:
2572       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2573         {
2574           while (src < src_end)
2575             {
2576               if ((c = *src++) == '\n')
2577                 break;
2578               if (BASE_LEADING_CODE_P (c))
2579                 coding->fake_multibyte = 1;
2580             }
2581           if (*--src == '\n')
2582             {
2583               src_bytes = src - source;
2584               result = CODING_FINISH_INCONSISTENT_EOL;
2585             }
2586         }
2587       if (dst_bytes && src_bytes > dst_bytes)
2588         {
2589           result = CODING_FINISH_INSUFFICIENT_DST;
2590           src_bytes = dst_bytes;
2591         }
2592       if (dst_bytes)
2593         bcopy (source, destination, src_bytes);
2594       else
2595         safe_bcopy (source, destination, src_bytes);
2596       src = source + src_bytes;
2597       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2598       break;
2599
2600     default:                    /* i.e. case: CODING_EOL_LF */
2601       if (dst_bytes && src_bytes > dst_bytes)
2602         {
2603           result = CODING_FINISH_INSUFFICIENT_DST;
2604           src_bytes = dst_bytes;
2605         }
2606       if (dst_bytes)
2607         bcopy (source, destination, src_bytes);
2608       else
2609         safe_bcopy (source, destination, src_bytes);
2610       src += src_bytes;
2611       dst += src_bytes;
2612       coding->fake_multibyte = 1;
2613       break;
2614     }
2615
2616   coding->consumed = coding->consumed_char = src - source;
2617   coding->produced = coding->produced_char = dst - destination;
2618   return result;
2619 }
2620
2621 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2622    format of end-of-line according to `coding->eol_type'.  If
2623    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2624    '\r' in source text also means end-of-line.  */
2625
2626 int
2627 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2628      struct coding_system *coding;
2629      unsigned char *source, *destination;
2630      int src_bytes, dst_bytes;
2631 {
2632   unsigned char *src = source;
2633   unsigned char *dst = destination;
2634   int result = CODING_FINISH_NORMAL;
2635
2636   coding->fake_multibyte = 0;
2637
2638   if (coding->eol_type == CODING_EOL_CRLF)
2639     {
2640       unsigned char c;
2641       unsigned char *src_end = source + src_bytes;
2642       unsigned char *dst_end = destination + dst_bytes;
2643       /* Since the maximum bytes produced by each loop is 2, we
2644          subtract 1 from DST_END to assure overflow checking is
2645          necessary only at the head of loop.  */
2646       unsigned char *adjusted_dst_end = dst_end - 1;
2647
2648       while (src < src_end && (dst_bytes
2649                                ? (dst < adjusted_dst_end)
2650                                : (dst < src - 1)))
2651         {
2652           c = *src++;
2653           if (c == '\n'
2654               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2655             *dst++ = '\r', *dst++ = '\n';
2656           else
2657             {
2658               *dst++ = c;
2659               if (BASE_LEADING_CODE_P (c))
2660                 coding->fake_multibyte = 1;
2661             }
2662         }
2663       if (src < src_end)
2664         result = CODING_FINISH_INSUFFICIENT_DST;
2665     }
2666   else
2667     {
2668       unsigned char c;
2669
2670       if (dst_bytes && src_bytes > dst_bytes)
2671         {
2672           src_bytes = dst_bytes;
2673           result = CODING_FINISH_INSUFFICIENT_DST;
2674         }
2675       if (dst_bytes)
2676         bcopy (source, destination, src_bytes);
2677       else
2678         safe_bcopy (source, destination, src_bytes);
2679       dst_bytes = src_bytes;
2680       if (coding->eol_type == CODING_EOL_CR)
2681         {
2682           while (src_bytes--)
2683             {
2684               if ((c = *dst++) == '\n')
2685                 dst[-1] = '\r';
2686               else if (BASE_LEADING_CODE_P (c))
2687                 coding->fake_multibyte = 1;
2688             }
2689         }
2690       else
2691         {
2692           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2693             {
2694               while (src_bytes--)
2695                 if (*dst++ == '\r') dst[-1] = '\n';
2696             }
2697           coding->fake_multibyte = 1;
2698         }
2699       src = source + dst_bytes;
2700       dst = destination + dst_bytes;
2701     }
2702
2703   coding->consumed = coding->consumed_char = src - source;
2704   coding->produced = coding->produced_char = dst - destination;
2705   return result;
2706 }
2707
2708 \f
2709 /*** 7. C library functions ***/
2710
/* In Emacs Lisp, a coding system is represented by a Lisp symbol which
   has a property `coding-system'.  The value of this property is a
   vector of length 5 (called the coding-vector).  Among the elements
   of this vector, the first (element[0]) and the fifth (element[4])
   carry important information for decoding/encoding.  Before
   decoding/encoding, this information should be set in fields of a
   structure of type `coding_system'.
2718
2719    A value of property `coding-system' can be a symbol of another
2720    subsidiary coding-system.  In that case, Emacs gets coding-vector
2721    from that symbol.
2722
2723    `element[0]' contains information to be set in `coding->type'.  The
2724    value and its meaning is as follows:
2725
2726    0 -- coding_type_emacs_mule
2727    1 -- coding_type_sjis
2728    2 -- coding_type_iso2022
2729    3 -- coding_type_big5
2730    4 -- coding_type_ccl encoder/decoder written in CCL
2731    nil -- coding_type_no_conversion
2732    t -- coding_type_undecided (automatic conversion on decoding,
2733                                no-conversion on encoding)
2734
2735    `element[4]' contains information to be set in `coding->flags' and
2736    `coding->spec'.  The meaning varies by `coding->type'.
2737
   If `coding->type' is `coding_type_iso2022', element[4] is a vector
   of length 32 (of which the first 17 sub-elements are used now).
   The meanings of these sub-elements are:
2741
2742    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2743         If the value is an integer of valid charset, the charset is
2744         assumed to be designated to graphic register N initially.
2745
        If the value is negative, it is the negated value of a charset
        which reserves graphic register N, which means that the charset
        is not designated initially but should be designated to graphic
        register N just before encoding a character in that charset.
2750
2751         If the value is nil, graphic register N is never used on
2752         encoding.
2753
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
        Each value takes t or nil.  See the section ISO2022 of
        `coding.h' for more information.
2757
2758    If `coding->type' is `coding_type_big5', element[4] is t to denote
2759    BIG5-ETen or nil to denote BIG5-HKU.
2760
   If `coding->type' takes any other value, element[4] is ignored.
2762
   An Emacs Lisp coding system also carries information about the
   format of end-of-line in the value of its property `eol-type'.  If
   the value is an integer, 0 means CODING_EOL_LF, 1 means
   CODING_EOL_CRLF, and 2 means CODING_EOL_CR.  If it is not an
   integer, it should be a vector of subsidiary coding systems, each
   of whose `eol-type' property has one of the above values.
2769
2770 */
2771
2772 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2773    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2774    is setup so that no conversion is necessary and return -1, else
2775    return 0.  */
2776
2777 int
2778 setup_coding_system (coding_system, coding)
2779      Lisp_Object coding_system;
2780      struct coding_system *coding;
2781 {
2782   Lisp_Object coding_spec, coding_type, eol_type, plist;
2783   Lisp_Object val;
2784   int i;
2785
2786   /* Initialize some fields required for all kinds of coding systems.  */
2787   coding->symbol = coding_system;
2788   coding->common_flags = 0;
2789   coding->mode = 0;
2790   coding->heading_ascii = -1;
2791   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2792   coding_spec = Fget (coding_system, Qcoding_system);
2793   if (!VECTORP (coding_spec)
2794       || XVECTOR (coding_spec)->size != 5
2795       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2796     goto label_invalid_coding_system;
2797
2798   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2799   if (VECTORP (eol_type))
2800     {
2801       coding->eol_type = CODING_EOL_UNDECIDED;
2802       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2803     }
2804   else if (XFASTINT (eol_type) == 1)
2805     {
2806       coding->eol_type = CODING_EOL_CRLF;
2807       coding->common_flags
2808         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2809     }
2810   else if (XFASTINT (eol_type) == 2)
2811     {
2812       coding->eol_type = CODING_EOL_CR;
2813       coding->common_flags
2814         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2815     }
2816   else
2817     coding->eol_type = CODING_EOL_LF;
2818
2819   coding_type = XVECTOR (coding_spec)->contents[0];
2820   /* Try short cut.  */
2821   if (SYMBOLP (coding_type))
2822     {
2823       if (EQ (coding_type, Qt))
2824         {
2825           coding->type = coding_type_undecided;
2826           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2827         }
2828       else
2829         coding->type = coding_type_no_conversion;
2830       return 0;
2831     }
2832
2833   /* Initialize remaining fields.  */
2834   coding->composing = 0;
2835   coding->translation_table_for_decode = Qnil;
2836   coding->translation_table_for_encode = Qnil;
2837
2838   /* Get values of coding system properties:
2839      `post-read-conversion', `pre-write-conversion',
2840      `translation-table-for-decode', `translation-table-for-encode'.  */
2841   plist = XVECTOR (coding_spec)->contents[3];
2842   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2843   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2844   val = Fplist_get (plist, Qtranslation_table_for_decode);
2845   if (SYMBOLP (val))
2846     val = Fget (val, Qtranslation_table_for_decode);
2847   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2848   val = Fplist_get (plist, Qtranslation_table_for_encode);
2849   if (SYMBOLP (val))
2850     val = Fget (val, Qtranslation_table_for_encode);
2851   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
2852   val = Fplist_get (plist, Qcoding_category);
2853   if (!NILP (val))
2854     {
2855       val = Fget (val, Qcoding_category_index);
2856       if (INTEGERP (val))
2857         coding->category_idx = XINT (val);
2858       else
2859         goto label_invalid_coding_system;
2860     }
2861   else
2862     goto label_invalid_coding_system;
2863
2864   val = Fplist_get (plist, Qsafe_charsets);
2865   if (EQ (val, Qt))
2866     {
2867       for (i = 0; i <= MAX_CHARSET; i++)
2868         coding->safe_charsets[i] = 1;
2869     }
2870   else
2871     {
2872       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2873       while (CONSP (val))
2874         {
2875           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2876             coding->safe_charsets[i] = 1;
2877           val = XCONS (val)->cdr;
2878         }
2879     }
2880
2881   switch (XFASTINT (coding_type))
2882     {
2883     case 0:
2884       coding->type = coding_type_emacs_mule;
2885       if (!NILP (coding->post_read_conversion))
2886         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2887       if (!NILP (coding->pre_write_conversion))
2888         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2889       break;
2890
2891     case 1:
2892       coding->type = coding_type_sjis;
2893       coding->common_flags
2894         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2895       break;
2896
2897     case 2:
2898       coding->type = coding_type_iso2022;
2899       coding->common_flags
2900         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2901       {
2902         Lisp_Object val, temp;
2903         Lisp_Object *flags;
2904         int i, charset, reg_bits = 0;
2905
2906         val = XVECTOR (coding_spec)->contents[4];
2907
2908         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2909           goto label_invalid_coding_system;
2910
2911         flags = XVECTOR (val)->contents;
2912         coding->flags
2913           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2914              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2915              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2916              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2917              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2918              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2919              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2920              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2921              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2922              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2923              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2924              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2925              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2926              );
2927
2928         /* Invoke graphic register 0 to plane 0.  */
2929         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2930         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2931         CODING_SPEC_ISO_INVOCATION (coding, 1)
2932           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2933         /* Not single shifting at first.  */
2934         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2935         /* Beginning of buffer should also be regarded as bol. */
2936         CODING_SPEC_ISO_BOL (coding) = 1;
2937
2938         for (charset = 0; charset <= MAX_CHARSET; charset++)
2939           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2940         val = Vcharset_revision_alist;
2941         while (CONSP (val))
2942           {
2943             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2944             if (charset >= 0
2945                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2946                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2947               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2948             val = XCONS (val)->cdr;
2949           }
2950
2951         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2952            FLAGS[REG] can be one of below:
2953                 integer CHARSET: CHARSET occupies register I,
2954                 t: designate nothing to REG initially, but can be used
2955                   by any charsets,
2956                 list of integer, nil, or t: designate the first
2957                   element (if integer) to REG initially, the remaining
2958                   elements (if integer) is designated to REG on request,
2959                   if an element is t, REG can be used by any charsets,
2960                 nil: REG is never used.  */
2961         for (charset = 0; charset <= MAX_CHARSET; charset++)
2962           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2963             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2964         for (i = 0; i < 4; i++)
2965           {
2966             if (INTEGERP (flags[i])
2967                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2968                 || (charset = get_charset_id (flags[i])) >= 0)
2969               {
2970                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2971                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2972               }
2973             else if (EQ (flags[i], Qt))
2974               {
2975                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2976                 reg_bits |= 1 << i;
2977                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2978               }
2979             else if (CONSP (flags[i]))
2980               {
2981                 Lisp_Object tail;
2982                 tail = flags[i];
2983
2984                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2985                 if (INTEGERP (XCONS (tail)->car)
2986                     && (charset = XINT (XCONS (tail)->car),
2987                         CHARSET_VALID_P (charset))
2988                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2989                   {
2990                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2991                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2992                   }
2993                 else
2994                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2995                 tail = XCONS (tail)->cdr;
2996                 while (CONSP (tail))
2997                   {
2998                     if (INTEGERP (XCONS (tail)->car)
2999                         && (charset = XINT (XCONS (tail)->car),
3000                             CHARSET_VALID_P (charset))
3001                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
3002                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3003                         = i;
3004                     else if (EQ (XCONS (tail)->car, Qt))
3005                       reg_bits |= 1 << i;
3006                     tail = XCONS (tail)->cdr;
3007                   }
3008               }
3009             else
3010               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3011
3012             CODING_SPEC_ISO_DESIGNATION (coding, i)
3013               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3014           }
3015
3016         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3017           {
3018             /* REG 1 can be used only by locking shift in 7-bit env.  */
3019             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3020               reg_bits &= ~2;
3021             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3022               /* Without any shifting, only REG 0 and 1 can be used.  */
3023               reg_bits &= 3;
3024           }
3025
3026         if (reg_bits)
3027           for (charset = 0; charset <= MAX_CHARSET; charset++)
3028             {
3029               if (CHARSET_VALID_P (charset))
3030                 {
3031                   /* There exist some default graphic registers to be
3032                      used CHARSET.  */
3033
3034                   /* We had better avoid designating a charset of
3035                      CHARS96 to REG 0 as far as possible.  */
3036                   if (CHARSET_CHARS (charset) == 96)
3037                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3038                       = (reg_bits & 2
3039                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3040                   else
3041                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3042                       = (reg_bits & 1
3043                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3044                 }
3045             }
3046       }
3047       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3048       coding->spec.iso2022.last_invalid_designation_register = -1;
3049       break;
3050
3051     case 3:
3052       coding->type = coding_type_big5;
3053       coding->common_flags
3054         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3055       coding->flags
3056         = (NILP (XVECTOR (coding_spec)->contents[4])
3057            ? CODING_FLAG_BIG5_HKU
3058            : CODING_FLAG_BIG5_ETEN);
3059       break;
3060
3061     case 4:
3062       coding->type = coding_type_ccl;
3063       coding->common_flags
3064         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3065       {
3066         Lisp_Object val;
3067         Lisp_Object decoder, encoder;
3068
3069         val = XVECTOR (coding_spec)->contents[4];
3070         if (CONSP  (val)
3071             && SYMBOLP (XCONS (val)->car)
3072             && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
3073             && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
3074             && SYMBOLP (XCONS (val)->cdr)
3075             && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
3076             && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
3077           {
3078             setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
3079             setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
3080           }
3081         else
3082           goto label_invalid_coding_system;
3083
3084         bzero (coding->spec.ccl.valid_codes, 256);
3085         val = Fplist_get (plist, Qvalid_codes);
3086         if (CONSP (val))
3087           {
3088             Lisp_Object this;
3089
3090             for (; CONSP (val); val = XCONS (val)->cdr)
3091               {
3092                 this = XCONS (val)->car;
3093                 if (INTEGERP (this)
3094                     && XINT (this) >= 0 && XINT (this) < 256)
3095                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3096                 else if (CONSP (this)
3097                          && INTEGERP (XCONS (this)->car)
3098                          && INTEGERP (XCONS (this)->cdr))
3099                   {
3100                     int start = XINT (XCONS (this)->car);
3101                     int end = XINT (XCONS (this)->cdr);
3102
3103                     if (start >= 0 && start <= end && end < 256)
3104                       while (start < end)
3105                         coding->spec.ccl.valid_codes[start++] = 1;
3106                   }
3107               }
3108           }
3109       }
3110       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3111       break;
3112
3113     case 5:
3114       coding->type = coding_type_raw_text;
3115       break;
3116
3117     default:
3118       goto label_invalid_coding_system;
3119     }
3120   return 0;
3121
3122  label_invalid_coding_system:
3123   coding->type = coding_type_no_conversion;
3124   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3125   coding->common_flags = 0;
3126   coding->eol_type = CODING_EOL_LF;
3127   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3128   return -1;
3129 }
3130
3131 /* Setup raw-text or one of its subsidiaries in the structure
3132    coding_system CODING according to the already setup value eol_type
3133    in CODING.  CODING should be setup for some coding system in
3134    advance.  */
3135
3136 void
3137 setup_raw_text_coding_system (coding)
3138      struct coding_system *coding;
3139 {
3140   if (coding->type != coding_type_raw_text)
3141     {
3142       coding->symbol = Qraw_text;
3143       coding->type = coding_type_raw_text;
3144       if (coding->eol_type != CODING_EOL_UNDECIDED)
3145         {
3146           Lisp_Object subsidiaries;
3147           subsidiaries = Fget (Qraw_text, Qeol_type);
3148
3149           if (VECTORP (subsidiaries)
3150               && XVECTOR (subsidiaries)->size == 3)
3151             coding->symbol
3152               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3153         }
3154     }
3155   return;
3156 }
3157
3158 /* Emacs has a mechanism to automatically detect a coding system if it
3159    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3160    it's impossible to distinguish some coding systems accurately
3161    because they use the same range of codes.  So, at first, coding
3162    systems are grouped into the following categories:
3163
3164    o coding-category-emacs-mule
3165
3166         The category for a coding system which has the same code range
3167         as Emacs' internal format.  Assigned the coding-system (Lisp
3168         symbol) `emacs-mule' by default.
3169
3170    o coding-category-sjis
3171
3172         The category for a coding system which has the same code range
3173         as SJIS.  Assigned the coding-system (Lisp
3174         symbol) `japanese-shift-jis' by default.
3175
3176    o coding-category-iso-7
3177
3178         The category for a coding system which has the same code range
3179         as ISO2022 of 7-bit environment.  This doesn't use any locking
3180         shift and single shift functions.  This can encode/decode all
3181         charsets.  Assigned the coding-system (Lisp symbol)
3182         `iso-2022-7bit' by default.
3183
3184    o coding-category-iso-7-tight
3185
3186         Same as coding-category-iso-7 except that this can
3187         encode/decode only the specified charsets.
3188
3189    o coding-category-iso-8-1
3190
3191         The category for a coding system which has the same code range
3192         as ISO2022 of 8-bit environment and graphic plane 1 used only
3193         for DIMENSION1 charset.  This doesn't use any locking shift
3194         and single shift functions.  Assigned the coding-system (Lisp
3195         symbol) `iso-latin-1' by default.
3196
3197    o coding-category-iso-8-2
3198
3199         The category for a coding system which has the same code range
3200         as ISO2022 of 8-bit environment and graphic plane 1 used only
3201         for DIMENSION2 charset.  This doesn't use any locking shift
3202         and single shift functions.  Assigned the coding-system (Lisp
3203         symbol) `japanese-iso-8bit' by default.
3204
3205    o coding-category-iso-7-else
3206
3207         The category for a coding system which has the same code range
3208         as ISO2022 of 7-bit environment but uses locking shift or
3209         single shift functions.  Assigned the coding-system (Lisp
3210         symbol) `iso-2022-7bit-lock' by default.
3211
3212    o coding-category-iso-8-else
3213
3214         The category for a coding system which has the same code range
3215         as ISO2022 of 8-bit environment but uses locking shift or
3216         single shift functions.  Assigned the coding-system (Lisp
3217         symbol) `iso-2022-8bit-ss2' by default.
3218
3219    o coding-category-big5
3220
3221         The category for a coding system which has the same code range
3222         as BIG5.  Assigned the coding-system (Lisp symbol)
3223         `cn-big5' by default.
3224
3225    o coding-category-ccl
3226
3227         The category for a coding system of which encoder/decoder is
3228         written in CCL programs.  The default value is nil, i.e., no
3229         coding system is assigned.
3230
3231    o coding-category-binary
3232
3233         The category for a coding system not categorized in any of the
3234         above.  Assigned the coding-system (Lisp symbol)
3235         `no-conversion' by default.
3236
3237    Each of them is a Lisp symbol and the value is an actual
3238    `coding-system' (this is also a Lisp symbol) assigned by a user.
3239    What Emacs does actually is to detect a category of coding system.
3240    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3241    decide only one possible category, it selects a category of the
3242    highest priority.  Priorities of categories are also specified by a
3243    user in a Lisp variable `coding-category-list'.
3244
3245 */
3246
/* Table indexed by a byte value.  detect_coding_mask skips the
   leading bytes of a text for which the entry here is nonzero.  */
static int ascii_skip_code[256];
3249
3250 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3251    If it detects possible coding systems, return an integer in which
3252    appropriate flag bits are set.  Flag bits are defined by macros
3253    CODING_CATEGORY_MASK_XXX in `coding.h'.
3254
3255    How many ASCII characters are at the head is returned as *SKIP.  */
3256
3257 static int
3258 detect_coding_mask (source, src_bytes, priorities, skip)
3259      unsigned char *source;
3260      int src_bytes, *priorities, *skip;
3261 {
3262   register unsigned char c;
3263   unsigned char *src = source, *src_end = source + src_bytes;
3264   unsigned int mask;
3265   int i;
3266
3267   /* At first, skip all ASCII characters and control characters except
3268      for three ISO2022 specific control characters.  */
3269   ascii_skip_code[ISO_CODE_SO] = 0;
3270   ascii_skip_code[ISO_CODE_SI] = 0;
3271   ascii_skip_code[ISO_CODE_ESC] = 0;
3272
3273  label_loop_detect_coding:
3274   while (src < src_end && ascii_skip_code[*src]) src++;
3275   *skip = src - source;
3276
3277   if (src >= src_end)
3278     /* We found nothing other than ASCII.  There's nothing to do.  */
3279     return 0;
3280
3281   c = *src;
3282   /* The text seems to be encoded in some multilingual coding system.
3283      Now, try to find in which coding system the text is encoded.  */
3284   if (c < 0x80)
3285     {
3286       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3287       /* C is an ISO2022 specific control code of C0.  */
3288       mask = detect_coding_iso2022 (src, src_end);
3289       if (mask == 0)
3290         {
3291           /* No valid ISO2022 code follows C.  Try again.  */
3292           src++;
3293           if (c == ISO_CODE_ESC)
3294             ascii_skip_code[ISO_CODE_ESC] = 1;
3295           else
3296             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3297           goto label_loop_detect_coding;
3298         }
3299       if (priorities)
3300         goto label_return_highest_only;
3301     }
3302   else
3303     {
3304       int try;
3305
3306       if (c < 0xA0)
3307         {
3308           /* C is the first byte of SJIS character code,
3309              or a leading-code of Emacs' internal format (emacs-mule).  */
3310           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3311
3312           /* Or, if C is a special latin extra code,
3313              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3314              or is an ISO2022 control-sequence-introducer (CSI),
3315              we should also consider the possibility of ISO2022 codings.  */
3316           if ((VECTORP (Vlatin_extra_code_table)
3317                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3318               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3319               || (c == ISO_CODE_CSI
3320                   && (src < src_end
3321                       && (*src == ']'
3322                           || ((*src == '0' || *src == '1' || *src == '2')
3323                               && src + 1 < src_end
3324                               && src[1] == ']')))))
3325             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3326                      | CODING_CATEGORY_MASK_ISO_8BIT);
3327         }
3328       else
3329         /* C is a character of ISO2022 in graphic plane right,
3330            or a SJIS's 1-byte character code (i.e. JISX0201),
3331            or the first byte of BIG5's 2-byte code.  */
3332         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3333                 | CODING_CATEGORY_MASK_ISO_8BIT
3334                 | CODING_CATEGORY_MASK_SJIS
3335                 | CODING_CATEGORY_MASK_BIG5);
3336
3337       /* Or, we may have to consider the possibility of CCL.  */
3338       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3339           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3340               ->spec.ccl.valid_codes)[c])
3341         try |= CODING_CATEGORY_MASK_CCL;
3342
3343       mask = 0;
3344       if (priorities)
3345         {
3346           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3347             {
3348               if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
3349                 mask = detect_coding_iso2022 (src, src_end);
3350               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3351                 mask = detect_coding_sjis (src, src_end);
3352               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3353                 mask = detect_coding_big5 (src, src_end);
3354               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3355                 mask = detect_coding_emacs_mule (src, src_end);
3356               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3357                 mask = detect_coding_ccl (src, src_end);
3358               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3359                 mask = CODING_CATEGORY_MASK_RAW_TEXT;
3360               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3361                 mask = CODING_CATEGORY_MASK_BINARY;
3362               if (mask)
3363                 goto label_return_highest_only;
3364             }
3365           return CODING_CATEGORY_MASK_RAW_TEXT;
3366         }
3367       if (try & CODING_CATEGORY_MASK_ISO)
3368         mask |= detect_coding_iso2022 (src, src_end);
3369       if (try & CODING_CATEGORY_MASK_SJIS)
3370         mask |= detect_coding_sjis (src, src_end);
3371       if (try & CODING_CATEGORY_MASK_BIG5)
3372         mask |= detect_coding_big5 (src, src_end);
3373       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3374         mask |= detect_coding_emacs_mule (src, src_end);
3375       if (try & CODING_CATEGORY_MASK_CCL)
3376         mask |= detect_coding_ccl (src, src_end);
3377     }
3378   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3379
3380  label_return_highest_only:
3381   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3382     {
3383       if (mask & priorities[i])
3384         return priorities[i];
3385     }
3386   return CODING_CATEGORY_MASK_RAW_TEXT;
3387 }
3388
3389 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3390    The information of the detected coding system is set in CODING.  */
3391
3392 void
3393 detect_coding (coding, src, src_bytes)
3394      struct coding_system *coding;
3395      unsigned char *src;
3396      int src_bytes;
3397 {
3398   unsigned int idx;
3399   int skip, mask, i;
3400   Lisp_Object val;
3401
3402   val = Vcoding_category_list;
3403   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3404   coding->heading_ascii = skip;
3405
3406   if (!mask) return;
3407
3408   /* We found a single coding system of the highest priority in MASK.  */
3409   idx = 0;
3410   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3411   if (! mask)
3412     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3413
3414   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3415
3416   if (coding->eol_type != CODING_EOL_UNDECIDED)
3417     {
3418       Lisp_Object tmp;
3419
3420       tmp = Fget (val, Qeol_type);
3421       if (VECTORP (tmp))
3422         val = XVECTOR (tmp)->contents[coding->eol_type];
3423     }
3424   setup_coding_system (val, coding);
3425   /* Set this again because setup_coding_system reset this member.  */
3426   coding->heading_ascii = skip;
3427 }
3428
3429 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3430    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3431    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3432
3433    How many non-eol characters are at the head is returned as *SKIP.  */
3434
3435 #define MAX_EOL_CHECK_COUNT 3
3436
3437 static int
3438 detect_eol_type (source, src_bytes, skip)
3439      unsigned char *source;
3440      int src_bytes, *skip;
3441 {
3442   unsigned char *src = source, *src_end = src + src_bytes;
3443   unsigned char c;
3444   int total = 0;                /* How many end-of-lines are found so far.  */
3445   int eol_type = CODING_EOL_UNDECIDED;
3446   int this_eol_type;
3447
3448   *skip = 0;
3449
3450   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3451     {
3452       c = *src++;
3453       if (c == '\n' || c == '\r')
3454         {
3455           if (*skip == 0)
3456             *skip = src - 1 - source;
3457           total++;
3458           if (c == '\n')
3459             this_eol_type = CODING_EOL_LF;
3460           else if (src >= src_end || *src != '\n')
3461             this_eol_type = CODING_EOL_CR;
3462           else
3463             this_eol_type = CODING_EOL_CRLF, src++;
3464
3465           if (eol_type == CODING_EOL_UNDECIDED)
3466             /* This is the first end-of-line.  */
3467             eol_type = this_eol_type;
3468           else if (eol_type != this_eol_type)
3469             {
3470               /* The found type is different from what found before.  */
3471               eol_type = CODING_EOL_INCONSISTENT;
3472               break;
3473             }
3474         }
3475     }
3476
3477   if (*skip == 0)
3478     *skip = src_end - source;
3479   return eol_type;
3480 }
3481
3482 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3483    is encoded.  If it detects an appropriate format of end-of-line, it
3484    sets the information in *CODING.  */
3485
3486 void
3487 detect_eol (coding, src, src_bytes)
3488      struct coding_system *coding;
3489      unsigned char *src;
3490      int src_bytes;
3491 {
3492   Lisp_Object val;
3493   int skip;
3494   int eol_type = detect_eol_type (src, src_bytes, &skip);
3495
3496   if (coding->heading_ascii > skip)
3497     coding->heading_ascii = skip;
3498   else
3499     skip = coding->heading_ascii;
3500
3501   if (eol_type == CODING_EOL_UNDECIDED)
3502     return;
3503   if (eol_type == CODING_EOL_INCONSISTENT)
3504     {
3505 #if 0
3506       /* This code is suppressed until we find a better way to
3507          distinguish raw text file and binary file.  */
3508
3509       /* If we have already detected that the coding is raw-text, the
3510          coding should actually be no-conversion.  */
3511       if (coding->type == coding_type_raw_text)
3512         {
3513           setup_coding_system (Qno_conversion, coding);
3514           return;
3515         }
3516       /* Else, let's decode only text code anyway.  */
3517 #endif /* 0 */
3518       eol_type = CODING_EOL_LF;
3519     }
3520
3521   val = Fget (coding->symbol, Qeol_type);
3522   if (VECTORP (val) && XVECTOR (val)->size == 3)
3523     {
3524       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3525       coding->heading_ascii = skip;
3526     }
3527 }
3528
/* Number of bytes that decoding_buffer_size and encoding_buffer_size
   add on top of the magnified source size.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the byte count of a source text is multiplied to
   get the size of a buffer for decoding it with CODING (a pointer to
   struct coding_system): 3 for ISO2022, 2 for SJIS and BIG5, 1 for
   raw-text, the decoder's buf_magnification for CCL, and 2 for
   anything else.  */
#define DECODING_BUFFER_MAG(coding)                                            \
  ((coding)->type == coding_type_iso2022                                       \
   ? 3                                                                         \
   : (((coding)->type == coding_type_sjis                                      \
       || (coding)->type == coding_type_big5)                                  \
      ? 2                                                                      \
      : ((coding)->type == coding_type_raw_text                                \
         ? 1                                                                   \
         : ((coding)->type == coding_type_ccl                                  \
            ? (coding)->spec.ccl.decoder.buf_magnification                     \
            : 2))))
3541
3542 /* Return maximum size (bytes) of a buffer enough for decoding
3543    SRC_BYTES of text encoded in CODING.  */
3544
3545 int
3546 decoding_buffer_size (coding, src_bytes)
3547      struct coding_system *coding;
3548      int src_bytes;
3549 {
3550   return (src_bytes * DECODING_BUFFER_MAG (coding)
3551           + CONVERSION_BUFFER_EXTRA_ROOM);
3552 }
3553
3554 /* Return maximum size (bytes) of a buffer enough for encoding
3555    SRC_BYTES of text to CODING.  */
3556
3557 int
3558 encoding_buffer_size (coding, src_bytes)
3559      struct coding_system *coding;
3560      int src_bytes;
3561 {
3562   int magnification;
3563
3564   if (coding->type == coding_type_ccl)
3565     magnification = coding->spec.ccl.encoder.buf_magnification;
3566   else
3567     magnification = 3;
3568
3569   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3570 }
3571
/* Smallest size in bytes that get_conversion_buffer ever allocates.  */
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* The buffer handed out by get_conversion_buffer, and the number of
   bytes currently allocated for it.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a SIZE bytes of buffer to be used for encoding
   or decoding.  Sufficient memory is allocated automatically: when
   the current buffer is smaller than SIZE it is replaced by a new one
   whose size is found by repeated doubling.  The previous contents
   are not copied to the new buffer, and a pointer obtained from an
   earlier call must not be used after the buffer was replaced.

   NOTE(review): this function has no code for an allocation failure;
   presumably xmalloc does not return in that case -- confirm.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* Never start doubling from less than the minimum.  In
         particular, starting from 0 (buffer not allocated yet) would
         make the loop below spin forever.  */
      if (real_size < MINIMUM_CONVERSION_BUFFER_SIZE)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;

      while (real_size < size) real_size *= 2;
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3600
3601 int
3602 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3603      struct coding_system *coding;
3604      unsigned char *source, *destination;
3605      int src_bytes, dst_bytes, encodep;
3606 {
3607   struct ccl_program *ccl
3608     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3609   int result;
3610
3611   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3612
3613   coding->produced = ccl_driver (ccl, source, destination,
3614                                  src_bytes, dst_bytes, &(coding->consumed));
3615   coding->produced_char
3616     = multibyte_chars_in_text (destination, coding->produced);
3617   coding->consumed_char
3618     = multibyte_chars_in_text (source, coding->consumed);
3619
3620   switch (ccl->status)
3621     {
3622     case CCL_STAT_SUSPEND_BY_SRC:
3623       result = CODING_FINISH_INSUFFICIENT_SRC;
3624       break;
3625     case CCL_STAT_SUSPEND_BY_DST:
3626       result = CODING_FINISH_INSUFFICIENT_DST;
3627       break;
3628     case CCL_STAT_QUIT:
3629     case CCL_STAT_INVALID_CMD:
3630       result = CODING_FINISH_INTERRUPT;
3631       break;
3632     default:
3633       result = CODING_FINISH_NORMAL;
3634       break;
3635     }
3636   return result;
3637 }
3638
3639 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3640    decoding, it may detect coding system and format of end-of-line if
3641    those are not yet decided.  */
3642
3643 int
3644 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3645      struct coding_system *coding;
3646      unsigned char *source, *destination;
3647      int src_bytes, dst_bytes;
3648 {
3649   int result;
3650
3651   if (src_bytes <= 0
3652       && ! (coding->mode & CODING_MODE_LAST_BLOCK
3653             && CODING_REQUIRE_FLUSHING (coding)))
3654     {
3655       coding->produced = coding->produced_char = 0;
3656       coding->consumed = coding->consumed_char = 0;
3657       coding->fake_multibyte = 0;
3658       return CODING_FINISH_NORMAL;
3659     }
3660
3661   if (coding->type == coding_type_undecided)
3662     detect_coding (coding, source, src_bytes);
3663
3664   if (coding->eol_type == CODING_EOL_UNDECIDED)
3665     detect_eol (coding, source, src_bytes);
3666
3667   switch (coding->type)
3668     {
3669     case coding_type_emacs_mule:
3670     case coding_type_undecided:
3671     case coding_type_raw_text:
3672       if (coding->eol_type == CODING_EOL_LF
3673           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3674         goto label_no_conversion;
3675       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3676       break;
3677
3678     case coding_type_sjis:
3679       result = decode_coding_sjis_big5 (coding, source, destination,
3680                                         src_bytes, dst_bytes, 1);
3681       break;
3682
3683     case coding_type_iso2022:
3684       result = decode_coding_iso2022 (coding, source, destination,
3685                                       src_bytes, dst_bytes);
3686       break;
3687
3688     case coding_type_big5:
3689       result = decode_coding_sjis_big5 (coding, source, destination,
3690                                         src_bytes, dst_bytes, 0);
3691       break;
3692
3693     case coding_type_ccl:
3694       result = ccl_coding_driver (coding, source, destination,
3695                                   src_bytes, dst_bytes, 0);
3696       break;
3697
3698     default:                    /* i.e. case coding_type_no_conversion: */
3699     label_no_conversion:
3700       if (dst_bytes && src_bytes > dst_bytes)
3701         {
3702           coding->produced = dst_bytes;
3703           result = CODING_FINISH_INSUFFICIENT_DST;
3704         }
3705       else
3706         {
3707           coding->produced = src_bytes;
3708           result = CODING_FINISH_NORMAL;
3709         }
3710       if (dst_bytes)
3711         bcopy (source, destination, coding->produced);
3712       else
3713         safe_bcopy (source, destination, coding->produced);
3714       coding->fake_multibyte = 1;
3715       coding->consumed
3716         = coding->consumed_char = coding->produced_char = coding->produced;
3717       break;
3718     }
3719
3720   return result;
3721 }
3722
3723 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3724
3725 int
3726 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3727      struct coding_system *coding;
3728      unsigned char *source, *destination;
3729      int src_bytes, dst_bytes;
3730 {
3731   int result;
3732
3733   if (src_bytes <= 0
3734       && ! (coding->mode & CODING_MODE_LAST_BLOCK
3735             && CODING_REQUIRE_FLUSHING (coding)))
3736     {
3737       coding->produced = coding->produced_char = 0;
3738       coding->consumed = coding->consumed_char = 0;
3739       coding->fake_multibyte = 0;
3740       return CODING_FINISH_NORMAL;
3741     }
3742
3743   switch (coding->type)
3744     {
3745     case coding_type_emacs_mule:
3746     case coding_type_undecided:
3747     case coding_type_raw_text:
3748       if (coding->eol_type == CODING_EOL_LF
3749           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3750         goto label_no_conversion;
3751       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3752       break;
3753
3754     case coding_type_sjis:
3755       result = encode_coding_sjis_big5 (coding, source, destination,
3756                                         src_bytes, dst_bytes, 1);
3757       break;
3758
3759     case coding_type_iso2022:
3760       result = encode_coding_iso2022 (coding, source, destination,
3761                                       src_bytes, dst_bytes);
3762       break;
3763
3764     case coding_type_big5:
3765       result = encode_coding_sjis_big5 (coding, source, destination,
3766                                         src_bytes, dst_bytes, 0);
3767       break;
3768
3769     case coding_type_ccl:
3770       result = ccl_coding_driver (coding, source, destination,
3771                                   src_bytes, dst_bytes, 1);
3772       break;
3773
3774     default:                    /* i.e. case coding_type_no_conversion: */
3775     label_no_conversion:
3776       if (dst_bytes && src_bytes > dst_bytes)
3777         {
3778           coding->produced = dst_bytes;
3779           result = CODING_FINISH_INSUFFICIENT_DST;
3780         }
3781       else
3782         {
3783           coding->produced = src_bytes;
3784           result = CODING_FINISH_NORMAL;
3785         }
3786       if (dst_bytes)
3787         bcopy (source, destination, coding->produced);
3788       else
3789         safe_bcopy (source, destination, coding->produced);
3790       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3791         {
3792           unsigned char *p = destination, *pend = p + coding->produced;
3793           while (p < pend)
3794             if (*p++ == '\015') p[-1] = '\n';
3795         }
3796       coding->fake_multibyte = 1;
3797       coding->consumed
3798         = coding->consumed_char = coding->produced_char = coding->produced;
3799       break;
3800     }
3801
3802   return result;
3803 }
3804
3805 /* Scan text in the region between *BEG and *END (byte positions),
3806    skip characters which we don't have to decode by coding system
3807    CODING at the head and tail, then set *BEG and *END to the region
3808    of the text we actually have to convert.  The caller should move
3809    the gap out of the region in advance.
3810
3811    If STR is not NULL, *BEG and *END are indices into STR.  */
3812
3813 static void
3814 shrink_decoding_region (beg, end, coding, str)
3815      int *beg, *end;
3816      struct coding_system *coding;
3817      unsigned char *str;
3818 {
3819   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3820   int eol_conversion;
3821
3822   if (coding->type == coding_type_ccl
3823       || coding->type == coding_type_undecided
3824       || !NILP (coding->post_read_conversion))
3825     {
3826       /* We can't skip any data.  */
3827       return;
3828     }
3829   else if (coding->type == coding_type_no_conversion)
3830     {
3831       /* We need no conversion, but don't have to skip any data here.
3832          Decoding routine handles them effectively anyway.  */
3833       return;
3834     }
3835
3836   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3837
3838   if ((! eol_conversion) && (coding->heading_ascii >= 0))
3839     /* Detection routine has already found how much we can skip at the
3840        head.  */
3841     *beg += coding->heading_ascii;
3842
3843   if (str)
3844     {
3845       begp_orig = begp = str + *beg;
3846       endp_orig = endp = str + *end;
3847     }
3848   else
3849     {
3850       begp_orig = begp = BYTE_POS_ADDR (*beg);
3851       endp_orig = endp = begp + *end - *beg;
3852     }
3853
3854   switch (coding->type)
3855     {
3856     case coding_type_emacs_mule:
3857     case coding_type_raw_text:
3858       if (eol_conversion)
3859         {
3860           if (coding->heading_ascii < 0)
3861             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3862           while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
3863             endp--;
3864           /* Do not consider LF as ascii if preceded by CR, since that
3865              confuses eol decoding. */
3866           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3867             endp++;
3868         }
3869       else
3870         begp = endp;
3871       break;
3872
3873     case coding_type_sjis:
3874     case coding_type_big5:
3875       /* We can skip all ASCII characters at the head.  */
3876       if (coding->heading_ascii < 0)
3877         {
3878           if (eol_conversion)
3879             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3880           else
3881             while (begp < endp && *begp < 0x80) begp++;
3882         }
3883       /* We can skip all ASCII characters at the tail except for the
3884          second byte of SJIS or BIG5 code.  */
3885       if (eol_conversion)
3886         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3887       else
3888         while (begp < endp && endp[-1] < 0x80) endp--;
3889       /* Do not consider LF as ascii if preceded by CR, since that
3890          confuses eol decoding. */
3891       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3892         endp++;
3893       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3894         endp++;
3895       break;
3896
3897     default:            /* i.e. case coding_type_iso2022: */
3898       if (coding->heading_ascii < 0)
3899         {
3900           /* We can skip all ASCII characters at the head except for a
3901              few control codes.  */
3902           while (begp < endp && (c = *begp) < 0x80
3903                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3904                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3905                  && (!eol_conversion || c != ISO_CODE_LF))
3906             begp++;
3907         }
3908       switch (coding->category_idx)
3909         {
3910         case CODING_CATEGORY_IDX_ISO_8_1:
3911         case CODING_CATEGORY_IDX_ISO_8_2:
3912           /* We can skip all ASCII characters at the tail.  */
3913           if (eol_conversion)
3914             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3915           else
3916             while (begp < endp && endp[-1] < 0x80) endp--;
3917           /* Do not consider LF as ascii if preceded by CR, since that
3918              confuses eol decoding. */
3919           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3920             endp++;
3921           break;
3922
3923         case CODING_CATEGORY_IDX_ISO_7:
3924         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3925           /* We can skip all charactes at the tail except for ESC and
3926              the following 2-byte at the tail.  */
3927           if (eol_conversion)
3928             while (begp < endp
3929                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3930               endp--;
3931           else
3932             while (begp < endp
3933                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3934               endp--;
3935           /* Do not consider LF as ascii if preceded by CR, since that
3936              confuses eol decoding. */
3937           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3938             endp++;
3939           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3940             {
3941               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3942                 /* This is an ASCII designation sequence.  We can
3943                     surely skip the tail.  */
3944                 endp += 2;
3945               else
3946                 /* Hmmm, we can't skip the tail.  */
3947                 endp = endp_orig;
3948             }
3949         }
3950     }
3951   *beg += begp - begp_orig;
3952   *end += endp - endp_orig;
3953   return;
3954 }
3955
3956 /* Like shrink_decoding_region but for encoding.  */
3957
3958 static void
3959 shrink_encoding_region (beg, end, coding, str)
3960      int *beg, *end;
3961      struct coding_system *coding;
3962      unsigned char *str;
3963 {
3964   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3965   int eol_conversion;
3966
3967   if (coding->type == coding_type_ccl)
3968     /* We can't skip any data.  */
3969     return;
3970   else if (coding->type == coding_type_no_conversion)
3971     {
3972       /* We need no conversion.  */
3973       *beg = *end;
3974       return;
3975     }
3976
3977   if (str)
3978     {
3979       begp_orig = begp = str + *beg;
3980       endp_orig = endp = str + *end;
3981     }
3982   else
3983     {
3984       begp_orig = begp = BYTE_POS_ADDR (*beg);
3985       endp_orig = endp = begp + *end - *beg;
3986     }
3987
3988   eol_conversion = (coding->eol_type == CODING_EOL_CR
3989                     || coding->eol_type == CODING_EOL_CRLF);
3990
3991   /* Here, we don't have to check coding->pre_write_conversion because
3992      the caller is expected to have handled it already.  */
3993   switch (coding->type)
3994     {
3995     case coding_type_undecided:
3996     case coding_type_emacs_mule:
3997     case coding_type_raw_text:
3998       if (eol_conversion)
3999         {
4000           while (begp < endp && *begp != '\n') begp++;
4001           while (begp < endp && endp[-1] != '\n') endp--;
4002         }
4003       else
4004         begp = endp;
4005       break;
4006
4007     case coding_type_iso2022:
4008       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4009         {
4010           unsigned char *bol = begp;
4011           while (begp < endp && *begp < 0x80)
4012             {
4013               begp++;
4014               if (begp[-1] == '\n')
4015                 bol = begp;
4016             }
4017           begp = bol;
4018           goto label_skip_tail;
4019         }
4020       /* fall down ... */
4021
4022     default:
4023       /* We can skip all ASCII characters at the head and tail.  */
4024       if (eol_conversion)
4025         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4026       else
4027         while (begp < endp && *begp < 0x80) begp++;
4028     label_skip_tail:
4029       if (eol_conversion)
4030         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4031       else
4032         while (begp < endp && *(endp - 1) < 0x80) endp--;
4033       break;
4034     }
4035
4036   *beg += begp - begp_orig;
4037   *end += endp - endp_orig;
4038   return;
4039 }
4040
4041 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4042    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4043    coding system CODING, and return the status code of code conversion
4044    (currently, this value has no meaning).
4045
4046    How many characters (and bytes) are converted to how many
4047    characters (and bytes) are recorded in members of the structure
4048    CODING.
4049
4050    If REPLACE is nonzero, we do various things as if the original text
4051    is deleted and a new text is inserted.  See the comments in
4052    replace_range (insdel.c) to know what we are doing.  */
4053
4054 int
4055 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4056      int from, from_byte, to, to_byte, encodep, replace;
4057      struct coding_system *coding;
4058 {
4059   int len = to - from, len_byte = to_byte - from_byte;
4060   int require, inserted, inserted_byte;
4061   int head_skip, tail_skip, total_skip;
4062   Lisp_Object saved_coding_symbol;
4063   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
4064   int first = 1;
4065   int fake_multibyte = 0;
4066   unsigned char *src, *dst;
4067   Lisp_Object deletion;
4068
4069   deletion = Qnil;
4070   saved_coding_symbol = Qnil;
4071
4072   if (from < PT && PT < to)
4073     SET_PT_BOTH (from, from_byte);
4074
4075   if (replace)
4076     {
4077       int saved_from = from;
4078
4079       prepare_to_modify_buffer (from, to, &from);
4080       if (saved_from != from)
4081         {
4082           to = from + len;
4083           if (multibyte)
4084             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4085           else
4086             from_byte = from, to_byte = to;
4087           len_byte = to_byte - from_byte;
4088         }
4089     }
4090
4091   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4092     {
4093       /* We must detect encoding of text and eol format.  */
4094
4095       if (from < GPT && to > GPT)
4096         move_gap_both (from, from_byte);
4097       if (coding->type == coding_type_undecided)
4098         {
4099           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4100           if (coding->type == coding_type_undecided)
4101             /* It seems that the text contains only ASCII, but we
4102                should not left it undecided because the deeper
4103                decoding routine (decode_coding) tries to detect the
4104                encodings again in vain.  */
4105             coding->type = coding_type_emacs_mule;
4106         }
4107       if (coding->eol_type == CODING_EOL_UNDECIDED)
4108         {
4109           saved_coding_symbol = coding->symbol;
4110           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4111           if (coding->eol_type == CODING_EOL_UNDECIDED)
4112             coding->eol_type = CODING_EOL_LF;
4113           /* We had better recover the original eol format if we
4114              encounter an inconsitent eol format while decoding.  */
4115           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4116         }
4117     }
4118
4119   coding->consumed_char = len, coding->consumed = len_byte;
4120
4121   if (encodep
4122       ? ! CODING_REQUIRE_ENCODING (coding)
4123       : ! CODING_REQUIRE_DECODING (coding))
4124     {
4125       coding->produced = len_byte;
4126       if (multibyte
4127           && ! replace
4128           /* See the comment of the member heading_ascii in coding.h.  */
4129           && coding->heading_ascii < len_byte)
4130         {
4131           /* We still may have to combine byte at the head and the
4132              tail of the text in the region.  */
4133           if (from < GPT && GPT < to)
4134             move_gap_both (to, to_byte);
4135           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4136           adjust_after_insert (from, from_byte, to, to_byte, len);
4137           coding->produced_char = len;
4138         }
4139       else
4140         {
4141           if (!replace)
4142             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4143           coding->produced_char = len_byte;
4144         }
4145       return 0;
4146     }
4147
4148   /* Now we convert the text.  */
4149
4150   /* For encoding, we must process pre-write-conversion in advance.  */
4151   if (encodep
4152       && ! NILP (coding->pre_write_conversion)
4153       && SYMBOLP (coding->pre_write_conversion)
4154       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4155     {
4156       /* The function in pre-write-conversion may put a new text in a
4157          new buffer.  */
4158       struct buffer *prev = current_buffer, *new;
4159
4160       call2 (coding->pre_write_conversion,
4161              make_number (from), make_number (to));
4162       if (current_buffer != prev)
4163         {
4164           len = ZV - BEGV;
4165           new = current_buffer;
4166           set_buffer_internal_1 (prev);
4167           del_range_2 (from, from_byte, to, to_byte);
4168           insert_from_buffer (new, BEG, len, 0);
4169           to = from + len;
4170           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4171           len_byte = to_byte - from_byte;
4172         }
4173     }
4174
4175   if (replace)
4176     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4177
4178   /* Try to skip the heading and tailing ASCIIs.  */
4179   {
4180     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4181
4182     if (from < GPT && GPT < to)
4183       move_gap_both (from, from_byte);
4184     if (encodep)
4185       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4186     else
4187       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4188     if (from_byte == to_byte
4189         && ! (coding->mode & CODING_MODE_LAST_BLOCK
4190               && CODING_REQUIRE_FLUSHING (coding)))
4191       {
4192         coding->produced = len_byte;
4193         coding->produced_char = multibyte ? len : len_byte;
4194         if (!replace)
4195           /* We must record and adjust for this new text now.  */
4196           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4197         return 0;
4198       }
4199
4200     head_skip = from_byte - from_byte_orig;
4201     tail_skip = to_byte_orig - to_byte;
4202     total_skip = head_skip + tail_skip;
4203     from += head_skip;
4204     to -= tail_skip;
4205     len -= total_skip; len_byte -= total_skip;
4206   }
4207
4208   /* For converion, we must put the gap before the text in addition to
4209      making the gap larger for efficient decoding.  The required gap
4210      size starts from 2000 which is the magic number used in make_gap.
4211      But, after one batch of conversion, it will be incremented if we
4212      find that it is not enough .  */
4213   require = 2000;
4214
4215   if (GAP_SIZE  < require)
4216     make_gap (require - GAP_SIZE);
4217   move_gap_both (from, from_byte);
4218
4219   inserted = inserted_byte = 0;
4220   src = GAP_END_ADDR, dst = GPT_ADDR;
4221
4222   GAP_SIZE += len_byte;
4223   ZV -= len;
4224   Z -= len;
4225   ZV_BYTE -= len_byte;
4226   Z_BYTE -= len_byte;
4227
4228   if (GPT - BEG < beg_unchanged)
4229     beg_unchanged = GPT - BEG;
4230   if (Z - GPT < end_unchanged)
4231     end_unchanged = Z - GPT;
4232
4233   for (;;)
4234     {
4235       int result;
4236
4237       /* The buffer memory is changed from:
4238          +--------+converted-text+---------+-------original-text------+---+
4239          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4240                   |<------------------- GAP_SIZE -------------------->|  */
4241       if (encodep)
4242         result = encode_coding (coding, src, dst, len_byte, 0);
4243       else
4244         result = decode_coding (coding, src, dst, len_byte, 0);
4245       /* to:
4246          +--------+-------converted-text--------+--+---original-text--+---+
4247          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4248                   |<------------------- GAP_SIZE -------------------->|  */
4249       if (coding->fake_multibyte)
4250         fake_multibyte = 1;
4251
4252       if (!encodep && !multibyte)
4253         coding->produced_char = coding->produced;
4254       inserted += coding->produced_char;
4255       inserted_byte += coding->produced;
4256       len_byte -= coding->consumed;
4257       src += coding->consumed;
4258       dst += inserted_byte;
4259
4260       if (result == CODING_FINISH_NORMAL)
4261         {
4262           src += len_byte;
4263           break;
4264         }
4265       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4266         {
4267           unsigned char *pend = dst, *p = pend - inserted_byte;
4268
4269           /* Encode LFs back to the original eol format (CR or CRLF).  */
4270           if (coding->eol_type == CODING_EOL_CR)
4271             {
4272               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4273             }
4274           else
4275             {
4276               int count = 0;
4277
4278               while (p < pend) if (*p++ == '\n') count++;
4279               if (src - dst < count)
4280                 {
4281                   /* We don't have sufficient room for putting LFs
4282                      back to CRLF.  We must record converted and
4283                      not-yet-converted text back to the buffer
4284                      content, enlarge the gap, then record them out of
4285                      the buffer contents again.  */
4286                   int add = len_byte + inserted_byte;
4287
4288                   GAP_SIZE -= add;
4289                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4290                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4291                   make_gap (count - GAP_SIZE);
4292                   GAP_SIZE += add;
4293                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4294                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4295                   /* Don't forget to update SRC, DST, and PEND.  */
4296                   src = GAP_END_ADDR - len_byte;
4297                   dst = GPT_ADDR + inserted_byte;
4298                   pend = dst;
4299                 }
4300               inserted += count;
4301               inserted_byte += count;
4302               coding->produced += count;
4303               p = dst = pend + count;
4304               while (count)
4305                 {
4306                   *--p = *--pend;
4307                   if (*p == '\n') count--, *--p = '\r';
4308                 }
4309             }
4310
4311           /* Suppress eol-format conversion in the further conversion.  */
4312           coding->eol_type = CODING_EOL_LF;
4313
4314           /* Restore the original symbol.  */
4315           coding->symbol = saved_coding_symbol;
4316
4317           continue;
4318         }
4319       if (len_byte <= 0)
4320         break;
4321       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4322         {
4323           /* The source text ends in invalid codes.  Let's just
4324              make them valid buffer contents, and finish conversion.  */
4325           inserted += len_byte;
4326           inserted_byte += len_byte;
4327           while (len_byte--)
4328             *dst++ = *src++;
4329           fake_multibyte = 1;
4330           break;
4331         }
4332       if (result == CODING_FINISH_INTERRUPT)
4333         {
4334           /* The conversion procedure was interrupted by a user.  */
4335           fake_multibyte = 1;
4336           break;
4337         }
4338       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
4339       if (coding->consumed < 1)
4340         {
4341           /* It's quite strange to require more memory without
4342              consuming any bytes.  Perhaps CCL program bug.  */
4343           fake_multibyte = 1;
4344           break;
4345         }
4346       if (first)
4347         {
4348           /* We have just done the first batch of conversion which was
4349              stoped because of insufficient gap.  Let's reconsider the
4350              required gap size (i.e. SRT - DST) now.
4351
4352              We have converted ORIG bytes (== coding->consumed) into
4353              NEW bytes (coding->produced).  To convert the remaining
4354              LEN bytes, we may need REQUIRE bytes of gap, where:
4355                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4356                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4357              Here, we are sure that NEW >= ORIG.  */
4358           float ratio = coding->produced - coding->consumed;
4359           ratio /= coding->consumed;
4360           require = len_byte * ratio;
4361           first = 0;
4362         }
4363       if ((src - dst) < (require + 2000))
4364         {
4365           /* See the comment above the previous call of make_gap.  */
4366           int add = len_byte + inserted_byte;
4367
4368           GAP_SIZE -= add;
4369           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4370           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4371           make_gap (require + 2000);
4372           GAP_SIZE += add;
4373           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4374           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4375           /* Don't forget to update SRC, DST.  */
4376           src = GAP_END_ADDR - len_byte;
4377           dst = GPT_ADDR + inserted_byte;
4378         }
4379     }
4380   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4381
4382   if (multibyte
4383       && (fake_multibyte
4384           || !encodep && (to - from) != (to_byte - from_byte)))
4385     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4386
4387   /* If we have shrinked the conversion area, adjust it now.  */
4388   if (total_skip > 0)
4389     {
4390       if (tail_skip > 0)
4391         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4392       inserted += total_skip; inserted_byte += total_skip;
4393       GAP_SIZE += total_skip;
4394       GPT -= head_skip; GPT_BYTE -= head_skip;
4395       ZV -= total_skip; ZV_BYTE -= total_skip;
4396       Z -= total_skip; Z_BYTE -= total_skip;
4397       from -= head_skip; from_byte -= head_skip;
4398       to += tail_skip; to_byte += tail_skip;
4399     }
4400
4401   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4402
4403   if (! encodep && ! NILP (coding->post_read_conversion))
4404     {
4405       Lisp_Object val;
4406       int orig_inserted = inserted, pos = PT;
4407
4408       if (from != pos)
4409         temp_set_point_both (current_buffer, from, from_byte);
4410       val = call1 (coding->post_read_conversion, make_number (inserted));
4411       if (! NILP (val))
4412         {
4413           CHECK_NUMBER (val, 0);
4414           inserted = XFASTINT (val);
4415         }
4416       if (pos >= from + orig_inserted)
4417         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4418     }
4419
4420   signal_after_change (from, to - from, inserted);
4421
4422   {
4423     coding->consumed = to_byte - from_byte;
4424     coding->consumed_char = to - from;
4425     coding->produced = inserted_byte;
4426     coding->produced_char = inserted;
4427   }
4428
4429   return 0;
4430 }
4431
4432 Lisp_Object
4433 code_convert_string (str, coding, encodep, nocopy)
4434      Lisp_Object str;
4435      struct coding_system *coding;
4436      int encodep, nocopy;
4437 {
4438   int len;
4439   char *buf;
4440   int from = 0, to = XSTRING (str)->size;
4441   int to_byte = STRING_BYTES (XSTRING (str));
4442   struct gcpro gcpro1;
4443   Lisp_Object saved_coding_symbol;
4444   int result;
4445
4446   saved_coding_symbol = Qnil;
4447   if (encodep && !NILP (coding->pre_write_conversion)
4448       || !encodep && !NILP (coding->post_read_conversion))
4449     {
4450       /* Since we have to call Lisp functions which assume target text
4451          is in a buffer, after setting a temporary buffer, call
4452          code_convert_region.  */
4453       int count = specpdl_ptr - specpdl;
4454       struct buffer *prev = current_buffer;
4455
4456       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4457       temp_output_buffer_setup (" *code-converting-work*");
4458       set_buffer_internal (XBUFFER (Vstandard_output));
4459       if (encodep)
4460         insert_from_string (str, 0, 0, to, to_byte, 0);
4461       else
4462         {
4463           /* We must insert the contents of STR as is without
4464              unibyte<->multibyte conversion.  */
4465           current_buffer->enable_multibyte_characters = Qnil;
4466           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4467           current_buffer->enable_multibyte_characters = Qt;
4468         }
4469       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4470       if (encodep)
4471         /* We must return the buffer contents as unibyte string.  */
4472         current_buffer->enable_multibyte_characters = Qnil;
4473       str = make_buffer_string (BEGV, ZV, 0);
4474       set_buffer_internal (prev);
4475       return unbind_to (count, str);
4476     }
4477
4478   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4479     {
4480       /* See the comments in code_convert_region.  */
4481       if (coding->type == coding_type_undecided)
4482         {
4483           detect_coding (coding, XSTRING (str)->data, to_byte);
4484           if (coding->type == coding_type_undecided)
4485             coding->type = coding_type_emacs_mule;
4486         }
4487       if (coding->eol_type == CODING_EOL_UNDECIDED)
4488         {
4489           saved_coding_symbol = coding->symbol;
4490           detect_eol (coding, XSTRING (str)->data, to_byte);
4491           if (coding->eol_type == CODING_EOL_UNDECIDED)
4492             coding->eol_type = CODING_EOL_LF;
4493           /* We had better recover the original eol format if we
4494              encounter an inconsitent eol format while decoding.  */
4495           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4496         }
4497     }
4498
4499   if (encodep
4500       ? ! CODING_REQUIRE_ENCODING (coding)
4501       : ! CODING_REQUIRE_DECODING (coding))
4502     from = to_byte;
4503   else
4504     {
4505       /* Try to skip the heading and tailing ASCIIs.  */
4506       if (encodep)
4507         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4508       else
4509         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4510     }
4511   if (from == to_byte
4512       && ! (coding->mode & CODING_MODE_LAST_BLOCK
4513             && CODING_REQUIRE_FLUSHING (coding)))
4514     return (nocopy ? str : Fcopy_sequence (str));
4515
4516   if (encodep)
4517     len = encoding_buffer_size (coding, to_byte - from);
4518   else
4519     len = decoding_buffer_size (coding, to_byte - from);
4520   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4521   GCPRO1 (str);
4522   buf = get_conversion_buffer (len);
4523   UNGCPRO;
4524
4525   if (from > 0)
4526     bcopy (XSTRING (str)->data, buf, from);
4527   result = (encodep
4528             ? encode_coding (coding, XSTRING (str)->data + from,
4529                              buf + from, to_byte - from, len)
4530             : decode_coding (coding, XSTRING (str)->data + from,
4531                              buf + from, to_byte - from, len));
4532   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4533     {
4534       /* We simple try to decode the whole string again but without
4535          eol-conversion this time.  */
4536       coding->eol_type = CODING_EOL_LF;
4537       coding->symbol = saved_coding_symbol;
4538       return code_convert_string (str, coding, encodep, nocopy);
4539     }
4540
4541   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4542          STRING_BYTES (XSTRING (str)) - to_byte);
4543
4544   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4545   if (encodep)
4546     str = make_unibyte_string (buf, len + coding->produced);
4547   else
4548     {
4549       int chars= (coding->fake_multibyte
4550                   ? multibyte_chars_in_text (buf + from, coding->produced)
4551                   : coding->produced_char);
4552       str = make_multibyte_string (buf, len + chars, len + coding->produced);
4553     }
4554
4555   return str;
4556 }
4557
4558 \f
4559 #ifdef emacs
4560 /*** 8. Emacs Lisp library functions ***/
4561
4562 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4563   "Return t if OBJECT is nil or a coding-system.\n\
4564 See the documentation of `make-coding-system' for information\n\
4565 about coding-system objects.")
4566   (obj)
4567      Lisp_Object obj;
4568 {
4569   if (NILP (obj))
4570     return Qt;
4571   if (!SYMBOLP (obj))
4572     return Qnil;
4573   /* Get coding-spec vector for OBJ.  */
4574   obj = Fget (obj, Qcoding_system);
4575   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4576           ? Qt : Qnil);
4577 }
4578
4579 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4580        Sread_non_nil_coding_system, 1, 1, 0,
4581   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4582   (prompt)
4583      Lisp_Object prompt;
4584 {
4585   Lisp_Object val;
4586   do
4587     {
4588       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4589                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4590     }
4591   while (XSTRING (val)->size == 0);
4592   return (Fintern (val, Qnil));
4593 }
4594
4595 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4596   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4597 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4598   (prompt, default_coding_system)
4599      Lisp_Object prompt, default_coding_system;
4600 {
4601   Lisp_Object val;
4602   if (SYMBOLP (default_coding_system))
4603     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4604   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4605                           Qt, Qnil, Qcoding_system_history,
4606                           default_coding_system, Qnil);
4607   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4608 }
4609
4610 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4611        1, 1, 0,
4612   "Check validity of CODING-SYSTEM.\n\
4613 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4614 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4615 The value of property should be a vector of length 5.")
4616   (coding_system)
4617      Lisp_Object coding_system;
4618 {
4619   CHECK_SYMBOL (coding_system, 0);
4620   if (!NILP (Fcoding_system_p (coding_system)))
4621     return coding_system;
4622   while (1)
4623     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4624 }
4625 \f
4626 Lisp_Object
4627 detect_coding_system (src, src_bytes, highest)
4628      unsigned char *src;
4629      int src_bytes, highest;
4630 {
4631   int coding_mask, eol_type;
4632   Lisp_Object val, tmp;
4633   int dummy;
4634
4635   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4636   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4637   if (eol_type == CODING_EOL_INCONSISTENT)
4638     eol_type = CODING_EOL_UNDECIDED;
4639
4640   if (!coding_mask)
4641     {
4642       val = Qundecided;
4643       if (eol_type != CODING_EOL_UNDECIDED)
4644         {
4645           Lisp_Object val2;
4646           val2 = Fget (Qundecided, Qeol_type);
4647           if (VECTORP (val2))
4648             val = XVECTOR (val2)->contents[eol_type];
4649         }
4650       return (highest ? val : Fcons (val, Qnil));
4651     }
4652
4653   /* At first, gather possible coding systems in VAL.  */
4654   val = Qnil;
4655   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4656     {
4657       int idx
4658         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4659       if (coding_mask & (1 << idx))
4660         {
4661           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4662           if (highest)
4663             break;
4664         }
4665     }
4666   if (!highest)
4667     val = Fnreverse (val);
4668
4669   /* Then, replace the elements with subsidiary coding systems.  */
4670   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4671     {
4672       if (eol_type != CODING_EOL_UNDECIDED
4673           && eol_type != CODING_EOL_INCONSISTENT)
4674         {
4675           Lisp_Object eol;
4676           eol = Fget (XCONS (tmp)->car, Qeol_type);
4677           if (VECTORP (eol))
4678             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4679         }
4680     }
4681   return (highest ? XCONS (val)->car : val);
4682 }
4683
4684 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4685        2, 3, 0,
4686   "Detect coding system of the text in the region between START and END.\n\
4687 Return a list of possible coding systems ordered by priority.\n\
4688 \n\
4689 If only ASCII characters are found, it returns a list of single element\n\
4690 `undecided' or its subsidiary coding system according to a detected\n\
4691 end-of-line format.\n\
4692 \n\
4693 If optional argument HIGHEST is non-nil, return the coding system of\n\
4694 highest priority.")
4695   (start, end, highest)
4696      Lisp_Object start, end, highest;
4697 {
4698   int from, to;
4699   int from_byte, to_byte;
4700
4701   CHECK_NUMBER_COERCE_MARKER (start, 0);
4702   CHECK_NUMBER_COERCE_MARKER (end, 1);
4703
4704   validate_region (&start, &end);
4705   from = XINT (start), to = XINT (end);
4706   from_byte = CHAR_TO_BYTE (from);
4707   to_byte = CHAR_TO_BYTE (to);
4708
4709   if (from < GPT && to >= GPT)
4710     move_gap_both (to, to_byte);
4711
4712   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4713                                to_byte - from_byte,
4714                                !NILP (highest));
4715 }
4716
4717 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4718        1, 2, 0,
4719   "Detect coding system of the text in STRING.\n\
4720 Return a list of possible coding systems ordered by priority.\n\
4721 \n\
4722 If only ASCII characters are found, it returns a list of single element\n\
4723 `undecided' or its subsidiary coding system according to a detected\n\
4724 end-of-line format.\n\
4725 \n\
4726 If optional argument HIGHEST is non-nil, return the coding system of\n\
4727 highest priority.")
4728   (string, highest)
4729      Lisp_Object string, highest;
4730 {
4731   CHECK_STRING (string, 0);
4732
4733   return detect_coding_system (XSTRING (string)->data,
4734                                STRING_BYTES (XSTRING (string)),
4735                                !NILP (highest));
4736 }
4737
4738 Lisp_Object
4739 code_convert_region1 (start, end, coding_system, encodep)
4740      Lisp_Object start, end, coding_system;
4741      int encodep;
4742 {
4743   struct coding_system coding;
4744   int from, to, len;
4745
4746   CHECK_NUMBER_COERCE_MARKER (start, 0);
4747   CHECK_NUMBER_COERCE_MARKER (end, 1);
4748   CHECK_SYMBOL (coding_system, 2);
4749
4750   validate_region (&start, &end);
4751   from = XFASTINT (start);
4752   to = XFASTINT (end);
4753
4754   if (NILP (coding_system))
4755     return make_number (to - from);
4756
4757   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4758     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4759
4760   /* The code conversion routine can not preserve text properties for
4761      now.  So, we must remove all text properties in the region.  */
4762   Fset_text_properties (start, end, Qnil, Qnil);
4763
4764   coding.mode |= CODING_MODE_LAST_BLOCK;
4765   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4766                        &coding, encodep, 1);
4767   Vlast_coding_system_used = coding.symbol;
4768   return make_number (coding.produced_char);
4769 }
4770
4771 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4772        3, 3, "r\nzCoding system: ",
4773   "Decode the current region by specified coding system.\n\
4774 When called from a program, takes three arguments:\n\
4775 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4776 This function sets `last-coding-system-used' to the precise coding system\n\
4777 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4778 not fully specified.)\n\
4779 It returns the length of the decoded text.")
4780   (start, end, coding_system)
4781      Lisp_Object start, end, coding_system;
4782 {
4783   return code_convert_region1 (start, end, coding_system, 0);
4784 }
4785
4786 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
4787        3, 3, "r\nzCoding system: ",
4788   "Encode the current region by specified coding system.\n\
4789 When called from a program, takes three arguments:\n\
4790 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4791 This function sets `last-coding-system-used' to the precise coding system\n\
4792 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4793 not fully specified.)\n\
4794 It returns the length of the encoded text.")
4795   (start, end, coding_system)
4796      Lisp_Object start, end, coding_system;
4797 {
4798   return code_convert_region1 (start, end, coding_system, 1);
4799 }
4800
4801 Lisp_Object
4802 code_convert_string1 (string, coding_system, nocopy, encodep)
4803      Lisp_Object string, coding_system, nocopy;
4804      int encodep;
4805 {
4806   struct coding_system coding;
4807
4808   CHECK_STRING (string, 0);
4809   CHECK_SYMBOL (coding_system, 1);
4810
4811   if (NILP (coding_system))
4812     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4813
4814   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4815     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4816
4817   coding.mode |= CODING_MODE_LAST_BLOCK;
4818   Vlast_coding_system_used = coding.symbol;
4819   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4820 }
4821
4822 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
4823        2, 3, 0,
4824   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
4825 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4826 if the decoding operation is trivial.\n\
4827 This function sets `last-coding-system-used' to the precise coding system\n\
4828 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4829 not fully specified.)")
4830   (string, coding_system, nocopy)
4831      Lisp_Object string, coding_system, nocopy;
4832 {
4833   return code_convert_string1 (string, coding_system, nocopy, 0);
4834 }
4835
4836 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
4837        2, 3, 0,
4838   "Encode STRING to CODING-SYSTEM, and return the result.\n\
4839 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4840 if the encoding operation is trivial.\n\
4841 This function sets `last-coding-system-used' to the precise coding system\n\
4842 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4843 not fully specified.)")
4844   (string, coding_system, nocopy)
4845      Lisp_Object string, coding_system, nocopy;
4846 {
4847   return code_convert_string1 (string, coding_system, nocopy, 1);
4848 }
4849
4850 /* Encode or decode STRING according to CODING_SYSTEM.
4851    Do not set Vlast_coding_system_used.  */
4852
4853 Lisp_Object
4854 code_convert_string_norecord (string, coding_system, encodep)
4855      Lisp_Object string, coding_system;
4856      int encodep;
4857 {
4858   struct coding_system coding;
4859
4860   CHECK_STRING (string, 0);
4861   CHECK_SYMBOL (coding_system, 1);
4862
4863   if (NILP (coding_system))
4864     return string;
4865
4866   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4867     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4868
4869   coding.mode |= CODING_MODE_LAST_BLOCK;
4870   return code_convert_string (string, &coding, encodep, Qt);
4871 }
4872 \f
4873 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4874   "Decode a JISX0208 character of shift-jis encoding.\n\
4875 CODE is the character code in SJIS.\n\
4876 Return the corresponding character.")
4877   (code)
4878      Lisp_Object code;
4879 {
4880   unsigned char c1, c2, s1, s2;
4881   Lisp_Object val;
4882
4883   CHECK_NUMBER (code, 0);
4884   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4885   DECODE_SJIS (s1, s2, c1, c2);
4886   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4887   return val;
4888 }
4889
4890 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4891   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4892 Return the corresponding character code in SJIS.")
4893   (ch)
4894      Lisp_Object ch;
4895 {
4896   int charset, c1, c2, s1, s2;
4897   Lisp_Object val;
4898
4899   CHECK_NUMBER (ch, 0);
4900   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4901   if (charset == charset_jisx0208)
4902     {
4903       ENCODE_SJIS (c1, c2, s1, s2);
4904       XSETFASTINT (val, (s1 << 8) | s2);
4905     }
4906   else
4907     XSETFASTINT (val, 0);
4908   return val;
4909 }
4910
4911 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4912   "Decode a Big5 character CODE of BIG5 coding system.\n\
4913 CODE is the character code in BIG5.\n\
4914 Return the corresponding character.")
4915   (code)
4916      Lisp_Object code;
4917 {
4918   int charset;
4919   unsigned char b1, b2, c1, c2;
4920   Lisp_Object val;
4921
4922   CHECK_NUMBER (code, 0);
4923   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4924   DECODE_BIG5 (b1, b2, charset, c1, c2);
4925   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4926   return val;
4927 }
4928
4929 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4930   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4931 Return the corresponding character code in Big5.")
4932   (ch)
4933      Lisp_Object ch;
4934 {
4935   int charset, c1, c2, b1, b2;
4936   Lisp_Object val;
4937
4938   CHECK_NUMBER (ch, 0);
4939   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4940   if (charset == charset_big5_1 || charset == charset_big5_2)
4941     {
4942       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4943       XSETFASTINT (val, (b1 << 8) | b2);
4944     }
4945   else
4946     XSETFASTINT (val, 0);
4947   return val;
4948 }
4949 \f
4950 DEFUN ("set-terminal-coding-system-internal",
4951        Fset_terminal_coding_system_internal,
4952        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4953   (coding_system)
4954      Lisp_Object coding_system;
4955 {
4956   CHECK_SYMBOL (coding_system, 0);
4957   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4958   /* We had better not send unsafe characters to terminal.  */
4959   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4960
4961   return Qnil;
4962 }
4963
4964 DEFUN ("set-safe-terminal-coding-system-internal",
4965        Fset_safe_terminal_coding_system_internal,
4966        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4967   (coding_system)
4968      Lisp_Object coding_system;
4969 {
4970   CHECK_SYMBOL (coding_system, 0);
4971   setup_coding_system (Fcheck_coding_system (coding_system),
4972                        &safe_terminal_coding);
4973   return Qnil;
4974 }
4975
4976 DEFUN ("terminal-coding-system",
4977        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
4978   "Return coding system specified for terminal output.")
4979   ()
4980 {
4981   return terminal_coding.symbol;
4982 }
4983
4984 DEFUN ("set-keyboard-coding-system-internal",
4985        Fset_keyboard_coding_system_internal,
4986        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4987   (coding_system)
4988      Lisp_Object coding_system;
4989 {
4990   CHECK_SYMBOL (coding_system, 0);
4991   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4992   return Qnil;
4993 }
4994
4995 DEFUN ("keyboard-coding-system",
4996        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
4997   "Return coding system specified for decoding keyboard input.")
4998   ()
4999 {
5000   return keyboard_coding.symbol;
5001 }
5002
5003 \f
5004 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
5005        Sfind_operation_coding_system,  1, MANY, 0,
5006   "Choose a coding system for an operation based on the target name.\n\
5007 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
5008 DECODING-SYSTEM is the coding system to use for decoding\n\
5009 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
5010 for encoding (in case OPERATION does encoding).\n\
5011 \n\
5012 The first argument OPERATION specifies an I/O primitive:\n\
5013   For file I/O, `insert-file-contents' or `write-region'.\n\
5014   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
5015   For network I/O, `open-network-stream'.\n\
5016 \n\
5017 The remaining arguments should be the same arguments that were passed\n\
5018 to the primitive.  Depending on which primitive, one of those arguments\n\
5019 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
5020 whichever argument specifies the file name is TARGET.\n\
5021 \n\
5022 TARGET has a meaning which depends on OPERATION:\n\
5023   For file I/O, TARGET is a file name.\n\
5024   For process I/O, TARGET is a process name.\n\
5025   For network I/O, TARGET is a service name or a port number\n\
5026 \n\
5027 This function looks up what specified for TARGET in,\n\
5028 `file-coding-system-alist', `process-coding-system-alist',\n\
5029 or `network-coding-system-alist' depending on OPERATION.\n\
5030 They may specify a coding system, a cons of coding systems,\n\
5031 or a function symbol to call.\n\
5032 In the last case, we call the function with one argument,\n\
5033 which is a list of all the arguments given to this function.")
5034   (nargs, args)
5035      int nargs;
5036      Lisp_Object *args;
5037 {
5038   Lisp_Object operation, target_idx, target, val;
5039   register Lisp_Object chain;
5040
5041   if (nargs < 2)
5042     error ("Too few arguments");
5043   operation = args[0];
5044   if (!SYMBOLP (operation)
5045       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
5046     error ("Invalid first arguement");
5047   if (nargs < 1 + XINT (target_idx))
5048     error ("Too few arguments for operation: %s",
5049            XSYMBOL (operation)->name->data);
5050   target = args[XINT (target_idx) + 1];
5051   if (!(STRINGP (target)
5052         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
5053     error ("Invalid %dth argument", XINT (target_idx) + 1);
5054
5055   chain = ((EQ (operation, Qinsert_file_contents)
5056             || EQ (operation, Qwrite_region))
5057            ? Vfile_coding_system_alist
5058            : (EQ (operation, Qopen_network_stream)
5059               ? Vnetwork_coding_system_alist
5060               : Vprocess_coding_system_alist));
5061   if (NILP (chain))
5062     return Qnil;
5063
5064   for (; CONSP (chain); chain = XCONS (chain)->cdr)
5065     {
5066       Lisp_Object elt;
5067       elt = XCONS (chain)->car;
5068
5069       if (CONSP (elt)
5070           && ((STRINGP (target)
5071                && STRINGP (XCONS (elt)->car)
5072                && fast_string_match (XCONS (elt)->car, target) >= 0)
5073               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
5074         {
5075           val = XCONS (elt)->cdr;
5076           /* Here, if VAL is both a valid coding system and a valid
5077              function symbol, we return VAL as a coding system.  */
5078           if (CONSP (val))
5079             return val;
5080           if (! SYMBOLP (val))
5081             return Qnil;
5082           if (! NILP (Fcoding_system_p (val)))
5083             return Fcons (val, val);
5084           if (! NILP (Ffboundp (val)))
5085             {
5086               val = call1 (val, Flist (nargs, args));
5087               if (CONSP (val))
5088                 return val;
5089               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
5090                 return Fcons (val, val);
5091             }
5092           return Qnil;
5093         }
5094     }
5095   return Qnil;
5096 }
5097
5098 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
5099        Supdate_coding_systems_internal, 0, 0, 0,
5100   "Update internal database for ISO2022 and CCL based coding systems.\n\
5101 When values of the following coding categories are changed, you must\n\
5102 call this function:\n\
5103   coding-category-iso-7, coding-category-iso-7-tight,\n\
5104   coding-category-iso-8-1, coding-category-iso-8-2,\n\
5105   coding-category-iso-7-else, coding-category-iso-8-else,\n\
5106   coding-category-ccl")
5107   ()
5108 {
5109   int i;
5110
5111   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++)
5112     {
5113       Lisp_Object val;
5114
5115       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
5116       if (!NILP (val))
5117         {
5118           if (! coding_system_table[i])
5119             coding_system_table[i] = ((struct coding_system *)
5120                                       xmalloc (sizeof (struct coding_system)));
5121           setup_coding_system (val, coding_system_table[i]);
5122         }
5123       else if (coding_system_table[i])
5124         {
5125           xfree (coding_system_table[i]);
5126           coding_system_table[i] = NULL;
5127         }
5128     }
5129
5130   return Qnil;
5131 }
5132
5133 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
5134        Sset_coding_priority_internal, 0, 0, 0,
5135   "Update internal database for the current value of `coding-category-list'.\n\
5136 This function is internal use only.")
5137   ()
5138 {
5139   int i = 0, idx;
5140   Lisp_Object val;
5141
5142   val = Vcoding_category_list;
5143
5144   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
5145     {
5146       if (! SYMBOLP (XCONS (val)->car))
5147         break;
5148       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
5149       if (idx >= CODING_CATEGORY_IDX_MAX)
5150         break;
5151       coding_priorities[i++] = (1 << idx);
5152       val = XCONS (val)->cdr;
5153     }
5154   /* If coding-category-list is valid and contains all coding
5155      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
5156      the following code saves Emacs from craching.  */
5157   while (i < CODING_CATEGORY_IDX_MAX)
5158     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
5159
5160   return Qnil;
5161 }
5162
5163 #endif /* emacs */
5164
5165 \f
5166 /*** 9. Post-amble ***/
5167
5168 void
5169 init_coding ()
5170 {
5171   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5172 }
5173
5174 void
5175 init_coding_once ()
5176 {
5177   int i;
5178
5179   /* Emacs' internal format specific initialize routine.  */
5180   for (i = 0; i <= 0x20; i++)
5181     emacs_code_class[i] = EMACS_control_code;
5182   emacs_code_class[0x0A] = EMACS_linefeed_code;
5183   emacs_code_class[0x0D] = EMACS_carriage_return_code;
5184   for (i = 0x21 ; i < 0x7F; i++)
5185     emacs_code_class[i] = EMACS_ascii_code;
5186   emacs_code_class[0x7F] = EMACS_control_code;
5187   emacs_code_class[0x80] = EMACS_leading_code_composition;
5188   for (i = 0x81; i < 0xFF; i++)
5189     emacs_code_class[i] = EMACS_invalid_code;
5190   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5191   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5192   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5193   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
5194
5195   /* ISO2022 specific initialize routine.  */
5196   for (i = 0; i < 0x20; i++)
5197     iso_code_class[i] = ISO_control_code;
5198   for (i = 0x21; i < 0x7F; i++)
5199     iso_code_class[i] = ISO_graphic_plane_0;
5200   for (i = 0x80; i < 0xA0; i++)
5201     iso_code_class[i] = ISO_control_code;
5202   for (i = 0xA1; i < 0xFF; i++)
5203     iso_code_class[i] = ISO_graphic_plane_1;
5204   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5205   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5206   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5207   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5208   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5209   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5210   iso_code_class[ISO_CODE_ESC] = ISO_escape;
5211   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5212   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5213   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5214
5215   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
5216
5217   setup_coding_system (Qnil, &keyboard_coding);
5218   setup_coding_system (Qnil, &terminal_coding);
5219   setup_coding_system (Qnil, &safe_terminal_coding);
5220   setup_coding_system (Qnil, &default_buffer_file_coding);
5221
5222   bzero (coding_system_table, sizeof coding_system_table);
5223
5224   bzero (ascii_skip_code, sizeof ascii_skip_code);
5225   for (i = 0; i < 128; i++)
5226     ascii_skip_code[i] = 1;
5227
5228 #if defined (MSDOS) || defined (WINDOWSNT)
5229   system_eol_type = CODING_EOL_CRLF;
5230 #else
5231   system_eol_type = CODING_EOL_LF;
5232 #endif
5233 }
5234
5235 #ifdef emacs
5236
5237 void
5238 syms_of_coding ()
5239 {
5240   Qtarget_idx = intern ("target-idx");
5241   staticpro (&Qtarget_idx);
5242
5243   Qcoding_system_history = intern ("coding-system-history");
5244   staticpro (&Qcoding_system_history);
5245   Fset (Qcoding_system_history, Qnil);
5246
5247   /* Target FILENAME is the first argument.  */
5248   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5249   /* Target FILENAME is the third argument.  */
5250   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5251
5252   Qcall_process = intern ("call-process");
5253   staticpro (&Qcall_process);
5254   /* Target PROGRAM is the first argument.  */
5255   Fput (Qcall_process, Qtarget_idx, make_number (0));
5256
5257   Qcall_process_region = intern ("call-process-region");
5258   staticpro (&Qcall_process_region);
5259   /* Target PROGRAM is the third argument.  */
5260   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5261
5262   Qstart_process = intern ("start-process");
5263   staticpro (&Qstart_process);
5264   /* Target PROGRAM is the third argument.  */
5265   Fput (Qstart_process, Qtarget_idx, make_number (2));
5266
5267   Qopen_network_stream = intern ("open-network-stream");
5268   staticpro (&Qopen_network_stream);
5269   /* Target SERVICE is the fourth argument.  */
5270   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5271
5272   Qcoding_system = intern ("coding-system");
5273   staticpro (&Qcoding_system);
5274
5275   Qeol_type = intern ("eol-type");
5276   staticpro (&Qeol_type);
5277
5278   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5279   staticpro (&Qbuffer_file_coding_system);
5280
5281   Qpost_read_conversion = intern ("post-read-conversion");
5282   staticpro (&Qpost_read_conversion);
5283
5284   Qpre_write_conversion = intern ("pre-write-conversion");
5285   staticpro (&Qpre_write_conversion);
5286
5287   Qno_conversion = intern ("no-conversion");
5288   staticpro (&Qno_conversion);
5289
5290   Qundecided = intern ("undecided");
5291   staticpro (&Qundecided);
5292
5293   Qcoding_system_p = intern ("coding-system-p");
5294   staticpro (&Qcoding_system_p);
5295
5296   Qcoding_system_error = intern ("coding-system-error");
5297   staticpro (&Qcoding_system_error);
5298
5299   Fput (Qcoding_system_error, Qerror_conditions,
5300         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5301   Fput (Qcoding_system_error, Qerror_message,
5302         build_string ("Invalid coding system"));
5303
5304   Qcoding_category = intern ("coding-category");
5305   staticpro (&Qcoding_category);
5306   Qcoding_category_index = intern ("coding-category-index");
5307   staticpro (&Qcoding_category_index);
5308
5309   Vcoding_category_table
5310     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5311   staticpro (&Vcoding_category_table);
5312   {
5313     int i;
5314     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5315       {
5316         XVECTOR (Vcoding_category_table)->contents[i]
5317           = intern (coding_category_name[i]);
5318         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5319               Qcoding_category_index, make_number (i));
5320       }
5321   }
5322
5323   Qtranslation_table = intern ("translation-table");
5324   staticpro (&Qtranslation_table);
5325   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
5326
5327   Qtranslation_table_id = intern ("translation-table-id");
5328   staticpro (&Qtranslation_table_id);
5329
5330   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
5331   staticpro (&Qtranslation_table_for_decode);
5332
5333   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
5334   staticpro (&Qtranslation_table_for_encode);
5335
5336   Qsafe_charsets = intern ("safe-charsets");
5337   staticpro (&Qsafe_charsets);
5338
5339   Qvalid_codes = intern ("valid-codes");
5340   staticpro (&Qvalid_codes);
5341
5342   Qemacs_mule = intern ("emacs-mule");
5343   staticpro (&Qemacs_mule);
5344
5345   Qraw_text = intern ("raw-text");
5346   staticpro (&Qraw_text);
5347
5348   defsubr (&Scoding_system_p);
5349   defsubr (&Sread_coding_system);
5350   defsubr (&Sread_non_nil_coding_system);
5351   defsubr (&Scheck_coding_system);
5352   defsubr (&Sdetect_coding_region);
5353   defsubr (&Sdetect_coding_string);
5354   defsubr (&Sdecode_coding_region);
5355   defsubr (&Sencode_coding_region);
5356   defsubr (&Sdecode_coding_string);
5357   defsubr (&Sencode_coding_string);
5358   defsubr (&Sdecode_sjis_char);
5359   defsubr (&Sencode_sjis_char);
5360   defsubr (&Sdecode_big5_char);
5361   defsubr (&Sencode_big5_char);
5362   defsubr (&Sset_terminal_coding_system_internal);
5363   defsubr (&Sset_safe_terminal_coding_system_internal);
5364   defsubr (&Sterminal_coding_system);
5365   defsubr (&Sset_keyboard_coding_system_internal);
5366   defsubr (&Skeyboard_coding_system);
5367   defsubr (&Sfind_operation_coding_system);
5368   defsubr (&Supdate_coding_systems_internal);
5369   defsubr (&Sset_coding_priority_internal);
5370
5371   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5372     "List of coding systems.\n\
5373 \n\
5374 Do not alter the value of this variable manually.  This variable should be\n\
5375 updated by the functions `make-coding-system' and\n\
5376 `define-coding-system-alias'.");
5377   Vcoding_system_list = Qnil;
5378
5379   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5380     "Alist of coding system names.\n\
5381 Each element is one element list of coding system name.\n\
5382 This variable is given to `completing-read' as TABLE argument.\n\
5383 \n\
5384 Do not alter the value of this variable manually.  This variable should be\n\
5385 updated by the functions `make-coding-system' and\n\
5386 `define-coding-system-alias'.");
5387   Vcoding_system_alist = Qnil;
5388
5389   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5390     "List of coding-categories (symbols) ordered by priority.");
5391   {
5392     int i;
5393
5394     Vcoding_category_list = Qnil;
5395     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5396       Vcoding_category_list
5397         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5398                  Vcoding_category_list);
5399   }
5400
5401   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5402     "Specify the coding system for read operations.\n\
5403 It is useful to bind this variable with `let', but do not set it globally.\n\
5404 If the value is a coding system, it is used for decoding on read operation.\n\
5405 If not, an appropriate element is used from one of the coding system alists:\n\
5406 There are three such tables, `file-coding-system-alist',\n\
5407 `process-coding-system-alist', and `network-coding-system-alist'.");
5408   Vcoding_system_for_read = Qnil;
5409
5410   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5411     "Specify the coding system for write operations.\n\
5412 It is useful to bind this variable with `let', but do not set it globally.\n\
5413 If the value is a coding system, it is used for encoding on write operation.\n\
5414 If not, an appropriate element is used from one of the coding system alists:\n\
5415 There are three such tables, `file-coding-system-alist',\n\
5416 `process-coding-system-alist', and `network-coding-system-alist'.");
5417   Vcoding_system_for_write = Qnil;
5418
5419   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5420     "Coding system used in the latest file or process I/O.");
5421   Vlast_coding_system_used = Qnil;
5422
5423   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5424     "*Non-nil means always inhibit code conversion of end-of-line format.");
5425   inhibit_eol_conversion = 0;
5426
5427   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5428     "Non-nil means process buffer inherits coding system of process output.\n\
5429 Bind it to t if the process output is to be treated as if it were a file\n\
5430 read from some filesystem.");
5431   inherit_process_coding_system = 0;
5432
5433   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5434     "Alist to decide a coding system to use for a file I/O operation.\n\
5435 The format is ((PATTERN . VAL) ...),\n\
5436 where PATTERN is a regular expression matching a file name,\n\
5437 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5438 If VAL is a coding system, it is used for both decoding and encoding\n\
5439 the file contents.\n\
5440 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5441 and the cdr part is used for encoding.\n\
5442 If VAL is a function symbol, the function must return a coding system\n\
5443 or a cons of coding systems which are used as above.\n\
5444 \n\
5445 See also the function `find-operation-coding-system'\n\
5446 and the variable `auto-coding-alist'.");
5447   Vfile_coding_system_alist = Qnil;
5448
5449   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5450     "Alist to decide a coding system to use for a process I/O operation.\n\
5451 The format is ((PATTERN . VAL) ...),\n\
5452 where PATTERN is a regular expression matching a program name,\n\
5453 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5454 If VAL is a coding system, it is used for both decoding what received\n\
5455 from the program and encoding what sent to the program.\n\
5456 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5457 and the cdr part is used for encoding.\n\
5458 If VAL is a function symbol, the function must return a coding system\n\
5459 or a cons of coding systems which are used as above.\n\
5460 \n\
5461 See also the function `find-operation-coding-system'.");
5462   Vprocess_coding_system_alist = Qnil;
5463
5464   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5465     "Alist to decide a coding system to use for a network I/O operation.\n\
5466 The format is ((PATTERN . VAL) ...),\n\
5467 where PATTERN is a regular expression matching a network service name\n\
5468 or is a port number to connect to,\n\
5469 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5470 If VAL is a coding system, it is used for both decoding what received\n\
5471 from the network stream and encoding what sent to the network stream.\n\
5472 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5473 and the cdr part is used for encoding.\n\
5474 If VAL is a function symbol, the function must return a coding system\n\
5475 or a cons of coding systems which are used as above.\n\
5476 \n\
5477 See also the function `find-operation-coding-system'.");
5478   Vnetwork_coding_system_alist = Qnil;
5479
5480   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5481     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF).");
5482   eol_mnemonic_unix = ':';
5483
5484   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5485     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5486   eol_mnemonic_dos = '\\';
5487
5488   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5489     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5490   eol_mnemonic_mac = '/';
5491
5492   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5493     "Mnemonic character indicating end-of-line format is not yet decided.");
5494   eol_mnemonic_undecided = ':';
5495
5496   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
5497     "*Non-nil enables character translation while encoding and decoding.");
5498   Venable_character_translation = Qt;
5499
5500   DEFVAR_LISP ("standard-translation-table-for-decode",
5501     &Vstandard_translation_table_for_decode,
5502     "Table for translating characters while decoding.");
5503   Vstandard_translation_table_for_decode = Qnil;
5504
5505   DEFVAR_LISP ("standard-translation-table-for-encode",
5506     &Vstandard_translation_table_for_encode,
5507     "Table for translating characters while encoding.");
5508   Vstandard_translation_table_for_encode = Qnil;
5509
5510   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5511     "Alist of charsets vs revision numbers.\n\
5512 While encoding, if a charset (car part of an element) is found,\n\
5513 designate it with the escape sequence identifying revision (cdr part of the element).");
5514   Vcharset_revision_alist = Qnil;
5515
5516   DEFVAR_LISP ("default-process-coding-system",
5517                &Vdefault_process_coding_system,
5518     "Cons of coding systems used for process I/O by default.\n\
5519 The car part is used for decoding a process output,\n\
5520 the cdr part is used for encoding a text to be sent to a process.");
5521   Vdefault_process_coding_system = Qnil;
5522
5523   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5524     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5525 This is a vector of length 256.\n\
5526 If Nth element is non-nil, the existence of code N in a file\n\
5527 \(or output of subprocess) doesn't prevent it from being detected as\n\
5528 a coding system of ISO 2022 variant which has a flag\n\
5529 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5530 or reading output of a subprocess.\n\
5531 Only the 128th through 159th elements have a meaning.");
5532   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5533
5534   DEFVAR_LISP ("select-safe-coding-system-function",
5535                &Vselect_safe_coding_system_function,
5536     "Function to call to select safe coding system for encoding a text.\n\
5537 \n\
5538 If set, this function is called to force a user to select a proper\n\
5539 coding system which can encode the text in the case that a default\n\
5540 coding system used in each operation can't encode the text.\n\
5541 \n\
5542 The default value is `select-safe-coding-system' (which see).");
5543   Vselect_safe_coding_system_function = Qnil;
5544
5545 }
5546
5547 #endif /* emacs */