src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. CCL handlers
  29   6. End-of-line handlers
  30   7. C library functions
  31   8. Emacs Lisp library functions
  32   9. Post-amble
  33
  34 */
  35
  36 /*** GENERAL NOTE on CODING SYSTEM ***
  37
  38   Coding system is an encoding mechanism of one or more character
  39   sets.  Here's a list of coding systems which Emacs can handle.  When
  40   we say "decode", it means converting some other coding system to
  41   Emacs' internal format (emacs-mule), and when we say "encode",
  42   it means converting the coding system emacs-mule to some other
  43   coding system.
  44
  45   0. Emacs' internal format (emacs-mule)
  46
  47   Emacs itself holds a multi-lingual character in a buffer and a string
  48   in a special format.  Details are described in section 2.
  49
  50   1. ISO2022
  51
  52   The most famous coding system for multiple character sets.  X's
  53   Compound Text, various EUCs (Extended Unix Code), and coding
  54   systems used in Internet communication such as ISO-2022-JP are
  55   all variants of ISO2022.  Details are described in section 3.
  56
  57   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  58
  59   A coding system to encode character sets: ASCII, JISX0201, and
  60   JISX0208.  Widely used for PC's in Japan.  Details are described in
  61   section 4.
  62
  63   3. BIG5
  64
  65   A coding system to encode character sets: ASCII and Big5.  Widely
  66   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  67   described in section 4.  In this file, when we write "BIG5"
  68   (all uppercase), we mean the coding system, and when we write
  69   "Big5" (capitalized), we mean the character set.
  70
  71   4. Raw text
  72
  73   A coding system for a text containing random 8-bit code.  Emacs does
  74   no code conversion on such a text except for end-of-line format.
  75
  76   5. Other
  77
  78   If a user wants to read/write a text encoded in a coding system not
  79   listed above, he can supply a decoder and an encoder for it in CCL
  80   (Code Conversion Language) programs.  Emacs executes the CCL program
  81   while reading/writing.
  82
  83   Emacs represents a coding system by a Lisp symbol that has a property
  84   `coding-system'.  But, before actually using the coding system, the
  85   information about it is set in a structure of type `struct
  86   coding_system' for rapid processing.  See section 6 for more details.
  87
  88 */
  89
  90 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  91
  92   How end-of-line of a text is encoded depends on a system.  For
  93   instance, Unix's format is just one byte of `line-feed' code,
  94   whereas DOS's format is two-byte sequence of `carriage-return' and
  95   `line-feed' codes.  MacOS's format is usually one byte of
  96   `carriage-return'.
  97
  98   Since text characters encoding and end-of-line encoding are
  99   independent, any coding system described above can take
 100   any format of end-of-line.  So, Emacs has information of format of
 101   end-of-line in each coding-system.  See section 6 for more details.
 102
 103 */
 104
 105 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 106
 107   These functions check if a text between SRC and SRC_END is encoded
 108   in the coding system category XXX.  Each returns an integer value in
  109   which appropriate flag bits for the category XXX are set.  The flag
 110   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 111   template of these functions.  */
 112 #if 0
 113 int
 114 detect_coding_emacs_mule (src, src_end)
 115      unsigned char *src, *src_end;
 116 {
 117   ...
 118 }
 119 #endif
 120
 121 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 122
 123   These functions decode SRC_BYTES length text at SOURCE encoded in
 124   CODING to Emacs' internal format (emacs-mule).  The resulting text
 125   goes to a place pointed to by DESTINATION, the length of which
 126   should not exceed DST_BYTES.  These functions set the information of
 127   original and decoded texts in the members produced, produced_char,
 128   consumed, and consumed_char of the structure *CODING.
 129
 130   The return value is an integer (CODING_FINISH_XXX) indicating how
 131   the decoding finished.
 132
  133   DST_BYTES zero means that the source area and the destination area
  134   overlap, which means that we can produce decoded text only until it
  135   reaches the head of the not-yet-decoded source text.
 136
 137   Below is a template of these functions.  */
 138 #if 0
 139 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 140      struct coding_system *coding;
 141      unsigned char *source, *destination;
 142      int src_bytes, dst_bytes;
 143 {
 144   ...
 145 }
 146 #endif
 147
 148 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 149
 150   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 151   internal format (emacs-mule) to CODING.  The resulting text goes to
 152   a place pointed to by DESTINATION, the length of which should not
 153   exceed DST_BYTES.  These functions set the information of
 154   original and encoded texts in the members produced, produced_char,
 155   consumed, and consumed_char of the structure *CODING.
 156
 157   The return value is an integer (CODING_FINISH_XXX) indicating how
 158   the encoding finished.
 159
  160   DST_BYTES zero means that the source area and the destination area
  161   overlap, which means that we can produce encoded text only until it
  162   reaches the head of the not-yet-encoded source text.
 163
 164   Below is a template of these functions.  */
 165 #if 0
 166 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 167      struct coding_system *coding;
 168      unsigned char *source, *destination;
 169      int src_bytes, dst_bytes;
 170 {
 171   ...
 172 }
 173 #endif
 174
 175 /*** COMMONLY USED MACROS ***/
 176
  177 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
  178    THREE_MORE_BYTES fetch one, two, or three bytes from `src' into their
  179    arguments, advancing `src'.  If fewer bytes than requested remain
  180    before `src_end', nothing is consumed and control jumps to the label
  181    `label_end_of_loop', which (like `src' and `src_end') the caller provides.  */
  182
  183 #define ONE_MORE_BYTE(c1)       \
  184   do {                          \
  185     if (src < src_end)          \
  186       c1 = *src++;              \
  187     else                        \
  188       goto label_end_of_loop;   \
  189   } while (0)
  190
  191 #define TWO_MORE_BYTES(c1, c2)  \
  192   do {                          \
  193     if (src + 1 < src_end)      \
  194       c1 = *src++, c2 = *src++; \
  195     else                        \
  196       goto label_end_of_loop;   \
  197   } while (0)
  198
  199 #define THREE_MORE_BYTES(c1, c2, c3)            \
  200   do {                                          \
  201     if (src + 2 < src_end)                      \
  202       c1 = *src++, c2 = *src++, c3 = *src++;    \
  203     else                                        \
  204       goto label_end_of_loop;                   \
  205   } while (0)
 206
  207 /* The following three macros DECODE_CHARACTER_ASCII,
  208    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 store
  209    the multi-byte form of a character of each class at `dst' and
  210    advance `dst'.  Unless COMPOSING_P (coding->composing) holds, they
  211    also increment coding->produced_char.  The caller must set `dst'
  212    and `coding' (the coding system of the text being decoded).  */
  213
  214 /* Decode one ASCII character C; while composing emit 0xA0, C | 0x80.  */
  215
  216 #define DECODE_CHARACTER_ASCII(c)                               \
  217   do {                                                          \
  218     if (COMPOSING_P (coding->composing))                        \
  219       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
  220     else                                                        \
  221       {                                                         \
  222         *dst++ = (c);                                           \
  223         coding->produced_char++;                                \
  224       }                                                         \
  225   } while (0)
  226
  227 /* Decode one DIMENSION1 character of CHARSET whose position-code is C;
  228    while composing, the base leading-code is emitted with 0x20 added.  */
  229
  230 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
  231   do {                                                                  \
  232     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
  233     if (COMPOSING_P (coding->composing))                                \
  234       *dst++ = leading_code + 0x20;                                     \
  235     else                                                                \
  236       {                                                                 \
  237         *dst++ = leading_code;                                          \
  238         coding->produced_char++;                                        \
  239       }                                                                 \
  240     if (leading_code = CHARSET_LEADING_CODE_EXT (charset)) /* `=' intended */ \
  241       *dst++ = leading_code;                                            \
  242     *dst++ = (c) | 0x80;                                                \
  243   } while (0)
  244
  245 /* Decode one DIMENSION2 character of CHARSET with position-codes C1 and
  246    C2: the DIMENSION1 form for C1 followed by the byte C2 | 0x80.  */
  247
  248 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
  249   do {                                                  \
  250     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
  251     *dst++ = (c2) | 0x80;                               \
  252   } while (0)
 253
 254 \f
 255 /*** 1. Preamble ***/
 256
 257 #include <stdio.h>
 258
 259 #ifdef emacs
 260
 261 #include <config.h>
 262 #include "lisp.h"
 263 #include "buffer.h"
 264 #include "charset.h"
 265 #include "ccl.h"
 266 #include "coding.h"
 267 #include "window.h"
 268
 269 #else  /* not emacs */
 270
 271 #include "mulelib.h"
 272
 273 #endif /* not emacs */
 274
 275 Lisp_Object Qcoding_system, Qeol_type;
 276 Lisp_Object Qbuffer_file_coding_system;
 277 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 278 Lisp_Object Qno_conversion, Qundecided;
 279 Lisp_Object Qcoding_system_history;
 280 Lisp_Object Qsafe_charsets;
 281 Lisp_Object Qvalid_codes;
 282
 283 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 284 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 285 Lisp_Object Qstart_process, Qopen_network_stream;
 286 Lisp_Object Qtarget_idx;
 287
 288 Lisp_Object Vselect_safe_coding_system_function;
 289
 290 /* Mnemonic character of each format of end-of-line.  */
 291 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 292 /* Mnemonic character to indicate format of end-of-line is not yet
 293    decided.  */
 294 int eol_mnemonic_undecided;
 295
 296 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 297    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 298 int system_eol_type;
 299
 300 #ifdef emacs
 301
 302 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 303
 304 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 305
 306 /* Coding system emacs-mule and raw-text are for converting only
 307    end-of-line format.  */
 308 Lisp_Object Qemacs_mule, Qraw_text;
 309
 310 /* Coding-systems are handed between Emacs Lisp programs and C internal
 311    routines by the following three variables.  */
 312 /* Coding-system for reading files and receiving data from process.  */
 313 Lisp_Object Vcoding_system_for_read;
 314 /* Coding-system for writing files and sending data to process.  */
 315 Lisp_Object Vcoding_system_for_write;
 316 /* Coding-system actually used in the latest I/O.  */
 317 Lisp_Object Vlast_coding_system_used;
 318
 319 /* A vector of length 256 which contains information about special
 320    Latin codes (especially for dealing with Microsoft codes).  */
 321 Lisp_Object Vlatin_extra_code_table;
 322
 323 /* Flag to inhibit code conversion of end-of-line format.  */
 324 int inhibit_eol_conversion;
 325
 326 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 327 int inherit_process_coding_system;
 328
 329 /* Coding system to be used to encode text for terminal display.  */
 330 struct coding_system terminal_coding;
 331
 332 /* Coding system to be used to encode text for terminal display when
 333    terminal coding system is nil.  */
 334 struct coding_system safe_terminal_coding;
 335
 336 /* Coding system of what is sent from terminal keyboard.  */
 337 struct coding_system keyboard_coding;
 338
 339 /* Default coding system to be used to write a file.  */
 340 struct coding_system default_buffer_file_coding;
 341
 342 Lisp_Object Vfile_coding_system_alist;
 343 Lisp_Object Vprocess_coding_system_alist;
 344 Lisp_Object Vnetwork_coding_system_alist;
 345
 346 #endif /* emacs */
 347
 348 Lisp_Object Qcoding_category, Qcoding_category_index;
 349
 350 /* List of symbols `coding-category-xxx' ordered by priority.  */
 351 Lisp_Object Vcoding_category_list;
 352
 353 /* Table of coding categories (Lisp symbols).  */
 354 Lisp_Object Vcoding_category_table;
 355
  356 /* Table of the symbol names of the coding categories, one per index.  */
  357 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
  358   "coding-category-emacs-mule",
  359   "coding-category-sjis",
  360   "coding-category-iso-7",
  361   "coding-category-iso-7-tight",
  362   "coding-category-iso-8-1",
  363   "coding-category-iso-8-2",
  364   "coding-category-iso-7-else",
  365   "coding-category-iso-8-else",
  366   "coding-category-ccl",
  367   "coding-category-big5",
  368   "coding-category-raw-text",
  369   "coding-category-binary"
  370 };  /* NOTE(review): order presumably matches CODING_CATEGORY_IDX_XXX -- confirm.  */
 371
 372 /* Table of pointers to coding systems corresponding to each coding
 373    categories.  */
 374 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 375
  376 /* Table of coding category masks.  Nth element is the mask for the
  377    coding category whose priority is Nth.  */
 378 static
 379 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 380
 381 /* Flag to tell if we look up translation table on character code
 382    conversion.  */
 383 Lisp_Object Venable_character_translation;
 384 /* Standard translation table to look up on decoding (reading).  */
 385 Lisp_Object Vstandard_translation_table_for_decode;
 386 /* Standard translation table to look up on encoding (writing).  */
 387 Lisp_Object Vstandard_translation_table_for_encode;
 388
 389 Lisp_Object Qtranslation_table;
 390 Lisp_Object Qtranslation_table_id;
 391 Lisp_Object Qtranslation_table_for_decode;
 392 Lisp_Object Qtranslation_table_for_encode;
 393
 394 /* Alist of charsets vs revision number.  */
 395 Lisp_Object Vcharset_revision_alist;
 396
 397 /* Default coding systems used for process I/O.  */
 398 Lisp_Object Vdefault_process_coding_system;
 399
 400 \f
 401 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 402
 403 /* Emacs' internal format for encoding multiple character sets is a
 404    kind of multi-byte encoding, i.e. characters are encoded by
 405    variable-length sequences of one-byte codes.  ASCII characters
 406    and control characters (e.g. `tab', `newline') are represented by
 407    one-byte sequences which are their ASCII codes, in the range 0x00
 408    through 0x7F.  The other characters are represented by a sequence
 409    of `base leading-code', optional `extended leading-code', and one
 410    or two `position-code's.  The length of the sequence is determined
 411    by the base leading-code.  Leading-code takes the range 0x80
 412    through 0x9F, whereas extended leading-code and position-code take
 413    the range 0xA0 through 0xFF.  See `charset.h' for more details
 414    about leading-code and position-code.
 415
 416    There's one exception to this rule.  Special leading-code
 417    `leading-code-composition' denotes that the following several
 418    characters should be composed into one character.  Leading-codes of
  419    components (except for ASCII) have 0x20 added.  An ASCII character
 420    component is represented by a 2-byte sequence of `0xA0' and
 421    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 422    details of composite character.  Hence, we can summarize the code
 423    range as follows:
 424
 425    --- CODE RANGE of Emacs' internal format ---
 426    (character set)      (range)
 427    ASCII                0x00 .. 0x7F
 428    ELSE (1st byte)      0x80 .. 0x9F
 429         (rest bytes)    0xA0 .. 0xFF
 430    ---------------------------------------------
 431
 432   */
 433
 434 enum emacs_code_class_type emacs_code_class[256];
 435
  436 /* Consume *SRC and continue if it is in 0xA0..0xFF.  If SRC has reached
  437    SRC_END, go to `label_end_of_switch'; if *SRC < 0xA0, return 0.  */
  438 #define CHECK_CODE_RANGE_A0_FF  \
  439   do {                          \
  440     if (src >= src_end)         \
  441       goto label_end_of_switch; \
  442     else if (*src++ < 0xA0)     \
  443       return 0;                 \
  444   } while (0)
 445
  446 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
  447    Check whether the bytes in [SRC, SRC_END) are valid emacs-mule text.
  448    Return CODING_CATEGORY_MASK_EMACS_MULE if so, else return 0.  */
  449
  450 int
  451 detect_coding_emacs_mule (src, src_end)
  452      unsigned char *src, *src_end;
  453 {
  454   unsigned char c;
  455   int composing = 0;            /* nonzero inside a composition sequence */
  456
  457   while (src < src_end)
  458     {
  459       c = *src++;
  460
  461       if (composing)
  462         {
  463           if (c < 0xA0)
  464             composing = 0;      /* a byte below 0xA0 ends the composition */
  465           else
  466             c -= 0x20;          /* undo the 0x20 added to component codes */
  467         }
  468
  469       switch (emacs_code_class[c])
  470         {
  471         case EMACS_ascii_code:
  472         case EMACS_linefeed_code:
  473           break;
  474
  475         case EMACS_control_code:
  476           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
  477             return 0;           /* ESC, SI and SO rule out emacs-mule */
  478           break;
  479
  480         case EMACS_invalid_code:
  481           return 0;
  482
  483         case EMACS_leading_code_composition: /* c == 0x80 */
  484           if (composing)        /* byte was 0xA0: prefix of an ASCII component */
  485             CHECK_CODE_RANGE_A0_FF;
  486           else
  487             composing = 1;
  488           break;
  489
  490         case EMACS_leading_code_4:
  491           CHECK_CODE_RANGE_A0_FF;
  492           /* fall down to check it two more times ...  */
  493
  494         case EMACS_leading_code_3:
  495           CHECK_CODE_RANGE_A0_FF;
  496           /* fall down to check it one more time ...  */
  497
  498         case EMACS_leading_code_2:
  499           CHECK_CODE_RANGE_A0_FF;
  500           break;
  501
  502         default:
  503         label_end_of_switch:    /* CHECK_CODE_RANGE_A0_FF jumps here at end of input */
  504           break;
  505         }
  506     }
  507   return CODING_CATEGORY_MASK_EMACS_MULE;
  508 }
 509
 510 \f
 511 /*** 3. ISO2022 handlers ***/
 512
 513 /* The following note describes the coding system ISO2022 briefly.
 514    Since the intention of this note is to help in understanding of
 515    the programs in this file, some parts are NOT ACCURATE or OVERLY
 516    SIMPLIFIED.  For the thorough understanding, please refer to the
 517    original document of ISO2022.
 518
 519    ISO2022 provides many mechanisms to encode several character sets
  520    in 7-bit and 8-bit environments.  If one chooses a 7-bit environment,
 521    all text is encoded by codes of less than 128.  This may make the
 522    encoded text a little bit longer, but the text gets more stability
 523    to pass through several gateways (some of them strip off the MSB).
 524
 525    There are two kinds of character set: control character set and
 526    graphic character set.  The former contains control characters such
 527    as `newline' and `escape' to provide control functions (control
 528    functions are provided also by escape sequences).  The latter
  529    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 530    two control character sets and many graphic character sets.
 531
 532    Graphic character sets are classified into one of the following
 533    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 534    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 535    bytes (DIMENSION) and the number of characters in one dimension
 536    (CHARS) of the set.  In addition, each character set is assigned an
 537    identification tag (called "final character" and denoted as <F>
 538    here after) which is unique in each class.  <F> of each character
 539    set is decided by ECMA(*) when it is registered in ISO.  Code range
 540    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 541
 542    Note (*): ECMA = European Computer Manufacturers Association
 543
 544    Here are examples of graphic character set [NAME(<F>)]:
 545         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 546         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 547         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 548         o DIMENSION2_CHARS96 -- none for the moment
 549
 550    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 551         C0 [0x00..0x1F] -- control character plane 0
 552         GL [0x20..0x7F] -- graphic character plane 0
 553         C1 [0x80..0x9F] -- control character plane 1
 554         GR [0xA0..0xFF] -- graphic character plane 1
 555
 556    A control character set is directly designated and invoked to C0 or
 557    C1 by an escape sequence.  The most common case is that ISO646's
 558    control character set is designated/invoked to C0 and ISO6429's
 559    control character set is designated/invoked to C1, and usually
 560    these designations/invocations are omitted in a coded text.  With
 561    7-bit environment, only C0 can be used, and a control character for
 562    C1 is encoded by an appropriate escape sequence to fit in the
  563    environment.  Every control character for C1 has a corresponding
  564    escape sequence defined for this purpose.
 565
 566    A graphic character set is at first designated to one of four
 567    graphic registers (G0 through G3), then these graphic registers are
 568    invoked to GL or GR.  These designations and invocations can be
 569    done independently.  The most common case is that G0 is invoked to
 570    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 571    these invocations and designations are omitted in a coded text.
 572    With 7-bit environment, only GL can be used.
 573
 574    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 575    and 0x7F of GL area work as control characters SPACE and DEL
 576    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 577
 578    There are two ways of invocation: locking-shift and single-shift.
 579    With locking-shift, the invocation lasts until the next different
 580    invocation, whereas with single-shift, the invocation works only
 581    for the following character and doesn't affect locking-shift.
 582    Invocations are done by the following control characters or escape
 583    sequences.
 584
 585    ----------------------------------------------------------------------
 586    function             control char    escape sequence description
 587    ----------------------------------------------------------------------
 588    SI  (shift-in)               0x0F    none            invoke G0 to GL
 589    SO  (shift-out)              0x0E    none            invoke G1 to GL
 590    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 591    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 592    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 593    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 594    ----------------------------------------------------------------------
 595    The first four are for locking-shift.  Control characters for these
 596    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 597
 598    Designations are done by the following escape sequences.
 599    ----------------------------------------------------------------------
 600    escape sequence      description
 601    ----------------------------------------------------------------------
 602    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 603    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 604    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 605    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 606    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 607    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 608    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 609    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 610    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 611    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 612    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 613    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 614    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 615    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 616    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 617    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 618    ----------------------------------------------------------------------
 619
 620    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 621    of dimension 1, chars 94, and final character <F>, and etc.
 622
 623    Note (*): Although these designations are not allowed in ISO2022,
 624    Emacs accepts them on decoding, and produces them on encoding
 625    CHARS96 character set in a coding system which is characterized as
 626    7-bit environment, non-locking-shift, and non-single-shift.
 627
 628    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 629    '(' can be omitted.  We call this as "short-form" here after.
 630
 631    Now you may notice that there are a lot of ways for encoding the
  632    same multilingual text in ISO2022.  Actually, there exist many
  633    coding systems such as Compound Text (used in X's inter-client
  634    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 635    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 636    localized platforms), and all of these are variants of ISO2022.
 637
 638    In addition to the above, Emacs handles two more kinds of escape
 639    sequences: ISO6429's direction specification and Emacs' private
 640    sequence for specifying character composition.
 641
 642    ISO6429's direction specification takes the following format:
 643         o CSI ']'      -- end of the current direction
 644         o CSI '0' ']'  -- end of the current direction
 645         o CSI '1' ']'  -- start of left-to-right text
 646         o CSI '2' ']'  -- start of right-to-left text
 647    The control character CSI (0x9B: control sequence introducer) is
 648    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 649
 650    Character composition specification takes the following format:
 651         o ESC '0' -- start character composition
 652         o ESC '1' -- end character composition
 653    Since these are not standard escape sequences of any ISO, the use
 654    of them for these meaning is restricted to Emacs only.  */
 655
 656 enum iso_code_class_type iso_code_class[256];
  657 /* True if category IDX has a coding system with CHARSET safe or requested for designation.  */
  658 #define CHARSET_OK(idx, charset)                                \
  659   (coding_system_table[idx]                                     \
  660    && (coding_system_table[idx]->safe_charsets[charset]         \
  661        || (CODING_SPEC_ISO_REQUESTED_DESIGNATION                \
  662             (coding_system_table[idx], charset)                 \
  663            != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
  664 /* True if category IDX's coding system has initial designation 1 (presumably G1) set, i.e. >= 0.  */
  665 #define SHIFT_OUT_OK(idx) /* NOTE(review): no null check of coding_system_table[idx], unlike CHARSET_OK -- confirm */ \
  666   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 667
 668 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 669    Check if a text is encoded in ISO2022.  If it is, returns an
 670    integer in which appropriate flag bits any of:
 671         CODING_CATEGORY_MASK_ISO_7
 672         CODING_CATEGORY_MASK_ISO_7_TIGHT
 673         CODING_CATEGORY_MASK_ISO_8_1
 674         CODING_CATEGORY_MASK_ISO_8_2
 675         CODING_CATEGORY_MASK_ISO_7_ELSE
 676         CODING_CATEGORY_MASK_ISO_8_ELSE
 677    are set.  If a code which should never appear in ISO2022 is found,
 678    returns 0.  */
 679
 680 int
 681 detect_coding_iso2022 (src, src_end)
 682      unsigned char *src, *src_end;
 683 {
 684   int mask = CODING_CATEGORY_MASK_ISO;
 685   int mask_found = 0;
 686   int reg[4], shift_out = 0, single_shifting = 0;
 687   int c, c1, i, charset;
 688
 689   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 690   while (mask && src < src_end)
 691     {
 692       c = *src++;
 693       switch (c)
 694         {
 695         case ISO_CODE_ESC:
 696           single_shifting = 0;
 697           if (src >= src_end)
 698             break;
 699           c = *src++;
 700           if (c >= '(' && c <= '/')
 701             {
 702               /* Designation sequence for a charset of dimension 1.  */
 703               if (src >= src_end)
 704                 break;
 705               c1 = *src++;
 706               if (c1 < ' ' || c1 >= 0x80
 707                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 708                 /* Invalid designation sequence.  Just ignore.  */
 709                 break;
 710               reg[(c - '(') % 4] = charset;
 711             }
 712           else if (c == '$')
 713             {
 714               /* Designation sequence for a charset of dimension 2.  */
 715               if (src >= src_end)
 716                 break;
 717               c = *src++;
 718               if (c >= '@' && c <= 'B')
 719                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 720                 reg[0] = charset = iso_charset_table[1][0][c];
 721               else if (c >= '(' && c <= '/')
 722                 {
 723                   if (src >= src_end)
 724                     break;
 725                   c1 = *src++;
 726                   if (c1 < ' ' || c1 >= 0x80
 727                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 728                     /* Invalid designation sequence.  Just ignore.  */
 729                     break;
 730                   reg[(c - '(') % 4] = charset;
 731                 }
 732               else
 733                 /* Invalid designation sequence.  Just ignore.  */
 734                 break;
 735             }
 736           else if (c == 'N' || c == 'O')
 737             {
 738               /* ESC <Fe> for SS2 or SS3.  */
 739               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
 740               break;
 741             }
 742           else if (c == '0' || c == '1' || c == '2')
 743             /* ESC <Fp> for start/end composition.  Just ignore.  */
 744             break;
 745           else
 746             /* Invalid escape sequence.  Just ignore.  */
 747             break;
 748
 749           /* We found a valid designation sequence for CHARSET.  */
 750           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 751           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 752             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 753           else
 754             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 755           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 756             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 757           else
 758             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 759           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 760             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
 761           else
 762             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 763           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 764             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
 765           else
 766             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 767           break;
 768
 769         case ISO_CODE_SO:
 770           single_shifting = 0;
 771           if (shift_out == 0
 772               && (reg[1] >= 0
 773                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 774                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 775             {
 776               /* Locking shift out.  */
 777               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 778               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 779             }
 780           break;
 781
 782         case ISO_CODE_SI:
 783           single_shifting = 0;
 784           if (shift_out == 1)
 785             {
 786               /* Locking shift in.  */
 787               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 788               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 789             }
 790           break;
 791
 792         case ISO_CODE_CSI:
 793           single_shifting = 0;
 794         case ISO_CODE_SS2:
 795         case ISO_CODE_SS3:
 796           {
 797             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 798
 799             if (c != ISO_CODE_CSI)
 800               {
 801                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 802                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 803                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 804                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 805                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 806                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 807                 single_shifting = 1;
 808               }
 809             if (VECTORP (Vlatin_extra_code_table)
 810                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 811               {
 812                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 813                     & CODING_FLAG_ISO_LATIN_EXTRA)
 814                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 815                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 816                     & CODING_FLAG_ISO_LATIN_EXTRA)
 817                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 818               }
 819             mask &= newmask;
 820             mask_found |= newmask;
 821           }
 822           break;
 823
 824         default:
 825           if (c < 0x80)
 826             {
 827               single_shifting = 0;
 828               break;
 829             }
 830           else if (c < 0xA0)
 831             {
 832               single_shifting = 0;
 833               if (VECTORP (Vlatin_extra_code_table)
 834                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 835                 {
 836                   int newmask = 0;
 837
 838                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 839                       & CODING_FLAG_ISO_LATIN_EXTRA)
 840                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 841                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 842                       & CODING_FLAG_ISO_LATIN_EXTRA)
 843                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 844                   mask &= newmask;
 845                   mask_found |= newmask;
 846                 }
 847               else
 848                 return 0;
 849             }
 850           else
 851             {
 852               unsigned char *src_begin = src;
 853
 854               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 855                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 856               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 857               /* Check the length of succeeding codes of the range
 858                  0xA0..0FF.  If the byte length is odd, we exclude
 859                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
 860                  when we are not single shifting.  */
 861               if (!single_shifting)
 862                 {
 863                   while (src < src_end && *src >= 0xA0)
 864                     src++;
 865                   if ((src - src_begin - 1) & 1 && src < src_end)
 866                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 867                   else
 868                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 869                 }
 870             }
 871           break;
 872         }
 873     }
 874
 875   return (mask & mask_found);
 876 }
 877
/* Decode a character of which charset is CHARSET and the 1st position
   code is C1.  If dimension of CHARSET is 2, the 2nd position code is
   fetched from SRC and set to C2.  If CHARSET is negative, it means
   that we are decoding ill formed text, and what we can do is just to
   read C1 as is.

   This macro is not self-contained: besides its two arguments it uses
   the variables `coding', `dst', `src', `c2' and `translation_table'
   of the function it is expanded in.

   When a composition has just started (COMPOSING_HEAD_P), the leading
   code for composition is written to DST first.  When the 2nd byte of
   a 2-dimensional charset is not a graphic-plane-0 (or 0x20/0x7F)
   code, that byte is pushed back and C1 is decoded as ASCII.  If
   `translation_table' is non-nil and translates the character, the
   translated charset and position codes are decoded instead.  */

#define DECODE_ISO_CHARACTER(charset, c1)                               \
  do {                                                                  \
    int c_alt, charset_alt = (charset);                                 \
    if (COMPOSING_HEAD_P (coding->composing))                           \
      {                                                                 \
        *dst++ = LEADING_CODE_COMPOSITION;                              \
        if (COMPOSING_WITH_RULE_P (coding->composing))                  \
          /* To tell composition rules are embedded.  */                \
          *dst++ = 0xFF;                                                \
        coding->composing += 2;                                         \
      }                                                                 \
    if (charset_alt >= 0)                                               \
      {                                                                 \
        if (CHARSET_DIMENSION (charset_alt) == 2)                       \
          {                                                             \
            ONE_MORE_BYTE (c2);                                         \
            if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
                && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
              {                                                         \
                src--;                                                  \
                charset_alt = CHARSET_ASCII;                            \
              }                                                         \
          }                                                             \
        if (!NILP (translation_table)                                   \
            && ((c_alt = translate_char (translation_table,             \
                                         -1, charset_alt, c1, c2)) >= 0)) \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
      }                                                                 \
    if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
      DECODE_CHARACTER_ASCII (c1);                                      \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
    else                                                                \
      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
    if (COMPOSING_WITH_RULE_P (coding->composing))                      \
      /* To tell a composition rule follows.  */                        \
      coding->composing = COMPOSING_WITH_RULE_RULE;                     \
  } while (0)
 922
 923 /* Set designation state into CODING.  */
 924 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 925   do {                                                                     \
 926     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 927                                      make_number (chars),                  \
 928                                      make_number (final_char));            \
 929     if (charset >= 0                                                       \
 930         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 931             || coding->safe_charsets[charset]))                            \
 932       {                                                                    \
 933         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 934             && reg == 0                                                    \
 935             && charset == CHARSET_ASCII)                                   \
 936           {                                                                \
 937             /* We should insert this designation sequence as is so         \
 938                that it is surely written back to a file.  */               \
 939             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 940             goto label_invalid_code;                                       \
 941           }                                                                \
 942         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 943         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 944             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 945           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 946         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 947       }                                                                    \
 948     else                                                                   \
 949       {                                                                    \
 950         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 951         goto label_invalid_code;                                           \
 952       }                                                                    \
 953   } while (0)
 954
 955 /* Check if the current composing sequence contains only valid codes.
 956    If the composing sequence doesn't end before SRC_END, return -1.
 957    Else, if it contains only valid codes, return 0.
 958    Else return the length of the composing sequence.  */
 959
 960 int
 961 check_composing_code (coding, src, src_end)
 962      struct coding_system *coding;
 963      unsigned char *src, *src_end;
 964 {
 965   unsigned char *src_start = src;
 966   int invalid_code_found = 0;
 967   int charset, c, c1, dim;
 968
 969   while (src < src_end)
 970     {
 971       if (*src++ != ISO_CODE_ESC) continue;
 972       if (src >= src_end) break;
 973       if ((c = *src++) == '1') /* end of compsition */
 974         return (invalid_code_found ? src - src_start : 0);
 975       if (src + 2 >= src_end) break;
 976       if (!coding->flags & CODING_FLAG_ISO_DESIGNATION)
 977         invalid_code_found = 1;
 978       else
 979         {
 980           dim = 0;
 981           if (c == '$')
 982             {
 983               dim = 1;
 984               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 985             }
 986           if (c >= '(' && c <= '/')
 987             {
 988               c1 = *src++;
 989               if ((c1 < ' ' || c1 >= 0x80)
 990                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 991                   || ! coding->safe_charsets[charset]
 992                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 993                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 994                 invalid_code_found = 1;
 995             }
 996           else
 997             invalid_code_found = 1;
 998         }
 999     }
1000   return (invalid_code_found
1001           ? src - src_start
1002           : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
1003 }
1004
1005 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1006
1007 int
1008 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1009      struct coding_system *coding;
1010      unsigned char *source, *destination;
1011      int src_bytes, dst_bytes;
1012 {
1013   unsigned char *src = source;
1014   unsigned char *src_end = source + src_bytes;
1015   unsigned char *dst = destination;
1016   unsigned char *dst_end = destination + dst_bytes;
1017   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1018      from DST_END to assure that overflow checking is necessary only
1019      at the head of loop.  */
1020   unsigned char *adjusted_dst_end = dst_end - 6;
1021   int charset;
1022   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1023   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1024   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1025   Lisp_Object translation_table
1026     = coding->translation_table_for_decode;
1027   int result = CODING_FINISH_NORMAL;
1028
1029   if (!NILP (Venable_character_translation) && NILP (translation_table))
1030     translation_table = Vstandard_translation_table_for_decode;
1031
1032   coding->produced_char = 0;
1033   coding->fake_multibyte = 0;
1034   while (src < src_end && (dst_bytes
1035                            ? (dst < adjusted_dst_end)
1036                            : (dst < src - 6)))
1037     {
1038       /* SRC_BASE remembers the start position in source in each loop.
1039          The loop will be exited when there's not enough source text
1040          to analyze long escape sequence or 2-byte code (within macros
1041          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1042          to SRC_BASE before exiting.  */
1043       unsigned char *src_base = src;
1044       int c1 = *src++, c2;
1045
1046       switch (iso_code_class [c1])
1047         {
1048         case ISO_0x20_or_0x7F:
1049           if (!coding->composing
1050               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1051             {
1052               /* This is SPACE or DEL.  */
1053               *dst++ = c1;
1054               coding->produced_char++;
1055               break;
1056             }
1057           /* This is a graphic character, we fall down ...  */
1058
1059         case ISO_graphic_plane_0:
1060           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1061             {
1062               /* This is a composition rule.  */
1063               *dst++ = c1 | 0x80;
1064               coding->composing = COMPOSING_WITH_RULE_TAIL;
1065             }
1066           else
1067             DECODE_ISO_CHARACTER (charset0, c1);
1068           break;
1069
1070         case ISO_0xA0_or_0xFF:
1071           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1072               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1073             goto label_invalid_code;
1074           /* This is a graphic character, we fall down ... */
1075
1076         case ISO_graphic_plane_1:
1077           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1078             goto label_invalid_code;
1079           else
1080             DECODE_ISO_CHARACTER (charset1, c1);
1081           break;
1082
1083         case ISO_control_code:
1084           /* All ISO2022 control characters in this class have the
1085              same representation in Emacs internal format.  */
1086           if (c1 == '\n'
1087               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1088               && (coding->eol_type == CODING_EOL_CR
1089                   || coding->eol_type == CODING_EOL_CRLF))
1090             {
1091               result = CODING_FINISH_INCONSISTENT_EOL;
1092               goto label_end_of_loop_2;
1093             }
1094           *dst++ = c1;
1095           coding->produced_char++;
1096           break;
1097
1098         case ISO_carriage_return:
1099           if (coding->eol_type == CODING_EOL_CR)
1100             *dst++ = '\n';
1101           else if (coding->eol_type == CODING_EOL_CRLF)
1102             {
1103               ONE_MORE_BYTE (c1);
1104               if (c1 == ISO_CODE_LF)
1105                 *dst++ = '\n';
1106               else
1107                 {
1108                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1109                     {
1110                       result = CODING_FINISH_INCONSISTENT_EOL;
1111                       goto label_end_of_loop_2;
1112                     }
1113                   src--;
1114                   *dst++ = '\r';
1115                 }
1116             }
1117           else
1118             *dst++ = c1;
1119           coding->produced_char++;
1120           break;
1121
1122         case ISO_shift_out:
1123           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1124               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1125             goto label_invalid_code;
1126           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1127           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1128           break;
1129
1130         case ISO_shift_in:
1131           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1132             goto label_invalid_code;
1133           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1134           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1135           break;
1136
1137         case ISO_single_shift_2_7:
1138         case ISO_single_shift_2:
1139           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1140             goto label_invalid_code;
1141           /* SS2 is handled as an escape sequence of ESC 'N' */
1142           c1 = 'N';
1143           goto label_escape_sequence;
1144
1145         case ISO_single_shift_3:
1146           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1147             goto label_invalid_code;
1148           /* SS2 is handled as an escape sequence of ESC 'O' */
1149           c1 = 'O';
1150           goto label_escape_sequence;
1151
1152         case ISO_control_sequence_introducer:
1153           /* CSI is handled as an escape sequence of ESC '[' ...  */
1154           c1 = '[';
1155           goto label_escape_sequence;
1156
1157         case ISO_escape:
1158           ONE_MORE_BYTE (c1);
1159         label_escape_sequence:
1160           /* Escape sequences handled by Emacs are invocation,
1161              designation, direction specification, and character
1162              composition specification.  */
1163           switch (c1)
1164             {
1165             case '&':           /* revision of following character set */
1166               ONE_MORE_BYTE (c1);
1167               if (!(c1 >= '@' && c1 <= '~'))
1168                 goto label_invalid_code;
1169               ONE_MORE_BYTE (c1);
1170               if (c1 != ISO_CODE_ESC)
1171                 goto label_invalid_code;
1172               ONE_MORE_BYTE (c1);
1173               goto label_escape_sequence;
1174
1175             case '$':           /* designation of 2-byte character set */
1176               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1177                 goto label_invalid_code;
1178               ONE_MORE_BYTE (c1);
1179               if (c1 >= '@' && c1 <= 'B')
1180                 {       /* designation of JISX0208.1978, GB2312.1980,
1181                                    or JISX0208.1980 */
1182                   DECODE_DESIGNATION (0, 2, 94, c1);
1183                 }
1184               else if (c1 >= 0x28 && c1 <= 0x2B)
1185                 {       /* designation of DIMENSION2_CHARS94 character set */
1186                   ONE_MORE_BYTE (c2);
1187                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1188                 }
1189               else if (c1 >= 0x2C && c1 <= 0x2F)
1190                 {       /* designation of DIMENSION2_CHARS96 character set */
1191                   ONE_MORE_BYTE (c2);
1192                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1193                 }
1194               else
1195                 goto label_invalid_code;
1196               break;
1197
1198             case 'n':           /* invocation of locking-shift-2 */
1199               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1200                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1201                 goto label_invalid_code;
1202               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1203               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1204               break;
1205
1206             case 'o':           /* invocation of locking-shift-3 */
1207               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1208                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1209                 goto label_invalid_code;
1210               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1211               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1212               break;
1213
1214             case 'N':           /* invocation of single-shift-2 */
1215               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1216                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1217                 goto label_invalid_code;
1218               ONE_MORE_BYTE (c1);
1219               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1220               DECODE_ISO_CHARACTER (charset, c1);
1221               break;
1222
1223             case 'O':           /* invocation of single-shift-3 */
1224               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1225                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1226                 goto label_invalid_code;
1227               ONE_MORE_BYTE (c1);
1228               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1229               DECODE_ISO_CHARACTER (charset, c1);
1230               break;
1231
1232             case '0': case '2': /* start composing */
1233               /* Before processing composing, we must be sure that all
1234                  characters being composed are supported by CODING.
1235                  If not, we must give up composing and insert the
1236                  bunch of codes for composing as is without decoding.  */
1237               {
1238                 int result1;
1239
1240                 result1 = check_composing_code (coding, src, src_end);
1241                 if (result1 == 0)
1242                   {
1243                     coding->composing = (c1 == '0'
1244                                          ? COMPOSING_NO_RULE_HEAD
1245                                          : COMPOSING_WITH_RULE_HEAD);
1246                     coding->produced_char++;
1247                   }
1248                 else if (result1 > 0)
1249                   {
1250                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1251                       {
1252                         bcopy (src_base, dst, result1 + 2);
1253                         src += result1;
1254                         dst += result1 + 2;
1255                         coding->produced_char += result1 + 2;
1256                       }
1257                     else
1258                       {
1259                         result = CODING_FINISH_INSUFFICIENT_DST;
1260                         goto label_end_of_loop_2;
1261                       }
1262                   }
1263                 else
1264                   goto label_end_of_loop;
1265               }
1266               break;
1267
1268             case '1':           /* end composing */
1269               coding->composing = COMPOSING_NO;
1270               break;
1271
1272             case '[':           /* specification of direction */
1273               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1274                 goto label_invalid_code;
1275               /* For the moment, nested direction is not supported.
1276                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1277                  left-to-right, and nozero means right-to-left.  */
1278               ONE_MORE_BYTE (c1);
1279               switch (c1)
1280                 {
1281                 case ']':       /* end of the current direction */
1282                   coding->mode &= ~CODING_MODE_DIRECTION;
1283
1284                 case '0':       /* end of the current direction */
1285                 case '1':       /* start of left-to-right direction */
1286                   ONE_MORE_BYTE (c1);
1287                   if (c1 == ']')
1288                     coding->mode &= ~CODING_MODE_DIRECTION;
1289                   else
1290                     goto label_invalid_code;
1291                   break;
1292
1293                 case '2':       /* start of right-to-left direction */
1294                   ONE_MORE_BYTE (c1);
1295                   if (c1 == ']')
1296                     coding->mode |= CODING_MODE_DIRECTION;
1297                   else
1298                     goto label_invalid_code;
1299                   break;
1300
1301                 default:
1302                   goto label_invalid_code;
1303                 }
1304               break;
1305
1306             default:
1307               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1308                 goto label_invalid_code;
1309               if (c1 >= 0x28 && c1 <= 0x2B)
1310                 {       /* designation of DIMENSION1_CHARS94 character set */
1311                   ONE_MORE_BYTE (c2);
1312                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1313                 }
1314               else if (c1 >= 0x2C && c1 <= 0x2F)
1315                 {       /* designation of DIMENSION1_CHARS96 character set */
1316                   ONE_MORE_BYTE (c2);
1317                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1318                 }
1319               else
1320                 {
1321                   goto label_invalid_code;
1322                 }
1323             }
1324           /* We must update these variables now.  */
1325           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1326           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1327           break;
1328
1329         label_invalid_code:
1330           while (src_base < src)
1331             *dst++ = *src_base++;
1332           coding->fake_multibyte = 1;
1333         }
1334       continue;
1335
1336     label_end_of_loop:
1337       result = CODING_FINISH_INSUFFICIENT_SRC;
1338     label_end_of_loop_2:
1339       src = src_base;
1340       break;
1341     }
1342
1343   if (src < src_end)
1344     {
1345       if (result == CODING_FINISH_NORMAL)
1346         result = CODING_FINISH_INSUFFICIENT_DST;
1347       else if (result != CODING_FINISH_INCONSISTENT_EOL
1348                && coding->mode & CODING_MODE_LAST_BLOCK)
1349         {
1350           /* This is the last block of the text to be decoded.  We had
1351              better just flush out all remaining codes in the text
1352              although they are not valid characters.  */
1353           src_bytes = src_end - src;
1354           if (dst_bytes && (dst_end - dst < src_bytes))
1355             src_bytes = dst_end - dst;
1356           bcopy (src, dst, src_bytes);
1357           dst += src_bytes;
1358           src += src_bytes;
1359           coding->fake_multibyte = 1;
1360         }
1361     }
1362
1363   coding->consumed = coding->consumed_char = src - source;
1364   coding->produced = dst - destination;
1365   return result;
1366 }
1367
1368 /* ISO2022 encoding stuff.  */
1369
1370 /*
1371    It is not enough to say just "ISO2022" on encoding, we have to
1372    specify more details.  In Emacs, each coding system of ISO2022
1373    variant has the following specifications:
1374         1. Initial designation to G0 thru G3.
1375         2. Allows short-form designation?
1376         3. ASCII should be designated to G0 before control characters?
1377         4. ASCII should be designated to G0 at end of line?
1378         5. 7-bit environment or 8-bit environment?
1379         6. Use locking-shift?
1380         7. Use Single-shift?
1381    And the following two are only for Japanese:
1382         8. Use ASCII in place of JIS0201-1976-Roman?
1383         9. Use JISX0208-1983 in place of JISX0208-1978?
1384    These specifications are encoded in `coding->flags' as flag bits
1385    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1386    details.
1387 */
1388
/* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG.  If <final-char> of CHARSET is '@', 'A', or 'B' and
   the coding system CODING allows, produce designation sequence of
   short-form.

   If CODING has a revision number below 255 for CHARSET, the sequence
   ESC '&' <revision> is produced first.  The codes are written
   through the variable `dst' of the expansion site, and the
   designation is recorded in CODING.  REG must be in the range 0..3.
   The arguments are evaluated more than once, so they must have no
   side effects.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    /* Intermediate characters for registers G0..G3.  */                \
    const char *intermediate_char_94 = "()*+";                          \
    const char *intermediate_char_96 = ",-./";                          \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) == 1)                               \
      {                                                                 \
        if (CHARSET_CHARS (charset) == 94)                              \
          *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
      }                                                                 \
    else                                                                \
      {                                                                 \
        *dst++ = '$';                                                   \
        if (CHARSET_CHARS (charset) == 94)                              \
          {                                                             \
            /* The short form omits the intermediate character; it is   \
               used only for register 0 and final chars '@'..'B'.  */   \
            if (! ((coding)->flags & CODING_FLAG_ISO_SHORT_FORM)        \
                || (reg) != 0                                           \
                || final_char < '@' || final_char > 'B')                \
              *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
          }                                                             \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
      }                                                                 \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = (charset);              \
  } while (0)
1430
/* ISO2022 single-shift functions.  ENCODE_SINGLE_SHIFT_2 and
   ENCODE_SINGLE_SHIFT_3 emit at `dst' the code for single-shift-2 and
   single-shift-3 respectively: the escape sequence `ESC N' / `ESC O'
   when `coding' has CODING_FLAG_ISO_SEVEN_BITS set, otherwise the
   control character ISO_CODE_SS2 / ISO_CODE_SS3 (in which case
   coding->fake_multibyte is set).  Both then mark `coding' as being
   in the single-shifting state.  */

/* Common body of the two macros below.  ESC_FINAL is the final
   character of the escape sequence form, CONTROL_CODE the control
   character form.  */
#define ENCODE_SINGLE_SHIFT_N(esc_final, control_code)  \
  do {                                                  \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
      {                                                 \
        *dst++ = (control_code);                        \
        coding->fake_multibyte = 1;                     \
      }                                                 \
    else                                                \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = (esc_final);                           \
      }                                                 \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
  } while (0)

#define ENCODE_SINGLE_SHIFT_2 ENCODE_SINGLE_SHIFT_N ('N', ISO_CODE_SS2)

#define ENCODE_SINGLE_SHIFT_3 ENCODE_SINGLE_SHIFT_N ('O', ISO_CODE_SS3)
1458
/* ISO2022 locking-shift functions.  Each of the four macros below
   emits at `dst' the control character or escape sequence of one
   locking shift and records in `coding' which graphic register is
   now invoked to graphic plane 0:

     ENCODE_SHIFT_IN         SI       register 0
     ENCODE_SHIFT_OUT        SO       register 1
     ENCODE_LOCKING_SHIFT_2  ESC n    register 2
     ENCODE_LOCKING_SHIFT_3  ESC o    register 3  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
  } while (0)
1486
/* Emit at `dst' the code of a DIMENSION1 character of CHARSET whose
   position-code is C1.  If CHARSET is not currently usable through
   graphic plane 0 or 1, the required designation and invocation
   sequences are emitted first.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single-shift code has just been produced; this character   \
           completes it and ends the single-shifting state.  */         \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? ((c1) & 0x7F) : ((c1) | 0x80));                     \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))         \
      {                                                                 \
        /* CHARSET is reachable through graphic plane 0.  */            \
        *dst++ = (c1) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))         \
      {                                                                 \
        /* CHARSET is reachable through graphic plane 1.  */            \
        *dst++ = (c1) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    if (coding->flags & CODING_FLAG_ISO_SAFE                            \
        && !coding->safe_charsets[charset])                             \
      {                                                                 \
        /* This character must not be encoded; produce one              \
           substitution character per column of its width instead.  */  \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet.  Designate      \
       and/or invoke it, then go round the loop again to produce the    \
       character itself.  */                                            \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1530
/* Emit at `dst' the codes of a DIMENSION2 character of CHARSET whose
   position-codes are C1 and C2.  If CHARSET is not currently usable
   through graphic plane 0 or 1, the required designation and
   invocation sequences are emitted first.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single-shift code has just been produced; this character   \
           completes it and ends the single-shifting state.  */         \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = (c1) & 0x7F;                                       \
            *dst++ = (c2) & 0x7F;                                       \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = (c1) | 0x80;                                       \
            *dst++ = (c2) | 0x80;                                       \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))         \
      {                                                                 \
        /* CHARSET is reachable through graphic plane 0.  */            \
        *dst++ = (c1) & 0x7F;                                           \
        *dst++ = (c2) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))         \
      {                                                                 \
        /* CHARSET is reachable through graphic plane 1.  */            \
        *dst++ = (c1) | 0x80;                                           \
        *dst++ = (c2) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    if (coding->flags & CODING_FLAG_ISO_SAFE                            \
        && !coding->safe_charsets[charset])                             \
      {                                                                 \
        /* This character must not be encoded; produce one              \
           substitution character per column of its width instead.  */  \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet.  Designate      \
       and/or invoke it, then go round the loop again to produce the    \
       character itself.  */                                            \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1573
/* Emit at `dst' the ISO2022 code of the character of CHARSET whose
   position-codes are C1 and C2 (C2 is a dummy for a DIMENSION1
   charset).

   If `translation_table' is non-nil and maps the character to
   another one, the translated character is encoded instead; C1 and
   C2 are then overwritten with its position-codes.  The Japanese
   substitutions requested by CODING_FLAG_ISO_USE_ROMAN (ASCII ->
   JISX0201-Roman) and CODING_FLAG_ISO_USE_OLDJIS (JISX0208 ->
   JISX0208-1978) are decided on the charset that is actually going
   to be encoded, i.e. the one obtained after translation; otherwise
   an ASCII character translated to some other charset would be
   emitted with the wrong designation.

   coding->consumed_char is incremented unless we are in the middle
   of a composite character.  */

#define ENCODE_ISO_CHARACTER(charset, c1, c2)                   \
  do {                                                          \
    int c_alt, charset_alt;                                     \
    if (!NILP (translation_table)                               \
        && ((c_alt = translate_char (translation_table, -1,     \
                                     charset, c1, c2))          \
            >= 0))                                              \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
    else                                                        \
      charset_alt = charset;                                    \
    if (CHARSET_DIMENSION (charset_alt) == 1)                   \
      {                                                         \
        if (charset_alt == CHARSET_ASCII                        \
            && coding->flags & CODING_FLAG_ISO_USE_ROMAN)       \
          charset_alt = charset_latin_jisx0201;                 \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);      \
      }                                                         \
    else                                                        \
      {                                                         \
        if (charset_alt == charset_jisx0208                     \
            && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)      \
          charset_alt = charset_jisx0208_1978;                  \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);  \
      }                                                         \
    if (! COMPOSING_P (coding->composing))                      \
      coding->consumed_char++;                                  \
  } while (0)
1601
1602 /* Produce designation and invocation codes at a place pointed by DST
1603    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1604    Return new DST.  */
1605
1606 unsigned char *
1607 encode_invocation_designation (charset, coding, dst)
1608      int charset;
1609      struct coding_system *coding;
1610      unsigned char *dst;
1611 {
1612   int reg;                      /* graphic register number */
1613
1614   /* At first, check designations.  */
1615   for (reg = 0; reg < 4; reg++)
1616     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1617       break;
1618
1619   if (reg >= 4)
1620     {
1621       /* CHARSET is not yet designated to any graphic registers.  */
1622       /* At first check the requested designation.  */
1623       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1624       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1625         /* Since CHARSET requests no special designation, designate it
1626            to graphic register 0.  */
1627         reg = 0;
1628
1629       ENCODE_DESIGNATION (charset, reg, coding);
1630     }
1631
1632   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1633       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1634     {
1635       /* Since the graphic register REG is not invoked to any graphic
1636          planes, invoke it to graphic plane 0.  */
1637       switch (reg)
1638         {
1639         case 0:                 /* graphic register 0 */
1640           ENCODE_SHIFT_IN;
1641           break;
1642
1643         case 1:                 /* graphic register 1 */
1644           ENCODE_SHIFT_OUT;
1645           break;
1646
1647         case 2:                 /* graphic register 2 */
1648           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1649             ENCODE_SINGLE_SHIFT_2;
1650           else
1651             ENCODE_LOCKING_SHIFT_2;
1652           break;
1653
1654         case 3:                 /* graphic register 3 */
1655           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1656             ENCODE_SINGLE_SHIFT_3;
1657           else
1658             ENCODE_LOCKING_SHIFT_3;
1659           break;
1660         }
1661     }
1662   return dst;
1663 }
1664
/* The following three macros emit at `dst' the escape sequences that
   delimit a composition: `ESC 0' starts a composition without rules,
   `ESC 2' starts a composition with rules, and `ESC 1' ends either
   kind.  */
#define ENCODE_COMPOSITION_NO_RULE_START        \
  (*dst++ = ISO_CODE_ESC, *dst++ = '0')

#define ENCODE_COMPOSITION_WITH_RULE_START      \
  (*dst++ = ISO_CODE_ESC, *dst++ = '2')

#define ENCODE_COMPOSITION_END                  \
  (*dst++ = ISO_CODE_ESC, *dst++ = '1')
1669
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits at `dst' the control
   sequence introducer: the escape sequence `ESC [' when `coding' has
   the CODING_FLAG_ISO_SEVEN_BITS bit set (whatever other flag bits
   are set), otherwise the control character ISO_CODE_CSI.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R emit the introducer
   followed by `2 ]' and `0 ]' respectively.  Each of the three macros
   expands to a single statement.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2', *dst++ = ']';                 \
  } while (0)

#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0', *dst++ = ']';                 \
  } while (0)
1685
/* Emit at `dst' the codes that bring the graphic planes and registers
   of `coding' back to their initial state: a shift-in if graphic
   plane 0 does not currently invoke graphic register 0, then one
   designation for every graphic register that has an initial
   designation differing from its current one.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                 \
  do {                                                                  \
    int reset_reg;                                                      \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                    \
      ENCODE_SHIFT_IN;                                                  \
    for (reset_reg = 0; reset_reg < 4; reset_reg++)                     \
      {                                                                 \
        int reset_initial                                               \
          = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reset_reg);    \
        /* Skip registers without an initial designation and those      \
           that still hold it.  */                                      \
        if (reset_initial < 0                                           \
            || (reset_initial                                           \
                == CODING_SPEC_ISO_DESIGNATION (coding, reset_reg)))    \
          continue;                                                     \
        ENCODE_DESIGNATION (reset_initial, reset_reg, coding);          \
      }                                                                 \
  } while (0)
1700
1701 /* Produce designation sequences of charsets in the line started from
1702    SRC to a place pointed by *DSTP, and update DSTP.
1703
1704    If the current block ends before any end-of-line, we may fail to
1705    find all the necessary designations.  */
1706
1707 void
1708 encode_designation_at_bol (coding, table, src, src_end, dstp)
1709      struct coding_system *coding;
1710      Lisp_Object table;
1711      unsigned char *src, *src_end, **dstp;
1712 {
1713   int charset, c, found = 0, reg;
1714   /* Table of charsets to be designated to each graphic register.  */
1715   int r[4];
1716   unsigned char *dst = *dstp;
1717
1718   for (reg = 0; reg < 4; reg++)
1719     r[reg] = -1;
1720
1721   while (src < src_end && *src != '\n' && found < 4)
1722     {
1723       int bytes = BYTES_BY_CHAR_HEAD (*src);
1724
1725       if (NILP (table))
1726         charset = CHARSET_AT (src);
1727       else
1728         {
1729           int c_alt;
1730           unsigned char c1, c2;
1731
1732           SPLIT_STRING(src, bytes, charset, c1, c2);
1733           if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
1734             charset = CHAR_CHARSET (c_alt);
1735         }
1736
1737       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1738       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1739         {
1740           found++;
1741           r[reg] = charset;
1742         }
1743
1744       src += bytes;
1745     }
1746
1747   if (found)
1748     {
1749       for (reg = 0; reg < 4; reg++)
1750         if (r[reg] >= 0
1751             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1752           ENCODE_DESIGNATION (r[reg], reg, coding);
1753       *dstp = dst;
1754     }
1755 }
1756
1757 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
1758
1759 int
1760 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1761      struct coding_system *coding;
1762      unsigned char *source, *destination;
1763      int src_bytes, dst_bytes;
1764 {
1765   unsigned char *src = source;
1766   unsigned char *src_end = source + src_bytes;
1767   unsigned char *dst = destination;
1768   unsigned char *dst_end = destination + dst_bytes;
1769   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1770      from DST_END to assure overflow checking is necessary only at the
1771      head of loop.  */
1772   unsigned char *adjusted_dst_end = dst_end - 19;
1773   Lisp_Object translation_table
1774       = coding->translation_table_for_encode;
1775   int result = CODING_FINISH_NORMAL;
1776
1777   if (!NILP (Venable_character_translation) && NILP (translation_table))
1778     translation_table = Vstandard_translation_table_for_encode;
1779
1780   coding->consumed_char = 0;
1781   coding->fake_multibyte = 0;
1782   while (src < src_end && (dst_bytes
1783                            ? (dst < adjusted_dst_end)
1784                            : (dst < src - 19)))
1785     {
1786       /* SRC_BASE remembers the start position in source in each loop.
1787          The loop will be exited when there's not enough source text
1788          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1789          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1790          reset to SRC_BASE before exiting.  */
1791       unsigned char *src_base = src;
1792       int charset, c1, c2, c3, c4;
1793
1794       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1795           && CODING_SPEC_ISO_BOL (coding))
1796         {
1797           /* We have to produce designation sequences if any now.  */
1798           encode_designation_at_bol (coding, translation_table,
1799                                      src, src_end, &dst);
1800           CODING_SPEC_ISO_BOL (coding) = 0;
1801         }
1802
1803       c1 = *src++;
1804       /* If we are seeing a component of a composite character, we are
1805          seeing a leading-code encoded irregularly for composition, or
1806          a composition rule if composing with rule.  We must set C1 to
1807          a normal leading-code or an ASCII code.  If we are not seeing
1808          a composite character, we must reset composition,
1809          designation, and invocation states.  */
1810       if (COMPOSING_P (coding->composing))
1811         {
1812           if (c1 < 0xA0)
1813             {
1814               /* We are not in a composite character any longer.  */
1815               coding->composing = COMPOSING_NO;
1816               ENCODE_RESET_PLANE_AND_REGISTER;
1817               ENCODE_COMPOSITION_END;
1818             }
1819           else
1820             {
1821               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1822                 {
1823                   *dst++ = c1 & 0x7F;
1824                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1825                   continue;
1826                 }
1827               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1828                 coding->composing = COMPOSING_WITH_RULE_RULE;
1829               if (c1 == 0xA0)
1830                 {
1831                   /* This is an ASCII component.  */
1832                   ONE_MORE_BYTE (c1);
1833                   c1 &= 0x7F;
1834                 }
1835               else
1836                 /* This is a leading-code of non ASCII component.  */
1837                 c1 -= 0x20;
1838             }
1839         }
1840
1841       /* Now encode one character.  C1 is a control character, an
1842          ASCII character, or a leading-code of multi-byte character.  */
1843       switch (emacs_code_class[c1])
1844         {
1845         case EMACS_ascii_code:
1846           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1847           break;
1848
1849         case EMACS_control_code:
1850           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1851             ENCODE_RESET_PLANE_AND_REGISTER;
1852           *dst++ = c1;
1853           coding->consumed_char++;
1854           break;
1855
1856         case EMACS_carriage_return_code:
1857           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1858             {
1859               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1860                 ENCODE_RESET_PLANE_AND_REGISTER;
1861               *dst++ = c1;
1862               coding->consumed_char++;
1863               break;
1864             }
1865           /* fall down to treat '\r' as '\n' ...  */
1866
1867         case EMACS_linefeed_code:
1868           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1869             ENCODE_RESET_PLANE_AND_REGISTER;
1870           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1871             bcopy (coding->spec.iso2022.initial_designation,
1872                    coding->spec.iso2022.current_designation,
1873                    sizeof coding->spec.iso2022.initial_designation);
1874           if (coding->eol_type == CODING_EOL_LF
1875               || coding->eol_type == CODING_EOL_UNDECIDED)
1876             *dst++ = ISO_CODE_LF;
1877           else if (coding->eol_type == CODING_EOL_CRLF)
1878             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1879           else
1880             *dst++ = ISO_CODE_CR;
1881           CODING_SPEC_ISO_BOL (coding) = 1;
1882           coding->consumed_char++;
1883           break;
1884
1885         case EMACS_leading_code_2:
1886           ONE_MORE_BYTE (c2);
1887           if (c2 < 0xA0)
1888             {
1889               /* invalid sequence */
1890               *dst++ = c1;
1891               src--;
1892               coding->consumed_char++;
1893             }
1894           else
1895             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1896           break;
1897
1898         case EMACS_leading_code_3:
1899           TWO_MORE_BYTES (c2, c3);
1900           if (c2 < 0xA0 || c3 < 0xA0)
1901             {
1902               /* invalid sequence */
1903               *dst++ = c1;
1904               src -= 2;
1905               coding->consumed_char++;
1906             }
1907           else if (c1 < LEADING_CODE_PRIVATE_11)
1908             ENCODE_ISO_CHARACTER (c1, c2, c3);
1909           else
1910             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1911           break;
1912
1913         case EMACS_leading_code_4:
1914           THREE_MORE_BYTES (c2, c3, c4);
1915           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1916             {
1917               /* invalid sequence */
1918               *dst++ = c1;
1919               src -= 3;
1920               coding->consumed_char++;
1921             }
1922           else
1923             ENCODE_ISO_CHARACTER (c2, c3, c4);
1924           break;
1925
1926         case EMACS_leading_code_composition:
1927           ONE_MORE_BYTE (c2);
1928           if (c2 < 0xA0)
1929             {
1930               /* invalid sequence */
1931               *dst++ = c1;
1932               src--;
1933               coding->consumed_char++;
1934             }
1935           else if (c2 == 0xFF)
1936             {
1937               ENCODE_RESET_PLANE_AND_REGISTER;
1938               coding->composing = COMPOSING_WITH_RULE_HEAD;
1939               ENCODE_COMPOSITION_WITH_RULE_START;
1940               coding->consumed_char++;
1941             }
1942           else
1943             {
1944               ENCODE_RESET_PLANE_AND_REGISTER;
1945               /* Rewind one byte because it is a character code of
1946                  composition elements.  */
1947               src--;
1948               coding->composing = COMPOSING_NO_RULE_HEAD;
1949               ENCODE_COMPOSITION_NO_RULE_START;
1950               coding->consumed_char++;
1951             }
1952           break;
1953
1954         case EMACS_invalid_code:
1955           *dst++ = c1;
1956           coding->consumed_char++;
1957           break;
1958         }
1959       continue;
1960     label_end_of_loop:
1961       result = CODING_FINISH_INSUFFICIENT_SRC;
1962       src = src_base;
1963       break;
1964     }
1965
1966   if (src < src_end && result == CODING_FINISH_NORMAL)
1967     result = CODING_FINISH_INSUFFICIENT_DST;
1968
1969   /* If this is the last block of the text to be encoded, we must
1970      reset graphic planes and registers to the initial state, and
1971      flush out the carryover if any.  */
1972   if (coding->mode & CODING_MODE_LAST_BLOCK)
1973     {
1974       ENCODE_RESET_PLANE_AND_REGISTER;
1975       if (COMPOSING_P (coding->composing))
1976         ENCODE_COMPOSITION_END;
1977     }
1978   coding->consumed = src - source;
1979   coding->produced = coding->produced_char = dst - destination;
1980   return result;
1981 }
1982
1983 \f
1984 /*** 4. SJIS and BIG5 handlers ***/
1985
1986 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1987    quite widely.  So, for the moment, Emacs supports them in the bare
1988    C code.  But, in the future, they may be supported only by CCL.  */
1989
/* SJIS is a coding system encoding three character sets: ASCII, the
   right half of JISX0201-Kana, and JISX0208.  An ASCII character is
   encoded as is.  A character of charset katakana-jisx0201 is encoded
   as "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.
1996
1997    --- CODE RANGE of SJIS ---
1998    (character set)      (range)
1999    ASCII                0x00 .. 0x7F
2000    KATAKANA-JISX0201    0xA0 .. 0xDF
2001    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xEF
2002             (2nd byte)  0x40 .. 0xFF
2003    -------------------------------
2004
2005 */
2006
2007 /* BIG5 is a coding system encoding two character sets: ASCII and
2008    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2009    character set and is encoded in two-byte.
2010
2011    --- CODE RANGE of BIG5 ---
2012    (character set)      (range)
2013    ASCII                0x00 .. 0x7F
2014    Big5 (1st byte)      0xA1 .. 0xFE
2015         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2016    --------------------------
2017
2018    Since the number of characters in Big5 is larger than maximum
2019    characters in Emacs' charset (96x96), it can't be handled as one
2020    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2021    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2022    contains frequently used characters and the latter contains less
2023    frequently used characters.  */
2024
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in the BIG5 coding
   system.  C1 and C2 are the 1st and 2nd position-codes in Emacs'
   internal format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2029
/* Number of Big5 characters which have the same code in 1st byte:
   the 2nd bytes 0x40..0x7E plus the 2nd bytes 0xA1..0xFE.  */
#define BIG5_SAME_ROW ((0x7F - 0x40) + (0xFF - 0xA1))

/* Convert the BIG5 code B1, B2 into Emacs' internal representation:
   set CHARSET to `charset_big5_1' (B1 below 0xC9) or `charset_big5_2',
   and C1, C2 to the position-codes within that charset.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    /* Serial number of the character, counted from B1 == 0xA1.  */     \
    unsigned int big5_index = ((b1) - 0xA1) * BIG5_SAME_ROW;            \
    big5_index += (b2) - ((b2) < 0x7F ? 0x40 : 0x62);                   \
    if ((b1) >= 0xC9)                                                   \
      {                                                                 \
        /* `charset_big5_2' starts at the row of B1 == 0xC9.  */        \
        charset = charset_big5_2;                                       \
        big5_index -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                    \
      }                                                                 \
    else                                                                \
      charset = charset_big5_1;                                         \
    c1 = big5_index / (0xFF - 0xA1) + 0x21;                             \
    c2 = big5_index % (0xFF - 0xA1) + 0x21;                             \
  } while (0)
2047
/* Convert the character of CHARSET (`charset_big5_1' or
   `charset_big5_2') whose position-codes are C1 and C2 into the
   two-byte BIG5 code B1, B2.  This is the inverse of DECODE_BIG5.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    /* Serial number of the character within its charset.  */           \
    unsigned int big5_index                                             \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    /* `charset_big5_2' continues where `charset_big5_1' ends.  */      \
    if ((charset) == charset_big5_2)                                    \
      big5_index += BIG5_SAME_ROW * (0xC9 - 0xA1);                      \
    b1 = big5_index / BIG5_SAME_ROW + 0xA1;                             \
    b2 = big5_index % BIG5_SAME_ROW;                                    \
    /* Map the column onto 0x40..0x7E or 0xA1..0xFE.  */                \
    if (b2 < 0x3F)                                                      \
      b2 += 0x40;                                                       \
    else                                                                \
      b2 += 0x62;                                                       \
  } while (0)
2057
/* Produce in Emacs' internal format the character of CHARSET whose
   position-codes are C1 and C2.  If `translation_table' is non-nil
   and maps the character to another one, the translated character is
   produced instead, and C1 and C2 are overwritten with its
   position-codes.  A negative charset is treated like ASCII.  */
#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
  do {                                                                  \
    int decoded_charset = (charset);                                    \
    if (!NILP (translation_table))                                      \
      {                                                                 \
        int translated                                                  \
          = translate_char (translation_table, -1, (charset), c1, c2);  \
        if (translated >= 0)                                            \
          SPLIT_CHAR (translated, decoded_charset, c1, c2);             \
      }                                                                 \
    if (decoded_charset < 0 || decoded_charset == CHARSET_ASCII)        \
      DECODE_CHARACTER_ASCII (c1);                                      \
    else if (CHARSET_DIMENSION (decoded_charset) != 1)                  \
      DECODE_CHARACTER_DIMENSION2 (decoded_charset, c1, c2);            \
    else                                                                \
      DECODE_CHARACTER_DIMENSION1 (decoded_charset, c1);                \
  } while (0)
2072
/* Encode the character of CHARSET whose position codes are C1 and C2
   (C2 is a dummy for one-dimensional charsets) and store the result
   at `dst', advancing it.  The macro relies on the variables `dst',
   `coding', `sjis_p' and `translation_table' of the expanding
   function, and it may overwrite C1 and C2.

   When `translation_table' is non-nil the character is translated
   first.  ASCII is written as one byte; JISX0201-Kana is written as
   one byte when SJIS_P is nonzero; JISX0208 (SJIS_P nonzero) and
   Big5 (SJIS_P zero) are written as two bytes.  A character of any
   other charset is written out unencoded as its charset byte
   followed by its position codes, i.e. up to THREE bytes.
   `coding->consumed_char' is incremented once per expansion.

   The expansion deliberately does not end with a semicolon: the
   caller supplies it, so that `if (x) ENCODE_...(...); else ...'
   parses as a single statement.  */
#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)             \
  do {                                                          \
    int c_alt, charset_alt;                                     \
    if (!NILP (translation_table)                               \
        && ((c_alt = translate_char (translation_table, -1,     \
                                     (charset), c1, c2))        \
            >= 0))                                              \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
    else                                                        \
      charset_alt = (charset);                                  \
    if (charset_alt == charset_ascii)                           \
      *dst++ = c1;                                              \
    else if (CHARSET_DIMENSION (charset_alt) == 1)              \
      {                                                         \
        if (sjis_p && charset_alt == charset_katakana_jisx0201) \
          *dst++ = c1;                                          \
        else                                                    \
          {                                                     \
            /* No SJIS/BIG5 form: emit as is.  */               \
            *dst++ = charset_alt, *dst++ = c1;                  \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    else                                                        \
      {                                                         \
        c1 &= 0x7F, c2 &= 0x7F;                                 \
        if (sjis_p && charset_alt == charset_jisx0208)          \
          {                                                     \
            unsigned char s1, s2;                               \
                                                                \
            ENCODE_SJIS (c1, c2, s1, s2);                       \
            *dst++ = s1, *dst++ = s2;                           \
            coding->fake_multibyte = 1;                         \
          }                                                     \
        else if (!sjis_p                                        \
                 && (charset_alt == charset_big5_1              \
                     || charset_alt == charset_big5_2))         \
          {                                                     \
            unsigned char b1, b2;                               \
                                                                \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);          \
            *dst++ = b1, *dst++ = b2;                           \
          }                                                     \
        else                                                    \
          {                                                     \
            /* No SJIS/BIG5 form: emit as is (3 bytes).  */     \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;     \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    coding->consumed_char++;                                    \
  } while (0)
2123
2124 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2125    Check if a text is encoded in SJIS.  If it is, return
2126    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2127
2128 int
2129 detect_coding_sjis (src, src_end)
2130      unsigned char *src, *src_end;
2131 {
2132   unsigned char c;
2133
2134   while (src < src_end)
2135     {
2136       c = *src++;
2137       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2138         {
2139           if (src < src_end && *src++ < 0x40)
2140             return 0;
2141         }
2142     }
2143   return CODING_CATEGORY_MASK_SJIS;
2144 }
2145
2146 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2147    Check if a text is encoded in BIG5.  If it is, return
2148    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2149
2150 int
2151 detect_coding_big5 (src, src_end)
2152      unsigned char *src, *src_end;
2153 {
2154   unsigned char c;
2155
2156   while (src < src_end)
2157     {
2158       c = *src++;
2159       if (c >= 0xA1)
2160         {
2161           if (src >= src_end)
2162             break;
2163           c = *src++;
2164           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2165             return 0;
2166         }
2167     }
2168   return CODING_CATEGORY_MASK_BIG5;
2169 }
2170
2171 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2172    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2173
2174 int
2175 decode_coding_sjis_big5 (coding, source, destination,
2176                          src_bytes, dst_bytes, sjis_p)
2177      struct coding_system *coding;
2178      unsigned char *source, *destination;
2179      int src_bytes, dst_bytes;
2180      int sjis_p;
2181 {
2182   unsigned char *src = source;
2183   unsigned char *src_end = source + src_bytes;
2184   unsigned char *dst = destination;
2185   unsigned char *dst_end = destination + dst_bytes;
2186   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2187      from DST_END to assure overflow checking is necessary only at the
2188      head of loop.  */
2189   unsigned char *adjusted_dst_end = dst_end - 3;
2190   Lisp_Object translation_table
2191       = coding->translation_table_for_decode;
2192   int result = CODING_FINISH_NORMAL;
2193
2194   if (!NILP (Venable_character_translation) && NILP (translation_table))
2195     translation_table = Vstandard_translation_table_for_decode;
2196
2197   coding->produced_char = 0;
2198   coding->fake_multibyte = 0;
2199   while (src < src_end && (dst_bytes
2200                            ? (dst < adjusted_dst_end)
2201                            : (dst < src - 3)))
2202     {
2203       /* SRC_BASE remembers the start position in source in each loop.
2204          The loop will be exited when there's not enough source text
2205          to analyze two-byte character (within macro ONE_MORE_BYTE).
2206          In that case, SRC is reset to SRC_BASE before exiting.  */
2207       unsigned char *src_base = src;
2208       unsigned char c1 = *src++, c2, c3, c4;
2209
2210       if (c1 < 0x20)
2211         {
2212           if (c1 == '\r')
2213             {
2214               if (coding->eol_type == CODING_EOL_CRLF)
2215                 {
2216                   ONE_MORE_BYTE (c2);
2217                   if (c2 == '\n')
2218                     *dst++ = c2;
2219                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2220                     {
2221                       result = CODING_FINISH_INCONSISTENT_EOL;
2222                       goto label_end_of_loop_2;
2223                     }
2224                   else
2225                     /* To process C2 again, SRC is subtracted by 1.  */
2226                     *dst++ = c1, src--;
2227                 }
2228               else if (coding->eol_type == CODING_EOL_CR)
2229                 *dst++ = '\n';
2230               else
2231                 *dst++ = c1;
2232             }
2233           else if (c1 == '\n'
2234                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2235                    && (coding->eol_type == CODING_EOL_CR
2236                        || coding->eol_type == CODING_EOL_CRLF))
2237             {
2238               result = CODING_FINISH_INCONSISTENT_EOL;
2239               goto label_end_of_loop_2;
2240             }
2241           else
2242             *dst++ = c1;
2243           coding->produced_char++;
2244         }
2245       else if (c1 < 0x80)
2246         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2247       else
2248         {
2249           if (sjis_p)
2250             {
2251               if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
2252                 {
2253                   /* SJIS -> JISX0208 */
2254                   ONE_MORE_BYTE (c2);
2255                   if (c2 >= 0x40)
2256                     {
2257                       DECODE_SJIS (c1, c2, c3, c4);
2258                       DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2259                     }
2260                   else
2261                     goto label_invalid_code_2;
2262                 }
2263               else if (c1 < 0xE0)
2264                 /* SJIS -> JISX0201-Kana */
2265                 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2266                                             /* dummy */ c2);
2267               else
2268                 goto label_invalid_code_1;
2269             }
2270           else
2271             {
2272               /* BIG5 -> Big5 */
2273               if (c1 >= 0xA1 && c1 <= 0xFE)
2274                 {
2275                   ONE_MORE_BYTE (c2);
2276                   if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2277                     {
2278                       int charset;
2279
2280                       DECODE_BIG5 (c1, c2, charset, c3, c4);
2281                       DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2282                     }
2283                   else
2284                     goto label_invalid_code_2;
2285                 }
2286               else
2287                 goto label_invalid_code_1;
2288             }
2289         }
2290       continue;
2291
2292     label_invalid_code_1:
2293       *dst++ = c1;
2294       coding->produced_char++;
2295       coding->fake_multibyte = 1;
2296       continue;
2297
2298     label_invalid_code_2:
2299       *dst++ = c1; *dst++= c2;
2300       coding->produced_char += 2;
2301       coding->fake_multibyte = 1;
2302       continue;
2303
2304     label_end_of_loop:
2305       result = CODING_FINISH_INSUFFICIENT_SRC;
2306     label_end_of_loop_2:
2307       src = src_base;
2308       break;
2309     }
2310
2311   if (src < src_end)
2312     {
2313       if (result == CODING_FINISH_NORMAL)
2314         result = CODING_FINISH_INSUFFICIENT_DST;
2315       else if (result != CODING_FINISH_INCONSISTENT_EOL
2316                && coding->mode & CODING_MODE_LAST_BLOCK)
2317         {
2318           src_bytes = src_end - src;
2319           if (dst_bytes && (dst_end - dst < src_bytes))
2320             src_bytes = dst_end - dst;
2321           bcopy (dst, src, src_bytes);
2322           src += src_bytes;
2323           dst += src_bytes;
2324           coding->fake_multibyte = 1;
2325         }
2326     }
2327
2328   coding->consumed = coding->consumed_char = src - source;
2329   coding->produced = dst - destination;
2330   return result;
2331 }
2332
2333 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2334    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2335    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
2336    sure that all these charsets are registered as official charset
2337    (i.e. do not have extended leading-codes).  Characters of other
2338    charsets are produced without any encoding.  If SJIS_P is 1, encode
2339    SJIS text, else encode BIG5 text.  */
2340
2341 int
2342 encode_coding_sjis_big5 (coding, source, destination,
2343                          src_bytes, dst_bytes, sjis_p)
2344      struct coding_system *coding;
2345      unsigned char *source, *destination;
2346      int src_bytes, dst_bytes;
2347      int sjis_p;
2348 {
2349   unsigned char *src = source;
2350   unsigned char *src_end = source + src_bytes;
2351   unsigned char *dst = destination;
2352   unsigned char *dst_end = destination + dst_bytes;
2353   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2354      from DST_END to assure overflow checking is necessary only at the
2355      head of loop.  */
2356   unsigned char *adjusted_dst_end = dst_end - 1;
2357   Lisp_Object translation_table
2358       = coding->translation_table_for_encode;
2359   int result = CODING_FINISH_NORMAL;
2360
2361   if (!NILP (Venable_character_translation) && NILP (translation_table))
2362     translation_table = Vstandard_translation_table_for_encode;
2363
2364   coding->consumed_char = 0;
2365   coding->fake_multibyte = 0;
2366   while (src < src_end && (dst_bytes
2367                            ? (dst < adjusted_dst_end)
2368                            : (dst < src - 1)))
2369     {
2370       /* SRC_BASE remembers the start position in source in each loop.
2371          The loop will be exited when there's not enough source text
2372          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2373          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2374          before exiting.  */
2375       unsigned char *src_base = src;
2376       unsigned char c1 = *src++, c2, c3, c4;
2377
2378       if (coding->composing)
2379         {
2380           if (c1 == 0xA0)
2381             {
2382               ONE_MORE_BYTE (c1);
2383               c1 &= 0x7F;
2384             }
2385           else if (c1 >= 0xA0)
2386             c1 -= 0x20;
2387           else
2388             coding->composing = 0;
2389         }
2390
2391       switch (emacs_code_class[c1])
2392         {
2393         case EMACS_ascii_code:
2394           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2395           break;
2396
2397         case EMACS_control_code:
2398           *dst++ = c1;
2399           coding->consumed_char++;
2400           break;
2401
2402         case EMACS_carriage_return_code:
2403           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2404             {
2405               *dst++ = c1;
2406               coding->consumed_char++;
2407               break;
2408             }
2409           /* fall down to treat '\r' as '\n' ...  */
2410
2411         case EMACS_linefeed_code:
2412           if (coding->eol_type == CODING_EOL_LF
2413               || coding->eol_type == CODING_EOL_UNDECIDED)
2414             *dst++ = '\n';
2415           else if (coding->eol_type == CODING_EOL_CRLF)
2416             *dst++ = '\r', *dst++ = '\n';
2417           else
2418             *dst++ = '\r';
2419           coding->consumed_char++;
2420           break;
2421
2422         case EMACS_leading_code_2:
2423           ONE_MORE_BYTE (c2);
2424           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2425           break;
2426
2427         case EMACS_leading_code_3:
2428           TWO_MORE_BYTES (c2, c3);
2429           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2430           break;
2431
2432         case EMACS_leading_code_4:
2433           THREE_MORE_BYTES (c2, c3, c4);
2434           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2435           break;
2436
2437         case EMACS_leading_code_composition:
2438           coding->composing = 1;
2439           break;
2440
2441         default:                /* i.e. case EMACS_invalid_code: */
2442           *dst++ = c1;
2443           coding->consumed_char++;
2444         }
2445       continue;
2446
2447     label_end_of_loop:
2448       result = CODING_FINISH_INSUFFICIENT_SRC;
2449       src = src_base;
2450       break;
2451     }
2452
2453   if (result == CODING_FINISH_NORMAL
2454       && src < src_end)
2455     result = CODING_FINISH_INSUFFICIENT_DST;
2456   coding->consumed = src - source;
2457   coding->produced = coding->produced_char = dst - destination;
2458   return result;
2459 }
2460
2461 \f
2462 /*** 5. CCL handlers ***/
2463
2464 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2465    Check if a text is encoded in a coding system of which
2466    encoder/decoder are written in CCL program.  If it is, return
2467    CODING_CATEGORY_MASK_CCL, else return 0.  */
2468
2469 int
2470 detect_coding_ccl (src, src_end)
2471      unsigned char *src, *src_end;
2472 {
2473   unsigned char *valid;
2474
2475   /* No coding system is assigned to coding-category-ccl.  */
2476   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2477     return 0;
2478
2479   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2480   while (src < src_end)
2481     {
2482       if (! valid[*src]) return 0;
2483       src++;
2484     }
2485   return CODING_CATEGORY_MASK_CCL;
2486 }
2487
2488 \f
2489 /*** 6. End-of-line handlers ***/
2490
2491 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2492    This function is called only when `coding->eol_type' is
2493    CODING_EOL_CRLF or CODING_EOL_CR.  */
2494
2495 int
2496 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2497      struct coding_system *coding;
2498      unsigned char *source, *destination;
2499      int src_bytes, dst_bytes;
2500 {
2501   unsigned char *src = source;
2502   unsigned char *src_end = source + src_bytes;
2503   unsigned char *dst = destination;
2504   unsigned char *dst_end = destination + dst_bytes;
2505   unsigned char c;
2506   int result = CODING_FINISH_NORMAL;
2507
2508   coding->fake_multibyte = 0;
2509
2510   if (src_bytes <= 0)
2511     return result;
2512
2513   switch (coding->eol_type)
2514     {
2515     case CODING_EOL_CRLF:
2516       {
2517         /* Since the maximum bytes produced by each loop is 2, we
2518            subtract 1 from DST_END to assure overflow checking is
2519            necessary only at the head of loop.  */
2520         unsigned char *adjusted_dst_end = dst_end - 1;
2521
2522         while (src < src_end && (dst_bytes
2523                                  ? (dst < adjusted_dst_end)
2524                                  : (dst < src - 1)))
2525           {
2526             unsigned char *src_base = src;
2527
2528             c = *src++;
2529             if (c == '\r')
2530               {
2531                 ONE_MORE_BYTE (c);
2532                 if (c != '\n')
2533                   {
2534                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2535                       {
2536                         result = CODING_FINISH_INCONSISTENT_EOL;
2537                         goto label_end_of_loop_2;
2538                       }
2539                     *dst++ = '\r';
2540                     if (BASE_LEADING_CODE_P (c))
2541                       coding->fake_multibyte = 1;
2542                   }
2543                 *dst++ = c;
2544               }
2545             else if (c == '\n'
2546                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2547               {
2548                 result = CODING_FINISH_INCONSISTENT_EOL;
2549                 goto label_end_of_loop_2;
2550               }
2551             else
2552               {
2553                 *dst++ = c;
2554                 if (BASE_LEADING_CODE_P (c))
2555                   coding->fake_multibyte = 1;
2556               }
2557             continue;
2558
2559           label_end_of_loop:
2560             result = CODING_FINISH_INSUFFICIENT_SRC;
2561           label_end_of_loop_2:
2562             src = src_base;
2563             break;
2564           }
2565         if (result == CODING_FINISH_NORMAL
2566             && src < src_end)
2567           result = CODING_FINISH_INSUFFICIENT_DST;
2568       }
2569       break;
2570
2571     case CODING_EOL_CR:
2572       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2573         {
2574           while (src < src_end)
2575             {
2576               if ((c = *src++) == '\n')
2577                 break;
2578               if (BASE_LEADING_CODE_P (c))
2579                 coding->fake_multibyte = 1;
2580             }
2581           if (*--src == '\n')
2582             {
2583               src_bytes = src - source;
2584               result = CODING_FINISH_INCONSISTENT_EOL;
2585             }
2586         }
2587       if (dst_bytes && src_bytes > dst_bytes)
2588         {
2589           result = CODING_FINISH_INSUFFICIENT_DST;
2590           src_bytes = dst_bytes;
2591         }
2592       if (dst_bytes)
2593         bcopy (source, destination, src_bytes);
2594       else
2595         safe_bcopy (source, destination, src_bytes);
2596       src = source + src_bytes;
2597       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2598       break;
2599
2600     default:                    /* i.e. case: CODING_EOL_LF */
2601       if (dst_bytes && src_bytes > dst_bytes)
2602         {
2603           result = CODING_FINISH_INSUFFICIENT_DST;
2604           src_bytes = dst_bytes;
2605         }
2606       if (dst_bytes)
2607         bcopy (source, destination, src_bytes);
2608       else
2609         safe_bcopy (source, destination, src_bytes);
2610       src += src_bytes;
2611       dst += src_bytes;
2612       coding->fake_multibyte = 1;
2613       break;
2614     }
2615
2616   coding->consumed = coding->consumed_char = src - source;
2617   coding->produced = coding->produced_char = dst - destination;
2618   return result;
2619 }
2620
2621 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2622    format of end-of-line according to `coding->eol_type'.  If
2623    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2624    '\r' in source text also means end-of-line.  */
2625
2626 int
2627 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2628      struct coding_system *coding;
2629      unsigned char *source, *destination;
2630      int src_bytes, dst_bytes;
2631 {
2632   unsigned char *src = source;
2633   unsigned char *dst = destination;
2634   int result = CODING_FINISH_NORMAL;
2635
2636   coding->fake_multibyte = 0;
2637
2638   if (coding->eol_type == CODING_EOL_CRLF)
2639     {
2640       unsigned char c;
2641       unsigned char *src_end = source + src_bytes;
2642       unsigned char *dst_end = destination + dst_bytes;
2643       /* Since the maximum bytes produced by each loop is 2, we
2644          subtract 1 from DST_END to assure overflow checking is
2645          necessary only at the head of loop.  */
2646       unsigned char *adjusted_dst_end = dst_end - 1;
2647
2648       while (src < src_end && (dst_bytes
2649                                ? (dst < adjusted_dst_end)
2650                                : (dst < src - 1)))
2651         {
2652           c = *src++;
2653           if (c == '\n'
2654               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2655             *dst++ = '\r', *dst++ = '\n';
2656           else
2657             {
2658               *dst++ = c;
2659               if (BASE_LEADING_CODE_P (c))
2660                 coding->fake_multibyte = 1;
2661             }
2662         }
2663       if (src < src_end)
2664         result = CODING_FINISH_INSUFFICIENT_DST;
2665     }
2666   else
2667     {
2668       unsigned char c;
2669
2670       if (dst_bytes && src_bytes > dst_bytes)
2671         {
2672           src_bytes = dst_bytes;
2673           result = CODING_FINISH_INSUFFICIENT_DST;
2674         }
2675       if (dst_bytes)
2676         bcopy (source, destination, src_bytes);
2677       else
2678         safe_bcopy (source, destination, src_bytes);
2679       dst_bytes = src_bytes;
2680       if (coding->eol_type == CODING_EOL_CR)
2681         {
2682           while (src_bytes--)
2683             {
2684               if ((c = *dst++) == '\n')
2685                 dst[-1] = '\r';
2686               else if (BASE_LEADING_CODE_P (c))
2687                 coding->fake_multibyte = 1;
2688             }
2689         }
2690       else
2691         {
2692           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2693             {
2694               while (src_bytes--)
2695                 if (*dst++ == '\r') dst[-1] = '\n';
2696             }
2697           coding->fake_multibyte = 1;
2698         }
2699       src = source + dst_bytes;
2700       dst = destination + dst_bytes;
2701     }
2702
2703   coding->consumed = coding->consumed_char = src - source;
2704   coding->produced = coding->produced_char = dst - destination;
2705   return result;
2706 }
2707
2708 \f
2709 /*** 7. C library functions ***/
2710
2711 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2712    has a property `coding-system'.  The value of this property is a
2713    vector of length 5 (called as coding-vector).  Among elements of
2714    this vector, the first (element[0]) and the fifth (element[4])
2715    carry important information for decoding/encoding.  Before
2716    decoding/encoding, this information should be set in fields of a
2717    structure of type `coding_system'.
2718
2719    A value of property `coding-system' can be a symbol of another
2720    subsidiary coding-system.  In that case, Emacs gets coding-vector
2721    from that symbol.
2722
2723    `element[0]' contains information to be set in `coding->type'.  The
2724    value and its meaning is as follows:
2725
2726    0 -- coding_type_emacs_mule
2727    1 -- coding_type_sjis
2728    2 -- coding_type_iso2022
2729    3 -- coding_type_big5
2730    4 -- coding_type_ccl encoder/decoder written in CCL
2731    nil -- coding_type_no_conversion
2732    t -- coding_type_undecided (automatic conversion on decoding,
2733                                no-conversion on encoding)
2734
2735    `element[4]' contains information to be set in `coding->flags' and
2736    `coding->spec'.  The meaning varies by `coding->type'.
2737
2738    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2739    of length 32 (of which the first 17 sub-elements are used now).
2740    Meanings of these sub-elements are:
2741
2742    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2743         If the value is an integer of valid charset, the charset is
2744         assumed to be designated to graphic register N initially.
2745
2746         If the value is minus, it is a minus value of charset which
2747         reserves graphic register N, which means that the charset is
2748         not designated initially but should be designated to graphic
2749         register N just before encoding a character in that charset.
2750
2751         If the value is nil, graphic register N is never used on
2752         encoding.
2753
2754    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2755         Each value takes t or nil.  See the section ISO2022 of
2756         `coding.h' for more information.
2757
2758    If `coding->type' is `coding_type_big5', element[4] is t to denote
2759    BIG5-ETen or nil to denote BIG5-HKU.
2760
2761    If `coding->type' takes the other value, element[4] is ignored.
2762
2763    Emacs Lisp's coding system also carries information about format of
2764    end-of-line in a value of property `eol-type'.  If the value is
2765    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2766    means CODING_EOL_CR.  If it is not integer, it should be a vector
2767    of subsidiary coding systems of which property `eol-type' has one
2768    of above values.
2769
2770 */
2771
2772 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2773    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2774    is setup so that no conversion is necessary and return -1, else
2775    return 0.  */
2776
2777 int
2778 setup_coding_system (coding_system, coding)
2779      Lisp_Object coding_system;
2780      struct coding_system *coding;
2781 {
2782   Lisp_Object coding_spec, coding_type, eol_type, plist;
2783   Lisp_Object val;
2784   int i;
2785
2786   /* Initialize some fields required for all kinds of coding systems.  */
2787   coding->symbol = coding_system;
2788   coding->common_flags = 0;
2789   coding->mode = 0;
2790   coding->heading_ascii = -1;
2791   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2792   coding_spec = Fget (coding_system, Qcoding_system);
2793   if (!VECTORP (coding_spec)
2794       || XVECTOR (coding_spec)->size != 5
2795       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2796     goto label_invalid_coding_system;
2797
2798   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2799   if (VECTORP (eol_type))
2800     {
2801       coding->eol_type = CODING_EOL_UNDECIDED;
2802       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2803     }
2804   else if (XFASTINT (eol_type) == 1)
2805     {
2806       coding->eol_type = CODING_EOL_CRLF;
2807       coding->common_flags
2808         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2809     }
2810   else if (XFASTINT (eol_type) == 2)
2811     {
2812       coding->eol_type = CODING_EOL_CR;
2813       coding->common_flags
2814         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2815     }
2816   else
2817     coding->eol_type = CODING_EOL_LF;
2818
2819   coding_type = XVECTOR (coding_spec)->contents[0];
2820   /* Try short cut.  */
2821   if (SYMBOLP (coding_type))
2822     {
2823       if (EQ (coding_type, Qt))
2824         {
2825           coding->type = coding_type_undecided;
2826           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2827         }
2828       else
2829         coding->type = coding_type_no_conversion;
2830       return 0;
2831     }
2832
2833   /* Initialize remaining fields.  */
2834   coding->composing = 0;
2835   coding->translation_table_for_decode = Qnil;
2836   coding->translation_table_for_encode = Qnil;
2837
2838   /* Get values of coding system properties:
2839      `post-read-conversion', `pre-write-conversion',
2840      `translation-table-for-decode', `translation-table-for-encode'.  */
2841   plist = XVECTOR (coding_spec)->contents[3];
2842   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2843   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2844   val = Fplist_get (plist, Qtranslation_table_for_decode);
2845   if (SYMBOLP (val))
2846     val = Fget (val, Qtranslation_table_for_decode);
2847   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2848   val = Fplist_get (plist, Qtranslation_table_for_encode);
2849   if (SYMBOLP (val))
2850     val = Fget (val, Qtranslation_table_for_encode);
2851   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
2852   val = Fplist_get (plist, Qcoding_category);
2853   if (!NILP (val))
2854     {
2855       val = Fget (val, Qcoding_category_index);
2856       if (INTEGERP (val))
2857         coding->category_idx = XINT (val);
2858       else
2859         goto label_invalid_coding_system;
2860     }
2861   else
2862     goto label_invalid_coding_system;
2863
2864   val = Fplist_get (plist, Qsafe_charsets);
2865   if (EQ (val, Qt))
2866     {
2867       for (i = 0; i <= MAX_CHARSET; i++)
2868         coding->safe_charsets[i] = 1;
2869     }
2870   else
2871     {
2872       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2873       while (CONSP (val))
2874         {
2875           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2876             coding->safe_charsets[i] = 1;
2877           val = XCONS (val)->cdr;
2878         }
2879     }
2880
2881   switch (XFASTINT (coding_type))
2882     {
2883     case 0:
2884       coding->type = coding_type_emacs_mule;
2885       if (!NILP (coding->post_read_conversion))
2886         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2887       if (!NILP (coding->pre_write_conversion))
2888         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2889       break;
2890
2891     case 1:
2892       coding->type = coding_type_sjis;
2893       coding->common_flags
2894         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2895       break;
2896
2897     case 2:
2898       coding->type = coding_type_iso2022;
2899       coding->common_flags
2900         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2901       {
2902         Lisp_Object val, temp;
2903         Lisp_Object *flags;
2904         int i, charset, reg_bits = 0;
2905
2906         val = XVECTOR (coding_spec)->contents[4];
2907
2908         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2909           goto label_invalid_coding_system;
2910
2911         flags = XVECTOR (val)->contents;
2912         coding->flags
2913           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2914              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2915              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2916              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2917              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2918              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2919              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2920              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2921              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2922              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2923              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2924              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2925              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2926              );
2927
2928         /* Invoke graphic register 0 to plane 0.  */
2929         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2930         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2931         CODING_SPEC_ISO_INVOCATION (coding, 1)
2932           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2933         /* Not single shifting at first.  */
2934         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2935         /* Beginning of buffer should also be regarded as bol. */
2936         CODING_SPEC_ISO_BOL (coding) = 1;
2937
2938         for (charset = 0; charset <= MAX_CHARSET; charset++)
2939           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2940         val = Vcharset_revision_alist;
2941         while (CONSP (val))
2942           {
2943             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2944             if (charset >= 0
2945                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2946                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2947               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2948             val = XCONS (val)->cdr;
2949           }
2950
2951         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2952            FLAGS[REG] can be one of below:
2953                 integer CHARSET: CHARSET occupies register I,
2954                 t: designate nothing to REG initially, but can be used
2955                   by any charsets,
2956                 list of integer, nil, or t: designate the first
2957                   element (if integer) to REG initially, the remaining
2958                   elements (if integer) is designated to REG on request,
2959                   if an element is t, REG can be used by any charsets,
2960                 nil: REG is never used.  */
2961         for (charset = 0; charset <= MAX_CHARSET; charset++)
2962           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2963             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2964         for (i = 0; i < 4; i++)
2965           {
2966             if (INTEGERP (flags[i])
2967                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2968                 || (charset = get_charset_id (flags[i])) >= 0)
2969               {
2970                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2971                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2972               }
2973             else if (EQ (flags[i], Qt))
2974               {
2975                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2976                 reg_bits |= 1 << i;
2977                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2978               }
2979             else if (CONSP (flags[i]))
2980               {
2981                 Lisp_Object tail;
2982                 tail = flags[i];
2983
2984                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2985                 if (INTEGERP (XCONS (tail)->car)
2986                     && (charset = XINT (XCONS (tail)->car),
2987                         CHARSET_VALID_P (charset))
2988                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2989                   {
2990                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2991                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2992                   }
2993                 else
2994                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2995                 tail = XCONS (tail)->cdr;
2996                 while (CONSP (tail))
2997                   {
2998                     if (INTEGERP (XCONS (tail)->car)
2999                         && (charset = XINT (XCONS (tail)->car),
3000                             CHARSET_VALID_P (charset))
3001                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
3002                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3003                         = i;
3004                     else if (EQ (XCONS (tail)->car, Qt))
3005                       reg_bits |= 1 << i;
3006                     tail = XCONS (tail)->cdr;
3007                   }
3008               }
3009             else
3010               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3011
3012             CODING_SPEC_ISO_DESIGNATION (coding, i)
3013               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3014           }
3015
3016         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3017           {
3018             /* REG 1 can be used only by locking shift in 7-bit env.  */
3019             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3020               reg_bits &= ~2;
3021             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3022               /* Without any shifting, only REG 0 and 1 can be used.  */
3023               reg_bits &= 3;
3024           }
3025
3026         if (reg_bits)
3027           for (charset = 0; charset <= MAX_CHARSET; charset++)
3028             {
3029               if (CHARSET_VALID_P (charset))
3030                 {
3031                   /* There exist some default graphic registers to be
3032                      used CHARSET.  */
3033
3034                   /* We had better avoid designating a charset of
3035                      CHARS96 to REG 0 as far as possible.  */
3036                   if (CHARSET_CHARS (charset) == 96)
3037                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3038                       = (reg_bits & 2
3039                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3040                   else
3041                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3042                       = (reg_bits & 1
3043                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3044                 }
3045             }
3046       }
3047       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3048       coding->spec.iso2022.last_invalid_designation_register = -1;
3049       break;
3050
3051     case 3:
3052       coding->type = coding_type_big5;
3053       coding->common_flags
3054         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3055       coding->flags
3056         = (NILP (XVECTOR (coding_spec)->contents[4])
3057            ? CODING_FLAG_BIG5_HKU
3058            : CODING_FLAG_BIG5_ETEN);
3059       break;
3060
3061     case 4:
3062       coding->type = coding_type_ccl;
3063       coding->common_flags
3064         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3065       {
3066         Lisp_Object val;
3067         Lisp_Object decoder, encoder;
3068
3069         val = XVECTOR (coding_spec)->contents[4];
3070         if (CONSP  (val)
3071             && SYMBOLP (XCONS (val)->car)
3072             && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
3073             && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
3074             && SYMBOLP (XCONS (val)->cdr)
3075             && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
3076             && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
3077           {
3078             setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
3079             setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
3080           }
3081         else
3082           goto label_invalid_coding_system;
3083
3084         bzero (coding->spec.ccl.valid_codes, 256);
3085         val = Fplist_get (plist, Qvalid_codes);
3086         if (CONSP (val))
3087           {
3088             Lisp_Object this;
3089
3090             for (; CONSP (val); val = XCONS (val)->cdr)
3091               {
3092                 this = XCONS (val)->car;
3093                 if (INTEGERP (this)
3094                     && XINT (this) >= 0 && XINT (this) < 256)
3095                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3096                 else if (CONSP (this)
3097                          && INTEGERP (XCONS (this)->car)
3098                          && INTEGERP (XCONS (this)->cdr))
3099                   {
3100                     int start = XINT (XCONS (this)->car);
3101                     int end = XINT (XCONS (this)->cdr);
3102
3103                     if (start >= 0 && start <= end && end < 256)
3104                       while (start < end)
3105                         coding->spec.ccl.valid_codes[start++] = 1;
3106                   }
3107               }
3108           }
3109       }
3110       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3111       break;
3112
3113     case 5:
3114       coding->type = coding_type_raw_text;
3115       break;
3116
3117     default:
3118       goto label_invalid_coding_system;
3119     }
3120   return 0;
3121
3122  label_invalid_coding_system:
3123   coding->type = coding_type_no_conversion;
3124   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3125   coding->common_flags = 0;
3126   coding->eol_type = CODING_EOL_LF;
3127   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3128   return -1;
3129 }
3130
3131 /* Setup raw-text or one of its subsidiaries in the structure
3132    coding_system CODING according to the already setup value eol_type
3133    in CODING.  CODING should be setup for some coding system in
3134    advance.  */
3135
3136 void
3137 setup_raw_text_coding_system (coding)
3138      struct coding_system *coding;
3139 {
3140   if (coding->type != coding_type_raw_text)
3141     {
3142       coding->symbol = Qraw_text;
3143       coding->type = coding_type_raw_text;
3144       if (coding->eol_type != CODING_EOL_UNDECIDED)
3145         {
3146           Lisp_Object subsidiaries;
3147           subsidiaries = Fget (Qraw_text, Qeol_type);
3148
3149           if (VECTORP (subsidiaries)
3150               && XVECTOR (subsidiaries)->size == 3)
3151             coding->symbol
3152               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3153         }
3154     }
3155   return;
3156 }
3157
3158 /* Emacs has a mechanism to automatically detect a coding system if it
3159    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3160    it's impossible to distinguish some coding systems accurately
3161    because they use the same range of codes.  So, at first, coding
3162    systems are grouped into the following categories:
3163
3164    o coding-category-emacs-mule
3165
3166         The category for a coding system which has the same code range
3167         as Emacs' internal format.  Assigned the coding-system (Lisp
3168         symbol) `emacs-mule' by default.
3169
3170    o coding-category-sjis
3171
3172         The category for a coding system which has the same code range
3173         as SJIS.  Assigned the coding-system (Lisp
3174         symbol) `japanese-shift-jis' by default.
3175
3176    o coding-category-iso-7
3177
3178         The category for a coding system which has the same code range
3179         as ISO2022 of 7-bit environment.  This doesn't use any locking
3180         shift and single shift functions.  This can encode/decode all
3181         charsets.  Assigned the coding-system (Lisp symbol)
3182         `iso-2022-7bit' by default.
3183
3184    o coding-category-iso-7-tight
3185
3186         Same as coding-category-iso-7 except that this can
3187         encode/decode only the specified charsets.
3188
3189    o coding-category-iso-8-1
3190
3191         The category for a coding system which has the same code range
3192         as ISO2022 of 8-bit environment and graphic plane 1 used only
3193         for DIMENSION1 charset.  This doesn't use any locking shift
3194         and single shift functions.  Assigned the coding-system (Lisp
3195         symbol) `iso-latin-1' by default.
3196
3197    o coding-category-iso-8-2
3198
3199         The category for a coding system which has the same code range
3200         as ISO2022 of 8-bit environment and graphic plane 1 used only
3201         for DIMENSION2 charset.  This doesn't use any locking shift
3202         and single shift functions.  Assigned the coding-system (Lisp
3203         symbol) `japanese-iso-8bit' by default.
3204
3205    o coding-category-iso-7-else
3206
3207         The category for a coding system which has the same code range
3208         as ISO2022 of 7-bit environment but uses locking shift or
3209         single shift functions.  Assigned the coding-system (Lisp
3210         symbol) `iso-2022-7bit-lock' by default.
3211
3212    o coding-category-iso-8-else
3213
3214         The category for a coding system which has the same code range
3215         as ISO2022 of 8-bit environment but uses locking shift or
3216         single shift functions.  Assigned the coding-system (Lisp
3217         symbol) `iso-2022-8bit-ss2' by default.
3218
3219    o coding-category-big5
3220
3221         The category for a coding system which has the same code range
3222         as BIG5.  Assigned the coding-system (Lisp symbol)
3223         `cn-big5' by default.
3224
3225    o coding-category-ccl
3226
3227         The category for a coding system of which encoder/decoder is
3228         written in CCL programs.  The default value is nil, i.e., no
3229         coding system is assigned.
3230
3231    o coding-category-binary
3232
3233         The category for a coding system not categorized in any of the
3234         above.  Assigned the coding-system (Lisp symbol)
3235         `no-conversion' by default.
3236
3237    Each of them is a Lisp symbol and the value is an actual
3238    `coding-system' (this is also a Lisp symbol) assigned by a user.
3239    What Emacs does actually is to detect a category of coding system.
3240    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3241    narrow it down to one category, it selects the category of the
3242    highest priority.  Priorities of categories are also specified by a
3243    user in a Lisp variable `coding-category-list'.
3244
3245 */
3246
3247 static
3248 int ascii_skip_code[256];
3249
3250 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3251    If it detects possible coding systems, return an integer in which
3252    appropriate flag bits are set.  Flag bits are defined by macros
3253    CODING_CATEGORY_MASK_XXX in `coding.h'.
3254
3255    How many ASCII characters are at the head is returned as *SKIP.  */
3256
3257 static int
3258 detect_coding_mask (source, src_bytes, priorities, skip)
3259      unsigned char *source;
3260      int src_bytes, *priorities, *skip;
3261 {
3262   register unsigned char c;
3263   unsigned char *src = source, *src_end = source + src_bytes;
3264   unsigned int mask;
3265   int i;
3266
3267   /* At first, skip all ASCII characters and control characters except
3268      for three ISO2022 specific control characters.  */
3269   ascii_skip_code[ISO_CODE_SO] = 0;
3270   ascii_skip_code[ISO_CODE_SI] = 0;
3271   ascii_skip_code[ISO_CODE_ESC] = 0;
3272
3273  label_loop_detect_coding:
3274   while (src < src_end && ascii_skip_code[*src]) src++;
3275   *skip = src - source;
3276
3277   if (src >= src_end)
3278     /* We found nothing other than ASCII.  There's nothing to do.  */
3279     return 0;
3280
3281   c = *src;
3282   /* The text seems to be encoded in some multilingual coding system.
3283      Now, try to find in which coding system the text is encoded.  */
3284   if (c < 0x80)
3285     {
3286       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3287       /* C is an ISO2022 specific control code of C0.  */
3288       mask = detect_coding_iso2022 (src, src_end);
3289       if (mask == 0)
3290         {
3291           /* No valid ISO2022 code follows C.  Try again.  */
3292           src++;
3293           if (c == ISO_CODE_ESC)
3294             ascii_skip_code[ISO_CODE_ESC] = 1;
3295           else
3296             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3297           goto label_loop_detect_coding;
3298         }
3299       if (priorities)
3300         goto label_return_highest_only;
3301     }
3302   else
3303     {
3304       int try;
3305
3306       if (c < 0xA0)
3307         {
3308           /* C is the first byte of SJIS character code,
3309              or a leading-code of Emacs' internal format (emacs-mule).  */
3310           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3311
3312           /* Or, if C is a special latin extra code,
3313              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3314              or is an ISO2022 control-sequence-introducer (CSI),
3315              we should also consider the possibility of ISO2022 codings.  */
3316           if ((VECTORP (Vlatin_extra_code_table)
3317                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3318               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3319               || (c == ISO_CODE_CSI
3320                   && (src < src_end
3321                       && (*src == ']'
3322                           || ((*src == '0' || *src == '1' || *src == '2')
3323                               && src + 1 < src_end
3324                               && src[1] == ']')))))
3325             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3326                      | CODING_CATEGORY_MASK_ISO_8BIT);
3327         }
3328       else
3329         /* C is a character of ISO2022 in graphic plane right,
3330            or a SJIS's 1-byte character code (i.e. JISX0201),
3331            or the first byte of BIG5's 2-byte code.  */
3332         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3333                 | CODING_CATEGORY_MASK_ISO_8BIT
3334                 | CODING_CATEGORY_MASK_SJIS
3335                 | CODING_CATEGORY_MASK_BIG5);
3336
3337       /* Or, we may have to consider the possibility of CCL.  */
3338       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3339           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3340               ->spec.ccl.valid_codes)[c])
3341         try |= CODING_CATEGORY_MASK_CCL;
3342
3343       mask = 0;
3344       if (priorities)
3345         {
3346           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3347             {
3348               if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
3349                 mask = detect_coding_iso2022 (src, src_end);
3350               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3351                 mask = detect_coding_sjis (src, src_end);
3352               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3353                 mask = detect_coding_big5 (src, src_end);
3354               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3355                 mask = detect_coding_emacs_mule (src, src_end);
3356               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3357                 mask = detect_coding_ccl (src, src_end);
3358               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3359                 mask = CODING_CATEGORY_MASK_RAW_TEXT;
3360               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3361                 mask = CODING_CATEGORY_MASK_BINARY;
3362               if (mask)
3363                 goto label_return_highest_only;
3364             }
3365           return CODING_CATEGORY_MASK_RAW_TEXT;
3366         }
3367       if (try & CODING_CATEGORY_MASK_ISO)
3368         mask |= detect_coding_iso2022 (src, src_end);
3369       if (try & CODING_CATEGORY_MASK_SJIS)
3370         mask |= detect_coding_sjis (src, src_end);
3371       if (try & CODING_CATEGORY_MASK_BIG5)
3372         mask |= detect_coding_big5 (src, src_end);
3373       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3374         mask |= detect_coding_emacs_mule (src, src_end);
3375       if (try & CODING_CATEGORY_MASK_CCL)
3376         mask |= detect_coding_ccl (src, src_end);
3377     }
3378   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3379
3380  label_return_highest_only:
3381   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3382     {
3383       if (mask & priorities[i])
3384         return priorities[i];
3385     }
3386   return CODING_CATEGORY_MASK_RAW_TEXT;
3387 }
3388
3389 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3390    The information of the detected coding system is set in CODING.  */
3391
3392 void
3393 detect_coding (coding, src, src_bytes)
3394      struct coding_system *coding;
3395      unsigned char *src;
3396      int src_bytes;
3397 {
3398   unsigned int idx;
3399   int skip, mask, i;
3400   Lisp_Object val;
3401
3402   val = Vcoding_category_list;
3403   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3404   coding->heading_ascii = skip;
3405
3406   if (!mask) return;
3407
3408   /* We found a single coding system of the highest priority in MASK.  */
3409   idx = 0;
3410   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3411   if (! mask)
3412     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3413
3414   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3415
3416   if (coding->eol_type != CODING_EOL_UNDECIDED)
3417     {
3418       Lisp_Object tmp;
3419
3420       tmp = Fget (val, Qeol_type);
3421       if (VECTORP (tmp))
3422         val = XVECTOR (tmp)->contents[coding->eol_type];
3423     }
3424   setup_coding_system (val, coding);
3425   /* Set this again because setup_coding_system reset this member.  */
3426   coding->heading_ascii = skip;
3427 }
3428
3429 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3430    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3431    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3432
3433    How many non-eol characters are at the head is returned as *SKIP.  */
3434
3435 #define MAX_EOL_CHECK_COUNT 3
3436
3437 static int
3438 detect_eol_type (source, src_bytes, skip)
3439      unsigned char *source;
3440      int src_bytes, *skip;
3441 {
3442   unsigned char *src = source, *src_end = src + src_bytes;
3443   unsigned char c;
3444   int total = 0;                /* How many end-of-lines are found so far.  */
3445   int eol_type = CODING_EOL_UNDECIDED;
3446   int this_eol_type;
3447
3448   *skip = 0;
3449
3450   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3451     {
3452       c = *src++;
3453       if (c == '\n' || c == '\r')
3454         {
3455           if (*skip == 0)
3456             *skip = src - 1 - source;
3457           total++;
3458           if (c == '\n')
3459             this_eol_type = CODING_EOL_LF;
3460           else if (src >= src_end || *src != '\n')
3461             this_eol_type = CODING_EOL_CR;
3462           else
3463             this_eol_type = CODING_EOL_CRLF, src++;
3464
3465           if (eol_type == CODING_EOL_UNDECIDED)
3466             /* This is the first end-of-line.  */
3467             eol_type = this_eol_type;
3468           else if (eol_type != this_eol_type)
3469             {
3470               /* The found type is different from what found before.  */
3471               eol_type = CODING_EOL_INCONSISTENT;
3472               break;
3473             }
3474         }
3475     }
3476
3477   if (*skip == 0)
3478     *skip = src_end - source;
3479   return eol_type;
3480 }
3481
3482 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3483    is encoded.  If it detects an appropriate format of end-of-line, it
3484    sets the information in *CODING.  */
3485
3486 void
3487 detect_eol (coding, src, src_bytes)
3488      struct coding_system *coding;
3489      unsigned char *src;
3490      int src_bytes;
3491 {
3492   Lisp_Object val;
3493   int skip;
3494   int eol_type = detect_eol_type (src, src_bytes, &skip);
3495
3496   if (coding->heading_ascii > skip)
3497     coding->heading_ascii = skip;
3498   else
3499     skip = coding->heading_ascii;
3500
3501   if (eol_type == CODING_EOL_UNDECIDED)
3502     return;
3503   if (eol_type == CODING_EOL_INCONSISTENT)
3504     {
3505 #if 0
3506       /* This code is suppressed until we find a better way to
3507          distinguish raw text file and binary file.  */
3508
3509       /* If we have already detected that the coding is raw-text, the
3510          coding should actually be no-conversion.  */
3511       if (coding->type == coding_type_raw_text)
3512         {
3513           setup_coding_system (Qno_conversion, coding);
3514           return;
3515         }
3516       /* Else, let's decode only text code anyway.  */
3517 #endif /* 0 */
3518       eol_type = CODING_EOL_LF;
3519     }
3520
3521   val = Fget (coding->symbol, Qeol_type);
3522   if (VECTORP (val) && XVECTOR (val)->size == 3)
3523     {
3524       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3525       coding->heading_ascii = skip;
3526     }
3527 }
3528
/* Constant number of bytes added to every conversion buffer size
   estimate, on top of the magnified source size.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the source size is multiplied to estimate the size
   of a decoding buffer for CODING (a pointer to struct
   coding_system): 3 for ISO2022, 2 for SJIS and BIG5, 1 for raw-text,
   the decoder's own buf_magnification for CCL, and 2 for anything
   else.  CODING may be any pointer-valued expression, but it is
   evaluated several times, so it must not have side effects.  */
#define DECODING_BUFFER_MAG(coding)                                           \
  ((coding)->type == coding_type_iso2022                                      \
   ? 3                                                                        \
   : (((coding)->type == coding_type_sjis                                     \
       || (coding)->type == coding_type_big5)                                 \
      ? 2                                                                     \
      : ((coding)->type == coding_type_raw_text                               \
         ? 1                                                                  \
         : ((coding)->type == coding_type_ccl                                 \
            ? (coding)->spec.ccl.decoder.buf_magnification                    \
            : 2))))
3541
3542 /* Return maximum size (bytes) of a buffer enough for decoding
3543    SRC_BYTES of text encoded in CODING.  */
3544
3545 int
3546 decoding_buffer_size (coding, src_bytes)
3547      struct coding_system *coding;
3548      int src_bytes;
3549 {
3550   return (src_bytes * DECODING_BUFFER_MAG (coding)
3551           + CONVERSION_BUFFER_EXTRA_ROOM);
3552 }
3553
3554 /* Return maximum size (bytes) of a buffer enough for encoding
3555    SRC_BYTES of text to CODING.  */
3556
3557 int
3558 encoding_buffer_size (coding, src_bytes)
3559      struct coding_system *coding;
3560      int src_bytes;
3561 {
3562   int magnification;
3563
3564   if (coding->type == coding_type_ccl)
3565     magnification = coding->spec.ccl.encoder.buf_magnification;
3566   else
3567     magnification = 3;
3568
3569   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3570 }
3571
/* Smallest size get_conversion_buffer ever grows the buffer to.  */
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* Work buffer handed out by get_conversion_buffer, and its current
   size in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a buffer of at least SIZE bytes to be used for
   encoding or decoding.  The buffer is shared: the same pointer is
   returned as long as it is large enough.  When it has to grow, a new
   block is allocated (the current size is doubled until it reaches
   SIZE, starting from no less than MINIMUM_CONVERSION_BUFFER_SIZE),
   the old block is freed, and the old contents are NOT preserved.

   This function does not check the result of xmalloc itself.
   NOTE(review): presumably xmalloc deals with memory exhaustion on
   its own -- confirm against its definition.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* Doubling would never make progress from a size of zero (or
         less), e.g. when the buffer was never set up.  */
      if (real_size < MINIMUM_CONVERSION_BUFFER_SIZE)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;

      while (real_size < size) real_size *= 2;
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3600
3601 int
3602 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3603      struct coding_system *coding;
3604      unsigned char *source, *destination;
3605      int src_bytes, dst_bytes, encodep;
3606 {
3607   struct ccl_program *ccl
3608     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3609   int result;
3610
3611   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3612
3613   coding->produced = ccl_driver (ccl, source, destination,
3614                                  src_bytes, dst_bytes, &(coding->consumed));
3615   coding->produced_char
3616     = multibyte_chars_in_text (destination, coding->produced);
3617   coding->consumed_char
3618     = multibyte_chars_in_text (source, coding->consumed);
3619
3620   switch (ccl->status)
3621     {
3622     case CCL_STAT_SUSPEND_BY_SRC:
3623       result = CODING_FINISH_INSUFFICIENT_SRC;
3624       break;
3625     case CCL_STAT_SUSPEND_BY_DST:
3626       result = CODING_FINISH_INSUFFICIENT_DST;
3627       break;
3628     default:
3629       result = CODING_FINISH_NORMAL;
3630       break;
3631     }
3632   return result;
3633 }
3634
3635 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3636    decoding, it may detect coding system and format of end-of-line if
3637    those are not yet decided.  */
3638
3639 int
3640 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3641      struct coding_system *coding;
3642      unsigned char *source, *destination;
3643      int src_bytes, dst_bytes;
3644 {
3645   int result;
3646
3647   if (src_bytes <= 0
3648       && ! (coding->mode & CODING_MODE_LAST_BLOCK
3649             && CODING_REQUIRE_FLUSHING (coding)))
3650     {
3651       coding->produced = coding->produced_char = 0;
3652       coding->consumed = coding->consumed_char = 0;
3653       coding->fake_multibyte = 0;
3654       return CODING_FINISH_NORMAL;
3655     }
3656
3657   if (coding->type == coding_type_undecided)
3658     detect_coding (coding, source, src_bytes);
3659
3660   if (coding->eol_type == CODING_EOL_UNDECIDED)
3661     detect_eol (coding, source, src_bytes);
3662
3663   switch (coding->type)
3664     {
3665     case coding_type_emacs_mule:
3666     case coding_type_undecided:
3667     case coding_type_raw_text:
3668       if (coding->eol_type == CODING_EOL_LF
3669           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3670         goto label_no_conversion;
3671       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3672       break;
3673
3674     case coding_type_sjis:
3675       result = decode_coding_sjis_big5 (coding, source, destination,
3676                                         src_bytes, dst_bytes, 1);
3677       break;
3678
3679     case coding_type_iso2022:
3680       result = decode_coding_iso2022 (coding, source, destination,
3681                                       src_bytes, dst_bytes);
3682       break;
3683
3684     case coding_type_big5:
3685       result = decode_coding_sjis_big5 (coding, source, destination,
3686                                         src_bytes, dst_bytes, 0);
3687       break;
3688
3689     case coding_type_ccl:
3690       result = ccl_coding_driver (coding, source, destination,
3691                                   src_bytes, dst_bytes, 0);
3692       break;
3693
3694     default:                    /* i.e. case coding_type_no_conversion: */
3695     label_no_conversion:
3696       if (dst_bytes && src_bytes > dst_bytes)
3697         {
3698           coding->produced = dst_bytes;
3699           result = CODING_FINISH_INSUFFICIENT_DST;
3700         }
3701       else
3702         {
3703           coding->produced = src_bytes;
3704           result = CODING_FINISH_NORMAL;
3705         }
3706       if (dst_bytes)
3707         bcopy (source, destination, coding->produced);
3708       else
3709         safe_bcopy (source, destination, coding->produced);
3710       coding->fake_multibyte = 1;
3711       coding->consumed
3712         = coding->consumed_char = coding->produced_char = coding->produced;
3713       break;
3714     }
3715
3716   return result;
3717 }
3718
3719 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3720
3721 int
3722 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3723      struct coding_system *coding;
3724      unsigned char *source, *destination;
3725      int src_bytes, dst_bytes;
3726 {
3727   int result;
3728
3729   if (src_bytes <= 0
3730       && ! (coding->mode & CODING_MODE_LAST_BLOCK
3731             && CODING_REQUIRE_FLUSHING (coding)))
3732     {
3733       coding->produced = coding->produced_char = 0;
3734       coding->consumed = coding->consumed_char = 0;
3735       coding->fake_multibyte = 0;
3736       return CODING_FINISH_NORMAL;
3737     }
3738
3739   switch (coding->type)
3740     {
3741     case coding_type_emacs_mule:
3742     case coding_type_undecided:
3743     case coding_type_raw_text:
3744       if (coding->eol_type == CODING_EOL_LF
3745           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3746         goto label_no_conversion;
3747       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3748       break;
3749
3750     case coding_type_sjis:
3751       result = encode_coding_sjis_big5 (coding, source, destination,
3752                                         src_bytes, dst_bytes, 1);
3753       break;
3754
3755     case coding_type_iso2022:
3756       result = encode_coding_iso2022 (coding, source, destination,
3757                                       src_bytes, dst_bytes);
3758       break;
3759
3760     case coding_type_big5:
3761       result = encode_coding_sjis_big5 (coding, source, destination,
3762                                         src_bytes, dst_bytes, 0);
3763       break;
3764
3765     case coding_type_ccl:
3766       result = ccl_coding_driver (coding, source, destination,
3767                                   src_bytes, dst_bytes, 1);
3768       break;
3769
3770     default:                    /* i.e. case coding_type_no_conversion: */
3771     label_no_conversion:
3772       if (dst_bytes && src_bytes > dst_bytes)
3773         {
3774           coding->produced = dst_bytes;
3775           result = CODING_FINISH_INSUFFICIENT_DST;
3776         }
3777       else
3778         {
3779           coding->produced = src_bytes;
3780           result = CODING_FINISH_NORMAL;
3781         }
3782       if (dst_bytes)
3783         bcopy (source, destination, coding->produced);
3784       else
3785         safe_bcopy (source, destination, coding->produced);
3786       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3787         {
3788           unsigned char *p = destination, *pend = p + coding->produced;
3789           while (p < pend)
3790             if (*p++ == '\015') p[-1] = '\n';
3791         }
3792       coding->fake_multibyte = 1;
3793       coding->consumed
3794         = coding->consumed_char = coding->produced_char = coding->produced;
3795       break;
3796     }
3797
3798   return result;
3799 }
3800
3801 /* Scan text in the region between *BEG and *END (byte positions),
3802    skip characters which we don't have to decode by coding system
3803    CODING at the head and tail, then set *BEG and *END to the region
3804    of the text we actually have to convert.  The caller should move
3805    the gap out of the region in advance.
3806
3807    If STR is not NULL, *BEG and *END are indices into STR.  */
3808
3809 static void
3810 shrink_decoding_region (beg, end, coding, str)
3811      int *beg, *end;
3812      struct coding_system *coding;
3813      unsigned char *str;
3814 {
3815   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3816   int eol_conversion;
3817
3818   if (coding->type == coding_type_ccl
3819       || coding->type == coding_type_undecided
3820       || !NILP (coding->post_read_conversion))
3821     {
3822       /* We can't skip any data.  */
3823       return;
3824     }
3825   else if (coding->type == coding_type_no_conversion)
3826     {
3827       /* We need no conversion, but don't have to skip any data here.
3828          Decoding routine handles them effectively anyway.  */
3829       return;
3830     }
3831
3832   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3833
3834   if ((! eol_conversion) && (coding->heading_ascii >= 0))
3835     /* Detection routine has already found how much we can skip at the
3836        head.  */
3837     *beg += coding->heading_ascii;
3838
3839   if (str)
3840     {
3841       begp_orig = begp = str + *beg;
3842       endp_orig = endp = str + *end;
3843     }
3844   else
3845     {
3846       begp_orig = begp = BYTE_POS_ADDR (*beg);
3847       endp_orig = endp = begp + *end - *beg;
3848     }
3849
3850   switch (coding->type)
3851     {
3852     case coding_type_emacs_mule:
3853     case coding_type_raw_text:
3854       if (eol_conversion)
3855         {
3856           if (coding->heading_ascii < 0)
3857             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3858           while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
3859             endp--;
3860           /* Do not consider LF as ascii if preceded by CR, since that
3861              confuses eol decoding. */
3862           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3863             endp++;
3864         }
3865       else
3866         begp = endp;
3867       break;
3868
3869     case coding_type_sjis:
3870     case coding_type_big5:
3871       /* We can skip all ASCII characters at the head.  */
3872       if (coding->heading_ascii < 0)
3873         {
3874           if (eol_conversion)
3875             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3876           else
3877             while (begp < endp && *begp < 0x80) begp++;
3878         }
3879       /* We can skip all ASCII characters at the tail except for the
3880          second byte of SJIS or BIG5 code.  */
3881       if (eol_conversion)
3882         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3883       else
3884         while (begp < endp && endp[-1] < 0x80) endp--;
3885       /* Do not consider LF as ascii if preceded by CR, since that
3886          confuses eol decoding. */
3887       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3888         endp++;
3889       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3890         endp++;
3891       break;
3892
3893     default:            /* i.e. case coding_type_iso2022: */
3894       if (coding->heading_ascii < 0)
3895         {
3896           /* We can skip all ASCII characters at the head except for a
3897              few control codes.  */
3898           while (begp < endp && (c = *begp) < 0x80
3899                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3900                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3901                  && (!eol_conversion || c != ISO_CODE_LF))
3902             begp++;
3903         }
3904       switch (coding->category_idx)
3905         {
3906         case CODING_CATEGORY_IDX_ISO_8_1:
3907         case CODING_CATEGORY_IDX_ISO_8_2:
3908           /* We can skip all ASCII characters at the tail.  */
3909           if (eol_conversion)
3910             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3911           else
3912             while (begp < endp && endp[-1] < 0x80) endp--;
3913           /* Do not consider LF as ascii if preceded by CR, since that
3914              confuses eol decoding. */
3915           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3916             endp++;
3917           break;
3918
3919         case CODING_CATEGORY_IDX_ISO_7:
3920         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3921           /* We can skip all charactes at the tail except for ESC and
3922              the following 2-byte at the tail.  */
3923           if (eol_conversion)
3924             while (begp < endp
3925                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3926               endp--;
3927           else
3928             while (begp < endp
3929                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3930               endp--;
3931           /* Do not consider LF as ascii if preceded by CR, since that
3932              confuses eol decoding. */
3933           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3934             endp++;
3935           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3936             {
3937               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3938                 /* This is an ASCII designation sequence.  We can
3939                     surely skip the tail.  */
3940                 endp += 2;
3941               else
3942                 /* Hmmm, we can't skip the tail.  */
3943                 endp = endp_orig;
3944             }
3945         }
3946     }
3947   *beg += begp - begp_orig;
3948   *end += endp - endp_orig;
3949   return;
3950 }
3951
3952 /* Like shrink_decoding_region but for encoding.  */
3953
3954 static void
3955 shrink_encoding_region (beg, end, coding, str)
3956      int *beg, *end;
3957      struct coding_system *coding;
3958      unsigned char *str;
3959 {
3960   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3961   int eol_conversion;
3962
3963   if (coding->type == coding_type_ccl)
3964     /* We can't skip any data.  */
3965     return;
3966   else if (coding->type == coding_type_no_conversion)
3967     {
3968       /* We need no conversion.  */
3969       *beg = *end;
3970       return;
3971     }
3972
3973   if (str)
3974     {
3975       begp_orig = begp = str + *beg;
3976       endp_orig = endp = str + *end;
3977     }
3978   else
3979     {
3980       begp_orig = begp = BYTE_POS_ADDR (*beg);
3981       endp_orig = endp = begp + *end - *beg;
3982     }
3983
3984   eol_conversion = (coding->eol_type == CODING_EOL_CR
3985                     || coding->eol_type == CODING_EOL_CRLF);
3986
3987   /* Here, we don't have to check coding->pre_write_conversion because
3988      the caller is expected to have handled it already.  */
3989   switch (coding->type)
3990     {
3991     case coding_type_undecided:
3992     case coding_type_emacs_mule:
3993     case coding_type_raw_text:
3994       if (eol_conversion)
3995         {
3996           while (begp < endp && *begp != '\n') begp++;
3997           while (begp < endp && endp[-1] != '\n') endp--;
3998         }
3999       else
4000         begp = endp;
4001       break;
4002
4003     case coding_type_iso2022:
4004       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4005         {
4006           unsigned char *bol = begp;
4007           while (begp < endp && *begp < 0x80)
4008             {
4009               begp++;
4010               if (begp[-1] == '\n')
4011                 bol = begp;
4012             }
4013           begp = bol;
4014           goto label_skip_tail;
4015         }
4016       /* fall down ... */
4017
4018     default:
4019       /* We can skip all ASCII characters at the head and tail.  */
4020       if (eol_conversion)
4021         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4022       else
4023         while (begp < endp && *begp < 0x80) begp++;
4024     label_skip_tail:
4025       if (eol_conversion)
4026         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4027       else
4028         while (begp < endp && *(endp - 1) < 0x80) endp--;
4029       break;
4030     }
4031
4032   *beg += begp - begp_orig;
4033   *end += endp - endp_orig;
4034   return;
4035 }
4036
4037 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4038    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4039    coding system CODING, and return the status code of code conversion
4040    (currently, this value has no meaning).
4041
4042    How many characters (and bytes) are converted to how many
4043    characters (and bytes) are recorded in members of the structure
4044    CODING.
4045
4046    If REPLACE is nonzero, we do various things as if the original text
4047    is deleted and a new text is inserted.  See the comments in
4048    replace_range (insdel.c) to know what we are doing.  */
4049
4050 int
4051 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4052      int from, from_byte, to, to_byte, encodep, replace;
4053      struct coding_system *coding;
4054 {
4055   int len = to - from, len_byte = to_byte - from_byte;
4056   int require, inserted, inserted_byte;
4057   int head_skip, tail_skip, total_skip;
4058   Lisp_Object saved_coding_symbol;
4059   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
4060   int first = 1;
4061   int fake_multibyte = 0;
4062   unsigned char *src, *dst;
4063   Lisp_Object deletion;
4064
4065   deletion = Qnil;
4066   saved_coding_symbol = Qnil;
4067
4068   if (from < PT && PT < to)
4069     SET_PT_BOTH (from, from_byte);
4070
4071   if (replace)
4072     {
4073       int saved_from = from;
4074
4075       prepare_to_modify_buffer (from, to, &from);
4076       if (saved_from != from)
4077         {
4078           to = from + len;
4079           if (multibyte)
4080             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4081           else
4082             from_byte = from, to_byte = to;
4083           len_byte = to_byte - from_byte;
4084         }
4085     }
4086
4087   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4088     {
4089       /* We must detect encoding of text and eol format.  */
4090
4091       if (from < GPT && to > GPT)
4092         move_gap_both (from, from_byte);
4093       if (coding->type == coding_type_undecided)
4094         {
4095           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4096           if (coding->type == coding_type_undecided)
4097             /* It seems that the text contains only ASCII, but we
4098                should not left it undecided because the deeper
4099                decoding routine (decode_coding) tries to detect the
4100                encodings again in vain.  */
4101             coding->type = coding_type_emacs_mule;
4102         }
4103       if (coding->eol_type == CODING_EOL_UNDECIDED)
4104         {
4105           saved_coding_symbol = coding->symbol;
4106           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4107           if (coding->eol_type == CODING_EOL_UNDECIDED)
4108             coding->eol_type = CODING_EOL_LF;
4109           /* We had better recover the original eol format if we
4110              encounter an inconsitent eol format while decoding.  */
4111           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4112         }
4113     }
4114
4115   coding->consumed_char = len, coding->consumed = len_byte;
4116
4117   if (encodep
4118       ? ! CODING_REQUIRE_ENCODING (coding)
4119       : ! CODING_REQUIRE_DECODING (coding))
4120     {
4121       coding->produced = len_byte;
4122       if (multibyte
4123           && ! replace
4124           /* See the comment of the member heading_ascii in coding.h.  */
4125           && coding->heading_ascii < len_byte)
4126         {
4127           /* We still may have to combine byte at the head and the
4128              tail of the text in the region.  */
4129           if (from < GPT && GPT < to)
4130             move_gap_both (to, to_byte);
4131           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4132           adjust_after_insert (from, from_byte, to, to_byte, len);
4133           coding->produced_char = len;
4134         }
4135       else
4136         {
4137           if (!replace)
4138             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4139           coding->produced_char = len_byte;
4140         }
4141       return 0;
4142     }
4143
4144   /* Now we convert the text.  */
4145
4146   /* For encoding, we must process pre-write-conversion in advance.  */
4147   if (encodep
4148       && ! NILP (coding->pre_write_conversion)
4149       && SYMBOLP (coding->pre_write_conversion)
4150       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4151     {
4152       /* The function in pre-write-conversion may put a new text in a
4153          new buffer.  */
4154       struct buffer *prev = current_buffer, *new;
4155
4156       call2 (coding->pre_write_conversion,
4157              make_number (from), make_number (to));
4158       if (current_buffer != prev)
4159         {
4160           len = ZV - BEGV;
4161           new = current_buffer;
4162           set_buffer_internal_1 (prev);
4163           del_range_2 (from, from_byte, to, to_byte);
4164           insert_from_buffer (new, BEG, len, 0);
4165           to = from + len;
4166           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4167           len_byte = to_byte - from_byte;
4168         }
4169     }
4170
4171   if (replace)
4172     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4173
4174   /* Try to skip the heading and tailing ASCIIs.  */
4175   {
4176     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4177
4178     if (from < GPT && GPT < to)
4179       move_gap_both (from, from_byte);
4180     if (encodep)
4181       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4182     else
4183       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4184     if (from_byte == to_byte
4185         && ! (coding->mode & CODING_MODE_LAST_BLOCK
4186               && CODING_REQUIRE_FLUSHING (coding)))
4187       {
4188         coding->produced = len_byte;
4189         coding->produced_char = multibyte ? len : len_byte;
4190         if (!replace)
4191           /* We must record and adjust for this new text now.  */
4192           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4193         return 0;
4194       }
4195
4196     head_skip = from_byte - from_byte_orig;
4197     tail_skip = to_byte_orig - to_byte;
4198     total_skip = head_skip + tail_skip;
4199     from += head_skip;
4200     to -= tail_skip;
4201     len -= total_skip; len_byte -= total_skip;
4202   }
4203
4204   /* For converion, we must put the gap before the text in addition to
4205      making the gap larger for efficient decoding.  The required gap
4206      size starts from 2000 which is the magic number used in make_gap.
4207      But, after one batch of conversion, it will be incremented if we
4208      find that it is not enough .  */
4209   require = 2000;
4210
4211   if (GAP_SIZE  < require)
4212     make_gap (require - GAP_SIZE);
4213   move_gap_both (from, from_byte);
4214
4215   inserted = inserted_byte = 0;
4216   src = GAP_END_ADDR, dst = GPT_ADDR;
4217
4218   GAP_SIZE += len_byte;
4219   ZV -= len;
4220   Z -= len;
4221   ZV_BYTE -= len_byte;
4222   Z_BYTE -= len_byte;
4223
4224   if (GPT - BEG < beg_unchanged)
4225     beg_unchanged = GPT - BEG;
4226   if (Z - GPT < end_unchanged)
4227     end_unchanged = Z - GPT;
4228
4229   for (;;)
4230     {
4231       int result;
4232
4233       /* The buffer memory is changed from:
4234          +--------+converted-text+---------+-------original-text------+---+
4235          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4236                   |<------------------- GAP_SIZE -------------------->|  */
4237       if (encodep)
4238         result = encode_coding (coding, src, dst, len_byte, 0);
4239       else
4240         result = decode_coding (coding, src, dst, len_byte, 0);
4241       /* to:
4242          +--------+-------converted-text--------+--+---original-text--+---+
4243          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4244                   |<------------------- GAP_SIZE -------------------->|  */
4245       if (coding->fake_multibyte)
4246         fake_multibyte = 1;
4247
4248       if (!encodep && !multibyte)
4249         coding->produced_char = coding->produced;
4250       inserted += coding->produced_char;
4251       inserted_byte += coding->produced;
4252       len_byte -= coding->consumed;
4253       src += coding->consumed;
4254       dst += inserted_byte;
4255
4256       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4257         {
4258           unsigned char *pend = dst, *p = pend - inserted_byte;
4259
4260           /* Encode LFs back to the original eol format (CR or CRLF).  */
4261           if (coding->eol_type == CODING_EOL_CR)
4262             {
4263               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4264             }
4265           else
4266             {
4267               int count = 0;
4268
4269               while (p < pend) if (*p++ == '\n') count++;
4270               if (src - dst < count)
4271                 {
4272                   /* We don't have sufficient room for putting LFs
4273                      back to CRLF.  We must record converted and
4274                      not-yet-converted text back to the buffer
4275                      content, enlarge the gap, then record them out of
4276                      the buffer contents again.  */
4277                   int add = len_byte + inserted_byte;
4278
4279                   GAP_SIZE -= add;
4280                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4281                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4282                   make_gap (count - GAP_SIZE);
4283                   GAP_SIZE += add;
4284                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4285                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4286                   /* Don't forget to update SRC, DST, and PEND.  */
4287                   src = GAP_END_ADDR - len_byte;
4288                   dst = GPT_ADDR + inserted_byte;
4289                   pend = dst;
4290                 }
4291               inserted += count;
4292               inserted_byte += count;
4293               coding->produced += count;
4294               p = dst = pend + count;
4295               while (count)
4296                 {
4297                   *--p = *--pend;
4298                   if (*p == '\n') count--, *--p = '\r';
4299                 }
4300             }
4301
4302           /* Suppress eol-format conversion in the further conversion.  */
4303           coding->eol_type = CODING_EOL_LF;
4304
4305           /* Restore the original symbol.  */
4306           coding->symbol = saved_coding_symbol;
4307
4308           continue;
4309         }
4310       if (len_byte <= 0)
4311         break;
4312       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4313         {
4314           /* The source text ends in invalid codes.  Let's just
4315              make them valid buffer contents, and finish conversion.  */
4316           inserted += len_byte;
4317           inserted_byte += len_byte;
4318           while (len_byte--)
4319             *dst++ = *src++;
4320           fake_multibyte = 1;
4321           break;
4322         }
4323       if (first)
4324         {
4325           /* We have just done the first batch of conversion which was
4326              stoped because of insufficient gap.  Let's reconsider the
4327              required gap size (i.e. SRT - DST) now.
4328
4329              We have converted ORIG bytes (== coding->consumed) into
4330              NEW bytes (coding->produced).  To convert the remaining
4331              LEN bytes, we may need REQUIRE bytes of gap, where:
4332                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4333                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4334              Here, we are sure that NEW >= ORIG.  */
4335           float ratio = coding->produced - coding->consumed;
4336           ratio /= coding->consumed;
4337           require = len_byte * ratio;
4338           first = 0;
4339         }
4340       if ((src - dst) < (require + 2000))
4341         {
4342           /* See the comment above the previous call of make_gap.  */
4343           int add = len_byte + inserted_byte;
4344
4345           GAP_SIZE -= add;
4346           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4347           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4348           make_gap (require + 2000);
4349           GAP_SIZE += add;
4350           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4351           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4352           /* Don't forget to update SRC, DST.  */
4353           src = GAP_END_ADDR - len_byte;
4354           dst = GPT_ADDR + inserted_byte;
4355         }
4356     }
4357   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4358
4359   if (multibyte
4360       && (fake_multibyte
4361           || !encodep && (to - from) != (to_byte - from_byte)))
4362     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4363
4364   /* If we have shrinked the conversion area, adjust it now.  */
4365   if (total_skip > 0)
4366     {
4367       if (tail_skip > 0)
4368         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4369       inserted += total_skip; inserted_byte += total_skip;
4370       GAP_SIZE += total_skip;
4371       GPT -= head_skip; GPT_BYTE -= head_skip;
4372       ZV -= total_skip; ZV_BYTE -= total_skip;
4373       Z -= total_skip; Z_BYTE -= total_skip;
4374       from -= head_skip; from_byte -= head_skip;
4375       to += tail_skip; to_byte += tail_skip;
4376     }
4377
4378   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4379
4380   if (! encodep && ! NILP (coding->post_read_conversion))
4381     {
4382       Lisp_Object val;
4383       int orig_inserted = inserted, pos = PT;
4384
4385       if (from != pos)
4386         temp_set_point_both (current_buffer, from, from_byte);
4387       val = call1 (coding->post_read_conversion, make_number (inserted));
4388       if (! NILP (val))
4389         {
4390           CHECK_NUMBER (val, 0);
4391           inserted = XFASTINT (val);
4392         }
4393       if (pos >= from + orig_inserted)
4394         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4395     }
4396
4397   signal_after_change (from, to - from, inserted);
4398
4399   {
4400     coding->consumed = to_byte - from_byte;
4401     coding->consumed_char = to - from;
4402     coding->produced = inserted_byte;
4403     coding->produced_char = inserted;
4404   }
4405
4406   return 0;
4407 }
4408
4409 Lisp_Object
4410 code_convert_string (str, coding, encodep, nocopy)
4411      Lisp_Object str;
4412      struct coding_system *coding;
4413      int encodep, nocopy;
4414 {
4415   int len;
4416   char *buf;
4417   int from = 0, to = XSTRING (str)->size;
4418   int to_byte = STRING_BYTES (XSTRING (str));
4419   struct gcpro gcpro1;
4420   Lisp_Object saved_coding_symbol;
4421   int result;
4422
4423   saved_coding_symbol = Qnil;
4424   if (encodep && !NILP (coding->pre_write_conversion)
4425       || !encodep && !NILP (coding->post_read_conversion))
4426     {
4427       /* Since we have to call Lisp functions which assume target text
4428          is in a buffer, after setting a temporary buffer, call
4429          code_convert_region.  */
4430       int count = specpdl_ptr - specpdl;
4431       struct buffer *prev = current_buffer;
4432
4433       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4434       temp_output_buffer_setup (" *code-converting-work*");
4435       set_buffer_internal (XBUFFER (Vstandard_output));
4436       if (encodep)
4437         insert_from_string (str, 0, 0, to, to_byte, 0);
4438       else
4439         {
4440           /* We must insert the contents of STR as is without
4441              unibyte<->multibyte conversion.  */
4442           current_buffer->enable_multibyte_characters = Qnil;
4443           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4444           current_buffer->enable_multibyte_characters = Qt;
4445         }
4446       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4447       if (encodep)
4448         /* We must return the buffer contents as unibyte string.  */
4449         current_buffer->enable_multibyte_characters = Qnil;
4450       str = make_buffer_string (BEGV, ZV, 0);
4451       set_buffer_internal (prev);
4452       return unbind_to (count, str);
4453     }
4454
4455   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4456     {
4457       /* See the comments in code_convert_region.  */
4458       if (coding->type == coding_type_undecided)
4459         {
4460           detect_coding (coding, XSTRING (str)->data, to_byte);
4461           if (coding->type == coding_type_undecided)
4462             coding->type = coding_type_emacs_mule;
4463         }
4464       if (coding->eol_type == CODING_EOL_UNDECIDED)
4465         {
4466           saved_coding_symbol = coding->symbol;
4467           detect_eol (coding, XSTRING (str)->data, to_byte);
4468           if (coding->eol_type == CODING_EOL_UNDECIDED)
4469             coding->eol_type = CODING_EOL_LF;
4470           /* We had better recover the original eol format if we
4471              encounter an inconsitent eol format while decoding.  */
4472           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4473         }
4474     }
4475
4476   if (encodep
4477       ? ! CODING_REQUIRE_ENCODING (coding)
4478       : ! CODING_REQUIRE_DECODING (coding))
4479     from = to_byte;
4480   else
4481     {
4482       /* Try to skip the heading and tailing ASCIIs.  */
4483       if (encodep)
4484         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4485       else
4486         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4487     }
4488   if (from == to_byte
4489       && ! (coding->mode & CODING_MODE_LAST_BLOCK
4490             && CODING_REQUIRE_FLUSHING (coding)))
4491     return (nocopy ? str : Fcopy_sequence (str));
4492
4493   if (encodep)
4494     len = encoding_buffer_size (coding, to_byte - from);
4495   else
4496     len = decoding_buffer_size (coding, to_byte - from);
4497   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4498   GCPRO1 (str);
4499   buf = get_conversion_buffer (len);
4500   UNGCPRO;
4501
4502   if (from > 0)
4503     bcopy (XSTRING (str)->data, buf, from);
4504   result = (encodep
4505             ? encode_coding (coding, XSTRING (str)->data + from,
4506                              buf + from, to_byte - from, len)
4507             : decode_coding (coding, XSTRING (str)->data + from,
4508                              buf + from, to_byte - from, len));
4509   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4510     {
4511       /* We simple try to decode the whole string again but without
4512          eol-conversion this time.  */
4513       coding->eol_type = CODING_EOL_LF;
4514       coding->symbol = saved_coding_symbol;
4515       return code_convert_string (str, coding, encodep, nocopy);
4516     }
4517
4518   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4519          STRING_BYTES (XSTRING (str)) - to_byte);
4520
4521   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4522   if (encodep)
4523     str = make_unibyte_string (buf, len + coding->produced);
4524   else
4525     {
4526       int chars= (coding->fake_multibyte
4527                   ? multibyte_chars_in_text (buf + from, coding->produced)
4528                   : coding->produced_char);
4529       str = make_multibyte_string (buf, len + chars, len + coding->produced);
4530     }
4531
4532   return str;
4533 }
4534
4535 \f
4536 #ifdef emacs
4537 /*** 8. Emacs Lisp library functions ***/
4538
4539 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4540   "Return t if OBJECT is nil or a coding-system.\n\
4541 See the documentation of `make-coding-system' for information\n\
4542 about coding-system objects.")
4543   (obj)
4544      Lisp_Object obj;
4545 {
4546   if (NILP (obj))
4547     return Qt;
4548   if (!SYMBOLP (obj))
4549     return Qnil;
4550   /* Get coding-spec vector for OBJ.  */
4551   obj = Fget (obj, Qcoding_system);
4552   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4553           ? Qt : Qnil);
4554 }
4555
4556 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4557        Sread_non_nil_coding_system, 1, 1, 0,
4558   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4559   (prompt)
4560      Lisp_Object prompt;
4561 {
4562   Lisp_Object val;
4563   do
4564     {
4565       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4566                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4567     }
4568   while (XSTRING (val)->size == 0);
4569   return (Fintern (val, Qnil));
4570 }
4571
4572 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4573   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4574 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4575   (prompt, default_coding_system)
4576      Lisp_Object prompt, default_coding_system;
4577 {
4578   Lisp_Object val;
4579   if (SYMBOLP (default_coding_system))
4580     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4581   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4582                           Qt, Qnil, Qcoding_system_history,
4583                           default_coding_system, Qnil);
4584   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4585 }
4586
4587 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4588        1, 1, 0,
4589   "Check validity of CODING-SYSTEM.\n\
4590 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4591 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4592 The value of property should be a vector of length 5.")
4593   (coding_system)
4594      Lisp_Object coding_system;
4595 {
4596   CHECK_SYMBOL (coding_system, 0);
4597   if (!NILP (Fcoding_system_p (coding_system)))
4598     return coding_system;
4599   while (1)
4600     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4601 }
4602 \f
4603 Lisp_Object
4604 detect_coding_system (src, src_bytes, highest)
4605      unsigned char *src;
4606      int src_bytes, highest;
4607 {
4608   int coding_mask, eol_type;
4609   Lisp_Object val, tmp;
4610   int dummy;
4611
4612   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4613   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4614   if (eol_type == CODING_EOL_INCONSISTENT)
4615     eol_type = CODING_EOL_UNDECIDED;
4616
4617   if (!coding_mask)
4618     {
4619       val = Qundecided;
4620       if (eol_type != CODING_EOL_UNDECIDED)
4621         {
4622           Lisp_Object val2;
4623           val2 = Fget (Qundecided, Qeol_type);
4624           if (VECTORP (val2))
4625             val = XVECTOR (val2)->contents[eol_type];
4626         }
4627       return (highest ? val : Fcons (val, Qnil));
4628     }
4629
4630   /* At first, gather possible coding systems in VAL.  */
4631   val = Qnil;
4632   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4633     {
4634       int idx
4635         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4636       if (coding_mask & (1 << idx))
4637         {
4638           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4639           if (highest)
4640             break;
4641         }
4642     }
4643   if (!highest)
4644     val = Fnreverse (val);
4645
4646   /* Then, replace the elements with subsidiary coding systems.  */
4647   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4648     {
4649       if (eol_type != CODING_EOL_UNDECIDED
4650           && eol_type != CODING_EOL_INCONSISTENT)
4651         {
4652           Lisp_Object eol;
4653           eol = Fget (XCONS (tmp)->car, Qeol_type);
4654           if (VECTORP (eol))
4655             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4656         }
4657     }
4658   return (highest ? XCONS (val)->car : val);
4659 }
4660
4661 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4662        2, 3, 0,
4663   "Detect coding system of the text in the region between START and END.\n\
4664 Return a list of possible coding systems ordered by priority.\n\
4665 \n\
4666 If only ASCII characters are found, it returns a list of single element\n\
4667 `undecided' or its subsidiary coding system according to a detected\n\
4668 end-of-line format.\n\
4669 \n\
4670 If optional argument HIGHEST is non-nil, return the coding system of\n\
4671 highest priority.")
4672   (start, end, highest)
4673      Lisp_Object start, end, highest;
4674 {
4675   int from, to;
4676   int from_byte, to_byte;
4677
4678   CHECK_NUMBER_COERCE_MARKER (start, 0);
4679   CHECK_NUMBER_COERCE_MARKER (end, 1);
4680
4681   validate_region (&start, &end);
4682   from = XINT (start), to = XINT (end);
4683   from_byte = CHAR_TO_BYTE (from);
4684   to_byte = CHAR_TO_BYTE (to);
4685
4686   if (from < GPT && to >= GPT)
4687     move_gap_both (to, to_byte);
4688
4689   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4690                                to_byte - from_byte,
4691                                !NILP (highest));
4692 }
4693
4694 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4695        1, 2, 0,
4696   "Detect coding system of the text in STRING.\n\
4697 Return a list of possible coding systems ordered by priority.\n\
4698 \n\
4699 If only ASCII characters are found, it returns a list of single element\n\
4700 `undecided' or its subsidiary coding system according to a detected\n\
4701 end-of-line format.\n\
4702 \n\
4703 If optional argument HIGHEST is non-nil, return the coding system of\n\
4704 highest priority.")
4705   (string, highest)
4706      Lisp_Object string, highest;
4707 {
4708   CHECK_STRING (string, 0);
4709
4710   return detect_coding_system (XSTRING (string)->data,
4711                                STRING_BYTES (XSTRING (string)),
4712                                !NILP (highest));
4713 }
4714
4715 Lisp_Object
4716 code_convert_region1 (start, end, coding_system, encodep)
4717      Lisp_Object start, end, coding_system;
4718      int encodep;
4719 {
4720   struct coding_system coding;
4721   int from, to, len;
4722
4723   CHECK_NUMBER_COERCE_MARKER (start, 0);
4724   CHECK_NUMBER_COERCE_MARKER (end, 1);
4725   CHECK_SYMBOL (coding_system, 2);
4726
4727   validate_region (&start, &end);
4728   from = XFASTINT (start);
4729   to = XFASTINT (end);
4730
4731   if (NILP (coding_system))
4732     return make_number (to - from);
4733
4734   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4735     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4736
4737   /* The code conversion routine can not preserve text properties for
4738      now.  So, we must remove all text properties in the region.  */
4739   Fset_text_properties (start, end, Qnil, Qnil);
4740
4741   coding.mode |= CODING_MODE_LAST_BLOCK;
4742   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4743                        &coding, encodep, 1);
4744   Vlast_coding_system_used = coding.symbol;
4745   return make_number (coding.produced_char);
4746 }
4747
DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
       3, 3, "r\nzCoding system: ",
  "Decode the current region by specified coding system.\n\
When called from a program, takes three arguments:\n\
START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)\n\
It returns the length of the decoded text.")
  (start, end, coding_system)
     Lisp_Object start, end, coding_system;
{
  /* All the work is done by code_convert_region1; its last argument
     ENCODEP is 0 here, which selects decoding.  */
  return code_convert_region1 (start, end, coding_system, 0);
}
4762
DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
       3, 3, "r\nzCoding system: ",
  "Encode the current region by specified coding system.\n\
When called from a program, takes three arguments:\n\
START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)\n\
It returns the length of the encoded text.")
  (start, end, coding_system)
     Lisp_Object start, end, coding_system;
{
  /* All the work is done by code_convert_region1; its last argument
     ENCODEP is 1 here, which selects encoding.  */
  return code_convert_region1 (start, end, coding_system, 1);
}
4777
4778 Lisp_Object
4779 code_convert_string1 (string, coding_system, nocopy, encodep)
4780      Lisp_Object string, coding_system, nocopy;
4781      int encodep;
4782 {
4783   struct coding_system coding;
4784
4785   CHECK_STRING (string, 0);
4786   CHECK_SYMBOL (coding_system, 1);
4787
4788   if (NILP (coding_system))
4789     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4790
4791   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4792     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4793
4794   coding.mode |= CODING_MODE_LAST_BLOCK;
4795   Vlast_coding_system_used = coding.symbol;
4796   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4797 }
4798
DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
       2, 3, 0,
  "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
if the decoding operation is trivial.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)")
  (string, coding_system, nocopy)
     Lisp_Object string, coding_system, nocopy;
{
  /* All the work is done by code_convert_string1; its last argument
     ENCODEP is 0 here, which selects decoding.  */
  return code_convert_string1 (string, coding_system, nocopy, 0);
}
4812
DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
       2, 3, 0,
  "Encode STRING to CODING-SYSTEM, and return the result.\n\
Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
if the encoding operation is trivial.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)")
  (string, coding_system, nocopy)
     Lisp_Object string, coding_system, nocopy;
{
  /* All the work is done by code_convert_string1; its last argument
     ENCODEP is 1 here, which selects encoding.  */
  return code_convert_string1 (string, coding_system, nocopy, 1);
}
4826
4827 /* Encode or decode STRING according to CODING_SYSTEM.
4828    Do not set Vlast_coding_system_used.  */
4829
4830 Lisp_Object
4831 code_convert_string_norecord (string, coding_system, encodep)
4832      Lisp_Object string, coding_system;
4833      int encodep;
4834 {
4835   struct coding_system coding;
4836
4837   CHECK_STRING (string, 0);
4838   CHECK_SYMBOL (coding_system, 1);
4839
4840   if (NILP (coding_system))
4841     return string;
4842
4843   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4844     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4845
4846   coding.mode |= CODING_MODE_LAST_BLOCK;
4847   return code_convert_string (string, &coding, encodep, Qt);
4848 }
4849 \f
4850 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4851   "Decode a JISX0208 character of shift-jis encoding.\n\
4852 CODE is the character code in SJIS.\n\
4853 Return the corresponding character.")
4854   (code)
4855      Lisp_Object code;
4856 {
4857   unsigned char c1, c2, s1, s2;
4858   Lisp_Object val;
4859
4860   CHECK_NUMBER (code, 0);
4861   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4862   DECODE_SJIS (s1, s2, c1, c2);
4863   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4864   return val;
4865 }
4866
4867 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4868   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4869 Return the corresponding character code in SJIS.")
4870   (ch)
4871      Lisp_Object ch;
4872 {
4873   int charset, c1, c2, s1, s2;
4874   Lisp_Object val;
4875
4876   CHECK_NUMBER (ch, 0);
4877   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4878   if (charset == charset_jisx0208)
4879     {
4880       ENCODE_SJIS (c1, c2, s1, s2);
4881       XSETFASTINT (val, (s1 << 8) | s2);
4882     }
4883   else
4884     XSETFASTINT (val, 0);
4885   return val;
4886 }
4887
4888 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4889   "Decode a Big5 character CODE of BIG5 coding system.\n\
4890 CODE is the character code in BIG5.\n\
4891 Return the corresponding character.")
4892   (code)
4893      Lisp_Object code;
4894 {
4895   int charset;
4896   unsigned char b1, b2, c1, c2;
4897   Lisp_Object val;
4898
4899   CHECK_NUMBER (code, 0);
4900   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4901   DECODE_BIG5 (b1, b2, charset, c1, c2);
4902   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4903   return val;
4904 }
4905
4906 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4907   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4908 Return the corresponding character code in Big5.")
4909   (ch)
4910      Lisp_Object ch;
4911 {
4912   int charset, c1, c2, b1, b2;
4913   Lisp_Object val;
4914
4915   CHECK_NUMBER (ch, 0);
4916   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4917   if (charset == charset_big5_1 || charset == charset_big5_2)
4918     {
4919       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4920       XSETFASTINT (val, (b1 << 8) | b2);
4921     }
4922   else
4923     XSETFASTINT (val, 0);
4924   return val;
4925 }
4926 \f
4927 DEFUN ("set-terminal-coding-system-internal",
4928        Fset_terminal_coding_system_internal,
4929        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4930   (coding_system)
4931      Lisp_Object coding_system;
4932 {
4933   CHECK_SYMBOL (coding_system, 0);
4934   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4935   /* We had better not send unsafe characters to terminal.  */
4936   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4937
4938   return Qnil;
4939 }
4940
4941 DEFUN ("set-safe-terminal-coding-system-internal",
4942        Fset_safe_terminal_coding_system_internal,
4943        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4944   (coding_system)
4945      Lisp_Object coding_system;
4946 {
4947   CHECK_SYMBOL (coding_system, 0);
4948   setup_coding_system (Fcheck_coding_system (coding_system),
4949                        &safe_terminal_coding);
4950   return Qnil;
4951 }
4952
DEFUN ("terminal-coding-system",
       Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
  "Return coding system specified for terminal output.")
  ()
{
  /* Report the symbol recorded in the terminal_coding structure.  */
  return terminal_coding.symbol;
}
4960
4961 DEFUN ("set-keyboard-coding-system-internal",
4962        Fset_keyboard_coding_system_internal,
4963        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4964   (coding_system)
4965      Lisp_Object coding_system;
4966 {
4967   CHECK_SYMBOL (coding_system, 0);
4968   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4969   return Qnil;
4970 }
4971
DEFUN ("keyboard-coding-system",
       Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
  "Return coding system specified for decoding keyboard input.")
  ()
{
  /* Report the symbol recorded in the keyboard_coding structure.  */
  return keyboard_coding.symbol;
}
4979
4980 \f
4981 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4982        Sfind_operation_coding_system,  1, MANY, 0,
4983   "Choose a coding system for an operation based on the target name.\n\
4984 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
4985 DECODING-SYSTEM is the coding system to use for decoding\n\
4986 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4987 for encoding (in case OPERATION does encoding).\n\
4988 \n\
4989 The first argument OPERATION specifies an I/O primitive:\n\
4990   For file I/O, `insert-file-contents' or `write-region'.\n\
4991   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4992   For network I/O, `open-network-stream'.\n\
4993 \n\
4994 The remaining arguments should be the same arguments that were passed\n\
4995 to the primitive.  Depending on which primitive, one of those arguments\n\
4996 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4997 whichever argument specifies the file name is TARGET.\n\
4998 \n\
4999 TARGET has a meaning which depends on OPERATION:\n\
5000   For file I/O, TARGET is a file name.\n\
5001   For process I/O, TARGET is a process name.\n\
5002   For network I/O, TARGET is a service name or a port number\n\
5003 \n\
5004 This function looks up what specified for TARGET in,\n\
5005 `file-coding-system-alist', `process-coding-system-alist',\n\
5006 or `network-coding-system-alist' depending on OPERATION.\n\
5007 They may specify a coding system, a cons of coding systems,\n\
5008 or a function symbol to call.\n\
5009 In the last case, we call the function with one argument,\n\
5010 which is a list of all the arguments given to this function.")
5011   (nargs, args)
5012      int nargs;
5013      Lisp_Object *args;
5014 {
5015   Lisp_Object operation, target_idx, target, val;
5016   register Lisp_Object chain;
5017
5018   if (nargs < 2)
5019     error ("Too few arguments");
5020   operation = args[0];
5021   if (!SYMBOLP (operation)
5022       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
5023     error ("Invalid first arguement");
5024   if (nargs < 1 + XINT (target_idx))
5025     error ("Too few arguments for operation: %s",
5026            XSYMBOL (operation)->name->data);
5027   target = args[XINT (target_idx) + 1];
5028   if (!(STRINGP (target)
5029         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
5030     error ("Invalid %dth argument", XINT (target_idx) + 1);
5031
5032   chain = ((EQ (operation, Qinsert_file_contents)
5033             || EQ (operation, Qwrite_region))
5034            ? Vfile_coding_system_alist
5035            : (EQ (operation, Qopen_network_stream)
5036               ? Vnetwork_coding_system_alist
5037               : Vprocess_coding_system_alist));
5038   if (NILP (chain))
5039     return Qnil;
5040
5041   for (; CONSP (chain); chain = XCONS (chain)->cdr)
5042     {
5043       Lisp_Object elt;
5044       elt = XCONS (chain)->car;
5045
5046       if (CONSP (elt)
5047           && ((STRINGP (target)
5048                && STRINGP (XCONS (elt)->car)
5049                && fast_string_match (XCONS (elt)->car, target) >= 0)
5050               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
5051         {
5052           val = XCONS (elt)->cdr;
5053           /* Here, if VAL is both a valid coding system and a valid
5054              function symbol, we return VAL as a coding system.  */
5055           if (CONSP (val))
5056             return val;
5057           if (! SYMBOLP (val))
5058             return Qnil;
5059           if (! NILP (Fcoding_system_p (val)))
5060             return Fcons (val, val);
5061           if (! NILP (Ffboundp (val)))
5062             {
5063               val = call1 (val, Flist (nargs, args));
5064               if (CONSP (val))
5065                 return val;
5066               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
5067                 return Fcons (val, val);
5068             }
5069           return Qnil;
5070         }
5071     }
5072   return Qnil;
5073 }
5074
5075 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
5076        Supdate_coding_systems_internal, 0, 0, 0,
5077   "Update internal database for ISO2022 and CCL based coding systems.\n\
5078 When values of the following coding categories are changed, you must\n\
5079 call this function:\n\
5080   coding-category-iso-7, coding-category-iso-7-tight,\n\
5081   coding-category-iso-8-1, coding-category-iso-8-2,\n\
5082   coding-category-iso-7-else, coding-category-iso-8-else,\n\
5083   coding-category-ccl")
5084   ()
5085 {
5086   int i;
5087
5088   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++)
5089     {
5090       Lisp_Object val;
5091
5092       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
5093       if (!NILP (val))
5094         {
5095           if (! coding_system_table[i])
5096             coding_system_table[i] = ((struct coding_system *)
5097                                       xmalloc (sizeof (struct coding_system)));
5098           setup_coding_system (val, coding_system_table[i]);
5099         }
5100       else if (coding_system_table[i])
5101         {
5102           xfree (coding_system_table[i]);
5103           coding_system_table[i] = NULL;
5104         }
5105     }
5106
5107   return Qnil;
5108 }
5109
5110 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
5111        Sset_coding_priority_internal, 0, 0, 0,
5112   "Update internal database for the current value of `coding-category-list'.\n\
5113 This function is internal use only.")
5114   ()
5115 {
5116   int i = 0, idx;
5117   Lisp_Object val;
5118
5119   val = Vcoding_category_list;
5120
5121   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
5122     {
5123       if (! SYMBOLP (XCONS (val)->car))
5124         break;
5125       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
5126       if (idx >= CODING_CATEGORY_IDX_MAX)
5127         break;
5128       coding_priorities[i++] = (1 << idx);
5129       val = XCONS (val)->cdr;
5130     }
5131   /* If coding-category-list is valid and contains all coding
5132      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
5133      the following code saves Emacs from craching.  */
5134   while (i < CODING_CATEGORY_IDX_MAX)
5135     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
5136
5137   return Qnil;
5138 }
5139
5140 #endif /* emacs */
5141
5142 \f
5143 /*** 9. Post-amble ***/
5144
/* Allocate the initial conversion buffer of
   MINIMUM_CONVERSION_BUFFER_SIZE bytes.  */
void
init_coding ()
{
  conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
}
5150
5151 void
5152 init_coding_once ()
5153 {
5154   int i;
5155
5156   /* Emacs' internal format specific initialize routine.  */
5157   for (i = 0; i <= 0x20; i++)
5158     emacs_code_class[i] = EMACS_control_code;
5159   emacs_code_class[0x0A] = EMACS_linefeed_code;
5160   emacs_code_class[0x0D] = EMACS_carriage_return_code;
5161   for (i = 0x21 ; i < 0x7F; i++)
5162     emacs_code_class[i] = EMACS_ascii_code;
5163   emacs_code_class[0x7F] = EMACS_control_code;
5164   emacs_code_class[0x80] = EMACS_leading_code_composition;
5165   for (i = 0x81; i < 0xFF; i++)
5166     emacs_code_class[i] = EMACS_invalid_code;
5167   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5168   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5169   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5170   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
5171
5172   /* ISO2022 specific initialize routine.  */
5173   for (i = 0; i < 0x20; i++)
5174     iso_code_class[i] = ISO_control_code;
5175   for (i = 0x21; i < 0x7F; i++)
5176     iso_code_class[i] = ISO_graphic_plane_0;
5177   for (i = 0x80; i < 0xA0; i++)
5178     iso_code_class[i] = ISO_control_code;
5179   for (i = 0xA1; i < 0xFF; i++)
5180     iso_code_class[i] = ISO_graphic_plane_1;
5181   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5182   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5183   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5184   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5185   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5186   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5187   iso_code_class[ISO_CODE_ESC] = ISO_escape;
5188   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5189   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5190   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5191
5192   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
5193
5194   setup_coding_system (Qnil, &keyboard_coding);
5195   setup_coding_system (Qnil, &terminal_coding);
5196   setup_coding_system (Qnil, &safe_terminal_coding);
5197   setup_coding_system (Qnil, &default_buffer_file_coding);
5198
5199   bzero (coding_system_table, sizeof coding_system_table);
5200
5201   bzero (ascii_skip_code, sizeof ascii_skip_code);
5202   for (i = 0; i < 128; i++)
5203     ascii_skip_code[i] = 1;
5204
5205 #if defined (MSDOS) || defined (WINDOWSNT)
5206   system_eol_type = CODING_EOL_CRLF;
5207 #else
5208   system_eol_type = CODING_EOL_LF;
5209 #endif
5210 }
5211
5212 #ifdef emacs
5213
5214 void
5215 syms_of_coding ()
5216 {
5217   Qtarget_idx = intern ("target-idx");
5218   staticpro (&Qtarget_idx);
5219
5220   Qcoding_system_history = intern ("coding-system-history");
5221   staticpro (&Qcoding_system_history);
5222   Fset (Qcoding_system_history, Qnil);
5223
5224   /* Target FILENAME is the first argument.  */
5225   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5226   /* Target FILENAME is the third argument.  */
5227   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5228
5229   Qcall_process = intern ("call-process");
5230   staticpro (&Qcall_process);
5231   /* Target PROGRAM is the first argument.  */
5232   Fput (Qcall_process, Qtarget_idx, make_number (0));
5233
5234   Qcall_process_region = intern ("call-process-region");
5235   staticpro (&Qcall_process_region);
5236   /* Target PROGRAM is the third argument.  */
5237   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5238
5239   Qstart_process = intern ("start-process");
5240   staticpro (&Qstart_process);
5241   /* Target PROGRAM is the third argument.  */
5242   Fput (Qstart_process, Qtarget_idx, make_number (2));
5243
5244   Qopen_network_stream = intern ("open-network-stream");
5245   staticpro (&Qopen_network_stream);
5246   /* Target SERVICE is the fourth argument.  */
5247   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5248
5249   Qcoding_system = intern ("coding-system");
5250   staticpro (&Qcoding_system);
5251
5252   Qeol_type = intern ("eol-type");
5253   staticpro (&Qeol_type);
5254
5255   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5256   staticpro (&Qbuffer_file_coding_system);
5257
5258   Qpost_read_conversion = intern ("post-read-conversion");
5259   staticpro (&Qpost_read_conversion);
5260
5261   Qpre_write_conversion = intern ("pre-write-conversion");
5262   staticpro (&Qpre_write_conversion);
5263
5264   Qno_conversion = intern ("no-conversion");
5265   staticpro (&Qno_conversion);
5266
5267   Qundecided = intern ("undecided");
5268   staticpro (&Qundecided);
5269
5270   Qcoding_system_p = intern ("coding-system-p");
5271   staticpro (&Qcoding_system_p);
5272
5273   Qcoding_system_error = intern ("coding-system-error");
5274   staticpro (&Qcoding_system_error);
5275
5276   Fput (Qcoding_system_error, Qerror_conditions,
5277         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5278   Fput (Qcoding_system_error, Qerror_message,
5279         build_string ("Invalid coding system"));
5280
5281   Qcoding_category = intern ("coding-category");
5282   staticpro (&Qcoding_category);
5283   Qcoding_category_index = intern ("coding-category-index");
5284   staticpro (&Qcoding_category_index);
5285
5286   Vcoding_category_table
5287     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5288   staticpro (&Vcoding_category_table);
5289   {
5290     int i;
5291     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5292       {
5293         XVECTOR (Vcoding_category_table)->contents[i]
5294           = intern (coding_category_name[i]);
5295         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5296               Qcoding_category_index, make_number (i));
5297       }
5298   }
5299
5300   Qtranslation_table = intern ("translation-table");
5301   staticpro (&Qtranslation_table);
5302   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
5303
5304   Qtranslation_table_id = intern ("translation-table-id");
5305   staticpro (&Qtranslation_table_id);
5306
5307   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
5308   staticpro (&Qtranslation_table_for_decode);
5309
5310   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
5311   staticpro (&Qtranslation_table_for_encode);
5312
5313   Qsafe_charsets = intern ("safe-charsets");
5314   staticpro (&Qsafe_charsets);
5315
5316   Qvalid_codes = intern ("valid-codes");
5317   staticpro (&Qvalid_codes);
5318
5319   Qemacs_mule = intern ("emacs-mule");
5320   staticpro (&Qemacs_mule);
5321
5322   Qraw_text = intern ("raw-text");
5323   staticpro (&Qraw_text);
5324
5325   defsubr (&Scoding_system_p);
5326   defsubr (&Sread_coding_system);
5327   defsubr (&Sread_non_nil_coding_system);
5328   defsubr (&Scheck_coding_system);
5329   defsubr (&Sdetect_coding_region);
5330   defsubr (&Sdetect_coding_string);
5331   defsubr (&Sdecode_coding_region);
5332   defsubr (&Sencode_coding_region);
5333   defsubr (&Sdecode_coding_string);
5334   defsubr (&Sencode_coding_string);
5335   defsubr (&Sdecode_sjis_char);
5336   defsubr (&Sencode_sjis_char);
5337   defsubr (&Sdecode_big5_char);
5338   defsubr (&Sencode_big5_char);
5339   defsubr (&Sset_terminal_coding_system_internal);
5340   defsubr (&Sset_safe_terminal_coding_system_internal);
5341   defsubr (&Sterminal_coding_system);
5342   defsubr (&Sset_keyboard_coding_system_internal);
5343   defsubr (&Skeyboard_coding_system);
5344   defsubr (&Sfind_operation_coding_system);
5345   defsubr (&Supdate_coding_systems_internal);
5346   defsubr (&Sset_coding_priority_internal);
5347
5348   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5349     "List of coding systems.\n\
5350 \n\
5351 Do not alter the value of this variable manually.  This variable should be\n\
5352 updated by the functions `make-coding-system' and\n\
5353 `define-coding-system-alias'.");
5354   Vcoding_system_list = Qnil;
5355
5356   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5357     "Alist of coding system names.\n\
5358 Each element is one element list of coding system name.\n\
5359 This variable is given to `completing-read' as TABLE argument.\n\
5360 \n\
5361 Do not alter the value of this variable manually.  This variable should be\n\
5362 updated by the functions `make-coding-system' and\n\
5363 `define-coding-system-alias'.");
5364   Vcoding_system_alist = Qnil;
5365
5366   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5367     "List of coding-categories (symbols) ordered by priority.");
5368   {
5369     int i;
5370
5371     Vcoding_category_list = Qnil;
5372     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5373       Vcoding_category_list
5374         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5375                  Vcoding_category_list);
5376   }
5377
5378   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5379     "Specify the coding system for read operations.\n\
5380 It is useful to bind this variable with `let', but do not set it globally.\n\
5381 If the value is a coding system, it is used for decoding on read operation.\n\
5382 If not, an appropriate element is used from one of the coding system alists:\n\
5383 There are three such tables, `file-coding-system-alist',\n\
5384 `process-coding-system-alist', and `network-coding-system-alist'.");
5385   Vcoding_system_for_read = Qnil;
5386
5387   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5388     "Specify the coding system for write operations.\n\
5389 It is useful to bind this variable with `let', but do not set it globally.\n\
5390 If the value is a coding system, it is used for encoding on write operation.\n\
5391 If not, an appropriate element is used from one of the coding system alists:\n\
5392 There are three such tables, `file-coding-system-alist',\n\
5393 `process-coding-system-alist', and `network-coding-system-alist'.");
5394   Vcoding_system_for_write = Qnil;
5395
5396   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5397     "Coding system used in the latest file or process I/O.");
5398   Vlast_coding_system_used = Qnil;
5399
5400   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5401     "*Non-nil means always inhibit code conversion of end-of-line format.");
5402   inhibit_eol_conversion = 0;
5403
5404   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5405     "Non-nil means process buffer inherits coding system of process output.\n\
5406 Bind it to t if the process output is to be treated as if it were a file\n\
5407 read from some filesystem.");
5408   inherit_process_coding_system = 0;
5409
5410   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5411     "Alist to decide a coding system to use for a file I/O operation.\n\
5412 The format is ((PATTERN . VAL) ...),\n\
5413 where PATTERN is a regular expression matching a file name,\n\
5414 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5415 If VAL is a coding system, it is used for both decoding and encoding\n\
5416 the file contents.\n\
5417 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5418 and the cdr part is used for encoding.\n\
5419 If VAL is a function symbol, the function must return a coding system\n\
5420 or a cons of coding systems which are used as above.\n\
5421 \n\
5422 See also the function `find-operation-coding-system'\n\
5423 and the variable `auto-coding-alist'.");
5424   Vfile_coding_system_alist = Qnil;
5425
5426   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5427     "Alist to decide a coding system to use for a process I/O operation.\n\
5428 The format is ((PATTERN . VAL) ...),\n\
5429 where PATTERN is a regular expression matching a program name,\n\
5430 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5431 If VAL is a coding system, it is used for both decoding what received\n\
5432 from the program and encoding what sent to the program.\n\
5433 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5434 and the cdr part is used for encoding.\n\
5435 If VAL is a function symbol, the function must return a coding system\n\
5436 or a cons of coding systems which are used as above.\n\
5437 \n\
5438 See also the function `find-operation-coding-system'.");
5439   Vprocess_coding_system_alist = Qnil;
5440
5441   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5442     "Alist to decide a coding system to use for a network I/O operation.\n\
5443 The format is ((PATTERN . VAL) ...),\n\
5444 where PATTERN is a regular expression matching a network service name\n\
5445 or is a port number to connect to,\n\
5446 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5447 If VAL is a coding system, it is used for both decoding what received\n\
5448 from the network stream and encoding what sent to the network stream.\n\
5449 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5450 and the cdr part is used for encoding.\n\
5451 If VAL is a function symbol, the function must return a coding system\n\
5452 or a cons of coding systems which are used as above.\n\
5453 \n\
5454 See also the function `find-operation-coding-system'.");
5455   Vnetwork_coding_system_alist = Qnil;
5456
5457   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5458     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF).");
5459   eol_mnemonic_unix = ':';
5460
5461   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5462     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5463   eol_mnemonic_dos = '\\';
5464
5465   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5466     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5467   eol_mnemonic_mac = '/';
5468
5469   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5470     "Mnemonic character indicating end-of-line format is not yet decided.");
5471   eol_mnemonic_undecided = ':';
5472
5473   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
5474     "*Non-nil enables character translation while encoding and decoding.");
5475   Venable_character_translation = Qt;
5476
5477   DEFVAR_LISP ("standard-translation-table-for-decode",
5478     &Vstandard_translation_table_for_decode,
5479     "Table for translating characters while decoding.");
5480   Vstandard_translation_table_for_decode = Qnil;
5481
5482   DEFVAR_LISP ("standard-translation-table-for-encode",
5483     &Vstandard_translation_table_for_encode,
5484     "Table for translating characters while encoding.");
5485   Vstandard_translation_table_for_encode = Qnil;
5486
5487   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5488     "Alist of charsets vs revision numbers.\n\
5489 While encoding, if a charset (car part of an element) is found,\n\
5490 designate it with the escape sequence identifying revision (cdr part of the element).");
5491   Vcharset_revision_alist = Qnil;
5492
5493   DEFVAR_LISP ("default-process-coding-system",
5494                &Vdefault_process_coding_system,
5495     "Cons of coding systems used for process I/O by default.\n\
5496 The car part is used for decoding a process output,\n\
5497 the cdr part is used for encoding a text to be sent to a process.");
5498   Vdefault_process_coding_system = Qnil;
5499
5500   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5501     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5502 This is a vector of length 256.\n\
5503 If Nth element is non-nil, the existence of code N in a file\n\
5504 \(or output of subprocess) doesn't prevent it to be detected as\n\
5505 a coding system of ISO 2022 variant which has a flag\n\
5506 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5507 or reading output of a subprocess.\n\
5508 Only 128th through 159th elements have a meaning.");
5509   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5510
5511   DEFVAR_LISP ("select-safe-coding-system-function",
5512                &Vselect_safe_coding_system_function,
5513     "Function to call to select safe coding system for encoding a text.\n\
5514 \n\
5515 If set, this function is called to force a user to select a proper\n\
5516 coding system which can encode the text in the case that a default\n\
5517 coding system used in each operation can't encode the text.\n\
5518 \n\
5519 The default value is `select-safe-coding-system' (which see).");
5520   Vselect_safe_coding_system_function = Qnil;
5521
5522 }
5523
5524 #endif /* emacs */