src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. CCL handlers
  29   6. End-of-line handlers
  30   7. C library functions
  31   8. Emacs Lisp library functions
  32   9. Post-amble
  33
  34 */
  35
  36 /*** GENERAL NOTE on CODING SYSTEM ***
  37
  38   Coding system is an encoding mechanism of one or more character
  39   sets.  Here's a list of coding systems which Emacs can handle.  When
  40   we say "decode", it means converting some other coding system to
  41   Emacs' internal format (emacs-mule), and when we say "encode",
  42   it means converting the coding system emacs-mule to some other
  43   coding system.
  44
  45   0. Emacs' internal format (emacs-mule)
  46
  47   Emacs itself holds a multi-lingual character in a buffer and a string
  48   in a special format.  Details are described in section 2.
  49
  50   1. ISO2022
  51
  52   The most famous coding system for multiple character sets.  X's
  53   Compound Text, various EUCs (Extended Unix Code), and coding
  54   systems used in Internet communication such as ISO-2022-JP are
  55   all variants of ISO2022.  Details are described in section 3.
  56
  57   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  58
  59   A coding system to encode character sets: ASCII, JISX0201, and
  60   JISX0208.  Widely used for PC's in Japan.  Details are described in
  61   section 4.
  62
  63   3. BIG5
  64
  65   A coding system to encode character sets: ASCII and Big5.  Widely
  66   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  67   described in section 4.  In this file, when we write "BIG5"
  68   (all uppercase), we mean the coding system, and when we write
  69   "Big5" (capitalized), we mean the character set.
  70
  71   4. Raw text
  72
  73   A coding system for a text containing random 8-bit code.  Emacs does
  74   no code conversion on such a text except for end-of-line format.
  75
  76   5. Other
  77
  78   If a user wants to read/write a text encoded in a coding system not
  79   listed above, he can supply a decoder and an encoder for it in CCL
  80   (Code Conversion Language) programs.  Emacs executes the CCL program
  81   while reading/writing.
  82
  83   Emacs represents a coding system by a Lisp symbol that has a property
  84   `coding-system'.  But, before actually using the coding system, the
  85   information about it is set in a structure of type `struct
  86   coding_system' for rapid processing.  See section 6 for more details.
  87
  88 */
  89
  90 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  91
  92   How end-of-line of a text is encoded depends on a system.  For
  93   instance, Unix's format is just one byte of `line-feed' code,
  94   whereas DOS's format is two-byte sequence of `carriage-return' and
  95   `line-feed' codes.  MacOS's format is usually one byte of
  96   `carriage-return'.
  97
  98   Since text characters encoding and end-of-line encoding are
  99   independent, any coding system described above can take
 100   any format of end-of-line.  So, Emacs has information of format of
 101   end-of-line in each coding-system.  See section 6 for more details.
 102
 103 */
 104
 105 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 106
 107   These functions check if a text between SRC and SRC_END is encoded
 108   in the coding system category XXX.  Each returns an integer value in
 109   which appropriate flag bits for the category XXX are set.  The flag
 110   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 111   template of these functions.  */
 112 #if 0
 113 int
 114 detect_coding_emacs_mule (src, src_end)
 115      unsigned char *src, *src_end;
 116 {
 117   ...
 118 }
 119 #endif
 120
 121 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 122
 123   These functions decode SRC_BYTES length text at SOURCE encoded in
 124   CODING to Emacs' internal format (emacs-mule).  The resulting text
 125   goes to a place pointed to by DESTINATION, the length of which
 126   should not exceed DST_BYTES.  These functions set the information of
 127   original and decoded texts in the members produced, produced_char,
 128   consumed, and consumed_char of the structure *CODING.
 129
 130   The return value is an integer (CODING_FINISH_XXX) indicating how
 131   the decoding finished.
 132
 133   DST_BYTES zero means that the source area and the destination area
 134   overlap, which means that we can produce decoded text only until it
 135   reaches the head of the not-yet-decoded source text.
 136
 137   Below is a template of these functions.  */
 138 #if 0
 139 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 140      struct coding_system *coding;
 141      unsigned char *source, *destination;
 142      int src_bytes, dst_bytes;
 143 {
 144   ...
 145 }
 146 #endif
 147
 148 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 149
 150   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 151   internal format (emacs-mule) to CODING.  The resulting text goes to
 152   a place pointed to by DESTINATION, the length of which should not
 153   exceed DST_BYTES.  These functions set the information of
 154   original and encoded texts in the members produced, produced_char,
 155   consumed, and consumed_char of the structure *CODING.
 156
 157   The return value is an integer (CODING_FINISH_XXX) indicating how
 158   the encoding finished.
 159
 160   DST_BYTES zero means that the source area and the destination area
 161   overlap, which means that we can produce encoded text only until it
 162   reaches the head of the not-yet-encoded source text.
 163
 164   Below is a template of these functions.  */
 165 #if 0
 166 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 167      struct coding_system *coding;
 168      unsigned char *source, *destination;
 169      int src_bytes, dst_bytes;
 170 {
 171   ...
 172 }
 173 #endif
 174
 175 /*** COMMONLY USED MACROS ***/
 176
 177 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 178    THREE_MORE_BYTES fetch one, two, and three bytes respectively from
 179    the source text into their arguments, advancing `src' past them.
 180    If fewer than that many bytes remain before `src_end', nothing is
 181    consumed and control jumps to `label_end_of_loop'.  The expansion
     site must therefore have the variables `src' and `src_end' set to
     the appropriate area, and a label named `label_end_of_loop', in
     scope.  */
 182
 183 #define ONE_MORE_BYTE(c1)       \
 184   do {                          \
 185     if (src < src_end)          \
 186       c1 = *src++;              \
 187     else                        \
 188       goto label_end_of_loop;   \
 189   } while (0)
 190
 191 #define TWO_MORE_BYTES(c1, c2)  \
 192   do {                          \
 193     if (src + 1 < src_end)      \
 194       c1 = *src++, c2 = *src++; \
 195     else                        \
 196       goto label_end_of_loop;   \
 197   } while (0)
 198
 199 #define THREE_MORE_BYTES(c1, c2, c3)            \
 200   do {                                          \
 201     if (src + 2 < src_end)                      \
 202       c1 = *src++, c2 = *src++, c3 = *src++;    \
 203     else                                        \
 204       goto label_end_of_loop;                   \
 205   } while (0)
 206
 207 /* The following three macros DECODE_CHARACTER_ASCII,
 208    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 store
 209    the multi-byte form of one character of each class at the place
 210    pointed to by `dst' and advance `dst' past the bytes written.  The
 211    expansion site must have the variable `dst' pointing to an
 212    appropriate area and the variable `coding' pointing to the
     coding-system of the text currently being decoded.  None of the
     macros checks how much room is left at `dst'.  */
 213
 214 /* Decode one ASCII character C.  While COMPOSING_P holds for
     coding->composing, C is stored as the two bytes 0xA0, C | 0x80
     and produced_char is not incremented; otherwise C is stored as is
     and produced_char is incremented.  */
 215
 216 #define DECODE_CHARACTER_ASCII(c)                               \
 217   do {                                                          \
 218     if (COMPOSING_P (coding->composing))                        \
 219       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
 220     else                                                        \
 221       {                                                         \
 222         *dst++ = (c);                                           \
 223         coding->produced_char++;                                \
 224       }                                                         \
 225   } while (0)
 226
 227 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 228    position-code is C.  The base leading-code is stored first (with
     0x20 added while composing, in which case produced_char is not
     incremented), then the extended leading-code when
     CHARSET_LEADING_CODE_EXT yields a non-zero value, and finally
     C | 0x80.  The `=' inside the `if' is an intentional assignment
     whose result is tested.  The expansion declares a local variable
     named `leading_code' and mentions CHARSET more than once.  */
 229
 230 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 231   do {                                                                  \
 232     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 233     if (COMPOSING_P (coding->composing))                                \
 234       *dst++ = leading_code + 0x20;                                     \
 235     else                                                                \
 236       {                                                                 \
 237         *dst++ = leading_code;                                          \
 238         coding->produced_char++;                                        \
 239       }                                                                 \
 240     if (leading_code = CHARSET_LEADING_CODE_EXT (charset))              \
 241       *dst++ = leading_code;                                            \
 242     *dst++ = (c) | 0x80;                                                \
 243   } while (0)
 244
 245 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 246    position-codes are C1 and C2.  The output is what
     DECODE_CHARACTER_DIMENSION1 stores for C1, followed by the extra
     byte C2 | 0x80.  */
 247
 248 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 249   do {                                                  \
 250     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 251     *dst++ = (c2) | 0x80;                               \
 252   } while (0)
 253
 254 \f
 255 /*** 1. Preamble ***/
 256
 257 #include <stdio.h>
 258
 259 #ifdef emacs
 260
 261 #include <config.h>
 262 #include "lisp.h"
 263 #include "buffer.h"
 264 #include "charset.h"
 265 #include "ccl.h"
 266 #include "coding.h"
 267 #include "window.h"
 268
 269 #else  /* not emacs */
 270
 271 #include "mulelib.h"
 272
 273 #endif /* not emacs */
 274
 275 Lisp_Object Qcoding_system, Qeol_type; /* NOTE(review): the Q* objects presumably hold Lisp symbols; their initialization is not visible here -- confirm.  */
 276 Lisp_Object Qbuffer_file_coding_system;
 277 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 278 Lisp_Object Qno_conversion, Qundecided;
 279 Lisp_Object Qcoding_system_history;
 280 Lisp_Object Qsafe_charsets;
 281 Lisp_Object Qvalid_codes;
 282
     /* The two objects on the next line are only declared here
        (`extern'); the others in this group are defined in this file.  */
 283 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 284 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 285 Lisp_Object Qstart_process, Qopen_network_stream;
 286 Lisp_Object Qtarget_idx;
 287
 288 Lisp_Object Vselect_safe_coding_system_function;
 289
 290 /* Mnemonic characters, one for each end-of-line format (Unix, DOS, Mac).  */
 291 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 292 /* Mnemonic character indicating that the end-of-line format is not yet
 293    decided.  */
 294 int eol_mnemonic_undecided;
 295
 296 /* End-of-line format decided by the system.  This is CODING_EOL_LF on
 297    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 298 int system_eol_type;
 299
 300 #ifdef emacs
     /* Everything up to the matching `#endif' is compiled only when
        `emacs' is defined.  */
 301
 302 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 303
 304 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 305
 306 /* The coding systems emacs-mule and raw-text convert only the
 307    end-of-line format.  */
 308 Lisp_Object Qemacs_mule, Qraw_text;
 309
 310 /* Coding systems are passed between Emacs Lisp programs and the internal
 311    C routines through the following three variables.  */
 312 /* Coding system for reading files and receiving data from a process.  */
 313 Lisp_Object Vcoding_system_for_read;
 314 /* Coding system for writing files and sending data to a process.  */
 315 Lisp_Object Vcoding_system_for_write;
 316 /* Coding system actually used in the latest I/O.  */
 317 Lisp_Object Vlast_coding_system_used;
 318
 319 /* A vector of length 256 which contains information about special
 320    Latin codes (especially for dealing with Microsoft codes).  */
 321 Lisp_Object Vlatin_extra_code_table;
 322
 323 /* Flag to inhibit code conversion of the end-of-line format.  */
 324 int inhibit_eol_conversion;
 325
 326 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 327 int inherit_process_coding_system;
 328
 329 /* Coding system used to encode text for terminal display.  */
 330 struct coding_system terminal_coding;
 331
 332 /* Coding system used to encode text for terminal display when the
 333    terminal coding system is nil.  */
 334 struct coding_system safe_terminal_coding;
 335
 336 /* Coding system of what is sent from the terminal keyboard.  */
 337 struct coding_system keyboard_coding;
 338
 339 /* Default coding system used to write a file.  */
 340 struct coding_system default_buffer_file_coding;
 341
     /* NOTE(review): presumably alists consulted when choosing a coding
        system for files, processes, and network streams respectively;
        nothing in this part of the file documents them -- confirm.  */
 342 Lisp_Object Vfile_coding_system_alist;
 343 Lisp_Object Vprocess_coding_system_alist;
 344 Lisp_Object Vnetwork_coding_system_alist;
 345
 346 #endif /* emacs */
 347
 348 Lisp_Object Qcoding_category, Qcoding_category_index;
 349
 350 /* List of symbols `coding-category-xxx' ordered by priority.  */
 351 Lisp_Object Vcoding_category_list;
 352
 353 /* Table of coding categories (Lisp symbols).  */
 354 Lisp_Object Vcoding_category_table;
 355
 356 /* Table of the symbol name of each coding category.  Twelve names are
     listed here.  NOTE(review): presumably they follow the order of the
     CODING_CATEGORY_IDX_XXX constants and CODING_CATEGORY_IDX_MAX is 12;
     those constants are defined outside this file -- confirm.  */
 357 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 358   "coding-category-emacs-mule",
 359   "coding-category-sjis",
 360   "coding-category-iso-7",
 361   "coding-category-iso-7-tight",
 362   "coding-category-iso-8-1",
 363   "coding-category-iso-8-2",
 364   "coding-category-iso-7-else",
 365   "coding-category-iso-8-else",
 366   "coding-category-big5",
 367   "coding-category-raw-text",
 368   "coding-category-binary",
 369   "coding-category-ccl"
 370 };
 371
 372 /* Table of pointers to the coding systems corresponding to each coding
 373    category.  An entry may be a null pointer (CHARSET_OK tests for it).  */
 374 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 375
 376 /* Table of coding category masks.  The Nth element is the mask of the
 377    coding category whose priority is Nth.  */
 378 static
 379 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 380
 381 /* Flag to tell whether translation tables are looked up during
 382    character code conversion.  */
 383 Lisp_Object Venable_character_translation;
 384 /* Standard translation table to look up on decoding (reading).  */
 385 Lisp_Object Vstandard_translation_table_for_decode;
 386 /* Standard translation table to look up on encoding (writing).  */
 387 Lisp_Object Vstandard_translation_table_for_encode;
 388
     /* NOTE(review): presumably Lisp symbols related to translation
        tables; their initialization is not visible here -- confirm.  */
 389 Lisp_Object Qtranslation_table;
 390 Lisp_Object Qtranslation_table_id;
 391 Lisp_Object Qtranslation_table_for_decode;
 392 Lisp_Object Qtranslation_table_for_encode;
 393
 394 /* Alist mapping charsets to revision numbers.  */
 395 Lisp_Object Vcharset_revision_alist;
 396
 397 /* Default coding systems used for process I/O.  */
 398 Lisp_Object Vdefault_process_coding_system;
 399
 400 \f
 401 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 402
 403 /* Emacs' internal format for encoding multiple character sets is a
 404    kind of multi-byte encoding, i.e. characters are encoded by
 405    variable-length sequences of one-byte codes.  ASCII characters
 406    and control characters (e.g. `tab', `newline') are represented by
 407    one-byte sequences which are their ASCII codes, in the range 0x00
 408    through 0x7F.  The other characters are represented by a sequence
 409    of `base leading-code', optional `extended leading-code', and one
 410    or two `position-code's.  The length of the sequence is determined
 411    by the base leading-code.  Leading-code takes the range 0x80
 412    through 0x9F, whereas extended leading-code and position-code take
 413    the range 0xA0 through 0xFF.  See `charset.h' for more details
 414    about leading-code and position-code.
 415
 416    There's one exception to this rule.  Special leading-code
 417    `leading-code-composition' denotes that the following several
 418    characters should be composed into one character.  Leading-codes of
 419    components (except for ASCII) are added 0x20.  An ASCII character
 420    component is represented by a 2-byte sequence of `0xA0' and
 421    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 422    details of composite character.  Hence, we can summarize the code
 423    range as follows:
 424
 425    --- CODE RANGE of Emacs' internal format ---
 426    (character set)      (range)
 427    ASCII                0x00 .. 0x7F
 428    ELSE (1st byte)      0x80 .. 0x9F
 429         (rest bytes)    0xA0 .. 0xFF
 430    ---------------------------------------------
 431
 432   */
 433
 434 enum emacs_code_class_type emacs_code_class[256]; /* indexed by byte value */
 435
 436 /* Consume one byte at SRC and go on to the next statement only if
 437    that byte is 0xA0 or greater (the test is `< 0xA0', so 0xA0 itself
     is accepted).  If no byte is accessible (SRC >= SRC_END), jump to
     `label_end_of_switch' without consuming anything; if the byte is
     less than 0xA0, return 0 from the enclosing function.  The
     expansion site must have `src', `src_end', and that label in
     scope.  */
 438 #define CHECK_CODE_RANGE_A0_FF  \
 439   do {                          \
 440     if (src >= src_end)         \
 441       goto label_end_of_switch; \
 442     else if (*src++ < 0xA0)     \
 443       return 0;                 \
 444   } while (0)
 445
 446 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 447    Check if a text is encoded in Emacs' internal format.  If it is,
 448    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.

     The bytes from SRC up to, but not including, SRC_END are scanned.
     An empty text, and a text that ends in the middle of a multi-byte
     sequence, are both accepted: only a byte that contradicts the
     format makes the function return 0.  */
 449
 450 int
 451 detect_coding_emacs_mule (src, src_end)
 452      unsigned char *src, *src_end;
 453 {
 454   unsigned char c;
 455   int composing = 0;		/* non-zero while inside a composition sequence */
 456
 457   while (src < src_end)
 458     {
 459       c = *src++;
 460
           /* Inside a composition, a byte below 0xA0 ends the composition
              and is classified as is.  Any other byte has 0x20 subtracted
              before it is classified, undoing the 0x20 that is added to
              the leading-codes of components.  */
 461       if (composing)
 462         {
 463           if (c < 0xA0)
 464             composing = 0;
 465           else
 466             c -= 0x20;
 467         }
 468
 469       switch (emacs_code_class[c])
 470         {
 471         case EMACS_ascii_code:
 472         case EMACS_linefeed_code:
 473           break;
 474
 475         case EMACS_control_code:
               /* ESC, SI, and SO reject the text; presumably because they
                  indicate ISO2022 text rather than the internal format.  */
 476           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 477             return 0;
 478           break;
 479
 480         case EMACS_invalid_code:
 481           return 0;
 482
 483         case EMACS_leading_code_composition: /* c == 0x80 */
               /* If already composing, C was 0xA0 before the adjustment
                  above, so one more byte of 0xA0 or greater must follow.
                  Otherwise this byte starts a composition.  */
 484           if (composing)
 485             CHECK_CODE_RANGE_A0_FF;
 486           else
 487             composing = 1;
 488           break;
 489
 490         case EMACS_leading_code_4:
 491           CHECK_CODE_RANGE_A0_FF;
 492           /* fall down to check it two more times ...  */
 493
 494         case EMACS_leading_code_3:
 495           CHECK_CODE_RANGE_A0_FF;
 496           /* fall down to check it one more time ...  */
 497
 498         case EMACS_leading_code_2:
 499           CHECK_CODE_RANGE_A0_FF;
 500           break;
 501
 502         default:
               /* CHECK_CODE_RANGE_A0_FF jumps here when the text runs out in
                  the middle of a sequence; the loop then ends because SRC
                  has reached SRC_END.  */
 503         label_end_of_switch:
 504           break;
 505         }
 506     }
 507   return CODING_CATEGORY_MASK_EMACS_MULE;
 508 }
 509
 510 \f
 511 /*** 3. ISO2022 handlers ***/
 512
 513 /* The following note describes the coding system ISO2022 briefly.
 514    Since the intention of this note is to help in understanding of
 515    the programs in this file, some parts are NOT ACCURATE or OVERLY
 516    SIMPLIFIED.  For the thorough understanding, please refer to the
 517    original document of ISO2022.
 518
 519    ISO2022 provides many mechanisms to encode several character sets
 520    in 7-bit and 8-bit environment.  If one chooses 7-bit environment,
 521    all text is encoded by codes of less than 128.  This may make the
 522    encoded text a little bit longer, but the text gets more stability
 523    to pass through several gateways (some of them strip off the MSB).
 524
 525    There are two kinds of character set: control character set and
 526    graphic character set.  The former contains control characters such
 527    as `newline' and `escape' to provide control functions (control
 528    functions are provided also by escape sequences).  The latter
 529    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 530    two control character sets and many graphic character sets.
 531
 532    Graphic character sets are classified into one of the following
 533    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 534    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 535    bytes (DIMENSION) and the number of characters in one dimension
 536    (CHARS) of the set.  In addition, each character set is assigned an
 537    identification tag (called "final character" and denoted as <F>
 538    here after) which is unique in each class.  <F> of each character
 539    set is decided by ECMA(*) when it is registered in ISO.  Code range
 540    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 541
 542    Note (*): ECMA = European Computer Manufacturers Association
 543
 544    Here are examples of graphic character set [NAME(<F>)]:
 545         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 546         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 547         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 548         o DIMENSION2_CHARS96 -- none for the moment
 549
 550    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 551         C0 [0x00..0x1F] -- control character plane 0
 552         GL [0x20..0x7F] -- graphic character plane 0
 553         C1 [0x80..0x9F] -- control character plane 1
 554         GR [0xA0..0xFF] -- graphic character plane 1
 555
 556    A control character set is directly designated and invoked to C0 or
 557    C1 by an escape sequence.  The most common case is that ISO646's
 558    control character set is designated/invoked to C0 and ISO6429's
 559    control character set is designated/invoked to C1, and usually
 560    these designations/invocations are omitted in a coded text.  With
 561    7-bit environment, only C0 can be used, and a control character for
 562    C1 is encoded by an appropriate escape sequence to fit in the
 563    environment.  Every control character for C1 has a
 564    corresponding escape sequence defined.
 565
 566    A graphic character set is at first designated to one of four
 567    graphic registers (G0 through G3), then these graphic registers are
 568    invoked to GL or GR.  These designations and invocations can be
 569    done independently.  The most common case is that G0 is invoked to
 570    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 571    these invocations and designations are omitted in a coded text.
 572    With 7-bit environment, only GL can be used.
 573
 574    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 575    and 0x7F of GL area work as control characters SPACE and DEL
 576    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 577
 578    There are two ways of invocation: locking-shift and single-shift.
 579    With locking-shift, the invocation lasts until the next different
 580    invocation, whereas with single-shift, the invocation works only
 581    for the following character and doesn't affect locking-shift.
 582    Invocations are done by the following control characters or escape
 583    sequences.
 584
 585    ----------------------------------------------------------------------
 586    function             control char    escape sequence description
 587    ----------------------------------------------------------------------
 588    SI  (shift-in)               0x0F    none            invoke G0 to GL
 589    SO  (shift-out)              0x0E    none            invoke G1 to GL
 590    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 591    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 592    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 593    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 594    ----------------------------------------------------------------------
 595    The first four are for locking-shift.  Control characters for these
 596    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 597
 598    Designations are done by the following escape sequences.
 599    ----------------------------------------------------------------------
 600    escape sequence      description
 601    ----------------------------------------------------------------------
 602    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 603    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 604    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 605    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 606    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 607    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 608    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 609    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 610    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 611    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 612    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 613    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 614    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 615    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 616    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 617    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 618    ----------------------------------------------------------------------
 619
 620    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 621    of dimension 1, chars 94, and final character <F>, and etc.
 622
 623    Note (*): Although these designations are not allowed in ISO2022,
 624    Emacs accepts them on decoding, and produces them on encoding
 625    CHARS96 character set in a coding system which is characterized as
 626    7-bit environment, non-locking-shift, and non-single-shift.
 627
 628    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 629    '(' can be omitted.  We call this as "short-form" here after.
 630
 631    Now you may notice that there are a lot of ways for encoding the
 632    same multilingual text in ISO2022.  Actually, there exist many
 633    coding systems such as Compound Text (used in X's inter client
 634    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 635    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 636    localized platforms), and all of these are variants of ISO2022.
 637
 638    In addition to the above, Emacs handles two more kinds of escape
 639    sequences: ISO6429's direction specification and Emacs' private
 640    sequence for specifying character composition.
 641
 642    ISO6429's direction specification takes the following format:
 643         o CSI ']'      -- end of the current direction
 644         o CSI '0' ']'  -- end of the current direction
 645         o CSI '1' ']'  -- start of left-to-right text
 646         o CSI '2' ']'  -- start of right-to-left text
 647    The control character CSI (0x9B: control sequence introducer) is
 648    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 649
 650    Character composition specification takes the following format:
 651         o ESC '0' -- start character composition
 652         o ESC '1' -- end character composition
 653    Since these are not standard escape sequences of any ISO, the use
 654    of them for these meaning is restricted to Emacs only.  */
 655
 656 enum iso_code_class_type iso_code_class[256]; /* 256 entries; presumably indexed by byte value -- confirm */
 657
     /* Non-zero if a coding system is registered in
        coding_system_table[IDX] (the pointer is non-null) and either its
        safe_charsets[CHARSET] entry is non-zero or its requested
        designation for CHARSET is something other than
        CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.  IDX and CHARSET are
        each evaluated more than once.  */
 658 #define CHARSET_OK(idx, charset)                                \
 659   (coding_system_table[idx]                                     \
 660    && (coding_system_table[idx]->safe_charsets[charset]         \
 661        || (CODING_SPEC_ISO_REQUESTED_DESIGNATION                \
 662             (coding_system_table[idx], charset)                 \
 663            != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
 664
     /* Non-zero if CODING_SPEC_ISO_INITIAL_DESIGNATION for
        coding_system_table[IDX] and register number 1 (presumably graphic
        register G1) is not negative.  NOTE(review): unlike CHARSET_OK,
        this passes coding_system_table[IDX] on without testing it for a
        null pointer -- confirm the entry is always set where this is
        used.  */
 665 #define SHIFT_OUT_OK(idx) \
 666   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 667
 668 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 669    Check if a text is encoded in ISO2022.  If it is, return an
 670    integer in which the appropriate flag bits among the following:
 671         CODING_CATEGORY_MASK_ISO_7
 672         CODING_CATEGORY_MASK_ISO_7_TIGHT
 673         CODING_CATEGORY_MASK_ISO_8_1
 674         CODING_CATEGORY_MASK_ISO_8_2
 675         CODING_CATEGORY_MASK_ISO_7_ELSE
 676         CODING_CATEGORY_MASK_ISO_8_ELSE
 677    are set.  If a code which should never appear in ISO2022 is found,
 678    return 0.  */
 679
 680 int
 681 detect_coding_iso2022 (src, src_end)
 682      unsigned char *src, *src_end;
 683 {
 684   int mask = CODING_CATEGORY_MASK_ISO;
 685   int mask_found = 0;
 686   int reg[4], shift_out = 0;
 687   int c, c1, i, charset;
 688
 689   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 690   while (mask && src < src_end)
 691     {
 692       c = *src++;
 693       switch (c)
 694         {
 695         case ISO_CODE_ESC:
 696           if (src >= src_end)
 697             break;
 698           c = *src++;
 699           if (c >= '(' && c <= '/')
 700             {
 701               /* Designation sequence for a charset of dimension 1.  */
 702               if (src >= src_end)
 703                 break;
 704               c1 = *src++;
 705               if (c1 < ' ' || c1 >= 0x80
 706                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 707                 /* Invalid designation sequence.  Just ignore.  */
 708                 break;
 709               reg[(c - '(') % 4] = charset;
 710             }
 711           else if (c == '$')
 712             {
 713               /* Designation sequence for a charset of dimension 2.  */
 714               if (src >= src_end)
 715                 break;
 716               c = *src++;
 717               if (c >= '@' && c <= 'B')
 718                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 719                 reg[0] = charset = iso_charset_table[1][0][c];
 720               else if (c >= '(' && c <= '/')
 721                 {
 722                   if (src >= src_end)
 723                     break;
 724                   c1 = *src++;
 725                   if (c1 < ' ' || c1 >= 0x80
 726                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 727                     /* Invalid designation sequence.  Just ignore.  */
 728                     break;
 729                   reg[(c - '(') % 4] = charset;
 730                 }
 731               else
 732                 /* Invalid designation sequence.  Just ignore.  */
 733                 break;
 734             }
 735           else if (c == 'N' || c == 'n')
 736             {
 737               if (shift_out == 0
 738                   && (reg[1] >= 0
 739                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 740                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 741                 {
 742                   /* Locking shift out.  */
 743                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 744                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 745                   shift_out = 1;
 746                 }
 747               break;
 748             }
 749           else if (c == 'O' || c == 'o')
 750             {
 751               if (shift_out == 1)
 752                 {
 753                   /* Locking shift in.  */
 754                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 755                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 756                   shift_out = 0;
 757                 }
 758               break;
 759             }
 760           else if (c == '0' || c == '1' || c == '2')
 761             /* Start/end composition.  Just ignore.  */
 762             break;
 763           else
 764             /* Invalid escape sequence.  Just ignore.  */
 765             break;
 766
 767           /* We found a valid designation sequence for CHARSET.  */
 768           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 769           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 770             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 771           else
 772             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 773           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 774             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 775           else
 776             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 777           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 778             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 779           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 780             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 781           break;
 782
 783         case ISO_CODE_SO:
 784           if (shift_out == 0
 785               && (reg[1] >= 0
 786                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 787                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 788             {
 789               /* Locking shift out.  */
 790               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 791               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 792             }
 793           break;
 794
 795         case ISO_CODE_SI:
 796           if (shift_out == 1)
 797             {
 798               /* Locking shift in.  */
 799               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 800               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 801             }
 802           break;
 803
 804         case ISO_CODE_CSI:
 805         case ISO_CODE_SS2:
 806         case ISO_CODE_SS3:
 807           {
 808             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 809
 810             if (c != ISO_CODE_CSI)
 811               {
 812                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 813                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 814                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 815                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 816                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 817                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 818               }
 819             if (VECTORP (Vlatin_extra_code_table)
 820                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 821               {
 822                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 823                     & CODING_FLAG_ISO_LATIN_EXTRA)
 824                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 825                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 826                     & CODING_FLAG_ISO_LATIN_EXTRA)
 827                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 828               }
 829             mask &= newmask;
 830             mask_found |= newmask;
 831           }
 832           break;
 833
 834         default:
 835           if (c < 0x80)
 836             break;
 837           else if (c < 0xA0)
 838             {
 839               if (VECTORP (Vlatin_extra_code_table)
 840                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 841                 {
 842                   int newmask = 0;
 843
 844                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 845                       & CODING_FLAG_ISO_LATIN_EXTRA)
 846                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 847                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 848                       & CODING_FLAG_ISO_LATIN_EXTRA)
 849                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 850                   mask &= newmask;
 851                   mask_found |= newmask;
 852                 }
 853               else
 854                 return 0;
 855             }
 856           else
 857             {
 858               unsigned char *src_begin = src;
 859
 860               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 861                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 862               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 863               while (src < src_end && *src >= 0xA0)
 864                 src++;
 865               if ((src - src_begin - 1) & 1 && src < src_end)
 866                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 867               else
 868                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 869             }
 870           break;
 871         }
 872     }
 873
 874   return (mask & mask_found);
 875 }
 876
 877 /* Decode a character of which charset is CHARSET and the 1st position
 878    code is C1.  If dimension of CHARSET is 2, the 2nd position code is
 879    fetched from SRC and set to C2.  If CHARSET is negative, it means
 880    that we are decoding ill formed text, and what we can do is just to
 881    read C1 as is.  */
 882
 883 #define DECODE_ISO_CHARACTER(charset, c1)                               \
 884   do {                                                                  \
 885     int c_alt, charset_alt = (charset);                                 \
 886     if (COMPOSING_HEAD_P (coding->composing))                           \
 887       {                                                                 \
 888         *dst++ = LEADING_CODE_COMPOSITION;                              \
 889         if (COMPOSING_WITH_RULE_P (coding->composing))                  \
 890           /* To tell composition rules are embeded.  */                 \
 891           *dst++ = 0xFF;                                                \
 892         coding->composing += 2;                                         \
 893       }                                                                 \
 894     if (charset_alt >= 0)                                               \
 895       {                                                                 \
 896         if (CHARSET_DIMENSION (charset_alt) == 2)                       \
 897           {                                                             \
 898             ONE_MORE_BYTE (c2);                                         \
 899             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
 900                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
 901               {                                                         \
 902                 src--;                                                  \
 903                 charset_alt = CHARSET_ASCII;                            \
 904               }                                                         \
 905           }                                                             \
 906         if (!NILP (translation_table)                                   \
 907             && ((c_alt = translate_char (translation_table,             \
 908                                          -1, charset_alt, c1, c2)) >= 0)) \
 909           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
 910       }                                                                 \
 911     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
 912       DECODE_CHARACTER_ASCII (c1);                                      \
 913     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
 914       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
 915     else                                                                \
 916       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
 917     if (COMPOSING_WITH_RULE_P (coding->composing))                      \
 918       /* To tell a composition rule follows.  */                        \
 919       coding->composing = COMPOSING_WITH_RULE_RULE;                     \
 920   } while (0)
 921
 922 /* Set designation state into CODING.  */
 923 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 924   do {                                                                     \
 925     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 926                                      make_number (chars),                  \
 927                                      make_number (final_char));            \
 928     if (charset >= 0                                                       \
 929         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 930             || coding->safe_charsets[charset]))                            \
 931       {                                                                    \
 932         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 933             && reg == 0                                                    \
 934             && charset == CHARSET_ASCII)                                   \
 935           {                                                                \
 936             /* We should insert this designation sequence as is so         \
 937                that it is surely written back to a file.  */               \
 938             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 939             goto label_invalid_code;                                       \
 940           }                                                                \
 941         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 942         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 943             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 944           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 945         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 946       }                                                                    \
 947     else                                                                   \
 948       {                                                                    \
 949         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 950         goto label_invalid_code;                                           \
 951       }                                                                    \
 952   } while (0)
 953
 954 /* Check if the current composing sequence contains only valid codes.
 955    If the composing sequence doesn't end before SRC_END, return -1.
 956    Else, if it contains only valid codes, return 0.
 957    Else return the length of the composing sequence.  */
 958
 959 int
 960 check_composing_code (coding, src, src_end)
 961      struct coding_system *coding;
 962      unsigned char *src, *src_end;
 963 {
 964   unsigned char *src_start = src;
 965   int invalid_code_found = 0;
 966   int charset, c, c1, dim;
 967
 968   while (src < src_end)
 969     {
 970       if (*src++ != ISO_CODE_ESC) continue;
 971       if (src >= src_end) break;
 972       if ((c = *src++) == '1') /* end of compsition */
 973         return (invalid_code_found ? src - src_start : 0);
 974       if (src + 2 >= src_end) break;
 975       if (!coding->flags & CODING_FLAG_ISO_DESIGNATION)
 976         invalid_code_found = 1;
 977       else
 978         {
 979           dim = 0;
 980           if (c == '$')
 981             {
 982               dim = 1;
 983               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 984             }
 985           if (c >= '(' && c <= '/')
 986             {
 987               c1 = *src++;
 988               if ((c1 < ' ' || c1 >= 0x80)
 989                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 990                   || ! coding->safe_charsets[charset]
 991                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 992                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 993                 invalid_code_found = 1;
 994             }
 995           else
 996             invalid_code_found = 1;
 997         }
 998     }
 999   return (invalid_code_found
1000           ? src - src_start
1001           : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
1002 }
1003
1004 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1005
1006 int
1007 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1008      struct coding_system *coding;
1009      unsigned char *source, *destination;
1010      int src_bytes, dst_bytes;
1011 {
1012   unsigned char *src = source;
1013   unsigned char *src_end = source + src_bytes;
1014   unsigned char *dst = destination;
1015   unsigned char *dst_end = destination + dst_bytes;
1016   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1017      from DST_END to assure that overflow checking is necessary only
1018      at the head of loop.  */
1019   unsigned char *adjusted_dst_end = dst_end - 6;
1020   int charset;
1021   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1022   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1023   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1024   Lisp_Object translation_table
1025     = coding->translation_table_for_decode;
1026   int result = CODING_FINISH_NORMAL;
1027
1028   if (!NILP (Venable_character_translation) && NILP (translation_table))
1029     translation_table = Vstandard_translation_table_for_decode;
1030
1031   coding->produced_char = 0;
1032   coding->fake_multibyte = 0;
1033   while (src < src_end && (dst_bytes
1034                            ? (dst < adjusted_dst_end)
1035                            : (dst < src - 6)))
1036     {
1037       /* SRC_BASE remembers the start position in source in each loop.
1038          The loop will be exited when there's not enough source text
1039          to analyze long escape sequence or 2-byte code (within macros
1040          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1041          to SRC_BASE before exiting.  */
1042       unsigned char *src_base = src;
1043       int c1 = *src++, c2;
1044
1045       switch (iso_code_class [c1])
1046         {
1047         case ISO_0x20_or_0x7F:
1048           if (!coding->composing
1049               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1050             {
1051               /* This is SPACE or DEL.  */
1052               *dst++ = c1;
1053               coding->produced_char++;
1054               break;
1055             }
1056           /* This is a graphic character, we fall down ...  */
1057
1058         case ISO_graphic_plane_0:
1059           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1060             {
1061               /* This is a composition rule.  */
1062               *dst++ = c1 | 0x80;
1063               coding->composing = COMPOSING_WITH_RULE_TAIL;
1064             }
1065           else
1066             DECODE_ISO_CHARACTER (charset0, c1);
1067           break;
1068
1069         case ISO_0xA0_or_0xFF:
1070           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1071               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1072             goto label_invalid_code;
1073           /* This is a graphic character, we fall down ... */
1074
1075         case ISO_graphic_plane_1:
1076           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1077             goto label_invalid_code;
1078           else
1079             DECODE_ISO_CHARACTER (charset1, c1);
1080           break;
1081
1082         case ISO_control_code:
1083           /* All ISO2022 control characters in this class have the
1084              same representation in Emacs internal format.  */
1085           if (c1 == '\n'
1086               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1087               && (coding->eol_type == CODING_EOL_CR
1088                   || coding->eol_type == CODING_EOL_CRLF))
1089             {
1090               result = CODING_FINISH_INCONSISTENT_EOL;
1091               goto label_end_of_loop_2;
1092             }
1093           *dst++ = c1;
1094           coding->produced_char++;
1095           break;
1096
1097         case ISO_carriage_return:
1098           if (coding->eol_type == CODING_EOL_CR)
1099             *dst++ = '\n';
1100           else if (coding->eol_type == CODING_EOL_CRLF)
1101             {
1102               ONE_MORE_BYTE (c1);
1103               if (c1 == ISO_CODE_LF)
1104                 *dst++ = '\n';
1105               else
1106                 {
1107                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1108                     {
1109                       result = CODING_FINISH_INCONSISTENT_EOL;
1110                       goto label_end_of_loop_2;
1111                     }
1112                   src--;
1113                   *dst++ = '\r';
1114                 }
1115             }
1116           else
1117             *dst++ = c1;
1118           coding->produced_char++;
1119           break;
1120
1121         case ISO_shift_out:
1122           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1123               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1124             goto label_invalid_code;
1125           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1126           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1127           break;
1128
1129         case ISO_shift_in:
1130           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1131             goto label_invalid_code;
1132           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1133           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1134           break;
1135
1136         case ISO_single_shift_2_7:
1137         case ISO_single_shift_2:
1138           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1139             goto label_invalid_code;
1140           /* SS2 is handled as an escape sequence of ESC 'N' */
1141           c1 = 'N';
1142           goto label_escape_sequence;
1143
1144         case ISO_single_shift_3:
1145           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1146             goto label_invalid_code;
1147           /* SS2 is handled as an escape sequence of ESC 'O' */
1148           c1 = 'O';
1149           goto label_escape_sequence;
1150
1151         case ISO_control_sequence_introducer:
1152           /* CSI is handled as an escape sequence of ESC '[' ...  */
1153           c1 = '[';
1154           goto label_escape_sequence;
1155
1156         case ISO_escape:
1157           ONE_MORE_BYTE (c1);
1158         label_escape_sequence:
1159           /* Escape sequences handled by Emacs are invocation,
1160              designation, direction specification, and character
1161              composition specification.  */
1162           switch (c1)
1163             {
1164             case '&':           /* revision of following character set */
1165               ONE_MORE_BYTE (c1);
1166               if (!(c1 >= '@' && c1 <= '~'))
1167                 goto label_invalid_code;
1168               ONE_MORE_BYTE (c1);
1169               if (c1 != ISO_CODE_ESC)
1170                 goto label_invalid_code;
1171               ONE_MORE_BYTE (c1);
1172               goto label_escape_sequence;
1173
1174             case '$':           /* designation of 2-byte character set */
1175               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1176                 goto label_invalid_code;
1177               ONE_MORE_BYTE (c1);
1178               if (c1 >= '@' && c1 <= 'B')
1179                 {       /* designation of JISX0208.1978, GB2312.1980,
1180                                    or JISX0208.1980 */
1181                   DECODE_DESIGNATION (0, 2, 94, c1);
1182                 }
1183               else if (c1 >= 0x28 && c1 <= 0x2B)
1184                 {       /* designation of DIMENSION2_CHARS94 character set */
1185                   ONE_MORE_BYTE (c2);
1186                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1187                 }
1188               else if (c1 >= 0x2C && c1 <= 0x2F)
1189                 {       /* designation of DIMENSION2_CHARS96 character set */
1190                   ONE_MORE_BYTE (c2);
1191                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1192                 }
1193               else
1194                 goto label_invalid_code;
1195               break;
1196
1197             case 'n':           /* invocation of locking-shift-2 */
1198               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1199                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1200                 goto label_invalid_code;
1201               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1202               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1203               break;
1204
1205             case 'o':           /* invocation of locking-shift-3 */
1206               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1207                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1208                 goto label_invalid_code;
1209               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1210               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1211               break;
1212
1213             case 'N':           /* invocation of single-shift-2 */
1214               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1215                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1216                 goto label_invalid_code;
1217               ONE_MORE_BYTE (c1);
1218               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1219               DECODE_ISO_CHARACTER (charset, c1);
1220               break;
1221
1222             case 'O':           /* invocation of single-shift-3 */
1223               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1224                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1225                 goto label_invalid_code;
1226               ONE_MORE_BYTE (c1);
1227               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1228               DECODE_ISO_CHARACTER (charset, c1);
1229               break;
1230
1231             case '0': case '2': /* start composing */
1232               /* Before processing composing, we must be sure that all
1233                  characters being composed are supported by CODING.
1234                  If not, we must give up composing and insert the
1235                  bunch of codes for composing as is without decoding.  */
1236               {
1237                 int result1;
1238
1239                 result1 = check_composing_code (coding, src, src_end);
1240                 if (result1 == 0)
1241                   {
1242                     coding->composing = (c1 == '0'
1243                                          ? COMPOSING_NO_RULE_HEAD
1244                                          : COMPOSING_WITH_RULE_HEAD);
1245                     coding->produced_char++;
1246                   }
1247                 else if (result1 > 0)
1248                   {
1249                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1250                       {
1251                         bcopy (src_base, dst, result1 + 2);
1252                         src += result1;
1253                         dst += result1 + 2;
1254                         coding->produced_char += result1 + 2;
1255                       }
1256                     else
1257                       {
1258                         result = CODING_FINISH_INSUFFICIENT_DST;
1259                         goto label_end_of_loop_2;
1260                       }
1261                   }
1262                 else
1263                   goto label_end_of_loop;
1264               }
1265               break;
1266
1267             case '1':           /* end composing */
1268               coding->composing = COMPOSING_NO;
1269               break;
1270
1271             case '[':           /* specification of direction */
1272               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1273                 goto label_invalid_code;
1274               /* For the moment, nested direction is not supported.
1275                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1276                  left-to-right, and nozero means right-to-left.  */
1277               ONE_MORE_BYTE (c1);
1278               switch (c1)
1279                 {
1280                 case ']':       /* end of the current direction */
1281                   coding->mode &= ~CODING_MODE_DIRECTION;
1282
1283                 case '0':       /* end of the current direction */
1284                 case '1':       /* start of left-to-right direction */
1285                   ONE_MORE_BYTE (c1);
1286                   if (c1 == ']')
1287                     coding->mode &= ~CODING_MODE_DIRECTION;
1288                   else
1289                     goto label_invalid_code;
1290                   break;
1291
1292                 case '2':       /* start of right-to-left direction */
1293                   ONE_MORE_BYTE (c1);
1294                   if (c1 == ']')
1295                     coding->mode |= CODING_MODE_DIRECTION;
1296                   else
1297                     goto label_invalid_code;
1298                   break;
1299
1300                 default:
1301                   goto label_invalid_code;
1302                 }
1303               break;
1304
1305             default:
1306               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1307                 goto label_invalid_code;
1308               if (c1 >= 0x28 && c1 <= 0x2B)
1309                 {       /* designation of DIMENSION1_CHARS94 character set */
1310                   ONE_MORE_BYTE (c2);
1311                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1312                 }
1313               else if (c1 >= 0x2C && c1 <= 0x2F)
1314                 {       /* designation of DIMENSION1_CHARS96 character set */
1315                   ONE_MORE_BYTE (c2);
1316                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1317                 }
1318               else
1319                 {
1320                   goto label_invalid_code;
1321                 }
1322             }
1323           /* We must update these variables now.  */
1324           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1325           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1326           break;
1327
1328         label_invalid_code:
1329           while (src_base < src)
1330             *dst++ = *src_base++;
1331           coding->fake_multibyte = 1;
1332         }
1333       continue;
1334
1335     label_end_of_loop:
1336       result = CODING_FINISH_INSUFFICIENT_SRC;
1337     label_end_of_loop_2:
1338       src = src_base;
1339       break;
1340     }
1341
1342   if (src < src_end)
1343     {
1344       if (result == CODING_FINISH_NORMAL)
1345         result = CODING_FINISH_INSUFFICIENT_DST;
1346       else if (result != CODING_FINISH_INCONSISTENT_EOL
1347                && coding->mode & CODING_MODE_LAST_BLOCK)
1348         {
1349           /* This is the last block of the text to be decoded.  We had
1350              better just flush out all remaining codes in the text
1351              although they are not valid characters.  */
1352           src_bytes = src_end - src;
1353           if (dst_bytes && (dst_end - dst < src_bytes))
1354             src_bytes = dst_end - dst;
1355           bcopy (src, dst, src_bytes);
1356           dst += src_bytes;
1357           src += src_bytes;
1358           coding->fake_multibyte = 1;
1359         }
1360     }
1361
1362   coding->consumed = coding->consumed_char = src - source;
1363   coding->produced = dst - destination;
1364   return result;
1365 }
1366
1367 /* ISO2022 encoding stuff.  */
1368
1369 /*
1370    It is not enough to say just "ISO2022" on encoding, we have to
1371    specify more details.  In Emacs, each coding system of ISO2022
1372    variant has the following specifications:
1373         1. Initial designation to G0 thru G3.
1374         2. Allows short-form designation?
1375         3. ASCII should be designated to G0 before control characters?
1376         4. ASCII should be designated to G0 at end of line?
1377         5. 7-bit environment or 8-bit environment?
1378         6. Use locking-shift?
1379         7. Use Single-shift?
1380    And the following two are only for Japanese:
1381         8. Use ASCII in place of JIS0201-1976-Roman?
1382         9. Use JISX0208-1983 in place of JISX0208-1978?
1383    These specifications are encoded in `coding->flags' as flag bits
1384    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1385    details.
1386 */
1387
/* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG, and record the designation in CODING.  If the
   revision number of CHARSET in CODING is less than 255, the sequence
   ESC '&' <'@' + revision> is produced first.  If CHARSET is a
   94-char charset of dimension 2 whose <final-char> is '@', 'A', or
   'B', REG is 0, and the coding system CODING allows, produce
   designation sequence of short-form (without the intermediate
   character).

   The macro writes through DST, which must be in scope.  Every use of
   REG and CODING is parenthesized so that the expansion does not
   depend on the form of the argument expressions.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    /* Intermediate characters for registers G0..G3, indexed by REG.  */ \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);   \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) == 1)                               \
      {                                                                 \
        if (CHARSET_CHARS (charset) == 94)                              \
          *dst++ = (unsigned char) (intermediate_char_94[(reg)]);       \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[(reg)]);       \
      }                                                                 \
    else                                                                \
      {                                                                 \
        *dst++ = '$';                                                   \
        if (CHARSET_CHARS (charset) == 94)                              \
          {                                                             \
            /* The intermediate character is omitted only in the        \
               short form.  */                                          \
            if (! ((coding)->flags & CODING_FLAG_ISO_SHORT_FORM)        \
                || (reg) != 0                                           \
                || final_char < '@' || final_char > 'B')                \
              *dst++ = (unsigned char) (intermediate_char_94[(reg)]);   \
          }                                                             \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[(reg)]);       \
      }                                                                 \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, (reg)) = charset;              \
  } while (0)
1429
1430 /* The following two macros produce codes at DST for ISO2022 single-shift
1431    functions (single-shift-2 and single-shift-3): `ESC N' / `ESC O' if
1432    CODING is 7-bit, else one control code.  Both set single-shifting.  */
1433
1434 #define ENCODE_SINGLE_SHIFT_2                           \
1435   do {                                                  \
1436     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1437       *dst++ = ISO_CODE_ESC, *dst++ = 'N';              \
1438     else                                                \
1439       {                                                 \
1440         *dst++ = ISO_CODE_SS2;                          \
1441         coding->fake_multibyte = 1;                     \
1442       }                                                 \
1443     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1444   } while (0)
1445
1446 #define ENCODE_SINGLE_SHIFT_3                           \
1447   do {                                                  \
1448     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1449       *dst++ = ISO_CODE_ESC, *dst++ = 'O';              \
1450     else                                                \
1451       {                                                 \
1452         *dst++ = ISO_CODE_SS3;                          \
1453         coding->fake_multibyte = 1;                     \
1454       }                                                 \
1455     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1456   } while (0)
1457
1458 /* The following four macros produce codes at DST for ISO2022
1459    locking-shift functions (SI, SO, `ESC n', and `ESC o') and record in
1460    CODING that graphic register 0, 1, 2, or 3 is now invoked to plane 0.  */
1461
1462 #define ENCODE_SHIFT_IN                         \
1463   do {                                          \
1464     *dst++ = ISO_CODE_SI;                       \
1465     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1466   } while (0)
1467
1468 #define ENCODE_SHIFT_OUT                        \
1469   do {                                          \
1470     *dst++ = ISO_CODE_SO;                       \
1471     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1472   } while (0)
1473
1474 #define ENCODE_LOCKING_SHIFT_2                  \
1475   do {                                          \
1476     *dst++ = ISO_CODE_ESC, *dst++ = 'n';        \
1477     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1478   } while (0)
1479
1480 #define ENCODE_LOCKING_SHIFT_3                  \
1481   do {                                          \
1482     *dst++ = ISO_CODE_ESC, *dst++ = 'o';        \
1483     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1484   } while (0)
1485
1486 /* Produce at DST the code of a DIMENSION1 character of CHARSET with
1487    position-code C1, first emitting any designation and invocation
1488    needed.  With CODING_FLAG_ISO_SAFE, an unsafe CHARSET is replaced.  */
1489
1490
1491 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
1492   do {     /* repeats until one branch emits the code and breaks */     \
1493     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1494       {    /* a single-shift code was just emitted for this char */     \
1495         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1496           *dst++ = c1 & 0x7F;                                           \
1497         else                                                            \
1498           *dst++ = c1 | 0x80;                                           \
1499         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1500         break;                                                          \
1501       }                                                                 \
1502     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1503       {    /* CHARSET is on plane 0: 8th bit clear */                   \
1504         *dst++ = c1 & 0x7F;                                             \
1505         break;                                                          \
1506       }                                                                 \
1507     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1508       {    /* CHARSET is on plane 1: 8th bit set */                     \
1509         *dst++ = c1 | 0x80;                                             \
1510         break;                                                          \
1511       }                                                                 \
1512     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1513              && !coding->safe_charsets[charset])                        \
1514       {                                                                 \
1515         /* We should not encode this character, instead produce one or  \
1516            two `?'s.  */                                                \
1517         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1518         if (CHARSET_WIDTH (charset) == 2)                               \
1519           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1520         break;                                                          \
1521       }                                                                 \
1522     else                                                                \
1523       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1524          must invoke it, or, at first, designate it to some graphic     \
1525          register.  Then repeat the loop to actually produce the        \
1526          character.  */                                                 \
1527       dst = encode_invocation_designation (charset, coding, dst);       \
1528   } while (1)
1529
1530 /* Produce at DST the two-byte code of a DIMENSION2 character of CHARSET
1531    with position-codes C1 and C2, first emitting any designation and
1532    invocation needed.  With CODING_FLAG_ISO_SAFE, unsafe CHARSET is replaced.  */
1533
1534 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
1535   do {     /* repeats until one branch emits the code and breaks */     \
1536     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1537       {    /* a single-shift code was just emitted for this char */     \
1538         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1539           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
1540         else                                                            \
1541           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
1542         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1543         break;                                                          \
1544       }                                                                 \
1545     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1546       {    /* CHARSET is on plane 0: 8th bits clear */                  \
1547         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;                          \
1548         break;                                                          \
1549       }                                                                 \
1550     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1551       {    /* CHARSET is on plane 1: 8th bits set */                    \
1552         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
1553         break;                                                          \
1554       }                                                                 \
1555     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1556              && !coding->safe_charsets[charset])                        \
1557       {                                                                 \
1558         /* We should not encode this character, instead produce one or  \
1559            two `?'s.  */                                                \
1560         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1561         if (CHARSET_WIDTH (charset) == 2)                               \
1562           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1563         break;                                                          \
1564       }                                                                 \
1565     else                                                                \
1566       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1567          must invoke it, or, at first, designate it to some graphic     \
1568          register.  Then repeat the loop to actually produce the        \
1569          character.  */                                                 \
1570       dst = encode_invocation_designation (charset, coding, dst);       \
1571   } while (1)
1572 /* Encode character (CHARSET, C1, C2) at DST after optional translation.  */
1573 #define ENCODE_ISO_CHARACTER(charset, c1, c2)                   \
1574   do {                                                          \
1575     int c_alt, charset_alt;                                     \
1576     if (!NILP (translation_table)                               \
1577         && ((c_alt = translate_char (translation_table, -1,     \
1578                                      charset, c1, c2))          \
1579             >= 0))                                              \
1580       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
1581     else                                                        \
1582       charset_alt = charset;                                    \
1583     if (CHARSET_DIMENSION (charset_alt) == 1)                   \
1584       {  /* C1 now belongs to CHARSET_ALT, so test that one */  \
1585         if (charset_alt == CHARSET_ASCII                        \
1586             && coding->flags & CODING_FLAG_ISO_USE_ROMAN)       \
1587           charset_alt = charset_latin_jisx0201;                 \
1588         ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);      \
1589       }                                                         \
1590     else                                                        \
1591       {  /* likewise C1, C2 belong to CHARSET_ALT here */       \
1592         if (charset_alt == charset_jisx0208                     \
1593             && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)      \
1594           charset_alt = charset_jisx0208_1978;                  \
1595         ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);  \
1596       }                                                         \
1597     if (! COMPOSING_P (coding->composing))                      \
1598       coding->consumed_char++;   /* not counted in composition */ \
1599   } while (0)
1600
1601 /* Write at DST the codes needed to use CHARSET and update *CODING: a
1602    designation if CHARSET is on no graphic register (requested one, else
1603    0), then a shift if that register is on neither plane.  Return new DST.  */
1604
1605 unsigned char *
1606 encode_invocation_designation (charset, coding, dst)
1607      int charset;
1608      struct coding_system *coding;
1609      unsigned char *dst;
1610 {
1611   int reg;                      /* graphic register number */
1612
1613   /* At first, check designations.  */
1614   for (reg = 0; reg < 4; reg++)
1615     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1616       break;
1617
1618   if (reg >= 4)
1619     {
1620       /* CHARSET is not yet designated to any graphic registers.  */
1621       /* At first check the requested designation.  */
1622       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1623       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1624         /* Since CHARSET requests no special designation, designate it
1625            to graphic register 0.  */
1626         reg = 0;
1627
1628       ENCODE_DESIGNATION (charset, reg, coding);
1629     }
1630
1631   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1632       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1633     {
1634       /* Since the graphic register REG is not invoked to any graphic
1635          planes, invoke it to graphic plane 0.  */
1636       switch (reg)
1637         {
1638         case 0:                 /* graphic register 0 */
1639           ENCODE_SHIFT_IN;
1640           break;
1641
1642         case 1:                 /* graphic register 1 */
1643           ENCODE_SHIFT_OUT;
1644           break;
1645
1646         case 2:                 /* graphic register 2 */
1647           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1648             ENCODE_SINGLE_SHIFT_2;  /* sets single-shifting state only */
1649           else
1650             ENCODE_LOCKING_SHIFT_2; /* records REG as invoked to plane 0 */
1651           break;
1652
1653         case 3:                 /* graphic register 3 */
1654           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1655             ENCODE_SINGLE_SHIFT_3;  /* sets single-shifting state only */
1656           else
1657             ENCODE_LOCKING_SHIFT_3; /* records REG as invoked to plane 0 */
1658           break;
1659         }
1660     }
1661   return dst;
1662 }
1663
1664 /* The following three macros produce codes for indicating composition.  */
1665 #define ENCODE_COMPOSITION_NO_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '0'
1666 #define ENCODE_COMPOSITION_WITH_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '2'
1667 #define ENCODE_COMPOSITION_END    *dst++ = ISO_CODE_ESC, *dst++ = '1'
1668
1669 /* The following three macros produce codes for indicating direction of
1670    text: CSI (`ESC [' if CODING is 7-bit) then `2 ]' (R2L) or `0 ]' (L2R).  */
1671 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
1672   do {                                                  \
1673     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1674       *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
1675     else                                                \
1676       *dst++ = ISO_CODE_CSI;                            \
1677   } while (0)
1678
1679 #define ENCODE_DIRECTION_R2L    /* usable as one statement */   \
1680   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '2'; *dst++ = ']'; } while (0)
1681
1682 #define ENCODE_DIRECTION_L2R    /* usable as one statement */   \
1683   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '0'; *dst++ = ']'; } while (0)
1684
1685 /* Produce codes at DST to reset the graphic planes and registers to
1686    initial state: shift-in, then redesignate changed initial charsets.  */
1687 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
1688   do {                                                                      \
1689     int reg;                                                                \
1690     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
1691       ENCODE_SHIFT_IN;                                                      \
1692     for (reg = 0; reg < 4; reg++)                                           \
1693       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0            \
1694           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
1695               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
1696         ENCODE_DESIGNATION                                                  \
1697           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1698   } while (0)
1699
1700 /* Scan the line starting at SRC (up to SRC_END or a newline) for
1701    charsets that request a graphic register, keeping the first charset
1702    found for each register; characters are translated by TABLE if non-nil.
1703    Produce the designations that differ from the current ones at *DSTP and
1704    update *DSTP.  If the block ends before end-of-line, some may be missed.  */
1705
1706 void
1707 encode_designation_at_bol (coding, table, src, src_end, dstp)
1708      struct coding_system *coding;
1709      Lisp_Object table;
1710      unsigned char *src, *src_end, **dstp;
1711 {
1712   int charset, c, found = 0, reg;
1713   /* Table of charsets to be designated to each graphic register.  */
1714   int r[4];
1715   unsigned char *dst = *dstp;
1716
1717   for (reg = 0; reg < 4; reg++)
1718     r[reg] = -1;                /* -1: nothing found for REG yet */
1719
1720   while (src < src_end && *src != '\n' && found < 4)
1721     {
1722       int bytes = BYTES_BY_CHAR_HEAD (*src);
1723
1724       if (NILP (table))
1725         charset = CHARSET_AT (src);
1726       else
1727         {
1728           int c_alt;
1729           unsigned char c1, c2;
1730
1731           SPLIT_STRING(src, bytes, charset, c1, c2);
1732           if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
1733             charset = CHAR_CHARSET (c_alt);
1734         }
1735
1736       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1737       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1738         {
1739           found++;
1740           r[reg] = charset;
1741         }
1742
1743       src += bytes;             /* advance to the next character */
1744     }
1745
1746   if (found)
1747     {
1748       for (reg = 0; reg < 4; reg++)
1749         if (r[reg] >= 0
1750             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1751           ENCODE_DESIGNATION (r[reg], reg, coding);
1752       *dstp = dst;              /* ENCODE_DESIGNATION advanced DST */
1753     }
1754 }
1755
1756 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
1757 /* Encode SRC_BYTES bytes at SOURCE into ISO2022 codes at DESTINATION.  */
1758 int                             /* returns a CODING_FINISH_XXX code */
1759 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1760      struct coding_system *coding;
1761      unsigned char *source, *destination;
1762      int src_bytes, dst_bytes;
1763 {
1764   unsigned char *src = source;
1765   unsigned char *src_end = source + src_bytes;
1766   unsigned char *dst = destination;
1767   unsigned char *dst_end = destination + dst_bytes;
1768   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1769      from DST_END to assure overflow checking is necessary only at the
1770      head of loop.  */
1771   unsigned char *adjusted_dst_end = dst_end - 19;
1772   Lisp_Object translation_table
1773       = coding->translation_table_for_encode;
1774   int result = CODING_FINISH_NORMAL;
1775
1776   if (!NILP (Venable_character_translation) && NILP (translation_table))
1777     translation_table = Vstandard_translation_table_for_encode;
1778
1779   coding->consumed_char = 0;
1780   coding->fake_multibyte = 0;
1781   while (src < src_end && (dst_bytes
1782                            ? (dst < adjusted_dst_end)
1783                            : (dst < src - 19))) /* DST_BYTES == 0: DST is bounded by SRC */
1784     {
1785       /* SRC_BASE remembers the start position in source in each loop.
1786          The loop will be exited when there's not enough source text
1787          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1788          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1789          reset to SRC_BASE before exiting.  */
1790       unsigned char *src_base = src;
1791       int charset, c1, c2, c3, c4; /* NOTE(review): "dummy" args below are passed uninitialized */
1792
1793       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1794           && CODING_SPEC_ISO_BOL (coding))
1795         {
1796           /* We have to produce designation sequences if any now.  */
1797           encode_designation_at_bol (coding, translation_table,
1798                                      src, src_end, &dst);
1799           CODING_SPEC_ISO_BOL (coding) = 0;
1800         }
1801
1802       c1 = *src++;
1803       /* If we are seeing a component of a composite character, we are
1804          seeing a leading-code encoded irregularly for composition, or
1805          a composition rule if composing with rule.  We must set C1 to
1806          a normal leading-code or an ASCII code.  If we are not seeing
1807          a composite character, we must reset composition,
1808          designation, and invocation states.  */
1809       if (COMPOSING_P (coding->composing))
1810         {
1811           if (c1 < 0xA0)
1812             {
1813               /* We are not in a composite character any longer.  */
1814               coding->composing = COMPOSING_NO;
1815               ENCODE_RESET_PLANE_AND_REGISTER;
1816               ENCODE_COMPOSITION_END;
1817             }
1818           else
1819             {
1820               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1821                 {
1822                   *dst++ = c1 & 0x7F; /* C1 is the rule; emit it 7-bit */
1823                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1824                   continue;
1825                 }
1826               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1827                 coding->composing = COMPOSING_WITH_RULE_RULE;
1828               if (c1 == 0xA0)
1829                 {
1830                   /* This is an ASCII component.  */
1831                   ONE_MORE_BYTE (c1);
1832                   c1 &= 0x7F;
1833                 }
1834               else
1835                 /* This is a leading-code of non ASCII component.  */
1836                 c1 -= 0x20;
1837             }
1838         }
1839
1840       /* Now encode one character.  C1 is a control character, an
1841          ASCII character, or a leading-code of multi-byte character.  */
1842       switch (emacs_code_class[c1])
1843         {
1844         case EMACS_ascii_code:
1845           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1846           break;
1847
1848         case EMACS_control_code:
1849           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1850             ENCODE_RESET_PLANE_AND_REGISTER;
1851           *dst++ = c1;
1852           coding->consumed_char++;
1853           break;
1854
1855         case EMACS_carriage_return_code:
1856           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1857             {
1858               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1859                 ENCODE_RESET_PLANE_AND_REGISTER;
1860               *dst++ = c1;
1861               coding->consumed_char++;
1862               break;
1863             }
1864           /* fall down to treat '\r' as '\n' ...  */
1865
1866         case EMACS_linefeed_code:
1867           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1868             ENCODE_RESET_PLANE_AND_REGISTER;
1869           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1870             bcopy (coding->spec.iso2022.initial_designation,
1871                    coding->spec.iso2022.current_designation,
1872                    sizeof coding->spec.iso2022.initial_designation);
1873           if (coding->eol_type == CODING_EOL_LF
1874               || coding->eol_type == CODING_EOL_UNDECIDED)
1875             *dst++ = ISO_CODE_LF;
1876           else if (coding->eol_type == CODING_EOL_CRLF)
1877             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1878           else
1879             *dst++ = ISO_CODE_CR;
1880           CODING_SPEC_ISO_BOL (coding) = 1;
1881           coding->consumed_char++;
1882           break;
1883
1884         case EMACS_leading_code_2:
1885           ONE_MORE_BYTE (c2);
1886           if (c2 < 0xA0)
1887             {
1888               /* invalid sequence */
1889               *dst++ = c1;      /* pass the leading-code through as is */
1890               src--;            /* presumably un-reads C2; confirm against ONE_MORE_BYTE */
1891               coding->consumed_char++;
1892             }
1893           else
1894             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1895           break;
1896
1897         case EMACS_leading_code_3:
1898           TWO_MORE_BYTES (c2, c3);
1899           if (c2 < 0xA0 || c3 < 0xA0)
1900             {
1901               /* invalid sequence */
1902               *dst++ = c1;
1903               src -= 2;
1904               coding->consumed_char++;
1905             }
1906           else if (c1 < LEADING_CODE_PRIVATE_11)
1907             ENCODE_ISO_CHARACTER (c1, c2, c3); /* C1 itself is the charset */
1908           else
1909             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4); /* C2 is the charset */
1910           break;
1911
1912         case EMACS_leading_code_4:
1913           THREE_MORE_BYTES (c2, c3, c4);
1914           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1915             {
1916               /* invalid sequence */
1917               *dst++ = c1;
1918               src -= 3;
1919               coding->consumed_char++;
1920             }
1921           else
1922             ENCODE_ISO_CHARACTER (c2, c3, c4); /* C2 is the charset */
1923           break;
1924
1925         case EMACS_leading_code_composition:
1926           ONE_MORE_BYTE (c2);
1927           if (c2 < 0xA0)
1928             {
1929               /* invalid sequence */
1930               *dst++ = c1;
1931               src--;
1932               coding->consumed_char++;
1933             }
1934           else if (c2 == 0xFF)
1935             {
1936               ENCODE_RESET_PLANE_AND_REGISTER;
1937               coding->composing = COMPOSING_WITH_RULE_HEAD;
1938               ENCODE_COMPOSITION_WITH_RULE_START;
1939               coding->consumed_char++;
1940             }
1941           else
1942             {
1943               ENCODE_RESET_PLANE_AND_REGISTER;
1944               /* Rewind one byte because it is a character code of
1945                  composition elements.  */
1946               src--;
1947               coding->composing = COMPOSING_NO_RULE_HEAD;
1948               ENCODE_COMPOSITION_NO_RULE_START;
1949               coding->consumed_char++;
1950             }
1951           break;
1952
1953         case EMACS_invalid_code:
1954           *dst++ = c1;
1955           coding->consumed_char++;
1956           break;
1957         }
1958       continue;
1959     label_end_of_loop:          /* presumably the target of the XXX_MORE_BYTE(S) macros */
1960       result = CODING_FINISH_INSUFFICIENT_SRC;
1961       src = src_base;           /* leave the incomplete character unconsumed */
1962       break;
1963     }
1964
1965   if (src < src_end && result == CODING_FINISH_NORMAL)
1966     result = CODING_FINISH_INSUFFICIENT_DST; /* loop stopped on the DST bound */
1967
1968   /* If this is the last block of the text to be encoded, we must
1969      reset graphic planes and registers to the initial state, and
1970      flush out the carryover if any.  */
1971   if (coding->mode & CODING_MODE_LAST_BLOCK)
1972     {
1973       ENCODE_RESET_PLANE_AND_REGISTER;
1974       if (COMPOSING_P (coding->composing))
1975         ENCODE_COMPOSITION_END;
1976     }
1977   coding->consumed = src - source;      /* source bytes used */
1978   coding->produced = coding->produced_char = dst - destination; /* bytes written */
1979   return result;
1980 }
1981
1982 \f
1983 /*** 4. SJIS and BIG5 handlers ***/
1984
1985 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1986    quite widely.  So, for the moment, Emacs supports them in the bare
1987    C code.  But, in the future, they may be supported only by CCL.  */
1988
1989 /* SJIS is a coding system encoding three character sets: ASCII, right
1990    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
1991    as is.  A character of charset katakana-jisx0201 is encoded by
1992    "position-code + 0x80".  A character of charset japanese-jisx0208
1993    is encoded in 2 bytes, but the two position-codes are divided and
1994    shifted so that they fit in the ranges below.
1995
1996    --- CODE RANGE of SJIS ---
1997    (character set)      (range)
1998    ASCII                0x00 .. 0x7F
1999    KATAKANA-JISX0201    0xA0 .. 0xDF
2000    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xEF
2001             (2nd byte)  0x40 .. 0xFF
2002    -------------------------------
2003
2004 */
2005
2006 /* BIG5 is a coding system encoding two character sets: ASCII and
2007    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2008    character set and is encoded in two bytes.
2009
2010    --- CODE RANGE of BIG5 ---
2011    (character set)      (range)
2012    ASCII                0x00 .. 0x7F
2013    Big5 (1st byte)      0xA1 .. 0xFE
2014         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2015    --------------------------
2016
2017    Since the number of characters in Big5 is larger than maximum
2018    characters in Emacs' charset (96x96), it can't be handled as one
2019    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2020    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2021    contains frequently used characters and the latter contains less
2022    frequently used characters.  */
2023
2024 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2025    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2026    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal format.
2027    CHARSET is `charset_big5_1' or `charset_big5_2'.  Args may be evaluated twice.  */
2028
2029 /* Number of Big5 characters which have the same code in 1st byte.  */
2030 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)       /* 94 + 63 = 157 */
2031
2032 #define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
2033   do {     /* TEMP: index of (B1, B2) counted from (0xA1, 0x40) */      \
2034     unsigned int temp                                                   \
2035       = ((b1) - 0xA1) * BIG5_SAME_ROW + (b2) - ((b2) < 0x7F ? 0x40 : 0x62); \
2036     if ((b1) < 0xC9)                                                    \
2037       (charset) = charset_big5_1;                                       \
2038     else                                                                \
2039       {    /* rebase the index on the first row of charset_big5_2 */    \
2040         (charset) = charset_big5_2;                                     \
2041         temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
2042       }                                                                 \
2043     (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
2044     (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
2045   } while (0)
2046
2047 #define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
2048   do {     /* inverse of DECODE_BIG5; B1 and B2 must be lvalues */      \
2049     unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
2050     if ((charset) == charset_big5_2)                                    \
2051       temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
2052     (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
2053     (b2) = temp % BIG5_SAME_ROW;                                        \
2054     (b2) += (b2) < 0x3F ? 0x40 : 0x62;  /* skip the 0x7F..0xA0 gap */   \
2055   } while (0)
2056 /* Translate (CHARSET, C1, C2) if a table is set, then decode the result.  */
2057 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
2058   do {                                                                  \
2059     int c_alt, charset_alt = (charset);                                 \
2060     if (!NILP (translation_table)                                       \
2061         && ((c_alt = translate_char (translation_table,                 \
2062                                      -1, (charset), c1, c2)) >= 0))     \
2063           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
2064     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
2065       DECODE_CHARACTER_ASCII (c1);                                      \
2066     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
2067       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
2068     else                                                                \
2069       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
2070   } while (0)
2071
/* Encode one character given as CHARSET with position codes C1 and C2
   and store the resulting bytes at DST, advancing DST.  The macro
   reads the variables `translation_table', `sjis_p', `coding' and
   `dst' of the expansion site.  C1 and C2 must be lvalues and may be
   modified.

   ASCII is written as the single byte C1.  With SJIS_P nonzero,
   charset_katakana_jisx0201 is written as C1 alone and
   charset_jisx0208 goes through ENCODE_SJIS.  With SJIS_P zero,
   charset_big5_1 and charset_big5_2 go through ENCODE_BIG5.  Every
   other charset is written as the charset byte followed by the
   position code(s), which is at most three bytes.

   `coding->consumed_char' is incremented once per expansion.  The
   definition deliberately has no trailing semicolon so that the macro
   behaves as a single statement, e.g. in an unbraced if/else.  */

#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)             \
  do {                                                          \
    int c_alt, charset_alt;                                     \
    if (!NILP (translation_table)                               \
        && ((c_alt = translate_char (translation_table, -1,     \
                                     (charset), c1, c2))        \
            >= 0))                                              \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
    else                                                        \
      charset_alt = (charset);                                  \
    if (charset_alt == charset_ascii)                           \
      *dst++ = c1;                                              \
    else if (CHARSET_DIMENSION (charset_alt) == 1)              \
      {                                                         \
        if (sjis_p && charset_alt == charset_katakana_jisx0201) \
          *dst++ = c1;                                          \
        else                                                    \
          {                                                     \
            *dst++ = charset_alt, *dst++ = c1;                  \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    else                                                        \
      {                                                         \
        c1 &= 0x7F, c2 &= 0x7F;                                 \
        if (sjis_p && charset_alt == charset_jisx0208)          \
          {                                                     \
            unsigned char s1, s2;                               \
                                                                \
            ENCODE_SJIS (c1, c2, s1, s2);                       \
            *dst++ = s1, *dst++ = s2;                           \
            coding->fake_multibyte = 1;                         \
          }                                                     \
        else if (!sjis_p                                        \
                 && (charset_alt == charset_big5_1              \
                     || charset_alt == charset_big5_2))         \
          {                                                     \
            unsigned char b1, b2;                               \
                                                                \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);          \
            *dst++ = b1, *dst++ = b2;                           \
          }                                                     \
        else                                                    \
          {                                                     \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;     \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    coding->consumed_char++;                                    \
  } while (0)
2122
2123 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2124    Check if a text is encoded in SJIS.  If it is, return
2125    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2126
2127 int
2128 detect_coding_sjis (src, src_end)
2129      unsigned char *src, *src_end;
2130 {
2131   unsigned char c;
2132
2133   while (src < src_end)
2134     {
2135       c = *src++;
2136       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2137         {
2138           if (src < src_end && *src++ < 0x40)
2139             return 0;
2140         }
2141     }
2142   return CODING_CATEGORY_MASK_SJIS;
2143 }
2144
2145 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2146    Check if a text is encoded in BIG5.  If it is, return
2147    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2148
2149 int
2150 detect_coding_big5 (src, src_end)
2151      unsigned char *src, *src_end;
2152 {
2153   unsigned char c;
2154
2155   while (src < src_end)
2156     {
2157       c = *src++;
2158       if (c >= 0xA1)
2159         {
2160           if (src >= src_end)
2161             break;
2162           c = *src++;
2163           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2164             return 0;
2165         }
2166     }
2167   return CODING_CATEGORY_MASK_BIG5;
2168 }
2169
2170 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2171    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2172
2173 int
2174 decode_coding_sjis_big5 (coding, source, destination,
2175                          src_bytes, dst_bytes, sjis_p)
2176      struct coding_system *coding;
2177      unsigned char *source, *destination;
2178      int src_bytes, dst_bytes;
2179      int sjis_p;
2180 {
2181   unsigned char *src = source;
2182   unsigned char *src_end = source + src_bytes;
2183   unsigned char *dst = destination;
2184   unsigned char *dst_end = destination + dst_bytes;
2185   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2186      from DST_END to assure overflow checking is necessary only at the
2187      head of loop.  */
2188   unsigned char *adjusted_dst_end = dst_end - 3;
2189   Lisp_Object translation_table
2190       = coding->translation_table_for_decode;
2191   int result = CODING_FINISH_NORMAL;
2192
2193   if (!NILP (Venable_character_translation) && NILP (translation_table))
2194     translation_table = Vstandard_translation_table_for_decode;
2195
2196   coding->produced_char = 0;
2197   coding->fake_multibyte = 0;
2198   while (src < src_end && (dst_bytes
2199                            ? (dst < adjusted_dst_end)
2200                            : (dst < src - 3)))
2201     {
2202       /* SRC_BASE remembers the start position in source in each loop.
2203          The loop will be exited when there's not enough source text
2204          to analyze two-byte character (within macro ONE_MORE_BYTE).
2205          In that case, SRC is reset to SRC_BASE before exiting.  */
2206       unsigned char *src_base = src;
2207       unsigned char c1 = *src++, c2, c3, c4;
2208
2209       if (c1 < 0x20)
2210         {
2211           if (c1 == '\r')
2212             {
2213               if (coding->eol_type == CODING_EOL_CRLF)
2214                 {
2215                   ONE_MORE_BYTE (c2);
2216                   if (c2 == '\n')
2217                     *dst++ = c2;
2218                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2219                     {
2220                       result = CODING_FINISH_INCONSISTENT_EOL;
2221                       goto label_end_of_loop_2;
2222                     }
2223                   else
2224                     /* To process C2 again, SRC is subtracted by 1.  */
2225                     *dst++ = c1, src--;
2226                 }
2227               else if (coding->eol_type == CODING_EOL_CR)
2228                 *dst++ = '\n';
2229               else
2230                 *dst++ = c1;
2231             }
2232           else if (c1 == '\n'
2233                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2234                    && (coding->eol_type == CODING_EOL_CR
2235                        || coding->eol_type == CODING_EOL_CRLF))
2236             {
2237               result = CODING_FINISH_INCONSISTENT_EOL;
2238               goto label_end_of_loop_2;
2239             }
2240           else
2241             *dst++ = c1;
2242           coding->produced_char++;
2243         }
2244       else if (c1 < 0x80)
2245         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2246       else
2247         {
2248           if (sjis_p)
2249             {
2250               if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
2251                 {
2252                   /* SJIS -> JISX0208 */
2253                   ONE_MORE_BYTE (c2);
2254                   if (c2 >= 0x40)
2255                     {
2256                       DECODE_SJIS (c1, c2, c3, c4);
2257                       DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2258                     }
2259                   else
2260                     goto label_invalid_code_2;
2261                 }
2262               else if (c1 < 0xE0)
2263                 /* SJIS -> JISX0201-Kana */
2264                 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2265                                             /* dummy */ c2);
2266               else
2267                 goto label_invalid_code_1;
2268             }
2269           else
2270             {
2271               /* BIG5 -> Big5 */
2272               if (c1 >= 0xA1 && c1 <= 0xFE)
2273                 {
2274                   ONE_MORE_BYTE (c2);
2275                   if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2276                     {
2277                       int charset;
2278
2279                       DECODE_BIG5 (c1, c2, charset, c3, c4);
2280                       DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2281                     }
2282                   else
2283                     goto label_invalid_code_2;
2284                 }
2285               else
2286                 goto label_invalid_code_1;
2287             }
2288         }
2289       continue;
2290
2291     label_invalid_code_1:
2292       *dst++ = c1;
2293       coding->produced_char++;
2294       coding->fake_multibyte = 1;
2295       continue;
2296
2297     label_invalid_code_2:
2298       *dst++ = c1; *dst++= c2;
2299       coding->produced_char += 2;
2300       coding->fake_multibyte = 1;
2301       continue;
2302
2303     label_end_of_loop:
2304       result = CODING_FINISH_INSUFFICIENT_SRC;
2305     label_end_of_loop_2:
2306       src = src_base;
2307       break;
2308     }
2309
2310   if (src < src_end)
2311     {
2312       if (result == CODING_FINISH_NORMAL)
2313         result = CODING_FINISH_INSUFFICIENT_DST;
2314       else if (result != CODING_FINISH_INCONSISTENT_EOL
2315                && coding->mode & CODING_MODE_LAST_BLOCK)
2316         {
2317           src_bytes = src_end - src;
2318           if (dst_bytes && (dst_end - dst < src_bytes))
2319             src_bytes = dst_end - dst;
2320           bcopy (dst, src, src_bytes);
2321           src += src_bytes;
2322           dst += src_bytes;
2323           coding->fake_multibyte = 1;
2324         }
2325     }
2326
2327   coding->consumed = coding->consumed_char = src - source;
2328   coding->produced = dst - destination;
2329   return result;
2330 }
2331
2332 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2333    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2334    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
2335    sure that all these charsets are registered as official charset
2336    (i.e. do not have extended leading-codes).  Characters of other
2337    charsets are produced without any encoding.  If SJIS_P is 1, encode
2338    SJIS text, else encode BIG5 text.  */
2339
2340 int
2341 encode_coding_sjis_big5 (coding, source, destination,
2342                          src_bytes, dst_bytes, sjis_p)
2343      struct coding_system *coding;
2344      unsigned char *source, *destination;
2345      int src_bytes, dst_bytes;
2346      int sjis_p;
2347 {
2348   unsigned char *src = source;
2349   unsigned char *src_end = source + src_bytes;
2350   unsigned char *dst = destination;
2351   unsigned char *dst_end = destination + dst_bytes;
2352   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2353      from DST_END to assure overflow checking is necessary only at the
2354      head of loop.  */
2355   unsigned char *adjusted_dst_end = dst_end - 1;
2356   Lisp_Object translation_table
2357       = coding->translation_table_for_encode;
2358   int result = CODING_FINISH_NORMAL;
2359
2360   if (!NILP (Venable_character_translation) && NILP (translation_table))
2361     translation_table = Vstandard_translation_table_for_encode;
2362
2363   coding->consumed_char = 0;
2364   coding->fake_multibyte = 0;
2365   while (src < src_end && (dst_bytes
2366                            ? (dst < adjusted_dst_end)
2367                            : (dst < src - 1)))
2368     {
2369       /* SRC_BASE remembers the start position in source in each loop.
2370          The loop will be exited when there's not enough source text
2371          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2372          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2373          before exiting.  */
2374       unsigned char *src_base = src;
2375       unsigned char c1 = *src++, c2, c3, c4;
2376
2377       if (coding->composing)
2378         {
2379           if (c1 == 0xA0)
2380             {
2381               ONE_MORE_BYTE (c1);
2382               c1 &= 0x7F;
2383             }
2384           else if (c1 >= 0xA0)
2385             c1 -= 0x20;
2386           else
2387             coding->composing = 0;
2388         }
2389
2390       switch (emacs_code_class[c1])
2391         {
2392         case EMACS_ascii_code:
2393           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2394           break;
2395
2396         case EMACS_control_code:
2397           *dst++ = c1;
2398           coding->consumed_char++;
2399           break;
2400
2401         case EMACS_carriage_return_code:
2402           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2403             {
2404               *dst++ = c1;
2405               coding->consumed_char++;
2406               break;
2407             }
2408           /* fall down to treat '\r' as '\n' ...  */
2409
2410         case EMACS_linefeed_code:
2411           if (coding->eol_type == CODING_EOL_LF
2412               || coding->eol_type == CODING_EOL_UNDECIDED)
2413             *dst++ = '\n';
2414           else if (coding->eol_type == CODING_EOL_CRLF)
2415             *dst++ = '\r', *dst++ = '\n';
2416           else
2417             *dst++ = '\r';
2418           coding->consumed_char++;
2419           break;
2420
2421         case EMACS_leading_code_2:
2422           ONE_MORE_BYTE (c2);
2423           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2424           break;
2425
2426         case EMACS_leading_code_3:
2427           TWO_MORE_BYTES (c2, c3);
2428           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2429           break;
2430
2431         case EMACS_leading_code_4:
2432           THREE_MORE_BYTES (c2, c3, c4);
2433           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2434           break;
2435
2436         case EMACS_leading_code_composition:
2437           coding->composing = 1;
2438           break;
2439
2440         default:                /* i.e. case EMACS_invalid_code: */
2441           *dst++ = c1;
2442           coding->consumed_char++;
2443         }
2444       continue;
2445
2446     label_end_of_loop:
2447       result = CODING_FINISH_INSUFFICIENT_SRC;
2448       src = src_base;
2449       break;
2450     }
2451
2452   if (result == CODING_FINISH_NORMAL
2453       && src < src_end)
2454     result = CODING_FINISH_INSUFFICIENT_DST;
2455   coding->consumed = src - source;
2456   coding->produced = coding->produced_char = dst - destination;
2457   return result;
2458 }
2459
2460 \f
2461 /*** 5. CCL handlers ***/
2462
2463 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2464    Check if a text is encoded in a coding system of which
2465    encoder/decoder are written in CCL program.  If it is, return
2466    CODING_CATEGORY_MASK_CCL, else return 0.  */
2467
2468 int
2469 detect_coding_ccl (src, src_end)
2470      unsigned char *src, *src_end;
2471 {
2472   unsigned char *valid;
2473
2474   /* No coding system is assigned to coding-category-ccl.  */
2475   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2476     return 0;
2477
2478   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2479   while (src < src_end)
2480     {
2481       if (! valid[*src]) return 0;
2482       src++;
2483     }
2484   return CODING_CATEGORY_MASK_CCL;
2485 }
2486
2487 \f
2488 /*** 6. End-of-line handlers ***/
2489
2490 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2491    This function is called only when `coding->eol_type' is
2492    CODING_EOL_CRLF or CODING_EOL_CR.  */
2493
2494 int
2495 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2496      struct coding_system *coding;
2497      unsigned char *source, *destination;
2498      int src_bytes, dst_bytes;
2499 {
2500   unsigned char *src = source;
2501   unsigned char *src_end = source + src_bytes;
2502   unsigned char *dst = destination;
2503   unsigned char *dst_end = destination + dst_bytes;
2504   unsigned char c;
2505   int result = CODING_FINISH_NORMAL;
2506
2507   coding->fake_multibyte = 0;
2508
2509   if (src_bytes <= 0)
2510     return result;
2511
2512   switch (coding->eol_type)
2513     {
2514     case CODING_EOL_CRLF:
2515       {
2516         /* Since the maximum bytes produced by each loop is 2, we
2517            subtract 1 from DST_END to assure overflow checking is
2518            necessary only at the head of loop.  */
2519         unsigned char *adjusted_dst_end = dst_end - 1;
2520
2521         while (src < src_end && (dst_bytes
2522                                  ? (dst < adjusted_dst_end)
2523                                  : (dst < src - 1)))
2524           {
2525             unsigned char *src_base = src;
2526
2527             c = *src++;
2528             if (c == '\r')
2529               {
2530                 ONE_MORE_BYTE (c);
2531                 if (c != '\n')
2532                   {
2533                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2534                       {
2535                         result = CODING_FINISH_INCONSISTENT_EOL;
2536                         goto label_end_of_loop_2;
2537                       }
2538                     *dst++ = '\r';
2539                     if (BASE_LEADING_CODE_P (c))
2540                       coding->fake_multibyte = 1;
2541                   }
2542                 *dst++ = c;
2543               }
2544             else if (c == '\n'
2545                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2546               {
2547                 result = CODING_FINISH_INCONSISTENT_EOL;
2548                 goto label_end_of_loop_2;
2549               }
2550             else
2551               {
2552                 *dst++ = c;
2553                 if (BASE_LEADING_CODE_P (c))
2554                   coding->fake_multibyte = 1;
2555               }
2556             continue;
2557
2558           label_end_of_loop:
2559             result = CODING_FINISH_INSUFFICIENT_SRC;
2560           label_end_of_loop_2:
2561             src = src_base;
2562             break;
2563           }
2564         if (result == CODING_FINISH_NORMAL
2565             && src < src_end)
2566           result = CODING_FINISH_INSUFFICIENT_DST;
2567       }
2568       break;
2569
2570     case CODING_EOL_CR:
2571       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2572         {
2573           while (src < src_end)
2574             {
2575               if ((c = *src++) == '\n')
2576                 break;
2577               if (BASE_LEADING_CODE_P (c))
2578                 coding->fake_multibyte = 1;
2579             }
2580           if (*--src == '\n')
2581             {
2582               src_bytes = src - source;
2583               result = CODING_FINISH_INCONSISTENT_EOL;
2584             }
2585         }
2586       if (dst_bytes && src_bytes > dst_bytes)
2587         {
2588           result = CODING_FINISH_INSUFFICIENT_DST;
2589           src_bytes = dst_bytes;
2590         }
2591       if (dst_bytes)
2592         bcopy (source, destination, src_bytes);
2593       else
2594         safe_bcopy (source, destination, src_bytes);
2595       src = source + src_bytes;
2596       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2597       break;
2598
2599     default:                    /* i.e. case: CODING_EOL_LF */
2600       if (dst_bytes && src_bytes > dst_bytes)
2601         {
2602           result = CODING_FINISH_INSUFFICIENT_DST;
2603           src_bytes = dst_bytes;
2604         }
2605       if (dst_bytes)
2606         bcopy (source, destination, src_bytes);
2607       else
2608         safe_bcopy (source, destination, src_bytes);
2609       src += src_bytes;
2610       dst += src_bytes;
2611       coding->fake_multibyte = 1;
2612       break;
2613     }
2614
2615   coding->consumed = coding->consumed_char = src - source;
2616   coding->produced = coding->produced_char = dst - destination;
2617   return result;
2618 }
2619
2620 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2621    format of end-of-line according to `coding->eol_type'.  If
2622    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2623    '\r' in source text also means end-of-line.  */
2624
2625 int
2626 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2627      struct coding_system *coding;
2628      unsigned char *source, *destination;
2629      int src_bytes, dst_bytes;
2630 {
2631   unsigned char *src = source;
2632   unsigned char *dst = destination;
2633   int result = CODING_FINISH_NORMAL;
2634
2635   coding->fake_multibyte = 0;
2636
2637   if (coding->eol_type == CODING_EOL_CRLF)
2638     {
2639       unsigned char c;
2640       unsigned char *src_end = source + src_bytes;
2641       unsigned char *dst_end = destination + dst_bytes;
2642       /* Since the maximum bytes produced by each loop is 2, we
2643          subtract 1 from DST_END to assure overflow checking is
2644          necessary only at the head of loop.  */
2645       unsigned char *adjusted_dst_end = dst_end - 1;
2646
2647       while (src < src_end && (dst_bytes
2648                                ? (dst < adjusted_dst_end)
2649                                : (dst < src - 1)))
2650         {
2651           c = *src++;
2652           if (c == '\n'
2653               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2654             *dst++ = '\r', *dst++ = '\n';
2655           else
2656             {
2657               *dst++ = c;
2658               if (BASE_LEADING_CODE_P (c))
2659                 coding->fake_multibyte = 1;
2660             }
2661         }
2662       if (src < src_end)
2663         result = CODING_FINISH_INSUFFICIENT_DST;
2664     }
2665   else
2666     {
2667       unsigned char c;
2668
2669       if (dst_bytes && src_bytes > dst_bytes)
2670         {
2671           src_bytes = dst_bytes;
2672           result = CODING_FINISH_INSUFFICIENT_DST;
2673         }
2674       if (dst_bytes)
2675         bcopy (source, destination, src_bytes);
2676       else
2677         safe_bcopy (source, destination, src_bytes);
2678       dst_bytes = src_bytes;
2679       if (coding->eol_type == CODING_EOL_CR)
2680         {
2681           while (src_bytes--)
2682             {
2683               if ((c = *dst++) == '\n')
2684                 dst[-1] = '\r';
2685               else if (BASE_LEADING_CODE_P (c))
2686                 coding->fake_multibyte = 1;
2687             }
2688         }
2689       else
2690         {
2691           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2692             {
2693               while (src_bytes--)
2694                 if (*dst++ == '\r') dst[-1] = '\n';
2695             }
2696           coding->fake_multibyte = 1;
2697         }
2698       src = source + dst_bytes;
2699       dst = destination + dst_bytes;
2700     }
2701
2702   coding->consumed = coding->consumed_char = src - source;
2703   coding->produced = coding->produced_char = dst - destination;
2704   return result;
2705 }
2706
2707 \f
2708 /*** 7. C library functions ***/
2709
2710 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2711    has a property `coding-system'.  The value of this property is a
2712    vector of length 5 (called as coding-vector).  Among elements of
2713    this vector, the first (element[0]) and the fifth (element[4])
2714    carry important information for decoding/encoding.  Before
2715    decoding/encoding, this information should be set in fields of a
2716    structure of type `coding_system'.
2717
2718    A value of property `coding-system' can be a symbol of another
2719    subsidiary coding-system.  In that case, Emacs gets coding-vector
2720    from that symbol.
2721
2722    `element[0]' contains information to be set in `coding->type'.  The
2723    value and its meaning is as follows:
2724
2725    0 -- coding_type_emacs_mule
2726    1 -- coding_type_sjis
2727    2 -- coding_type_iso2022
2728    3 -- coding_type_big5
2729    4 -- coding_type_ccl encoder/decoder written in CCL
2730    nil -- coding_type_no_conversion
2731    t -- coding_type_undecided (automatic conversion on decoding,
2732                                no-conversion on encoding)
2733
2734    `element[4]' contains information to be set in `coding->flags' and
2735    `coding->spec'.  The meaning varies by `coding->type'.
2736
   If `coding->type' is `coding_type_iso2022', element[4] is a vector
   of length 32 (of which the first 17 sub-elements are used now).
2739    Meanings of these sub-elements are:
2740
2741    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2742         If the value is an integer of valid charset, the charset is
2743         assumed to be designated to graphic register N initially.
2744
        If the value is negative, it is the negated value of a charset
        which reserves graphic register N, which means that the charset
        is not designated initially but should be designated to graphic
        register N just before encoding a character in that charset.
2749
2750         If the value is nil, graphic register N is never used on
2751         encoding.
2752
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2754         Each value takes t or nil.  See the section ISO2022 of
2755         `coding.h' for more information.
2756
2757    If `coding->type' is `coding_type_big5', element[4] is t to denote
2758    BIG5-ETen or nil to denote BIG5-HKU.
2759
2760    If `coding->type' takes the other value, element[4] is ignored.
2761
2762    Emacs Lisp's coding system also carries information about format of
2763    end-of-line in a value of property `eol-type'.  If the value is
2764    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2765    means CODING_EOL_CR.  If it is not integer, it should be a vector
2766    of subsidiary coding systems of which property `eol-type' has one
2767    of above values.
2768
2769 */
2770
2771 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2772    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2773    is setup so that no conversion is necessary and return -1, else
2774    return 0.  */
2775
2776 int
2777 setup_coding_system (coding_system, coding)
2778      Lisp_Object coding_system;
2779      struct coding_system *coding;
2780 {
2781   Lisp_Object coding_spec, coding_type, eol_type, plist;
2782   Lisp_Object val;
2783   int i;
2784
2785   /* Initialize some fields required for all kinds of coding systems.  */
2786   coding->symbol = coding_system;
2787   coding->common_flags = 0;
2788   coding->mode = 0;
2789   coding->heading_ascii = -1;
2790   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2791   coding_spec = Fget (coding_system, Qcoding_system);
2792   if (!VECTORP (coding_spec)
2793       || XVECTOR (coding_spec)->size != 5
2794       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2795     goto label_invalid_coding_system;
2796
2797   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2798   if (VECTORP (eol_type))
2799     {
2800       coding->eol_type = CODING_EOL_UNDECIDED;
2801       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2802     }
2803   else if (XFASTINT (eol_type) == 1)
2804     {
2805       coding->eol_type = CODING_EOL_CRLF;
2806       coding->common_flags
2807         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2808     }
2809   else if (XFASTINT (eol_type) == 2)
2810     {
2811       coding->eol_type = CODING_EOL_CR;
2812       coding->common_flags
2813         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2814     }
2815   else
2816     coding->eol_type = CODING_EOL_LF;
2817
2818   coding_type = XVECTOR (coding_spec)->contents[0];
2819   /* Try short cut.  */
2820   if (SYMBOLP (coding_type))
2821     {
2822       if (EQ (coding_type, Qt))
2823         {
2824           coding->type = coding_type_undecided;
2825           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2826         }
2827       else
2828         coding->type = coding_type_no_conversion;
2829       return 0;
2830     }
2831
2832   /* Initialize remaining fields.  */
2833   coding->composing = 0;
2834   coding->translation_table_for_decode = Qnil;
2835   coding->translation_table_for_encode = Qnil;
2836
2837   /* Get values of coding system properties:
2838      `post-read-conversion', `pre-write-conversion',
2839      `translation-table-for-decode', `translation-table-for-encode'.  */
2840   plist = XVECTOR (coding_spec)->contents[3];
2841   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2842   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2843   val = Fplist_get (plist, Qtranslation_table_for_decode);
2844   if (SYMBOLP (val))
2845     val = Fget (val, Qtranslation_table_for_decode);
2846   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2847   val = Fplist_get (plist, Qtranslation_table_for_encode);
2848   if (SYMBOLP (val))
2849     val = Fget (val, Qtranslation_table_for_encode);
2850   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
2851   val = Fplist_get (plist, Qcoding_category);
2852   if (!NILP (val))
2853     {
2854       val = Fget (val, Qcoding_category_index);
2855       if (INTEGERP (val))
2856         coding->category_idx = XINT (val);
2857       else
2858         goto label_invalid_coding_system;
2859     }
2860   else
2861     goto label_invalid_coding_system;
2862
2863   val = Fplist_get (plist, Qsafe_charsets);
2864   if (EQ (val, Qt))
2865     {
2866       for (i = 0; i <= MAX_CHARSET; i++)
2867         coding->safe_charsets[i] = 1;
2868     }
2869   else
2870     {
2871       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2872       while (CONSP (val))
2873         {
2874           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2875             coding->safe_charsets[i] = 1;
2876           val = XCONS (val)->cdr;
2877         }
2878     }
2879
2880   switch (XFASTINT (coding_type))
2881     {
2882     case 0:
2883       coding->type = coding_type_emacs_mule;
2884       if (!NILP (coding->post_read_conversion))
2885         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2886       if (!NILP (coding->pre_write_conversion))
2887         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2888       break;
2889
2890     case 1:
2891       coding->type = coding_type_sjis;
2892       coding->common_flags
2893         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2894       break;
2895
2896     case 2:
2897       coding->type = coding_type_iso2022;
2898       coding->common_flags
2899         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2900       {
2901         Lisp_Object val, temp;
2902         Lisp_Object *flags;
2903         int i, charset, reg_bits = 0;
2904
2905         val = XVECTOR (coding_spec)->contents[4];
2906
2907         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2908           goto label_invalid_coding_system;
2909
2910         flags = XVECTOR (val)->contents;
2911         coding->flags
2912           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2913              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2914              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2915              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2916              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2917              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2918              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2919              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2920              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2921              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2922              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2923              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2924              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2925              );
2926
2927         /* Invoke graphic register 0 to plane 0.  */
2928         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2929         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2930         CODING_SPEC_ISO_INVOCATION (coding, 1)
2931           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2932         /* Not single shifting at first.  */
2933         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2934         /* Beginning of buffer should also be regarded as bol. */
2935         CODING_SPEC_ISO_BOL (coding) = 1;
2936
2937         for (charset = 0; charset <= MAX_CHARSET; charset++)
2938           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2939         val = Vcharset_revision_alist;
2940         while (CONSP (val))
2941           {
2942             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2943             if (charset >= 0
2944                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2945                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2946               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2947             val = XCONS (val)->cdr;
2948           }
2949
2950         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2951            FLAGS[REG] can be one of below:
2952                 integer CHARSET: CHARSET occupies register I,
2953                 t: designate nothing to REG initially, but can be used
2954                   by any charsets,
2955                 list of integer, nil, or t: designate the first
2956                   element (if integer) to REG initially, the remaining
2957                   elements (if integer) is designated to REG on request,
2958                   if an element is t, REG can be used by any charsets,
2959                 nil: REG is never used.  */
2960         for (charset = 0; charset <= MAX_CHARSET; charset++)
2961           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2962             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2963         for (i = 0; i < 4; i++)
2964           {
2965             if (INTEGERP (flags[i])
2966                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2967                 || (charset = get_charset_id (flags[i])) >= 0)
2968               {
2969                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2970                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2971               }
2972             else if (EQ (flags[i], Qt))
2973               {
2974                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2975                 reg_bits |= 1 << i;
2976                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2977               }
2978             else if (CONSP (flags[i]))
2979               {
2980                 Lisp_Object tail;
2981                 tail = flags[i];
2982
2983                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2984                 if (INTEGERP (XCONS (tail)->car)
2985                     && (charset = XINT (XCONS (tail)->car),
2986                         CHARSET_VALID_P (charset))
2987                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2988                   {
2989                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2990                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2991                   }
2992                 else
2993                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2994                 tail = XCONS (tail)->cdr;
2995                 while (CONSP (tail))
2996                   {
2997                     if (INTEGERP (XCONS (tail)->car)
2998                         && (charset = XINT (XCONS (tail)->car),
2999                             CHARSET_VALID_P (charset))
3000                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
3001                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3002                         = i;
3003                     else if (EQ (XCONS (tail)->car, Qt))
3004                       reg_bits |= 1 << i;
3005                     tail = XCONS (tail)->cdr;
3006                   }
3007               }
3008             else
3009               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3010
3011             CODING_SPEC_ISO_DESIGNATION (coding, i)
3012               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3013           }
3014
3015         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3016           {
3017             /* REG 1 can be used only by locking shift in 7-bit env.  */
3018             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3019               reg_bits &= ~2;
3020             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3021               /* Without any shifting, only REG 0 and 1 can be used.  */
3022               reg_bits &= 3;
3023           }
3024
3025         if (reg_bits)
3026           for (charset = 0; charset <= MAX_CHARSET; charset++)
3027             {
3028               if (CHARSET_VALID_P (charset))
3029                 {
3030                   /* There exist some default graphic registers to be
3031                      used CHARSET.  */
3032
3033                   /* We had better avoid designating a charset of
3034                      CHARS96 to REG 0 as far as possible.  */
3035                   if (CHARSET_CHARS (charset) == 96)
3036                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3037                       = (reg_bits & 2
3038                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3039                   else
3040                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3041                       = (reg_bits & 1
3042                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3043                 }
3044             }
3045       }
3046       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3047       coding->spec.iso2022.last_invalid_designation_register = -1;
3048       break;
3049
3050     case 3:
3051       coding->type = coding_type_big5;
3052       coding->common_flags
3053         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3054       coding->flags
3055         = (NILP (XVECTOR (coding_spec)->contents[4])
3056            ? CODING_FLAG_BIG5_HKU
3057            : CODING_FLAG_BIG5_ETEN);
3058       break;
3059
3060     case 4:
3061       coding->type = coding_type_ccl;
3062       coding->common_flags
3063         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3064       {
3065         Lisp_Object val;
3066         Lisp_Object decoder, encoder;
3067
3068         val = XVECTOR (coding_spec)->contents[4];
3069         if (CONSP  (val)
3070             && SYMBOLP (XCONS (val)->car)
3071             && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
3072             && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
3073             && SYMBOLP (XCONS (val)->cdr)
3074             && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
3075             && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
3076           {
3077             setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
3078             setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
3079           }
3080         else
3081           goto label_invalid_coding_system;
3082
3083         bzero (coding->spec.ccl.valid_codes, 256);
3084         val = Fplist_get (plist, Qvalid_codes);
3085         if (CONSP (val))
3086           {
3087             Lisp_Object this;
3088
3089             for (this = XCONS (val)->car; CONSP (val); val = XCONS (val)->cdr)
3090               {
3091                 if (INTEGERP (this)
3092                     && XINT (this) >= 0 && XINT (this) < 256)
3093                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3094                 else if (CONSP (this)
3095                          && INTEGERP (XCONS (this)->car)
3096                          && INTEGERP (XCONS (this)->cdr))
3097                   {
3098                     int start = XINT (XCONS (this)->car);
3099                     int end = XINT (XCONS (this)->cdr);
3100
3101                     if (start >= 0 && start <= end && end < 256)
3102                       while (start < end)
3103                         coding->spec.ccl.valid_codes[start++] = 1;
3104                   }
3105               }
3106           }
3107       }
3108       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3109       break;
3110
3111     case 5:
3112       coding->type = coding_type_raw_text;
3113       break;
3114
3115     default:
3116       goto label_invalid_coding_system;
3117     }
3118   return 0;
3119
3120  label_invalid_coding_system:
3121   coding->type = coding_type_no_conversion;
3122   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3123   coding->common_flags = 0;
3124   coding->eol_type = CODING_EOL_LF;
3125   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3126   return -1;
3127 }
3128
3129 /* Setup raw-text or one of its subsidiaries in the structure
3130    coding_system CODING according to the already setup value eol_type
3131    in CODING.  CODING should be setup for some coding system in
3132    advance.  */
3133
3134 void
3135 setup_raw_text_coding_system (coding)
3136      struct coding_system *coding;
3137 {
3138   if (coding->type != coding_type_raw_text)
3139     {
3140       coding->symbol = Qraw_text;
3141       coding->type = coding_type_raw_text;
3142       if (coding->eol_type != CODING_EOL_UNDECIDED)
3143         {
3144           Lisp_Object subsidiaries;
3145           subsidiaries = Fget (Qraw_text, Qeol_type);
3146
3147           if (VECTORP (subsidiaries)
3148               && XVECTOR (subsidiaries)->size == 3)
3149             coding->symbol
3150               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3151         }
3152     }
3153   return;
3154 }
3155
3156 /* Emacs has a mechanism to automatically detect a coding system if it
3157    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3158    it's impossible to distinguish some coding systems accurately
3159    because they use the same range of codes.  So, at first, coding
3160    systems are grouped into the categories listed below:
3161
3162    o coding-category-emacs-mule
3163
3164         The category for a coding system which has the same code range
3165         as Emacs' internal format.  Assigned the coding-system (Lisp
3166         symbol) `emacs-mule' by default.
3167
3168    o coding-category-sjis
3169
3170         The category for a coding system which has the same code range
3171         as SJIS.  Assigned the coding-system (Lisp
3172         symbol) `japanese-shift-jis' by default.
3173
3174    o coding-category-iso-7
3175
3176         The category for a coding system which has the same code range
3177         as ISO2022 of 7-bit environment.  This doesn't use any locking
3178         shift and single shift functions.  This can encode/decode all
3179         charsets.  Assigned the coding-system (Lisp symbol)
3180         `iso-2022-7bit' by default.
3181
3182    o coding-category-iso-7-tight
3183
3184         Same as coding-category-iso-7 except that this can
3185         encode/decode only the specified charsets.
3186
3187    o coding-category-iso-8-1
3188
3189         The category for a coding system which has the same code range
3190         as ISO2022 of 8-bit environment and graphic plane 1 used only
3191         for DIMENSION1 charset.  This doesn't use any locking shift
3192         and single shift functions.  Assigned the coding-system (Lisp
3193         symbol) `iso-latin-1' by default.
3194
3195    o coding-category-iso-8-2
3196
3197         The category for a coding system which has the same code range
3198         as ISO2022 of 8-bit environment and graphic plane 1 used only
3199         for DIMENSION2 charset.  This doesn't use any locking shift
3200         and single shift functions.  Assigned the coding-system (Lisp
3201         symbol) `japanese-iso-8bit' by default.
3202
3203    o coding-category-iso-7-else
3204
3205         The category for a coding system which has the same code range
3206         as ISO2022 of 7-bit environment but uses locking shift or
3207         single shift functions.  Assigned the coding-system (Lisp
3208         symbol) `iso-2022-7bit-lock' by default.
3209
3210    o coding-category-iso-8-else
3211
3212         The category for a coding system which has the same code range
3213         as ISO2022 of 8-bit environment but uses locking shift or
3214         single shift functions.  Assigned the coding-system (Lisp
3215         symbol) `iso-2022-8bit-ss2' by default.
3216
3217    o coding-category-big5
3218
3219         The category for a coding system which has the same code range
3220         as BIG5.  Assigned the coding-system (Lisp symbol)
3221         `cn-big5' by default.
3222
3223    o coding-category-ccl
3224
3225         The category for a coding system of which encoder/decoder is
3226         written in CCL programs.  The default value is nil, i.e., no
3227         coding system is assigned.
3228
3229    o coding-category-binary
3230
3231         The category for a coding system not categorized in any of the
3232         above.  Assigned the coding-system (Lisp symbol)
3233         `no-conversion' by default.
3234
3235    Each of them is a Lisp symbol and the value is an actual
3236    `coding-system's (this is also a Lisp symbol) assigned by a user.
3237    What Emacs does actually is to detect a category of coding system.
3238    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3239    decide only one possible category, it selects a category of the
3240    highest priority.  Priorities of categories are also specified by a
3241    user in a Lisp variable `coding-category-list'.
3242
3243 */
3244
3245 static
3246 int ascii_skip_code[256];
3247
3248 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3249    If it detects possible coding systems, return an integer in which
3250    appropriate flag bits are set.  Flag bits are defined by macros
3251    CODING_CATEGORY_MASK_XXX in `coding.h'.
3252
3253    How many ASCII characters are at the head is returned as *SKIP.  */
3254
3255 static int
3256 detect_coding_mask (source, src_bytes, priorities, skip)
3257      unsigned char *source;
3258      int src_bytes, *priorities, *skip;
3259 {
3260   register unsigned char c;
3261   unsigned char *src = source, *src_end = source + src_bytes;
3262   unsigned int mask;
3263   int i;
3264
3265   /* At first, skip all ASCII characters and control characters except
3266      for three ISO2022 specific control characters.  */
3267   ascii_skip_code[ISO_CODE_SO] = 0;
3268   ascii_skip_code[ISO_CODE_SI] = 0;
3269   ascii_skip_code[ISO_CODE_ESC] = 0;
3270
3271  label_loop_detect_coding:
3272   while (src < src_end && ascii_skip_code[*src]) src++;
3273   *skip = src - source;
3274
3275   if (src >= src_end)
3276     /* We found nothing other than ASCII.  There's nothing to do.  */
3277     return 0;
3278
3279   c = *src;
3280   /* The text seems to be encoded in some multilingual coding system.
3281      Now, try to find in which coding system the text is encoded.  */
3282   if (c < 0x80)
3283     {
3284       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3285       /* C is an ISO2022 specific control code of C0.  */
3286       mask = detect_coding_iso2022 (src, src_end);
3287       if (mask == 0)
3288         {
3289           /* No valid ISO2022 code follows C.  Try again.  */
3290           src++;
3291           if (c == ISO_CODE_ESC)
3292             ascii_skip_code[ISO_CODE_ESC] = 1;
3293           else
3294             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3295           goto label_loop_detect_coding;
3296         }
3297       if (priorities)
3298         goto label_return_highest_only;
3299     }
3300   else
3301     {
3302       int try;
3303
3304       if (c < 0xA0)
3305         {
3306           /* C is the first byte of SJIS character code,
3307              or a leading-code of Emacs' internal format (emacs-mule).  */
3308           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3309
3310           /* Or, if C is a special latin extra code,
3311              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3312              or is an ISO2022 control-sequence-introducer (CSI),
3313              we should also consider the possibility of ISO2022 codings.  */
3314           if ((VECTORP (Vlatin_extra_code_table)
3315                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3316               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3317               || (c == ISO_CODE_CSI
3318                   && (src < src_end
3319                       && (*src == ']'
3320                           || ((*src == '0' || *src == '1' || *src == '2')
3321                               && src + 1 < src_end
3322                               && src[1] == ']')))))
3323             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3324                      | CODING_CATEGORY_MASK_ISO_8BIT);
3325         }
3326       else
3327         /* C is a character of ISO2022 in graphic plane right,
3328            or a SJIS's 1-byte character code (i.e. JISX0201),
3329            or the first byte of BIG5's 2-byte code.  */
3330         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3331                 | CODING_CATEGORY_MASK_ISO_8BIT
3332                 | CODING_CATEGORY_MASK_SJIS
3333                 | CODING_CATEGORY_MASK_BIG5);
3334
3335       /* Or, we may have to consider the possibility of CCL.  */
3336       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3337           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3338               ->spec.ccl.valid_codes)[c])
3339         try |= CODING_CATEGORY_MASK_CCL;
3340
3341       mask = 0;
3342       if (priorities)
3343         {
3344           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3345             {
3346               if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
3347                 mask = detect_coding_iso2022 (src, src_end);
3348               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3349                 mask = detect_coding_sjis (src, src_end);
3350               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3351                 mask = detect_coding_big5 (src, src_end);
3352               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3353                 mask = detect_coding_emacs_mule (src, src_end);
3354               else if (priorities[i] & CODING_CATEGORY_MASK_CCL)
3355                 mask = detect_coding_ccl (src, src_end);
3356               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3357                 mask = CODING_CATEGORY_MASK_RAW_TEXT;
3358               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3359                 mask = CODING_CATEGORY_MASK_BINARY;
3360               if (mask)
3361                 goto label_return_highest_only;
3362             }
3363           return CODING_CATEGORY_MASK_RAW_TEXT;
3364         }
3365       if (try & CODING_CATEGORY_MASK_ISO)
3366         mask |= detect_coding_iso2022 (src, src_end);
3367       if (try & CODING_CATEGORY_MASK_SJIS)
3368         mask |= detect_coding_sjis (src, src_end);
3369       if (try & CODING_CATEGORY_MASK_BIG5)
3370         mask |= detect_coding_big5 (src, src_end);
3371       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3372         mask |= detect_coding_emacs_mule (src, src_end);
3373       if (try & CODING_CATEGORY_MASK_CCL)
3374         mask |= detect_coding_ccl (src, src_end);
3375     }
3376   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3377
3378  label_return_highest_only:
3379   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3380     {
3381       if (mask & priorities[i])
3382         return priorities[i];
3383     }
3384   return CODING_CATEGORY_MASK_RAW_TEXT;
3385 }
3386
3387 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3388    The information of the detected coding system is set in CODING.  */
3389
3390 void
3391 detect_coding (coding, src, src_bytes)
3392      struct coding_system *coding;
3393      unsigned char *src;
3394      int src_bytes;
3395 {
3396   unsigned int idx;
3397   int skip, mask, i;
3398   Lisp_Object val;
3399
3400   val = Vcoding_category_list;
3401   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3402   coding->heading_ascii = skip;
3403
3404   if (!mask) return;
3405
3406   /* We found a single coding system of the highest priority in MASK.  */
3407   idx = 0;
3408   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3409   if (! mask)
3410     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3411
3412   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3413
3414   if (coding->eol_type != CODING_EOL_UNDECIDED)
3415     {
3416       Lisp_Object tmp;
3417
3418       tmp = Fget (val, Qeol_type);
3419       if (VECTORP (tmp))
3420         val = XVECTOR (tmp)->contents[coding->eol_type];
3421     }
3422   setup_coding_system (val, coding);
3423   /* Set this again because setup_coding_system reset this member.  */
3424   coding->heading_ascii = skip;
3425 }
3426
/* Detect how end-of-line of a text of length SRC_BYTES pointed by
   SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
   CODING_EOL_CR, CODING_EOL_INCONSISTENT (when two different formats
   are seen), and CODING_EOL_UNDECIDED (when no end-of-line is seen).

   At most MAX_EOL_CHECK_COUNT end-of-lines are examined.

   How many non-eol characters are at the head is returned as *SKIP,
   i.e. the offset of the first end-of-line, or SRC_BYTES if there is
   none.  */

#define MAX_EOL_CHECK_COUNT 3

static int
detect_eol_type (source, src_bytes, skip)
     unsigned char *source;
     int src_bytes, *skip;
{
  unsigned char *src = source, *src_end = src + src_bytes;
  unsigned char c;
  int total = 0;                /* How many end-of-lines are found so far.  */
  int eol_type = CODING_EOL_UNDECIDED;
  int this_eol_type;

  *skip = 0;

  while (src < src_end && total < MAX_EOL_CHECK_COUNT)
    {
      c = *src++;
      if (c == '\n' || c == '\r')
        {
          /* Record the offset of the first end-of-line only.  TOTAL,
             not *SKIP, tells whether one was already seen: an
             end-of-line at offset 0 legitimately yields *SKIP == 0
             and must not be overwritten by a later one.  */
          if (total == 0)
            *skip = src - 1 - source;
          total++;
          if (c == '\n')
            this_eol_type = CODING_EOL_LF;
          else if (src >= src_end || *src != '\n')
            this_eol_type = CODING_EOL_CR;
          else
            /* CR immediately followed by LF: consume the LF too.  */
            this_eol_type = CODING_EOL_CRLF, src++;

          if (eol_type == CODING_EOL_UNDECIDED)
            /* This is the first end-of-line.  */
            eol_type = this_eol_type;
          else if (eol_type != this_eol_type)
            {
              /* The found type is different from what found before.  */
              eol_type = CODING_EOL_INCONSISTENT;
              break;
            }
        }
    }

  /* No end-of-line at all: the whole text is the head.  */
  if (total == 0)
    *skip = src_end - source;
  return eol_type;
}
3479
3480 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3481    is encoded.  If it detects an appropriate format of end-of-line, it
3482    sets the information in *CODING.  */
3483
3484 void
3485 detect_eol (coding, src, src_bytes)
3486      struct coding_system *coding;
3487      unsigned char *src;
3488      int src_bytes;
3489 {
3490   Lisp_Object val;
3491   int skip;
3492   int eol_type = detect_eol_type (src, src_bytes, &skip);
3493
3494   if (coding->heading_ascii > skip)
3495     coding->heading_ascii = skip;
3496   else
3497     skip = coding->heading_ascii;
3498
3499   if (eol_type == CODING_EOL_UNDECIDED)
3500     return;
3501   if (eol_type == CODING_EOL_INCONSISTENT)
3502     {
3503 #if 0
3504       /* This code is suppressed until we find a better way to
3505          distinguish raw text file and binary file.  */
3506
3507       /* If we have already detected that the coding is raw-text, the
3508          coding should actually be no-conversion.  */
3509       if (coding->type == coding_type_raw_text)
3510         {
3511           setup_coding_system (Qno_conversion, coding);
3512           return;
3513         }
3514       /* Else, let's decode only text code anyway.  */
3515 #endif /* 0 */
3516       eol_type = CODING_EOL_LF;
3517     }
3518
3519   val = Fget (coding->symbol, Qeol_type);
3520   if (VECTORP (val) && XVECTOR (val)->size == 3)
3521     {
3522       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3523       coding->heading_ascii = skip;
3524     }
3525 }
3526
/* Constant number of bytes added on top of every conversion buffer
   size estimate.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which decoding with CODING (a pointer to struct
   coding_system) may expand the source: 3 for ISO2022, 2 for SJIS and
   BIG5, 1 for raw-text, the decoder's own buf_magnification for CCL,
   and 2 for everything else.  The argument is parenthesized so that
   any pointer expression (e.g. `&cs') can be passed; note that it is
   evaluated more than once.  */
#define DECODING_BUFFER_MAG(coding)                                     \
  ((coding)->type == coding_type_iso2022                                \
   ? 3                                                                  \
   : (((coding)->type == coding_type_sjis                               \
       || (coding)->type == coding_type_big5)                           \
      ? 2                                                               \
      : ((coding)->type == coding_type_raw_text                         \
         ? 1                                                            \
         : ((coding)->type == coding_type_ccl                           \
            ? (coding)->spec.ccl.decoder.buf_magnification              \
            : 2))))
3539
3540 /* Return maximum size (bytes) of a buffer enough for decoding
3541    SRC_BYTES of text encoded in CODING.  */
3542
3543 int
3544 decoding_buffer_size (coding, src_bytes)
3545      struct coding_system *coding;
3546      int src_bytes;
3547 {
3548   return (src_bytes * DECODING_BUFFER_MAG (coding)
3549           + CONVERSION_BUFFER_EXTRA_ROOM);
3550 }
3551
3552 /* Return maximum size (bytes) of a buffer enough for encoding
3553    SRC_BYTES of text to CODING.  */
3554
3555 int
3556 encoding_buffer_size (coding, src_bytes)
3557      struct coding_system *coding;
3558      int src_bytes;
3559 {
3560   int magnification;
3561
3562   if (coding->type == coding_type_ccl)
3563     magnification = coding->spec.ccl.encoder.buf_magnification;
3564   else
3565     magnification = 3;
3566
3567   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3568 }
3569
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* Scratch buffer shared by encoding and decoding, and its current
   capacity in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a buffer of at least SIZE bytes to be used for
   encoding or decoding.  The shared buffer is returned as is when it
   is already large enough; otherwise it is replaced by a larger one
   whose size is obtained by repeated doubling.  The contents of the
   old buffer are NOT carried over.

   Memory comes from xmalloc and the result is not checked here.
   NOTE(review): whether NULL can ever be returned on memory
   exhaustion depends on xmalloc -- confirm.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size;

      /* Doubling a zero capacity would never reach SIZE, so start
         growing from the minimum size when the current capacity is
         smaller than that (e.g. before the buffer is first set up).  */
      if (real_size < MINIMUM_CONVERSION_BUFFER_SIZE)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;

      while (real_size < size) real_size *= 2;
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3598
3599 int
3600 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3601      struct coding_system *coding;
3602      unsigned char *source, *destination;
3603      int src_bytes, dst_bytes, encodep;
3604 {
3605   struct ccl_program *ccl
3606     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3607   int result;
3608
3609   coding->produced = ccl_driver (ccl, source, destination,
3610                                  src_bytes, dst_bytes, &(coding->consumed));
3611   if (encodep)
3612     {
3613       coding->produced_char = coding->produced;
3614       coding->consumed_char
3615         = multibyte_chars_in_text (source, coding->consumed);
3616     }
3617   else
3618     {
3619       coding->produced_char
3620         = multibyte_chars_in_text (destination, coding->produced);
3621       coding->consumed_char = coding->consumed;
3622     }
3623   switch (ccl->status)
3624     {
3625     case CCL_STAT_SUSPEND_BY_SRC:
3626       result = CODING_FINISH_INSUFFICIENT_SRC;
3627       break;
3628     case CCL_STAT_SUSPEND_BY_DST:
3629       result = CODING_FINISH_INSUFFICIENT_DST;
3630       break;
3631     default:
3632       result = CODING_FINISH_NORMAL;
3633       break;
3634     }
3635   return result;
3636 }
3637
3638 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3639    decoding, it may detect coding system and format of end-of-line if
3640    those are not yet decided.  */
3641
3642 int
3643 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3644      struct coding_system *coding;
3645      unsigned char *source, *destination;
3646      int src_bytes, dst_bytes;
3647 {
3648   int result;
3649
3650   if (src_bytes <= 0)
3651     {
3652       coding->produced = coding->produced_char = 0;
3653       coding->consumed = coding->consumed_char = 0;
3654       coding->fake_multibyte = 0;
3655       return CODING_FINISH_NORMAL;
3656     }
3657
3658   if (coding->type == coding_type_undecided)
3659     detect_coding (coding, source, src_bytes);
3660
3661   if (coding->eol_type == CODING_EOL_UNDECIDED)
3662     detect_eol (coding, source, src_bytes);
3663
3664   switch (coding->type)
3665     {
3666     case coding_type_emacs_mule:
3667     case coding_type_undecided:
3668     case coding_type_raw_text:
3669       if (coding->eol_type == CODING_EOL_LF
3670           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3671         goto label_no_conversion;
3672       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3673       break;
3674
3675     case coding_type_sjis:
3676       result = decode_coding_sjis_big5 (coding, source, destination,
3677                                         src_bytes, dst_bytes, 1);
3678       break;
3679
3680     case coding_type_iso2022:
3681       result = decode_coding_iso2022 (coding, source, destination,
3682                                       src_bytes, dst_bytes);
3683       break;
3684
3685     case coding_type_big5:
3686       result = decode_coding_sjis_big5 (coding, source, destination,
3687                                         src_bytes, dst_bytes, 0);
3688       break;
3689
3690     case coding_type_ccl:
3691       result = ccl_coding_driver (coding, source, destination,
3692                                   src_bytes, dst_bytes, 0);
3693       break;
3694
3695     default:                    /* i.e. case coding_type_no_conversion: */
3696     label_no_conversion:
3697       if (dst_bytes && src_bytes > dst_bytes)
3698         {
3699           coding->produced = dst_bytes;
3700           result = CODING_FINISH_INSUFFICIENT_DST;
3701         }
3702       else
3703         {
3704           coding->produced = src_bytes;
3705           result = CODING_FINISH_NORMAL;
3706         }
3707       if (dst_bytes)
3708         bcopy (source, destination, coding->produced);
3709       else
3710         safe_bcopy (source, destination, coding->produced);
3711       coding->fake_multibyte = 1;
3712       coding->consumed
3713         = coding->consumed_char = coding->produced_char = coding->produced;
3714       break;
3715     }
3716
3717   return result;
3718 }
3719
3720 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3721
3722 int
3723 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3724      struct coding_system *coding;
3725      unsigned char *source, *destination;
3726      int src_bytes, dst_bytes;
3727 {
3728   int result;
3729
3730   if (src_bytes <= 0)
3731     {
3732       coding->produced = coding->produced_char = 0;
3733       coding->consumed = coding->consumed_char = 0;
3734       coding->fake_multibyte = 0;
3735       return CODING_FINISH_NORMAL;
3736     }
3737
3738   switch (coding->type)
3739     {
3740     case coding_type_emacs_mule:
3741     case coding_type_undecided:
3742     case coding_type_raw_text:
3743       if (coding->eol_type == CODING_EOL_LF
3744           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3745         goto label_no_conversion;
3746       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3747       break;
3748
3749     case coding_type_sjis:
3750       result = encode_coding_sjis_big5 (coding, source, destination,
3751                                         src_bytes, dst_bytes, 1);
3752       break;
3753
3754     case coding_type_iso2022:
3755       result = encode_coding_iso2022 (coding, source, destination,
3756                                       src_bytes, dst_bytes);
3757       break;
3758
3759     case coding_type_big5:
3760       result = encode_coding_sjis_big5 (coding, source, destination,
3761                                         src_bytes, dst_bytes, 0);
3762       break;
3763
3764     case coding_type_ccl:
3765       result = ccl_coding_driver (coding, source, destination,
3766                                   src_bytes, dst_bytes, 1);
3767       break;
3768
3769     default:                    /* i.e. case coding_type_no_conversion: */
3770     label_no_conversion:
3771       if (dst_bytes && src_bytes > dst_bytes)
3772         {
3773           coding->produced = dst_bytes;
3774           result = CODING_FINISH_INSUFFICIENT_DST;
3775         }
3776       else
3777         {
3778           coding->produced = src_bytes;
3779           result = CODING_FINISH_NORMAL;
3780         }
3781       if (dst_bytes)
3782         bcopy (source, destination, coding->produced);
3783       else
3784         safe_bcopy (source, destination, coding->produced);
3785       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3786         {
3787           unsigned char *p = destination, *pend = p + coding->produced;
3788           while (p < pend)
3789             if (*p++ == '\015') p[-1] = '\n';
3790         }
3791       coding->fake_multibyte = 1;
3792       coding->consumed
3793         = coding->consumed_char = coding->produced_char = coding->produced;
3794       break;
3795     }
3796
3797   return result;
3798 }
3799
3800 /* Scan text in the region between *BEG and *END (byte positions),
3801    skip characters which we don't have to decode by coding system
3802    CODING at the head and tail, then set *BEG and *END to the region
3803    of the text we actually have to convert.  The caller should move
3804    the gap out of the region in advance.
3805
3806    If STR is not NULL, *BEG and *END are indices into STR.  */
3807
3808 static void
3809 shrink_decoding_region (beg, end, coding, str)
3810      int *beg, *end;
3811      struct coding_system *coding;
3812      unsigned char *str;
3813 {
3814   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3815   int eol_conversion;
3816
3817   if (coding->type == coding_type_ccl
3818       || coding->type == coding_type_undecided
3819       || !NILP (coding->post_read_conversion))
3820     {
3821       /* We can't skip any data.  */
3822       return;
3823     }
3824   else if (coding->type == coding_type_no_conversion)
3825     {
3826       /* We need no conversion, but don't have to skip any data here.
3827          Decoding routine handles them effectively anyway.  */
3828       return;
3829     }
3830
3831   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3832
3833   if ((! eol_conversion) && (coding->heading_ascii >= 0))
3834     /* Detection routine has already found how much we can skip at the
3835        head.  */
3836     *beg += coding->heading_ascii;
3837
3838   if (str)
3839     {
3840       begp_orig = begp = str + *beg;
3841       endp_orig = endp = str + *end;
3842     }
3843   else
3844     {
3845       begp_orig = begp = BYTE_POS_ADDR (*beg);
3846       endp_orig = endp = begp + *end - *beg;
3847     }
3848
3849   switch (coding->type)
3850     {
3851     case coding_type_emacs_mule:
3852     case coding_type_raw_text:
3853       if (eol_conversion)
3854         {
3855           if (coding->heading_ascii < 0)
3856             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3857           while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
3858             endp--;
3859           /* Do not consider LF as ascii if preceded by CR, since that
3860              confuses eol decoding. */
3861           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3862             endp++;
3863         }
3864       else
3865         begp = endp;
3866       break;
3867
3868     case coding_type_sjis:
3869     case coding_type_big5:
3870       /* We can skip all ASCII characters at the head.  */
3871       if (coding->heading_ascii < 0)
3872         {
3873           if (eol_conversion)
3874             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3875           else
3876             while (begp < endp && *begp < 0x80) begp++;
3877         }
3878       /* We can skip all ASCII characters at the tail except for the
3879          second byte of SJIS or BIG5 code.  */
3880       if (eol_conversion)
3881         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3882       else
3883         while (begp < endp && endp[-1] < 0x80) endp--;
3884       /* Do not consider LF as ascii if preceded by CR, since that
3885          confuses eol decoding. */
3886       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3887         endp++;
3888       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3889         endp++;
3890       break;
3891
3892     default:            /* i.e. case coding_type_iso2022: */
3893       if (coding->heading_ascii < 0)
3894         {
3895           /* We can skip all ASCII characters at the head except for a
3896              few control codes.  */
3897           while (begp < endp && (c = *begp) < 0x80
3898                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3899                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3900                  && (!eol_conversion || c != ISO_CODE_LF))
3901             begp++;
3902         }
3903       switch (coding->category_idx)
3904         {
3905         case CODING_CATEGORY_IDX_ISO_8_1:
3906         case CODING_CATEGORY_IDX_ISO_8_2:
3907           /* We can skip all ASCII characters at the tail.  */
3908           if (eol_conversion)
3909             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3910           else
3911             while (begp < endp && endp[-1] < 0x80) endp--;
3912           /* Do not consider LF as ascii if preceded by CR, since that
3913              confuses eol decoding. */
3914           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3915             endp++;
3916           break;
3917
3918         case CODING_CATEGORY_IDX_ISO_7:
3919         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3920           /* We can skip all charactes at the tail except for ESC and
3921              the following 2-byte at the tail.  */
3922           if (eol_conversion)
3923             while (begp < endp
3924                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3925               endp--;
3926           else
3927             while (begp < endp
3928                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3929               endp--;
3930           /* Do not consider LF as ascii if preceded by CR, since that
3931              confuses eol decoding. */
3932           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3933             endp++;
3934           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3935             {
3936               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3937                 /* This is an ASCII designation sequence.  We can
3938                     surely skip the tail.  */
3939                 endp += 2;
3940               else
3941                 /* Hmmm, we can't skip the tail.  */
3942                 endp = endp_orig;
3943             }
3944         }
3945     }
3946   *beg += begp - begp_orig;
3947   *end += endp - endp_orig;
3948   return;
3949 }
3950
3951 /* Like shrink_decoding_region but for encoding.  */
3952
3953 static void
3954 shrink_encoding_region (beg, end, coding, str)
3955      int *beg, *end;
3956      struct coding_system *coding;
3957      unsigned char *str;
3958 {
3959   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3960   int eol_conversion;
3961
3962   if (coding->type == coding_type_ccl)
3963     /* We can't skip any data.  */
3964     return;
3965   else if (coding->type == coding_type_no_conversion)
3966     {
3967       /* We need no conversion.  */
3968       *beg = *end;
3969       return;
3970     }
3971
3972   if (str)
3973     {
3974       begp_orig = begp = str + *beg;
3975       endp_orig = endp = str + *end;
3976     }
3977   else
3978     {
3979       begp_orig = begp = BYTE_POS_ADDR (*beg);
3980       endp_orig = endp = begp + *end - *beg;
3981     }
3982
3983   eol_conversion = (coding->eol_type == CODING_EOL_CR
3984                     || coding->eol_type == CODING_EOL_CRLF);
3985
3986   /* Here, we don't have to check coding->pre_write_conversion because
3987      the caller is expected to have handled it already.  */
3988   switch (coding->type)
3989     {
3990     case coding_type_undecided:
3991     case coding_type_emacs_mule:
3992     case coding_type_raw_text:
3993       if (eol_conversion)
3994         {
3995           while (begp < endp && *begp != '\n') begp++;
3996           while (begp < endp && endp[-1] != '\n') endp--;
3997         }
3998       else
3999         begp = endp;
4000       break;
4001
4002     case coding_type_iso2022:
4003       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4004         {
4005           unsigned char *bol = begp;
4006           while (begp < endp && *begp < 0x80)
4007             {
4008               begp++;
4009               if (begp[-1] == '\n')
4010                 bol = begp;
4011             }
4012           begp = bol;
4013           goto label_skip_tail;
4014         }
4015       /* fall down ... */
4016
4017     default:
4018       /* We can skip all ASCII characters at the head and tail.  */
4019       if (eol_conversion)
4020         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4021       else
4022         while (begp < endp && *begp < 0x80) begp++;
4023     label_skip_tail:
4024       if (eol_conversion)
4025         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4026       else
4027         while (begp < endp && *(endp - 1) < 0x80) endp--;
4028       break;
4029     }
4030
4031   *beg += begp - begp_orig;
4032   *end += endp - endp_orig;
4033   return;
4034 }
4035
4036 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4037    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4038    coding system CODING, and return the status code of code conversion
4039    (currently, this value has no meaning).
4040
4041    How many characters (and bytes) are converted to how many
4042    characters (and bytes) are recorded in members of the structure
4043    CODING.
4044
4045    If REPLACE is nonzero, we do various things as if the original text
4046    is deleted and a new text is inserted.  See the comments in
4047    replace_range (insdel.c) to know what we are doing.  */
4048
4049 int
4050 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4051      int from, from_byte, to, to_byte, encodep, replace;
4052      struct coding_system *coding;
4053 {
4054   int len = to - from, len_byte = to_byte - from_byte;
4055   int require, inserted, inserted_byte;
4056   int head_skip, tail_skip, total_skip;
4057   Lisp_Object saved_coding_symbol;
4058   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
4059   int first = 1;
4060   int fake_multibyte = 0;
4061   unsigned char *src, *dst;
4062   Lisp_Object deletion;
4063
4064   deletion = Qnil;
4065   saved_coding_symbol = Qnil;
4066
4067   if (from < PT && PT < to)
4068     SET_PT_BOTH (from, from_byte);
4069
4070   if (replace)
4071     {
4072       int saved_from = from;
4073
4074       prepare_to_modify_buffer (from, to, &from);
4075       if (saved_from != from)
4076         {
4077           to = from + len;
4078           if (multibyte)
4079             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4080           else
4081             from_byte = from, to_byte = to;
4082           len_byte = to_byte - from_byte;
4083         }
4084     }
4085
4086   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4087     {
4088       /* We must detect encoding of text and eol format.  */
4089
4090       if (from < GPT && to > GPT)
4091         move_gap_both (from, from_byte);
4092       if (coding->type == coding_type_undecided)
4093         {
4094           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4095           if (coding->type == coding_type_undecided)
4096             /* It seems that the text contains only ASCII, but we
4097                should not left it undecided because the deeper
4098                decoding routine (decode_coding) tries to detect the
4099                encodings again in vain.  */
4100             coding->type = coding_type_emacs_mule;
4101         }
4102       if (coding->eol_type == CODING_EOL_UNDECIDED)
4103         {
4104           saved_coding_symbol = coding->symbol;
4105           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4106           if (coding->eol_type == CODING_EOL_UNDECIDED)
4107             coding->eol_type = CODING_EOL_LF;
4108           /* We had better recover the original eol format if we
4109              encounter an inconsitent eol format while decoding.  */
4110           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4111         }
4112     }
4113
4114   coding->consumed_char = len, coding->consumed = len_byte;
4115
4116   if (encodep
4117       ? ! CODING_REQUIRE_ENCODING (coding)
4118       : ! CODING_REQUIRE_DECODING (coding))
4119     {
4120       coding->produced = len_byte;
4121       if (multibyte
4122           && ! replace
4123           /* See the comment of the member heading_ascii in coding.h.  */
4124           && coding->heading_ascii < len_byte)
4125         {
4126           /* We still may have to combine byte at the head and the
4127              tail of the text in the region.  */
4128           if (from < GPT && GPT < to)
4129             move_gap_both (to, to_byte);
4130           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4131           adjust_after_insert (from, from_byte, to, to_byte, len);
4132           coding->produced_char = len;
4133         }
4134       else
4135         {
4136           if (!replace)
4137             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4138           coding->produced_char = len_byte;
4139         }
4140       return 0;
4141     }
4142
4143   /* Now we convert the text.  */
4144
4145   /* For encoding, we must process pre-write-conversion in advance.  */
4146   if (encodep
4147       && ! NILP (coding->pre_write_conversion)
4148       && SYMBOLP (coding->pre_write_conversion)
4149       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4150     {
4151       /* The function in pre-write-conversion may put a new text in a
4152          new buffer.  */
4153       struct buffer *prev = current_buffer, *new;
4154
4155       call2 (coding->pre_write_conversion,
4156              make_number (from), make_number (to));
4157       if (current_buffer != prev)
4158         {
4159           len = ZV - BEGV;
4160           new = current_buffer;
4161           set_buffer_internal_1 (prev);
4162           del_range_2 (from, from_byte, to, to_byte);
4163           insert_from_buffer (new, BEG, len, 0);
4164           to = from + len;
4165           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4166           len_byte = to_byte - from_byte;
4167         }
4168     }
4169
4170   if (replace)
4171     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4172
4173   /* Try to skip the heading and tailing ASCIIs.  */
4174   {
4175     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4176
4177     if (from < GPT && GPT < to)
4178       move_gap_both (from, from_byte);
4179     if (encodep)
4180       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4181     else
4182       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4183     if (from_byte == to_byte)
4184       {
4185         coding->produced = len_byte;
4186         coding->produced_char = multibyte ? len : len_byte;
4187         if (!replace)
4188           /* We must record and adjust for this new text now.  */
4189           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4190         return 0;
4191       }
4192
4193     head_skip = from_byte - from_byte_orig;
4194     tail_skip = to_byte_orig - to_byte;
4195     total_skip = head_skip + tail_skip;
4196     from += head_skip;
4197     to -= tail_skip;
4198     len -= total_skip; len_byte -= total_skip;
4199   }
4200
4201   /* For converion, we must put the gap before the text in addition to
4202      making the gap larger for efficient decoding.  The required gap
4203      size starts from 2000 which is the magic number used in make_gap.
4204      But, after one batch of conversion, it will be incremented if we
4205      find that it is not enough .  */
4206   require = 2000;
4207
4208   if (GAP_SIZE  < require)
4209     make_gap (require - GAP_SIZE);
4210   move_gap_both (from, from_byte);
4211
4212   if (GPT - BEG < beg_unchanged)
4213     beg_unchanged = GPT - BEG;
4214   if (Z - GPT < end_unchanged)
4215     end_unchanged = Z - GPT;
4216
4217   inserted = inserted_byte = 0;
4218   src = GAP_END_ADDR, dst = GPT_ADDR;
4219
4220   GAP_SIZE += len_byte;
4221   ZV -= len;
4222   Z -= len;
4223   ZV_BYTE -= len_byte;
4224   Z_BYTE -= len_byte;
4225
4226   for (;;)
4227     {
4228       int result;
4229
4230       /* The buffer memory is changed from:
4231          +--------+converted-text+---------+-------original-text------+---+
4232          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4233                   |<------------------- GAP_SIZE -------------------->|  */
4234       if (encodep)
4235         result = encode_coding (coding, src, dst, len_byte, 0);
4236       else
4237         result = decode_coding (coding, src, dst, len_byte, 0);
4238       /* to:
4239          +--------+-------converted-text--------+--+---original-text--+---+
4240          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4241                   |<------------------- GAP_SIZE -------------------->|  */
4242       if (coding->fake_multibyte)
4243         fake_multibyte = 1;
4244
4245       if (!encodep && !multibyte)
4246         coding->produced_char = coding->produced;
4247       inserted += coding->produced_char;
4248       inserted_byte += coding->produced;
4249       len_byte -= coding->consumed;
4250       src += coding->consumed;
4251       dst += inserted_byte;
4252
4253       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4254         {
4255           unsigned char *pend = dst, *p = pend - inserted_byte;
4256
4257           /* Encode LFs back to the original eol format (CR or CRLF).  */
4258           if (coding->eol_type == CODING_EOL_CR)
4259             {
4260               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4261             }
4262           else
4263             {
4264               int count = 0;
4265
4266               while (p < pend) if (*p++ == '\n') count++;
4267               if (src - dst < count)
4268                 {
4269                   /* We don't have sufficient room for putting LFs
4270                      back to CRLF.  We must record converted and
4271                      not-yet-converted text back to the buffer
4272                      content, enlarge the gap, then record them out of
4273                      the buffer contents again.  */
4274                   int add = len_byte + inserted_byte;
4275
4276                   GAP_SIZE -= add;
4277                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4278                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4279                   make_gap (count - GAP_SIZE);
4280                   GAP_SIZE += add;
4281                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4282                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4283                   /* Don't forget to update SRC, DST, and PEND.  */
4284                   src = GAP_END_ADDR - len_byte;
4285                   dst = GPT_ADDR + inserted_byte;
4286                   pend = dst;
4287                 }
4288               inserted += count;
4289               inserted_byte += count;
4290               coding->produced += count;
4291               p = dst = pend + count;
4292               while (count)
4293                 {
4294                   *--p = *--pend;
4295                   if (*p == '\n') count--, *--p = '\r';
4296                 }
4297             }
4298
4299           /* Suppress eol-format conversion in the further conversion.  */
4300           coding->eol_type = CODING_EOL_LF;
4301
4302           /* Restore the original symbol.  */
4303           coding->symbol = saved_coding_symbol;
4304
4305           continue;
4306         }
4307       if (len_byte <= 0)
4308         break;
4309       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4310         {
4311           /* The source text ends in invalid codes.  Let's just
4312              make them valid buffer contents, and finish conversion.  */
4313           inserted += len_byte;
4314           inserted_byte += len_byte;
4315           while (len_byte--)
4316             *dst++ = *src++;
4317           fake_multibyte = 1;
4318           break;
4319         }
4320       if (first)
4321         {
4322           /* We have just done the first batch of conversion which was
4323              stoped because of insufficient gap.  Let's reconsider the
4324              required gap size (i.e. SRT - DST) now.
4325
4326              We have converted ORIG bytes (== coding->consumed) into
4327              NEW bytes (coding->produced).  To convert the remaining
4328              LEN bytes, we may need REQUIRE bytes of gap, where:
4329                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4330                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4331              Here, we are sure that NEW >= ORIG.  */
4332           float ratio = coding->produced - coding->consumed;
4333           ratio /= coding->consumed;
4334           require = len_byte * ratio;
4335           first = 0;
4336         }
4337       if ((src - dst) < (require + 2000))
4338         {
4339           /* See the comment above the previous call of make_gap.  */
4340           int add = len_byte + inserted_byte;
4341
4342           GAP_SIZE -= add;
4343           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4344           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4345           make_gap (require + 2000);
4346           GAP_SIZE += add;
4347           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4348           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4349           /* Don't forget to update SRC, DST.  */
4350           src = GAP_END_ADDR - len_byte;
4351           dst = GPT_ADDR + inserted_byte;
4352         }
4353     }
4354   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4355
4356   if (multibyte
4357       && (fake_multibyte
4358           || !encodep && (to - from) != (to_byte - from_byte)))
4359     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4360
4361   /* If we have shrinked the conversion area, adjust it now.  */
4362   if (total_skip > 0)
4363     {
4364       if (tail_skip > 0)
4365         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4366       inserted += total_skip; inserted_byte += total_skip;
4367       GAP_SIZE += total_skip;
4368       GPT -= head_skip; GPT_BYTE -= head_skip;
4369       ZV -= total_skip; ZV_BYTE -= total_skip;
4370       Z -= total_skip; Z_BYTE -= total_skip;
4371       from -= head_skip; from_byte -= head_skip;
4372       to += tail_skip; to_byte += tail_skip;
4373     }
4374
4375   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4376
4377   if (! encodep && ! NILP (coding->post_read_conversion))
4378     {
4379       Lisp_Object val;
4380       int orig_inserted = inserted, pos = PT;
4381
4382       if (from != pos)
4383         temp_set_point_both (current_buffer, from, from_byte);
4384       val = call1 (coding->post_read_conversion, make_number (inserted));
4385       if (! NILP (val))
4386         {
4387           CHECK_NUMBER (val, 0);
4388           inserted = XFASTINT (val);
4389         }
4390       if (pos >= from + orig_inserted)
4391         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4392     }
4393
4394   signal_after_change (from, to - from, inserted);
4395
4396   {
4397     coding->consumed = to_byte - from_byte;
4398     coding->consumed_char = to - from;
4399     coding->produced = inserted_byte;
4400     coding->produced_char = inserted;
4401   }
4402
4403   return 0;
4404 }
4405
4406 Lisp_Object
4407 code_convert_string (str, coding, encodep, nocopy)
4408      Lisp_Object str;
4409      struct coding_system *coding;
4410      int encodep, nocopy;
4411 {
4412   int len;
4413   char *buf;
4414   int from = 0, to = XSTRING (str)->size;
4415   int to_byte = STRING_BYTES (XSTRING (str));
4416   struct gcpro gcpro1;
4417   Lisp_Object saved_coding_symbol;
4418   int result;
4419
4420   saved_coding_symbol = Qnil;
4421   if (encodep && !NILP (coding->pre_write_conversion)
4422       || !encodep && !NILP (coding->post_read_conversion))
4423     {
4424       /* Since we have to call Lisp functions which assume target text
4425          is in a buffer, after setting a temporary buffer, call
4426          code_convert_region.  */
4427       int count = specpdl_ptr - specpdl;
4428       struct buffer *prev = current_buffer;
4429
4430       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4431       temp_output_buffer_setup (" *code-converting-work*");
4432       set_buffer_internal (XBUFFER (Vstandard_output));
4433       if (encodep)
4434         insert_from_string (str, 0, 0, to, to_byte, 0);
4435       else
4436         {
4437           /* We must insert the contents of STR as is without
4438              unibyte<->multibyte conversion.  */
4439           current_buffer->enable_multibyte_characters = Qnil;
4440           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4441           current_buffer->enable_multibyte_characters = Qt;
4442         }
4443       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4444       if (encodep)
4445         /* We must return the buffer contents as unibyte string.  */
4446         current_buffer->enable_multibyte_characters = Qnil;
4447       str = make_buffer_string (BEGV, ZV, 0);
4448       set_buffer_internal (prev);
4449       return unbind_to (count, str);
4450     }
4451
4452   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4453     {
4454       /* See the comments in code_convert_region.  */
4455       if (coding->type == coding_type_undecided)
4456         {
4457           detect_coding (coding, XSTRING (str)->data, to_byte);
4458           if (coding->type == coding_type_undecided)
4459             coding->type = coding_type_emacs_mule;
4460         }
4461       if (coding->eol_type == CODING_EOL_UNDECIDED)
4462         {
4463           saved_coding_symbol = coding->symbol;
4464           detect_eol (coding, XSTRING (str)->data, to_byte);
4465           if (coding->eol_type == CODING_EOL_UNDECIDED)
4466             coding->eol_type = CODING_EOL_LF;
4467           /* We had better recover the original eol format if we
4468              encounter an inconsitent eol format while decoding.  */
4469           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4470         }
4471     }
4472
4473   if (encodep
4474       ? ! CODING_REQUIRE_ENCODING (coding)
4475       : ! CODING_REQUIRE_DECODING (coding))
4476     from = to_byte;
4477   else
4478     {
4479       /* Try to skip the heading and tailing ASCIIs.  */
4480       if (encodep)
4481         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4482       else
4483         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4484     }
4485   if (from == to_byte)
4486     return (nocopy ? str : Fcopy_sequence (str));
4487
4488   if (encodep)
4489     len = encoding_buffer_size (coding, to_byte - from);
4490   else
4491     len = decoding_buffer_size (coding, to_byte - from);
4492   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4493   GCPRO1 (str);
4494   buf = get_conversion_buffer (len);
4495   UNGCPRO;
4496
4497   if (from > 0)
4498     bcopy (XSTRING (str)->data, buf, from);
4499   result = (encodep
4500             ? encode_coding (coding, XSTRING (str)->data + from,
4501                              buf + from, to_byte - from, len)
4502             : decode_coding (coding, XSTRING (str)->data + from,
4503                              buf + from, to_byte - from, len));
4504   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4505     {
4506       /* We simple try to decode the whole string again but without
4507          eol-conversion this time.  */
4508       coding->eol_type = CODING_EOL_LF;
4509       coding->symbol = saved_coding_symbol;
4510       return code_convert_string (str, coding, encodep, nocopy);
4511     }
4512
4513   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4514          STRING_BYTES (XSTRING (str)) - to_byte);
4515
4516   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4517   if (encodep)
4518     str = make_unibyte_string (buf, len + coding->produced);
4519   else
4520     {
4521       int chars= (coding->fake_multibyte
4522                   ? multibyte_chars_in_text (buf + from, coding->produced)
4523                   : coding->produced_char);
4524       str = make_multibyte_string (buf, len + chars, len + coding->produced);
4525     }
4526
4527   return str;
4528 }
4529
4530 \f
4531 #ifdef emacs
4532 /*** 8. Emacs Lisp library functions ***/
4533
4534 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4535   "Return t if OBJECT is nil or a coding-system.\n\
4536 See the documentation of `make-coding-system' for information\n\
4537 about coding-system objects.")
4538   (obj)
4539      Lisp_Object obj;
4540 {
4541   if (NILP (obj))
4542     return Qt;
4543   if (!SYMBOLP (obj))
4544     return Qnil;
4545   /* Get coding-spec vector for OBJ.  */
4546   obj = Fget (obj, Qcoding_system);
4547   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4548           ? Qt : Qnil);
4549 }
4550
4551 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4552        Sread_non_nil_coding_system, 1, 1, 0,
4553   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4554   (prompt)
4555      Lisp_Object prompt;
4556 {
4557   Lisp_Object val;
4558   do
4559     {
4560       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4561                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4562     }
4563   while (XSTRING (val)->size == 0);
4564   return (Fintern (val, Qnil));
4565 }
4566
4567 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4568   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4569 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4570   (prompt, default_coding_system)
4571      Lisp_Object prompt, default_coding_system;
4572 {
4573   Lisp_Object val;
4574   if (SYMBOLP (default_coding_system))
4575     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4576   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4577                           Qt, Qnil, Qcoding_system_history,
4578                           default_coding_system, Qnil);
4579   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4580 }
4581
4582 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4583        1, 1, 0,
4584   "Check validity of CODING-SYSTEM.\n\
4585 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4586 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4587 The value of property should be a vector of length 5.")
4588   (coding_system)
4589      Lisp_Object coding_system;
4590 {
4591   CHECK_SYMBOL (coding_system, 0);
4592   if (!NILP (Fcoding_system_p (coding_system)))
4593     return coding_system;
4594   while (1)
4595     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4596 }
4597 \f
4598 Lisp_Object
4599 detect_coding_system (src, src_bytes, highest)
4600      unsigned char *src;
4601      int src_bytes, highest;
4602 {
4603   int coding_mask, eol_type;
4604   Lisp_Object val, tmp;
4605   int dummy;
4606
4607   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4608   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4609   if (eol_type == CODING_EOL_INCONSISTENT)
4610     eol_type == CODING_EOL_UNDECIDED;
4611
4612   if (!coding_mask)
4613     {
4614       val = Qundecided;
4615       if (eol_type != CODING_EOL_UNDECIDED)
4616         {
4617           Lisp_Object val2;
4618           val2 = Fget (Qundecided, Qeol_type);
4619           if (VECTORP (val2))
4620             val = XVECTOR (val2)->contents[eol_type];
4621         }
4622       return (highest ? val : Fcons (val, Qnil));
4623     }
4624
4625   /* At first, gather possible coding systems in VAL.  */
4626   val = Qnil;
4627   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4628     {
4629       int idx
4630         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4631       if (coding_mask & (1 << idx))
4632         {
4633           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4634           if (highest)
4635             break;
4636         }
4637     }
4638   if (!highest)
4639     val = Fnreverse (val);
4640
4641   /* Then, replace the elements with subsidiary coding systems.  */
4642   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4643     {
4644       if (eol_type != CODING_EOL_UNDECIDED
4645           && eol_type != CODING_EOL_INCONSISTENT)
4646         {
4647           Lisp_Object eol;
4648           eol = Fget (XCONS (tmp)->car, Qeol_type);
4649           if (VECTORP (eol))
4650             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4651         }
4652     }
4653   return (highest ? XCONS (val)->car : val);
4654 }
4655
4656 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4657        2, 3, 0,
4658   "Detect coding system of the text in the region between START and END.\n\
4659 Return a list of possible coding systems ordered by priority.\n\
4660 \n\
4661 If only ASCII characters are found, it returns a list of single element\n\
4662 `undecided' or its subsidiary coding system according to a detected\n\
4663 end-of-line format.\n\
4664 \n\
4665 If optional argument HIGHEST is non-nil, return the coding system of\n\
4666 highest priority.")
4667   (start, end, highest)
4668      Lisp_Object start, end, highest;
4669 {
4670   int from, to;
4671   int from_byte, to_byte;
4672
4673   CHECK_NUMBER_COERCE_MARKER (start, 0);
4674   CHECK_NUMBER_COERCE_MARKER (end, 1);
4675
4676   validate_region (&start, &end);
4677   from = XINT (start), to = XINT (end);
4678   from_byte = CHAR_TO_BYTE (from);
4679   to_byte = CHAR_TO_BYTE (to);
4680
4681   if (from < GPT && to >= GPT)
4682     move_gap_both (to, to_byte);
4683
4684   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4685                                to_byte - from_byte,
4686                                !NILP (highest));
4687 }
4688
4689 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4690        1, 2, 0,
4691   "Detect coding system of the text in STRING.\n\
4692 Return a list of possible coding systems ordered by priority.\n\
4693 \n\
4694 If only ASCII characters are found, it returns a list of single element\n\
4695 `undecided' or its subsidiary coding system according to a detected\n\
4696 end-of-line format.\n\
4697 \n\
4698 If optional argument HIGHEST is non-nil, return the coding system of\n\
4699 highest priority.")
4700   (string, highest)
4701      Lisp_Object string, highest;
4702 {
4703   CHECK_STRING (string, 0);
4704
4705   return detect_coding_system (XSTRING (string)->data,
4706                                STRING_BYTES (XSTRING (string)),
4707                                !NILP (highest));
4708 }
4709
4710 Lisp_Object
4711 code_convert_region1 (start, end, coding_system, encodep)
4712      Lisp_Object start, end, coding_system;
4713      int encodep;
4714 {
4715   struct coding_system coding;
4716   int from, to, len;
4717
4718   CHECK_NUMBER_COERCE_MARKER (start, 0);
4719   CHECK_NUMBER_COERCE_MARKER (end, 1);
4720   CHECK_SYMBOL (coding_system, 2);
4721
4722   validate_region (&start, &end);
4723   from = XFASTINT (start);
4724   to = XFASTINT (end);
4725
4726   if (NILP (coding_system))
4727     return make_number (to - from);
4728
4729   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4730     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4731
4732   coding.mode |= CODING_MODE_LAST_BLOCK;
4733   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4734                        &coding, encodep, 1);
4735   Vlast_coding_system_used = coding.symbol;
4736   return make_number (coding.produced_char);
4737 }
4738
4739 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4740        3, 3, "r\nzCoding system: ",
4741   "Decode the current region by specified coding system.\n\
4742 When called from a program, takes three arguments:\n\
4743 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4744 This function sets `last-coding-system-used' to the precise coding system\n\
4745 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4746 not fully specified.)\n\
4747 It returns the length of the decoded text.")
4748   (start, end, coding_system)
4749      Lisp_Object start, end, coding_system;
4750 {
4751   return code_convert_region1 (start, end, coding_system, 0);
4752 }
4753
4754 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
4755        3, 3, "r\nzCoding system: ",
4756   "Encode the current region by specified coding system.\n\
4757 When called from a program, takes three arguments:\n\
4758 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4759 This function sets `last-coding-system-used' to the precise coding system\n\
4760 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4761 not fully specified.)\n\
4762 It returns the length of the encoded text.")
4763   (start, end, coding_system)
4764      Lisp_Object start, end, coding_system;
4765 {
4766   return code_convert_region1 (start, end, coding_system, 1);
4767 }
4768
4769 Lisp_Object
4770 code_convert_string1 (string, coding_system, nocopy, encodep)
4771      Lisp_Object string, coding_system, nocopy;
4772      int encodep;
4773 {
4774   struct coding_system coding;
4775
4776   CHECK_STRING (string, 0);
4777   CHECK_SYMBOL (coding_system, 1);
4778
4779   if (NILP (coding_system))
4780     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4781
4782   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4783     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4784
4785   coding.mode |= CODING_MODE_LAST_BLOCK;
4786   Vlast_coding_system_used = coding.symbol;
4787   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4788 }
4789
4790 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
4791        2, 3, 0,
4792   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
4793 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4794 if the decoding operation is trivial.\n\
4795 This function sets `last-coding-system-used' to the precise coding system\n\
4796 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4797 not fully specified.)")
4798   (string, coding_system, nocopy)
4799      Lisp_Object string, coding_system, nocopy;
4800 {
4801   return code_convert_string1 (string, coding_system, nocopy, 0);
4802 }
4803
4804 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
4805        2, 3, 0,
4806   "Encode STRING to CODING-SYSTEM, and return the result.\n\
4807 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4808 if the encoding operation is trivial.\n\
4809 This function sets `last-coding-system-used' to the precise coding system\n\
4810 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4811 not fully specified.)")
4812   (string, coding_system, nocopy)
4813      Lisp_Object string, coding_system, nocopy;
4814 {
4815   return code_convert_string1 (string, coding_system, nocopy, 1);
4816 }
4817
4818 /* Encode or decode STRING according to CODING_SYSTEM.
4819    Do not set Vlast_coding_system_used.  */
4820
4821 Lisp_Object
4822 code_convert_string_norecord (string, coding_system, encodep)
4823      Lisp_Object string, coding_system;
4824      int encodep;
4825 {
4826   struct coding_system coding;
4827
4828   CHECK_STRING (string, 0);
4829   CHECK_SYMBOL (coding_system, 1);
4830
4831   if (NILP (coding_system))
4832     return string;
4833
4834   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4835     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4836
4837   coding.mode |= CODING_MODE_LAST_BLOCK;
4838   return code_convert_string (string, &coding, encodep, Qt);
4839 }
4840 \f
4841 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4842   "Decode a JISX0208 character of shift-jis encoding.\n\
4843 CODE is the character code in SJIS.\n\
4844 Return the corresponding character.")
4845   (code)
4846      Lisp_Object code;
4847 {
4848   unsigned char c1, c2, s1, s2;
4849   Lisp_Object val;
4850
4851   CHECK_NUMBER (code, 0);
4852   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4853   DECODE_SJIS (s1, s2, c1, c2);
4854   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4855   return val;
4856 }
4857
4858 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4859   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4860 Return the corresponding character code in SJIS.")
4861   (ch)
4862      Lisp_Object ch;
4863 {
4864   int charset, c1, c2, s1, s2;
4865   Lisp_Object val;
4866
4867   CHECK_NUMBER (ch, 0);
4868   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4869   if (charset == charset_jisx0208)
4870     {
4871       ENCODE_SJIS (c1, c2, s1, s2);
4872       XSETFASTINT (val, (s1 << 8) | s2);
4873     }
4874   else
4875     XSETFASTINT (val, 0);
4876   return val;
4877 }
4878
4879 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4880   "Decode a Big5 character CODE of BIG5 coding system.\n\
4881 CODE is the character code in BIG5.\n\
4882 Return the corresponding character.")
4883   (code)
4884      Lisp_Object code;
4885 {
4886   int charset;
4887   unsigned char b1, b2, c1, c2;
4888   Lisp_Object val;
4889
4890   CHECK_NUMBER (code, 0);
4891   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4892   DECODE_BIG5 (b1, b2, charset, c1, c2);
4893   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4894   return val;
4895 }
4896
4897 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4898   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4899 Return the corresponding character code in Big5.")
4900   (ch)
4901      Lisp_Object ch;
4902 {
4903   int charset, c1, c2, b1, b2;
4904   Lisp_Object val;
4905
4906   CHECK_NUMBER (ch, 0);
4907   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4908   if (charset == charset_big5_1 || charset == charset_big5_2)
4909     {
4910       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4911       XSETFASTINT (val, (b1 << 8) | b2);
4912     }
4913   else
4914     XSETFASTINT (val, 0);
4915   return val;
4916 }
4917 \f
4918 DEFUN ("set-terminal-coding-system-internal",
4919        Fset_terminal_coding_system_internal,
4920        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4921   (coding_system)
4922      Lisp_Object coding_system;
4923 {
4924   CHECK_SYMBOL (coding_system, 0);
4925   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4926   /* We had better not send unsafe characters to terminal.  */
4927   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4928
4929   return Qnil;
4930 }
4931
4932 DEFUN ("set-safe-terminal-coding-system-internal",
4933        Fset_safe_terminal_coding_system_internal,
4934        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4935   (coding_system)
4936      Lisp_Object coding_system;
4937 {
4938   CHECK_SYMBOL (coding_system, 0);
4939   setup_coding_system (Fcheck_coding_system (coding_system),
4940                        &safe_terminal_coding);
4941   return Qnil;
4942 }
4943
4944 DEFUN ("terminal-coding-system",
4945        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
4946   "Return coding system specified for terminal output.")
4947   ()
4948 {
4949   return terminal_coding.symbol;
4950 }
4951
4952 DEFUN ("set-keyboard-coding-system-internal",
4953        Fset_keyboard_coding_system_internal,
4954        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4955   (coding_system)
4956      Lisp_Object coding_system;
4957 {
4958   CHECK_SYMBOL (coding_system, 0);
4959   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4960   return Qnil;
4961 }
4962
4963 DEFUN ("keyboard-coding-system",
4964        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
4965   "Return coding system specified for decoding keyboard input.")
4966   ()
4967 {
4968   return keyboard_coding.symbol;
4969 }
4970
4971 \f
4972 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4973        Sfind_operation_coding_system,  1, MANY, 0,
4974   "Choose a coding system for an operation based on the target name.\n\
4975 The value names a pair of coding systems: (DECODING-SYSTEM ENCODING-SYSTEM).\n\
4976 DECODING-SYSTEM is the coding system to use for decoding\n\
4977 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4978 for encoding (in case OPERATION does encoding).\n\
4979 \n\
4980 The first argument OPERATION specifies an I/O primitive:\n\
4981   For file I/O, `insert-file-contents' or `write-region'.\n\
4982   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4983   For network I/O, `open-network-stream'.\n\
4984 \n\
4985 The remaining arguments should be the same arguments that were passed\n\
4986 to the primitive.  Depending on which primitive, one of those arguments\n\
4987 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4988 whichever argument specifies the file name is TARGET.\n\
4989 \n\
4990 TARGET has a meaning which depends on OPERATION:\n\
4991   For file I/O, TARGET is a file name.\n\
4992   For process I/O, TARGET is a process name.\n\
4993   For network I/O, TARGET is a service name or a port number\n\
4994 \n\
4995 This function looks up what specified for TARGET in,\n\
4996 `file-coding-system-alist', `process-coding-system-alist',\n\
4997 or `network-coding-system-alist' depending on OPERATION.\n\
4998 They may specify a coding system, a cons of coding systems,\n\
4999 or a function symbol to call.\n\
5000 In the last case, we call the function with one argument,\n\
5001 which is a list of all the arguments given to this function.")
5002   (nargs, args)
5003      int nargs;
5004      Lisp_Object *args;
5005 {
5006   Lisp_Object operation, target_idx, target, val;
5007   register Lisp_Object chain;
5008
5009   if (nargs < 2)
5010     error ("Too few arguments");
5011   operation = args[0];
5012   if (!SYMBOLP (operation)
5013       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
5014     error ("Invalid first arguement");
5015   if (nargs < 1 + XINT (target_idx))
5016     error ("Too few arguments for operation: %s",
5017            XSYMBOL (operation)->name->data);
5018   target = args[XINT (target_idx) + 1];
5019   if (!(STRINGP (target)
5020         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
5021     error ("Invalid %dth argument", XINT (target_idx) + 1);
5022
5023   chain = ((EQ (operation, Qinsert_file_contents)
5024             || EQ (operation, Qwrite_region))
5025            ? Vfile_coding_system_alist
5026            : (EQ (operation, Qopen_network_stream)
5027               ? Vnetwork_coding_system_alist
5028               : Vprocess_coding_system_alist));
5029   if (NILP (chain))
5030     return Qnil;
5031
5032   for (; CONSP (chain); chain = XCONS (chain)->cdr)
5033     {
5034       Lisp_Object elt;
5035       elt = XCONS (chain)->car;
5036
5037       if (CONSP (elt)
5038           && ((STRINGP (target)
5039                && STRINGP (XCONS (elt)->car)
5040                && fast_string_match (XCONS (elt)->car, target) >= 0)
5041               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
5042         {
5043           val = XCONS (elt)->cdr;
5044           /* Here, if VAL is both a valid coding system and a valid
5045              function symbol, we return VAL as a coding system.  */
5046           if (CONSP (val))
5047             return val;
5048           if (! SYMBOLP (val))
5049             return Qnil;
5050           if (! NILP (Fcoding_system_p (val)))
5051             return Fcons (val, val);
5052           if (! NILP (Ffboundp (val)))
5053             {
5054               val = call1 (val, Flist (nargs, args));
5055               if (CONSP (val))
5056                 return val;
5057               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
5058                 return Fcons (val, val);
5059             }
5060           return Qnil;
5061         }
5062     }
5063   return Qnil;
5064 }
5065
5066 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
5067        Supdate_coding_systems_internal, 0, 0, 0,
5068   "Update internal database for ISO2022 and CCL based coding systems.\n\
5069 When values of the following coding categories are changed, you must\n\
5070 call this function:\n\
5071   coding-category-iso-7, coding-category-iso-7-tight,\n\
5072   coding-category-iso-8-1, coding-category-iso-8-2,\n\
5073   coding-category-iso-7-else, coding-category-iso-8-else,\n\
5074   coding-category-ccl")
5075   ()
5076 {
5077   int i;
5078
5079   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++)
5080     {
5081       Lisp_Object val;
5082
5083       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
5084       if (!NILP (val))
5085         {
5086           if (! coding_system_table[i])
5087             coding_system_table[i] = ((struct coding_system *)
5088                                       xmalloc (sizeof (struct coding_system)));
5089           setup_coding_system (val, coding_system_table[i]);
5090         }
5091       else if (coding_system_table[i])
5092         {
5093           xfree (coding_system_table[i]);
5094           coding_system_table[i] = NULL;
5095         }
5096     }
5097
5098   return Qnil;
5099 }
5100
5101 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
5102        Sset_coding_priority_internal, 0, 0, 0,
5103   "Update internal database for the current value of `coding-category-list'.\n\
5104 This function is internal use only.")
5105   ()
5106 {
5107   int i = 0, idx;
5108   Lisp_Object val;
5109
5110   val = Vcoding_category_list;
5111
5112   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
5113     {
5114       if (! SYMBOLP (XCONS (val)->car))
5115         break;
5116       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
5117       if (idx >= CODING_CATEGORY_IDX_MAX)
5118         break;
5119       coding_priorities[i++] = (1 << idx);
5120       val = XCONS (val)->cdr;
5121     }
5122   /* If coding-category-list is valid and contains all coding
5123      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
5124      the following code saves Emacs from craching.  */
5125   while (i < CODING_CATEGORY_IDX_MAX)
5126     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
5127
5128   return Qnil;
5129 }
5130
5131 #endif /* emacs */
5132
5133 \f
5134 /*** 9. Post-amble ***/
5135
5136 void
5137 init_coding ()
5138 {
5139   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5140 }
5141
5142 void
5143 init_coding_once ()
5144 {
5145   int i;
5146
5147   /* Emacs' internal format specific initialize routine.  */
5148   for (i = 0; i <= 0x20; i++)
5149     emacs_code_class[i] = EMACS_control_code;
5150   emacs_code_class[0x0A] = EMACS_linefeed_code;
5151   emacs_code_class[0x0D] = EMACS_carriage_return_code;
5152   for (i = 0x21 ; i < 0x7F; i++)
5153     emacs_code_class[i] = EMACS_ascii_code;
5154   emacs_code_class[0x7F] = EMACS_control_code;
5155   emacs_code_class[0x80] = EMACS_leading_code_composition;
5156   for (i = 0x81; i < 0xFF; i++)
5157     emacs_code_class[i] = EMACS_invalid_code;
5158   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5159   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5160   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5161   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
5162
5163   /* ISO2022 specific initialize routine.  */
5164   for (i = 0; i < 0x20; i++)
5165     iso_code_class[i] = ISO_control_code;
5166   for (i = 0x21; i < 0x7F; i++)
5167     iso_code_class[i] = ISO_graphic_plane_0;
5168   for (i = 0x80; i < 0xA0; i++)
5169     iso_code_class[i] = ISO_control_code;
5170   for (i = 0xA1; i < 0xFF; i++)
5171     iso_code_class[i] = ISO_graphic_plane_1;
5172   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5173   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5174   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5175   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5176   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5177   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5178   iso_code_class[ISO_CODE_ESC] = ISO_escape;
5179   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5180   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5181   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5182
5183   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
5184
5185   setup_coding_system (Qnil, &keyboard_coding);
5186   setup_coding_system (Qnil, &terminal_coding);
5187   setup_coding_system (Qnil, &safe_terminal_coding);
5188   setup_coding_system (Qnil, &default_buffer_file_coding);
5189
5190   bzero (coding_system_table, sizeof coding_system_table);
5191
5192   bzero (ascii_skip_code, sizeof ascii_skip_code);
5193   for (i = 0; i < 128; i++)
5194     ascii_skip_code[i] = 1;
5195
5196 #if defined (MSDOS) || defined (WINDOWSNT)
5197   system_eol_type = CODING_EOL_CRLF;
5198 #else
5199   system_eol_type = CODING_EOL_LF;
5200 #endif
5201 }
5202
5203 #ifdef emacs
5204
5205 void
5206 syms_of_coding ()
5207 {
5208   Qtarget_idx = intern ("target-idx");
5209   staticpro (&Qtarget_idx);
5210
5211   Qcoding_system_history = intern ("coding-system-history");
5212   staticpro (&Qcoding_system_history);
5213   Fset (Qcoding_system_history, Qnil);
5214
5215   /* Target FILENAME is the first argument.  */
5216   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5217   /* Target FILENAME is the third argument.  */
5218   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5219
5220   Qcall_process = intern ("call-process");
5221   staticpro (&Qcall_process);
5222   /* Target PROGRAM is the first argument.  */
5223   Fput (Qcall_process, Qtarget_idx, make_number (0));
5224
5225   Qcall_process_region = intern ("call-process-region");
5226   staticpro (&Qcall_process_region);
5227   /* Target PROGRAM is the third argument.  */
5228   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5229
5230   Qstart_process = intern ("start-process");
5231   staticpro (&Qstart_process);
5232   /* Target PROGRAM is the third argument.  */
5233   Fput (Qstart_process, Qtarget_idx, make_number (2));
5234
5235   Qopen_network_stream = intern ("open-network-stream");
5236   staticpro (&Qopen_network_stream);
5237   /* Target SERVICE is the fourth argument.  */
5238   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5239
5240   Qcoding_system = intern ("coding-system");
5241   staticpro (&Qcoding_system);
5242
5243   Qeol_type = intern ("eol-type");
5244   staticpro (&Qeol_type);
5245
5246   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5247   staticpro (&Qbuffer_file_coding_system);
5248
5249   Qpost_read_conversion = intern ("post-read-conversion");
5250   staticpro (&Qpost_read_conversion);
5251
5252   Qpre_write_conversion = intern ("pre-write-conversion");
5253   staticpro (&Qpre_write_conversion);
5254
5255   Qno_conversion = intern ("no-conversion");
5256   staticpro (&Qno_conversion);
5257
5258   Qundecided = intern ("undecided");
5259   staticpro (&Qundecided);
5260
5261   Qcoding_system_p = intern ("coding-system-p");
5262   staticpro (&Qcoding_system_p);
5263
5264   Qcoding_system_error = intern ("coding-system-error");
5265   staticpro (&Qcoding_system_error);
5266
5267   Fput (Qcoding_system_error, Qerror_conditions,
5268         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5269   Fput (Qcoding_system_error, Qerror_message,
5270         build_string ("Invalid coding system"));
5271
5272   Qcoding_category = intern ("coding-category");
5273   staticpro (&Qcoding_category);
5274   Qcoding_category_index = intern ("coding-category-index");
5275   staticpro (&Qcoding_category_index);
5276
5277   Vcoding_category_table
5278     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5279   staticpro (&Vcoding_category_table);
5280   {
5281     int i;
5282     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5283       {
5284         XVECTOR (Vcoding_category_table)->contents[i]
5285           = intern (coding_category_name[i]);
5286         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5287               Qcoding_category_index, make_number (i));
5288       }
5289   }
5290
5291   Qtranslation_table = intern ("translation-table");
5292   staticpro (&Qtranslation_table);
5293   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
5294
5295   Qtranslation_table_id = intern ("translation-table-id");
5296   staticpro (&Qtranslation_table_id);
5297
5298   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
5299   staticpro (&Qtranslation_table_for_decode);
5300
5301   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
5302   staticpro (&Qtranslation_table_for_encode);
5303
5304   Qsafe_charsets = intern ("safe-charsets");
5305   staticpro (&Qsafe_charsets);
5306
5307   Qvalid_codes = intern ("valid-codes");
5308   staticpro (&Qvalid_codes);
5309
5310   Qemacs_mule = intern ("emacs-mule");
5311   staticpro (&Qemacs_mule);
5312
5313   Qraw_text = intern ("raw-text");
5314   staticpro (&Qraw_text);
5315
5316   defsubr (&Scoding_system_p);
5317   defsubr (&Sread_coding_system);
5318   defsubr (&Sread_non_nil_coding_system);
5319   defsubr (&Scheck_coding_system);
5320   defsubr (&Sdetect_coding_region);
5321   defsubr (&Sdetect_coding_string);
5322   defsubr (&Sdecode_coding_region);
5323   defsubr (&Sencode_coding_region);
5324   defsubr (&Sdecode_coding_string);
5325   defsubr (&Sencode_coding_string);
5326   defsubr (&Sdecode_sjis_char);
5327   defsubr (&Sencode_sjis_char);
5328   defsubr (&Sdecode_big5_char);
5329   defsubr (&Sencode_big5_char);
5330   defsubr (&Sset_terminal_coding_system_internal);
5331   defsubr (&Sset_safe_terminal_coding_system_internal);
5332   defsubr (&Sterminal_coding_system);
5333   defsubr (&Sset_keyboard_coding_system_internal);
5334   defsubr (&Skeyboard_coding_system);
5335   defsubr (&Sfind_operation_coding_system);
5336   defsubr (&Supdate_coding_systems_internal);
5337   defsubr (&Sset_coding_priority_internal);
5338
5339   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5340     "List of coding systems.\n\
5341 \n\
5342 Do not alter the value of this variable manually.  This variable should be\n\
5343 updated by the functions `make-coding-system' and\n\
5344 `define-coding-system-alias'.");
5345   Vcoding_system_list = Qnil;
5346
5347   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5348     "Alist of coding system names.\n\
5349 Each element is one element list of coding system name.\n\
5350 This variable is given to `completing-read' as TABLE argument.\n\
5351 \n\
5352 Do not alter the value of this variable manually.  This variable should be\n\
5353 updated by the functions `make-coding-system' and\n\
5354 `define-coding-system-alias'.");
5355   Vcoding_system_alist = Qnil;
5356
5357   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5358     "List of coding-categories (symbols) ordered by priority.");
5359   {
5360     int i;
5361
5362     Vcoding_category_list = Qnil;
5363     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5364       Vcoding_category_list
5365         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5366                  Vcoding_category_list);
5367   }
5368
5369   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5370     "Specify the coding system for read operations.\n\
5371 It is useful to bind this variable with `let', but do not set it globally.\n\
5372 If the value is a coding system, it is used for decoding on read operation.\n\
5373 If not, an appropriate element is used from one of the coding system alists:\n\
5374 There are three such tables, `file-coding-system-alist',\n\
5375 `process-coding-system-alist', and `network-coding-system-alist'.");
5376   Vcoding_system_for_read = Qnil;
5377
5378   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5379     "Specify the coding system for write operations.\n\
5380 It is useful to bind this variable with `let', but do not set it globally.\n\
5381 If the value is a coding system, it is used for encoding on write operation.\n\
5382 If not, an appropriate element is used from one of the coding system alists:\n\
5383 There are three such tables, `file-coding-system-alist',\n\
5384 `process-coding-system-alist', and `network-coding-system-alist'.");
5385   Vcoding_system_for_write = Qnil;
5386
5387   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5388     "Coding system used in the latest file or process I/O.");
5389   Vlast_coding_system_used = Qnil;
5390
5391   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5392     "*Non-nil means inhibit code conversion of end-of-line format in all cases.");
5393   inhibit_eol_conversion = 0;
5394
5395   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5396     "Non-nil means process buffer inherits coding system of process output.\n\
5397 Bind it to t if the process output is to be treated as if it were a file\n\
5398 read from some filesystem.");
5399   inherit_process_coding_system = 0;
5400
5401   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5402     "Alist to decide a coding system to use for a file I/O operation.\n\
5403 The format is ((PATTERN . VAL) ...),\n\
5404 where PATTERN is a regular expression matching a file name,\n\
5405 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5406 If VAL is a coding system, it is used for both decoding and encoding\n\
5407 the file contents.\n\
5408 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5409 and the cdr part is used for encoding.\n\
5410 If VAL is a function symbol, the function must return a coding system\n\
5411 or a cons of coding systems which are used as above.\n\
5412 \n\
5413 See also the function `find-operation-coding-system'\n\
5414 and the variable `auto-coding-alist'.");
5415   Vfile_coding_system_alist = Qnil;
5416
5417   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5418     "Alist to decide a coding system to use for a process I/O operation.\n\
5419 The format is ((PATTERN . VAL) ...),\n\
5420 where PATTERN is a regular expression matching a program name,\n\
5421 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5422 If VAL is a coding system, it is used for both decoding what received\n\
5423 from the program and encoding what sent to the program.\n\
5424 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5425 and the cdr part is used for encoding.\n\
5426 If VAL is a function symbol, the function must return a coding system\n\
5427 or a cons of coding systems which are used as above.\n\
5428 \n\
5429 See also the function `find-operation-coding-system'.");
5430   Vprocess_coding_system_alist = Qnil;
5431
5432   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5433     "Alist to decide a coding system to use for a network I/O operation.\n\
5434 The format is ((PATTERN . VAL) ...),\n\
5435 where PATTERN is a regular expression matching a network service name\n\
5436 or is a port number to connect to,\n\
5437 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5438 If VAL is a coding system, it is used for both decoding what received\n\
5439 from the network stream and encoding what sent to the network stream.\n\
5440 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5441 and the cdr part is used for encoding.\n\
5442 If VAL is a function symbol, the function must return a coding system\n\
5443 or a cons of coding systems which are used as above.\n\
5444 \n\
5445 See also the function `find-operation-coding-system'.");
5446   Vnetwork_coding_system_alist = Qnil;
5447
5448   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5449     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF).");
5450   eol_mnemonic_unix = ':';
5451
5452   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5453     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5454   eol_mnemonic_dos = '\\';
5455
5456   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5457     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5458   eol_mnemonic_mac = '/';
5459
5460   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5461     "Mnemonic character indicating end-of-line format is not yet decided.");
5462   eol_mnemonic_undecided = ':';
5463
5464   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
5465     "*Non-nil enables character translation while encoding and decoding.");
5466   Venable_character_translation = Qt;
5467
5468   DEFVAR_LISP ("standard-translation-table-for-decode",
5469     &Vstandard_translation_table_for_decode,
5470     "Table for translating characters while decoding.");
5471   Vstandard_translation_table_for_decode = Qnil;
5472
5473   DEFVAR_LISP ("standard-translation-table-for-encode",
5474     &Vstandard_translation_table_for_encode,
5475     "Table for translating characters while encoding.");
5476   Vstandard_translation_table_for_encode = Qnil;
5477
5478   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5479     "Alist of charsets vs revision numbers.\n\
5480 While encoding, if a charset (car part of an element) is found,\n\
5481 designate it with the escape sequence identifying revision (cdr part of the element).");
5482   Vcharset_revision_alist = Qnil;
5483
5484   DEFVAR_LISP ("default-process-coding-system",
5485                &Vdefault_process_coding_system,
5486     "Cons of coding systems used for process I/O by default.\n\
5487 The car part is used for decoding a process output,\n\
5488 the cdr part is used for encoding a text to be sent to a process.");
5489   Vdefault_process_coding_system = Qnil;
5490
5491   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5492     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5493 This is a vector of length 256.\n\
5494 If Nth element is non-nil, the existence of code N in a file\n\
5495 \(or output of subprocess) doesn't prevent it from being detected as\n\
5496 a coding system of ISO 2022 variant which has a flag\n\
5497 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5498 or reading output of a subprocess.\n\
5499 Only 128th through 159th elements have a meaning.");
5500   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5501
5502   DEFVAR_LISP ("select-safe-coding-system-function",
5503                &Vselect_safe_coding_system_function,
5504     "Function to call to select safe coding system for encoding a text.\n\
5505 \n\
5506 If set, this function is called to force a user to select a proper\n\
5507 coding system which can encode the text in the case that a default\n\
5508 coding system used in each operation can't encode the text.\n\
5509 \n\
5510 The default value is `select-safe-coding-system' (which see).");
5511   Vselect_safe_coding_system_function = Qnil;
5512
5513 }
5514
5515 #endif /* emacs */