src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. End-of-line handlers
  29   6. C library functions
  30   7. Emacs Lisp library functions
  31   8. Post-amble
  32
  33 */
  34
  35 /*** GENERAL NOTE on CODING SYSTEM ***
  36
  37   Coding system is an encoding mechanism of one or more character
  38   sets.  Here's a list of coding systems which Emacs can handle.  When
  39   we say "decode", it means converting some other coding system to
  40   Emacs' internal format (emacs-mule), and when we say "encode",
  41   it means converting the coding system emacs-mule to some other
  42   coding system.
  43
  44   0. Emacs' internal format (emacs-mule)
  45
  46   Emacs itself holds a multi-lingual character in a buffer and a string
  47   in a special format.  Details are described in section 2.
  48
  49   1. ISO2022
  50
  51   The most famous coding system for multiple character sets.  X's
  52   Compound Text, various EUCs (Extended Unix Code), and coding
  53   systems used in Internet communication such as ISO-2022-JP are
  54   all variants of ISO2022.  Details are described in section 3.
  55
  56   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  57
  58   A coding system to encode character sets: ASCII, JISX0201, and
  59   JISX0208.  Widely used for PC's in Japan.  Details are described in
  60   section 4.
  61
  62   3. BIG5
  63
  64   A coding system to encode character sets: ASCII and Big5.  Widely
  65   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  66   described in section 4.  In this file, when we write "BIG5"
  67   (all uppercase), we mean the coding system, and when we write
  68   "Big5" (capitalized), we mean the character set.
  69
  70   4. Raw text
  71
  72   A coding system for a text containing random 8-bit code.  Emacs does
  73   no code conversion on such a text except for end-of-line format.
  74
  75   5. Other
  76
  77   If a user wants to read/write a text encoded in a coding system not
  78   listed above, he can supply a decoder and an encoder for it in CCL
  79   (Code Conversion Language) programs.  Emacs executes the CCL program
  80   while reading/writing.
  81
  82   Emacs represents a coding system by a Lisp symbol that has a property
  83   `coding-system'.  But, before actually using the coding system, the
  84   information about it is set in a structure of type `struct
  85   coding_system' for rapid processing.  See section 6 for more details.
  86
  87 */
  88
  89 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  90
  91   How end-of-line of a text is encoded depends on a system.  For
  92   instance, Unix's format is just one byte of `line-feed' code,
  93   whereas DOS's format is two-byte sequence of `carriage-return' and
  94   `line-feed' codes.  MacOS's format is usually one byte of
  95   `carriage-return'.
  96
  97   Since text characters encoding and end-of-line encoding are
  98   independent, any coding system described above can take
  99   any format of end-of-line.  So, Emacs has information of format of
 100   end-of-line in each coding-system.  See section 6 for more details.
 101
 102 */
 103
 104 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 105
 106   These functions check if a text between SRC and SRC_END is encoded
 107   in the coding system category XXX.  Each returns an integer value in
 108   which appropriate flag bits for the category XXX are set.  The flag
 109   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 110   template of these functions.  */
 111 #if 0
 112 int
 113 detect_coding_emacs_mule (src, src_end)
 114      unsigned char *src, *src_end;
 115 {
 116   ...
 117 }
 118 #endif
 119
 120 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 121
 122   These functions decode SRC_BYTES length text at SOURCE encoded in
 123   CODING to Emacs' internal format (emacs-mule).  The resulting text
 124   goes to a place pointed to by DESTINATION, the length of which
 125   should not exceed DST_BYTES.  These functions set the information of
 126   original and decoded texts in the members produced, produced_char,
 127   consumed, and consumed_char of the structure *CODING.
 128
 129   The return value is an integer (CODING_FINISH_XXX) indicating how
 130   the decoding finished.
 131
 132   DST_BYTES zero means that source area and destination area are
 133   overlapped, which means that we can produce a decoded text until it
 134   reaches the head of the not-yet-decoded source text.
 135
 136   Below is a template of these functions.  */
 137 #if 0
 138 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 139      struct coding_system *coding;
 140      unsigned char *source, *destination;
 141      int src_bytes, dst_bytes;
 142 {
 143   ...
 144 }
 145 #endif
 146
 147 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 148
 149   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 150   internal format (emacs-mule) to CODING.  The resulting text goes to
 151   a place pointed to by DESTINATION, the length of which should not
 152   exceed DST_BYTES.  These functions set the information of
 153   original and encoded texts in the members produced, produced_char,
 154   consumed, and consumed_char of the structure *CODING.
 155
 156   The return value is an integer (CODING_FINISH_XXX) indicating how
 157   the encoding finished.
 158
 159   DST_BYTES zero means that source area and destination area are
 160   overlapped, which means that we can produce an encoded text until it
 161   reaches the head of the not-yet-encoded source text.
 162
 163   Below is a template of these functions.  */
 164 #if 0
 165 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 166      struct coding_system *coding;
 167      unsigned char *source, *destination;
 168      int src_bytes, dst_bytes;
 169 {
 170   ...
 171 }
 172 #endif
 173
 174 /*** COMMONLY USED MACROS ***/
 175
 176 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 177    THREE_MORE_BYTES safely get one, two, and three bytes from the
 178    source text respectively.  If there are not enough bytes in the
 179    source, they jump to `label_end_of_loop'.  The caller should set
 180    variables `src' and `src_end' to appropriate areas in advance.  */
 181
 182 #define ONE_MORE_BYTE(c1)       /* Read one byte from `src' into C1, or leave the loop.  */ \
 183   do {                          \
 184     if (src < src_end)          /* at least one unread byte remains */ \
 185       c1 = *src++;              /* store it in C1 and advance `src' */ \
 186     else                        \
 187       goto label_end_of_loop;   /* source exhausted; the label must exist where the macro is expanded */ \
 188   } while (0)
 189
 190 #define TWO_MORE_BYTES(c1, c2)  /* Read two bytes from `src' into C1 and C2, or leave the loop.  */ \
 191   do {                          \
 192     if (src + 1 < src_end)      /* at least two unread bytes remain */ \
 193       c1 = *src++, c2 = *src++; /* the comma operator sequences the two reads in order */ \
 194     else                        \
 195       goto label_end_of_loop;   /* too few bytes: `src' is left unchanged, nothing is consumed */ \
 196   } while (0)
 197
 198 #define THREE_MORE_BYTES(c1, c2, c3)            /* Read three bytes from `src' into C1, C2 and C3, or leave the loop.  */ \
 199   do {                                          \
 200     if (src + 2 < src_end)                      /* at least three unread bytes remain */ \
 201       c1 = *src++, c2 = *src++, c3 = *src++;    /* the comma operator sequences the three reads in order */ \
 202     else                                        \
 203       goto label_end_of_loop;                   /* too few bytes: `src' is left unchanged, nothing is consumed */ \
 204   } while (0)
 205
 206 /* The following three macros DECODE_CHARACTER_ASCII,
 207    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 208    the multi-byte form of a character of each class at the place
 209    pointed by `dst'.  The caller should set the variable `dst' to
 210    point to an appropriate area and the variable `coding' to point to
 211    the coding-system of the currently decoding text in advance.  */
 212
 213 /* Decode one ASCII character C.  */
 214
 215 #define DECODE_CHARACTER_ASCII(c)                               /* Append ASCII character C at `dst'.  */ \
 216   do {                                                          \
 217     if (COMPOSING_P (coding->composing))                        /* C is a component of a composite character */ \
 218       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       /* two-byte component form: 0xA0, then C with the MSB set; produced_char is not incremented on this path */ \
 219     else                                                        \
 220       {                                                         \
 221         *dst++ = (c);                                           /* plain ASCII is stored as the single byte C */ \
 222         coding->produced_char++;                                /* one more character produced */ \
 223       }                                                         \
 224   } while (0)
 225
 226 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 227    position-code is C.  */
 228
 229 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         /* Append leading code(s) of CHARSET and position-code C at `dst'.  */ \
 230   do {                                                                  \
 231     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   /* base leading-code of CHARSET */ \
 232     if (COMPOSING_P (coding->composing))                                \
 233       *dst++ = leading_code + 0x20;                                     /* component of a composite: leading code is stored with 0x20 added; produced_char is not incremented */ \
 234     else                                                                \
 235       {                                                                 \
 236         *dst++ = leading_code;                                          \
 237         coding->produced_char++;                                        /* one more character produced */ \
 238       }                                                                 \
 239     if (leading_code = CHARSET_LEADING_CODE_EXT (charset))              /* intentional assignment: fetch the extended leading-code, then test it for nonzero */ \
 240       *dst++ = leading_code;                                            /* emitted only when the extended leading-code is nonzero */ \
 241     *dst++ = (c) | 0x80;                                                /* position-code is stored with the MSB set */ \
 242   } while (0)
 243
 244 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 245    position-codes are C1 and C2.  */
 246
 247 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    /* Append leading code(s) of CHARSET and position-codes C1, C2 at `dst'.  */ \
 248   do {                                                  \
 249     DECODE_CHARACTER_DIMENSION1 (charset, c1);          /* leading code(s) and first position-code; also does the produced_char accounting */ \
 250     *dst++ = (c2) | 0x80;                               /* second position-code, stored with the MSB set */ \
 251   } while (0)
 252
 253 \f
 254 /*** 1. Preamble ***/
 255
 256 #include <stdio.h>
 257
 258 #ifdef emacs
 259
 260 #include <config.h>
 261 #include "lisp.h"
 262 #include "buffer.h"
 263 #include "charset.h"
 264 #include "ccl.h"
 265 #include "coding.h"
 266 #include "window.h"
 267
 268 #else  /* not emacs */
 269
 270 #include "mulelib.h"
 271
 272 #endif /* not emacs */
 273
 274 Lisp_Object Qcoding_system, Qeol_type;  /* NOTE(review): the Q-prefixed objects below are presumably Lisp symbols set up at initialization -- confirm */
 275 Lisp_Object Qbuffer_file_coding_system;
 276 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 277 Lisp_Object Qno_conversion, Qundecided;
 278 Lisp_Object Qcoding_system_history;
 279 Lisp_Object Qsafe_charsets;
 280
 281 extern Lisp_Object Qinsert_file_contents, Qwrite_region;  /* only declared here; their definitions are not in this section */
 282 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 283 Lisp_Object Qstart_process, Qopen_network_stream;
 284 Lisp_Object Qtarget_idx;
 285
 286 Lisp_Object Vselect_safe_coding_system_function;  /* NOTE(review): presumably backs a Lisp variable holding a function -- confirm where it is bound */
 287
 288 /* Mnemonic character of each format of end-of-line.  */
 289 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 290 /* Mnemonic character to indicate format of end-of-line is not yet
 291    decided.  */
 292 int eol_mnemonic_undecided;
 293
 294 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 295    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 296 int system_eol_type;
 297
 298 #ifdef emacs
 299
 300 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 301
 302 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 303
 304 /* Coding system emacs-mule and raw-text are for converting only
 305    end-of-line format.  */
 306 Lisp_Object Qemacs_mule, Qraw_text;
 307
 308 /* Coding-systems are handed between Emacs Lisp programs and C internal
 309    routines by the following three variables.  */
 310 /* Coding-system for reading files and receiving data from process.  */
 311 Lisp_Object Vcoding_system_for_read;
 312 /* Coding-system for writing files and sending data to process.  */
 313 Lisp_Object Vcoding_system_for_write;
 314 /* Coding-system actually used in the latest I/O.  */
 315 Lisp_Object Vlast_coding_system_used;
 316
 317 /* A vector of length 256 which contains information about special
 318    Latin codes (especially for dealing with Microsoft codes).  */
 319 Lisp_Object Vlatin_extra_code_table;
 320
 321 /* Flag to inhibit code conversion of end-of-line format.  */
 322 int inhibit_eol_conversion;
 323
 324 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 325 int inherit_process_coding_system;
 326
 327 /* Coding system to be used to encode text for terminal display.  */
 328 struct coding_system terminal_coding;
 329
 330 /* Coding system to be used to encode text for terminal display when
 331    terminal coding system is nil.  */
 332 struct coding_system safe_terminal_coding;
 333
 334 /* Coding system of what is sent from terminal keyboard.  */
 335 struct coding_system keyboard_coding;
 336
 337 Lisp_Object Vfile_coding_system_alist;
 338 Lisp_Object Vprocess_coding_system_alist;
 339 Lisp_Object Vnetwork_coding_system_alist;
 340
 341 #endif /* emacs */
 342
 343 Lisp_Object Qcoding_category, Qcoding_category_index;
 344
 345 /* List of symbols `coding-category-xxx' ordered by priority.  */
 346 Lisp_Object Vcoding_category_list;
 347
 348 /* Table of coding categories (Lisp symbols).  */
 349 Lisp_Object Vcoding_category_table;
 350
 351 /* Table of symbol names, one per coding category.  NOTE(review): presumably indexed by the CODING_CATEGORY_IDX_XXX values, so the order below must match them -- confirm against coding.h.  */
 352 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 353   "coding-category-emacs-mule",
 354   "coding-category-sjis",
 355   "coding-category-iso-7",
 356   "coding-category-iso-7-tight",
 357   "coding-category-iso-8-1",
 358   "coding-category-iso-8-2",
 359   "coding-category-iso-7-else",
 360   "coding-category-iso-8-else",
 361   "coding-category-big5",
 362   "coding-category-raw-text",
 363   "coding-category-binary"
 364 };
 365
 366 /* Table of pointers to coding systems corresponding to each coding
 367    categories.  */
 368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 369
 370 /* Table of coding category masks.  The Nth element is the mask of the
 371    coding category whose priority is Nth.  File-local (static).  */
 372 static
 373 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 374
 375 /* Flag to tell if we look up translation table on character code
 376    conversion.  */
 377 Lisp_Object Venable_character_translation;
 378 /* Standard translation table to look up on decoding (reading).  */
 379 Lisp_Object Vstandard_translation_table_for_decode;
 380 /* Standard translation table to look up on encoding (writing).  */
 381 Lisp_Object Vstandard_translation_table_for_encode;
 382
 383 Lisp_Object Qtranslation_table;
 384 Lisp_Object Qtranslation_table_id;
 385 Lisp_Object Qtranslation_table_for_decode;
 386 Lisp_Object Qtranslation_table_for_encode;
 387
 388 /* Alist of charsets vs revision number.  */
 389 Lisp_Object Vcharset_revision_alist;
 390
 391 /* Default coding systems used for process I/O.  */
 392 Lisp_Object Vdefault_process_coding_system;
 393
 394 \f
 395 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 396
 397 /* Emacs' internal format for encoding multiple character sets is a
 398    kind of multi-byte encoding, i.e. characters are encoded by
 399    variable-length sequences of one-byte codes.  ASCII characters
 400    and control characters (e.g. `tab', `newline') are represented by
 401    one-byte sequences which are their ASCII codes, in the range 0x00
 402    through 0x7F.  The other characters are represented by a sequence
 403    of `base leading-code', optional `extended leading-code', and one
 404    or two `position-code's.  The length of the sequence is determined
 405    by the base leading-code.  Leading-code takes the range 0x80
 406    through 0x9F, whereas extended leading-code and position-code take
 407    the range 0xA0 through 0xFF.  See `charset.h' for more details
 408    about leading-code and position-code.
 409
 410    There's one exception to this rule.  Special leading-code
 411    `leading-code-composition' denotes that the following several
 412    characters should be composed into one character.  Leading-codes of
 413    components (except for ASCII) are added 0x20.  An ASCII character
 414    component is represented by a 2-byte sequence of `0xA0' and
 415    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 416    details of composite character.  Hence, we can summarize the code
 417    range as follows:
 418
 419    --- CODE RANGE of Emacs' internal format ---
 420    (character set)      (range)
 421    ASCII                0x00 .. 0x7F
 422    ELSE (1st byte)      0x80 .. 0x9F
 423         (rest bytes)    0xA0 .. 0xFF
 424    ---------------------------------------------
 425
 426   */
 427
 428 enum emacs_code_class_type emacs_code_class[256];  /* Class of each byte value 0..255; detect_coding_emacs_mule switches on it.  NOTE(review): it is filled in outside this section -- confirm where.  */
 429
 430 /* Consume one byte at *SRC and go to the next statement only if it is 0xA0 or greater.  If no byte is
 431    left, jump to `label_end_of_switch'; if the byte is less than 0xA0, return 0 from the enclosing function.  */
 432 #define CHECK_CODE_RANGE_A0_FF  \
 433   do {                          \
 434     if (src >= src_end)         /* source exhausted before the sequence was complete */ \
 435       goto label_end_of_switch; /* the label must exist where the macro is expanded */ \
 436     else if (*src++ < 0xA0)     /* the byte is consumed even when the test fails */ \
 437       return 0;                 \
 438   } while (0)
 439
 440 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 441    Scan the bytes from SRC up to (not including) SRC_END.  Return 0 as soon as a byte that cannot appear in Emacs' internal format
 442    is found, else return CODING_CATEGORY_MASK_EMACS_MULE.  A multi-byte sequence cut short by SRC_END is not treated as an error.  */
 443
 444 int
 445 detect_coding_emacs_mule (src, src_end)
 446      unsigned char *src, *src_end;
 447 {
 448   unsigned char c;
 449   int composing = 0;  /* nonzero while scanning the components of a composite character */
 450
 451   while (src < src_end)
 452     {
 453       c = *src++;
 454
 455       if (composing)
 456         {
 457           if (c < 0xA0)
 458             composing = 0;  /* a byte below 0xA0 ends the composition; classify it normally */
 459           else
 460             c -= 0x20;  /* component leading codes are stored with 0x20 added; undo that before classifying */
 461         }
 462
 463       switch (emacs_code_class[c])
 464         {
 465         case EMACS_ascii_code:
 466         case EMACS_linefeed_code:
 467           break;  /* single-byte code, nothing more to check */
 468
 469         case EMACS_control_code:
 470           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 471             return 0;  /* ESC, SI and SO make the text be rejected as emacs-mule */
 472           break;
 473
 474         case EMACS_invalid_code:
 475           return 0;
 476
 477         case EMACS_leading_code_composition: /* c == 0x80 */
 478           if (composing)
 479             CHECK_CODE_RANGE_A0_FF;  /* reached when a component byte 0xA0 was reduced to 0x80 above: the following byte must be >= 0xA0 */
 480           else
 481             composing = 1;  /* start of a composition sequence */
 482           break;
 483
 484         case EMACS_leading_code_4:
 485           CHECK_CODE_RANGE_A0_FF;
 486           /* fall down to check it two more times ...  */
 487
 488         case EMACS_leading_code_3:
 489           CHECK_CODE_RANGE_A0_FF;
 490           /* fall down to check it one more time ...  */
 491
 492         case EMACS_leading_code_2:
 493           CHECK_CODE_RANGE_A0_FF;
 494           break;
 495
 496         default:
 497         label_end_of_switch:  /* CHECK_CODE_RANGE_A0_FF jumps here when the source runs out */
 498           break;
 499         }
 500     }
 501   return CODING_CATEGORY_MASK_EMACS_MULE;  /* no invalid byte was found */
 502 }
 503
 504 \f
 505 /*** 3. ISO2022 handlers ***/
 506
 507 /* The following note describes the coding system ISO2022 briefly.
 508    Since the intention of this note is to help in understanding of
 509    the programs in this file, some parts are NOT ACCURATE or OVERLY
 510    SIMPLIFIED.  For the thorough understanding, please refer to the
 511    original document of ISO2022.
 512
 513    ISO2022 provides many mechanisms to encode several character sets
 514   in 7-bit and 8-bit environment.  If one chooses 7-bit environment,
 515    all text is encoded by codes of less than 128.  This may make the
 516    encoded text a little bit longer, but the text gets more stability
 517    to pass through several gateways (some of them strip off the MSB).
 518
 519    There are two kinds of character set: control character set and
 520    graphic character set.  The former contains control characters such
 521    as `newline' and `escape' to provide control functions (control
 522    functions are provided also by escape sequences).  The latter
 523   contains graphic characters such as 'A' and '-'.  Emacs recognizes
 524    two control character sets and many graphic character sets.
 525
 526    Graphic character sets are classified into one of the following
 527    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 528    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 529    bytes (DIMENSION) and the number of characters in one dimension
 530    (CHARS) of the set.  In addition, each character set is assigned an
 531    identification tag (called "final character" and denoted as <F>
 532    here after) which is unique in each class.  <F> of each character
 533    set is decided by ECMA(*) when it is registered in ISO.  Code range
 534    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 535
 536    Note (*): ECMA = European Computer Manufacturers Association
 537
 538    Here are examples of graphic character set [NAME(<F>)]:
 539         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 540         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 541         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 542         o DIMENSION2_CHARS96 -- none for the moment
 543
 544    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 545         C0 [0x00..0x1F] -- control character plane 0
 546         GL [0x20..0x7F] -- graphic character plane 0
 547         C1 [0x80..0x9F] -- control character plane 1
 548         GR [0xA0..0xFF] -- graphic character plane 1
 549
 550    A control character set is directly designated and invoked to C0 or
 551    C1 by an escape sequence.  The most common case is that ISO646's
 552    control character set is designated/invoked to C0 and ISO6429's
 553    control character set is designated/invoked to C1, and usually
 554    these designations/invocations are omitted in a coded text.  With
 555    7-bit environment, only C0 can be used, and a control character for
 556    C1 is encoded by an appropriate escape sequence to fit in the
 557   environment.  All control characters for C1 have corresponding
 558   escape sequences defined.
 559
 560    A graphic character set is at first designated to one of four
 561    graphic registers (G0 through G3), then these graphic registers are
 562    invoked to GL or GR.  These designations and invocations can be
 563    done independently.  The most common case is that G0 is invoked to
 564    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 565    these invocations and designations are omitted in a coded text.
 566    With 7-bit environment, only GL can be used.
 567
 568    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 569    and 0x7F of GL area work as control characters SPACE and DEL
 570    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 571
 572    There are two ways of invocation: locking-shift and single-shift.
 573    With locking-shift, the invocation lasts until the next different
 574    invocation, whereas with single-shift, the invocation works only
 575    for the following character and doesn't affect locking-shift.
 576    Invocations are done by the following control characters or escape
 577    sequences.
 578
 579    ----------------------------------------------------------------------
 580    function             control char    escape sequence description
 581    ----------------------------------------------------------------------
 582    SI  (shift-in)               0x0F    none            invoke G0 to GL
 583    SO  (shift-out)              0x0E    none            invoke G1 to GL
 584    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 585    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 586    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 587    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 588    ----------------------------------------------------------------------
 589    The first four are for locking-shift.  Control characters for these
 590    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 591
 592    Designations are done by the following escape sequences.
 593    ----------------------------------------------------------------------
 594    escape sequence      description
 595    ----------------------------------------------------------------------
 596    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 597    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 598    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 599    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 600    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 601    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 602    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 603    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 604    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 605    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 606    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 607    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 608    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 609    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 610    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 611    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 612    ----------------------------------------------------------------------
 613
 614    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 615    of dimension 1, chars 94, and final character <F>, and etc.
 616
 617    Note (*): Although these designations are not allowed in ISO2022,
 618    Emacs accepts them on decoding, and produces them on encoding
 619    CHARS96 character set in a coding system which is characterized as
 620    7-bit environment, non-locking-shift, and non-single-shift.
 621
 622    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 623    '(' can be omitted.  We call this as "short-form" here after.
 624
 625    Now you may notice that there are a lot of ways for encoding the
 626   same multilingual text in ISO2022.  Actually, there exist many
 627   coding systems such as Compound Text (used in X's inter-client
 628   communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 629    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 630    localized platforms), and all of these are variants of ISO2022.
 631
 632    In addition to the above, Emacs handles two more kinds of escape
 633    sequences: ISO6429's direction specification and Emacs' private
 634    sequence for specifying character composition.
 635
 636    ISO6429's direction specification takes the following format:
 637         o CSI ']'      -- end of the current direction
 638         o CSI '0' ']'  -- end of the current direction
 639         o CSI '1' ']'  -- start of left-to-right text
 640         o CSI '2' ']'  -- start of right-to-left text
 641    The control character CSI (0x9B: control sequence introducer) is
 642    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 643
 644    Character composition specification takes the following format:
 645         o ESC '0' -- start character composition
 646         o ESC '1' -- end character composition
 647    Since these are not standard escape sequences of any ISO, the use
 648    of them for these meaning is restricted to Emacs only.  */
 649
 650 enum iso_code_class_type iso_code_class[256];  /* One class per byte value 0..255.  NOTE(review): presumably filled in at initialization and used by the ISO2022 handlers -- confirm.  */
 651
 652 #define CHARSET_OK(idx, charset)                        /* Nonzero if the coding system of category IDX has CHARSET in safe_charsets or has a requested designation for it.  */ \
 653   (coding_system_table[idx]->safe_charsets[charset]     /* NOTE(review): dereferences coding_system_table[idx] with no NULL check -- confirm every slot used is set */ \
 654    || (CODING_SPEC_ISO_REQUESTED_DESIGNATION            \
 655        (coding_system_table[idx], charset)              \
 656        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 657
 658 #define SHIFT_OUT_OK(idx) /* Nonzero if the coding system of category IDX has a non-negative initial designation for register 1 (presumably G1 -- confirm).  */ \
 659   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 660
 661 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 662    Check if a text is encoded in ISO2022.  If it is, returns an
 663    integer in which appropriate flag bits any of:
 664         CODING_CATEGORY_MASK_ISO_7
 665         CODING_CATEGORY_MASK_ISO_7_TIGHT
 666         CODING_CATEGORY_MASK_ISO_8_1
 667         CODING_CATEGORY_MASK_ISO_8_2
 668         CODING_CATEGORY_MASK_ISO_7_ELSE
 669         CODING_CATEGORY_MASK_ISO_8_ELSE
 670    are set.  If a code which should never appear in ISO2022 is found,
 671    returns 0.  */
 672
 673 int
 674 detect_coding_iso2022 (src, src_end)
 675      unsigned char *src, *src_end;
 676 {
 677   int mask = CODING_CATEGORY_MASK_ISO;
 678   int mask_found = 0;
 679   int reg[4], shift_out = 0;
 680   int c, c1, i, charset;
 681
 682   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 683   while (mask && src < src_end)
 684     {
 685       c = *src++;
 686       switch (c)
 687         {
 688         case ISO_CODE_ESC:
 689           if (src >= src_end)
 690             break;
 691           c = *src++;
 692           if (c >= '(' && c <= '/')
 693             {
 694               /* Designation sequence for a charset of dimension 1.  */
 695               if (src >= src_end)
 696                 break;
 697               c1 = *src++;
 698               if (c1 < ' ' || c1 >= 0x80
 699                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 700                 /* Invalid designation sequence.  Just ignore.  */
 701                 break;
 702               reg[(c - '(') % 4] = charset;
 703             }
 704           else if (c == '$')
 705             {
 706               /* Designation sequence for a charset of dimension 2.  */
 707               if (src >= src_end)
 708                 break;
 709               c = *src++;
 710               if (c >= '@' && c <= 'B')
 711                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 712                 reg[0] = charset = iso_charset_table[1][0][c];
 713               else if (c >= '(' && c <= '/')
 714                 {
 715                   if (src >= src_end)
 716                     break;
 717                   c1 = *src++;
 718                   if (c1 < ' ' || c1 >= 0x80
 719                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 720                     /* Invalid designation sequence.  Just ignore.  */
 721                     break;
 722                   reg[(c - '(') % 4] = charset;
 723                 }
 724               else
 725                 /* Invalid designation sequence.  Just ignore.  */
 726                 break;
 727             }
 728           else if (c == 'N' || c == 'n')
 729             {
 730               if (shift_out == 0
 731                   && (reg[1] >= 0
 732                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 733                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 734                 {
 735                   /* Locking shift out.  */
 736                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 737                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 738                   shift_out = 1;
 739                 }
 740               break;
 741             }
 742           else if (c == 'O' || c == 'o')
 743             {
 744               if (shift_out == 1)
 745                 {
 746                   /* Locking shift in.  */
 747                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 748                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 749                   shift_out = 0;
 750                 }
 751               break;
 752             }
 753           else if (c == '0' || c == '1' || c == '2')
 754             /* Start/end composition.  Just ignore.  */
 755             break;
 756           else
 757             /* Invalid escape sequence.  Just ignore.  */
 758             break;
 759
 760           /* We found a valid designation sequence for CHARSET.  */
 761           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 762           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 763             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 764           else
 765             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 766           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 767             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 768           else
 769             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 770           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 771             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 772           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 773             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 774           break;
 775
 776         case ISO_CODE_SO:
 777           if (shift_out == 0
 778               && (reg[1] >= 0
 779                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 780                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 781             {
 782               /* Locking shift out.  */
 783               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 784               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 785             }
 786           break;
 787
 788         case ISO_CODE_SI:
 789           if (shift_out == 1)
 790             {
 791               /* Locking shift in.  */
 792               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 793               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 794             }
 795           break;
 796
 797         case ISO_CODE_CSI:
 798         case ISO_CODE_SS2:
 799         case ISO_CODE_SS3:
 800           {
 801             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 802
 803             if (c != ISO_CODE_CSI)
 804               {
 805                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 806                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 807                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 808                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 809                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 810                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 811               }
 812             if (VECTORP (Vlatin_extra_code_table)
 813                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 814               {
 815                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 816                     & CODING_FLAG_ISO_LATIN_EXTRA)
 817                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 818                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 819                     & CODING_FLAG_ISO_LATIN_EXTRA)
 820                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 821               }
 822             mask &= newmask;
 823             mask_found |= newmask;
 824           }
 825           break;
 826
 827         default:
 828           if (c < 0x80)
 829             break;
 830           else if (c < 0xA0)
 831             {
 832               if (VECTORP (Vlatin_extra_code_table)
 833                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 834                 {
 835                   int newmask = 0;
 836
 837                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 838                       & CODING_FLAG_ISO_LATIN_EXTRA)
 839                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 840                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 841                       & CODING_FLAG_ISO_LATIN_EXTRA)
 842                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 843                   mask &= newmask;
 844                   mask_found |= newmask;
 845                 }
 846               else
 847                 return 0;
 848             }
 849           else
 850             {
 851               unsigned char *src_begin = src;
 852
 853               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 854                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 855               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 856               while (src < src_end && *src >= 0xA0)
 857                 src++;
 858               if ((src - src_begin - 1) & 1 && src < src_end)
 859                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 860               else
 861                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 862             }
 863           break;
 864         }
 865     }
 866
 867   return (mask & mask_found);
 868 }
 869
 870 /* Decode a character of which charset is CHARSET and the 1st position
 871    code is C1.  If dimension of CHARSET is 2, the 2nd position code is
 872    fetched from SRC and set to C2.  If CHARSET is negative, it means
 873    that we are decoding ill formed text, and what we can do is just to
 874    read C1 as is.  */
 875
 876 #define DECODE_ISO_CHARACTER(charset, c1)                               \
 877   do {                                                                  \
 878     int c_alt, charset_alt = (charset);                                 \
 879     if (COMPOSING_HEAD_P (coding->composing))                           \
 880       {                                                                 \
 881         *dst++ = LEADING_CODE_COMPOSITION;                              \
 882         if (COMPOSING_WITH_RULE_P (coding->composing))                  \
 883           /* To tell composition rules are embeded.  */                 \
 884           *dst++ = 0xFF;                                                \
 885         coding->composing += 2;                                         \
 886       }                                                                 \
 887     if ((charset) >= 0)                                                 \
 888       {                                                                 \
 889         if (CHARSET_DIMENSION (charset) == 2)                           \
 890           {                                                             \
 891             ONE_MORE_BYTE (c2);                                         \
 892             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
 893                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
 894               {                                                         \
 895                 src--;                                                  \
 896                 c2 = ' ';                                               \
 897               }                                                         \
 898           }                                                             \
 899         if (!NILP (translation_table)                                   \
 900             && ((c_alt = translate_char (translation_table,             \
 901                                          -1, (charset), c1, c2)) >= 0)) \
 902           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
 903       }                                                                 \
 904     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
 905       DECODE_CHARACTER_ASCII (c1);                                      \
 906     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
 907       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
 908     else                                                                \
 909       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
 910     if (COMPOSING_WITH_RULE_P (coding->composing))                      \
 911       /* To tell a composition rule follows.  */                        \
 912       coding->composing = COMPOSING_WITH_RULE_RULE;                     \
 913   } while (0)
 914
 915 /* Set designation state into CODING.  */
 916 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 917   do {                                                                     \
 918     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 919                                      make_number (chars),                  \
 920                                      make_number (final_char));            \
 921     if (charset >= 0                                                       \
 922         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 923             || coding->safe_charsets[charset]))                            \
 924       {                                                                    \
 925         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 926             && reg == 0                                                    \
 927             && charset == CHARSET_ASCII)                                   \
 928           {                                                                \
 929             /* We should insert this designation sequence as is so         \
 930                that it is surely written back to a file.  */               \
 931             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 932             goto label_invalid_code;                                       \
 933           }                                                                \
 934         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 935         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 936             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 937           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 938         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 939       }                                                                    \
 940     else                                                                   \
 941       {                                                                    \
 942         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 943         goto label_invalid_code;                                           \
 944       }                                                                    \
 945   } while (0)
 946
 947 /* Check if the current composing sequence contains only valid codes.
 948    If the composing sequence doesn't end before SRC_END, return -1.
 949    Else, if it contains only valid codes, return 0.
 950    Else return the length of the composing sequence.  */
 951
 952 int
 953 check_composing_code (coding, src, src_end)
 954      struct coding_system *coding;
 955      unsigned char *src, *src_end;
 956 {
 957   unsigned char *src_start = src;
 958   int invalid_code_found = 0;
 959   int charset, c, c1, dim;
 960
 961   while (src < src_end)
 962     {
 963       if (*src++ != ISO_CODE_ESC) continue;
 964       if (src >= src_end) break;
 965       if ((c = *src++) == '1') /* end of compsition */
 966         return (invalid_code_found ? src - src_start : 0);
 967       if (src + 2 >= src_end) break;
 968       if (!coding->flags & CODING_FLAG_ISO_DESIGNATION)
 969         invalid_code_found = 1;
 970       else
 971         {
 972           dim = 0;
 973           if (c == '$')
 974             {
 975               dim = 1;
 976               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 977             }
 978           if (c >= '(' && c <= '/')
 979             {
 980               c1 = *src++;
 981               if ((c1 < ' ' || c1 >= 0x80)
 982                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 983                   || ! coding->safe_charsets[charset]
 984                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 985                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 986                 invalid_code_found = 1;
 987             }
 988           else
 989             invalid_code_found = 1;
 990         }
 991     }
 992   return (invalid_code_found
 993           ? src - src_start
 994           : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
 995 }
 996
 997 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 998
 999 int
1000 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1001      struct coding_system *coding;
1002      unsigned char *source, *destination;
1003      int src_bytes, dst_bytes;
1004 {
1005   unsigned char *src = source;
1006   unsigned char *src_end = source + src_bytes;
1007   unsigned char *dst = destination;
1008   unsigned char *dst_end = destination + dst_bytes;
1009   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1010      from DST_END to assure that overflow checking is necessary only
1011      at the head of loop.  */
1012   unsigned char *adjusted_dst_end = dst_end - 6;
1013   int charset;
1014   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1015   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1016   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1017   Lisp_Object translation_table
1018     = coding->translation_table_for_decode;
1019   int result = CODING_FINISH_NORMAL;
1020
1021   if (!NILP (Venable_character_translation) && NILP (translation_table))
1022     translation_table = Vstandard_translation_table_for_decode;
1023
1024   coding->produced_char = 0;
1025   coding->fake_multibyte = 0;
1026   while (src < src_end && (dst_bytes
1027                            ? (dst < adjusted_dst_end)
1028                            : (dst < src - 6)))
1029     {
1030       /* SRC_BASE remembers the start position in source in each loop.
1031          The loop will be exited when there's not enough source text
1032          to analyze long escape sequence or 2-byte code (within macros
1033          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1034          to SRC_BASE before exiting.  */
1035       unsigned char *src_base = src;
1036       int c1 = *src++, c2;
1037
1038       switch (iso_code_class [c1])
1039         {
1040         case ISO_0x20_or_0x7F:
1041           if (!coding->composing
1042               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1043             {
1044               /* This is SPACE or DEL.  */
1045               *dst++ = c1;
1046               coding->produced_char++;
1047               break;
1048             }
1049           /* This is a graphic character, we fall down ...  */
1050
1051         case ISO_graphic_plane_0:
1052           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1053             {
1054               /* This is a composition rule.  */
1055               *dst++ = c1 | 0x80;
1056               coding->composing = COMPOSING_WITH_RULE_TAIL;
1057             }
1058           else
1059             DECODE_ISO_CHARACTER (charset0, c1);
1060           break;
1061
1062         case ISO_0xA0_or_0xFF:
1063           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1064               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1065             goto label_invalid_code;
1066           /* This is a graphic character, we fall down ... */
1067
1068         case ISO_graphic_plane_1:
1069           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1070             goto label_invalid_code;
1071           else
1072             DECODE_ISO_CHARACTER (charset1, c1);
1073           break;
1074
1075         case ISO_control_code:
1076           /* All ISO2022 control characters in this class have the
1077              same representation in Emacs internal format.  */
1078           if (c1 == '\n'
1079               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1080               && (coding->eol_type == CODING_EOL_CR
1081                   || coding->eol_type == CODING_EOL_CRLF))
1082             {
1083               result = CODING_FINISH_INCONSISTENT_EOL;
1084               goto label_end_of_loop_2;
1085             }
1086           *dst++ = c1;
1087           coding->produced_char++;
1088           break;
1089
1090         case ISO_carriage_return:
1091           if (coding->eol_type == CODING_EOL_CR)
1092             *dst++ = '\n';
1093           else if (coding->eol_type == CODING_EOL_CRLF)
1094             {
1095               ONE_MORE_BYTE (c1);
1096               if (c1 == ISO_CODE_LF)
1097                 *dst++ = '\n';
1098               else
1099                 {
1100                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1101                     {
1102                       result = CODING_FINISH_INCONSISTENT_EOL;
1103                       goto label_end_of_loop_2;
1104                     }
1105                   src--;
1106                   *dst++ = '\r';
1107                 }
1108             }
1109           else
1110             *dst++ = c1;
1111           coding->produced_char++;
1112           break;
1113
1114         case ISO_shift_out:
1115           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1116               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1117             goto label_invalid_code;
1118           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1119           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1120           break;
1121
1122         case ISO_shift_in:
1123           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1124             goto label_invalid_code;
1125           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1126           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1127           break;
1128
1129         case ISO_single_shift_2_7:
1130         case ISO_single_shift_2:
1131           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1132             goto label_invalid_code;
1133           /* SS2 is handled as an escape sequence of ESC 'N' */
1134           c1 = 'N';
1135           goto label_escape_sequence;
1136
1137         case ISO_single_shift_3:
1138           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1139             goto label_invalid_code;
1140           /* SS2 is handled as an escape sequence of ESC 'O' */
1141           c1 = 'O';
1142           goto label_escape_sequence;
1143
1144         case ISO_control_sequence_introducer:
1145           /* CSI is handled as an escape sequence of ESC '[' ...  */
1146           c1 = '[';
1147           goto label_escape_sequence;
1148
1149         case ISO_escape:
1150           ONE_MORE_BYTE (c1);
1151         label_escape_sequence:
1152           /* Escape sequences handled by Emacs are invocation,
1153              designation, direction specification, and character
1154              composition specification.  */
1155           switch (c1)
1156             {
1157             case '&':           /* revision of following character set */
1158               ONE_MORE_BYTE (c1);
1159               if (!(c1 >= '@' && c1 <= '~'))
1160                 goto label_invalid_code;
1161               ONE_MORE_BYTE (c1);
1162               if (c1 != ISO_CODE_ESC)
1163                 goto label_invalid_code;
1164               ONE_MORE_BYTE (c1);
1165               goto label_escape_sequence;
1166
1167             case '$':           /* designation of 2-byte character set */
1168               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1169                 goto label_invalid_code;
1170               ONE_MORE_BYTE (c1);
1171               if (c1 >= '@' && c1 <= 'B')
1172                 {       /* designation of JISX0208.1978, GB2312.1980,
1173                                    or JISX0208.1980 */
1174                   DECODE_DESIGNATION (0, 2, 94, c1);
1175                 }
1176               else if (c1 >= 0x28 && c1 <= 0x2B)
1177                 {       /* designation of DIMENSION2_CHARS94 character set */
1178                   ONE_MORE_BYTE (c2);
1179                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1180                 }
1181               else if (c1 >= 0x2C && c1 <= 0x2F)
1182                 {       /* designation of DIMENSION2_CHARS96 character set */
1183                   ONE_MORE_BYTE (c2);
1184                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1185                 }
1186               else
1187                 goto label_invalid_code;
1188               break;
1189
1190             case 'n':           /* invocation of locking-shift-2 */
1191               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1192                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1193                 goto label_invalid_code;
1194               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1195               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1196               break;
1197
1198             case 'o':           /* invocation of locking-shift-3 */
1199               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1200                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1201                 goto label_invalid_code;
1202               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1203               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1204               break;
1205
1206             case 'N':           /* invocation of single-shift-2 */
1207               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1208                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1209                 goto label_invalid_code;
1210               ONE_MORE_BYTE (c1);
1211               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1212               DECODE_ISO_CHARACTER (charset, c1);
1213               break;
1214
1215             case 'O':           /* invocation of single-shift-3 */
1216               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1217                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1218                 goto label_invalid_code;
1219               ONE_MORE_BYTE (c1);
1220               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1221               DECODE_ISO_CHARACTER (charset, c1);
1222               break;
1223
1224             case '0': case '2': /* start composing */
1225               /* Before processing composing, we must be sure that all
1226                  characters being composed are supported by CODING.
1227                  If not, we must give up composing and insert the
1228                  bunch of codes for composing as is without decoding.  */
1229               {
1230                 int result1;
1231
1232                 result1 = check_composing_code (coding, src, src_end);
1233                 if (result1 == 0)
1234                   {
1235                     coding->composing = (c1 == '0'
1236                                          ? COMPOSING_NO_RULE_HEAD
1237                                          : COMPOSING_WITH_RULE_HEAD);
1238                     coding->produced_char++;
1239                   }
1240                 else if (result1 > 0)
1241                   {
1242                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1243                       {
1244                         bcopy (src_base, dst, result1 + 2);
1245                         src += result1;
1246                         dst += result1 + 2;
1247                         coding->produced_char += result1 + 2;
1248                       }
1249                     else
1250                       {
1251                         result = CODING_FINISH_INSUFFICIENT_DST;
1252                         goto label_end_of_loop_2;
1253                       }
1254                   }
1255                 else
1256                   goto label_end_of_loop;
1257               }
1258               break;
1259
1260             case '1':           /* end composing */
1261               coding->composing = COMPOSING_NO;
1262               break;
1263
1264             case '[':           /* specification of direction */
1265               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1266                 goto label_invalid_code;
1267               /* For the moment, nested direction is not supported.
1268                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1269                  left-to-right, and nozero means right-to-left.  */
1270               ONE_MORE_BYTE (c1);
1271               switch (c1)
1272                 {
1273                 case ']':       /* end of the current direction */
1274                   coding->mode &= ~CODING_MODE_DIRECTION;
1275
1276                 case '0':       /* end of the current direction */
1277                 case '1':       /* start of left-to-right direction */
1278                   ONE_MORE_BYTE (c1);
1279                   if (c1 == ']')
1280                     coding->mode &= ~CODING_MODE_DIRECTION;
1281                   else
1282                     goto label_invalid_code;
1283                   break;
1284
1285                 case '2':       /* start of right-to-left direction */
1286                   ONE_MORE_BYTE (c1);
1287                   if (c1 == ']')
1288                     coding->mode |= CODING_MODE_DIRECTION;
1289                   else
1290                     goto label_invalid_code;
1291                   break;
1292
1293                 default:
1294                   goto label_invalid_code;
1295                 }
1296               break;
1297
1298             default:
1299               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1300                 goto label_invalid_code;
1301               if (c1 >= 0x28 && c1 <= 0x2B)
1302                 {       /* designation of DIMENSION1_CHARS94 character set */
1303                   ONE_MORE_BYTE (c2);
1304                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1305                 }
1306               else if (c1 >= 0x2C && c1 <= 0x2F)
1307                 {       /* designation of DIMENSION1_CHARS96 character set */
1308                   ONE_MORE_BYTE (c2);
1309                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1310                 }
1311               else
1312                 {
1313                   goto label_invalid_code;
1314                 }
1315             }
1316           /* We must update these variables now.  */
1317           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1318           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1319           break;
1320
1321         label_invalid_code:
1322           while (src_base < src)
1323             *dst++ = *src_base++;
1324           coding->fake_multibyte = 1;
1325         }
1326       continue;
1327
1328     label_end_of_loop:
1329       result = CODING_FINISH_INSUFFICIENT_SRC;
1330     label_end_of_loop_2:
1331       src = src_base;
1332       break;
1333     }
1334
1335   if (src < src_end)
1336     {
1337       if (result == CODING_FINISH_NORMAL)
1338         result = CODING_FINISH_INSUFFICIENT_DST;
1339       else if (result != CODING_FINISH_INCONSISTENT_EOL
1340                && coding->mode & CODING_MODE_LAST_BLOCK)
1341         {
1342           /* This is the last block of the text to be decoded.  We had
1343              better just flush out all remaining codes in the text
1344              although they are not valid characters.  */
1345           src_bytes = src_end - src;
1346           if (dst_bytes && (dst_end - dst < src_bytes))
1347             src_bytes = dst_end - dst;
1348           bcopy (src, dst, src_bytes);
1349           dst += src_bytes;
1350           src += src_bytes;
1351           coding->fake_multibyte = 1;
1352         }
1353     }
1354
1355   coding->consumed = coding->consumed_char = src - source;
1356   coding->produced = dst - destination;
1357   return result;
1358 }
1359
1360 /* ISO2022 encoding stuff.  */
1361
1362 /*
1363    It is not enough to say just "ISO2022" on encoding, we have to
1364    specify more details.  In Emacs, each coding system of ISO2022
1365    variant has the following specifications:
1366         1. Initial designation to G0 thru G3.
1367         2. Allows short-form designation?
1368         3. ASCII should be designated to G0 before control characters?
1369         4. ASCII should be designated to G0 at end of line?
1370         5. 7-bit environment or 8-bit environment?
1371         6. Use locking-shift?
1372         7. Use Single-shift?
1373    And the following two are only for Japanese:
1374         8. Use ASCII in place of JIS0201-1976-Roman?
1375         9. Use JISX0208-1983 in place of JISX0208-1978?
1376    These specifications are encoded in `coding->flags' as flag bits
1377    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1378    details.
1379 */
1380
/* Write at DST the escape sequence that designates CHARSET to graphic
   register REG, and record the designation in CODING.

   When CODING has a revision number below 255 for CHARSET, the
   sequence is preceded by `ESC & <'@' + revision>'.  For a charset of
   dimension 2 with 94 chars, the intermediate character is left out
   (short form) if CODING allows short form, REG is 0, and the
   <final-char> of CHARSET is '@', 'A', or 'B'.

   The expansion refers to the variable `dst' of the enclosing
   function.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
    int chars94_p = (CHARSET_CHARS (charset) == 94);                    \
    /* Intermediate characters for G0..G3, by charset size.  */         \
    char *intermediate_chars = chars94_p ? "()*+" : ",-./";             \
    int short_form_p = 0;                                               \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        *dst++ = '$';                                                   \
        short_form_p = (chars94_p                                       \
                        && (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \
                        && (reg) == 0                                   \
                        && final_char >= '@' && final_char <= 'B');     \
      }                                                                 \
    if (! short_form_p)                                                 \
      *dst++ = (unsigned char) (intermediate_chars[reg]);               \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
1422
1423 /* ISO2022 single-shift functions (single-shift-2 and single-shift-3).
1424    Each of the two macros emits ESC plus a letter under 7-bit output, or
1425    the 8-bit control code otherwise, and sets the single-shifting flag.  */
1426
1427 #define ENCODE_SINGLE_SHIFT_2                           \
1428   do {                                                  \
1429     if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
1430       {                                                 \
1431         *dst++ = ISO_CODE_SS2;                          \
1432         coding->fake_multibyte = 1;                     \
1433       }                                                 \
1434     else                                                \
1435       { *dst++ = ISO_CODE_ESC; *dst++ = 'N'; }          \
1436     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1437   } while (0)
1438
1439 #define ENCODE_SINGLE_SHIFT_3                           \
1440   do {    /* single-shift-3: ESC `O' or SS3 */          \
1441     if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
1442       {                                                 \
1443         *dst++ = ISO_CODE_SS3;                          \
1444         coding->fake_multibyte = 1;                     \
1445       }                                                 \
1446     else                                                \
1447       { *dst++ = ISO_CODE_ESC; *dst++ = 'O'; }          \
1448     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1449   } while (0)
1450
1451 /* The following four macros each produce the code (a control character
1452    or an escape sequence) of one ISO2022 locking-shift function (shift-in,
1453    shift-out, locking-shift-2, locking-shift-3) and record the invocation.  */
1454
1455 #define ENCODE_SHIFT_IN                         \
1456   do {    /* register 0 -> graphic plane 0 */   \
1457     *dst++ = ISO_CODE_SI;                       \
1458     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1459   } while (0)
1460
1461 #define ENCODE_SHIFT_OUT                        \
1462   do {    /* register 1 -> graphic plane 0 */   \
1463     *dst++ = ISO_CODE_SO;                       \
1464     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1465   } while (0)
1466
1467 #define ENCODE_LOCKING_SHIFT_2                  \
1468   do {    /* register 2 -> graphic plane 0 */   \
1469     *dst++ = ISO_CODE_ESC, *dst++ = 'n';        \
1470     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1471   } while (0)
1472
1473 #define ENCODE_LOCKING_SHIFT_3                  \
1474   do {    /* register 3 -> graphic plane 0 */   \
1475     *dst++ = ISO_CODE_ESC, *dst++ = 'o';        \
1476     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1477   } while (0)
1478
1479 /* Produce at DST the code of a DIMENSION1 character of CHARSET whose
1480    position-code is C1.  The body loops: while CHARSET is on neither
1481    graphic plane, encode_invocation_designation runs and the test repeats.  */
1482
1483
1484 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
1485   do {                                                                  \
1486     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1487       {   /* single shift pending: emit C1, then clear the flag */      \
1488         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1489           *dst++ = c1 & 0x7F;                                           \
1490         else                                                            \
1491           *dst++ = c1 | 0x80;                                           \
1492         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1493         break;                                                          \
1494       }                                                                 \
1495     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1496       {   /* CHARSET is on plane 0: emit with the high bit clear */     \
1497         *dst++ = c1 & 0x7F;                                             \
1498         break;                                                          \
1499       }                                                                 \
1500     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1501       {   /* CHARSET is on plane 1: emit with the high bit set */       \
1502         *dst++ = c1 | 0x80;                                             \
1503         break;                                                          \
1504       }                                                                 \
1505     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1506              && !coding->safe_charsets[charset])                        \
1507       {                                                                 \
1508         /* CHARSET must not be encoded.  Emit the substitution code     \
1509            (`?') once, or twice when CHARSET_WIDTH is 2.  */            \
1510         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1511         if (CHARSET_WIDTH (charset) == 2)                               \
1512           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1513         break;                                                          \
1514       }                                                                 \
1515     else                                                                \
1516       /* CHARSET is not yet invoked to any graphic plane, so it must    \
1517          be invoked, after first being designated to some graphic       \
1518          register if needed.  No `break' here: the loop repeats and     \
1519          one of the branches above then produces the character.  */     \
1520       dst = encode_invocation_designation (charset, coding, dst);       \
1521   } while (1)
1522
1523 /* Produce at DST the two codes of a DIMENSION2 character of CHARSET
1524    whose position-codes are C1 and C2.  The body loops until CHARSET is
1525    usable, calling encode_invocation_designation while it is not.  */
1526
1527 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
1528   do {                                                                  \
1529     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1530       {   /* single shift pending: emit both codes, clear the flag */   \
1531         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1532           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
1533         else                                                            \
1534           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
1535         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1536         break;                                                          \
1537       }                                                                 \
1538     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1539       {   /* CHARSET is on plane 0: emit with the high bits clear */    \
1540         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;                          \
1541         break;                                                          \
1542       }                                                                 \
1543     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1544       {   /* CHARSET is on plane 1: emit with the high bits set */      \
1545         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
1546         break;                                                          \
1547       }                                                                 \
1548     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1549              && !coding->safe_charsets[charset])                        \
1550       {                                                                 \
1551         /* CHARSET must not be encoded.  Emit the substitution code     \
1552            (`?') once, or twice when CHARSET_WIDTH is 2.  */            \
1553         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1554         if (CHARSET_WIDTH (charset) == 2)                               \
1555           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1556         break;                                                          \
1557       }                                                                 \
1558     else                                                                \
1559       /* CHARSET is not yet invoked to any graphic plane, so it must    \
1560          be invoked, after first being designated to some graphic       \
1561          register if needed.  No `break' here: the loop repeats and     \
1562          one of the branches above then produces the character.  */     \
1563       dst = encode_invocation_designation (charset, coding, dst);       \
1564   } while (1)
1565
1566 #define ENCODE_ISO_CHARACTER(charset, c1, c2)                   \
1567   do {  /* Translate (CHARSET, C1, C2) if a table is set, emit.  */ \
1568     int c_alt, charset_alt;                                     \
1569     if (!NILP (translation_table)                               \
1570         && ((c_alt = translate_char (translation_table, -1,     \
1571                                      charset, c1, c2))          \
1572             >= 0))                                              \
1573       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
1574     else                                                        \
1575       charset_alt = charset;                                    \
1576     if (CHARSET_DIMENSION (charset_alt) == 1)                   \
1577       {  /* Test the charset being emitted, not the original.  */ \
1578         if (charset_alt == CHARSET_ASCII                        \
1579             && coding->flags & CODING_FLAG_ISO_USE_ROMAN)       \
1580           charset_alt = charset_latin_jisx0201;                 \
1581         ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);      \
1582       }                                                         \
1583     else                                                        \
1584       {  /* Likewise: C1 and C2 now belong to CHARSET_ALT.  */  \
1585         if (charset_alt == charset_jisx0208                     \
1586             && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)      \
1587           charset_alt = charset_jisx0208_1978;                  \
1588         ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);  \
1589       }                                                         \
1590     if (! COMPOSING_P (coding->composing))                      \
1591       coding->consumed_char++;                                  \
1592   } while (0)
1593
1594 /* Emit at DST the designation and invocation codes needed before a
1595    character of CHARSET can be written, and bring the `spec.iso2022'
1596    element of *CODING up to date.  The advanced DST is returned.  */
1597
1598 unsigned char *
1599 encode_invocation_designation (charset, coding, dst)
1600      int charset;
1601      struct coding_system *coding;
1602      unsigned char *dst;
1603 {
1604   int reg = 0;                  /* graphic register number */
1605
1606   /* First see whether CHARSET is already designated to one of the
1607      four graphic registers.  */
1608   while (reg < 4)
1609     {
1610       if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1611         break;
1612       reg++;
1613     }
1614
1615   if (reg == 4)
1616     {
1617       /* It is not.  Designate it to the register it requests, or to
1618          graphic register 0 when it requests none in particular.  */
1619       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1620       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1621         reg = 0;
1622
1623       ENCODE_DESIGNATION (charset, reg, coding);
1624     }
1625
1626   /* Nothing more to emit when REG is already invoked to one of the
1627      two graphic planes.  */
1628   if (CODING_SPEC_ISO_INVOCATION (coding, 0) == reg
1629       || CODING_SPEC_ISO_INVOCATION (coding, 1) == reg)
1630     return dst;
1631
1632   /* Otherwise produce the shift code for REG.  Registers 2 and 3 get
1633      a single shift if the coding system uses one, else a locking
1634      shift.  */
1635   if (reg == 0)
1636     ENCODE_SHIFT_IN;
1637   else if (reg == 1)
1638     ENCODE_SHIFT_OUT;
1639   else if (reg == 2)
1640     {
1641       if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1642         ENCODE_LOCKING_SHIFT_2;
1643       else
1644         ENCODE_SINGLE_SHIFT_2;
1645     }
1646   else if (reg == 3)
1647     {
1648       if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1649         ENCODE_LOCKING_SHIFT_3;
1650       else
1651         ENCODE_SINGLE_SHIFT_3;
1652     }
1653
1654   return dst;
1655 }
1656
1657 /* The following three macros produce codes for indicating composition.  */
1658 #define ENCODE_COMPOSITION_NO_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '0'
1659 #define ENCODE_COMPOSITION_WITH_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '2'
1660 #define ENCODE_COMPOSITION_END    *dst++ = ISO_CODE_ESC, *dst++ = '1'
1661
1662 /* The following three macros produce codes for indicating direction
1663    of text.  The introducer is ESC `[' when 7-bit output is flagged
1664    (tested as a bit of `flags', like the other macros), else CSI.  */
1665 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
1666   do { if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)  \
1667       *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
1668     else                                                \
1669       *dst++ = ISO_CODE_CSI;                            \
1670   } while (0)
1671
1672 #define ENCODE_DIRECTION_R2L    \
1673   ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '2', *dst++ = ']'
1674
1675 #define ENCODE_DIRECTION_L2R    \
1676   ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']'
1677
1678 /* Emit shift-in unless graphic plane 0 already invokes register 0, then
1679    re-designate each register whose initial charset (if any) is not current.  */
1680 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
1681   do {                                                                      \
1682     int reg;                                                                \
1683     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
1684       ENCODE_SHIFT_IN;                                                      \
1685     for (reg = 0; reg < 4; reg++)                                           \
1686       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0            \
1687           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
1688               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
1689         ENCODE_DESIGNATION                                                  \
1690           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1691   } while (0)
1692
1693 /* Scan the line beginning at SRC for charsets that request a graphic
1694    register, emit the needed designation sequences at *DSTP, and
1695    advance *DSTP past them.  TABLE, if non-nil, translates each char.
1696    The scan stops at SRC_END, so when the block ends before the line
1697    does, designations needed later in the line may be missed.  */
1698
1699 void
1700 encode_designation_at_bol (coding, table, src, src_end, dstp)
1701      struct coding_system *coding;
1702      Lisp_Object table;
1703      unsigned char *src, *src_end, **dstp;
1704 {
1705   int charset, c, found = 0, reg;
1706   int wanted[4];        /* charset to designate to each register, or -1 */
1707   unsigned char *dst = *dstp;
1708
1709   wanted[0] = wanted[1] = wanted[2] = wanted[3] = -1;
1710
1711   while (found < 4 && src < src_end && *src != '\n')
1712     {
1713       int bytes = BYTES_BY_CHAR_HEAD (*src);
1714
1715       if (! NILP (table))
1716         {
1717           int c_alt;
1718           unsigned char c1, c2;
1719
1720           SPLIT_STRING(src, bytes, charset, c1, c2);
1721           c_alt = translate_char (table, -1, charset, c1, c2);
1722           if (c_alt >= 0)
1723             charset = CHAR_CHARSET (c_alt);
1724         }
1725       else
1726         charset = CHARSET_AT (src);
1727
1728       /* Remember only the first charset seen for each register.  */
1729       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1730       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && wanted[reg] < 0)
1731         {
1732           wanted[reg] = charset;
1733           found++;
1734         }
1735
1736       src += bytes;
1737     }
1738
1739   if (! found)
1740     return;
1741
1742   for (reg = 0; reg < 4; reg++)
1743     if (wanted[reg] >= 0
1744         && wanted[reg] != CODING_SPEC_ISO_DESIGNATION (coding, reg))
1745       ENCODE_DESIGNATION (wanted[reg], reg, coding);
1746   *dstp = dst;
1747 }
1748
1749 /* ISO2022 encoder; see "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
1750
1751 int
1752 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1753      struct coding_system *coding;
1754      unsigned char *source, *destination;
1755      int src_bytes, dst_bytes;
1756 {
1757   unsigned char *src = source;
1758   unsigned char *src_end = source + src_bytes;
1759   unsigned char *dst = destination;
1760   unsigned char *dst_end = destination + dst_bytes;
1761   /* One pass through the loop below produces at most 20 bytes, so we
1762      subtract 19 from DST_END; overflow then needs to be checked only
1763      at the head of the loop.  */
1764   unsigned char *adjusted_dst_end = dst_end - 19;
1765   Lisp_Object translation_table
1766       = coding->translation_table_for_encode;
1767   int result = CODING_FINISH_NORMAL;
1768
1769   if (!NILP (Venable_character_translation) && NILP (translation_table))
1770     translation_table = Vstandard_translation_table_for_encode;
1771
1772   coding->consumed_char = 0;
1773   coding->fake_multibyte = 0;
1774   while (src < src_end && (dst_bytes
1775                            ? (dst < adjusted_dst_end)
1776                            : (dst < src - 19)))  /* DST_BYTES == 0: DST is bounded by SRC */
1777     {
1778       /* SRC_BASE remembers the start position in source in each loop.
1779          The loop will be exited when there's not enough source text
1780          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1781          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1782          reset to SRC_BASE before exiting.  */
1783       unsigned char *src_base = src;
1784       int charset, c1, c2, c3, c4;
1785
1786       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1787           && CODING_SPEC_ISO_BOL (coding))
1788         {
1789           /* We have to produce designation sequences if any now.  */
1790           encode_designation_at_bol (coding, translation_table,
1791                                      src, src_end, &dst);
1792           CODING_SPEC_ISO_BOL (coding) = 0;
1793         }
1794
1795       c1 = *src++;
1796       /* If we are seeing a component of a composite character, we are
1797          seeing a leading-code encoded irregularly for composition, or
1798          a composition rule if composing with rule.  We must set C1 to
1799          a normal leading-code or an ASCII code.  If we are not seeing
1800          a composite character, we must reset composition,
1801          designation, and invocation states.  */
1802       if (COMPOSING_P (coding->composing))
1803         {
1804           if (c1 < 0xA0)
1805             {
1806               /* We are not in a composite character any longer.  */
1807               coding->composing = COMPOSING_NO;
1808               ENCODE_RESET_PLANE_AND_REGISTER;
1809               ENCODE_COMPOSITION_END;
1810             }
1811           else
1812             {
1813               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1814                 {
1815                   *dst++ = c1 & 0x7F;
1816                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1817                   continue;
1818                 }
1819               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1820                 coding->composing = COMPOSING_WITH_RULE_RULE;
1821               if (c1 == 0xA0)
1822                 {
1823                   /* This is an ASCII component.  */
1824                   ONE_MORE_BYTE (c1);
1825                   c1 &= 0x7F;
1826                 }
1827               else
1828                 /* This is a leading-code of non ASCII component.  */
1829                 c1 -= 0x20;
1830             }
1831         }
1832
1833       /* Now encode one character.  C1 is a control character, an
1834          ASCII character, or a leading-code of multi-byte character.  */
1835       switch (emacs_code_class[c1])
1836         {
1837         case EMACS_ascii_code:
1838           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1839           break;
1840
1841         case EMACS_control_code:
1842           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1843             ENCODE_RESET_PLANE_AND_REGISTER;
1844           *dst++ = c1;
1845           coding->consumed_char++;
1846           break;
1847
1848         case EMACS_carriage_return_code:
1849           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1850             {
1851               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1852                 ENCODE_RESET_PLANE_AND_REGISTER;
1853               *dst++ = c1;
1854               coding->consumed_char++;
1855               break;
1856             }
1857           /* fall through to treat '\r' as '\n' ...  */
1858
1859         case EMACS_linefeed_code:
1860           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1861             ENCODE_RESET_PLANE_AND_REGISTER;
1862           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1863             bcopy (coding->spec.iso2022.initial_designation,
1864                    coding->spec.iso2022.current_designation,
1865                    sizeof coding->spec.iso2022.initial_designation);
1866           if (coding->eol_type == CODING_EOL_LF
1867               || coding->eol_type == CODING_EOL_UNDECIDED)
1868             *dst++ = ISO_CODE_LF;
1869           else if (coding->eol_type == CODING_EOL_CRLF)
1870             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1871           else
1872             *dst++ = ISO_CODE_CR;
1873           CODING_SPEC_ISO_BOL (coding) = 1;
1874           coding->consumed_char++;
1875           break;
1876
1877         case EMACS_leading_code_2:
1878           ONE_MORE_BYTE (c2);
1879           if (c2 < 0xA0)
1880             {
1881               /* invalid sequence */
1882               *dst++ = c1;
1883               src--;
1884               coding->consumed_char++;
1885             }
1886           else
1887             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1888           break;
1889
1890         case EMACS_leading_code_3:
1891           TWO_MORE_BYTES (c2, c3);
1892           if (c2 < 0xA0 || c3 < 0xA0)
1893             {
1894               /* invalid sequence */
1895               *dst++ = c1;
1896               src -= 2;
1897               coding->consumed_char++;
1898             }
1899           else if (c1 < LEADING_CODE_PRIVATE_11)
1900             ENCODE_ISO_CHARACTER (c1, c2, c3);
1901           else
1902             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1903           break;
1904
1905         case EMACS_leading_code_4:
1906           THREE_MORE_BYTES (c2, c3, c4);
1907           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1908             {
1909               /* invalid sequence */
1910               *dst++ = c1;
1911               src -= 3;
1912               coding->consumed_char++;
1913             }
1914           else
1915             ENCODE_ISO_CHARACTER (c2, c3, c4);
1916           break;
1917
1918         case EMACS_leading_code_composition:
1919           ONE_MORE_BYTE (c2);
1920           if (c2 < 0xA0)
1921             {
1922               /* invalid sequence */
1923               *dst++ = c1;
1924               src--;
1925               coding->consumed_char++;
1926             }
1927           else if (c2 == 0xFF)
1928             {
1929               ENCODE_RESET_PLANE_AND_REGISTER;
1930               coding->composing = COMPOSING_WITH_RULE_HEAD;
1931               ENCODE_COMPOSITION_WITH_RULE_START;
1932               coding->consumed_char++;
1933             }
1934           else
1935             {
1936               ENCODE_RESET_PLANE_AND_REGISTER;
1937               /* Rewind one byte because it is a character code of
1938                  composition elements.  */
1939               src--;
1940               coding->composing = COMPOSING_NO_RULE_HEAD;
1941               ENCODE_COMPOSITION_NO_RULE_START;
1942               coding->consumed_char++;
1943             }
1944           break;
1945
1946         case EMACS_invalid_code:
1947           *dst++ = c1;
1948           coding->consumed_char++;
1949           break;
1950         }
1951       continue;                 /* the label below is reached only by a jump */
1952     label_end_of_loop:
1953       result = CODING_FINISH_INSUFFICIENT_SRC;
1954       src = src_base;
1955       break;
1956     }
1957
1958   if (src < src_end && result == CODING_FINISH_NORMAL)  /* stopped by the DST bound */
1959     result = CODING_FINISH_INSUFFICIENT_DST;
1960
1961   /* If this is the last block of the text to be encoded, we must
1962      reset graphic planes and registers to the initial state, and
1963      flush out the carryover if any.  */
1964   if (coding->mode & CODING_MODE_LAST_BLOCK)
1965     {
1966       ENCODE_RESET_PLANE_AND_REGISTER;
1967       if (COMPOSING_P (coding->composing))
1968         ENCODE_COMPOSITION_END;
1969     }
1970   coding->consumed = src - source;      /* source bytes consumed */
1971   coding->produced = coding->produced_char = dst - destination; /* both get the byte count */
1972   return result;
1973 }
1974
1975 \f
1976 /*** 4. SJIS and BIG5 handlers ***/
1977
1978 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1979    quite widely.  So, for the moment, Emacs supports them in the bare
1980    C code.  But, in the future, they may be supported only by CCL.  */
1981
1982 /* SJIS is a coding system encoding three character sets: ASCII, right
1983    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
1984    as is.  A character of charset katakana-jisx0201 is encoded by
1985    "position-code + 0x80".  A character of charset japanese-jisx0208
1986    is encoded in 2-byte but two position-codes are divided and shifted
1987    so that they fit in the range below.
1988
1989    --- CODE RANGE of SJIS ---
1990    (character set)      (range)
1991    ASCII                0x00 .. 0x7F
1992    KATAKANA-JISX0201    0xA0 .. 0xDF
1993    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xEF
1994             (2nd byte)  0x40 .. 0xFF
1995    -------------------------------
1996
1997 */
1998
1999 /* BIG5 is a coding system encoding two character sets: ASCII and
2000    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2001    character set and is encoded in two-byte.
2002
2003    --- CODE RANGE of BIG5 ---
2004    (character set)      (range)
2005    ASCII                0x00 .. 0x7F
2006    Big5 (1st byte)      0xA1 .. 0xFE
2007         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2008    --------------------------
2009
2010    Since the number of characters in Big5 is larger than maximum
2011    characters in Emacs' charset (96x96), it can't be handled as one
2012    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2013    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2014    contains frequently used characters and the latter contains less
2015    frequently used characters.  */
2016
2017 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2018    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2019    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2020    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2021
2022 /* Number of Big5 codes per 1st byte: 94 (0xA1..0xFE) + 63 (0x40..0x7E).  */
2023 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2024
2025 #define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
2026   do {  /* TEMP: linear index of (B1, B2), later within CHARSET.  */    \
2027     unsigned int temp = (b1 - 0xA1) * BIG5_SAME_ROW;                    \
2028     temp += b2 - (b2 >= 0x7F ? 0x62 : 0x40);                            \
2029     if (b1 >= 0xC9)                                                     \
2030       {                                                                 \
2031         charset = charset_big5_2;                                       \
2032         temp -= BIG5_SAME_ROW * (0xC9 - 0xA1);                          \
2033       }                                                                 \
2034     else                                                                \
2035       charset = charset_big5_1;                                         \
2036     c1 = 0x21 + temp / (0xFF - 0xA1);                                   \
2037     c2 = 0x21 + temp % (0xFF - 0xA1);                                   \
2038   } while (0)
2039
2040 #define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
2041   do {  /* Map position codes C1, C2 of CHARSET back to B1, B2.  */     \
2042     unsigned int temp = (c2 - 0x21) + (c1 - 0x21) * (0xFF - 0xA1);      \
2043     if (charset == charset_big5_2)                                      \
2044       temp += (0xC9 - 0xA1) * BIG5_SAME_ROW;                            \
2045     b1 = 0xA1 + temp / BIG5_SAME_ROW;                                   \
2046     b2 = temp % BIG5_SAME_ROW;                                          \
2047     b2 += (b2 >= 0x3F) ? 0x62 : 0x40;                                   \
2048   } while (0)
2049
2050 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
2051   do {  /* Uses the variable `translation_table' of the caller.  */     \
2052     int c_alt, charset_alt = (charset);                                 \
2053     if (!NILP (translation_table)                                       \
2054         && ((c_alt = translate_char (translation_table,                 \
2055                                      -1, (charset), c1, c2)) >= 0))     \
2056           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
2057     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
2058       DECODE_CHARACTER_ASCII (c1);      /* negative charset too */      \
2059     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
2060       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
2061     else                                                                \
2062       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
2063   } while (0)
2064
     /* Encode the character of CHARSET whose position codes are C1 and C2
        and store the result at DST, advancing DST.  The macro relies on
        the variables `translation_table', `sjis_p', `dst' and `coding' of
        the function it is expanded in.

        If `translation_table' is non-nil and translate_char returns a
        non-negative character, that character replaces CHARSET, C1 and
        C2 (C1 and C2 must therefore be lvalues).  The bytes stored are:

          ASCII:                              C1
          JISX0201-Kana when SJIS_P:          C1
          other charset of dimension 1:       the charset, C1
          JISX0208 when SJIS_P:               the two bytes of ENCODE_SJIS
          Big5 (either half) when not SJIS_P: the two bytes of ENCODE_BIG5
          other charset of dimension 2:       the charset, C1, C2

        For charsets of dimension 2 the 8th bit of C1 and C2 is cleared
        first.  `coding->fake_multibyte' is set to 1 in every case except
        ASCII, JISX0201-Kana under SJIS_P, and Big5.
        `coding->consumed_char' is incremented once.

        The expansion has no trailing semicolon, so that
        `ENCODE_SJIS_BIG5_CHARACTER (...);' is exactly one statement and
        can be used safely in an unbraced if/else.  */
2065 #define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)             \
2066   do {                                                          \
2067     int c_alt, charset_alt;                                     \
2068     if (!NILP (translation_table)                               \
2069         && ((c_alt = translate_char (translation_table, -1,     \
2070                                      charset, c1, c2))          \
2071             >= 0))                                              \
2072       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
2073     else                                                        \
2074       charset_alt = charset;                                    \
2075     if (charset_alt == charset_ascii)                           \
2076       *dst++ = c1;                                              \
2077     else if (CHARSET_DIMENSION (charset_alt) == 1)              \
2078       {                                                         \
2079         if (sjis_p && charset_alt == charset_katakana_jisx0201) \
2080           *dst++ = c1;                                          \
2081         else                                                    \
2082           {                                                     \
2083             *dst++ = charset_alt, *dst++ = c1;                  \
2084             coding->fake_multibyte = 1;                         \
2085           }                                                     \
2086       }                                                         \
2087     else                                                        \
2088       {                                                         \
2089         c1 &= 0x7F, c2 &= 0x7F;                                 \
2090         if (sjis_p && charset_alt == charset_jisx0208)          \
2091           {                                                     \
2092             unsigned char s1, s2;                               \
2093                                                                 \
2094             ENCODE_SJIS (c1, c2, s1, s2);                       \
2095             *dst++ = s1, *dst++ = s2;                           \
2096             coding->fake_multibyte = 1;                         \
2097           }                                                     \
2098         else if (!sjis_p                                        \
2099                  && (charset_alt == charset_big5_1              \
2100                      || charset_alt == charset_big5_2))         \
2101           {                                                     \
2102             unsigned char b1, b2;                               \
2103                                                                 \
2104             ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);          \
2105             *dst++ = b1, *dst++ = b2;                           \
2106           }                                                     \
2107         else                                                    \
2108           {                                                     \
2109             *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;     \
2110             coding->fake_multibyte = 1;                         \
2111           }                                                     \
2112       }                                                         \
2113     coding->consumed_char++;                                    \
2114   } while (0)
2115
2116 /* Detector for SJIS; see the "GENERAL NOTES on `detect_coding_XXX ()'
2117    functions" above.  Return CODING_CATEGORY_MASK_SJIS unless a byte in
2118    0x80..0x9F or 0xE0..0xFF is followed by a byte below 0x40; then 0.  */
2119
2120 int
2121 detect_coding_sjis (src, src_end)
2122      unsigned char *src, *src_end;
2123 {
2124   for (; src < src_end; src++)
2125     {
2126       unsigned char lead = *src;
2127
2128       /* Only these bytes are checked together with a following byte.  */
2129       if (lead < 0x80 || (lead >= 0xA0 && lead < 0xE0))
2130         continue;
2131       /* A lead byte with nothing after it does not count against SJIS.  */
2132       if (src + 1 < src_end && *++src < 0x40)
2133         return 0;
2134     }
2135   return CODING_CATEGORY_MASK_SJIS;
2136 }
2137
2138 /* Detector for BIG5; see the "GENERAL NOTES on `detect_coding_XXX ()'
2139    functions" above.  Return 0 if some byte >= 0xA1 is followed by a
2140    byte outside 0x40..0x7E and 0xA1..0xFF, else CODING_CATEGORY_MASK_BIG5.  */
2141
2142 int
2143 detect_coding_big5 (src, src_end)
2144      unsigned char *src, *src_end;
2145 {
2146   for (; src < src_end; src++)
2147     {
2148       unsigned char trail;
2149
2150       if (*src < 0xA1)
2151         continue;
2152       /* A lead byte at the very end of the text is not an error.  */
2153       if (++src >= src_end)
2154         break;
2155       trail = *src;
2156       /* Accepted second bytes are 0x40..0x7E and 0xA1 and above.  */
2157       if (trail < 0x40 || (trail > 0x7E && trail < 0xA1))
2158         return 0;
2159     }
2160   return CODING_CATEGORY_MASK_BIG5;
2161 }
2162
2163 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2164    If SJIS_P is 1, decode SJIS text, else decode BIG5 text.

        Decode the SRC_BYTES bytes at SOURCE and store the result at
        DESTINATION, which has room for DST_BYTES bytes.  If DST_BYTES is
        zero, the output limit is taken from the read position instead
        (DST has to stay more than 3 bytes behind SRC).

        On return, CODING->consumed, CODING->consumed_char,
        CODING->produced, CODING->produced_char and
        CODING->fake_multibyte are updated.  The value is
        CODING_FINISH_NORMAL, CODING_FINISH_INSUFFICIENT_SRC,
        CODING_FINISH_INSUFFICIENT_DST or CODING_FINISH_INCONSISTENT_EOL.  */
2165
2166 int
2167 decode_coding_sjis_big5 (coding, source, destination,
2168                          src_bytes, dst_bytes, sjis_p)
2169      struct coding_system *coding;
2170      unsigned char *source, *destination;
2171      int src_bytes, dst_bytes;
2172      int sjis_p;
2173 {
2174   unsigned char *src = source;
2175   unsigned char *src_end = source + src_bytes;
2176   unsigned char *dst = destination;
2177   unsigned char *dst_end = destination + dst_bytes;
2178   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2179      from DST_END to assure overflow checking is necessary only at the
2180      head of loop.  */
2181   unsigned char *adjusted_dst_end = dst_end - 3;
2182   Lisp_Object translation_table
2183       = coding->translation_table_for_decode;
2184   int result = CODING_FINISH_NORMAL;
2185
       /* Use the standard table when character translation is enabled and
          the coding system has no table of its own.  */
2186   if (!NILP (Venable_character_translation) && NILP (translation_table))
2187     translation_table = Vstandard_translation_table_for_decode;
2188
2189   coding->produced_char = 0;
2190   coding->fake_multibyte = 0;
2191   while (src < src_end && (dst_bytes
2192                            ? (dst < adjusted_dst_end)
2193                            : (dst < src - 3)))
2194     {
2195       /* SRC_BASE remembers the start position in source in each loop.
2196          The loop will be exited when there's not enough source text
2197          to analyze two-byte character (within macro ONE_MORE_BYTE).
2198          In that case, SRC is reset to SRC_BASE before exiting.  */
2199       unsigned char *src_base = src;
2200       unsigned char c1 = *src++, c2, c3, c4;
2201
2202       if (c1 < 0x20)
2203         {
               /* Control codes are copied as they are, except for the
                  end-of-line conversion selected by CODING->eol_type.  */
2204           if (c1 == '\r')
2205             {
2206               if (coding->eol_type == CODING_EOL_CRLF)
2207                 {
2208                   ONE_MORE_BYTE (c2);
2209                   if (c2 == '\n')
2210                     *dst++ = c2;
2211                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2212                     {
2213                       result = CODING_FINISH_INCONSISTENT_EOL;
2214                       goto label_end_of_loop_2;
2215                     }
2216                   else
2217                     /* To process C2 again, SRC is subtracted by 1.  */
2218                     *dst++ = c1, src--;
2219                 }
2220               else if (coding->eol_type == CODING_EOL_CR)
2221                 *dst++ = '\n';
2222               else
2223                 *dst++ = c1;
2224             }
2225           else if (c1 == '\n'
2226                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2227                    && (coding->eol_type == CODING_EOL_CR
2228                        || coding->eol_type == CODING_EOL_CRLF))
2229             {
2230               result = CODING_FINISH_INCONSISTENT_EOL;
2231               goto label_end_of_loop_2;
2232             }
2233           else
2234             *dst++ = c1;
2235           coding->produced_char++;
2236         }
2237       else if (c1 < 0x80)
2238         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2239       else
2240         {
2241           if (sjis_p)
2242             {
2243               if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
2244                 {
2245                   /* SJIS -> JISX0208 */
2246                   ONE_MORE_BYTE (c2);
2247                   if (c2 >= 0x40)
2248                     {
2249                       DECODE_SJIS (c1, c2, c3, c4);
2250                       DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2251                     }
2252                   else
2253                     goto label_invalid_code_2;
2254                 }
2255               else if (c1 < 0xE0)
2256                 /* SJIS -> JISX0201-Kana */
2257                 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2258                                             /* dummy */ c2);
2259               else
2260                 goto label_invalid_code_1;
2261             }
2262           else
2263             {
2264               /* BIG5 -> Big5 */
2265               if (c1 >= 0xA1 && c1 <= 0xFE)
2266                 {
2267                   ONE_MORE_BYTE (c2);
2268                   if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2269                     {
2270                       int charset;
2271
2272                       DECODE_BIG5 (c1, c2, charset, c3, c4);
2273                       DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2274                     }
2275                   else
2276                     goto label_invalid_code_2;
2277                 }
2278               else
2279                 goto label_invalid_code_1;
2280             }
2281         }
2282       continue;
2283
           /* An invalid one-byte code is stored as it is.  */
2284     label_invalid_code_1:
2285       *dst++ = c1;
2286       coding->produced_char++;
2287       coding->fake_multibyte = 1;
2288       continue;
2289
           /* An invalid two-byte code is stored as it is.  */
2290     label_invalid_code_2:
2291       *dst++ = c1; *dst++= c2;
2292       coding->produced_char += 2;
2293       coding->fake_multibyte = 1;
2294       continue;
2295
2296     label_end_of_loop:
2297       result = CODING_FINISH_INSUFFICIENT_SRC;
2298     label_end_of_loop_2:
2299       src = src_base;
2300       break;
2301     }
2302
2303   if (src < src_end)
2304     {
2305       if (result == CODING_FINISH_NORMAL)
2306         result = CODING_FINISH_INSUFFICIENT_DST;
2307       else if (result != CODING_FINISH_INCONSISTENT_EOL
2308                && coding->mode & CODING_MODE_LAST_BLOCK)
2309         {
               /* This is the last block and it ends in the middle of a
                  code.  Flush the remaining source bytes to the
                  destination unconverted, as many as fit.  Note that
                  bcopy takes the source first and the destination
                  second.  */
2310           src_bytes = src_end - src;
2311           if (dst_bytes && (dst_end - dst < src_bytes))
2312             src_bytes = dst_end - dst;
2313           bcopy (src, dst, src_bytes);
2314           src += src_bytes;
2315           dst += src_bytes;
2316           coding->fake_multibyte = 1;
2317         }
2318     }
2319
2320   coding->consumed = coding->consumed_char = src - source;
2321   coding->produced = dst - destination;
2322   return result;
2323 }
2324
2325 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2326    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2327    `charset_jisx0208', `charset_big5_1', and `charset_big5_2'.  We are
2328    sure that all these charsets are registered as official charset
2329    (i.e. do not have extended leading-codes).  Characters of other
2330    charsets are produced without any encoding.  If SJIS_P is 1, encode
2331    SJIS text, else encode BIG5 text.

        Encode the SRC_BYTES bytes at SOURCE and store the result at
        DESTINATION, which has room for DST_BYTES bytes.  If DST_BYTES is
        zero, the output limit is taken from the read position instead.
        On return, CODING->consumed, CODING->consumed_char,
        CODING->produced, CODING->produced_char and
        CODING->fake_multibyte are updated.  The value is
        CODING_FINISH_NORMAL, CODING_FINISH_INSUFFICIENT_SRC or
        CODING_FINISH_INSUFFICIENT_DST.  */
2332
2333 int
2334 encode_coding_sjis_big5 (coding, source, destination,
2335                          src_bytes, dst_bytes, sjis_p)
2336      struct coding_system *coding;
2337      unsigned char *source, *destination;
2338      int src_bytes, dst_bytes;
2339      int sjis_p;
2340 {
2341   unsigned char *src = source;
2342   unsigned char *src_end = source + src_bytes;
2343   unsigned char *dst = destination;
2344   unsigned char *dst_end = destination + dst_bytes;
2345   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2346      from DST_END to assure overflow checking is necessary only at the
2347      head of loop.
          NOTE(review): the fallback branch of ENCODE_SJIS_BIG5_CHARACTER
          stores three bytes (charset, C1, C2) for an unsupported charset
          of dimension 2, which is more than the two assumed here --
          confirm that this margin is sufficient.  */
2348   unsigned char *adjusted_dst_end = dst_end - 1;
2349   Lisp_Object translation_table
2350       = coding->translation_table_for_encode;
2351   int result = CODING_FINISH_NORMAL;
2352
       /* Use the standard table when character translation is enabled and
          the coding system has no table of its own.  */
2353   if (!NILP (Venable_character_translation) && NILP (translation_table))
2354     translation_table = Vstandard_translation_table_for_encode;
2355
2356   coding->consumed_char = 0;
2357   coding->fake_multibyte = 0;
2358   while (src < src_end && (dst_bytes
2359                            ? (dst < adjusted_dst_end)
2360                            : (dst < src - 1)))
2361     {
2362       /* SRC_BASE remembers the start position in source in each loop.
2363          The loop will be exited when there's not enough source text
2364          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2365          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2366          before exiting.  */
2367       unsigned char *src_base = src;
2368       unsigned char c1 = *src++, c2, c3, c4;
2369
           /* While CODING->composing is set, the first byte is adjusted
              before it is classified: 0xA0 is skipped and the next byte
              is used with its 8th bit cleared, a byte above 0xA0 is
              reduced by 0x20, and any other byte clears the state.
              NOTE(review): presumably this undoes the byte form used
              inside composition sequences -- confirm against section 2.  */
2370       if (coding->composing)
2371         {
2372           if (c1 == 0xA0)
2373             {
2374               ONE_MORE_BYTE (c1);
2375               c1 &= 0x7F;
2376             }
2377           else if (c1 >= 0xA0)
2378             c1 -= 0x20;
2379           else
2380             coding->composing = 0;
2381         }
2382
           /* Dispatch on the class of the (possibly adjusted) first byte.  */
2383       switch (emacs_code_class[c1])
2384         {
2385         case EMACS_ascii_code:
2386           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2387           break;
2388
2389         case EMACS_control_code:
2390           *dst++ = c1;
2391           coding->consumed_char++;
2392           break;
2393
2394         case EMACS_carriage_return_code:
2395           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2396             {
2397               *dst++ = c1;
2398               coding->consumed_char++;
2399               break;
2400             }
2401           /* fall down to treat '\r' as '\n' ...  */
2402
2403         case EMACS_linefeed_code:
               /* Produce the end-of-line sequence selected by
                  CODING->eol_type: LF (also when undecided), CR LF, or CR.  */
2404           if (coding->eol_type == CODING_EOL_LF
2405               || coding->eol_type == CODING_EOL_UNDECIDED)
2406             *dst++ = '\n';
2407           else if (coding->eol_type == CODING_EOL_CRLF)
2408             *dst++ = '\r', *dst++ = '\n';
2409           else
2410             *dst++ = '\r';
2411           coding->consumed_char++;
2412           break;
2413
2414         case EMACS_leading_code_2:
               /* C1 is passed as the charset and C2 as the only position
                  code.  */
2415           ONE_MORE_BYTE (c2);
2416           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2417           break;
2418
2419         case EMACS_leading_code_3:
               /* C1 is passed as the charset, C2 and C3 as position codes.  */
2420           TWO_MORE_BYTES (c2, c3);
2421           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2422           break;
2423
2424         case EMACS_leading_code_4:
               /* C1 is dropped here; C2 is passed as the charset, C3 and
                  C4 as position codes.  */
2425           THREE_MORE_BYTES (c2, c3, c4);
2426           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2427           break;
2428
2429         case EMACS_leading_code_composition:
               /* Nothing is produced; only the composing state is entered.  */
2430           coding->composing = 1;
2431           break;
2432
2433         default:                /* i.e. case EMACS_invalid_code: */
2434           *dst++ = c1;
2435           coding->consumed_char++;
2436         }
2437       continue;
2438
2439     label_end_of_loop:
2440       result = CODING_FINISH_INSUFFICIENT_SRC;
2441       src = src_base;
2442       break;
2443     }
2444
       /* Leaving the loop normally with source left over means that the
          output limit was reached.  */
2445   if (result == CODING_FINISH_NORMAL
2446       && src < src_end)
2447     result = CODING_FINISH_INSUFFICIENT_DST;
2448   coding->consumed = src - source;
2449   coding->produced = coding->produced_char = dst - destination;
2450   return result;
2451 }
2452
2453 \f
2454 /*** 5. End-of-line handlers ***/
2455
2456 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2457    This function is called only when `coding->eol_type' is
2458    CODING_EOL_CRLF or CODING_EOL_CR.

        Copy the SRC_BYTES bytes at SOURCE to DESTINATION (room for
        DST_BYTES bytes; zero selects the limit relative to the read
        position in the CRLF case and an unbounded safe_bcopy otherwise),
        converting CR LF or CR to LF.  Any other eol_type is handled by
        the default case, which copies the text unchanged.

        Return CODING_FINISH_NORMAL, CODING_FINISH_INSUFFICIENT_SRC,
        CODING_FINISH_INSUFFICIENT_DST or CODING_FINISH_INCONSISTENT_EOL.
        The consumed/produced fields of CODING are updated, except when
        SRC_BYTES is not positive.  */
2459
2460 int
2461 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2462      struct coding_system *coding;
2463      unsigned char *source, *destination;
2464      int src_bytes, dst_bytes;
2465 {
2466   unsigned char *src = source;
2467   unsigned char *src_end = source + src_bytes;
2468   unsigned char *dst = destination;
2469   unsigned char *dst_end = destination + dst_bytes;
2470   unsigned char c;
2471   int result = CODING_FINISH_NORMAL;
2472
2473   coding->fake_multibyte = 0;
2474
       /* Nothing to do; note that the consumed/produced fields are left
          untouched on this path.  */
2475   if (src_bytes <= 0)
2476     return result;
2477
2478   switch (coding->eol_type)
2479     {
2480     case CODING_EOL_CRLF:
2481       {
2482         /* Since the maximum bytes produced by each loop is 2, we
2483            subtract 1 from DST_END to assure overflow checking is
2484            necessary only at the head of loop.  */
2485         unsigned char *adjusted_dst_end = dst_end - 1;
2486
2487         while (src < src_end && (dst_bytes
2488                                  ? (dst < adjusted_dst_end)
2489                                  : (dst < src - 1)))
2490           {
                 /* SRC is reset to SRC_BASE when the loop is left early.  */
2491             unsigned char *src_base = src;
2492
2493             c = *src++;
2494             if (c == '\r')
2495               {
                     /* CR LF produces just the LF.  CR followed by
                        anything else is either reported as inconsistent
                        or copied together with the following byte.  */
2496                 ONE_MORE_BYTE (c);
2497                 if (c != '\n')
2498                   {
2499                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2500                       {
2501                         result = CODING_FINISH_INCONSISTENT_EOL;
2502                         goto label_end_of_loop_2;
2503                       }
2504                     *dst++ = '\r';
2505                     if (BASE_LEADING_CODE_P (c))
2506                       coding->fake_multibyte = 1;
2507                   }
2508                 *dst++ = c;
2509               }
2510             else if (c == '\n'
2511                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2512               {
                     /* A bare LF is inconsistent with CR LF text.  */
2513                 result = CODING_FINISH_INCONSISTENT_EOL;
2514                 goto label_end_of_loop_2;
2515               }
2516             else
2517               {
2518                 *dst++ = c;
2519                 if (BASE_LEADING_CODE_P (c))
2520                   coding->fake_multibyte = 1;
2521               }
2522             continue;
2523
2524           label_end_of_loop:
2525             result = CODING_FINISH_INSUFFICIENT_SRC;
2526           label_end_of_loop_2:
2527             src = src_base;
2528             break;
2529           }
2530         if (result == CODING_FINISH_NORMAL
2531             && src < src_end)
2532           result = CODING_FINISH_INSUFFICIENT_DST;
2533       }
2534       break;
2535
2536     case CODING_EOL_CR:
2537       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2538         {
               /* Look for the first LF.  If there is one, only the bytes
                  before it are converted and the inconsistency is
                  reported.  */
2539           while (src < src_end)
2540             {
2541               if ((c = *src++) == '\n')
2542                 break;
2543               if (BASE_LEADING_CODE_P (c))
2544                 coding->fake_multibyte = 1;
2545             }
2546           if (*--src == '\n')
2547             {
2548               src_bytes = src - source;
2549               result = CODING_FINISH_INCONSISTENT_EOL;
2550             }
2551         }
           /* Truncate to the room in DESTINATION; this replaces any
              result set above.  */
2552       if (dst_bytes && src_bytes > dst_bytes)
2553         {
2554           result = CODING_FINISH_INSUFFICIENT_DST;
2555           src_bytes = dst_bytes;
2556         }
2557       if (dst_bytes)
2558         bcopy (source, destination, src_bytes);
2559       else
2560         safe_bcopy (source, destination, src_bytes);
2561       src = source + src_bytes;
           /* Turn each CR of the copied text into LF; DST ends up just
              after the copied text.  */
2562       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2563       break;
2564
2565     default:                    /* i.e. case: CODING_EOL_LF */
           /* No conversion: copy as much as fits.  fake_multibyte is set
              unconditionally because the bytes are not examined.  */
2566       if (dst_bytes && src_bytes > dst_bytes)
2567         {
2568           result = CODING_FINISH_INSUFFICIENT_DST;
2569           src_bytes = dst_bytes;
2570         }
2571       if (dst_bytes)
2572         bcopy (source, destination, src_bytes);
2573       else
2574         safe_bcopy (source, destination, src_bytes);
2575       src += src_bytes;
2576       dst += src_bytes;
2577       coding->fake_multibyte = 1;
2578       break;
2579     }
2580
2581   coding->consumed = coding->consumed_char = src - source;
2582   coding->produced = coding->produced_char = dst - destination;
2583   return result;
2584 }
2585
2586 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2587    format of end-of-line according to `coding->eol_type'.  If
2588    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2589    '\r' in source text also means end-of-line.

        Copy the SRC_BYTES bytes at SOURCE to DESTINATION (room for
        DST_BYTES bytes; zero selects the limit relative to the read
        position in the CRLF case and an unbounded safe_bcopy otherwise).
        Return CODING_FINISH_NORMAL or CODING_FINISH_INSUFFICIENT_DST and
        update the consumed/produced fields and fake_multibyte of CODING.  */
2590
2591 int
2592 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2593      struct coding_system *coding;
2594      unsigned char *source, *destination;
2595      int src_bytes, dst_bytes;
2596 {
2597   unsigned char *src = source;
2598   unsigned char *dst = destination;
2599   int result = CODING_FINISH_NORMAL;
2600
2601   coding->fake_multibyte = 0;
2602
2603   if (coding->eol_type == CODING_EOL_CRLF)
2604     {
2605       unsigned char c;
2606       unsigned char *src_end = source + src_bytes;
2607       unsigned char *dst_end = destination + dst_bytes;
2608       /* Since the maximum bytes produced by each loop is 2, we
2609          subtract 1 from DST_END to assure overflow checking is
2610          necessary only at the head of loop.  */
2611       unsigned char *adjusted_dst_end = dst_end - 1;
2612
2613       while (src < src_end && (dst_bytes
2614                                ? (dst < adjusted_dst_end)
2615                                : (dst < src - 1)))
2616         {
2617           c = *src++;
               /* LF, and CR too under selective display, becomes CR LF.  */
2618           if (c == '\n'
2619               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2620             *dst++ = '\r', *dst++ = '\n';
2621           else
2622             {
2623               *dst++ = c;
2624               if (BASE_LEADING_CODE_P (c))
2625                 coding->fake_multibyte = 1;
2626             }
2627         }
2628       if (src < src_end)
2629         result = CODING_FINISH_INSUFFICIENT_DST;
2630     }
2631   else
2632     {
2633       unsigned char c;
2634
           /* The output has the same length as the input here, so copy
              the text first and then patch the copy in place.  */
2635       if (dst_bytes && src_bytes > dst_bytes)
2636         {
2637           src_bytes = dst_bytes;
2638           result = CODING_FINISH_INSUFFICIENT_DST;
2639         }
2640       if (dst_bytes)
2641         bcopy (source, destination, src_bytes);
2642       else
2643         safe_bcopy (source, destination, src_bytes);
           /* From here on DST_BYTES holds the number of bytes copied,
              because SRC_BYTES is counted down by the loops below.  */
2644       dst_bytes = src_bytes;
2645       if (coding->eol_type == CODING_EOL_CR)
2646         {
2647           while (src_bytes--)
2648             {
2649               if ((c = *dst++) == '\n')
2650                 dst[-1] = '\r';
2651               else if (BASE_LEADING_CODE_P (c))
2652                 coding->fake_multibyte = 1;
2653             }
2654         }
2655       else
2656         {
               /* Any other eol_type keeps LF; under selective display
                  each CR is turned into LF.  fake_multibyte is set
                  unconditionally on this path.  */
2657           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2658             {
2659               while (src_bytes--)
2660                 if (*dst++ == '\r') dst[-1] = '\n';
2661             }
2662           coding->fake_multibyte = 1;
2663         }
2664       src = source + dst_bytes;
2665       dst = destination + dst_bytes;
2666     }
2667
2668   coding->consumed = coding->consumed_char = src - source;
2669   coding->produced = coding->produced_char = dst - destination;
2670   return result;
2671 }
2672
2673 \f
2674 /*** 6. C library functions ***/
2675
2676 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
2677    has a property `coding-system'.  The value of this property is a
2678    vector of length 5 (called a coding-vector).  Among the elements of
2679    this vector, the first (element[0]) and the fifth (element[4])
2680    carry important information for decoding/encoding.  Before
2681    decoding/encoding, this information should be set in fields of a
2682    structure of type `coding_system'.
2683
2684    A value of property `coding-system' can be a symbol of another
2685    subsidiary coding-system.  In that case, Emacs gets coding-vector
2686    from that symbol.
2687
2688    `element[0]' contains information to be set in `coding->type'.  The
2689    value and its meaning is as follows:
2690
2691    0 -- coding_type_emacs_mule
2692    1 -- coding_type_sjis
2693    2 -- coding_type_iso2022
2694    3 -- coding_type_big5
2695    4 -- coding_type_ccl encoder/decoder written in CCL
2696    nil -- coding_type_no_conversion
2697    t -- coding_type_undecided (automatic conversion on decoding,
2698                                no-conversion on encoding)
2699
2700    `element[4]' contains information to be set in `coding->flags' and
2701    `coding->spec'.  The meaning varies by `coding->type'.
2702
2703    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2704    of length 32 (of which the first 17 sub-elements are used now).
2705    Meanings of these sub-elements are:
2706
2707    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2708         If the value is an integer of valid charset, the charset is
2709         assumed to be designated to graphic register N initially.
2710
2711         If the value is negative, its absolute value is a charset which
2712         reserves graphic register N, which means that the charset is
2713         not designated initially but should be designated to graphic
2714         register N just before encoding a character in that charset.
2715
2716         If the value is nil, graphic register N is never used on
2717         encoding.
2718
2719    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2720         Each value takes t or nil.  See the section ISO2022 of
2721         `coding.h' for more information.
2722
2723    If `coding->type' is `coding_type_big5', element[4] is t to denote
2724    BIG5-ETen or nil to denote BIG5-HKU.
2725
2726    If `coding->type' takes the other value, element[4] is ignored.
2727
2728    Emacs Lisp's coding system also carries information about format of
2729    end-of-line in a value of property `eol-type'.  If the value is
2730    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2731    means CODING_EOL_CR.  If it is not integer, it should be a vector
2732    of subsidiary coding systems of which property `eol-type' has one
2733    of above values.
2734
2735 */
2736
2737 /* Extract information for decoding/encoding from CODING_SYSTEM
2738    and set it in CODING.  If CODING_SYSTEM is invalid, CODING
2739    is set up so that no conversion is necessary and -1 is returned;
2740    otherwise 0 is returned.  */
2741
2742 int
2743 setup_coding_system (coding_system, coding)
2744      Lisp_Object coding_system;
2745      struct coding_system *coding;
2746 {
2747   Lisp_Object coding_spec, coding_type, eol_type, plist;
2748   Lisp_Object val;
2749   int i;
2750
2751   /* Initialize some fields required for all kinds of coding systems.  */
2752   coding->symbol = coding_system;
2753   coding->common_flags = 0;
2754   coding->mode = 0;
2755   coding->heading_ascii = -1;
2756   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2757   coding_spec = Fget (coding_system, Qcoding_system);
2758   if (!VECTORP (coding_spec)
2759       || XVECTOR (coding_spec)->size != 5
2760       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2761     goto label_invalid_coding_system;
2762
2763   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2764   if (VECTORP (eol_type))
2765     {
2766       coding->eol_type = CODING_EOL_UNDECIDED;
2767       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2768     }
2769   else if (XFASTINT (eol_type) == 1)
2770     {
2771       coding->eol_type = CODING_EOL_CRLF;
2772       coding->common_flags
2773         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2774     }
2775   else if (XFASTINT (eol_type) == 2)
2776     {
2777       coding->eol_type = CODING_EOL_CR;
2778       coding->common_flags
2779         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2780     }
2781   else
2782     coding->eol_type = CODING_EOL_LF;
2783
2784   coding_type = XVECTOR (coding_spec)->contents[0];
2785   /* Try short cut.  */
2786   if (SYMBOLP (coding_type))
2787     {
2788       if (EQ (coding_type, Qt))
2789         {
2790           coding->type = coding_type_undecided;
2791           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2792         }
2793       else
2794         coding->type = coding_type_no_conversion;
2795       return 0;
2796     }
2797
2798   /* Initialize remaining fields.  */
2799   coding->composing = 0;
2800   coding->translation_table_for_decode = Qnil;
2801   coding->translation_table_for_encode = Qnil;
2802
2803   /* Get values of coding system properties:
2804      `post-read-conversion', `pre-write-conversion',
2805      `translation-table-for-decode', `translation-table-for-encode'.  */
2806   plist = XVECTOR (coding_spec)->contents[3];
2807   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2808   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2809   val = Fplist_get (plist, Qtranslation_table_for_decode);
2810   if (SYMBOLP (val))
2811     val = Fget (val, Qtranslation_table_for_decode);
2812   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2813   val = Fplist_get (plist, Qtranslation_table_for_encode);
2814   if (SYMBOLP (val))
2815     val = Fget (val, Qtranslation_table_for_encode);
2816   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
2817   val = Fplist_get (plist, Qcoding_category);
2818   if (!NILP (val))
2819     {
2820       val = Fget (val, Qcoding_category_index);
2821       if (INTEGERP (val))
2822         coding->category_idx = XINT (val);
2823       else
2824         goto label_invalid_coding_system;
2825     }
2826   else
2827     goto label_invalid_coding_system;
2828
2829   val = Fplist_get (plist, Qsafe_charsets);
2830   if (EQ (val, Qt))
2831     {
2832       for (i = 0; i <= MAX_CHARSET; i++)
2833         coding->safe_charsets[i] = 1;
2834     }
2835   else
2836     {
2837       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2838       while (CONSP (val))
2839         {
2840           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2841             coding->safe_charsets[i] = 1;
2842           val = XCONS (val)->cdr;
2843         }
2844     }
2845
2846   switch (XFASTINT (coding_type))
2847     {
2848     case 0:
2849       coding->type = coding_type_emacs_mule;
2850       if (!NILP (coding->post_read_conversion))
2851         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2852       if (!NILP (coding->pre_write_conversion))
2853         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2854       break;
2855
2856     case 1:
2857       coding->type = coding_type_sjis;
2858       coding->common_flags
2859         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2860       break;
2861
2862     case 2:
2863       coding->type = coding_type_iso2022;
2864       coding->common_flags
2865         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2866       {
2867         Lisp_Object val, temp;
2868         Lisp_Object *flags;
2869         int i, charset, reg_bits = 0;
2870
2871         val = XVECTOR (coding_spec)->contents[4];
2872
2873         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2874           goto label_invalid_coding_system;
2875
2876         flags = XVECTOR (val)->contents;
2877         coding->flags
2878           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2879              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2880              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2881              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2882              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2883              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2884              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2885              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2886              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2887              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2888              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2889              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2890              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2891              );
2892
2893         /* Invoke graphic register 0 to plane 0.  */
2894         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2895         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2896         CODING_SPEC_ISO_INVOCATION (coding, 1)
2897           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2898         /* Not single shifting at first.  */
2899         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2900         /* Beginning of buffer should also be regarded as bol. */
2901         CODING_SPEC_ISO_BOL (coding) = 1;
2902
2903         for (charset = 0; charset <= MAX_CHARSET; charset++)
2904           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2905         val = Vcharset_revision_alist;
2906         while (CONSP (val))
2907           {
2908             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2909             if (charset >= 0
2910                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2911                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2912               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2913             val = XCONS (val)->cdr;
2914           }
2915
2916         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2917            FLAGS[REG] can be one of below:
2918                 integer CHARSET: CHARSET occupies register I,
2919                 t: designate nothing to REG initially, but can be used
2920                   by any charsets,
2921                 list of integer, nil, or t: designate the first
2922                   element (if integer) to REG initially, the remaining
2923                   elements (if integer) is designated to REG on request,
2924                   if an element is t, REG can be used by any charsets,
2925                 nil: REG is never used.  */
2926         for (charset = 0; charset <= MAX_CHARSET; charset++)
2927           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2928             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2929         for (i = 0; i < 4; i++)
2930           {
2931             if (INTEGERP (flags[i])
2932                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2933                 || (charset = get_charset_id (flags[i])) >= 0)
2934               {
2935                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2936                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2937               }
2938             else if (EQ (flags[i], Qt))
2939               {
2940                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2941                 reg_bits |= 1 << i;
2942                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2943               }
2944             else if (CONSP (flags[i]))
2945               {
2946                 Lisp_Object tail = flags[i];
2947
2948                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2949                 if (INTEGERP (XCONS (tail)->car)
2950                     && (charset = XINT (XCONS (tail)->car),
2951                         CHARSET_VALID_P (charset))
2952                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2953                   {
2954                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2955                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2956                   }
2957                 else
2958                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2959                 tail = XCONS (tail)->cdr;
2960                 while (CONSP (tail))
2961                   {
2962                     if (INTEGERP (XCONS (tail)->car)
2963                         && (charset = XINT (XCONS (tail)->car),
2964                             CHARSET_VALID_P (charset))
2965                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2966                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2967                         = i;
2968                     else if (EQ (XCONS (tail)->car, Qt))
2969                       reg_bits |= 1 << i;
2970                     tail = XCONS (tail)->cdr;
2971                   }
2972               }
2973             else
2974               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2975
2976             CODING_SPEC_ISO_DESIGNATION (coding, i)
2977               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
2978           }
2979
2980         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
2981           {
2982             /* REG 1 can be used only by locking shift in 7-bit env.  */
2983             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
2984               reg_bits &= ~2;
2985             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
2986               /* Without any shifting, only REG 0 and 1 can be used.  */
2987               reg_bits &= 3;
2988           }
2989
2990         if (reg_bits)
2991           for (charset = 0; charset <= MAX_CHARSET; charset++)
2992             {
2993               if (CHARSET_VALID_P (charset))
2994                 {
2995                   /* There exist some default graphic registers to be
2996                      used CHARSET.  */
2997
2998                   /* We had better avoid designating a charset of
2999                      CHARS96 to REG 0 as far as possible.  */
3000                   if (CHARSET_CHARS (charset) == 96)
3001                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3002                       = (reg_bits & 2
3003                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3004                   else
3005                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3006                       = (reg_bits & 1
3007                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3008                 }
3009             }
3010       }
3011       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3012       coding->spec.iso2022.last_invalid_designation_register = -1;
3013       break;
3014
3015     case 3:
3016       coding->type = coding_type_big5;
3017       coding->common_flags
3018         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3019       coding->flags
3020         = (NILP (XVECTOR (coding_spec)->contents[4])
3021            ? CODING_FLAG_BIG5_HKU
3022            : CODING_FLAG_BIG5_ETEN);
3023       break;
3024
3025     case 4:
3026       coding->type = coding_type_ccl;
3027       coding->common_flags
3028         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3029       {
3030         Lisp_Object val = XVECTOR (coding_spec)->contents[4];
3031         Lisp_Object decoder, encoder;
3032
3033         if (CONSP  (val)
3034             && SYMBOLP (XCONS (val)->car)
3035             && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
3036             && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
3037             && SYMBOLP (XCONS (val)->cdr)
3038             && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
3039             && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
3040           {
3041             setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
3042             setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
3043           }
3044         else
3045           goto label_invalid_coding_system;
3046       }
3047       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3048       break;
3049
3050     case 5:
3051       coding->type = coding_type_raw_text;
3052       break;
3053
3054     default:
3055       goto label_invalid_coding_system;
3056     }
3057   return 0;
3058
3059  label_invalid_coding_system:
3060   coding->type = coding_type_no_conversion;
3061   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3062   coding->common_flags = 0;
3063   coding->eol_type = CODING_EOL_LF;
3064   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3065   return -1;
3066 }
3067
3068 /* Setup raw-text or one of its subsidiaries in the structure
3069    coding_system CODING according to the already setup value eol_type
3070    in CODING.  CODING should be setup for some coding system in
3071    advance.  */
3072
3073 void
3074 setup_raw_text_coding_system (coding)
3075      struct coding_system *coding;
3076 {
3077   if (coding->type != coding_type_raw_text)
3078     {
3079       coding->symbol = Qraw_text;
3080       coding->type = coding_type_raw_text;
3081       if (coding->eol_type != CODING_EOL_UNDECIDED)
3082         {
3083           Lisp_Object subsidiaries = Fget (Qraw_text, Qeol_type);
3084
3085           if (VECTORP (subsidiaries)
3086               && XVECTOR (subsidiaries)->size == 3)
3087             coding->symbol
3088               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3089         }
3090     }
3091   return;
3092 }
3093
3094 /* Emacs has a mechanism to automatically detect a coding system if it
3095    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3096    it's impossible to distinguish some coding systems accurately
3097    because they use the same range of codes.  So, at first, coding
3098    systems are categorized into the following categories:
3099
3100    o coding-category-emacs-mule
3101
3102         The category for a coding system which has the same code range
3103         as Emacs' internal format.  Assigned the coding-system (Lisp
3104         symbol) `emacs-mule' by default.
3105
3106    o coding-category-sjis
3107
3108         The category for a coding system which has the same code range
3109         as SJIS.  Assigned the coding-system (Lisp
3110         symbol) `japanese-shift-jis' by default.
3111
3112    o coding-category-iso-7
3113
3114         The category for a coding system which has the same code range
3115         as ISO2022 of 7-bit environment.  This doesn't use any locking
3116         shift and single shift functions.  This can encode/decode all
3117         charsets.  Assigned the coding-system (Lisp symbol)
3118         `iso-2022-7bit' by default.
3119
3120    o coding-category-iso-7-tight
3121
3122         Same as coding-category-iso-7 except that this can
3123         encode/decode only the specified charsets.
3124
3125    o coding-category-iso-8-1
3126
3127         The category for a coding system which has the same code range
3128         as ISO2022 of 8-bit environment and graphic plane 1 used only
3129         for DIMENSION1 charset.  This doesn't use any locking shift
3130         and single shift functions.  Assigned the coding-system (Lisp
3131         symbol) `iso-latin-1' by default.
3132
3133    o coding-category-iso-8-2
3134
3135         The category for a coding system which has the same code range
3136         as ISO2022 of 8-bit environment and graphic plane 1 used only
3137         for DIMENSION2 charset.  This doesn't use any locking shift
3138         and single shift functions.  Assigned the coding-system (Lisp
3139         symbol) `japanese-iso-8bit' by default.
3140
3141    o coding-category-iso-7-else
3142
3143         The category for a coding system which has the same code range
3144         as ISO2022 of 7-bit environment but uses locking shift or
3145         single shift functions.  Assigned the coding-system (Lisp
3146         symbol) `iso-2022-7bit-lock' by default.
3147
3148    o coding-category-iso-8-else
3149
3150         The category for a coding system which has the same code range
3151         as ISO2022 of 8-bit environment but uses locking shift or
3152         single shift functions.  Assigned the coding-system (Lisp
3153         symbol) `iso-2022-8bit-ss2' by default.
3154
3155    o coding-category-big5
3156
3157         The category for a coding system which has the same code range
3158         as BIG5.  Assigned the coding-system (Lisp symbol)
3159         `cn-big5' by default.
3160
3161    o coding-category-binary
3162
3163         The category for a coding system not categorized in any of the
3164         above.  Assigned the coding-system (Lisp symbol)
3165         `no-conversion' by default.
3166
3167    Each of them is a Lisp symbol and the value is an actual
3168    `coding-system's (this is also a Lisp symbol) assigned by a user.
3169    What Emacs does actually is to detect a category of coding system.
3170    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3171    decide only one possible category, it selects a category of the
3172    highest priority.  Priorities of categories are also specified by a
3173    user in a Lisp variable `coding-category-list'.
3174
3175 */
3176
3177 static
3178 int ascii_skip_code[256];
3179
3180 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3181    If it detects possible coding systems, return an integer in which
3182    appropriate flag bits are set.  Flag bits are defined by macros
3183    CODING_CATEGORY_MASK_XXX in `coding.h'.
3184
3185    How many ASCII characters are at the head is returned as *SKIP.  */
3186
3187 static int
3188 detect_coding_mask (source, src_bytes, priorities, skip)
3189      unsigned char *source;
3190      int src_bytes, *priorities, *skip;
3191 {
3192   register unsigned char c;
3193   unsigned char *src = source, *src_end = source + src_bytes;
3194   unsigned int mask;
3195   int i;
3196
3197   /* At first, skip all ASCII characters and control characters except
3198      for three ISO2022 specific control characters.  */
3199   ascii_skip_code[ISO_CODE_SO] = 0;
3200   ascii_skip_code[ISO_CODE_SI] = 0;
3201   ascii_skip_code[ISO_CODE_ESC] = 0;
3202
3203  label_loop_detect_coding:
3204   while (src < src_end && ascii_skip_code[*src]) src++;
3205   *skip = src - source;
3206
3207   if (src >= src_end)
3208     /* We found nothing other than ASCII.  There's nothing to do.  */
3209     return 0;
3210
3211   c = *src;
3212   /* The text seems to be encoded in some multilingual coding system.
3213      Now, try to find in which coding system the text is encoded.  */
3214   if (c < 0x80)
3215     {
3216       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3217       /* C is an ISO2022 specific control code of C0.  */
3218       mask = detect_coding_iso2022 (src, src_end);
3219       if (mask == 0)
3220         {
3221           /* No valid ISO2022 code follows C.  Try again.  */
3222           src++;
3223           if (c == ISO_CODE_ESC)
3224             ascii_skip_code[ISO_CODE_ESC] = 1;
3225           else
3226             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3227           goto label_loop_detect_coding;
3228         }
3229       if (priorities)
3230         goto label_return_highest_only;
3231     }
3232   else
3233     {
3234       int try;
3235
3236       if (c < 0xA0)
3237         {
3238           /* C is the first byte of SJIS character code,
3239              or a leading-code of Emacs' internal format (emacs-mule).  */
3240           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3241
3242           /* Or, if C is a special latin extra code,
3243              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3244              or is an ISO2022 control-sequence-introducer (CSI),
3245              we should also consider the possibility of ISO2022 codings.  */
3246           if ((VECTORP (Vlatin_extra_code_table)
3247                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3248               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3249               || (c == ISO_CODE_CSI
3250                   && (src < src_end
3251                       && (*src == ']'
3252                           || ((*src == '0' || *src == '1' || *src == '2')
3253                               && src + 1 < src_end
3254                               && src[1] == ']')))))
3255             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3256                      | CODING_CATEGORY_MASK_ISO_8BIT);
3257         }
3258       else
3259         /* C is a character of ISO2022 in graphic plane right,
3260            or a SJIS's 1-byte character code (i.e. JISX0201),
3261            or the first byte of BIG5's 2-byte code.  */
3262         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3263                 | CODING_CATEGORY_MASK_ISO_8BIT
3264                 | CODING_CATEGORY_MASK_SJIS
3265                 | CODING_CATEGORY_MASK_BIG5);
3266
3267       mask = 0;
3268       if (priorities)
3269         {
3270           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3271             {
3272               if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
3273                 mask = detect_coding_iso2022 (src, src_end);
3274               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3275                 mask = detect_coding_sjis (src, src_end);
3276               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3277                 mask = detect_coding_big5 (src, src_end);
3278               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3279                 mask = detect_coding_emacs_mule (src, src_end);
3280               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3281                 mask = CODING_CATEGORY_MASK_RAW_TEXT;
3282               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3283                 mask = CODING_CATEGORY_MASK_BINARY;
3284               if (mask)
3285                 goto label_return_highest_only;
3286             }
3287           return CODING_CATEGORY_MASK_RAW_TEXT;
3288         }
3289       if (try & CODING_CATEGORY_MASK_ISO)
3290         mask |= detect_coding_iso2022 (src, src_end);
3291       if (try & CODING_CATEGORY_MASK_SJIS)
3292         mask |= detect_coding_sjis (src, src_end);
3293       if (try & CODING_CATEGORY_MASK_BIG5)
3294         mask |= detect_coding_big5 (src, src_end);
3295       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3296         mask |= detect_coding_emacs_mule (src, src_end);
3297     }
3298   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3299
3300  label_return_highest_only:
3301   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3302     {
3303       if (mask & priorities[i])
3304         return priorities[i];
3305     }
3306   return CODING_CATEGORY_MASK_RAW_TEXT;
3307 }
3308
3309 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3310    The information of the detected coding system is set in CODING.  */
3311
3312 void
3313 detect_coding (coding, src, src_bytes)
3314      struct coding_system *coding;
3315      unsigned char *src;
3316      int src_bytes;
3317 {
3318   unsigned int idx;
3319   int skip, mask, i;
3320   Lisp_Object val = Vcoding_category_list;
3321
3322   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3323   coding->heading_ascii = skip;
3324
3325   if (!mask) return;
3326
3327   /* We found a single coding system of the highest priority in MASK.  */
3328   idx = 0;
3329   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3330   if (! mask)
3331     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3332
3333   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3334
3335   if (coding->eol_type != CODING_EOL_UNDECIDED)
3336     {
3337       Lisp_Object tmp = Fget (val, Qeol_type);
3338
3339       if (VECTORP (tmp))
3340         val = XVECTOR (tmp)->contents[coding->eol_type];
3341     }
3342   setup_coding_system (val, coding);
3343   /* Set this again because setup_coding_system reset this member.  */
3344   coding->heading_ascii = skip;
3345 }
3346
3347 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3348    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3349    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3350
3351    How many non-eol characters are at the head is returned as *SKIP.  */
3352
3353 #define MAX_EOL_CHECK_COUNT 3
3354
3355 static int
3356 detect_eol_type (source, src_bytes, skip)
3357      unsigned char *source;
3358      int src_bytes, *skip;
3359 {
3360   unsigned char *src = source, *src_end = src + src_bytes;
3361   unsigned char c;
3362   int total = 0;                /* How many end-of-lines are found so far.  */
3363   int eol_type = CODING_EOL_UNDECIDED;
3364   int this_eol_type;
3365
3366   *skip = 0;
3367
3368   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3369     {
3370       c = *src++;
3371       if (c == '\n' || c == '\r')
3372         {
3373           if (*skip == 0)
3374             *skip = src - 1 - source;
3375           total++;
3376           if (c == '\n')
3377             this_eol_type = CODING_EOL_LF;
3378           else if (src >= src_end || *src != '\n')
3379             this_eol_type = CODING_EOL_CR;
3380           else
3381             this_eol_type = CODING_EOL_CRLF, src++;
3382
3383           if (eol_type == CODING_EOL_UNDECIDED)
3384             /* This is the first end-of-line.  */
3385             eol_type = this_eol_type;
3386           else if (eol_type != this_eol_type)
3387             {
3388               /* The found type is different from what found before.  */
3389               eol_type = CODING_EOL_INCONSISTENT;
3390               break;
3391             }
3392         }
3393     }
3394
3395   if (*skip == 0)
3396     *skip = src_end - source;
3397   return eol_type;
3398 }
3399
3400 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3401    is encoded.  If it detects an appropriate format of end-of-line, it
3402    sets the information in *CODING.  */
3403
3404 void
3405 detect_eol (coding, src, src_bytes)
3406      struct coding_system *coding;
3407      unsigned char *src;
3408      int src_bytes;
3409 {
3410   Lisp_Object val;
3411   int skip;
3412   int eol_type = detect_eol_type (src, src_bytes, &skip);
3413
3414   if (coding->heading_ascii > skip)
3415     coding->heading_ascii = skip;
3416   else
3417     skip = coding->heading_ascii;
3418
3419   if (eol_type == CODING_EOL_UNDECIDED)
3420     return;
3421   if (eol_type == CODING_EOL_INCONSISTENT)
3422     {
3423 #if 0
3424       /* This code is suppressed until we find a better way to
3425          distinguish raw text file and binary file.  */
3426
3427       /* If we have already detected that the coding is raw-text, the
3428          coding should actually be no-conversion.  */
3429       if (coding->type == coding_type_raw_text)
3430         {
3431           setup_coding_system (Qno_conversion, coding);
3432           return;
3433         }
3434       /* Else, let's decode only text code anyway.  */
3435 #endif /* 0 */
3436       eol_type = CODING_EOL_LF;
3437     }
3438
3439   val = Fget (coding->symbol, Qeol_type);
3440   if (VECTORP (val) && XVECTOR (val)->size == 3)
3441     {
3442       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3443       coding->heading_ascii = skip;
3444     }
3445 }
3446
/* Number of bytes added on top of the estimated conversion buffer
   size by decoding_buffer_size and encoding_buffer_size.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which a source byte count is multiplied to estimate the
   size of a decoding buffer for CODING (a pointer to struct
   coding_system): 3 for ISO2022, 2 for SJIS and BIG5, 1 for raw-text,
   the decoder's buf_magnification for CCL, and 2 for anything else.
   CODING is parenthesized in the expansion so that any pointer
   expression (e.g. `&cs') can be passed; note that it may be
   evaluated more than once.  */
#define DECODING_BUFFER_MAG(coding)                                            \
  ((coding)->type == coding_type_iso2022                                       \
   ? 3                                                                         \
   : (((coding)->type == coding_type_sjis                                      \
       || (coding)->type == coding_type_big5)                                  \
      ? 2                                                                      \
      : ((coding)->type == coding_type_raw_text                                \
         ? 1                                                                   \
         : ((coding)->type == coding_type_ccl                                  \
            ? (coding)->spec.ccl.decoder.buf_magnification                     \
            : 2))))
3459
3460 /* Return maximum size (bytes) of a buffer enough for decoding
3461    SRC_BYTES of text encoded in CODING.  */
3462
3463 int
3464 decoding_buffer_size (coding, src_bytes)
3465      struct coding_system *coding;
3466      int src_bytes;
3467 {
3468   return (src_bytes * DECODING_BUFFER_MAG (coding)
3469           + CONVERSION_BUFFER_EXTRA_ROOM);
3470 }
3471
3472 /* Return maximum size (bytes) of a buffer enough for encoding
3473    SRC_BYTES of text to CODING.  */
3474
3475 int
3476 encoding_buffer_size (coding, src_bytes)
3477      struct coding_system *coding;
3478      int src_bytes;
3479 {
3480   int magnification;
3481
3482   if (coding->type == coding_type_ccl)
3483     magnification = coding->spec.ccl.encoder.buf_magnification;
3484   else
3485     magnification = 3;
3486
3487   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3488 }
3489
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* Scratch buffer handed out by get_conversion_buffer, and its current
   capacity in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a buffer of at least SIZE bytes to be used for
   encoding or decoding.  The buffer is shared: it is grown (by
   repeated doubling) when SIZE exceeds its current capacity, and in
   that case the previous contents are NOT preserved.

   NOTE(review): an earlier version of this comment said NULL is
   returned when memory runs out, but the result of xmalloc is not
   checked here -- confirm how xmalloc reports allocation failure.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      /* Start from twice the current capacity.  If the capacity is
         still zero (buffer never allocated), start from the minimum
         size instead; doubling zero would never reach SIZE.  */
      int real_size = (conversion_buffer_size > 0
                       ? conversion_buffer_size * 2
                       : MINIMUM_CONVERSION_BUFFER_SIZE);

      while (real_size < size) real_size *= 2;
      buf = (char *) xmalloc (real_size);
      if (conversion_buffer)
        xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3518
3519 int
3520 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3521      struct coding_system *coding;
3522      unsigned char *source, *destination;
3523      int src_bytes, dst_bytes, encodep;
3524 {
3525   struct ccl_program *ccl
3526     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3527   int result;
3528
3529   coding->produced = ccl_driver (ccl, source, destination,
3530                                  src_bytes, dst_bytes, &(coding->consumed));
3531   if (encodep)
3532     {
3533       coding->produced_char = coding->produced;
3534       coding->consumed_char
3535         = multibyte_chars_in_text (source, coding->consumed);
3536     }
3537   else
3538     {
3539       coding->produced_char
3540         = multibyte_chars_in_text (destination, coding->produced);
3541       coding->consumed_char = coding->consumed;
3542     }
3543   switch (ccl->status)
3544     {
3545     case CCL_STAT_SUSPEND_BY_SRC:
3546       result = CODING_FINISH_INSUFFICIENT_SRC;
3547       break;
3548     case CCL_STAT_SUSPEND_BY_DST:
3549       result = CODING_FINISH_INSUFFICIENT_DST;
3550       break;
3551     default:
3552       result = CODING_FINISH_NORMAL;
3553       break;
3554     }
3555   return result;
3556 }
3557
3558 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3559    decoding, it may detect coding system and format of end-of-line if
3560    those are not yet decided.  */
3561
3562 int
3563 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3564      struct coding_system *coding;
3565      unsigned char *source, *destination;
3566      int src_bytes, dst_bytes;
3567 {
3568   int result;
3569
3570   if (src_bytes <= 0)
3571     {
3572       coding->produced = coding->produced_char = 0;
3573       coding->consumed = coding->consumed_char = 0;
3574       coding->fake_multibyte = 0;
3575       return CODING_FINISH_NORMAL;
3576     }
3577
3578   if (coding->type == coding_type_undecided)
3579     detect_coding (coding, source, src_bytes);
3580
3581   if (coding->eol_type == CODING_EOL_UNDECIDED)
3582     detect_eol (coding, source, src_bytes);
3583
3584   switch (coding->type)
3585     {
3586     case coding_type_emacs_mule:
3587     case coding_type_undecided:
3588     case coding_type_raw_text:
3589       if (coding->eol_type == CODING_EOL_LF
3590           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3591         goto label_no_conversion;
3592       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3593       break;
3594
3595     case coding_type_sjis:
3596       result = decode_coding_sjis_big5 (coding, source, destination,
3597                                         src_bytes, dst_bytes, 1);
3598       break;
3599
3600     case coding_type_iso2022:
3601       result = decode_coding_iso2022 (coding, source, destination,
3602                                       src_bytes, dst_bytes);
3603       break;
3604
3605     case coding_type_big5:
3606       result = decode_coding_sjis_big5 (coding, source, destination,
3607                                         src_bytes, dst_bytes, 0);
3608       break;
3609
3610     case coding_type_ccl:
3611       result = ccl_coding_driver (coding, source, destination,
3612                                   src_bytes, dst_bytes, 0);
3613       break;
3614
3615     default:                    /* i.e. case coding_type_no_conversion: */
3616     label_no_conversion:
3617       if (dst_bytes && src_bytes > dst_bytes)
3618         {
3619           coding->produced = dst_bytes;
3620           result = CODING_FINISH_INSUFFICIENT_DST;
3621         }
3622       else
3623         {
3624           coding->produced = src_bytes;
3625           result = CODING_FINISH_NORMAL;
3626         }
3627       if (dst_bytes)
3628         bcopy (source, destination, coding->produced);
3629       else
3630         safe_bcopy (source, destination, coding->produced);
3631       coding->fake_multibyte = 1;
3632       coding->consumed
3633         = coding->consumed_char = coding->produced_char = coding->produced;
3634       break;
3635     }
3636
3637   return result;
3638 }
3639
3640 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3641
3642 int
3643 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3644      struct coding_system *coding;
3645      unsigned char *source, *destination;
3646      int src_bytes, dst_bytes;
3647 {
3648   int result;
3649
3650   if (src_bytes <= 0)
3651     {
3652       coding->produced = coding->produced_char = 0;
3653       coding->consumed = coding->consumed_char = 0;
3654       coding->fake_multibyte = 0;
3655       return CODING_FINISH_NORMAL;
3656     }
3657
3658   switch (coding->type)
3659     {
3660     case coding_type_emacs_mule:
3661     case coding_type_undecided:
3662     case coding_type_raw_text:
3663       if (coding->eol_type == CODING_EOL_LF
3664           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3665         goto label_no_conversion;
3666       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3667       break;
3668
3669     case coding_type_sjis:
3670       result = encode_coding_sjis_big5 (coding, source, destination,
3671                                         src_bytes, dst_bytes, 1);
3672       break;
3673
3674     case coding_type_iso2022:
3675       result = encode_coding_iso2022 (coding, source, destination,
3676                                       src_bytes, dst_bytes);
3677       break;
3678
3679     case coding_type_big5:
3680       result = encode_coding_sjis_big5 (coding, source, destination,
3681                                         src_bytes, dst_bytes, 0);
3682       break;
3683
3684     case coding_type_ccl:
3685       result = ccl_coding_driver (coding, source, destination,
3686                                   src_bytes, dst_bytes, 1);
3687       break;
3688
3689     default:                    /* i.e. case coding_type_no_conversion: */
3690     label_no_conversion:
3691       if (dst_bytes && src_bytes > dst_bytes)
3692         {
3693           coding->produced = dst_bytes;
3694           result = CODING_FINISH_INSUFFICIENT_DST;
3695         }
3696       else
3697         {
3698           coding->produced = src_bytes;
3699           result = CODING_FINISH_NORMAL;
3700         }
3701       if (dst_bytes)
3702         bcopy (source, destination, coding->produced);
3703       else
3704         safe_bcopy (source, destination, coding->produced);
3705       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3706         {
3707           unsigned char *p = destination, *pend = p + coding->produced;
3708           while (p < pend)
3709             if (*p++ == '\015') p[-1] = '\n';
3710         }
3711       coding->fake_multibyte = 1;
3712       coding->consumed
3713         = coding->consumed_char = coding->produced_char = coding->produced;
3714       break;
3715     }
3716
3717   return result;
3718 }
3719
3720 /* Scan text in the region between *BEG and *END (byte positions),
3721    skip characters which we don't have to decode by coding system
3722    CODING at the head and tail, then set *BEG and *END to the region
3723    of the text we actually have to convert.  The caller should move
3724    the gap out of the region in advance.
3725
3726    If STR is not NULL, *BEG and *END are indices into STR.  */
3727
3728 static void
3729 shrink_decoding_region (beg, end, coding, str)
3730      int *beg, *end;
3731      struct coding_system *coding;
3732      unsigned char *str;
3733 {
3734   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3735   int eol_conversion;
3736
3737   if (coding->type == coding_type_ccl
3738       || coding->type == coding_type_undecided
3739       || !NILP (coding->post_read_conversion))
3740     {
3741       /* We can't skip any data.  */
3742       return;
3743     }
3744   else if (coding->type == coding_type_no_conversion)
3745     {
3746       /* We need no conversion, but don't have to skip any data here.
3747          Decoding routine handles them effectively anyway.  */
3748       return;
3749     }
3750
3751   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3752
3753   if ((! eol_conversion) && (coding->heading_ascii >= 0))
3754     /* Detection routine has already found how much we can skip at the
3755        head.  */
3756     *beg += coding->heading_ascii;
3757
3758   if (str)
3759     {
3760       begp_orig = begp = str + *beg;
3761       endp_orig = endp = str + *end;
3762     }
3763   else
3764     {
3765       begp_orig = begp = BYTE_POS_ADDR (*beg);
3766       endp_orig = endp = begp + *end - *beg;
3767     }
3768
3769   switch (coding->type)
3770     {
3771     case coding_type_emacs_mule:
3772     case coding_type_raw_text:
3773       if (eol_conversion)
3774         {
3775           if (coding->heading_ascii < 0)
3776             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3777           while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
3778             endp--;
3779           /* Do not consider LF as ascii if preceded by CR, since that
3780              confuses eol decoding. */
3781           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3782             endp++;
3783         }
3784       else
3785         begp = endp;
3786       break;
3787
3788     case coding_type_sjis:
3789     case coding_type_big5:
3790       /* We can skip all ASCII characters at the head.  */
3791       if (coding->heading_ascii < 0)
3792         {
3793           if (eol_conversion)
3794             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3795           else
3796             while (begp < endp && *begp < 0x80) begp++;
3797         }
3798       /* We can skip all ASCII characters at the tail except for the
3799          second byte of SJIS or BIG5 code.  */
3800       if (eol_conversion)
3801         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3802       else
3803         while (begp < endp && endp[-1] < 0x80) endp--;
3804       /* Do not consider LF as ascii if preceded by CR, since that
3805          confuses eol decoding. */
3806       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3807         endp++;
3808       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3809         endp++;
3810       break;
3811
3812     default:            /* i.e. case coding_type_iso2022: */
3813       if (coding->heading_ascii < 0)
3814         {
3815           /* We can skip all ASCII characters at the head except for a
3816              few control codes.  */
3817           while (begp < endp && (c = *begp) < 0x80
3818                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3819                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3820                  && (!eol_conversion || c != ISO_CODE_LF))
3821             begp++;
3822         }
3823       switch (coding->category_idx)
3824         {
3825         case CODING_CATEGORY_IDX_ISO_8_1:
3826         case CODING_CATEGORY_IDX_ISO_8_2:
3827           /* We can skip all ASCII characters at the tail.  */
3828           if (eol_conversion)
3829             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3830           else
3831             while (begp < endp && endp[-1] < 0x80) endp--;
3832           /* Do not consider LF as ascii if preceded by CR, since that
3833              confuses eol decoding. */
3834           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3835             endp++;
3836           break;
3837
3838         case CODING_CATEGORY_IDX_ISO_7:
3839         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3840           /* We can skip all charactes at the tail except for ESC and
3841              the following 2-byte at the tail.  */
3842           if (eol_conversion)
3843             while (begp < endp
3844                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3845               endp--;
3846           else
3847             while (begp < endp
3848                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3849               endp--;
3850           /* Do not consider LF as ascii if preceded by CR, since that
3851              confuses eol decoding. */
3852           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3853             endp++;
3854           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3855             {
3856               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3857                 /* This is an ASCII designation sequence.  We can
3858                     surely skip the tail.  */
3859                 endp += 2;
3860               else
3861                 /* Hmmm, we can't skip the tail.  */
3862                 endp = endp_orig;
3863             }
3864         }
3865     }
3866   *beg += begp - begp_orig;
3867   *end += endp - endp_orig;
3868   return;
3869 }
3870
3871 /* Like shrink_decoding_region but for encoding.  */
3872
3873 static void
3874 shrink_encoding_region (beg, end, coding, str)
3875      int *beg, *end;
3876      struct coding_system *coding;
3877      unsigned char *str;
3878 {
3879   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3880   int eol_conversion;
3881
3882   if (coding->type == coding_type_ccl)
3883     /* We can't skip any data.  */
3884     return;
3885   else if (coding->type == coding_type_no_conversion)
3886     {
3887       /* We need no conversion.  */
3888       *beg = *end;
3889       return;
3890     }
3891
3892   if (str)
3893     {
3894       begp_orig = begp = str + *beg;
3895       endp_orig = endp = str + *end;
3896     }
3897   else
3898     {
3899       begp_orig = begp = BYTE_POS_ADDR (*beg);
3900       endp_orig = endp = begp + *end - *beg;
3901     }
3902
3903   eol_conversion = (coding->eol_type == CODING_EOL_CR
3904                     || coding->eol_type == CODING_EOL_CRLF);
3905
3906   /* Here, we don't have to check coding->pre_write_conversion because
3907      the caller is expected to have handled it already.  */
3908   switch (coding->type)
3909     {
3910     case coding_type_undecided:
3911     case coding_type_emacs_mule:
3912     case coding_type_raw_text:
3913       if (eol_conversion)
3914         {
3915           while (begp < endp && *begp != '\n') begp++;
3916           while (begp < endp && endp[-1] != '\n') endp--;
3917         }
3918       else
3919         begp = endp;
3920       break;
3921
3922     case coding_type_iso2022:
3923       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3924         {
3925           unsigned char *bol = begp;
3926           while (begp < endp && *begp < 0x80)
3927             {
3928               begp++;
3929               if (begp[-1] == '\n')
3930                 bol = begp;
3931             }
3932           begp = bol;
3933           goto label_skip_tail;
3934         }
3935       /* fall down ... */
3936
3937     default:
3938       /* We can skip all ASCII characters at the head and tail.  */
3939       if (eol_conversion)
3940         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
3941       else
3942         while (begp < endp && *begp < 0x80) begp++;
3943     label_skip_tail:
3944       if (eol_conversion)
3945         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
3946       else
3947         while (begp < endp && *(endp - 1) < 0x80) endp--;
3948       break;
3949     }
3950
3951   *beg += begp - begp_orig;
3952   *end += endp - endp_orig;
3953   return;
3954 }
3955
3956 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
3957    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
3958    coding system CODING, and return the status code of code conversion
3959    (currently, this value has no meaning).
3960
3961    How many characters (and bytes) are converted to how many
3962    characters (and bytes) are recorded in members of the structure
3963    CODING.
3964
3965    If REPLACE is nonzero, we do various things as if the original text
3966    is deleted and a new text is inserted.  See the comments in
3967    replace_range (insdel.c) to know what we are doing.  */
3968
3969 int
3970 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
3971      int from, from_byte, to, to_byte, encodep, replace;
3972      struct coding_system *coding;
3973 {
3974   int len = to - from, len_byte = to_byte - from_byte;
3975   int require, inserted, inserted_byte;
3976   int head_skip, tail_skip, total_skip;
3977   Lisp_Object saved_coding_symbol = Qnil;
3978   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
3979   int first = 1;
3980   int fake_multibyte = 0;
3981   unsigned char *src, *dst;
3982   Lisp_Object deletion = Qnil;
3983
3984   if (from < PT && PT < to)
3985     SET_PT_BOTH (from, from_byte);
3986
3987   if (replace)
3988     {
3989       int saved_from = from;
3990
3991       prepare_to_modify_buffer (from, to, &from);
3992       if (saved_from != from)
3993         {
3994           to = from + len;
3995           if (multibyte)
3996             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
3997           else
3998             from_byte = from, to_byte = to;
3999           len_byte = to_byte - from_byte;
4000         }
4001     }
4002
4003   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4004     {
4005       /* We must detect encoding of text and eol format.  */
4006
4007       if (from < GPT && to > GPT)
4008         move_gap_both (from, from_byte);
4009       if (coding->type == coding_type_undecided)
4010         {
4011           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4012           if (coding->type == coding_type_undecided)
4013             /* It seems that the text contains only ASCII, but we
4014                should not left it undecided because the deeper
4015                decoding routine (decode_coding) tries to detect the
4016                encodings again in vain.  */
4017             coding->type = coding_type_emacs_mule;
4018         }
4019       if (coding->eol_type == CODING_EOL_UNDECIDED)
4020         {
4021           saved_coding_symbol = coding->symbol;
4022           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4023           if (coding->eol_type == CODING_EOL_UNDECIDED)
4024             coding->eol_type = CODING_EOL_LF;
4025           /* We had better recover the original eol format if we
4026              encounter an inconsitent eol format while decoding.  */
4027           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4028         }
4029     }
4030
4031   coding->consumed_char = len, coding->consumed = len_byte;
4032
4033   if (encodep
4034       ? ! CODING_REQUIRE_ENCODING (coding)
4035       : ! CODING_REQUIRE_DECODING (coding))
4036     {
4037       coding->produced = len_byte;
4038       if (multibyte
4039           && ! replace
4040           /* See the comment of the member heading_ascii in coding.h.  */
4041           && coding->heading_ascii < len_byte)
4042         {
4043           /* We still may have to combine byte at the head and the
4044              tail of the text in the region.  */
4045           if (from < GPT && GPT < to)
4046             move_gap_both (to, to_byte);
4047           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4048           adjust_after_insert (from, from_byte, to, to_byte, len);
4049           coding->produced_char = len;
4050         }
4051       else
4052         {
4053           if (!replace)
4054             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4055           coding->produced_char = len_byte;
4056         }
4057       return 0;
4058     }
4059
4060   /* Now we convert the text.  */
4061
4062   /* For encoding, we must process pre-write-conversion in advance.  */
4063   if (encodep
4064       && ! NILP (coding->pre_write_conversion)
4065       && SYMBOLP (coding->pre_write_conversion)
4066       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4067     {
4068       /* The function in pre-write-conversion may put a new text in a
4069          new buffer.  */
4070       struct buffer *prev = current_buffer, *new;
4071
4072       call2 (coding->pre_write_conversion,
4073              make_number (from), make_number (to));
4074       if (current_buffer != prev)
4075         {
4076           len = ZV - BEGV;
4077           new = current_buffer;
4078           set_buffer_internal_1 (prev);
4079           del_range_2 (from, from_byte, to, to_byte);
4080           insert_from_buffer (new, BEG, len, 0);
4081           to = from + len;
4082           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4083           len_byte = to_byte - from_byte;
4084         }
4085     }
4086
4087   if (replace)
4088     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4089
4090   /* Try to skip the heading and tailing ASCIIs.  */
4091   {
4092     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4093
4094     if (from < GPT && GPT < to)
4095       move_gap_both (from, from_byte);
4096     if (encodep)
4097       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4098     else
4099       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4100     if (from_byte == to_byte)
4101       {
4102         coding->produced = len_byte;
4103         coding->produced_char = multibyte ? len : len_byte;
4104         if (!replace)
4105           /* We must record and adjust for this new text now.  */
4106           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4107         return 0;
4108       }
4109
4110     head_skip = from_byte - from_byte_orig;
4111     tail_skip = to_byte_orig - to_byte;
4112     total_skip = head_skip + tail_skip;
4113     from += head_skip;
4114     to -= tail_skip;
4115     len -= total_skip; len_byte -= total_skip;
4116   }
4117
4118   /* For converion, we must put the gap before the text in addition to
4119      making the gap larger for efficient decoding.  The required gap
4120      size starts from 2000 which is the magic number used in make_gap.
4121      But, after one batch of conversion, it will be incremented if we
4122      find that it is not enough .  */
4123   require = 2000;
4124
4125   if (GAP_SIZE  < require)
4126     make_gap (require - GAP_SIZE);
4127   move_gap_both (from, from_byte);
4128
4129   if (GPT - BEG < beg_unchanged)
4130     beg_unchanged = GPT - BEG;
4131   if (Z - GPT < end_unchanged)
4132     end_unchanged = Z - GPT;
4133
4134   inserted = inserted_byte = 0;
4135   src = GAP_END_ADDR, dst = GPT_ADDR;
4136
4137   GAP_SIZE += len_byte;
4138   ZV -= len;
4139   Z -= len;
4140   ZV_BYTE -= len_byte;
4141   Z_BYTE -= len_byte;
4142
4143   for (;;)
4144     {
4145       int result;
4146
4147       /* The buffer memory is changed from:
4148          +--------+converted-text+---------+-------original-text------+---+
4149          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4150                   |<------------------- GAP_SIZE -------------------->|  */
4151       if (encodep)
4152         result = encode_coding (coding, src, dst, len_byte, 0);
4153       else
4154         result = decode_coding (coding, src, dst, len_byte, 0);
4155       /* to:
4156          +--------+-------converted-text--------+--+---original-text--+---+
4157          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4158                   |<------------------- GAP_SIZE -------------------->|  */
4159       if (coding->fake_multibyte)
4160         fake_multibyte = 1;
4161
4162       if (!encodep && !multibyte)
4163         coding->produced_char = coding->produced;
4164       inserted += coding->produced_char;
4165       inserted_byte += coding->produced;
4166       len_byte -= coding->consumed;
4167       src += coding->consumed;
4168       dst += inserted_byte;
4169
4170       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4171         {
4172           unsigned char *pend = dst, *p = pend - inserted_byte;
4173
4174           /* Encode LFs back to the original eol format (CR or CRLF).  */
4175           if (coding->eol_type == CODING_EOL_CR)
4176             {
4177               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4178             }
4179           else
4180             {
4181               int count = 0;
4182
4183               while (p < pend) if (*p++ == '\n') count++;
4184               if (src - dst < count)
4185                 {
4186                   /* We don't have sufficient room for putting LFs
4187                      back to CRLF.  We must record converted and
4188                      not-yet-converted text back to the buffer
4189                      content, enlarge the gap, then record them out of
4190                      the buffer contents again.  */
4191                   int add = len_byte + inserted_byte;
4192
4193                   GAP_SIZE -= add;
4194                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4195                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4196                   make_gap (count - GAP_SIZE);
4197                   GAP_SIZE += add;
4198                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4199                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4200                   /* Don't forget to update SRC, DST, and PEND.  */
4201                   src = GAP_END_ADDR - len_byte;
4202                   dst = GPT_ADDR + inserted_byte;
4203                   pend = dst;
4204                 }
4205               inserted += count;
4206               inserted_byte += count;
4207               coding->produced += count;
4208               p = dst = pend + count;
4209               while (count)
4210                 {
4211                   *--p = *--pend;
4212                   if (*p == '\n') count--, *--p = '\r';
4213                 }
4214             }
4215
4216           /* Suppress eol-format conversion in the further conversion.  */
4217           coding->eol_type = CODING_EOL_LF;
4218
4219           /* Restore the original symbol.  */
4220           coding->symbol = saved_coding_symbol;
4221
4222           continue;
4223         }
4224       if (len_byte <= 0)
4225         break;
4226       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4227         {
4228           /* The source text ends in invalid codes.  Let's just
4229              make them valid buffer contents, and finish conversion.  */
4230           inserted += len_byte;
4231           inserted_byte += len_byte;
4232           while (len_byte--)
4233             *dst++ = *src++;
4234           fake_multibyte = 1;
4235           break;
4236         }
4237       if (first)
4238         {
4239           /* We have just done the first batch of conversion which was
4240              stoped because of insufficient gap.  Let's reconsider the
4241              required gap size (i.e. SRT - DST) now.
4242
4243              We have converted ORIG bytes (== coding->consumed) into
4244              NEW bytes (coding->produced).  To convert the remaining
4245              LEN bytes, we may need REQUIRE bytes of gap, where:
4246                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4247                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4248              Here, we are sure that NEW >= ORIG.  */
4249           float ratio = coding->produced - coding->consumed;
4250           ratio /= coding->consumed;
4251           require = len_byte * ratio;
4252           first = 0;
4253         }
4254       if ((src - dst) < (require + 2000))
4255         {
4256           /* See the comment above the previous call of make_gap.  */
4257           int add = len_byte + inserted_byte;
4258
4259           GAP_SIZE -= add;
4260           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4261           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4262           make_gap (require + 2000);
4263           GAP_SIZE += add;
4264           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4265           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4266           /* Don't forget to update SRC, DST.  */
4267           src = GAP_END_ADDR - len_byte;
4268           dst = GPT_ADDR + inserted_byte;
4269         }
4270     }
4271   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4272
4273   if (multibyte
4274       && (fake_multibyte
4275           || !encodep && (to - from) != (to_byte - from_byte)))
4276     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4277
4278   /* If we have shrinked the conversion area, adjust it now.  */
4279   if (total_skip > 0)
4280     {
4281       if (tail_skip > 0)
4282         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4283       inserted += total_skip; inserted_byte += total_skip;
4284       GAP_SIZE += total_skip;
4285       GPT -= head_skip; GPT_BYTE -= head_skip;
4286       ZV -= total_skip; ZV_BYTE -= total_skip;
4287       Z -= total_skip; Z_BYTE -= total_skip;
4288       from -= head_skip; from_byte -= head_skip;
4289       to += tail_skip; to_byte += tail_skip;
4290     }
4291
4292   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4293
4294   if (! encodep && ! NILP (coding->post_read_conversion))
4295     {
4296       Lisp_Object val;
4297       int orig_inserted = inserted, pos = PT;
4298
4299       if (from != pos)
4300         temp_set_point_both (current_buffer, from, from_byte);
4301       val = call1 (coding->post_read_conversion, make_number (inserted));
4302       if (! NILP (val))
4303         {
4304           CHECK_NUMBER (val, 0);
4305           inserted = XFASTINT (val);
4306         }
4307       if (pos >= from + orig_inserted)
4308         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4309     }
4310
4311   signal_after_change (from, to - from, inserted);
4312
4313   {
4314     coding->consumed = to_byte - from_byte;
4315     coding->consumed_char = to - from;
4316     coding->produced = inserted_byte;
4317     coding->produced_char = inserted;
4318   }
4319
4320   return 0;
4321 }
4322
4323 Lisp_Object
4324 code_convert_string (str, coding, encodep, nocopy)
4325      Lisp_Object str;
4326      struct coding_system *coding;
4327      int encodep, nocopy;
4328 {
4329   int len;
4330   char *buf;
4331   int from = 0, to = XSTRING (str)->size;
4332   int to_byte = STRING_BYTES (XSTRING (str));
4333   struct gcpro gcpro1;
4334   Lisp_Object saved_coding_symbol = Qnil;
4335   int result;
4336
4337   if (encodep && !NILP (coding->pre_write_conversion)
4338       || !encodep && !NILP (coding->post_read_conversion))
4339     {
4340       /* Since we have to call Lisp functions which assume target text
4341          is in a buffer, after setting a temporary buffer, call
4342          code_convert_region.  */
4343       int count = specpdl_ptr - specpdl;
4344       struct buffer *prev = current_buffer;
4345
4346       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4347       temp_output_buffer_setup (" *code-converting-work*");
4348       set_buffer_internal (XBUFFER (Vstandard_output));
4349       if (encodep)
4350         insert_from_string (str, 0, 0, to, to_byte, 0);
4351       else
4352         {
4353           /* We must insert the contents of STR as is without
4354              unibyte<->multibyte conversion.  */
4355           current_buffer->enable_multibyte_characters = Qnil;
4356           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4357           current_buffer->enable_multibyte_characters = Qt;
4358         }
4359       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4360       if (encodep)
4361         /* We must return the buffer contents as unibyte string.  */
4362         current_buffer->enable_multibyte_characters = Qnil;
4363       str = make_buffer_string (BEGV, ZV, 0);
4364       set_buffer_internal (prev);
4365       return unbind_to (count, str);
4366     }
4367
4368   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4369     {
4370       /* See the comments in code_convert_region.  */
4371       if (coding->type == coding_type_undecided)
4372         {
4373           detect_coding (coding, XSTRING (str)->data, to_byte);
4374           if (coding->type == coding_type_undecided)
4375             coding->type = coding_type_emacs_mule;
4376         }
4377       if (coding->eol_type == CODING_EOL_UNDECIDED)
4378         {
4379           saved_coding_symbol = coding->symbol;
4380           detect_eol (coding, XSTRING (str)->data, to_byte);
4381           if (coding->eol_type == CODING_EOL_UNDECIDED)
4382             coding->eol_type = CODING_EOL_LF;
4383           /* We had better recover the original eol format if we
4384              encounter an inconsitent eol format while decoding.  */
4385           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4386         }
4387     }
4388
4389   if (encodep
4390       ? ! CODING_REQUIRE_ENCODING (coding)
4391       : ! CODING_REQUIRE_DECODING (coding))
4392     from = to_byte;
4393   else
4394     {
4395       /* Try to skip the heading and tailing ASCIIs.  */
4396       if (encodep)
4397         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4398       else
4399         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4400     }
4401   if (from == to_byte)
4402     return (nocopy ? str : Fcopy_sequence (str));
4403
4404   if (encodep)
4405     len = encoding_buffer_size (coding, to_byte - from);
4406   else
4407     len = decoding_buffer_size (coding, to_byte - from);
4408   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4409   GCPRO1 (str);
4410   buf = get_conversion_buffer (len);
4411   UNGCPRO;
4412
4413   if (from > 0)
4414     bcopy (XSTRING (str)->data, buf, from);
4415   result = (encodep
4416             ? encode_coding (coding, XSTRING (str)->data + from,
4417                              buf + from, to_byte - from, len)
4418             : decode_coding (coding, XSTRING (str)->data + from,
4419                              buf + from, to_byte - from, len));
4420   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4421     {
4422       /* We simple try to decode the whole string again but without
4423          eol-conversion this time.  */
4424       coding->eol_type = CODING_EOL_LF;
4425       coding->symbol = saved_coding_symbol;
4426       return code_convert_string (str, coding, encodep, nocopy);
4427     }
4428
4429   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4430          STRING_BYTES (XSTRING (str)) - to_byte);
4431
4432   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4433   if (encodep)
4434     str = make_unibyte_string (buf, len + coding->produced);
4435   else
4436     str = make_string_from_bytes (buf, len + coding->produced_char,
4437                                   len + coding->produced);
4438   return str;
4439 }
4440
4441 \f
4442 #ifdef emacs
4443 /*** 7. Emacs Lisp library functions ***/
4444
4445 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4446   "Return t if OBJECT is nil or a coding-system.\n\
4447 See the documentation of `make-coding-system' for information\n\
4448 about coding-system objects.")
4449   (obj)
4450      Lisp_Object obj;
4451 {
4452   if (NILP (obj))
4453     return Qt;
4454   if (!SYMBOLP (obj))
4455     return Qnil;
4456   /* Get coding-spec vector for OBJ.  */
4457   obj = Fget (obj, Qcoding_system);
4458   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4459           ? Qt : Qnil);
4460 }
4461
4462 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4463        Sread_non_nil_coding_system, 1, 1, 0,
4464   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4465   (prompt)
4466      Lisp_Object prompt;
4467 {
4468   Lisp_Object val;
4469   do
4470     {
4471       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4472                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4473     }
4474   while (XSTRING (val)->size == 0);
4475   return (Fintern (val, Qnil));
4476 }
4477
4478 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4479   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4480 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4481   (prompt, default_coding_system)
4482      Lisp_Object prompt, default_coding_system;
4483 {
4484   Lisp_Object val;
4485   if (SYMBOLP (default_coding_system))
4486     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4487   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4488                           Qt, Qnil, Qcoding_system_history,
4489                           default_coding_system, Qnil);
4490   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4491 }
4492
4493 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4494        1, 1, 0,
4495   "Check validity of CODING-SYSTEM.\n\
4496 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4497 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4498 The value of property should be a vector of length 5.")
4499   (coding_system)
4500      Lisp_Object coding_system;
4501 {
4502   CHECK_SYMBOL (coding_system, 0);
4503   if (!NILP (Fcoding_system_p (coding_system)))
4504     return coding_system;
4505   while (1)
4506     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4507 }
4508 \f
4509 Lisp_Object
4510 detect_coding_system (src, src_bytes, highest)
4511      unsigned char *src;
4512      int src_bytes, highest;
4513 {
4514   int coding_mask, eol_type;
4515   Lisp_Object val, tmp;
4516   int dummy;
4517
4518   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4519   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4520   if (eol_type == CODING_EOL_INCONSISTENT)
4521     eol_type == CODING_EOL_UNDECIDED;
4522
4523   if (!coding_mask)
4524     {
4525       val = Qundecided;
4526       if (eol_type != CODING_EOL_UNDECIDED)
4527         {
4528           Lisp_Object val2;
4529           val2 = Fget (Qundecided, Qeol_type);
4530           if (VECTORP (val2))
4531             val = XVECTOR (val2)->contents[eol_type];
4532         }
4533       return (highest ? val : Fcons (val, Qnil));
4534     }
4535
4536   /* At first, gather possible coding systems in VAL.  */
4537   val = Qnil;
4538   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4539     {
4540       int idx
4541         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4542       if (coding_mask & (1 << idx))
4543         {
4544           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4545           if (highest)
4546             break;
4547         }
4548     }
4549   if (!highest)
4550     val = Fnreverse (val);
4551
4552   /* Then, replace the elements with subsidiary coding systems.  */
4553   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4554     {
4555       if (eol_type != CODING_EOL_UNDECIDED
4556           && eol_type != CODING_EOL_INCONSISTENT)
4557         {
4558           Lisp_Object eol;
4559           eol = Fget (XCONS (tmp)->car, Qeol_type);
4560           if (VECTORP (eol))
4561             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4562         }
4563     }
4564   return (highest ? XCONS (val)->car : val);
4565 }
4566
4567 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4568        2, 3, 0,
4569   "Detect coding system of the text in the region between START and END.\n\
4570 Return a list of possible coding systems ordered by priority.\n\
4571 \n\
4572 If only ASCII characters are found, it returns a list of single element\n\
4573 `undecided' or its subsidiary coding system according to a detected\n\
4574 end-of-line format.\n\
4575 \n\
4576 If optional argument HIGHEST is non-nil, return the coding system of\n\
4577 highest priority.")
4578   (start, end, highest)
4579      Lisp_Object start, end, highest;
4580 {
4581   int from, to;
4582   int from_byte, to_byte;
4583
4584   CHECK_NUMBER_COERCE_MARKER (start, 0);
4585   CHECK_NUMBER_COERCE_MARKER (end, 1);
4586
4587   validate_region (&start, &end);
4588   from = XINT (start), to = XINT (end);
4589   from_byte = CHAR_TO_BYTE (from);
4590   to_byte = CHAR_TO_BYTE (to);
4591
4592   if (from < GPT && to >= GPT)
4593     move_gap_both (to, to_byte);
4594
4595   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4596                                to_byte - from_byte,
4597                                !NILP (highest));
4598 }
4599
4600 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4601        1, 2, 0,
4602   "Detect coding system of the text in STRING.\n\
4603 Return a list of possible coding systems ordered by priority.\n\
4604 \n\
4605 If only ASCII characters are found, it returns a list of single element\n\
4606 `undecided' or its subsidiary coding system according to a detected\n\
4607 end-of-line format.\n\
4608 \n\
4609 If optional argument HIGHEST is non-nil, return the coding system of\n\
4610 highest priority.")
4611   (string, highest)
4612      Lisp_Object string, highest;
4613 {
4614   CHECK_STRING (string, 0);
4615
4616   return detect_coding_system (XSTRING (string)->data,
4617                                STRING_BYTES (XSTRING (string)),
4618                                !NILP (highest));
4619 }
4620
4621 Lisp_Object
4622 code_convert_region1 (start, end, coding_system, encodep)
4623      Lisp_Object start, end, coding_system;
4624      int encodep;
4625 {
4626   struct coding_system coding;
4627   int from, to, len;
4628
4629   CHECK_NUMBER_COERCE_MARKER (start, 0);
4630   CHECK_NUMBER_COERCE_MARKER (end, 1);
4631   CHECK_SYMBOL (coding_system, 2);
4632
4633   validate_region (&start, &end);
4634   from = XFASTINT (start);
4635   to = XFASTINT (end);
4636
4637   if (NILP (coding_system))
4638     return make_number (to - from);
4639
4640   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4641     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4642
4643   coding.mode |= CODING_MODE_LAST_BLOCK;
4644   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4645                        &coding, encodep, 1);
4646   Vlast_coding_system_used = coding.symbol;
4647   return make_number (coding.produced_char);
4648 }
4649
4650 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4651        3, 3, "r\nzCoding system: ",
4652   "Decode the current region by specified coding system.\n\
4653 When called from a program, takes three arguments:\n\
4654 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4655 This function sets `last-coding-system-used' to the precise coding system\n\
4656 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4657 not fully specified.)\n\
4658 It returns the length of the decoded text.")
4659   (start, end, coding_system)
4660      Lisp_Object start, end, coding_system;
4661 {
4662   return code_convert_region1 (start, end, coding_system, 0);
4663 }
4664
4665 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
4666        3, 3, "r\nzCoding system: ",
4667   "Encode the current region by specified coding system.\n\
4668 When called from a program, takes three arguments:\n\
4669 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4670 This function sets `last-coding-system-used' to the precise coding system\n\
4671 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4672 not fully specified.)\n\
4673 It returns the length of the encoded text.")
4674   (start, end, coding_system)
4675      Lisp_Object start, end, coding_system;
4676 {
4677   return code_convert_region1 (start, end, coding_system, 1);
4678 }
4679
4680 Lisp_Object
4681 code_convert_string1 (string, coding_system, nocopy, encodep)
4682      Lisp_Object string, coding_system, nocopy;
4683      int encodep;
4684 {
4685   struct coding_system coding;
4686
4687   CHECK_STRING (string, 0);
4688   CHECK_SYMBOL (coding_system, 1);
4689
4690   if (NILP (coding_system))
4691     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4692
4693   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4694     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4695
4696   coding.mode |= CODING_MODE_LAST_BLOCK;
4697   Vlast_coding_system_used = coding.symbol;
4698   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4699 }
4700
4701 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
4702        2, 3, 0,
4703   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
4704 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4705 if the decoding operation is trivial.\n\
4706 This function sets `last-coding-system-used' to the precise coding system\n\
4707 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4708 not fully specified.)")
4709   (string, coding_system, nocopy)
4710      Lisp_Object string, coding_system, nocopy;
4711 {
4712   return code_convert_string1 (string, coding_system, nocopy, 0);
4713 }
4714
4715 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
4716        2, 3, 0,
4717   "Encode STRING to CODING-SYSTEM, and return the result.\n\
4718 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4719 if the encoding operation is trivial.\n\
4720 This function sets `last-coding-system-used' to the precise coding system\n\
4721 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4722 not fully specified.)")
4723   (string, coding_system, nocopy)
4724      Lisp_Object string, coding_system, nocopy;
4725 {
4726   return code_convert_string1 (string, coding_system, nocopy, 1);
4727 }
4728
4729 /* Encode or decode STRING according to CODING_SYSTEM.
4730    Do not set Vlast_coding_system_used.  */
4731
4732 Lisp_Object
4733 code_convert_string_norecord (string, coding_system, encodep)
4734      Lisp_Object string, coding_system;
4735      int encodep;
4736 {
4737   struct coding_system coding;
4738
4739   CHECK_STRING (string, 0);
4740   CHECK_SYMBOL (coding_system, 1);
4741
4742   if (NILP (coding_system))
4743     return string;
4744
4745   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4746     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4747
4748   coding.mode |= CODING_MODE_LAST_BLOCK;
4749   return code_convert_string (string, &coding, encodep, Qt);
4750 }
4751 \f
4752 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4753   "Decode a JISX0208 character of shift-jis encoding.\n\
4754 CODE is the character code in SJIS.\n\
4755 Return the corresponding character.")
4756   (code)
4757      Lisp_Object code;
4758 {
4759   unsigned char c1, c2, s1, s2;
4760   Lisp_Object val;
4761
4762   CHECK_NUMBER (code, 0);
4763   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4764   DECODE_SJIS (s1, s2, c1, c2);
4765   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4766   return val;
4767 }
4768
4769 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4770   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4771 Return the corresponding character code in SJIS.")
4772   (ch)
4773      Lisp_Object ch;
4774 {
4775   int charset, c1, c2, s1, s2;
4776   Lisp_Object val;
4777
4778   CHECK_NUMBER (ch, 0);
4779   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4780   if (charset == charset_jisx0208)
4781     {
4782       ENCODE_SJIS (c1, c2, s1, s2);
4783       XSETFASTINT (val, (s1 << 8) | s2);
4784     }
4785   else
4786     XSETFASTINT (val, 0);
4787   return val;
4788 }
4789
4790 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4791   "Decode a Big5 character CODE of BIG5 coding system.\n\
4792 CODE is the character code in BIG5.\n\
4793 Return the corresponding character.")
4794   (code)
4795      Lisp_Object code;
4796 {
4797   int charset;
4798   unsigned char b1, b2, c1, c2;
4799   Lisp_Object val;
4800
4801   CHECK_NUMBER (code, 0);
4802   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4803   DECODE_BIG5 (b1, b2, charset, c1, c2);
4804   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4805   return val;
4806 }
4807
4808 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4809   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4810 Return the corresponding character code in Big5.")
4811   (ch)
4812      Lisp_Object ch;
4813 {
4814   int charset, c1, c2, b1, b2;
4815   Lisp_Object val;
4816
4817   CHECK_NUMBER (ch, 0);
4818   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4819   if (charset == charset_big5_1 || charset == charset_big5_2)
4820     {
4821       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4822       XSETFASTINT (val, (b1 << 8) | b2);
4823     }
4824   else
4825     XSETFASTINT (val, 0);
4826   return val;
4827 }
4828 \f
4829 DEFUN ("set-terminal-coding-system-internal",
4830        Fset_terminal_coding_system_internal,
4831        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4832   (coding_system)
4833      Lisp_Object coding_system;
4834 {
4835   CHECK_SYMBOL (coding_system, 0);
4836   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4837   /* We had better not send unsafe characters to terminal.  */
4838   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4839
4840   return Qnil;
4841 }
4842
4843 DEFUN ("set-safe-terminal-coding-system-internal",
4844        Fset_safe_terminal_coding_system_internal,
4845        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4846   (coding_system)
4847      Lisp_Object coding_system;
4848 {
4849   CHECK_SYMBOL (coding_system, 0);
4850   setup_coding_system (Fcheck_coding_system (coding_system),
4851                        &safe_terminal_coding);
4852   return Qnil;
4853 }
4854
4855 DEFUN ("terminal-coding-system",
4856        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
4857   "Return coding system specified for terminal output.")
4858   ()
4859 {
4860   return terminal_coding.symbol;
4861 }
4862
4863 DEFUN ("set-keyboard-coding-system-internal",
4864        Fset_keyboard_coding_system_internal,
4865        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4866   (coding_system)
4867      Lisp_Object coding_system;
4868 {
4869   CHECK_SYMBOL (coding_system, 0);
4870   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4871   return Qnil;
4872 }
4873
4874 DEFUN ("keyboard-coding-system",
4875        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
4876   "Return coding system specified for decoding keyboard input.")
4877   ()
4878 {
4879   return keyboard_coding.symbol;
4880 }
4881
4882 \f
4883 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4884        Sfind_operation_coding_system,  1, MANY, 0,
4885   "Choose a coding system for an operation based on the target name.\n\
4886 The value names a pair of coding systems: (DECODING-SYSTEM ENCODING-SYSTEM).\n\
4887 DECODING-SYSTEM is the coding system to use for decoding\n\
4888 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4889 for encoding (in case OPERATION does encoding).\n\
4890 \n\
4891 The first argument OPERATION specifies an I/O primitive:\n\
4892   For file I/O, `insert-file-contents' or `write-region'.\n\
4893   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4894   For network I/O, `open-network-stream'.\n\
4895 \n\
4896 The remaining arguments should be the same arguments that were passed\n\
4897 to the primitive.  Depending on which primitive, one of those arguments\n\
4898 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4899 whichever argument specifies the file name is TARGET.\n\
4900 \n\
4901 TARGET has a meaning which depends on OPERATION:\n\
4902   For file I/O, TARGET is a file name.\n\
4903   For process I/O, TARGET is a process name.\n\
4904   For network I/O, TARGET is a service name or a port number\n\
4905 \n\
4906 This function looks up what specified for TARGET in,\n\
4907 `file-coding-system-alist', `process-coding-system-alist',\n\
4908 or `network-coding-system-alist' depending on OPERATION.\n\
4909 They may specify a coding system, a cons of coding systems,\n\
4910 or a function symbol to call.\n\
4911 In the last case, we call the function with one argument,\n\
4912 which is a list of all the arguments given to this function.")
4913   (nargs, args)
4914      int nargs;
4915      Lisp_Object *args;
4916 {
4917   Lisp_Object operation, target_idx, target, val;
4918   register Lisp_Object chain;
4919
4920   if (nargs < 2)
4921     error ("Too few arguments");
4922   operation = args[0];
4923   if (!SYMBOLP (operation)
4924       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
4925     error ("Invalid first arguement");
4926   if (nargs < 1 + XINT (target_idx))
4927     error ("Too few arguments for operation: %s",
4928            XSYMBOL (operation)->name->data);
4929   target = args[XINT (target_idx) + 1];
4930   if (!(STRINGP (target)
4931         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
4932     error ("Invalid %dth argument", XINT (target_idx) + 1);
4933
4934   chain = ((EQ (operation, Qinsert_file_contents)
4935             || EQ (operation, Qwrite_region))
4936            ? Vfile_coding_system_alist
4937            : (EQ (operation, Qopen_network_stream)
4938               ? Vnetwork_coding_system_alist
4939               : Vprocess_coding_system_alist));
4940   if (NILP (chain))
4941     return Qnil;
4942
4943   for (; CONSP (chain); chain = XCONS (chain)->cdr)
4944     {
4945       Lisp_Object elt;
4946       elt = XCONS (chain)->car;
4947
4948       if (CONSP (elt)
4949           && ((STRINGP (target)
4950                && STRINGP (XCONS (elt)->car)
4951                && fast_string_match (XCONS (elt)->car, target) >= 0)
4952               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
4953         {
4954           val = XCONS (elt)->cdr;
4955           /* Here, if VAL is both a valid coding system and a valid
4956              function symbol, we return VAL as a coding system.  */
4957           if (CONSP (val))
4958             return val;
4959           if (! SYMBOLP (val))
4960             return Qnil;
4961           if (! NILP (Fcoding_system_p (val)))
4962             return Fcons (val, val);
4963           if (! NILP (Ffboundp (val)))
4964             {
4965               val = call1 (val, Flist (nargs, args));
4966               if (CONSP (val))
4967                 return val;
4968               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
4969                 return Fcons (val, val);
4970             }
4971           return Qnil;
4972         }
4973     }
4974   return Qnil;
4975 }
4976
4977 DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems,
4978        Supdate_iso_coding_systems, 0, 0, 0,
4979   "Update internal database for ISO2022 based coding systems.\n\
4980 When values of the following coding categories are changed, you must\n\
4981 call this function:\n\
4982   coding-category-iso-7, coding-category-iso-7-tight,\n\
4983   coding-category-iso-8-1, coding-category-iso-8-2,\n\
4984   coding-category-iso-7-else, coding-category-iso-8-else")
4985   ()
4986 {
4987   int i;
4988
4989   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE;
4990        i++)
4991     {
4992       if (! coding_system_table[i])
4993         coding_system_table[i]
4994           = (struct coding_system *) xmalloc (sizeof (struct coding_system));
4995       setup_coding_system
4996         (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value,
4997          coding_system_table[i]);
4998     }
4999   return Qnil;
5000 }
5001
5002 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
5003        Sset_coding_priority_internal, 0, 0, 0,
5004   "Update internal database for the current value of `coding-category-list'.\n\
5005 This function is internal use only.")
5006   ()
5007 {
5008   int i = 0, idx;
5009   Lisp_Object val = Vcoding_category_list;
5010
5011   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
5012     {
5013       if (! SYMBOLP (XCONS (val)->car))
5014         break;
5015       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
5016       if (idx >= CODING_CATEGORY_IDX_MAX)
5017         break;
5018       coding_priorities[i++] = (1 << idx);
5019       val = XCONS (val)->cdr;
5020     }
5021   /* If coding-category-list is valid and contains all coding
5022      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
5023      the following code saves Emacs from craching.  */
5024   while (i < CODING_CATEGORY_IDX_MAX)
5025     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
5026
5027   return Qnil;
5028 }
5029
5030 #endif /* emacs */
5031
5032 \f
5033 /*** 8. Post-amble ***/
5034
5035 void
5036 init_coding_once ()
5037 {
5038   int i;
5039
5040   /* Emacs' internal format specific initialize routine.  */
5041   for (i = 0; i <= 0x20; i++)
5042     emacs_code_class[i] = EMACS_control_code;
5043   emacs_code_class[0x0A] = EMACS_linefeed_code;
5044   emacs_code_class[0x0D] = EMACS_carriage_return_code;
5045   for (i = 0x21 ; i < 0x7F; i++)
5046     emacs_code_class[i] = EMACS_ascii_code;
5047   emacs_code_class[0x7F] = EMACS_control_code;
5048   emacs_code_class[0x80] = EMACS_leading_code_composition;
5049   for (i = 0x81; i < 0xFF; i++)
5050     emacs_code_class[i] = EMACS_invalid_code;
5051   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5052   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5053   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5054   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
5055
5056   /* ISO2022 specific initialize routine.  */
5057   for (i = 0; i < 0x20; i++)
5058     iso_code_class[i] = ISO_control_code;
5059   for (i = 0x21; i < 0x7F; i++)
5060     iso_code_class[i] = ISO_graphic_plane_0;
5061   for (i = 0x80; i < 0xA0; i++)
5062     iso_code_class[i] = ISO_control_code;
5063   for (i = 0xA1; i < 0xFF; i++)
5064     iso_code_class[i] = ISO_graphic_plane_1;
5065   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5066   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5067   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5068   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5069   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5070   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5071   iso_code_class[ISO_CODE_ESC] = ISO_escape;
5072   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5073   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5074   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5075
5076   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
5077   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5078
5079   setup_coding_system (Qnil, &keyboard_coding);
5080   setup_coding_system (Qnil, &terminal_coding);
5081   setup_coding_system (Qnil, &safe_terminal_coding);
5082
5083   bzero (coding_system_table, sizeof coding_system_table);
5084
5085   bzero (ascii_skip_code, sizeof ascii_skip_code);
5086   for (i = 0; i < 128; i++)
5087     ascii_skip_code[i] = 1;
5088
5089 #if defined (MSDOS) || defined (WINDOWSNT)
5090   system_eol_type = CODING_EOL_CRLF;
5091 #else
5092   system_eol_type = CODING_EOL_LF;
5093 #endif
5094 }
5095
5096 #ifdef emacs
5097
5098 void
5099 syms_of_coding ()
5100 {
5101   Qtarget_idx = intern ("target-idx");
5102   staticpro (&Qtarget_idx);
5103
5104   Qcoding_system_history = intern ("coding-system-history");
5105   staticpro (&Qcoding_system_history);
5106   Fset (Qcoding_system_history, Qnil);
5107
5108   /* Target FILENAME is the first argument.  */
5109   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5110   /* Target FILENAME is the third argument.  */
5111   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5112
5113   Qcall_process = intern ("call-process");
5114   staticpro (&Qcall_process);
5115   /* Target PROGRAM is the first argument.  */
5116   Fput (Qcall_process, Qtarget_idx, make_number (0));
5117
5118   Qcall_process_region = intern ("call-process-region");
5119   staticpro (&Qcall_process_region);
5120   /* Target PROGRAM is the third argument.  */
5121   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5122
5123   Qstart_process = intern ("start-process");
5124   staticpro (&Qstart_process);
5125   /* Target PROGRAM is the third argument.  */
5126   Fput (Qstart_process, Qtarget_idx, make_number (2));
5127
5128   Qopen_network_stream = intern ("open-network-stream");
5129   staticpro (&Qopen_network_stream);
5130   /* Target SERVICE is the fourth argument.  */
5131   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5132
5133   Qcoding_system = intern ("coding-system");
5134   staticpro (&Qcoding_system);
5135
5136   Qeol_type = intern ("eol-type");
5137   staticpro (&Qeol_type);
5138
5139   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5140   staticpro (&Qbuffer_file_coding_system);
5141
5142   Qpost_read_conversion = intern ("post-read-conversion");
5143   staticpro (&Qpost_read_conversion);
5144
5145   Qpre_write_conversion = intern ("pre-write-conversion");
5146   staticpro (&Qpre_write_conversion);
5147
5148   Qno_conversion = intern ("no-conversion");
5149   staticpro (&Qno_conversion);
5150
5151   Qundecided = intern ("undecided");
5152   staticpro (&Qundecided);
5153
5154   Qcoding_system_p = intern ("coding-system-p");
5155   staticpro (&Qcoding_system_p);
5156
5157   Qcoding_system_error = intern ("coding-system-error");
5158   staticpro (&Qcoding_system_error);
5159
5160   Fput (Qcoding_system_error, Qerror_conditions,
5161         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5162   Fput (Qcoding_system_error, Qerror_message,
5163         build_string ("Invalid coding system"));
5164
5165   Qcoding_category = intern ("coding-category");
5166   staticpro (&Qcoding_category);
5167   Qcoding_category_index = intern ("coding-category-index");
5168   staticpro (&Qcoding_category_index);
5169
5170   Vcoding_category_table
5171     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5172   staticpro (&Vcoding_category_table);
5173   {
5174     int i;
5175     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5176       {
5177         XVECTOR (Vcoding_category_table)->contents[i]
5178           = intern (coding_category_name[i]);
5179         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5180               Qcoding_category_index, make_number (i));
5181       }
5182   }
5183
5184   Qtranslation_table = intern ("translation-table");
5185   staticpro (&Qtranslation_table);
5186   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (0));
5187
5188   Qtranslation_table_id = intern ("translation-table-id");
5189   staticpro (&Qtranslation_table_id);
5190
5191   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
5192   staticpro (&Qtranslation_table_for_decode);
5193
5194   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
5195   staticpro (&Qtranslation_table_for_encode);
5196
5197   Qsafe_charsets = intern ("safe-charsets");
5198   staticpro (&Qsafe_charsets);
5199
5200   Qemacs_mule = intern ("emacs-mule");
5201   staticpro (&Qemacs_mule);
5202
5203   Qraw_text = intern ("raw-text");
5204   staticpro (&Qraw_text);
5205
5206   defsubr (&Scoding_system_p);
5207   defsubr (&Sread_coding_system);
5208   defsubr (&Sread_non_nil_coding_system);
5209   defsubr (&Scheck_coding_system);
5210   defsubr (&Sdetect_coding_region);
5211   defsubr (&Sdetect_coding_string);
5212   defsubr (&Sdecode_coding_region);
5213   defsubr (&Sencode_coding_region);
5214   defsubr (&Sdecode_coding_string);
5215   defsubr (&Sencode_coding_string);
5216   defsubr (&Sdecode_sjis_char);
5217   defsubr (&Sencode_sjis_char);
5218   defsubr (&Sdecode_big5_char);
5219   defsubr (&Sencode_big5_char);
5220   defsubr (&Sset_terminal_coding_system_internal);
5221   defsubr (&Sset_safe_terminal_coding_system_internal);
5222   defsubr (&Sterminal_coding_system);
5223   defsubr (&Sset_keyboard_coding_system_internal);
5224   defsubr (&Skeyboard_coding_system);
5225   defsubr (&Sfind_operation_coding_system);
5226   defsubr (&Supdate_iso_coding_systems);
5227   defsubr (&Sset_coding_priority_internal);
5228
5229   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5230     "List of coding systems.\n\
5231 \n\
5232 Do not alter the value of this variable manually.  This variable should be\n\
5233 updated by the functions `make-coding-system' and\n\
5234 `define-coding-system-alias'.");
5235   Vcoding_system_list = Qnil;
5236
5237   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5238     "Alist of coding system names.\n\
5239 Each element is one element list of coding system name.\n\
5240 This variable is given to `completing-read' as TABLE argument.\n\
5241 \n\
5242 Do not alter the value of this variable manually.  This variable should be\n\
5243 updated by the functions `make-coding-system' and\n\
5244 `define-coding-system-alias'.");
5245   Vcoding_system_alist = Qnil;
5246
5247   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5248     "List of coding-categories (symbols) ordered by priority.");
5249   {
5250     int i;
5251
5252     Vcoding_category_list = Qnil;
5253     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5254       Vcoding_category_list
5255         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5256                  Vcoding_category_list);
5257   }
5258
5259   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5260     "Specify the coding system for read operations.\n\
5261 It is useful to bind this variable with `let', but do not set it globally.\n\
5262 If the value is a coding system, it is used for decoding on read operation.\n\
5263 If not, an appropriate element is used from one of the coding system alists:\n\
5264 There are three such tables, `file-coding-system-alist',\n\
5265 `process-coding-system-alist', and `network-coding-system-alist'.");
5266   Vcoding_system_for_read = Qnil;
5267
5268   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5269     "Specify the coding system for write operations.\n\
5270 It is useful to bind this variable with `let', but do not set it globally.\n\
5271 If the value is a coding system, it is used for encoding on write operation.\n\
5272 If not, an appropriate element is used from one of the coding system alists:\n\
5273 There are three such tables, `file-coding-system-alist',\n\
5274 `process-coding-system-alist', and `network-coding-system-alist'.");
5275   Vcoding_system_for_write = Qnil;
5276
5277   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5278     "Coding system used in the latest file or process I/O.");
5279   Vlast_coding_system_used = Qnil;
5280
5281   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5282     "*Non-nil inhibit code conversion of end-of-line format in any cases.");
5283   inhibit_eol_conversion = 0;
5284
5285   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5286     "Non-nil means process buffer inherits coding system of process output.\n\
5287 Bind it to t if the process output is to be treated as if it were a file\n\
5288 read from some filesystem.");
5289   inherit_process_coding_system = 0;
5290
5291   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5292     "Alist to decide a coding system to use for a file I/O operation.\n\
5293 The format is ((PATTERN . VAL) ...),\n\
5294 where PATTERN is a regular expression matching a file name,\n\
5295 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5296 If VAL is a coding system, it is used for both decoding and encoding\n\
5297 the file contents.\n\
5298 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5299 and the cdr part is used for encoding.\n\
5300 If VAL is a function symbol, the function must return a coding system\n\
5301 or a cons of coding systems which are used as above.\n\
5302 \n\
5303 See also the function `find-operation-coding-system'.");
5304   Vfile_coding_system_alist = Qnil;
5305
5306   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5307     "Alist to decide a coding system to use for a process I/O operation.\n\
5308 The format is ((PATTERN . VAL) ...),\n\
5309 where PATTERN is a regular expression matching a program name,\n\
5310 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5311 If VAL is a coding system, it is used for both decoding what received\n\
5312 from the program and encoding what sent to the program.\n\
5313 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5314 and the cdr part is used for encoding.\n\
5315 If VAL is a function symbol, the function must return a coding system\n\
5316 or a cons of coding systems which are used as above.\n\
5317 \n\
5318 See also the function `find-operation-coding-system'.");
5319   Vprocess_coding_system_alist = Qnil;
5320
5321   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5322     "Alist to decide a coding system to use for a network I/O operation.\n\
5323 The format is ((PATTERN . VAL) ...),\n\
5324 where PATTERN is a regular expression matching a network service name\n\
5325 or is a port number to connect to,\n\
5326 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5327 If VAL is a coding system, it is used for both decoding what received\n\
5328 from the network stream and encoding what sent to the network stream.\n\
5329 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5330 and the cdr part is used for encoding.\n\
5331 If VAL is a function symbol, the function must return a coding system\n\
5332 or a cons of coding systems which are used as above.\n\
5333 \n\
5334 See also the function `find-operation-coding-system'.");
5335   Vnetwork_coding_system_alist = Qnil;
5336
5337   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5338     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF).");
5339   eol_mnemonic_unix = ':';
5340
5341   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5342     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5343   eol_mnemonic_dos = '\\';
5344
5345   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5346     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5347   eol_mnemonic_mac = '/';
5348
5349   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5350     "Mnemonic character indicating end-of-line format is not yet decided.");
5351   eol_mnemonic_undecided = ':';
5352
5353   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
5354     "*Non-nil enables character translation while encoding and decoding.");
5355   Venable_character_translation = Qt;
5356
5357   DEFVAR_LISP ("standard-translation-table-for-decode",
5358     &Vstandard_translation_table_for_decode,
5359     "Table for translating characters while decoding.");
5360   Vstandard_translation_table_for_decode = Qnil;
5361
5362   DEFVAR_LISP ("standard-translation-table-for-encode",
5363     &Vstandard_translation_table_for_encode,
5364     "Table for translating characters while encoding.");
5365   Vstandard_translation_table_for_encode = Qnil;
5366
5367   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5368     "Alist of charsets vs revision numbers.\n\
5369 While encoding, if a charset (car part of an element) is found,\n\
5370 designate it with the escape sequence identifying revision (cdr part of the element).");
5371   Vcharset_revision_alist = Qnil;
5372
5373   DEFVAR_LISP ("default-process-coding-system",
5374                &Vdefault_process_coding_system,
5375     "Cons of coding systems used for process I/O by default.\n\
5376 The car part is used for decoding a process output,\n\
5377 the cdr part is used for encoding a text to be sent to a process.");
5378   Vdefault_process_coding_system = Qnil;
5379
5380   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5381     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5382 This is a vector of length 256.\n\
5383 If Nth element is non-nil, the existence of code N in a file\n\
5384 \(or output of subprocess) doesn't prevent it from being detected as\n\
5385 a coding system of ISO 2022 variant which has a flag\n\
5386 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5387 or reading output of a subprocess.\n\
5388 Only the 128th through 159th elements have a meaning.");
5389   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5390
5391   DEFVAR_LISP ("select-safe-coding-system-function",
5392                &Vselect_safe_coding_system_function,
5393     "Function to call to select safe coding system for encoding a text.\n\
5394 \n\
5395 If set, this function is called to force a user to select a proper\n\
5396 coding system which can encode the text in the case that a default\n\
5397 coding system used in each operation can't encode the text.\n\
5398 \n\
5399 The default value is `select-safe-coding-system' (which see).");
5400   Vselect_safe_coding_system_function = Qnil;
5401
5402 }
5403
5404 #endif /* emacs */