src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. End-of-line handlers
  29   6. C library functions
  30   7. Emacs Lisp library functions
  31   8. Post-amble
  32
  33 */
  34
  35 /*** GENERAL NOTE on CODING SYSTEM ***
  36
  37   Coding system is an encoding mechanism of one or more character
  38   sets.  Here's a list of coding systems which Emacs can handle.  When
  39   we say "decode", it means converting some other coding system to
  40   Emacs' internal format (emacs-mule), and when we say "encode",
  41   it means converting the coding system emacs-mule to some other
  42   coding system.
  43
  44   0. Emacs' internal format (emacs-mule)
  45
  46   Emacs itself holds a multi-lingual character in a buffer and a string
  47   in a special format.  Details are described in section 2.
  48
  49   1. ISO2022
  50
  51   The most famous coding system for multiple character sets.  X's
  52   Compound Text, various EUCs (Extended Unix Code), and coding
  53   systems used in Internet communication such as ISO-2022-JP are
  54   all variants of ISO2022.  Details are described in section 3.
  55
  56   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  57
  58   A coding system to encode character sets: ASCII, JISX0201, and
  59   JISX0208.  Widely used for PC's in Japan.  Details are described in
  60   section 4.
  61
  62   3. BIG5
  63
  64   A coding system to encode character sets: ASCII and Big5.  Widely
  65   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  66   described in section 4.  In this file, when we write "BIG5"
  67   (all uppercase), we mean the coding system, and when we write
  68   "Big5" (capitalized), we mean the character set.
  69
  70   4. Raw text
  71
  72   A coding system for a text containing random 8-bit code.  Emacs does
  73   no code conversion on such a text except for end-of-line format.
  74
  75   5. Other
  76
  77   If a user wants to read/write a text encoded in a coding system not
  78   listed above, he can supply a decoder and an encoder for it in CCL
  79   (Code Conversion Language) programs.  Emacs executes the CCL program
  80   while reading/writing.
  81
  82   Emacs represents a coding system by a Lisp symbol that has a property
  83   `coding-system'.  But, before actually using the coding system, the
  84   information about it is set in a structure of type `struct
  85   coding_system' for rapid processing.  See section 6 for more details.
  86
  87 */
  88
  89 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  90
  91   How end-of-line of a text is encoded depends on a system.  For
  92   instance, Unix's format is just one byte of `line-feed' code,
  93   whereas DOS's format is two-byte sequence of `carriage-return' and
  94   `line-feed' codes.  MacOS's format is usually one byte of
  95   `carriage-return'.
  96
  97   Since text characters encoding and end-of-line encoding are
  98   independent, any coding system described above can take
  99   any format of end-of-line.  So, Emacs has information of format of
 100   end-of-line in each coding-system.  See section 6 for more details.
 101
 102 */
 103
 104 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 105
 106   These functions check if a text between SRC and SRC_END is encoded
 107   in the coding system category XXX.  Each returns an integer value in
 108   which appropriate flag bits for the category XXX are set.  The flag
 109   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 110   template of these functions.  */
 111 #if 0
 112 int
 113 detect_coding_emacs_mule (src, src_end)
 114      unsigned char *src, *src_end;
 115 {
 116   ...
 117 }
 118 #endif
 119
 120 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 121
 122   These functions decode SRC_BYTES length text at SOURCE encoded in
 123   CODING to Emacs' internal format (emacs-mule).  The resulting text
 124   goes to a place pointed to by DESTINATION, the length of which
 125   should not exceed DST_BYTES.  These functions set the information of
 126   original and decoded texts in the members produced, produced_char,
 127   consumed, and consumed_char of the structure *CODING.
 128
 129   The return value is an integer (CODING_FINISH_XXX) indicating how
 130   the decoding finished.
 131
 132   DST_BYTES zero means that source area and destination area are
 133   overlapped, which means that we can produce a decoded text until it
 134   reaches the head of the not-yet-decoded source text.
 135
 136   Below is a template of these functions.  */
 137 #if 0
 138 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 139      struct coding_system *coding;
 140      unsigned char *source, *destination;
 141      int src_bytes, dst_bytes;
 142 {
 143   ...
 144 }
 145 #endif
 146
 147 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 148
 149   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 150   internal format (emacs-mule) to CODING.  The resulting text goes to
 151   a place pointed to by DESTINATION, the length of which should not
 152   exceed DST_BYTES.  These functions set the information of
 153   original and encoded texts in the members produced, produced_char,
 154   consumed, and consumed_char of the structure *CODING.
 155
 156   The return value is an integer (CODING_FINISH_XXX) indicating how
 157   the encoding finished.
 158
 159   DST_BYTES zero means that source area and destination area are
 160   overlapped, which means that we can produce an encoded text until it
 161   reaches the head of the not-yet-encoded source text.
 162
 163   Below is a template of these functions.  */
 164 #if 0
 165 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 166      struct coding_system *coding;
 167      unsigned char *source, *destination;
 168      int src_bytes, dst_bytes;
 169 {
 170   ...
 171 }
 172 #endif
 173
 174 /*** COMMONLY USED MACROS ***/
 175
 176 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 177    THREE_MORE_BYTES safely get one, two, and three bytes from the
 178    source text respectively.  If there are not enough bytes in the
 179    source, they jump to `label_end_of_loop'.  The caller should set
 180    variables `src' and `src_end' to appropriate areas in advance.  */
 181
 182 #define ONE_MORE_BYTE(c1)       \
 183   do {                          \
 184     if (src < src_end)          \
 185       c1 = *src++;              \
 186     else                        \
 187       goto label_end_of_loop;   \
 188   } while (0)
 189
 190 #define TWO_MORE_BYTES(c1, c2)  \
 191   do {                          \
 192     if (src + 1 < src_end)      \
 193       c1 = *src++, c2 = *src++; \
 194     else                        \
 195       goto label_end_of_loop;   \
 196   } while (0)
 197
 198 #define THREE_MORE_BYTES(c1, c2, c3)            \
 199   do {                                          \
 200     if (src + 2 < src_end)                      \
 201       c1 = *src++, c2 = *src++, c3 = *src++;    \
 202     else                                        \
 203       goto label_end_of_loop;                   \
 204   } while (0)
 205
 206 /* The following three macros DECODE_CHARACTER_ASCII,
 207    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 208    the multi-byte form of a character of each class at the place
 209    pointed by `dst'.  The caller should set the variable `dst' to
 210    point to an appropriate area and the variable `coding' to point to
 211    the coding-system of the currently decoding text in advance.  */
 212
 213 /* Decode one ASCII character C.  */
 214
 215 #define DECODE_CHARACTER_ASCII(c)                               \
 216   do {                                                          \
 217     if (COMPOSING_P (coding->composing))                        \
 218       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
 219     else                                                        \
 220       {                                                         \
 221         *dst++ = (c);                                           \
 222         coding->produced_char++;                                \
 223       }                                                         \
 224   } while (0)
 225
 226 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 227    position-code is C.  */
 228
 229 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 230   do {                                                                  \
 231     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 232     if (COMPOSING_P (coding->composing))                                \
 233       *dst++ = leading_code + 0x20;                                     \
 234     else                                                                \
 235       {                                                                 \
 236         *dst++ = leading_code;                                          \
 237         coding->produced_char++;                                        \
 238       }                                                                 \
 239     if (leading_code = CHARSET_LEADING_CODE_EXT (charset))              \
 240       *dst++ = leading_code;                                            \
 241     *dst++ = (c) | 0x80;                                                \
 242   } while (0)
 243
 244 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 245    position-codes are C1 and C2.  */
 246
 247 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 248   do {                                                  \
 249     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 250     *dst++ = (c2) | 0x80;                               \
 251   } while (0)
 252
 253 \f
 254 /*** 1. Preamble ***/
 255
 256 #include <stdio.h>
 257
 258 #ifdef emacs
 259
 260 #include <config.h>
 261 #include "lisp.h"
 262 #include "buffer.h"
 263 #include "charset.h"
 264 #include "ccl.h"
 265 #include "coding.h"
 266 #include "window.h"
 267
 268 #else  /* not emacs */
 269
 270 #include "mulelib.h"
 271
 272 #endif /* not emacs */
 273
 274 Lisp_Object Qcoding_system, Qeol_type;
 275 Lisp_Object Qbuffer_file_coding_system;
 276 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 277 Lisp_Object Qno_conversion, Qundecided;
 278 Lisp_Object Qcoding_system_history;
 279 Lisp_Object Qsafe_charsets;
 280
 281 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 282 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 283 Lisp_Object Qstart_process, Qopen_network_stream;
 284 Lisp_Object Qtarget_idx;
 285
 286 Lisp_Object Vselect_safe_coding_system_function;
 287
 288 /* Mnemonic character of each format of end-of-line.  */
 289 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 290 /* Mnemonic character to indicate format of end-of-line is not yet
 291    decided.  */
 292 int eol_mnemonic_undecided;
 293
 294 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 295    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 296 int system_eol_type;
 297
 298 #ifdef emacs
 299
 300 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 301
 302 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 303
 304 /* Coding system emacs-mule and raw-text are for converting only
 305    end-of-line format.  */
 306 Lisp_Object Qemacs_mule, Qraw_text;
 307
 308 /* Coding-systems are handed between Emacs Lisp programs and C internal
 309    routines by the following three variables.  */
 310 /* Coding-system for reading files and receiving data from process.  */
 311 Lisp_Object Vcoding_system_for_read;
 312 /* Coding-system for writing files and sending data to process.  */
 313 Lisp_Object Vcoding_system_for_write;
 314 /* Coding-system actually used in the latest I/O.  */
 315 Lisp_Object Vlast_coding_system_used;
 316
 317 /* A vector of length 256 which contains information about special
 318    Latin codes (espepcially for dealing with Microsoft code).  */
 319 Lisp_Object Vlatin_extra_code_table;
 320
 321 /* Flag to inhibit code conversion of end-of-line format.  */
 322 int inhibit_eol_conversion;
 323
 324 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 325 int inherit_process_coding_system;
 326
 327 /* Coding system to be used to encode text for terminal display.  */
 328 struct coding_system terminal_coding;
 329
 330 /* Coding system to be used to encode text for terminal display when
 331    terminal coding system is nil.  */
 332 struct coding_system safe_terminal_coding;
 333
 334 /* Coding system of what is sent from terminal keyboard.  */
 335 struct coding_system keyboard_coding;
 336
 337 Lisp_Object Vfile_coding_system_alist;
 338 Lisp_Object Vprocess_coding_system_alist;
 339 Lisp_Object Vnetwork_coding_system_alist;
 340
 341 #endif /* emacs */
 342
 343 Lisp_Object Qcoding_category, Qcoding_category_index;
 344
 345 /* List of symbols `coding-category-xxx' ordered by priority.  */
 346 Lisp_Object Vcoding_category_list;
 347
 348 /* Table of coding categories (Lisp symbols).  */
 349 Lisp_Object Vcoding_category_table;
 350
 351 /* Table of names of symbol for each coding-category.  */
 352 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 353   "coding-category-emacs-mule",
 354   "coding-category-sjis",
 355   "coding-category-iso-7",
 356   "coding-category-iso-7-tight",
 357   "coding-category-iso-8-1",
 358   "coding-category-iso-8-2",
 359   "coding-category-iso-7-else",
 360   "coding-category-iso-8-else",
 361   "coding-category-big5",
 362   "coding-category-raw-text",
 363   "coding-category-binary"
 364 };
 365
 366 /* Table pointers to coding systems corresponding to each coding
 367    categories.  */
 368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 369
 370 /* Flag to tell if we look up unification table on character code
 371    conversion.  */
 372 Lisp_Object Venable_character_unification;
 373 /* Standard unification table to look up on decoding (reading).  */
 374 Lisp_Object Vstandard_character_unification_table_for_decode;
 375 /* Standard unification table to look up on encoding (writing).  */
 376 Lisp_Object Vstandard_character_unification_table_for_encode;
 377
 378 Lisp_Object Qcharacter_unification_table;
 379 Lisp_Object Qcharacter_unification_table_for_decode;
 380 Lisp_Object Qcharacter_unification_table_for_encode;
 381
 382 /* Alist of charsets vs revision number.  */
 383 Lisp_Object Vcharset_revision_alist;
 384
 385 /* Default coding systems used for process I/O.  */
 386 Lisp_Object Vdefault_process_coding_system;
 387
 388 \f
 389 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 390
 391 /* Emacs' internal format for encoding multiple character sets is a
 392    kind of multi-byte encoding, i.e. characters are encoded by
 393    variable-length sequences of one-byte codes.  ASCII characters
 394    and control characters (e.g. `tab', `newline') are represented by
 395    one-byte sequences which are their ASCII codes, in the range 0x00
 396    through 0x7F.  The other characters are represented by a sequence
 397    of `base leading-code', optional `extended leading-code', and one
 398    or two `position-code's.  The length of the sequence is determined
 399    by the base leading-code.  Leading-code takes the range 0x80
 400    through 0x9F, whereas extended leading-code and position-code take
 401    the range 0xA0 through 0xFF.  See `charset.h' for more details
 402    about leading-code and position-code.
 403
 404    There's one exception to this rule.  Special leading-code
 405    `leading-code-composition' denotes that the following several
 406    characters should be composed into one character.  Leading-codes of
 407    components (except for ASCII) are added 0x20.  An ASCII character
 408    component is represented by a 2-byte sequence of `0xA0' and
 409    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 410    details of composite character.  Hence, we can summarize the code
 411    range as follows:
 412
 413    --- CODE RANGE of Emacs' internal format ---
 414    (character set)      (range)
 415    ASCII                0x00 .. 0x7F
 416    ELSE (1st byte)      0x80 .. 0x9F
 417         (rest bytes)    0xA0 .. 0xFF
 418    ---------------------------------------------
 419
 420   */
 421
 422 enum emacs_code_class_type emacs_code_class[256];
 423
 424 /* Go to the next statement only if *SRC is accessible and the code is
 425    greater than 0xA0.  */
 426 #define CHECK_CODE_RANGE_A0_FF  \
 427   do {                          \
 428     if (src >= src_end)         \
 429       goto label_end_of_switch; \
 430     else if (*src++ < 0xA0)     \
 431       return 0;                 \
 432   } while (0)
 433
 434 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 435    Check if a text is encoded in Emacs' internal format.  If it is,
 436    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 437
 438 int
 439 detect_coding_emacs_mule (src, src_end)
 440      unsigned char *src, *src_end;
 441 {
 442   unsigned char c;
 443   int composing = 0;
 444
 445   while (src < src_end)
 446     {
 447       c = *src++;
 448
 449       if (composing)
 450         {
 451           if (c < 0xA0)
 452             composing = 0;
 453           else
 454             c -= 0x20;
 455         }
 456
 457       switch (emacs_code_class[c])
 458         {
 459         case EMACS_ascii_code:
 460         case EMACS_linefeed_code:
 461           break;
 462
 463         case EMACS_control_code:
 464           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 465             return 0;
 466           break;
 467
 468         case EMACS_invalid_code:
 469           return 0;
 470
 471         case EMACS_leading_code_composition: /* c == 0x80 */
 472           if (composing)
 473             CHECK_CODE_RANGE_A0_FF;
 474           else
 475             composing = 1;
 476           break;
 477
 478         case EMACS_leading_code_4:
 479           CHECK_CODE_RANGE_A0_FF;
 480           /* fall down to check it two more times ...  */
 481
 482         case EMACS_leading_code_3:
 483           CHECK_CODE_RANGE_A0_FF;
 484           /* fall down to check it one more time ...  */
 485
 486         case EMACS_leading_code_2:
 487           CHECK_CODE_RANGE_A0_FF;
 488           break;
 489
 490         default:
 491         label_end_of_switch:
 492           break;
 493         }
 494     }
 495   return CODING_CATEGORY_MASK_EMACS_MULE;
 496 }
 497
 498 \f
 499 /*** 3. ISO2022 handlers ***/
 500
 501 /* The following note describes the coding system ISO2022 briefly.
 502    Since the intention of this note is to help in understanding of
 503    the programs in this file, some parts are NOT ACCURATE or OVERLY
 504    SIMPLIFIED.  For the thorough understanding, please refer to the
 505    original document of ISO2022.
 506
 507    ISO2022 provides many mechanisms to encode several character sets
 508    in 7-bit and 8-bit environment.  If one chooses 7-bit environment,
 509    all text is encoded by codes of less than 128.  This may make the
 510    encoded text a little bit longer, but the text gets more stability
 511    to pass through several gateways (some of them strip off the MSB).
 512
 513    There are two kinds of character set: control character set and
 514    graphic character set.  The former contains control characters such
 515    as `newline' and `escape' to provide control functions (control
 516    functions are provided also by escape sequences).  The latter
 517    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 518    two control character sets and many graphic character sets.
 519
 520    Graphic character sets are classified into one of the following
 521    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 522    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 523    bytes (DIMENSION) and the number of characters in one dimension
 524    (CHARS) of the set.  In addition, each character set is assigned an
 525    identification tag (called "final character" and denoted as <F>
 526    here after) which is unique in each class.  <F> of each character
 527    set is decided by ECMA(*) when it is registered in ISO.  Code range
 528    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 529
 530    Note (*): ECMA = European Computer Manufacturers Association
 531
 532    Here are examples of graphic character set [NAME(<F>)]:
 533         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 534         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 535         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 536         o DIMENSION2_CHARS96 -- none for the moment
 537
 538    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 539         C0 [0x00..0x1F] -- control character plane 0
 540         GL [0x20..0x7F] -- graphic character plane 0
 541         C1 [0x80..0x9F] -- control character plane 1
 542         GR [0xA0..0xFF] -- graphic character plane 1
 543
 544    A control character set is directly designated and invoked to C0 or
 545    C1 by an escape sequence.  The most common case is that ISO646's
 546    control character set is designated/invoked to C0 and ISO6429's
 547    control character set is designated/invoked to C1, and usually
 548    these designations/invocations are omitted in a coded text.  With
 549    7-bit environment, only C0 can be used, and a control character for
 550    C1 is encoded by an appropriate escape sequence to fit in the
 551    environment.  Every control character for C1 has a corresponding
 552    escape sequence defined.
 553
 554    A graphic character set is at first designated to one of four
 555    graphic registers (G0 through G3), then these graphic registers are
 556    invoked to GL or GR.  These designations and invocations can be
 557    done independently.  The most common case is that G0 is invoked to
 558    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 559    these invocations and designations are omitted in a coded text.
 560    With 7-bit environment, only GL can be used.
 561
 562    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 563    and 0x7F of GL area work as control characters SPACE and DEL
 564    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 565
 566    There are two ways of invocation: locking-shift and single-shift.
 567    With locking-shift, the invocation lasts until the next different
 568    invocation, whereas with single-shift, the invocation works only
 569    for the following character and doesn't affect locking-shift.
 570    Invocations are done by the following control characters or escape
 571    sequences.
 572
 573    ----------------------------------------------------------------------
 574    function             control char    escape sequence description
 575    ----------------------------------------------------------------------
 576    SI  (shift-in)               0x0F    none            invoke G0 to GL
 577    SO  (shift-out)              0x0E    none            invoke G1 to GL
 578    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 579    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 580    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 581    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 582    ----------------------------------------------------------------------
 583    The first four are for locking-shift.  Control characters for these
 584    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 585
 586    Designations are done by the following escape sequences.
 587    ----------------------------------------------------------------------
 588    escape sequence      description
 589    ----------------------------------------------------------------------
 590    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 591    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 592    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 593    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 594    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 595    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 596    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 597    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 598    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 599    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 600    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 601    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 602    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 603    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 604    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 605    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 606    ----------------------------------------------------------------------
 607
 608    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 609    of dimension 1, chars 94, and final character <F>, and etc.
 610
 611    Note (*): Although these designations are not allowed in ISO2022,
 612    Emacs accepts them on decoding, and produces them on encoding
 613    CHARS96 character set in a coding system which is characterized as
 614    7-bit environment, non-locking-shift, and non-single-shift.
 615
 616    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 617    '(' can be omitted.  We call this as "short-form" here after.
 618
 619    Now you may notice that there are a lot of ways for encoding the
 620    same multilingual text in ISO2022.  Actually, there exists many
 621    coding systems such as Compound Text (used in X's inter client
 622    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 623    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 624    localized platforms), and all of these are variants of ISO2022.
 625
 626    In addition to the above, Emacs handles two more kinds of escape
 627    sequences: ISO6429's direction specification and Emacs' private
 628    sequence for specifying character composition.
 629
 630    ISO6429's direction specification takes the following format:
 631         o CSI ']'      -- end of the current direction
 632         o CSI '0' ']'  -- end of the current direction
 633         o CSI '1' ']'  -- start of left-to-right text
 634         o CSI '2' ']'  -- start of right-to-left text
 635    The control character CSI (0x9B: control sequence introducer) is
 636    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 637
 638    Character composition specification takes the following format:
 639         o ESC '0' -- start character composition
 640         o ESC '1' -- end character composition
 641    Since these are not standard escape sequences of any ISO, the use
 642    of them for these meaning is restricted to Emacs only.  */
 643
 644 enum iso_code_class_type iso_code_class[256];
 645
 646 #define CHARSET_OK(idx, charset)                        \
 647   (coding_system_table[idx]->safe_charsets[charset]     \
 648    || (CODING_SPEC_ISO_REQUESTED_DESIGNATION            \
 649        (coding_system_table[idx], charset)              \
 650        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 651
 652 #define SHIFT_OUT_OK(idx) \
 653   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 654
 655 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 656    Check if a text is encoded in ISO2022.  If it is, returns an
 657    integer in which appropriate flag bits any of:
 658         CODING_CATEGORY_MASK_ISO_7
 659         CODING_CATEGORY_MASK_ISO_7_TIGHT
 660         CODING_CATEGORY_MASK_ISO_8_1
 661         CODING_CATEGORY_MASK_ISO_8_2
 662         CODING_CATEGORY_MASK_ISO_7_ELSE
 663         CODING_CATEGORY_MASK_ISO_8_ELSE
 664    are set.  If a code which should never appear in ISO2022 is found,
 665    returns 0.  */
 666
 667 int
 668 detect_coding_iso2022 (src, src_end)
 669      unsigned char *src, *src_end;
 670 {
 671   int mask = CODING_CATEGORY_MASK_ISO;
 672   int mask_found = 0;
 673   int reg[4], shift_out = 0;
 674   int c, c1, i, charset;
 675
 676   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 677   while (mask && src < src_end)
 678     {
 679       c = *src++;
 680       switch (c)
 681         {
 682         case ISO_CODE_ESC:
 683           if (src >= src_end)
 684             break;
 685           c = *src++;
 686           if (c >= '(' && c <= '/')
 687             {
 688               /* Designation sequence for a charset of dimension 1.  */
 689               if (src >= src_end)
 690                 break;
 691               c1 = *src++;
 692               if (c1 < ' ' || c1 >= 0x80
 693                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 694                 /* Invalid designation sequence.  Just ignore.  */
 695                 break;
 696               reg[(c - '(') % 4] = charset;
 697             }
 698           else if (c == '$')
 699             {
 700               /* Designation sequence for a charset of dimension 2.  */
 701               if (src >= src_end)
 702                 break;
 703               c = *src++;
 704               if (c >= '@' && c <= 'B')
 705                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 706                 reg[0] = charset = iso_charset_table[1][0][c];
 707               else if (c >= '(' && c <= '/')
 708                 {
 709                   if (src >= src_end)
 710                     break;
 711                   c1 = *src++;
 712                   if (c1 < ' ' || c1 >= 0x80
 713                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 714                     /* Invalid designation sequence.  Just ignore.  */
 715                     break;
 716                   reg[(c - '(') % 4] = charset;
 717                 }
 718               else
 719                 /* Invalid designation sequence.  Just ignore.  */
 720                 break;
 721             }
 722           else if (c == 'N' || c == 'n')
 723             {
 724               if (shift_out == 0
 725                   && (reg[1] >= 0
 726                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 727                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 728                 {
 729                   /* Locking shift out.  */
 730                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 731                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 732                   shift_out = 1;
 733                 }
 734               break;
 735             }
 736           else if (c == 'O' || c == 'o')
 737             {
 738               if (shift_out == 1)
 739                 {
 740                   /* Locking shift in.  */
 741                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 742                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 743                   shift_out = 0;
 744                 }
 745               break;
 746             }
 747           else if (c == '0' || c == '1' || c == '2')
 748             /* Start/end composition.  Just ignore.  */
 749             break;
 750           else
 751             /* Invalid escape sequence.  Just ignore.  */
 752             break;
 753
 754           /* We found a valid designation sequence for CHARSET.  */
 755           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 756           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 757             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 758           else
 759             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 760           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 761             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 762           else
 763             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 764           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 765             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 766           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 767             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 768           break;
 769
 770         case ISO_CODE_SO:
 771           if (shift_out == 0
 772               && (reg[1] >= 0
 773                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 774                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 775             {
 776               /* Locking shift out.  */
 777               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 778               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 779             }
 780           break;
 781
 782         case ISO_CODE_SI:
 783           if (shift_out == 1)
 784             {
 785               /* Locking shift in.  */
 786               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 787               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 788             }
 789           break;
 790
 791         case ISO_CODE_CSI:
 792         case ISO_CODE_SS2:
 793         case ISO_CODE_SS3:
 794           {
 795             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 796
 797             if (c != ISO_CODE_CSI)
 798               {
 799                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 800                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 801                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 802                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 803                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 804                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 805               }
 806             if (VECTORP (Vlatin_extra_code_table)
 807                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 808               {
 809                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 810                     & CODING_FLAG_ISO_LATIN_EXTRA)
 811                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 812                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 813                     & CODING_FLAG_ISO_LATIN_EXTRA)
 814                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 815               }
 816             mask &= newmask;
 817             mask_found |= newmask;
 818           }
 819           break;
 820
 821         default:
 822           if (c < 0x80)
 823             break;
 824           else if (c < 0xA0)
 825             {
 826               if (VECTORP (Vlatin_extra_code_table)
 827                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 828                 {
 829                   int newmask = 0;
 830
 831                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 832                       & CODING_FLAG_ISO_LATIN_EXTRA)
 833                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 834                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 835                       & CODING_FLAG_ISO_LATIN_EXTRA)
 836                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 837                   mask &= newmask;
 838                   mask_found |= newmask;
 839                 }
 840               else
 841                 return 0;
 842             }
 843           else
 844             {
 845               unsigned char *src_begin = src;
 846
 847               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 848                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 849               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 850               while (src < src_end && *src >= 0xA0)
 851                 src++;
 852               if ((src - src_begin - 1) & 1 && src < src_end)
 853                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 854               else
 855                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 856             }
 857           break;
 858         }
 859     }
 860
 861   return (mask & mask_found);
 862 }
 863
/* Decode a character of which charset is CHARSET and the 1st position
   code is C1.  If dimension of CHARSET is 2, the 2nd position code is
   fetched from SRC and set to C2.  If CHARSET is negative, it means
   that we are decoding ill formed text, and what we can do is just to
   read C1 as is.

   This macro is not self-contained: besides its two arguments it uses
   the names `coding', `src', `dst', `c2' and `unification_table' as
   they are visible at the place of expansion.  ONE_MORE_BYTE is
   defined elsewhere in this file; the comment at the head of the
   decoding loop says it leaves the loop when the source is exhausted,
   so statements after it may not be reached.  */

#define DECODE_ISO_CHARACTER(charset, c1)                               \
  do {                                                                  \
    int c_alt, charset_alt = (charset);                                 \
    /* At the head of a composition, first emit the composition        \
       leading code and step the composing state past the head.  */    \
    if (COMPOSING_HEAD_P (coding->composing))                           \
      {                                                                 \
        *dst++ = LEADING_CODE_COMPOSITION;                              \
        if (COMPOSING_WITH_RULE_P (coding->composing))                  \
          /* To tell composition rules are embedded.  */                \
          *dst++ = 0xFF;                                                \
        coding->composing += 2;                                         \
      }                                                                 \
    if ((charset) >= 0)                                                 \
      {                                                                 \
        if (CHARSET_DIMENSION (charset) == 2)                           \
          {                                                             \
            ONE_MORE_BYTE (c2);                                         \
            /* If the byte just read (with the 8th bit masked off) is  \
               not a graphic code, push it back and use SPACE as the   \
               2nd position code.  */                                   \
            if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
                && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
              {                                                         \
                src--;                                                  \
                c2 = ' ';                                               \
              }                                                         \
          }                                                             \
        /* When a unification table is in effect and it yields a       \
           character (a non-negative value), decode that character     \
           instead; SPLIT_CHAR is given CHARSET_ALT, C1 and C2 to      \
           receive its components.  */                                  \
        if (!NILP (unification_table)                                   \
            && ((c_alt = unify_char (unification_table,                 \
                                     -1, (charset), c1, c2)) >= 0))     \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
      }                                                                 \
    /* A negative charset is decoded the same way as ASCII.  */         \
    if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
      DECODE_CHARACTER_ASCII (c1);                                      \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
    else                                                                \
      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
    if (COMPOSING_WITH_RULE_P (coding->composing))                      \
      /* To tell a composition rule follows.  */                        \
      coding->composing = COMPOSING_WITH_RULE_RULE;                     \
  } while (0)
 908
 909 /* Set designation state into CODING.  */
 910 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 911   do {                                                                     \
 912     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 913                                      make_number (chars),                  \
 914                                      make_number (final_char));            \
 915     if (charset >= 0                                                       \
 916         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 917             || coding->safe_charsets[charset]))                            \
 918       {                                                                    \
 919         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 920             && reg == 0                                                    \
 921             && charset == CHARSET_ASCII)                                   \
 922           {                                                                \
 923             /* We should insert this designation sequence as is so         \
 924                that it is surely written back to a file.  */               \
 925             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 926             goto label_invalid_code;                                       \
 927           }                                                                \
 928         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 929         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 930             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 931           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 932         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 933       }                                                                    \
 934     else                                                                   \
 935       {                                                                    \
 936         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 937         goto label_invalid_code;                                           \
 938       }                                                                    \
 939   } while (0)
 940
 941 /* Check if the current composing sequence contains only valid codes.
 942    If the composing sequence doesn't end before SRC_END, return -1.
 943    Else, if it contains only valid codes, return 0.
 944    Else return the length of the composing sequence.  */
 945
 946 int check_composing_code (coding, src, src_end)
 947      struct coding_system *coding;
 948      unsigned char *src, *src_end;
 949 {
 950   unsigned char *src_start = src;
 951   int invalid_code_found = 0;
 952   int charset, c, c1, dim;
 953
 954   while (src < src_end)
 955     {
 956       if (*src++ != ISO_CODE_ESC) continue;
 957       if (src >= src_end) break;
 958       if ((c = *src++) == '1') /* end of compsition */
 959         return (invalid_code_found ? src - src_start : 0);
 960       if (src + 2 >= src_end) break;
 961       if (!coding->flags & CODING_FLAG_ISO_DESIGNATION)
 962         invalid_code_found = 1;
 963       else
 964         {
 965           dim = 0;
 966           if (c == '$')
 967             {
 968               dim = 1;
 969               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 970             }
 971           if (c >= '(' && c <= '/')
 972             {
 973               c1 = *src++;
 974               if ((c1 < ' ' || c1 >= 0x80)
 975                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 976                   || ! coding->safe_charsets[charset]
 977                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 978                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 979                 invalid_code_found = 1;
 980             }
 981           else
 982             invalid_code_found = 1;
 983         }
 984     }
 985   return ((coding->mode & CODING_MODE_LAST_BLOCK) ? src_end - src_start : -1);
 986 }
 987
 988 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 989
 990 int
 991 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 992      struct coding_system *coding;
 993      unsigned char *source, *destination;
 994      int src_bytes, dst_bytes;
 995 {
 996   unsigned char *src = source;
 997   unsigned char *src_end = source + src_bytes;
 998   unsigned char *dst = destination;
 999   unsigned char *dst_end = destination + dst_bytes;
1000   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1001      from DST_END to assure that overflow checking is necessary only
1002      at the head of loop.  */
1003   unsigned char *adjusted_dst_end = dst_end - 6;
1004   int charset;
1005   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1006   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1007   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1008   Lisp_Object unification_table
1009     = coding->character_unification_table_for_decode;
1010   int result = CODING_FINISH_NORMAL;
1011
1012   if (!NILP (Venable_character_unification) && NILP (unification_table))
1013     unification_table = Vstandard_character_unification_table_for_decode;
1014
1015   coding->produced_char = 0;
1016   coding->fake_multibyte = 0;
1017   while (src < src_end && (dst_bytes
1018                            ? (dst < adjusted_dst_end)
1019                            : (dst < src - 6)))
1020     {
1021       /* SRC_BASE remembers the start position in source in each loop.
1022          The loop will be exited when there's not enough source text
1023          to analyze long escape sequence or 2-byte code (within macros
1024          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1025          to SRC_BASE before exiting.  */
1026       unsigned char *src_base = src;
1027       int c1 = *src++, c2;
1028
1029       switch (iso_code_class [c1])
1030         {
1031         case ISO_0x20_or_0x7F:
1032           if (!coding->composing
1033               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1034             {
1035               /* This is SPACE or DEL.  */
1036               *dst++ = c1;
1037               coding->produced_char++;
1038               break;
1039             }
1040           /* This is a graphic character, we fall down ...  */
1041
1042         case ISO_graphic_plane_0:
1043           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1044             {
1045               /* This is a composition rule.  */
1046               *dst++ = c1 | 0x80;
1047               coding->composing = COMPOSING_WITH_RULE_TAIL;
1048             }
1049           else
1050             DECODE_ISO_CHARACTER (charset0, c1);
1051           break;
1052
1053         case ISO_0xA0_or_0xFF:
1054           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1055               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1056             goto label_invalid_code;
1057           /* This is a graphic character, we fall down ... */
1058
1059         case ISO_graphic_plane_1:
1060           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1061             goto label_invalid_code;
1062           else
1063             DECODE_ISO_CHARACTER (charset1, c1);
1064           break;
1065
1066         case ISO_control_code:
1067           /* All ISO2022 control characters in this class have the
1068              same representation in Emacs internal format.  */
1069           if (c1 == '\n'
1070               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1071               && (coding->eol_type == CODING_EOL_CR
1072                   || coding->eol_type == CODING_EOL_CRLF))
1073             {
1074               result = CODING_FINISH_INCONSISTENT_EOL;
1075               goto label_end_of_loop_2;
1076             }
1077           *dst++ = c1;
1078           coding->produced_char++;
1079           break;
1080
1081         case ISO_carriage_return:
1082           if (coding->eol_type == CODING_EOL_CR)
1083             *dst++ = '\n';
1084           else if (coding->eol_type == CODING_EOL_CRLF)
1085             {
1086               ONE_MORE_BYTE (c1);
1087               if (c1 == ISO_CODE_LF)
1088                 *dst++ = '\n';
1089               else
1090                 {
1091                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1092                     {
1093                       result = CODING_FINISH_INCONSISTENT_EOL;
1094                       goto label_end_of_loop_2;
1095                     }
1096                   src--;
1097                   *dst++ = '\r';
1098                 }
1099             }
1100           else
1101             *dst++ = c1;
1102           coding->produced_char++;
1103           break;
1104
1105         case ISO_shift_out:
1106           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1107               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1108             goto label_invalid_code;
1109           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1110           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1111           break;
1112
1113         case ISO_shift_in:
1114           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1115             goto label_invalid_code;
1116           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1117           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1118           break;
1119
1120         case ISO_single_shift_2_7:
1121         case ISO_single_shift_2:
1122           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1123             goto label_invalid_code;
1124           /* SS2 is handled as an escape sequence of ESC 'N' */
1125           c1 = 'N';
1126           goto label_escape_sequence;
1127
1128         case ISO_single_shift_3:
1129           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1130             goto label_invalid_code;
1131           /* SS2 is handled as an escape sequence of ESC 'O' */
1132           c1 = 'O';
1133           goto label_escape_sequence;
1134
1135         case ISO_control_sequence_introducer:
1136           /* CSI is handled as an escape sequence of ESC '[' ...  */
1137           c1 = '[';
1138           goto label_escape_sequence;
1139
1140         case ISO_escape:
1141           ONE_MORE_BYTE (c1);
1142         label_escape_sequence:
1143           /* Escape sequences handled by Emacs are invocation,
1144              designation, direction specification, and character
1145              composition specification.  */
1146           switch (c1)
1147             {
1148             case '&':           /* revision of following character set */
1149               ONE_MORE_BYTE (c1);
1150               if (!(c1 >= '@' && c1 <= '~'))
1151                 goto label_invalid_code;
1152               ONE_MORE_BYTE (c1);
1153               if (c1 != ISO_CODE_ESC)
1154                 goto label_invalid_code;
1155               ONE_MORE_BYTE (c1);
1156               goto label_escape_sequence;
1157
1158             case '$':           /* designation of 2-byte character set */
1159               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1160                 goto label_invalid_code;
1161               ONE_MORE_BYTE (c1);
1162               if (c1 >= '@' && c1 <= 'B')
1163                 {       /* designation of JISX0208.1978, GB2312.1980,
1164                                    or JISX0208.1980 */
1165                   DECODE_DESIGNATION (0, 2, 94, c1);
1166                 }
1167               else if (c1 >= 0x28 && c1 <= 0x2B)
1168                 {       /* designation of DIMENSION2_CHARS94 character set */
1169                   ONE_MORE_BYTE (c2);
1170                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1171                 }
1172               else if (c1 >= 0x2C && c1 <= 0x2F)
1173                 {       /* designation of DIMENSION2_CHARS96 character set */
1174                   ONE_MORE_BYTE (c2);
1175                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1176                 }
1177               else
1178                 goto label_invalid_code;
1179               break;
1180
1181             case 'n':           /* invocation of locking-shift-2 */
1182               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1183                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1184                 goto label_invalid_code;
1185               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1186               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1187               break;
1188
1189             case 'o':           /* invocation of locking-shift-3 */
1190               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1191                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1192                 goto label_invalid_code;
1193               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1194               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1195               break;
1196
1197             case 'N':           /* invocation of single-shift-2 */
1198               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1199                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1200                 goto label_invalid_code;
1201               ONE_MORE_BYTE (c1);
1202               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1203               DECODE_ISO_CHARACTER (charset, c1);
1204               break;
1205
1206             case 'O':           /* invocation of single-shift-3 */
1207               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1208                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1209                 goto label_invalid_code;
1210               ONE_MORE_BYTE (c1);
1211               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1212               DECODE_ISO_CHARACTER (charset, c1);
1213               break;
1214
1215             case '0': case '2': /* start composing */
1216               /* Before processing composing, we must be sure that all
1217                  characters being composed are supported by CODING.
1218                  If not, we must give up composing and insert the
1219                  bunch of codes for composing as is without decoding.  */
1220               {
1221                 int result1;
1222
1223                 result1 = check_composing_code (coding, src, src_end);
1224                 if (result1 == 0)
1225                   coding->composing = (c1 == '0'
1226                                        ? COMPOSING_NO_RULE_HEAD
1227                                        : COMPOSING_WITH_RULE_HEAD);
1228                 else if (result1 > 0)
1229                   {
1230                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1231                       {
1232                         bcopy (src_base, dst, result1 + 2);
1233                         src += result1;
1234                         dst += result1 + 2;
1235                         coding->produced_char += result1 + 2;
1236                       }
1237                     else
1238                       {
1239                         result = CODING_FINISH_INSUFFICIENT_DST;
1240                         goto label_end_of_loop_2;
1241                       }
1242                   }
1243                 else
1244                   goto label_end_of_loop;
1245               }
1246               break;
1247
1248             case '1':           /* end composing */
1249               coding->composing = COMPOSING_NO;
1250               coding->produced_char++;
1251               break;
1252
1253             case '[':           /* specification of direction */
1254               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1255                 goto label_invalid_code;
1256               /* For the moment, nested direction is not supported.
1257                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1258                  left-to-right, and nozero means right-to-left.  */
1259               ONE_MORE_BYTE (c1);
1260               switch (c1)
1261                 {
1262                 case ']':       /* end of the current direction */
1263                   coding->mode &= ~CODING_MODE_DIRECTION;
1264
1265                 case '0':       /* end of the current direction */
1266                 case '1':       /* start of left-to-right direction */
1267                   ONE_MORE_BYTE (c1);
1268                   if (c1 == ']')
1269                     coding->mode &= ~CODING_MODE_DIRECTION;
1270                   else
1271                     goto label_invalid_code;
1272                   break;
1273
1274                 case '2':       /* start of right-to-left direction */
1275                   ONE_MORE_BYTE (c1);
1276                   if (c1 == ']')
1277                     coding->mode |= CODING_MODE_DIRECTION;
1278                   else
1279                     goto label_invalid_code;
1280                   break;
1281
1282                 default:
1283                   goto label_invalid_code;
1284                 }
1285               break;
1286
1287             default:
1288               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1289                 goto label_invalid_code;
1290               if (c1 >= 0x28 && c1 <= 0x2B)
1291                 {       /* designation of DIMENSION1_CHARS94 character set */
1292                   ONE_MORE_BYTE (c2);
1293                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1294                 }
1295               else if (c1 >= 0x2C && c1 <= 0x2F)
1296                 {       /* designation of DIMENSION1_CHARS96 character set */
1297                   ONE_MORE_BYTE (c2);
1298                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1299                 }
1300               else
1301                 {
1302                   goto label_invalid_code;
1303                 }
1304             }
1305           /* We must update these variables now.  */
1306           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1307           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1308           break;
1309
1310         label_invalid_code:
1311           while (src_base < src)
1312             *dst++ = *src_base++;
1313           coding->fake_multibyte = 1;
1314         }
1315       continue;
1316
1317     label_end_of_loop:
1318       result = CODING_FINISH_INSUFFICIENT_SRC;
1319     label_end_of_loop_2:
1320       src = src_base;
1321       break;
1322     }
1323
1324   if (src < src_end)
1325     {
1326       if (result == CODING_FINISH_NORMAL)
1327         result = CODING_FINISH_INSUFFICIENT_DST;
1328       else if (result != CODING_FINISH_INCONSISTENT_EOL
1329                && coding->mode & CODING_MODE_LAST_BLOCK)
1330         {
1331           /* This is the last block of the text to be decoded.  We had
1332              better just flush out all remaining codes in the text
1333              although they are not valid characters.  */
1334           src_bytes = src_end - src;
1335           if (dst_bytes && (dst_end - dst < src_bytes))
1336             src_bytes = dst_end - dst;
1337           bcopy (src, dst, src_bytes);
1338           dst += src_bytes;
1339           src += src_bytes;
1340           coding->fake_multibyte = 1;
1341         }
1342     }
1343
1344   coding->consumed = coding->consumed_char = src - source;
1345   coding->produced = dst - destination;
1346   return result;
1347 }
1348
1349 /* ISO2022 encoding stuff.  */
1350
1351 /*
1352    It is not enough to say just "ISO2022" on encoding, we have to
1353    specify more details.  In Emacs, each coding system of ISO2022
1354    variant has the following specifications:
1355         1. Initial designation to G0 thru G3.
1356         2. Allows short-form designation?
1357         3. ASCII should be designated to G0 before control characters?
1358         4. ASCII should be designated to G0 at end of line?
1359         5. 7-bit environment or 8-bit environment?
1360         6. Use locking-shift?
1361         7. Use Single-shift?
1362    And the following two are only for Japanese:
1363         8. Use ASCII in place of JIS0201-1976-Roman?
1364         9. Use JISX0208-1983 in place of JISX0208-1978?
1365    These specifications are encoded in `coding->flags' as flag bits
1366    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1367    details.
1368 */
1369
/* Write to DST the escape sequence that designates CHARSET to graphic
   register REG, then record the designation in CODING.

   The sequence is ESC, then '$' for a charset whose dimension is not
   1, then the intermediate character selected by REG (one of "()*+"
   for a 94-character set, one of ",-./" otherwise), then the final
   character of CHARSET.  The intermediate character is left out (the
   short form) only for a multi-dimensional 94-character set designated
   to register 0 whose final character is '@', 'A' or 'B', and only
   when CODING has CODING_FLAG_ISO_SHORT_FORM set.

   If the revision number recorded for CHARSET is below 255, the
   sequence ESC '&' ('@' + revision) is written first.

   `dst' is taken from the place of expansion.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    char *inter94 = "()*+";                                             \
    char *inter96 = ",-./";                                             \
    unsigned char iso_final = CHARSET_ISO_FINAL_CHAR (charset);         \
    int rev_no = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);      \
    if (rev_no < 255)                                                   \
      {                                                                 \
        /* Revision announcement comes before the designation.  */      \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + rev_no;                                          \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      *dst++ = '$';                                                     \
    if (CHARSET_CHARS (charset) != 94)                                  \
      *dst++ = (unsigned char) (inter96[reg]);                          \
    else if (CHARSET_DIMENSION (charset) == 1                           \
             || ! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
             || reg != 0                                                \
             || iso_final < '@' || iso_final > 'B')                     \
      /* Not eligible for the short form.  */                           \
      *dst++ = (unsigned char) (inter94[reg]);                          \
    *dst++ = iso_final;                                                 \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
1411
/* The following two macros produce codes (control character or escape
   sequence) for ISO2022 single-shift functions (single-shift-2 and
   single-shift-3).

   Both expect `coding' and `dst' to be visible where they are
   expanded.  When CODING_FLAG_ISO_SEVEN_BITS is set in coding->flags
   the two-byte escape sequence is appended to DST; otherwise the
   single control code is appended and coding->fake_multibyte is set.
   In either case CODING is then marked as single-shifting.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        *dst++ = ISO_CODE_SS2;                                  \
        coding->fake_multibyte = 1;                             \
      }                                                         \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        *dst++ = ISO_CODE_SS3;                                  \
        coding->fake_multibyte = 1;                             \
      }                                                         \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1439
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).

   Each one appends its code to DST and then records in CODING the
   number (0 to 3) of the graphic register that is now invoked to
   graphic plane 0.  `coding' and `dst' must be visible where the
   macros are expanded.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    *dst = ISO_CODE_SI;                                 \
    dst++;                                              \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
  } while (0)

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    *dst = ISO_CODE_SO;                                 \
    dst++;                                              \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    dst[0] = ISO_CODE_ESC;                              \
    dst[1] = 'n';                                       \
    dst += 2;                                           \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    dst[0] = ISO_CODE_ESC;                              \
    dst[1] = 'o';                                       \
    dst += 2;                                           \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
  } while (0)
1467
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   `coding' and `dst' must be visible where the macro is expanded.
   The body is a loop that is left as soon as something has been
   written to DST:
     - if CODING is single-shifting, C1 is written with the 8th bit
       cleared (seven-bit coding) or set, and the single-shifting
       state is cleared;
     - if CHARSET is the charset of graphic plane 0 (resp. 1), C1 is
       written with the 8th bit cleared (resp. set);
     - if CODING_FLAG_ISO_SAFE is set and CHARSET is not marked in
       coding->safe_charsets, CODING_INHIBIT_CHARACTER_SUBSTITUTION is
       written once, or twice when CHARSET_WIDTH (CHARSET) is 2;
     - otherwise encode_invocation_designation is called and the loop
       is repeated.

   CHARSET and C1 are parenthesized in the expansion so that
   expression arguments are parsed as the caller wrote them.  They may
   still be evaluated more than once, so they must not have side
   effects.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F;                                         \
        else                                                            \
          *dst++ = (c1) | 0x80;                                         \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
             && !coding->safe_charsets[charset])                        \
      {                                                                 \
        /* We should not encode this character, instead produce one or  \
           two `?'s.  */                                                \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation ((charset), coding, dst);     \
  } while (1)
1511
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   `coding' and `dst' must be visible where the macro is expanded.
   The body is a loop that is left as soon as something has been
   written to DST:
     - if CODING is single-shifting, C1 and C2 are written with the
       8th bit cleared (seven-bit coding) or set, and the
       single-shifting state is cleared;
     - if CHARSET is the charset of graphic plane 0 (resp. 1), C1 and
       C2 are written with the 8th bit cleared (resp. set);
     - if CODING_FLAG_ISO_SAFE is set and CHARSET is not marked in
       coding->safe_charsets, CODING_INHIBIT_CHARACTER_SUBSTITUTION is
       written once, or twice when CHARSET_WIDTH (CHARSET) is 2;
     - otherwise encode_invocation_designation is called and the loop
       is repeated.

   CHARSET, C1 and C2 are parenthesized in the expansion so that
   expression arguments are parsed as the caller wrote them.  They may
   still be evaluated more than once, so they must not have side
   effects.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                   \
        else                                                            \
          *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                   \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                     \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                     \
        break;                                                          \
      }                                                                 \
    else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
             && !coding->safe_charsets[charset])                        \
      {                                                                 \
        /* We should not encode this character, instead produce one or  \
           two `?'s.  */                                                \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation ((charset), coding, dst);     \
  } while (1)
1554
/* Produce codes for one character of CHARSET whose position-codes are
   C1 and C2 (C2 is a dummy for a one-dimensional charset).

   `coding', `dst' and `unification_table' must be visible where the
   macro is expanded, and C1 and C2 must be lvalues because they are
   handed to SPLIT_CHAR.

   If UNIFICATION_TABLE is non-nil and unify_char returns a
   non-negative character for it, that character is split into
   charset and position-codes, and those are encoded instead.  The
   result is passed to ENCODE_ISO_CHARACTER_DIMENSION1 or
   ENCODE_ISO_CHARACTER_DIMENSION2 according to the dimension of the
   charset.  Before that, when the original CHARSET is ASCII and
   CODING_FLAG_ISO_USE_ROMAN is set, charset_latin_jisx0201 is used;
   when the original CHARSET is charset_jisx0208 and
   CODING_FLAG_ISO_USE_OLDJIS is set, charset_jisx0208_1978 is used.

   coding->consumed_char is incremented unless a composition is in
   progress.  */

#define ENCODE_ISO_CHARACTER(charset, c1, c2)                             \
  do {                                                                    \
    int c_alt, charset_alt = charset;                                     \
                                                                          \
    if (!NILP (unification_table))                                        \
      {                                                                   \
        c_alt = unify_char (unification_table, -1, charset, c1, c2);      \
        if (c_alt >= 0)                                                   \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                        \
      }                                                                   \
                                                                          \
    if (CHARSET_DIMENSION (charset_alt) != 1)                             \
      {                                                                   \
        if (charset == charset_jisx0208                                   \
            && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)                \
          charset_alt = charset_jisx0208_1978;                            \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);            \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        if (charset == CHARSET_ASCII                                      \
            && coding->flags & CODING_FLAG_ISO_USE_ROMAN)                 \
          charset_alt = charset_latin_jisx0201;                           \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);                \
      }                                                                   \
                                                                          \
    if (! COMPOSING_P (coding->composing))                                \
      coding->consumed_char++;                                            \
  } while (0)
1581
1582 /* Produce designation and invocation codes at a place pointed by DST
1583    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1584    Return new DST.  */
1585
1586 unsigned char *
1587 encode_invocation_designation (charset, coding, dst)
1588      int charset;
1589      struct coding_system *coding;
1590      unsigned char *dst;
1591 {
1592   int reg;                      /* graphic register number */
1593
1594   /* At first, check designations.  */
1595   for (reg = 0; reg < 4; reg++)
1596     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1597       break;
1598
1599   if (reg >= 4)
1600     {
1601       /* CHARSET is not yet designated to any graphic registers.  */
1602       /* At first check the requested designation.  */
1603       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1604       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1605         /* Since CHARSET requests no special designation, designate it
1606            to graphic register 0.  */
1607         reg = 0;
1608
1609       ENCODE_DESIGNATION (charset, reg, coding);
1610     }
1611
1612   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1613       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1614     {
1615       /* Since the graphic register REG is not invoked to any graphic
1616          planes, invoke it to graphic plane 0.  */
1617       switch (reg)
1618         {
1619         case 0:                 /* graphic register 0 */
1620           ENCODE_SHIFT_IN;
1621           break;
1622
1623         case 1:                 /* graphic register 1 */
1624           ENCODE_SHIFT_OUT;
1625           break;
1626
1627         case 2:                 /* graphic register 2 */
1628           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1629             ENCODE_SINGLE_SHIFT_2;
1630           else
1631             ENCODE_LOCKING_SHIFT_2;
1632           break;
1633
1634         case 3:                 /* graphic register 3 */
1635           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1636             ENCODE_SINGLE_SHIFT_3;
1637           else
1638             ENCODE_LOCKING_SHIFT_3;
1639           break;
1640         }
1641     }
1642   return dst;
1643 }
1644
/* The following three macros produce codes for indicating composition:
   the start of a composition without rule, the start of a composition
   with rule, and the end of a composition.  Each appends a two-byte
   escape sequence to DST, which must be visible where the macro is
   expanded.  The expansions are parenthesized so that each macro
   behaves as a single expression wherever it is used.  */
#define ENCODE_COMPOSITION_NO_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '0')
#define ENCODE_COMPOSITION_WITH_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '2')
#define ENCODE_COMPOSITION_END \
  (*dst++ = ISO_CODE_ESC, *dst++ = '1')
1649
/* The following three macros produce codes for indicating direction
   of text.  `coding' and `dst' must be visible where they are
   expanded.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER appends the two-byte escape
   sequence when CODING_FLAG_ISO_SEVEN_BITS is set in coding->flags
   (tested as a bit, like everywhere else, so other flags may be set
   too) and the single control code otherwise.  The two direction
   macros append the introducer followed by two more bytes; they are
   complete statements of the `do ... while (0)' form.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

#define ENCODE_DIRECTION_R2L                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '2', *dst++ = ']';                         \
  } while (0)

#define ENCODE_DIRECTION_L2R                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '0', *dst++ = ']';                         \
  } while (0)
1665
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state.

   If graphic plane 0 does not currently invoke graphic register 0, a
   shift-in is produced.  Then, for each of the four graphic
   registers whose initial designation is non-negative and differs
   from the current designation, the initial charset is designated
   again.  `coding' and `dst' must be visible where the macro is
   expanded.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int greg = 0;                                                           \
                                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
                                                                            \
    while (greg < 4)                                                        \
      {                                                                     \
        if (! (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, greg) < 0       \
               || (CODING_SPEC_ISO_DESIGNATION (coding, greg)               \
                   == CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, greg)))) \
          ENCODE_DESIGNATION                                                \
            (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, greg),            \
             greg, coding);                                                 \
        greg++;                                                             \
      }                                                                     \
  } while (0)
1680
1681 /* Produce designation sequences of charsets in the line started from
1682    SRC to a place pointed by *DSTP, and update DSTP.
1683
1684    If the current block ends before any end-of-line, we may fail to
1685    find all the necessary designations.  */
1686
1687 void
1688 encode_designation_at_bol (coding, table, src, src_end, dstp)
1689      struct coding_system *coding;
1690      Lisp_Object table;
1691      unsigned char *src, *src_end, **dstp;
1692 {
1693   int charset, c, found = 0, reg;
1694   /* Table of charsets to be designated to each graphic register.  */
1695   int r[4];
1696   unsigned char *dst = *dstp;
1697
1698   for (reg = 0; reg < 4; reg++)
1699     r[reg] = -1;
1700
1701   while (src < src_end && *src != '\n' && found < 4)
1702     {
1703       int bytes = BYTES_BY_CHAR_HEAD (*src);
1704
1705       if (NILP (table))
1706         charset = CHARSET_AT (src);
1707       else
1708         {
1709           int c_alt;
1710           unsigned char c1, c2;
1711
1712           SPLIT_STRING(src, bytes, charset, c1, c2);
1713           if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
1714             charset = CHAR_CHARSET (c_alt);
1715         }
1716
1717       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1718       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1719         {
1720           found++;
1721           r[reg] = charset;
1722         }
1723
1724       src += bytes;
1725     }
1726
1727   if (found)
1728     {
1729       for (reg = 0; reg < 4; reg++)
1730         if (r[reg] >= 0
1731             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1732           ENCODE_DESIGNATION (r[reg], reg, coding);
1733       *dstp = dst;
1734     }
1735 }
1736
1737 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
1738
1739 int
1740 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1741      struct coding_system *coding;
1742      unsigned char *source, *destination;
1743      int src_bytes, dst_bytes;
1744 {
1745   unsigned char *src = source;
1746   unsigned char *src_end = source + src_bytes;
1747   unsigned char *dst = destination;
1748   unsigned char *dst_end = destination + dst_bytes;
1749   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1750      from DST_END to assure overflow checking is necessary only at the
1751      head of loop.  */
1752   unsigned char *adjusted_dst_end = dst_end - 19;
1753   Lisp_Object unification_table
1754       = coding->character_unification_table_for_encode;
1755   int result = CODING_FINISH_NORMAL;
1756
1757   if (!NILP (Venable_character_unification) && NILP (unification_table))
1758     unification_table = Vstandard_character_unification_table_for_encode;
1759
1760   coding->consumed_char = 0;
1761   coding->fake_multibyte = 0;
1762   while (src < src_end && (dst_bytes
1763                            ? (dst < adjusted_dst_end)
1764                            : (dst < src - 19)))
1765     {
1766       /* SRC_BASE remembers the start position in source in each loop.
1767          The loop will be exited when there's not enough source text
1768          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1769          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1770          reset to SRC_BASE before exiting.  */
1771       unsigned char *src_base = src;
1772       int charset, c1, c2, c3, c4;
1773
1774       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1775           && CODING_SPEC_ISO_BOL (coding))
1776         {
1777           /* We have to produce designation sequences if any now.  */
1778           encode_designation_at_bol (coding, unification_table,
1779                                      src, src_end, &dst);
1780           CODING_SPEC_ISO_BOL (coding) = 0;
1781         }
1782
1783       c1 = *src++;
1784       /* If we are seeing a component of a composite character, we are
1785          seeing a leading-code encoded irregularly for composition, or
1786          a composition rule if composing with rule.  We must set C1 to
1787          a normal leading-code or an ASCII code.  If we are not seeing
1788          a composite character, we must reset composition,
1789          designation, and invocation states.  */
1790       if (COMPOSING_P (coding->composing))
1791         {
1792           if (c1 < 0xA0)
1793             {
1794               /* We are not in a composite character any longer.  */
1795               coding->composing = COMPOSING_NO;
1796               ENCODE_RESET_PLANE_AND_REGISTER;
1797               ENCODE_COMPOSITION_END;
1798             }
1799           else
1800             {
1801               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1802                 {
1803                   *dst++ = c1 & 0x7F;
1804                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1805                   continue;
1806                 }
1807               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1808                 coding->composing = COMPOSING_WITH_RULE_RULE;
1809               if (c1 == 0xA0)
1810                 {
1811                   /* This is an ASCII component.  */
1812                   ONE_MORE_BYTE (c1);
1813                   c1 &= 0x7F;
1814                 }
1815               else
1816                 /* This is a leading-code of non ASCII component.  */
1817                 c1 -= 0x20;
1818             }
1819         }
1820
1821       /* Now encode one character.  C1 is a control character, an
1822          ASCII character, or a leading-code of multi-byte character.  */
1823       switch (emacs_code_class[c1])
1824         {
1825         case EMACS_ascii_code:
1826           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1827           break;
1828
1829         case EMACS_control_code:
1830           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1831             ENCODE_RESET_PLANE_AND_REGISTER;
1832           *dst++ = c1;
1833           coding->consumed_char++;
1834           break;
1835
1836         case EMACS_carriage_return_code:
1837           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1838             {
1839               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1840                 ENCODE_RESET_PLANE_AND_REGISTER;
1841               *dst++ = c1;
1842               coding->consumed_char++;
1843               break;
1844             }
1845           /* fall down to treat '\r' as '\n' ...  */
1846
1847         case EMACS_linefeed_code:
1848           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1849             ENCODE_RESET_PLANE_AND_REGISTER;
1850           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1851             bcopy (coding->spec.iso2022.initial_designation,
1852                    coding->spec.iso2022.current_designation,
1853                    sizeof coding->spec.iso2022.initial_designation);
1854           if (coding->eol_type == CODING_EOL_LF
1855               || coding->eol_type == CODING_EOL_UNDECIDED)
1856             *dst++ = ISO_CODE_LF;
1857           else if (coding->eol_type == CODING_EOL_CRLF)
1858             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1859           else
1860             *dst++ = ISO_CODE_CR;
1861           CODING_SPEC_ISO_BOL (coding) = 1;
1862           coding->consumed_char++;
1863           break;
1864
1865         case EMACS_leading_code_2:
1866           ONE_MORE_BYTE (c2);
1867           if (c2 < 0xA0)
1868             {
1869               /* invalid sequence */
1870               *dst++ = c1;
1871               src--;
1872               coding->consumed_char++;
1873             }
1874           else
1875             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1876           break;
1877
1878         case EMACS_leading_code_3:
1879           TWO_MORE_BYTES (c2, c3);
1880           if (c2 < 0xA0 || c3 < 0xA0)
1881             {
1882               /* invalid sequence */
1883               *dst++ = c1;
1884               src -= 2;
1885               coding->consumed_char++;
1886             }
1887           else if (c1 < LEADING_CODE_PRIVATE_11)
1888             ENCODE_ISO_CHARACTER (c1, c2, c3);
1889           else
1890             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1891           break;
1892
1893         case EMACS_leading_code_4:
1894           THREE_MORE_BYTES (c2, c3, c4);
1895           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1896             {
1897               /* invalid sequence */
1898               *dst++ = c1;
1899               src -= 3;
1900               coding->consumed_char++;
1901             }
1902           else
1903             ENCODE_ISO_CHARACTER (c2, c3, c4);
1904           break;
1905
1906         case EMACS_leading_code_composition:
1907           ONE_MORE_BYTE (c2);
1908           if (c2 < 0xA0)
1909             {
1910               /* invalid sequence */
1911               *dst++ = c1;
1912               src--;
1913               coding->consumed_char++;
1914             }
1915           else if (c2 == 0xFF)
1916             {
1917               ENCODE_RESET_PLANE_AND_REGISTER;
1918               coding->composing = COMPOSING_WITH_RULE_HEAD;
1919               ENCODE_COMPOSITION_WITH_RULE_START;
1920               coding->consumed_char++;
1921             }
1922           else
1923             {
1924               ENCODE_RESET_PLANE_AND_REGISTER;
1925               /* Rewind one byte because it is a character code of
1926                  composition elements.  */
1927               src--;
1928               coding->composing = COMPOSING_NO_RULE_HEAD;
1929               ENCODE_COMPOSITION_NO_RULE_START;
1930               coding->consumed_char++;
1931             }
1932           break;
1933
1934         case EMACS_invalid_code:
1935           *dst++ = c1;
1936           coding->consumed_char++;
1937           break;
1938         }
1939       continue;
1940     label_end_of_loop:
1941       result = CODING_FINISH_INSUFFICIENT_SRC;
1942       src = src_base;
1943       break;
1944     }
1945
1946   if (src < src_end && result == CODING_FINISH_NORMAL)
1947     result = CODING_FINISH_INSUFFICIENT_DST;
1948
1949   /* If this is the last block of the text to be encoded, we must
1950      reset graphic planes and registers to the initial state, and
1951      flush out the carryover if any.  */
1952   if (coding->mode & CODING_MODE_LAST_BLOCK)
1953     ENCODE_RESET_PLANE_AND_REGISTER;
1954
1955   coding->consumed = src - source;
1956   coding->produced = coding->produced_char = dst - destination;
1957   return result;
1958 }
1959
1960 \f
1961 /*** 4. SJIS and BIG5 handlers ***/
1962
1963 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1964    quite widely.  So, for the moment, Emacs supports them in the bare
1965    C code.  But, in the future, they may be supported only by CCL.  */
1966
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.

   --- CODE RANGE of SJIS ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   KATAKANA-JISX0201    0xA0 .. 0xDF
   JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xFF
            (2nd byte)  0x40 .. 0xFF
   -------------------------------

*/
1983
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and is encoded in two bytes.

   --- CODE RANGE of BIG5 ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   Big5 (1st byte)      0xA1 .. 0xFE
        (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
   --------------------------

   Since the number of characters in Big5 is larger than the maximum
   number of characters in an Emacs charset (96x96), it can't be
   handled as one charset.  So, in Emacs, Big5 is divided into two:
   `charset-big5-1' and `charset-big5-2'.  Both are DIMENSION2 and
   CHARS94.  The former contains frequently used characters and the
   latter contains less frequently used characters.  */
2001
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Both macros go through a linear index: a BIG5 code counts
   BIG5_SAME_ROW characters per 1st byte, an Emacs code counts
   (0xFF - 0xA1) characters per 1st position-code.  Codes whose 1st
   byte is 0xC9 or more belong to `charset_big5_2', whose index starts
   again from zero.

   All parameters are parenthesized in the expansions so that
   expression arguments are parsed as the caller wrote them.  Input
   arguments are evaluated more than once, so they must not have side
   effects; output arguments must be lvalues.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Set CHARSET, C1 and C2 from the BIG5 code B1 B2.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Set the BIG5 code B1 B2 from CHARSET, C1 and C2.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2034
/* Hand one decoded SJIS or BIG5 character, given as CHARSET and
   position-codes C1 and C2, to the matching DECODE_CHARACTER_XXX
   macro.

   `unification_table' must be visible where the macro is expanded,
   and C1 and C2 must be lvalues because they are handed to
   SPLIT_CHAR.  If UNIFICATION_TABLE is non-nil and unify_char returns
   a non-negative character for it, that character is split into
   charset and position-codes, and those are used instead.  A charset
   that is ASCII or negative is decoded as ASCII; any other charset is
   decoded according to its dimension.  */
#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
  do {                                                                  \
    int c_alt, charset_alt = (charset);                                 \
                                                                        \
    if (!NILP (unification_table))                                      \
      {                                                                 \
        c_alt = unify_char (unification_table, -1, (charset), c1, c2);  \
        if (c_alt >= 0)                                                 \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
      }                                                                 \
                                                                        \
    if (charset_alt != CHARSET_ASCII && charset_alt >= 0)               \
      {                                                                 \
        if (CHARSET_DIMENSION (charset_alt) != 1)                       \
          DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);            \
        else                                                            \
          DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                \
      }                                                                 \
    else                                                                \
      DECODE_CHARACTER_ASCII (c1);                                      \
  } while (0)
2049
/* Write the SJIS or BIG5 encoding of the character (CHARSET, C1, C2)
   at DST and count it in `coding->consumed_char'.  The macro relies on
   the variables `unification_table', `dst', `coding' and `sjis_p' of
   the expanding function.  C1 and C2 must be lvalues.

   If `unification_table' is non-nil and unify_char returns a
   non-negative character, that character is encoded instead.  Then:

     - an ASCII character is written as the single byte C1;
     - with SJIS_P, a charset_katakana_jisx0201 character is written
       as the single byte C1;
     - with SJIS_P, a charset_jisx0208 character is written as the two
       bytes computed by ENCODE_SJIS;
     - without SJIS_P, a charset_big5_1 or charset_big5_2 character is
       written as the two bytes computed by ENCODE_BIG5;
     - any other character is written as its charset followed by its
       position codes (two bytes for a dimension-1 charset, three for
       any other), and `coding->fake_multibyte' is set.

   At most three bytes are written per invocation.  The expansion does
   not end in a semicolon; the caller supplies it, so the macro can be
   used as a single statement, e.g. in an unbraced `if'/`else'.  */

#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                       \
  do {                                                                    \
    int c_alt, charset_alt;                                               \
    if (!NILP (unification_table)                                         \
        && ((c_alt = unify_char (unification_table, -1, (charset),        \
                                 c1, c2))                                 \
            >= 0))                                                        \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                            \
    else                                                                  \
      charset_alt = (charset);                                            \
    if (charset_alt == charset_ascii)                                     \
      *dst++ = c1;                                                        \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                        \
      {                                                                   \
        if (sjis_p && charset_alt == charset_katakana_jisx0201)           \
          *dst++ = c1;                                                    \
        else                                                              \
          {                                                               \
            *dst++ = charset_alt, *dst++ = c1;                            \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        c1 &= 0x7F, c2 &= 0x7F;                                           \
        if (sjis_p && charset_alt == charset_jisx0208)                    \
          {                                                               \
            unsigned char s1, s2;                                         \
                                                                          \
            ENCODE_SJIS (c1, c2, s1, s2);                                 \
            *dst++ = s1, *dst++ = s2;                                     \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
        else if (!sjis_p                                                  \
                 && (charset_alt == charset_big5_1                        \
                     || charset_alt == charset_big5_2))                   \
          {                                                               \
            unsigned char b1, b2;                                         \
                                                                          \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);                    \
            *dst++ = b1, *dst++ = b2;                                     \
          }                                                               \
        else                                                              \
          {                                                               \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;               \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    coding->consumed_char++;                                              \
  } while (0)
2099
2100 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2101    Check if a text is encoded in SJIS.  If it is, return
2102    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2103
2104 int
2105 detect_coding_sjis (src, src_end)
2106      unsigned char *src, *src_end;
2107 {
2108   unsigned char c;
2109
2110   while (src < src_end)
2111     {
2112       c = *src++;
2113       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2114         {
2115           if (src < src_end && *src++ < 0x40)
2116             return 0;
2117         }
2118     }
2119   return CODING_CATEGORY_MASK_SJIS;
2120 }
2121
2122 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2123    Check if a text is encoded in BIG5.  If it is, return
2124    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2125
2126 int
2127 detect_coding_big5 (src, src_end)
2128      unsigned char *src, *src_end;
2129 {
2130   unsigned char c;
2131
2132   while (src < src_end)
2133     {
2134       c = *src++;
2135       if (c >= 0xA1)
2136         {
2137           if (src >= src_end)
2138             break;
2139           c = *src++;
2140           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2141             return 0;
2142         }
2143     }
2144   return CODING_CATEGORY_MASK_BIG5;
2145 }
2146
2147 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2148    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2149
2150 int
2151 decode_coding_sjis_big5 (coding, source, destination,
2152                          src_bytes, dst_bytes, sjis_p)
2153      struct coding_system *coding;
2154      unsigned char *source, *destination;
2155      int src_bytes, dst_bytes;
2156      int sjis_p;
2157 {
2158   unsigned char *src = source;
2159   unsigned char *src_end = source + src_bytes;
2160   unsigned char *dst = destination;
2161   unsigned char *dst_end = destination + dst_bytes;
2162   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2163      from DST_END to assure overflow checking is necessary only at the
2164      head of loop.  */
2165   unsigned char *adjusted_dst_end = dst_end - 3;
2166   Lisp_Object unification_table
2167       = coding->character_unification_table_for_decode;
2168   int result = CODING_FINISH_NORMAL;
2169
2170   if (!NILP (Venable_character_unification) && NILP (unification_table))
2171     unification_table = Vstandard_character_unification_table_for_decode;
2172
2173   coding->produced_char = 0;
2174   coding->fake_multibyte = 0;
2175   while (src < src_end && (dst_bytes
2176                            ? (dst < adjusted_dst_end)
2177                            : (dst < src - 3)))
2178     {
2179       /* SRC_BASE remembers the start position in source in each loop.
2180          The loop will be exited when there's not enough source text
2181          to analyze two-byte character (within macro ONE_MORE_BYTE).
2182          In that case, SRC is reset to SRC_BASE before exiting.  */
2183       unsigned char *src_base = src;
2184       unsigned char c1 = *src++, c2, c3, c4;
2185
2186       if (c1 < 0x20)
2187         {
2188           if (c1 == '\r')
2189             {
2190               if (coding->eol_type == CODING_EOL_CRLF)
2191                 {
2192                   ONE_MORE_BYTE (c2);
2193                   if (c2 == '\n')
2194                     *dst++ = c2;
2195                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2196                     {
2197                       result = CODING_FINISH_INCONSISTENT_EOL;
2198                       goto label_end_of_loop_2;
2199                     }
2200                   else
2201                     /* To process C2 again, SRC is subtracted by 1.  */
2202                     *dst++ = c1, src--;
2203                 }
2204               else if (coding->eol_type == CODING_EOL_CR)
2205                 *dst++ = '\n';
2206               else
2207                 *dst++ = c1;
2208             }
2209           else if (c1 == '\n'
2210                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2211                    && (coding->eol_type == CODING_EOL_CR
2212                        || coding->eol_type == CODING_EOL_CRLF))
2213             {
2214               result = CODING_FINISH_INCONSISTENT_EOL;
2215               goto label_end_of_loop_2;
2216             }
2217           else
2218             *dst++ = c1;
2219           coding->produced_char++;
2220         }
2221       else if (c1 < 0x80)
2222         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2223       else if (c1 < 0xA0)
2224         {
2225           /* SJIS -> JISX0208 */
2226           if (sjis_p)
2227             {
2228               ONE_MORE_BYTE (c2);
2229               if (c2 >= 0x40)
2230                 {
2231                   DECODE_SJIS (c1, c2, c3, c4);
2232                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2233                 }
2234               else
2235                 goto label_invalid_code_2;
2236             }
2237           else
2238             goto label_invalid_code_1;
2239         }
2240       else if (c1 < 0xE0)
2241         {
2242           /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
2243           if (sjis_p)
2244             DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2245                                         /* dummy */ c2);
2246           else
2247             {
2248               int charset;
2249
2250               ONE_MORE_BYTE (c2);
2251               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2252                 {
2253                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2254                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2255                 }
2256               else
2257                 goto label_invalid_code_2;
2258             }
2259         }
2260       else                      /* C1 >= 0xE0 */
2261         {
2262           /* SJIS -> JISX0208, BIG5 -> Big5 */
2263           if (sjis_p)
2264             {
2265               ONE_MORE_BYTE (c2);
2266               if (c2 >= 0x40)
2267                 {
2268                   DECODE_SJIS (c1, c2, c3, c4);
2269                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2270                 }
2271               else
2272                 goto label_invalid_code_2;
2273             }
2274           else
2275             {
2276               int charset;
2277
2278               ONE_MORE_BYTE (c2);
2279               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2280                 {
2281                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2282                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2283                 }
2284               else
2285                 goto label_invalid_code_2;
2286             }
2287         }
2288       continue;
2289
2290     label_invalid_code_1:
2291       *dst++ = c1;
2292       coding->produced_char++;
2293       coding->fake_multibyte = 1;
2294       continue;
2295
2296     label_invalid_code_2:
2297       *dst++ = c1; *dst++= c2;
2298       coding->produced_char += 2;
2299       coding->fake_multibyte = 1;
2300       continue;
2301
2302     label_end_of_loop:
2303       result = CODING_FINISH_INSUFFICIENT_SRC;
2304     label_end_of_loop_2:
2305       src = src_base;
2306       break;
2307     }
2308
2309   if (src < src_end)
2310     {
2311       if (result == CODING_FINISH_NORMAL)
2312         result = CODING_FINISH_INSUFFICIENT_DST;
2313       else if (result != CODING_FINISH_INCONSISTENT_EOL
2314                && coding->mode & CODING_MODE_LAST_BLOCK)
2315         {
2316           src_bytes = src_end - src;
2317           if (dst_bytes && (dst_end - dst < src_bytes))
2318             src_bytes = dst_end - dst;
2319           bcopy (dst, src, src_bytes);
2320           src += src_bytes;
2321           dst += src_bytes;
2322           coding->fake_multibyte = 1;
2323         }
2324     }
2325
2326   coding->consumed = coding->consumed_char = src - source;
2327   coding->produced = dst - destination;
2328   return result;
2329 }
2330
2331 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2332    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2333    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
2334    sure that all these charsets are registered as official charset
2335    (i.e. do not have extended leading-codes).  Characters of other
2336    charsets are produced without any encoding.  If SJIS_P is 1, encode
2337    SJIS text, else encode BIG5 text.  */
2338
2339 int
2340 encode_coding_sjis_big5 (coding, source, destination,
2341                          src_bytes, dst_bytes, sjis_p)
2342      struct coding_system *coding;
2343      unsigned char *source, *destination;
2344      int src_bytes, dst_bytes;
2345      int sjis_p;
2346 {
2347   unsigned char *src = source;
2348   unsigned char *src_end = source + src_bytes;
2349   unsigned char *dst = destination;
2350   unsigned char *dst_end = destination + dst_bytes;
2351   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2352      from DST_END to assure overflow checking is necessary only at the
2353      head of loop.  */
2354   unsigned char *adjusted_dst_end = dst_end - 1;
2355   Lisp_Object unification_table
2356       = coding->character_unification_table_for_encode;
2357   int result = CODING_FINISH_NORMAL;
2358
2359   if (!NILP (Venable_character_unification) && NILP (unification_table))
2360     unification_table = Vstandard_character_unification_table_for_encode;
2361
2362   coding->consumed_char = 0;
2363   coding->fake_multibyte = 0;
2364   while (src < src_end && (dst_bytes
2365                            ? (dst < adjusted_dst_end)
2366                            : (dst < src - 1)))
2367     {
2368       /* SRC_BASE remembers the start position in source in each loop.
2369          The loop will be exited when there's not enough source text
2370          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2371          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2372          before exiting.  */
2373       unsigned char *src_base = src;
2374       unsigned char c1 = *src++, c2, c3, c4;
2375
2376       if (coding->composing)
2377         {
2378           if (c1 == 0xA0)
2379             {
2380               ONE_MORE_BYTE (c1);
2381               c1 &= 0x7F;
2382             }
2383           else if (c1 >= 0xA0)
2384             c1 -= 0x20;
2385           else
2386             coding->composing = 0;
2387         }
2388
2389       switch (emacs_code_class[c1])
2390         {
2391         case EMACS_ascii_code:
2392           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2393           break;
2394
2395         case EMACS_control_code:
2396           *dst++ = c1;
2397           coding->consumed_char++;
2398           break;
2399
2400         case EMACS_carriage_return_code:
2401           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2402             {
2403               *dst++ = c1;
2404               coding->consumed_char++;
2405               break;
2406             }
2407           /* fall down to treat '\r' as '\n' ...  */
2408
2409         case EMACS_linefeed_code:
2410           if (coding->eol_type == CODING_EOL_LF
2411               || coding->eol_type == CODING_EOL_UNDECIDED)
2412             *dst++ = '\n';
2413           else if (coding->eol_type == CODING_EOL_CRLF)
2414             *dst++ = '\r', *dst++ = '\n';
2415           else
2416             *dst++ = '\r';
2417           coding->consumed_char++;
2418           break;
2419
2420         case EMACS_leading_code_2:
2421           ONE_MORE_BYTE (c2);
2422           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2423           break;
2424
2425         case EMACS_leading_code_3:
2426           TWO_MORE_BYTES (c2, c3);
2427           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2428           break;
2429
2430         case EMACS_leading_code_4:
2431           THREE_MORE_BYTES (c2, c3, c4);
2432           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2433           break;
2434
2435         case EMACS_leading_code_composition:
2436           coding->composing = 1;
2437           break;
2438
2439         default:                /* i.e. case EMACS_invalid_code: */
2440           *dst++ = c1;
2441           coding->consumed_char++;
2442         }
2443       continue;
2444
2445     label_end_of_loop:
2446       result = CODING_FINISH_INSUFFICIENT_SRC;
2447       src = src_base;
2448       break;
2449     }
2450
2451   if (result == CODING_FINISH_NORMAL
2452       && src < src_end)
2453     result = CODING_FINISH_INSUFFICIENT_DST;
2454   coding->consumed = src - source;
2455   coding->produced = coding->produced_char = dst - destination;
2456   return result;
2457 }
2458
2459 \f
2460 /*** 5. End-of-line handlers ***/
2461
2462 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2463    This function is called only when `coding->eol_type' is
2464    CODING_EOL_CRLF or CODING_EOL_CR.  */
2465
2466 int
2467 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2468      struct coding_system *coding;
2469      unsigned char *source, *destination;
2470      int src_bytes, dst_bytes;
2471 {
2472   unsigned char *src = source;
2473   unsigned char *src_end = source + src_bytes;
2474   unsigned char *dst = destination;
2475   unsigned char *dst_end = destination + dst_bytes;
2476   unsigned char c;
2477   int result = CODING_FINISH_NORMAL;
2478
2479   coding->fake_multibyte = 0;
2480
2481   if (src_bytes <= 0)
2482     return result;
2483
2484   switch (coding->eol_type)
2485     {
2486     case CODING_EOL_CRLF:
2487       {
2488         /* Since the maximum bytes produced by each loop is 2, we
2489            subtract 1 from DST_END to assure overflow checking is
2490            necessary only at the head of loop.  */
2491         unsigned char *adjusted_dst_end = dst_end - 1;
2492
2493         while (src < src_end && (dst_bytes
2494                                  ? (dst < adjusted_dst_end)
2495                                  : (dst < src - 1)))
2496           {
2497             unsigned char *src_base = src;
2498
2499             c = *src++;
2500             if (c == '\r')
2501               {
2502                 ONE_MORE_BYTE (c);
2503                 if (c != '\n')
2504                   {
2505                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2506                       {
2507                         result = CODING_FINISH_INCONSISTENT_EOL;
2508                         goto label_end_of_loop_2;
2509                       }
2510                     *dst++ = '\r';
2511                     if (BASE_LEADING_CODE_P (c))
2512                       coding->fake_multibyte = 1;
2513                   }
2514                 *dst++ = c;
2515               }
2516             else if (c == '\n'
2517                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2518               {
2519                 result = CODING_FINISH_INCONSISTENT_EOL;
2520                 goto label_end_of_loop_2;
2521               }
2522             else
2523               {
2524                 *dst++ = c;
2525                 if (BASE_LEADING_CODE_P (c))
2526                   coding->fake_multibyte = 1;
2527               }
2528             continue;
2529
2530           label_end_of_loop:
2531             result = CODING_FINISH_INSUFFICIENT_SRC;
2532           label_end_of_loop_2:
2533             src = src_base;
2534             break;
2535           }
2536         if (result == CODING_FINISH_NORMAL
2537             && src < src_end)
2538           result = CODING_FINISH_INSUFFICIENT_DST;
2539       }
2540       break;
2541
2542     case CODING_EOL_CR:
2543       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2544         {
2545           while (src < src_end)
2546             {
2547               if ((c = *src++) == '\n')
2548                 break;
2549               if (BASE_LEADING_CODE_P (c))
2550                 coding->fake_multibyte = 1;
2551             }
2552           if (*--src == '\n')
2553             {
2554               src_bytes = src - source;
2555               result = CODING_FINISH_INCONSISTENT_EOL;
2556             }
2557         }
2558       if (dst_bytes && src_bytes > dst_bytes)
2559         {
2560           result = CODING_FINISH_INSUFFICIENT_DST;
2561           src_bytes = dst_bytes;
2562         }
2563       if (dst_bytes)
2564         bcopy (source, destination, src_bytes);
2565       else
2566         safe_bcopy (source, destination, src_bytes);
2567       src = source + src_bytes;
2568       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2569       break;
2570
2571     default:                    /* i.e. case: CODING_EOL_LF */
2572       if (dst_bytes && src_bytes > dst_bytes)
2573         {
2574           result = CODING_FINISH_INSUFFICIENT_DST;
2575           src_bytes = dst_bytes;
2576         }
2577       if (dst_bytes)
2578         bcopy (source, destination, src_bytes);
2579       else
2580         safe_bcopy (source, destination, src_bytes);
2581       src += src_bytes;
2582       dst += dst_bytes;
2583       coding->fake_multibyte = 1;
2584       break;
2585     }
2586
2587   coding->consumed = coding->consumed_char = src - source;
2588   coding->produced = coding->produced_char = dst - destination;
2589   return result;
2590 }
2591
2592 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2593    format of end-of-line according to `coding->eol_type'.  If
2594    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2595    '\r' in source text also means end-of-line.  */
2596
2597 int
2598 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2599      struct coding_system *coding;
2600      unsigned char *source, *destination;
2601      int src_bytes, dst_bytes;
2602 {
2603   unsigned char *src = source;
2604   unsigned char *dst = destination;
2605   int result = CODING_FINISH_NORMAL;
2606
2607   coding->fake_multibyte = 0;
2608
2609   if (coding->eol_type == CODING_EOL_CRLF)
2610     {
2611       unsigned char c;
2612       unsigned char *src_end = source + src_bytes;
2613       unsigned char *dst_end = destination + dst_bytes;
2614       /* Since the maximum bytes produced by each loop is 2, we
2615          subtract 1 from DST_END to assure overflow checking is
2616          necessary only at the head of loop.  */
2617       unsigned char *adjusted_dst_end = dst_end - 1;
2618
2619       while (src < src_end && (dst_bytes
2620                                ? (dst < adjusted_dst_end)
2621                                : (dst < src - 1)))
2622         {
2623           c = *src++;
2624           if (c == '\n'
2625               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2626             *dst++ = '\r', *dst++ = '\n';
2627           else
2628             {
2629               *dst++ = c;
2630               if (BASE_LEADING_CODE_P (c))
2631                 coding->fake_multibyte = 1;
2632             }
2633         }
2634       if (src < src_end)
2635         result = CODING_FINISH_INSUFFICIENT_DST;
2636     }
2637   else
2638     {
2639       unsigned char c;
2640
2641       if (dst_bytes && src_bytes > dst_bytes)
2642         {
2643           src_bytes = dst_bytes;
2644           result = CODING_FINISH_INSUFFICIENT_DST;
2645         }
2646       if (dst_bytes)
2647         bcopy (source, destination, src_bytes);
2648       else
2649         {
2650           safe_bcopy (source, destination, src_bytes);
2651           dst_bytes = src_bytes;
2652         }
2653       if (coding->eol_type == CODING_EOL_CRLF)
2654         {
2655           while (src_bytes--)
2656             {
2657               if ((c = *dst++) == '\n')
2658                 dst[-1] = '\r';
2659               else if (BASE_LEADING_CODE_P (c))
2660                   coding->fake_multibyte = 1;
2661             }
2662         }
2663       else
2664         {
2665           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2666             {
2667               while (src_bytes--)
2668                 if (*dst++ == '\r') dst[-1] = '\n';
2669             }
2670           coding->fake_multibyte = 1;
2671         }
2672       src = source + dst_bytes;
2673       dst = destination + dst_bytes;
2674     }
2675
2676   coding->consumed = coding->consumed_char = src - source;
2677   coding->produced = coding->produced_char = dst - destination;
2678   return result;
2679 }
2680
2681 \f
2682 /*** 6. C library functions ***/
2683
2684 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2685    has a property `coding-system'.  The value of this property is a
2686    vector of length 5 (called as coding-vector).  Among elements of
2687    this vector, the first (element[0]) and the fifth (element[4])
2688    carry important information for decoding/encoding.  Before
2689    decoding/encoding, this information should be set in fields of a
2690    structure of type `coding_system'.
2691
2692    A value of property `coding-system' can be a symbol of another
2693    subsidiary coding-system.  In that case, Emacs gets coding-vector
2694    from that symbol.
2695
2696    `element[0]' contains information to be set in `coding->type'.  The
2697    value and its meaning is as follows:
2698
2699    0 -- coding_type_emacs_mule
2700    1 -- coding_type_sjis
2701    2 -- coding_type_iso2022
2702    3 -- coding_type_big5
2703    4 -- coding_type_ccl encoder/decoder written in CCL
2704    nil -- coding_type_no_conversion
2705    t -- coding_type_undecided (automatic conversion on decoding,
2706                                no-conversion on encoding)
2707
2708    `element[4]' contains information to be set in `coding->flags' and
2709    `coding->spec'.  The meaning varies by `coding->type'.
2710
   If `coding->type' is `coding_type_iso2022', element[4] is a vector
   of length 32 (of which the first 17 sub-elements are used now).
   Meanings of these sub-elements are:
2714
2715    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2716         If the value is an integer of valid charset, the charset is
2717         assumed to be designated to graphic register N initially.
2718
        If the value is negative, it is the negated value of a charset
        which reserves graphic register N, which means that the charset
        is not designated initially but should be designated to graphic
        register N just before encoding a character in that charset.
2723
2724         If the value is nil, graphic register N is never used on
2725         encoding.
2726
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
        Each value takes t or nil.  See the section ISO2022 of
        `coding.h' for more information.
2730
2731    If `coding->type' is `coding_type_big5', element[4] is t to denote
2732    BIG5-ETen or nil to denote BIG5-HKU.
2733
2734    If `coding->type' takes the other value, element[4] is ignored.
2735
2736    Emacs Lisp's coding system also carries information about format of
2737    end-of-line in a value of property `eol-type'.  If the value is
2738    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2739    means CODING_EOL_CR.  If it is not integer, it should be a vector
2740    of subsidiary coding systems of which property `eol-type' has one
2741    of above values.
2742
2743 */
2744
2745 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2746    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2747    is setup so that no conversion is necessary and return -1, else
2748    return 0.  */
2749
2750 int
2751 setup_coding_system (coding_system, coding)
2752      Lisp_Object coding_system;
2753      struct coding_system *coding;
2754 {
2755   Lisp_Object coding_spec, coding_type, eol_type, plist;
2756   Lisp_Object val;
2757   int i;
2758
2759   /* Initialize some fields required for all kinds of coding systems.  */
2760   coding->symbol = coding_system;
2761   coding->common_flags = 0;
2762   coding->mode = 0;
2763   coding->heading_ascii = -1;
2764   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2765   coding_spec = Fget (coding_system, Qcoding_system);
2766   if (!VECTORP (coding_spec)
2767       || XVECTOR (coding_spec)->size != 5
2768       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2769     goto label_invalid_coding_system;
2770
2771   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2772   if (VECTORP (eol_type))
2773     {
2774       coding->eol_type = CODING_EOL_UNDECIDED;
2775       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2776     }
2777   else if (XFASTINT (eol_type) == 1)
2778     {
2779       coding->eol_type = CODING_EOL_CRLF;
2780       coding->common_flags
2781         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2782     }
2783   else if (XFASTINT (eol_type) == 2)
2784     {
2785       coding->eol_type = CODING_EOL_CR;
2786       coding->common_flags
2787         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2788     }
2789   else
2790     coding->eol_type = CODING_EOL_LF;
2791
2792   coding_type = XVECTOR (coding_spec)->contents[0];
2793   /* Try short cut.  */
2794   if (SYMBOLP (coding_type))
2795     {
2796       if (EQ (coding_type, Qt))
2797         {
2798           coding->type = coding_type_undecided;
2799           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2800         }
2801       else
2802         coding->type = coding_type_no_conversion;
2803       return 0;
2804     }
2805
2806   /* Initialize remaining fields.  */
2807   coding->composing = 0;
2808   coding->character_unification_table_for_decode = Qnil;
2809   coding->character_unification_table_for_encode = Qnil;
2810
2811   /* Get values of coding system properties:
2812      `post-read-conversion', `pre-write-conversion',
2813      `character-unification-table-for-decode',
2814      `character-unification-table-for-encode'.  */
2815   plist = XVECTOR (coding_spec)->contents[3];
2816   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2817   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2818   val = Fplist_get (plist, Qcharacter_unification_table_for_decode);
2819   if (SYMBOLP (val))
2820     val = Fget (val, Qcharacter_unification_table_for_decode);
2821   coding->character_unification_table_for_decode
2822     = CHAR_TABLE_P (val) ? val : Qnil;
2823   val = Fplist_get (plist, Qcharacter_unification_table_for_encode);
2824   if (SYMBOLP (val))
2825     val = Fget (val, Qcharacter_unification_table_for_encode);
2826   coding->character_unification_table_for_encode
2827     = CHAR_TABLE_P (val) ? val : Qnil;
2828   val = Fplist_get (plist, Qcoding_category);
2829   if (!NILP (val))
2830     {
2831       val = Fget (val, Qcoding_category_index);
2832       if (INTEGERP (val))
2833         coding->category_idx = XINT (val);
2834       else
2835         goto label_invalid_coding_system;
2836     }
2837   else
2838     goto label_invalid_coding_system;
2839
2840   val = Fplist_get (plist, Qsafe_charsets);
2841   if (EQ (val, Qt))
2842     {
2843       for (i = 0; i <= MAX_CHARSET; i++)
2844         coding->safe_charsets[i] = 1;
2845     }
2846   else
2847     {
2848       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2849       while (CONSP (val))
2850         {
2851           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2852             coding->safe_charsets[i] = 1;
2853           val = XCONS (val)->cdr;
2854         }
2855     }
2856
2857   switch (XFASTINT (coding_type))
2858     {
2859     case 0:
2860       coding->type = coding_type_emacs_mule;
2861       if (!NILP (coding->post_read_conversion))
2862         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2863       if (!NILP (coding->pre_write_conversion))
2864         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2865       break;
2866
2867     case 1:
2868       coding->type = coding_type_sjis;
2869       coding->common_flags
2870         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2871       break;
2872
2873     case 2:
2874       coding->type = coding_type_iso2022;
2875       coding->common_flags
2876         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2877       {
2878         Lisp_Object val, temp;
2879         Lisp_Object *flags;
2880         int i, charset, reg_bits = 0;
2881
2882         val = XVECTOR (coding_spec)->contents[4];
2883
2884         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2885           goto label_invalid_coding_system;
2886
2887         flags = XVECTOR (val)->contents;
2888         coding->flags
2889           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2890              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2891              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2892              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2893              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2894              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2895              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2896              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2897              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2898              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2899              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2900              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2901              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2902              );
2903
2904         /* Invoke graphic register 0 to plane 0.  */
2905         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2906         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2907         CODING_SPEC_ISO_INVOCATION (coding, 1)
2908           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2909         /* Not single shifting at first.  */
2910         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2911         /* Beginning of buffer should also be regarded as bol. */
2912         CODING_SPEC_ISO_BOL (coding) = 1;
2913
2914         for (charset = 0; charset <= MAX_CHARSET; charset++)
2915           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2916         val = Vcharset_revision_alist;
2917         while (CONSP (val))
2918           {
2919             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2920             if (charset >= 0
2921                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2922                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2923               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2924             val = XCONS (val)->cdr;
2925           }
2926
2927         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2928            FLAGS[REG] can be one of below:
2929                 integer CHARSET: CHARSET occupies register I,
2930                 t: designate nothing to REG initially, but can be used
2931                   by any charsets,
2932                 list of integer, nil, or t: designate the first
2933                   element (if integer) to REG initially, the remaining
2934                   elements (if integer) is designated to REG on request,
2935                   if an element is t, REG can be used by any charsets,
2936                 nil: REG is never used.  */
2937         for (charset = 0; charset <= MAX_CHARSET; charset++)
2938           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2939             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2940         for (i = 0; i < 4; i++)
2941           {
2942             if (INTEGERP (flags[i])
2943                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2944                 || (charset = get_charset_id (flags[i])) >= 0)
2945               {
2946                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2947                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2948               }
2949             else if (EQ (flags[i], Qt))
2950               {
2951                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2952                 reg_bits |= 1 << i;
2953                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2954               }
2955             else if (CONSP (flags[i]))
2956               {
2957                 Lisp_Object tail = flags[i];
2958
2959                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2960                 if (INTEGERP (XCONS (tail)->car)
2961                     && (charset = XINT (XCONS (tail)->car),
2962                         CHARSET_VALID_P (charset))
2963                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2964                   {
2965                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2966                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2967                   }
2968                 else
2969                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2970                 tail = XCONS (tail)->cdr;
2971                 while (CONSP (tail))
2972                   {
2973                     if (INTEGERP (XCONS (tail)->car)
2974                         && (charset = XINT (XCONS (tail)->car),
2975                             CHARSET_VALID_P (charset))
2976                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2977                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2978                         = i;
2979                     else if (EQ (XCONS (tail)->car, Qt))
2980                       reg_bits |= 1 << i;
2981                     tail = XCONS (tail)->cdr;
2982                   }
2983               }
2984             else
2985               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2986
2987             CODING_SPEC_ISO_DESIGNATION (coding, i)
2988               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
2989           }
2990
2991         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
2992           {
2993             /* REG 1 can be used only by locking shift in 7-bit env.  */
2994             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
2995               reg_bits &= ~2;
2996             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
2997               /* Without any shifting, only REG 0 and 1 can be used.  */
2998               reg_bits &= 3;
2999           }
3000
3001         if (reg_bits)
3002           for (charset = 0; charset <= MAX_CHARSET; charset++)
3003             {
3004               if (CHARSET_VALID_P (charset))
3005                 {
3006                   /* There exist some default graphic registers to be
3007                      used CHARSET.  */
3008
3009                   /* We had better avoid designating a charset of
3010                      CHARS96 to REG 0 as far as possible.  */
3011                   if (CHARSET_CHARS (charset) == 96)
3012                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3013                       = (reg_bits & 2
3014                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3015                   else
3016                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3017                       = (reg_bits & 1
3018                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3019                 }
3020             }
3021       }
3022       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3023       coding->spec.iso2022.last_invalid_designation_register = -1;
3024       break;
3025
3026     case 3:
3027       coding->type = coding_type_big5;
3028       coding->common_flags
3029         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3030       coding->flags
3031         = (NILP (XVECTOR (coding_spec)->contents[4])
3032            ? CODING_FLAG_BIG5_HKU
3033            : CODING_FLAG_BIG5_ETEN);
3034       break;
3035
3036     case 4:
3037       coding->type = coding_type_ccl;
3038       coding->common_flags
3039         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3040       {
3041         Lisp_Object val = XVECTOR (coding_spec)->contents[4];
3042         Lisp_Object decoder, encoder;
3043
3044         if (CONSP  (val)
3045             && SYMBOLP (XCONS (val)->car)
3046             && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
3047             && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
3048             && SYMBOLP (XCONS (val)->cdr)
3049             && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
3050             && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
3051           {
3052             setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
3053             setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
3054           }
3055         else
3056           goto label_invalid_coding_system;
3057       }
3058       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3059       break;
3060
3061     case 5:
3062       coding->type = coding_type_raw_text;
3063       break;
3064
3065     default:
3066       goto label_invalid_coding_system;
3067     }
3068   return 0;
3069
3070  label_invalid_coding_system:
3071   coding->type = coding_type_no_conversion;
3072   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3073   coding->common_flags = 0;
3074   coding->eol_type = CODING_EOL_LF;
3075   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3076   return -1;
3077 }
3078
/* Emacs has a mechanism to automatically detect a coding system if it
   is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
   it's impossible to distinguish some coding systems accurately
   because they use the same range of codes.  So, at first, coding
   systems are grouped into the following categories:
3084
3085    o coding-category-emacs-mule
3086
3087         The category for a coding system which has the same code range
3088         as Emacs' internal format.  Assigned the coding-system (Lisp
3089         symbol) `emacs-mule' by default.
3090
3091    o coding-category-sjis
3092
3093         The category for a coding system which has the same code range
3094         as SJIS.  Assigned the coding-system (Lisp
3095         symbol) `japanese-shift-jis' by default.
3096
3097    o coding-category-iso-7
3098
3099         The category for a coding system which has the same code range
3100         as ISO2022 of 7-bit environment.  This doesn't use any locking
3101         shift and single shift functions.  This can encode/decode all
3102         charsets.  Assigned the coding-system (Lisp symbol)
3103         `iso-2022-7bit' by default.
3104
3105    o coding-category-iso-7-tight
3106
3107         Same as coding-category-iso-7 except that this can
3108         encode/decode only the specified charsets.
3109
3110    o coding-category-iso-8-1
3111
3112         The category for a coding system which has the same code range
3113         as ISO2022 of 8-bit environment and graphic plane 1 used only
3114         for DIMENSION1 charset.  This doesn't use any locking shift
3115         and single shift functions.  Assigned the coding-system (Lisp
3116         symbol) `iso-latin-1' by default.
3117
3118    o coding-category-iso-8-2
3119
3120         The category for a coding system which has the same code range
3121         as ISO2022 of 8-bit environment and graphic plane 1 used only
3122         for DIMENSION2 charset.  This doesn't use any locking shift
3123         and single shift functions.  Assigned the coding-system (Lisp
3124         symbol) `japanese-iso-8bit' by default.
3125
3126    o coding-category-iso-7-else
3127
3128         The category for a coding system which has the same code range
        as ISO2022 of 7-bit environment but uses locking shift or
3130         single shift functions.  Assigned the coding-system (Lisp
3131         symbol) `iso-2022-7bit-lock' by default.
3132
3133    o coding-category-iso-8-else
3134
3135         The category for a coding system which has the same code range
        as ISO2022 of 8-bit environment but uses locking shift or
3137         single shift functions.  Assigned the coding-system (Lisp
3138         symbol) `iso-2022-8bit-ss2' by default.
3139
3140    o coding-category-big5
3141
3142         The category for a coding system which has the same code range
3143         as BIG5.  Assigned the coding-system (Lisp symbol)
3144         `cn-big5' by default.
3145
3146    o coding-category-binary
3147
3148         The category for a coding system not categorized in any of the
3149         above.  Assigned the coding-system (Lisp symbol)
3150         `no-conversion' by default.
3151
3152    Each of them is a Lisp symbol and the value is an actual
3153    `coding-system's (this is also a Lisp symbol) assigned by a user.
3154    What Emacs does actually is to detect a category of coding system.
3155    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3156    decide only one possible category, it selects a category of the
3157    highest priority.  Priorities of categories are also specified by a
3158    user in a Lisp variable `coding-category-list'.
3159
3160 */
3161
3162 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3163    If it detects possible coding systems, return an integer in which
3164    appropriate flag bits are set.  Flag bits are defined by macros
3165    CODING_CATEGORY_MASK_XXX in `coding.h'.
3166
3167    How many ASCII characters are at the head is returned as *SKIP.  */
3168
3169 static int
3170 detect_coding_mask (source, src_bytes, priorities, skip)
3171      unsigned char *source;
3172      int src_bytes, *priorities, *skip;
3173 {
3174   register unsigned char c;
3175   unsigned char *src = source, *src_end = source + src_bytes;
3176   unsigned int mask = (CODING_CATEGORY_MASK_ISO_7BIT
3177                        | CODING_CATEGORY_MASK_ISO_SHIFT);
3178   int i;
3179
3180   /* At first, skip all ASCII characters and control characters except
3181      for three ISO2022 specific control characters.  */
3182  label_loop_detect_coding:
3183   while (src < src_end)
3184     {
3185       c = *src;
3186       if (c >= 0x80
3187           || ((mask & CODING_CATEGORY_MASK_ISO_7BIT)
3188               && c == ISO_CODE_ESC)
3189           || ((mask & CODING_CATEGORY_MASK_ISO_SHIFT)
3190               && (c == ISO_CODE_SI || c == ISO_CODE_SO)))
3191         break;
3192       src++;
3193     }
3194   *skip = src - source;
3195
3196   if (src >= src_end)
3197     /* We found nothing other than ASCII.  There's nothing to do.  */
3198     return 0;
3199
3200   /* The text seems to be encoded in some multilingual coding system.
3201      Now, try to find in which coding system the text is encoded.  */
3202   if (c < 0x80)
3203     {
3204       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3205       /* C is an ISO2022 specific control code of C0.  */
3206       mask = detect_coding_iso2022 (src, src_end);
3207       if (mask == 0)
3208         {
3209           /* No valid ISO2022 code follows C.  Try again.  */
3210           src++;
3211           mask = (c != ISO_CODE_ESC
3212                   ? CODING_CATEGORY_MASK_ISO_7BIT
3213                   : CODING_CATEGORY_MASK_ISO_SHIFT);
3214           goto label_loop_detect_coding;
3215         }
3216       if (priorities)
3217         goto label_return_highest_only;
3218     }
3219   else
3220     {
3221       int try;
3222
3223       if (c < 0xA0)
3224         {
3225           /* C is the first byte of SJIS character code,
3226              or a leading-code of Emacs' internal format (emacs-mule).  */
3227           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3228
3229           /* Or, if C is a special latin extra code,
3230              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3231              or is an ISO2022 control-sequence-introducer (CSI),
3232              we should also consider the possibility of ISO2022 codings.  */
3233           if ((VECTORP (Vlatin_extra_code_table)
3234                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3235               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3236               || (c == ISO_CODE_CSI
3237                   && (src < src_end
3238                       && (*src == ']'
3239                           || ((*src == '0' || *src == '1' || *src == '2')
3240                               && src + 1 < src_end
3241                               && src[1] == ']')))))
3242             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3243                      | CODING_CATEGORY_MASK_ISO_8BIT);
3244         }
3245       else
3246         /* C is a character of ISO2022 in graphic plane right,
3247            or a SJIS's 1-byte character code (i.e. JISX0201),
3248            or the first byte of BIG5's 2-byte code.  */
3249         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3250                 | CODING_CATEGORY_MASK_ISO_8BIT
3251                 | CODING_CATEGORY_MASK_SJIS
3252                 | CODING_CATEGORY_MASK_BIG5);
3253
3254       mask = 0;
3255       if (priorities)
3256         {
3257           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3258             {
3259               if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
3260                 mask = detect_coding_iso2022 (src, src_end);
3261               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3262                 mask = detect_coding_sjis (src, src_end);
3263               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3264                 mask = detect_coding_big5 (src, src_end);
3265               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3266                 mask = detect_coding_emacs_mule (src, src_end);
3267               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3268                 mask = CODING_CATEGORY_MASK_RAW_TEXT;
3269               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3270                 mask = CODING_CATEGORY_MASK_BINARY;
3271               if (mask)
3272                 goto label_return_highest_only;
3273             }
3274           return CODING_CATEGORY_MASK_RAW_TEXT;
3275         }
3276       if (try & CODING_CATEGORY_MASK_ISO)
3277         mask |= detect_coding_iso2022 (src, src_end);
3278       if (try & CODING_CATEGORY_MASK_SJIS)
3279         mask |= detect_coding_sjis (src, src_end);
3280       if (try & CODING_CATEGORY_MASK_BIG5)
3281         mask |= detect_coding_big5 (src, src_end);
3282       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3283         mask |= detect_coding_emacs_mule (src, src_end);
3284     }
3285   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3286
3287  label_return_highest_only:
3288   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3289     {
3290       if (mask & priorities[i])
3291         return priorities[i];
3292     }
3293   return CODING_CATEGORY_MASK_RAW_TEXT;
3294 }
3295
3296 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3297    The information of the detected coding system is set in CODING.  */
3298
3299 void
3300 detect_coding (coding, src, src_bytes)
3301      struct coding_system *coding;
3302      unsigned char *src;
3303      int src_bytes;
3304 {
3305   unsigned int idx;
3306   int skip, mask, i;
3307   int priorities[CODING_CATEGORY_IDX_MAX];
3308   Lisp_Object val = Vcoding_category_list;
3309
3310   i = 0;
3311   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
3312     {
3313       if (! SYMBOLP (XCONS (val)->car))
3314         break;
3315       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
3316       if (idx >= CODING_CATEGORY_IDX_MAX)
3317         break;
3318       priorities[i++] = (1 << idx);
3319       val = XCONS (val)->cdr;
3320     }
3321   /* If coding-category-list is valid and contains all coding
3322      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
3323      the following code saves Emacs from craching.  */
3324   while (i < CODING_CATEGORY_IDX_MAX)
3325     priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
3326
3327   mask = detect_coding_mask (src, src_bytes, priorities, &skip);
3328   coding->heading_ascii = skip;
3329
3330   if (!mask) return;
3331
3332   /* We found a single coding system of the highest priority in MASK.  */
3333   idx = 0;
3334   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3335   if (! mask)
3336     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3337
3338   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3339
3340   if (coding->eol_type != CODING_EOL_UNDECIDED)
3341     {
3342       Lisp_Object tmp = Fget (val, Qeol_type);
3343
3344       if (VECTORP (tmp))
3345         val = XVECTOR (tmp)->contents[coding->eol_type];
3346     }
3347   setup_coding_system (val, coding);
3348   /* Set this again because setup_coding_system reset this member.  */
3349   coding->heading_ascii = skip;
3350 }
3351
3352 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3353    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3354    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3355
3356    How many non-eol characters are at the head is returned as *SKIP.  */
3357
3358 #define MAX_EOL_CHECK_COUNT 3
3359
3360 static int
3361 detect_eol_type (source, src_bytes, skip)
3362      unsigned char *source;
3363      int src_bytes, *skip;
3364 {
3365   unsigned char *src = source, *src_end = src + src_bytes;
3366   unsigned char c;
3367   int total = 0;                /* How many end-of-lines are found so far.  */
3368   int eol_type = CODING_EOL_UNDECIDED;
3369   int this_eol_type;
3370
3371   *skip = 0;
3372
3373   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3374     {
3375       c = *src++;
3376       if (c == '\n' || c == '\r')
3377         {
3378           if (*skip == 0)
3379             *skip = src - 1 - source;
3380           total++;
3381           if (c == '\n')
3382             this_eol_type = CODING_EOL_LF;
3383           else if (src >= src_end || *src != '\n')
3384             this_eol_type = CODING_EOL_CR;
3385           else
3386             this_eol_type = CODING_EOL_CRLF, src++;
3387
3388           if (eol_type == CODING_EOL_UNDECIDED)
3389             /* This is the first end-of-line.  */
3390             eol_type = this_eol_type;
3391           else if (eol_type != this_eol_type)
3392             {
3393               /* The found type is different from what found before.  */
3394               eol_type = CODING_EOL_INCONSISTENT;
3395               break;
3396             }
3397         }
3398     }
3399
3400   if (*skip == 0)
3401     *skip = src_end - source;
3402   return eol_type;
3403 }
3404
3405 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3406    is encoded.  If it detects an appropriate format of end-of-line, it
3407    sets the information in *CODING.  */
3408
3409 void
3410 detect_eol (coding, src, src_bytes)
3411      struct coding_system *coding;
3412      unsigned char *src;
3413      int src_bytes;
3414 {
3415   Lisp_Object val;
3416   int skip;
3417   int eol_type = detect_eol_type (src, src_bytes, &skip);
3418
3419   if (coding->heading_ascii > skip)
3420     coding->heading_ascii = skip;
3421   else
3422     skip = coding->heading_ascii;
3423
3424   if (eol_type == CODING_EOL_UNDECIDED)
3425     return;
3426   if (eol_type == CODING_EOL_INCONSISTENT)
3427     {
3428 #if 0
3429       /* This code is suppressed until we find a better way to
3430          distinguish raw text file and binary file.  */
3431
3432       /* If we have already detected that the coding is raw-text, the
3433          coding should actually be no-conversion.  */
3434       if (coding->type == coding_type_raw_text)
3435         {
3436           setup_coding_system (Qno_conversion, coding);
3437           return;
3438         }
3439       /* Else, let's decode only text code anyway.  */
3440 #endif /* 0 */
3441       eol_type = CODING_EOL_LF;
3442     }
3443
3444   val = Fget (coding->symbol, Qeol_type);
3445   if (VECTORP (val) && XVECTOR (val)->size == 3)
3446     {
3447       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3448       coding->heading_ascii = skip;
3449     }
3450 }
3451
/* Extra room (in bytes) added to every buffer size computed by
   decoding_buffer_size and encoding_buffer_size.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the size of a text is multiplied to get a buffer
   size for decoding it with CODING (a pointer to struct
   coding_system): 3 for ISO2022, 2 for SJIS and BIG5, 1 for raw
   text, the decoder program's own buf_magnification for CCL, and 2
   for anything else.  CODING is evaluated more than once, so it must
   not have side effects.  */
#define DECODING_BUFFER_MAG(coding)                                       \
  ((coding)->type == coding_type_iso2022                                  \
   ? 3                                                                    \
   : (((coding)->type == coding_type_sjis                                 \
       || (coding)->type == coding_type_big5)                             \
      ? 2                                                                 \
      : ((coding)->type == coding_type_raw_text                           \
         ? 1                                                              \
         : ((coding)->type == coding_type_ccl                             \
            ? (coding)->spec.ccl.decoder.buf_magnification                \
            : 2))))
3464
3465 /* Return maximum size (bytes) of a buffer enough for decoding
3466    SRC_BYTES of text encoded in CODING.  */
3467
3468 int
3469 decoding_buffer_size (coding, src_bytes)
3470      struct coding_system *coding;
3471      int src_bytes;
3472 {
3473   return (src_bytes * DECODING_BUFFER_MAG (coding)
3474           + CONVERSION_BUFFER_EXTRA_ROOM);
3475 }
3476
3477 /* Return maximum size (bytes) of a buffer enough for encoding
3478    SRC_BYTES of text to CODING.  */
3479
3480 int
3481 encoding_buffer_size (coding, src_bytes)
3482      struct coding_system *coding;
3483      int src_bytes;
3484 {
3485   int magnification;
3486
3487   if (coding->type == coding_type_ccl)
3488     magnification = coding->spec.ccl.encoder.buf_magnification;
3489   else
3490     magnification = 3;
3491
3492   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3493 }
3494
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* Work buffer handed out by get_conversion_buffer, and its current
   size in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a SIZE bytes of buffer to be used for encoding
   or decoding.  The buffer is grown automatically, by repeated
   doubling, when SIZE exceeds the current size; the previous
   contents are not copied to the new buffer.

   NOTE(review): the result of xmalloc is not checked here, so this
   presumably relies on xmalloc not returning on failure -- confirm.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* Doubling makes no progress from zero, so start from the
         minimum size if no buffer has been set up yet; otherwise the
         loop below would never terminate.  */
      if (real_size <= 0)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;
      while (real_size < size)
        real_size *= 2;
      /* Allocate the new buffer before releasing the old one.  */
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3523
3524 int
3525 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3526      struct coding_system *coding;
3527      unsigned char *source, *destination;
3528      int src_bytes, dst_bytes, encodep;
3529 {
3530   struct ccl_program *ccl
3531     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3532   int result;
3533
3534   coding->produced = ccl_driver (ccl, source, destination,
3535                                  src_bytes, dst_bytes, &(coding->consumed));
3536   if (encodep)
3537     {
3538       coding->produced_char = coding->produced;
3539       coding->consumed_char
3540         = multibyte_chars_in_text (source, coding->consumed);
3541     }
3542   else
3543     {
3544       coding->produced_char
3545         = multibyte_chars_in_text (destination, coding->produced);
3546       coding->consumed_char = coding->consumed;
3547     }
3548   switch (ccl->status)
3549     {
3550     case CCL_STAT_SUSPEND_BY_SRC:
3551       result = CODING_FINISH_INSUFFICIENT_SRC;
3552       break;
3553     case CCL_STAT_SUSPEND_BY_DST:
3554       result = CODING_FINISH_INSUFFICIENT_DST;
3555       break;
3556     default:
3557       result = CODING_FINISH_NORMAL;
3558       break;
3559     }
3560   return result;
3561 }
3562
3563 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3564    decoding, it may detect coding system and format of end-of-line if
3565    those are not yet decided.  */
3566
3567 int
3568 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3569      struct coding_system *coding;
3570      unsigned char *source, *destination;
3571      int src_bytes, dst_bytes;
3572 {
3573   int result;
3574
3575   if (src_bytes <= 0)
3576     {
3577       coding->produced = coding->produced_char = 0;
3578       coding->consumed = coding->consumed_char = 0;
3579       coding->fake_multibyte = 0;
3580       return CODING_FINISH_NORMAL;
3581     }
3582
3583   if (coding->type == coding_type_undecided)
3584     detect_coding (coding, source, src_bytes);
3585
3586   if (coding->eol_type == CODING_EOL_UNDECIDED)
3587     detect_eol (coding, source, src_bytes);
3588
3589   switch (coding->type)
3590     {
3591     case coding_type_emacs_mule:
3592     case coding_type_undecided:
3593     case coding_type_raw_text:
3594       if (coding->eol_type == CODING_EOL_LF
3595           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3596         goto label_no_conversion;
3597       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3598       break;
3599
3600     case coding_type_sjis:
3601       result = decode_coding_sjis_big5 (coding, source, destination,
3602                                         src_bytes, dst_bytes, 1);
3603       break;
3604
3605     case coding_type_iso2022:
3606       result = decode_coding_iso2022 (coding, source, destination,
3607                                       src_bytes, dst_bytes);
3608       break;
3609
3610     case coding_type_big5:
3611       result = decode_coding_sjis_big5 (coding, source, destination,
3612                                         src_bytes, dst_bytes, 0);
3613       break;
3614
3615     case coding_type_ccl:
3616       result = ccl_coding_driver (coding, source, destination,
3617                                   src_bytes, dst_bytes, 0);
3618       break;
3619
3620     default:                    /* i.e. case coding_type_no_conversion: */
3621     label_no_conversion:
3622       if (dst_bytes && src_bytes > dst_bytes)
3623         {
3624           coding->produced = dst_bytes;
3625           result = CODING_FINISH_INSUFFICIENT_DST;
3626         }
3627       else
3628         {
3629           coding->produced = src_bytes;
3630           result = CODING_FINISH_NORMAL;
3631         }
3632       if (dst_bytes)
3633         bcopy (source, destination, coding->produced);
3634       else
3635         safe_bcopy (source, destination, coding->produced);
3636       coding->fake_multibyte = 1;
3637       coding->consumed
3638         = coding->consumed_char = coding->produced_char = coding->produced;
3639       break;
3640     }
3641
3642   return result;
3643 }
3644
3645 /* Encode SRC_BYTES bytes at SOURCE into DESTINATION as CODING directs.
3646    See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3647
3648 int
3649 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3650      struct coding_system *coding;
3651      unsigned char *source, *destination;
3652      int src_bytes, dst_bytes;
3653 {
3654   int rc;
3655   int copy_verbatim = 0;
3656
3657   if (src_bytes <= 0)
3658     {
3659       coding->consumed = coding->consumed_char = 0;
3660       coding->produced = coding->produced_char = 0;
3661       coding->fake_multibyte = 0;
3662       return CODING_FINISH_NORMAL;
3663     }
3664
3665   switch (coding->type)
3666     {
3667     case coding_type_iso2022:
3668       rc = encode_coding_iso2022 (coding, source, destination,
3669                                   src_bytes, dst_bytes);
3670       break;
3671     case coding_type_sjis:
3672       rc = encode_coding_sjis_big5 (coding, source, destination,
3673                                     src_bytes, dst_bytes, 1);
3674       break;
3675     case coding_type_big5:
3676       rc = encode_coding_sjis_big5 (coding, source, destination,
3677                                     src_bytes, dst_bytes, 0);
3678       break;
3679     case coding_type_ccl:
3680       rc = ccl_coding_driver (coding, source, destination,
3681                               src_bytes, dst_bytes, 1);
3682       break;
3683     case coding_type_emacs_mule:
3684     case coding_type_undecided:
3685     case coding_type_raw_text:
3686       /* Only the eol format may need converting for these types.  */
3687       if (coding->eol_type != CODING_EOL_LF
3688           && coding->eol_type != CODING_EOL_UNDECIDED)
3689         rc = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3690       else
3691         copy_verbatim = 1;
3692       break;
3693     default:                    /* i.e. case coding_type_no_conversion: */
3694       copy_verbatim = 1;
3695       break;
3696     }
3697
3698   if (copy_verbatim)
3699     {
3700       unsigned char *p, *pend;
3701       int clipped = dst_bytes && src_bytes > dst_bytes;
3702       coding->produced = clipped ? dst_bytes : src_bytes;
3703       rc = clipped ? CODING_FINISH_INSUFFICIENT_DST : CODING_FINISH_NORMAL;
3704       /* A zero DST_BYTES selects safe_bcopy instead of bcopy.  */
3705       if (dst_bytes)
3706         bcopy (source, destination, coding->produced);
3707       else
3708         safe_bcopy (source, destination, coding->produced);
3709       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3710         {
3711           /* Replace each CR in the copied bytes with LF.  */
3712           pend = destination + coding->produced;
3713           for (p = destination; p < pend; p++)
3714             if (*p == '\015') *p = '\n';
3715         }
3716       coding->fake_multibyte = 1;
3717       coding->produced_char = coding->produced;
3718       coding->consumed_char = coding->produced_char;
3719       coding->consumed = coding->consumed_char;
3720     }
3721
3722   return rc;
3723 }
3724
3725 /* Scan text in the region between *BEG and *END (byte positions),
3726    skip characters which we don't have to decode by coding system
3727    CODING at the head and tail, then set *BEG and *END to the region
3728    of the text we actually have to convert.  The caller should move
3729    the gap out of the region in advance.  Nothing is skipped for ccl,
3730    undecided and no-conversion types or with a post-read-conversion.
3731    If STR is not NULL, *BEG and *END are indices into STR.  */
3732
3733 static void
3734 shrink_decoding_region (beg, end, coding, str)
3735      int *beg, *end;
3736      struct coding_system *coding;
3737      unsigned char *str;
3738 {
3739   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3740   int eol_conversion;
3741
3742   if (coding->type == coding_type_ccl
3743       || coding->type == coding_type_undecided
3744       || !NILP (coding->post_read_conversion))
3745     {
3746       /* We can't skip any data.  */
3747       return;
3748     }
3749   else if (coding->type == coding_type_no_conversion)
3750     {
3751       /* We need no conversion, but don't have to skip any data here.
3752          Decoding routine handles them effectively anyway.  */
3753       return;
3754     }
3755
3756   if (coding->heading_ascii >= 0)
3757     /* Detection routine has already found how much we can skip at the
3758        head.  */
3759     *beg += coding->heading_ascii;
3760
3761   if (str)
3762     {
3763       begp_orig = begp = str + *beg;
3764       endp_orig = endp = str + *end;
3765     }
3766   else
3767     {
3768       begp_orig = begp = BYTE_POS_ADDR (*beg);
3769       endp_orig = endp = begp + *end - *beg;
3770     }
3771
3772   eol_conversion = (coding->eol_type != CODING_EOL_LF);  /* Any type but LF.  */
3773
3774   switch (coding->type)
3775     {
3776     case coding_type_emacs_mule:
3777     case coding_type_raw_text:
3778       if (eol_conversion)
3779         {
3780           if (coding->heading_ascii < 0)
3781             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3782           while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
3783             endp--;
3784           /* Do not consider LF as ascii if preceded by CR, since that
3785              confuses eol decoding. */
3786           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3787             endp++;
3788         }
3789       else
3790         begp = endp;            /* The region to decode becomes empty.  */
3791       break;
3792
3793     case coding_type_sjis:
3794     case coding_type_big5:
3795       /* We can skip all ASCII characters at the head.  */
3796       if (coding->heading_ascii < 0)
3797         {
3798           if (eol_conversion)
3799             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3800           else
3801             while (begp < endp && *begp < 0x80) begp++;
3802         }
3803       /* We can skip all ASCII characters at the tail except for the
3804          second byte of SJIS or BIG5 code.  */
3805       if (eol_conversion)
3806         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3807       else
3808         while (begp < endp && endp[-1] < 0x80) endp--;
3809       /* Do not consider LF as ascii if preceded by CR, since that
3810          confuses eol decoding. */
3811       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3812         endp++;
3813       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3814         endp++;                 /* Keep a possible second byte.  */
3815       break;
3816
3817     default:            /* i.e. case coding_type_iso2022: */
3818       if (coding->heading_ascii < 0)
3819         {
3820           /* We can skip all ASCII characters at the head except for a
3821              few control codes.  */
3822           while (begp < endp && (c = *begp) < 0x80
3823                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3824                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3825                  && (!eol_conversion || c != ISO_CODE_LF))
3826             begp++;
3827         }
3828       switch (coding->category_idx)
3829         {
3830         case CODING_CATEGORY_IDX_ISO_8_1:
3831         case CODING_CATEGORY_IDX_ISO_8_2:
3832           /* We can skip all ASCII characters at the tail.  */
3833           if (eol_conversion)
3834             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3835           else
3836             while (begp < endp && endp[-1] < 0x80) endp--;
3837           /* Do not consider LF as ascii if preceded by CR, since that
3838              confuses eol decoding. */
3839           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3840             endp++;
3841           break;
3842
3843         case CODING_CATEGORY_IDX_ISO_7:
3844         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3845           /* We can skip all characters at the tail except for ESC and
3846              the 2 bytes following it.  */
3847           if (eol_conversion)
3848             while (begp < endp
3849                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3850               endp--;
3851           else
3852             while (begp < endp
3853                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3854               endp--;
3855           /* Do not consider LF as ascii if preceded by CR, since that
3856              confuses eol decoding. */
3857           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3858             endp++;
3859           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3860             {
3861               if (endp + 1 < endp_orig && endp[0] == '(' && endp[1] == 'B')
3862                 /* This is an ASCII designation sequence (the two bytes
3863                    are read through ENDP, not END).  We can skip the tail.  */
3864                 endp += 2;
3865               else
3866                 /* Hmmm, we can't skip the tail.  */
3867                 endp = endp_orig;
3868             }
3869         }
3870     }
3871   *beg += begp - begp_orig;     /* Report the shrunken region.  */
3872   *end += endp - endp_orig;
3873   return;
3874 }
3875
3876 /* Like shrink_decoding_region but for encoding: shrink *BEG and *END
3877    to the part of the text that CODING really has to encode.  */
3878
3879 static void
3880 shrink_encoding_region (beg, end, coding, str)
3881      int *beg, *end;
3882      struct coding_system *coding;
3883      unsigned char *str;
3884 {
3885   unsigned char *head_orig, *head, *tail_orig, *tail;
3886   int convert_eol;
3887
3888   switch (coding->type)
3889     {
3890     case coding_type_ccl:
3891       /* We can't skip any data.  */
3892       return;
3893     case coding_type_no_conversion:
3894       /* We need no conversion.  */
3895       *beg = *end;
3896       return;
3897
3898     default:
3899       break;
3900     }
3901
3902   if (!str)
3903     {
3904       head = BYTE_POS_ADDR (*beg);
3905       tail = head + *end - *beg;
3906     }
3907   else
3908     {
3909       head = str + *beg;
3910       tail = str + *end;
3911     }
3912   head_orig = head;
3913   tail_orig = tail;
3914   convert_eol = (coding->eol_type == CODING_EOL_CR
3915                  || coding->eol_type == CODING_EOL_CRLF);
3916   /* Here, we don't have to check coding->pre_write_conversion because
3917      the caller is expected to have handled it already.  */
3918   if (coding->type == coding_type_undecided
3919       || coding->type == coding_type_emacs_mule
3920       || coding->type == coding_type_raw_text)
3921     {
3922       if (!convert_eol)
3923         /* Nothing needs encoding.  */
3924         head = tail;
3925       else
3926         {
3927           /* Keep only the text from the first LF through the last LF.  */
3928           for (; head < tail && *head != '\n'; head++)
3929             ;
3930           for (; head < tail && tail[-1] != '\n'; tail--)
3931             ;
3932         }
3933     }
3934   else
3935     {
3936       if (coding->type == coding_type_iso2022
3937           && (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL))
3938         {
3939           /* Skip leading ASCII only up to the last beginning of line.  */
3940           unsigned char *bol = head;
3941           for (; head < tail && *head < 0x80; head++)
3942             if (*head == '\n')
3943               bol = head + 1;
3944           head = bol;
3945         }
3946       else
3947         /* Skip leading ASCII, stopping at LF if eol must be converted.  */
3948         for (; head < tail && *head < 0x80
3949                && !(convert_eol && *head == '\n'); head++)
3950           ;
3951       /* Likewise for trailing ASCII.  */
3952       for (; head < tail && tail[-1] < 0x80
3953              && !(convert_eol && tail[-1] == '\n'); tail--)
3954         ;
3955     }
3956
3957   *beg += head - head_orig;
3958   *end += tail - tail_orig;
3959 }
3960
3961 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
3962    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
3963    coding system CODING, and return the status code of code conversion
3964    (currently always 0, so this value has no meaning).
3965
3966    How many characters and bytes were consumed and produced is
3967    recorded in the members consumed, consumed_char, produced and
3968    produced_char of the structure CODING.
3969
3970    If REPLACE is nonzero, we do various things as if the original text
3971    is deleted and a new text is inserted.  See the comments in
3972    replace_range (insdel.c) to know what we are doing.  */
3973
3974 int
3975 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
3976      int from, from_byte, to, to_byte, encodep, replace;
3977      struct coding_system *coding;
3978 {
3979   int len = to - from, len_byte = to_byte - from_byte;
3980   int require, inserted, inserted_byte;
3981   int head_skip, tail_skip, total_skip;
3982   Lisp_Object saved_coding_symbol = Qnil;
3983   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
3984   int first = 1;
3985   int fake_multibyte = 0;
3986   unsigned char *src, *dst;
3987   Lisp_Object deletion = Qnil;
3988
3989   if (from < PT && PT < to)
3990     SET_PT_BOTH (from, from_byte);
3991
3992   if (replace)
3993     {
3994       int saved_from = from;
3995
3996       prepare_to_modify_buffer (from, to, &from);
3997       if (saved_from != from)
3998         {
3999           to = from + len;
4000           if (multibyte)
4001             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4002           else
4003             from_byte = from, to_byte = to;
4004           len_byte = to_byte - from_byte;
4005         }
4006     }
4007
4008   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4009     {
4010       /* We must detect encoding of text and eol format.  */
4011
4012       if (from < GPT && to > GPT)
4013         move_gap_both (from, from_byte);
4014       if (coding->type == coding_type_undecided)
4015         {
4016           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4017           if (coding->type == coding_type_undecided)
4018             /* It seems that the text contains only ASCII, but we
4019                should not leave it undecided because the deeper
4020                decoding routine (decode_coding) tries to detect the
4021                encodings again in vain.  */
4022             coding->type = coding_type_emacs_mule;
4023         }
4024       if (coding->eol_type == CODING_EOL_UNDECIDED)
4025         {
4026           saved_coding_symbol = coding->symbol;
4027           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4028           if (coding->eol_type == CODING_EOL_UNDECIDED)
4029             coding->eol_type = CODING_EOL_LF;
4030           /* We had better recover the original eol format if we
4031              encounter an inconsistent eol format while decoding.  */
4032           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4033         }
4034     }
4035
4036   coding->consumed_char = len, coding->consumed = len_byte;
4037
4038   if (encodep
4039       ? ! CODING_REQUIRE_ENCODING (coding)
4040       : ! CODING_REQUIRE_DECODING (coding))
4041     {
4042       coding->produced = len_byte;
4043       if (multibyte
4044           && ! replace
4045           /* See the comment of the member heading_ascii in coding.h.  */
4046           && coding->heading_ascii < len_byte)
4047         {
4048           /* We still may have to combine byte at the head and the
4049              tail of the text in the region.  */
4050           if (from < GPT && GPT < to)
4051             move_gap_both (to, to_byte);
4052           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4053           adjust_after_insert (from, from_byte, to, to_byte, len);
4054           coding->produced_char = len;
4055         }
4056       else
4057         {
4058           if (!replace)
4059             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4060           coding->produced_char = len_byte;
4061         }
4062       return 0;
4063     }
4064
4065   /* Now we convert the text.  */
4066
4067   /* For encoding, we must process pre-write-conversion in advance.  */
4068   if (encodep
4069       && ! NILP (coding->pre_write_conversion)
4070       && SYMBOLP (coding->pre_write_conversion)
4071       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4072     {
4073       /* The function in pre-write-conversion may put a new text in a
4074          new buffer.  */
4075       struct buffer *prev = current_buffer, *new;
4076
4077       call2 (coding->pre_write_conversion,
4078              make_number (from), make_number (to));
4079       if (current_buffer != prev)
4080         {
4081           len = ZV - BEGV;
4082           new = current_buffer;
4083           set_buffer_internal_1 (prev);
4084           del_range_2 (from, from_byte, to, to_byte);
4085           insert_from_buffer (new, BEG, len, 0);
4086           to = from + len;
4087           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4088           len_byte = to_byte - from_byte;
4089         }
4090     }
4091
4092   if (replace)
4093     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4094
4095   /* Try to skip the heading and tailing ASCIIs.  */
4096   {
4097     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4098
4099     if (from < GPT && GPT < to)
4100       move_gap_both (from, from_byte);
4101     if (encodep)
4102       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4103     else
4104       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4105     if (from_byte == to_byte)
4106       {
4107         coding->produced = len_byte;
4108         coding->produced_char = multibyte ? len : len_byte;
4109         if (!replace)
4110           /* We must record and adjust for this new text now.  */
4111           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4112         return 0;
4113       }
4114
4115     head_skip = from_byte - from_byte_orig;
4116     tail_skip = to_byte_orig - to_byte;
4117     total_skip = head_skip + tail_skip;
4118     from += head_skip;
4119     to -= tail_skip;
4120     len -= total_skip; len_byte -= total_skip;
4121   }
4122
4123   /* For conversion, we must put the gap before the text in addition to
4124      making the gap larger for efficient decoding.  The required gap
4125      size starts from 2000 which is the magic number used in make_gap.
4126      But, after one batch of conversion, it will be incremented if we
4127      find that it is not enough.  */
4128   require = 2000;
4129
4130   if (GAP_SIZE  < require)
4131     make_gap (require - GAP_SIZE);
4132   move_gap_both (from, from_byte);
4133
4134   if (GPT - BEG < beg_unchanged)
4135     beg_unchanged = GPT - BEG;
4136   if (Z - GPT < end_unchanged)
4137     end_unchanged = Z - GPT;
4138
4139   inserted = inserted_byte = 0;
4140   src = GAP_END_ADDR, dst = GPT_ADDR;
4141
4142   GAP_SIZE += len_byte;
4143   ZV -= len;
4144   Z -= len;
4145   ZV_BYTE -= len_byte;
4146   Z_BYTE -= len_byte;
4147
4148   for (;;)
4149     {
4150       int result;
4151
4152       /* The buffer memory is changed from:
4153          +--------+converted-text+---------+-------original-text------+---+
4154          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4155                   |<------------------- GAP_SIZE -------------------->|  */
4156       if (encodep)
4157         result = encode_coding (coding, src, dst, len_byte, 0);
4158       else
4159         result = decode_coding (coding, src, dst, len_byte, 0);
4160       /* to:
4161          +--------+-------converted-text--------+--+---original-text--+---+
4162          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4163                   |<------------------- GAP_SIZE -------------------->|  */
4164       if (coding->fake_multibyte)
4165         fake_multibyte = 1;
4166
4167       if (!encodep && !multibyte)
4168         coding->produced_char = coding->produced;
4169       inserted += coding->produced_char;
4170       inserted_byte += coding->produced;
4171       len_byte -= coding->consumed;
4172       src += coding->consumed;
4173       dst += coding->produced;  /* This batch only; INSERTED_BYTE is cumulative.  */
4174
4175       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4176         {
4177           unsigned char *pend = dst, *p = pend - inserted_byte;
4178
4179           /* Encode LFs back to the original eol format (CR or CRLF).  */
4180           if (coding->eol_type == CODING_EOL_CR)
4181             {
4182               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4183             }
4184           else
4185             {
4186               int count = 0;
4187
4188               while (p < pend) if (*p++ == '\n') count++;
4189               if (src - dst < count)
4190                 {
4191                   /* We don't have sufficient room for putting LFs
4192                      back to CRLF.  We must record converted and
4193                      not-yet-converted text back to the buffer
4194                      content, enlarge the gap, then record them out of
4195                      the buffer contents again.  */
4196                   int add = len_byte + inserted_byte;
4197
4198                   GAP_SIZE -= add;
4199                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4200                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4201                   make_gap (count - GAP_SIZE);
4202                   GAP_SIZE += add;
4203                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4204                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4205                   /* Don't forget to update SRC, DST, and PEND.  */
4206                   src = GAP_END_ADDR - len_byte;
4207                   dst = GPT_ADDR + inserted_byte;
4208                   pend = dst;
4209                 }
4210               inserted += count;
4211               inserted_byte += count;
4212               coding->produced += count;
4213               p = dst = pend + count;
4214               while (count)
4215                 {
4216                   *--p = *--pend;
4217                   if (*p == '\n') count--, *--p = '\r';
4218                 }
4219             }
4220
4221           /* Suppress eol-format conversion in the further conversion.  */
4222           coding->eol_type = CODING_EOL_LF;
4223
4224           /* Restore the original symbol.  */
4225           coding->symbol = saved_coding_symbol;
4226
4227           continue;
4228         }
4229       if (len_byte <= 0)
4230         break;
4231       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4232         {
4233           /* The source text ends in invalid codes.  Let's just
4234              make them valid buffer contents, and finish conversion.  */
4235           inserted += len_byte;
4236           inserted_byte += len_byte;
4237           while (len_byte--)
4238             *dst++ = *src++;
4239           fake_multibyte = 1;
4240           break;
4241         }
4242       if (first)
4243         {
4244           /* We have just done the first batch of conversion which was
4245              stopped because of insufficient gap.  Let's reconsider the
4246              required gap size (i.e. SRC - DST) now.
4247
4248              We have converted ORIG bytes (== coding->consumed) into
4249              NEW bytes (coding->produced).  To convert the remaining
4250              LEN bytes, we may need REQUIRE bytes of gap, where:
4251                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4252                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4253              Here, we are sure that NEW >= ORIG.  */
4254           float ratio = coding->produced - coding->consumed;
4255           ratio /= coding->consumed;
4256           require = len_byte * ratio;
4257           first = 0;
4258         }
4259       if ((src - dst) < (require + 2000))
4260         {
4261           /* See the comment above the previous call of make_gap.  */
4262           int add = len_byte + inserted_byte;
4263
4264           GAP_SIZE -= add;
4265           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4266           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4267           make_gap (require + 2000);
4268           GAP_SIZE += add;
4269           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4270           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4271           /* Don't forget to update SRC, DST.  */
4272           src = GAP_END_ADDR - len_byte;
4273           dst = GPT_ADDR + inserted_byte;
4274         }
4275     }
4276   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4277
4278   if (multibyte
4279       && (fake_multibyte
4280           || !encodep && (to - from) != (to_byte - from_byte)))
4281     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4282
4283   /* If we have shrunk the conversion area, adjust it now.  */
4284   if (total_skip > 0)
4285     {
4286       if (tail_skip > 0)
4287         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4288       inserted += total_skip; inserted_byte += total_skip;
4289       GAP_SIZE += total_skip;
4290       GPT -= head_skip; GPT_BYTE -= head_skip;
4291       ZV -= total_skip; ZV_BYTE -= total_skip;
4292       Z -= total_skip; Z_BYTE -= total_skip;
4293       from -= head_skip; from_byte -= head_skip;
4294       to += tail_skip; to_byte += tail_skip;
4295     }
4296
4297   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4298
4299   if (! encodep && ! NILP (coding->post_read_conversion))
4300     {
4301       Lisp_Object val;
4302       int orig_inserted = inserted, pos = PT;
4303
4304       if (from != pos)
4305         temp_set_point_both (current_buffer, from, from_byte);
4306       val = call1 (coding->post_read_conversion, make_number (inserted));
4307       if (! NILP (val))
4308         {
4309           CHECK_NUMBER (val, 0);
4310           inserted = XFASTINT (val);
4311         }
4312       if (pos >= from + orig_inserted)
4313         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4314     }
4315
4316   signal_after_change (from, to - from, inserted);
4317
4318   {
4319     coding->consumed = to_byte - from_byte;
4320     coding->consumed_char = to - from;
4321     coding->produced = inserted_byte;
4322     coding->produced_char = inserted;
4323   }
4324
4325   return 0;
4326 }
4327
4328 Lisp_Object                     /* The converted string, or STR itself.  */
4329 code_convert_string (str, coding, encodep, nocopy)
4330      Lisp_Object str;           /* The text to convert.  */
4331      struct coding_system *coding;      /* The conversion to apply.  */
4332      int encodep, nocopy;       /* Nonzero ENCODEP encodes, zero decodes.  */
4333 {
4334   int len;
4335   char *buf;
4336   int from = 0, to = XSTRING (str)->size;
4337   int to_byte = STRING_BYTES (XSTRING (str));
4338   struct gcpro gcpro1;
4339   Lisp_Object saved_coding_symbol = Qnil;
4340   int result;
4341
4342   if (encodep && !NILP (coding->pre_write_conversion)
4343       || !encodep && !NILP (coding->post_read_conversion))
4344     {
4345       /* Since we have to call Lisp functions which assume target text
4346          is in a buffer, after setting a temporary buffer, call
4347          code_convert_region.  */
4348       int count = specpdl_ptr - specpdl;
4349       struct buffer *prev = current_buffer;
4350
4351       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4352       temp_output_buffer_setup (" *code-converting-work*");
4353       set_buffer_internal (XBUFFER (Vstandard_output));
4354       if (encodep)
4355         insert_from_string (str, 0, 0, to, to_byte, 0);
4356       else
4357         {
4358           /* We must insert the contents of STR as is without
4359              unibyte<->multibyte conversion.  */
4360           current_buffer->enable_multibyte_characters = Qnil;
4361           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4362           current_buffer->enable_multibyte_characters = Qt;
4363         }
4364       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4365       if (encodep)
4366         /* We must return the buffer contents as unibyte string.  */
4367         current_buffer->enable_multibyte_characters = Qnil;
4368       str = make_buffer_string (BEGV, ZV, 0);
4369       set_buffer_internal (prev);
4370       return unbind_to (count, str);
4371     }
4372
4373   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4374     {
4375       /* See the comments in code_convert_region.  */
4376       if (coding->type == coding_type_undecided)
4377         {
4378           detect_coding (coding, XSTRING (str)->data, to_byte);
4379           if (coding->type == coding_type_undecided)
4380             coding->type = coding_type_emacs_mule;
4381         }
4382       if (coding->eol_type == CODING_EOL_UNDECIDED)
4383         {
4384           saved_coding_symbol = coding->symbol;
4385           detect_eol (coding, XSTRING (str)->data, to_byte);
4386           if (coding->eol_type == CODING_EOL_UNDECIDED)
4387             coding->eol_type = CODING_EOL_LF;
4388           /* We had better recover the original eol format if we
4389              encounter an inconsistent eol format while decoding.  */
4390           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4391         }
4392     }
4393
4394   if (encodep
4395       ? ! CODING_REQUIRE_ENCODING (coding)
4396       : ! CODING_REQUIRE_DECODING (coding))
4397     from = to_byte;             /* Leaves an empty range to convert.  */
4398   else
4399     {
4400       /* Try to skip the heading and tailing ASCIIs.  */
4401       if (encodep)
4402         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4403       else
4404         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4405     }
4406   if (from == to_byte)          /* Nothing to convert; NOCOPY allows STR as is.  */
4407     return (nocopy ? str : Fcopy_sequence (str));
4408
4409   if (encodep)
4410     len = encoding_buffer_size (coding, to_byte - from);
4411   else
4412     len = decoding_buffer_size (coding, to_byte - from);
4413   len += from + STRING_BYTES (XSTRING (str)) - to_byte;  /* Skipped head and tail.  */
4414   GCPRO1 (str);
4415   buf = get_conversion_buffer (len);
4416   UNGCPRO;
4417
4418   if (from > 0)
4419     bcopy (XSTRING (str)->data, buf, from);      /* The skipped head.  */
4420   result = (encodep
4421             ? encode_coding (coding, XSTRING (str)->data + from,
4422                              buf + from, to_byte - from, len)
4423             : decode_coding (coding, XSTRING (str)->data + from,
4424                              buf + from, to_byte - from, len));
4425   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4426     {
4427       /* We simply try to decode the whole string again but without
4428          eol-conversion this time.  */
4429       coding->eol_type = CODING_EOL_LF;
4430       coding->symbol = saved_coding_symbol;
4431       return code_convert_string (str, coding, encodep, nocopy);
4432     }
4433
4434   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4435          STRING_BYTES (XSTRING (str)) - to_byte);        /* The skipped tail.  */
4436
4437   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4438   if (encodep)
4439     str = make_unibyte_string (buf, len + coding->produced);
4440   else
4441     str = make_string_from_bytes (buf, len + coding->produced_char,
4442                                   len + coding->produced);
4443   return str;
4444 }
4445
4446 \f
4447 #ifdef emacs
4448 /*** 7. Emacs Lisp library functions ***/
4449
4450 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4451   "Return t if OBJECT is nil or a coding-system.\n\
4452 See the documentation of `make-coding-system' for information\n\
4453 about coding-system objects.")
4454   (obj)
4455      Lisp_Object obj;
4456 {
4457   Lisp_Object spec;
4458   if (NILP (obj))
4459     return Qt;
4460   /* Fetch the coding-spec vector; only a symbol can have one.  */
4461   spec = SYMBOLP (obj) ? Fget (obj, Qcoding_system) : Qnil;
4462   if (VECTORP (spec) && XVECTOR (spec)->size == 5)
4463     return Qt;
4464   return Qnil;
4465 }
4466
4467 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4468        Sread_non_nil_coding_system, 1, 1, 0,
4469   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4470   (prompt)
4471      Lisp_Object prompt;
4472 {
4473   Lisp_Object name;
4474   for (;;)                      /* Ask again while the input is empty.  */
4475     {
4476       name = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, Qt, Qnil,
4477                                Qcoding_system_history, Qnil, Qnil);
4478       if (XSTRING (name)->size != 0) break;
4479     }
4480   return Fintern (name, Qnil);
4481 }
4482
4483 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4484   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4485 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4486   (prompt, default_coding_system)
4487      Lisp_Object prompt, default_coding_system;
4488 {
4489   Lisp_Object str;
4490   if (SYMBOLP (default_coding_system))
4491     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4492   str = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, Qt, Qnil,
4493                           Qcoding_system_history, default_coding_system, Qnil);
4494   if (XSTRING (str)->size == 0) return Qnil;     /* Null input.  */
4495   return Fintern (str, Qnil);
4496 }
4497
4498 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4499        1, 1, 0,
4500   "Check validity of CODING-SYSTEM.\n\
4501 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4502 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4503 The value of property should be a vector of length 5.")
4504   (coding_system)
4505      Lisp_Object coding_system;
4506 {
4507   CHECK_SYMBOL (coding_system, 0);
4508   if (!NILP (Fcoding_system_p (coding_system)))
4509     return coding_system;
4510   while (1)
4511     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4512 }
4513 \f
4514 Lisp_Object
4515 detect_coding_system (src, src_bytes, highest)
4516      unsigned char *src;
4517      int src_bytes, highest;
4518 {
4519   int coding_mask, eol_type;
4520   Lisp_Object val, tmp;
4521   int dummy;
4522
4523   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4524   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4525   if (eol_type == CODING_EOL_INCONSISTENT)
4526     eol_type == CODING_EOL_UNDECIDED;
4527
4528   if (!coding_mask)
4529     {
4530       val = Qundecided;
4531       if (eol_type != CODING_EOL_UNDECIDED)
4532         {
4533           Lisp_Object val2;
4534           val2 = Fget (Qundecided, Qeol_type);
4535           if (VECTORP (val2))
4536             val = XVECTOR (val2)->contents[eol_type];
4537         }
4538       return val;
4539     }
4540
4541   /* At first, gather possible coding systems in VAL.  */
4542   val = Qnil;
4543   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4544     {
4545       int idx
4546         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4547       if (coding_mask & (1 << idx))
4548         {
4549           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4550           if (highest)
4551             break;
4552         }
4553     }
4554   if (!highest)
4555     val = Fnreverse (val);
4556
4557   /* Then, substitute the elements by subsidiary coding systems.  */
4558   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4559     {
4560       if (eol_type != CODING_EOL_UNDECIDED)
4561         {
4562           Lisp_Object eol;
4563           eol = Fget (XCONS (tmp)->car, Qeol_type);
4564           if (VECTORP (eol))
4565             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4566         }
4567     }
4568   return (highest ? XCONS (val)->car : val);
4569 }
4570
4571 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4572        2, 3, 0,
4573   "Detect coding system of the text in the region between START and END.\n\
4574 Return a list of possible coding systems ordered by priority.\n\
4575 \n\
4576 If only ASCII characters are found, it returns `undecided'\n\
4577 or its subsidiary coding system according to a detected end-of-line format.\n\
4578 \n\
4579 If optional argument HIGHEST is non-nil, return the coding system of\n\
4580 highest priority.")
4581   (start, end, highest)
4582      Lisp_Object start, end, highest;
4583 {
4584   int from, to;
4585   int from_byte, to_byte;
4586
4587   CHECK_NUMBER_COERCE_MARKER (start, 0);
4588   CHECK_NUMBER_COERCE_MARKER (end, 1);
4589
4590   validate_region (&start, &end);
4591   from = XINT (start), to = XINT (end);
4592   from_byte = CHAR_TO_BYTE (from);
4593   to_byte = CHAR_TO_BYTE (to);
4594
4595   if (from < GPT && to >= GPT)
4596     move_gap_both (to, to_byte);
4597
4598   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4599                                to_byte - from_byte,
4600                                !NILP (highest));
4601 }
4602
4603 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4604        1, 2, 0,
4605   "Detect coding system of the text in STRING.\n\
4606 Return a list of possible coding systems ordered by priority.\n\
4607 \n\
4608 If only ASCII characters are found, it returns `undecided'\n\
4609 or its subsidiary coding system according to a detected end-of-line format.\n\
4610 \n\
4611 If optional argument HIGHEST is non-nil, return the coding system of\n\
4612 highest priority.")
4613   (string, highest)
4614      Lisp_Object string, highest;
4615 {
4616   CHECK_STRING (string, 0);
4617
4618   return detect_coding_system (XSTRING (string)->data,
4619                                STRING_BYTES (XSTRING (string)),
4620                                !NILP (highest));
4621 }
4622
4623 Lisp_Object
4624 code_convert_region1 (start, end, coding_system, encodep)
4625      Lisp_Object start, end, coding_system;
4626      int encodep;
4627 {
4628   struct coding_system coding;
4629   int from, to, len;
4630
4631   CHECK_NUMBER_COERCE_MARKER (start, 0);
4632   CHECK_NUMBER_COERCE_MARKER (end, 1);
4633   CHECK_SYMBOL (coding_system, 2);
4634
4635   validate_region (&start, &end);
4636   from = XFASTINT (start);
4637   to = XFASTINT (end);
4638
4639   if (NILP (coding_system))
4640     return make_number (to - from);
4641
4642   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4643     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4644
4645   coding.mode |= CODING_MODE_LAST_BLOCK;
4646   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4647                        &coding, encodep, 1);
4648   Vlast_coding_system_used = coding.symbol;
4649   return make_number (coding.produced_char);
4650 }
4651
DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
       3, 3, "r\nzCoding system: ",
  "Decode the current region by specified coding system.\n\
When called from a program, takes three arguments:\n\
START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)\n\
It returns the length of the decoded text.")
  (start, end, coding_system)
     Lisp_Object start, end, coding_system;
{
  /* All the work is done by code_convert_region1; the final 0 is its
     ENCODEP argument, i.e. decode rather than encode.  */
  return code_convert_region1 (start, end, coding_system, 0);
}
4666
DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
       3, 3, "r\nzCoding system: ",
  "Encode the current region by specified coding system.\n\
When called from a program, takes three arguments:\n\
START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)\n\
It returns the length of the encoded text.")
  (start, end, coding_system)
     Lisp_Object start, end, coding_system;
{
  /* All the work is done by code_convert_region1; the final 1 is its
     ENCODEP argument, i.e. encode rather than decode.  */
  return code_convert_region1 (start, end, coding_system, 1);
}
4681
4682 Lisp_Object
4683 code_convert_string1 (string, coding_system, nocopy, encodep)
4684      Lisp_Object string, coding_system, nocopy;
4685      int encodep;
4686 {
4687   struct coding_system coding;
4688
4689   CHECK_STRING (string, 0);
4690   CHECK_SYMBOL (coding_system, 1);
4691
4692   if (NILP (coding_system))
4693     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4694
4695   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4696     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4697
4698   coding.mode |= CODING_MODE_LAST_BLOCK;
4699   Vlast_coding_system_used = coding.symbol;
4700   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4701 }
4702
DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
       2, 3, 0,
  "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
if the decoding operation is trivial.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)")
  (string, coding_system, nocopy)
     Lisp_Object string, coding_system, nocopy;
{
  /* All the work is done by code_convert_string1; the final 0 is its
     ENCODEP argument, i.e. decode rather than encode.  */
  return code_convert_string1 (string, coding_system, nocopy, 0);
}
4716
DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
       2, 3, 0,
  "Encode STRING to CODING-SYSTEM, and return the result.\n\
Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
if the encoding operation is trivial.\n\
This function sets `last-coding-system-used' to the precise coding system\n\
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
not fully specified.)")
  (string, coding_system, nocopy)
     Lisp_Object string, coding_system, nocopy;
{
  /* All the work is done by code_convert_string1; the final 1 is its
     ENCODEP argument, i.e. encode rather than decode.  */
  return code_convert_string1 (string, coding_system, nocopy, 1);
}
4730
4731 \f
4732 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4733   "Decode a JISX0208 character of shift-jis encoding.\n\
4734 CODE is the character code in SJIS.\n\
4735 Return the corresponding character.")
4736   (code)
4737      Lisp_Object code;
4738 {
4739   unsigned char c1, c2, s1, s2;
4740   Lisp_Object val;
4741
4742   CHECK_NUMBER (code, 0);
4743   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4744   DECODE_SJIS (s1, s2, c1, c2);
4745   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4746   return val;
4747 }
4748
4749 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4750   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4751 Return the corresponding character code in SJIS.")
4752   (ch)
4753      Lisp_Object ch;
4754 {
4755   int charset, c1, c2, s1, s2;
4756   Lisp_Object val;
4757
4758   CHECK_NUMBER (ch, 0);
4759   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4760   if (charset == charset_jisx0208)
4761     {
4762       ENCODE_SJIS (c1, c2, s1, s2);
4763       XSETFASTINT (val, (s1 << 8) | s2);
4764     }
4765   else
4766     XSETFASTINT (val, 0);
4767   return val;
4768 }
4769
4770 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4771   "Decode a Big5 character CODE of BIG5 coding system.\n\
4772 CODE is the character code in BIG5.\n\
4773 Return the corresponding character.")
4774   (code)
4775      Lisp_Object code;
4776 {
4777   int charset;
4778   unsigned char b1, b2, c1, c2;
4779   Lisp_Object val;
4780
4781   CHECK_NUMBER (code, 0);
4782   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4783   DECODE_BIG5 (b1, b2, charset, c1, c2);
4784   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4785   return val;
4786 }
4787
4788 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4789   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4790 Return the corresponding character code in Big5.")
4791   (ch)
4792      Lisp_Object ch;
4793 {
4794   int charset, c1, c2, b1, b2;
4795   Lisp_Object val;
4796
4797   CHECK_NUMBER (ch, 0);
4798   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4799   if (charset == charset_big5_1 || charset == charset_big5_2)
4800     {
4801       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4802       XSETFASTINT (val, (b1 << 8) | b2);
4803     }
4804   else
4805     XSETFASTINT (val, 0);
4806   return val;
4807 }
4808 \f
4809 DEFUN ("set-terminal-coding-system-internal",
4810        Fset_terminal_coding_system_internal,
4811        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4812   (coding_system)
4813      Lisp_Object coding_system;
4814 {
4815   CHECK_SYMBOL (coding_system, 0);
4816   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4817   /* We had better not send unsafe characters to terminal.  */
4818   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4819
4820   return Qnil;
4821 }
4822
DEFUN ("set-safe-terminal-coding-system-internal",
       Fset_safe_terminal_coding_system_internal,
       Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
  (coding_system)
     Lisp_Object coding_system;
{
  CHECK_SYMBOL (coding_system, 0);
  /* Validate CODING_SYSTEM (Fcheck_coding_system signals if it is not
     a coding system) and load it into the global safe_terminal_coding.
     The return value of setup_coding_system is not examined here.  */
  setup_coding_system (Fcheck_coding_system (coding_system),
                       &safe_terminal_coding);
  return Qnil;
}
4834
DEFUN ("terminal-coding-system",
       Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
  "Return coding system specified for terminal output.")
  ()
{
  /* Report the symbol recorded in the global terminal_coding, which
     `set-terminal-coding-system-internal' fills in.  */
  return terminal_coding.symbol;
}
4842
DEFUN ("set-keyboard-coding-system-internal",
       Fset_keyboard_coding_system_internal,
       Sset_keyboard_coding_system_internal, 1, 1, 0, "")
  (coding_system)
     Lisp_Object coding_system;
{
  CHECK_SYMBOL (coding_system, 0);
  /* Validate CODING_SYSTEM (Fcheck_coding_system signals if it is not
     a coding system) and load it into the global keyboard_coding.
     The return value of setup_coding_system is not examined here.  */
  setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
  return Qnil;
}
4853
DEFUN ("keyboard-coding-system",
       Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
  "Return coding system specified for decoding keyboard input.")
  ()
{
  /* Report the symbol recorded in the global keyboard_coding, which
     `set-keyboard-coding-system-internal' fills in.  */
  return keyboard_coding.symbol;
}
4861
4862 \f
4863 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4864        Sfind_operation_coding_system,  1, MANY, 0,
4865   "Choose a coding system for an operation based on the target name.\n\
4866 The value names a pair of coding systems: (DECODING-SYSTEM ENCODING-SYSTEM).\n\
4867 DECODING-SYSTEM is the coding system to use for decoding\n\
4868 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4869 for encoding (in case OPERATION does encoding).\n\
4870 \n\
4871 The first argument OPERATION specifies an I/O primitive:\n\
4872   For file I/O, `insert-file-contents' or `write-region'.\n\
4873   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4874   For network I/O, `open-network-stream'.\n\
4875 \n\
4876 The remaining arguments should be the same arguments that were passed\n\
4877 to the primitive.  Depending on which primitive, one of those arguments\n\
4878 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4879 whichever argument specifies the file name is TARGET.\n\
4880 \n\
4881 TARGET has a meaning which depends on OPERATION:\n\
4882   For file I/O, TARGET is a file name.\n\
4883   For process I/O, TARGET is a process name.\n\
4884   For network I/O, TARGET is a service name or a port number\n\
4885 \n\
4886 This function looks up what specified for TARGET in,\n\
4887 `file-coding-system-alist', `process-coding-system-alist',\n\
4888 or `network-coding-system-alist' depending on OPERATION.\n\
4889 They may specify a coding system, a cons of coding systems,\n\
4890 or a function symbol to call.\n\
4891 In the last case, we call the function with one argument,\n\
4892 which is a list of all the arguments given to this function.")
4893   (nargs, args)
4894      int nargs;
4895      Lisp_Object *args;
4896 {
4897   Lisp_Object operation, target_idx, target, val;
4898   register Lisp_Object chain;
4899
4900   if (nargs < 2)
4901     error ("Too few arguments");
4902   operation = args[0];
4903   if (!SYMBOLP (operation)
4904       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
4905     error ("Invalid first arguement");
4906   if (nargs < 1 + XINT (target_idx))
4907     error ("Too few arguments for operation: %s",
4908            XSYMBOL (operation)->name->data);
4909   target = args[XINT (target_idx) + 1];
4910   if (!(STRINGP (target)
4911         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
4912     error ("Invalid %dth argument", XINT (target_idx) + 1);
4913
4914   chain = ((EQ (operation, Qinsert_file_contents)
4915             || EQ (operation, Qwrite_region))
4916            ? Vfile_coding_system_alist
4917            : (EQ (operation, Qopen_network_stream)
4918               ? Vnetwork_coding_system_alist
4919               : Vprocess_coding_system_alist));
4920   if (NILP (chain))
4921     return Qnil;
4922
4923   for (; CONSP (chain); chain = XCONS (chain)->cdr)
4924     {
4925       Lisp_Object elt;
4926       elt = XCONS (chain)->car;
4927
4928       if (CONSP (elt)
4929           && ((STRINGP (target)
4930                && STRINGP (XCONS (elt)->car)
4931                && fast_string_match (XCONS (elt)->car, target) >= 0)
4932               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
4933         {
4934           val = XCONS (elt)->cdr;
4935           /* Here, if VAL is both a valid coding system and a valid
4936              function symbol, we return VAL as a coding system.  */
4937           if (CONSP (val))
4938             return val;
4939           if (! SYMBOLP (val))
4940             return Qnil;
4941           if (! NILP (Fcoding_system_p (val)))
4942             return Fcons (val, val);
4943           if (! NILP (Ffboundp (val)))
4944             {
4945               val = call1 (val, Flist (nargs, args));
4946               if (CONSP (val))
4947                 return val;
4948               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
4949                 return Fcons (val, val);
4950             }
4951           return Qnil;
4952         }
4953     }
4954   return Qnil;
4955 }
4956
4957 DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems,
4958        Supdate_iso_coding_systems, 0, 0, 0,
4959   "Update internal database for ISO2022 based coding systems.\n\
4960 When values of the following coding categories are changed, you must\n\
4961 call this function:\n\
4962   coding-category-iso-7, coding-category-iso-7-tight,\n\
4963   coding-category-iso-8-1, coding-category-iso-8-2,\n\
4964   coding-category-iso-7-else, coding-category-iso-8-else")
4965   ()
4966 {
4967   int i;
4968
4969   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE;
4970        i++)
4971     {
4972       if (! coding_system_table[i])
4973         coding_system_table[i]
4974           = (struct coding_system *) xmalloc (sizeof (struct coding_system));
4975       setup_coding_system
4976         (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value,
4977          coding_system_table[i]);
4978     }
4979   return Qnil;
4980 }
4981
4982 #endif /* emacs */
4983
4984 \f
4985 /*** 8. Post-amble ***/
4986
/* One-time initialization of this file's static state: fill the
   emacs_code_class and iso_code_class byte-classification tables,
   allocate the conversion buffer, reset the keyboard and terminal
   coding systems, clear coding_system_table, and choose the system's
   end-of-line type.  Within each table, later assignments deliberately
   override entries written by the earlier loops, so the statement
   order matters.  */

void
init_coding_once ()
{
  int i;

  /* Emacs' internal format specific initialize routine.  */
  /* 0x00..0x20 start out as control codes; LF and CR are then given
     their own classes.  */
  for (i = 0; i <= 0x20; i++)
    emacs_code_class[i] = EMACS_control_code;
  emacs_code_class[0x0A] = EMACS_linefeed_code;
  emacs_code_class[0x0D] = EMACS_carriage_return_code;
  for (i = 0x21 ; i < 0x7F; i++)
    emacs_code_class[i] = EMACS_ascii_code;
  emacs_code_class[0x7F] = EMACS_control_code;
  emacs_code_class[0x80] = EMACS_leading_code_composition;
  /* 0x81..0xFE default to invalid; the four private leading codes are
     overridden just below.  Entry 0xFF is not assigned here.  */
  for (i = 0x81; i < 0xFF; i++)
    emacs_code_class[i] = EMACS_invalid_code;
  emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
  emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
  emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
  emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;

  /* ISO2022 specific initialize routine.  */
  /* The loops skip 0x20, 0x7F, 0xA0 and 0xFF, which get their own
     classes right after; the individual control functions (CR, SO,
     SI, ESC, SS2, SS3, CSI, ...) then override the generic control
     class.  */
  for (i = 0; i < 0x20; i++)
    iso_code_class[i] = ISO_control_code;
  for (i = 0x21; i < 0x7F; i++)
    iso_code_class[i] = ISO_graphic_plane_0;
  for (i = 0x80; i < 0xA0; i++)
    iso_code_class[i] = ISO_control_code;
  for (i = 0xA1; i < 0xFF; i++)
    iso_code_class[i] = ISO_graphic_plane_1;
  iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
  iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
  iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
  iso_code_class[ISO_CODE_SO] = ISO_shift_out;
  iso_code_class[ISO_CODE_SI] = ISO_shift_in;
  iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
  iso_code_class[ISO_CODE_ESC] = ISO_escape;
  iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
  iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
  iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;

  /* Start with a conversion buffer of the minimum size.  */
  conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
  conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);

  /* Keyboard and terminal coding systems start out set up from nil.  */
  setup_coding_system (Qnil, &keyboard_coding);
  setup_coding_system (Qnil, &terminal_coding);
  setup_coding_system (Qnil, &safe_terminal_coding);

  /* No per-category coding systems yet; `update-iso-coding-systems'
     allocates the ISO entries on demand.  */
  bzero (coding_system_table, sizeof coding_system_table);

  /* CRLF on MSDOS and Windows NT builds, LF everywhere else.  */
#if defined (MSDOS) || defined (WINDOWSNT)
  system_eol_type = CODING_EOL_CRLF;
#else
  system_eol_type = CODING_EOL_LF;
#endif
}
5043
5044 #ifdef emacs
5045
5046 void
5047 syms_of_coding ()
5048 {
5049   Qtarget_idx = intern ("target-idx");
5050   staticpro (&Qtarget_idx);
5051
5052   Qcoding_system_history = intern ("coding-system-history");
5053   staticpro (&Qcoding_system_history);
5054   Fset (Qcoding_system_history, Qnil);
5055
5056   /* Target FILENAME is the first argument.  */
5057   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5058   /* Target FILENAME is the third argument.  */
5059   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5060
5061   Qcall_process = intern ("call-process");
5062   staticpro (&Qcall_process);
5063   /* Target PROGRAM is the first argument.  */
5064   Fput (Qcall_process, Qtarget_idx, make_number (0));
5065
5066   Qcall_process_region = intern ("call-process-region");
5067   staticpro (&Qcall_process_region);
5068   /* Target PROGRAM is the third argument.  */
5069   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5070
5071   Qstart_process = intern ("start-process");
5072   staticpro (&Qstart_process);
5073   /* Target PROGRAM is the third argument.  */
5074   Fput (Qstart_process, Qtarget_idx, make_number (2));
5075
5076   Qopen_network_stream = intern ("open-network-stream");
5077   staticpro (&Qopen_network_stream);
5078   /* Target SERVICE is the fourth argument.  */
5079   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5080
5081   Qcoding_system = intern ("coding-system");
5082   staticpro (&Qcoding_system);
5083
5084   Qeol_type = intern ("eol-type");
5085   staticpro (&Qeol_type);
5086
5087   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5088   staticpro (&Qbuffer_file_coding_system);
5089
5090   Qpost_read_conversion = intern ("post-read-conversion");
5091   staticpro (&Qpost_read_conversion);
5092
5093   Qpre_write_conversion = intern ("pre-write-conversion");
5094   staticpro (&Qpre_write_conversion);
5095
5096   Qno_conversion = intern ("no-conversion");
5097   staticpro (&Qno_conversion);
5098
5099   Qundecided = intern ("undecided");
5100   staticpro (&Qundecided);
5101
5102   Qcoding_system_p = intern ("coding-system-p");
5103   staticpro (&Qcoding_system_p);
5104
5105   Qcoding_system_error = intern ("coding-system-error");
5106   staticpro (&Qcoding_system_error);
5107
5108   Fput (Qcoding_system_error, Qerror_conditions,
5109         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5110   Fput (Qcoding_system_error, Qerror_message,
5111         build_string ("Invalid coding system"));
5112
5113   Qcoding_category = intern ("coding-category");
5114   staticpro (&Qcoding_category);
5115   Qcoding_category_index = intern ("coding-category-index");
5116   staticpro (&Qcoding_category_index);
5117
5118   Vcoding_category_table
5119     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5120   staticpro (&Vcoding_category_table);
5121   {
5122     int i;
5123     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5124       {
5125         XVECTOR (Vcoding_category_table)->contents[i]
5126           = intern (coding_category_name[i]);
5127         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5128               Qcoding_category_index, make_number (i));
5129       }
5130   }
5131
5132   Qcharacter_unification_table = intern ("character-unification-table");
5133   staticpro (&Qcharacter_unification_table);
5134   Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
5135         make_number (0));
5136
5137   Qcharacter_unification_table_for_decode
5138     = intern ("character-unification-table-for-decode");
5139   staticpro (&Qcharacter_unification_table_for_decode);
5140
5141   Qcharacter_unification_table_for_encode
5142     = intern ("character-unification-table-for-encode");
5143   staticpro (&Qcharacter_unification_table_for_encode);
5144
5145   Qsafe_charsets = intern ("safe-charsets");
5146   staticpro (&Qsafe_charsets);
5147
5148   Qemacs_mule = intern ("emacs-mule");
5149   staticpro (&Qemacs_mule);
5150
5151   Qraw_text = intern ("raw-text");
5152   staticpro (&Qraw_text);
5153
5154   defsubr (&Scoding_system_p);
5155   defsubr (&Sread_coding_system);
5156   defsubr (&Sread_non_nil_coding_system);
5157   defsubr (&Scheck_coding_system);
5158   defsubr (&Sdetect_coding_region);
5159   defsubr (&Sdetect_coding_string);
5160   defsubr (&Sdecode_coding_region);
5161   defsubr (&Sencode_coding_region);
5162   defsubr (&Sdecode_coding_string);
5163   defsubr (&Sencode_coding_string);
5164   defsubr (&Sdecode_sjis_char);
5165   defsubr (&Sencode_sjis_char);
5166   defsubr (&Sdecode_big5_char);
5167   defsubr (&Sencode_big5_char);
5168   defsubr (&Sset_terminal_coding_system_internal);
5169   defsubr (&Sset_safe_terminal_coding_system_internal);
5170   defsubr (&Sterminal_coding_system);
5171   defsubr (&Sset_keyboard_coding_system_internal);
5172   defsubr (&Skeyboard_coding_system);
5173   defsubr (&Sfind_operation_coding_system);
5174   defsubr (&Supdate_iso_coding_systems);
5175
5176   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5177     "List of coding systems.\n\
5178 \n\
5179 Do not alter the value of this variable manually.  This variable should be\n\
5180 updated by the functions `make-coding-system' and\n\
5181 `define-coding-system-alias'.");
5182   Vcoding_system_list = Qnil;
5183
5184   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5185     "Alist of coding system names.\n\
5186 Each element is one element list of coding system name.\n\
5187 This variable is given to `completing-read' as TABLE argument.\n\
5188 \n\
5189 Do not alter the value of this variable manually.  This variable should be\n\
5190 updated by the functions `make-coding-system' and\n\
5191 `define-coding-system-alias'.");
5192   Vcoding_system_alist = Qnil;
5193
5194   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5195     "List of coding-categories (symbols) ordered by priority.");
5196   {
5197     int i;
5198
5199     Vcoding_category_list = Qnil;
5200     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5201       Vcoding_category_list
5202         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5203                  Vcoding_category_list);
5204   }
5205
5206   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5207     "Specify the coding system for read operations.\n\
5208 It is useful to bind this variable with `let', but do not set it globally.\n\
5209 If the value is a coding system, it is used for decoding on read operation.\n\
5210 If not, an appropriate element is used from one of the coding system alists:\n\
5211 There are three such tables, `file-coding-system-alist',\n\
5212 `process-coding-system-alist', and `network-coding-system-alist'.");
5213   Vcoding_system_for_read = Qnil;
5214
5215   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5216     "Specify the coding system for write operations.\n\
5217 It is useful to bind this variable with `let', but do not set it globally.\n\
5218 If the value is a coding system, it is used for encoding on write operation.\n\
5219 If not, an appropriate element is used from one of the coding system alists:\n\
5220 There are three such tables, `file-coding-system-alist',\n\
5221 `process-coding-system-alist', and `network-coding-system-alist'.");
5222   Vcoding_system_for_write = Qnil;
5223
5224   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5225     "Coding system used in the latest file or process I/O.");
5226   Vlast_coding_system_used = Qnil;
5227
5228   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5229     "*Non-nil inhibit code conversion of end-of-line format in any cases.");
5230   inhibit_eol_conversion = 0;
5231
5232   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5233     "Non-nil means process buffer inherits coding system of process output.\n\
5234 Bind it to t if the process output is to be treated as if it were a file\n\
5235 read from some filesystem.");
5236   inherit_process_coding_system = 0;
5237
5238   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5239     "Alist to decide a coding system to use for a file I/O operation.\n\
5240 The format is ((PATTERN . VAL) ...),\n\
5241 where PATTERN is a regular expression matching a file name,\n\
5242 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5243 If VAL is a coding system, it is used for both decoding and encoding\n\
5244 the file contents.\n\
5245 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5246 and the cdr part is used for encoding.\n\
5247 If VAL is a function symbol, the function must return a coding system\n\
5248 or a cons of coding systems which are used as above.\n\
5249 \n\
5250 See also the function `find-operation-coding-system'.");
5251   Vfile_coding_system_alist = Qnil;
5252
5253   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5254     "Alist to decide a coding system to use for a process I/O operation.\n\
5255 The format is ((PATTERN . VAL) ...),\n\
5256 where PATTERN is a regular expression matching a program name,\n\
5257 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5258 If VAL is a coding system, it is used for both decoding what received\n\
5259 from the program and encoding what sent to the program.\n\
5260 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5261 and the cdr part is used for encoding.\n\
5262 If VAL is a function symbol, the function must return a coding system\n\
5263 or a cons of coding systems which are used as above.\n\
5264 \n\
5265 See also the function `find-operation-coding-system'.");
5266   Vprocess_coding_system_alist = Qnil;
5267
5268   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5269     "Alist to decide a coding system to use for a network I/O operation.\n\
5270 The format is ((PATTERN . VAL) ...),\n\
5271 where PATTERN is a regular expression matching a network service name\n\
5272 or is a port number to connect to,\n\
5273 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5274 If VAL is a coding system, it is used for both decoding what received\n\
5275 from the network stream and encoding what sent to the network stream.\n\
5276 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5277 and the cdr part is used for encoding.\n\
5278 If VAL is a function symbol, the function must return a coding system\n\
5279 or a cons of coding systems which are used as above.\n\
5280 \n\
5281 See also the function `find-operation-coding-system'.");
5282   Vnetwork_coding_system_alist = Qnil;
5283
5284   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5285     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
5286   eol_mnemonic_unix = ':';
5287
5288   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5289     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5290   eol_mnemonic_dos = '\\';
5291
5292   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5293     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5294   eol_mnemonic_mac = '/';
5295
5296   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5297     "Mnemonic character indicating end-of-line format is not yet decided.");
5298   eol_mnemonic_undecided = ':';
5299
5300   DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
5301     "Non-nil means ISO 2022 encoder/decoder do character unification.");
5302   Venable_character_unification = Qt;
5303
5304   DEFVAR_LISP ("standard-character-unification-table-for-decode",
5305     &Vstandard_character_unification_table_for_decode,
5306     "Table for unifying characters when reading.");
5307   Vstandard_character_unification_table_for_decode = Qnil;
5308
5309   DEFVAR_LISP ("standard-character-unification-table-for-encode",
5310     &Vstandard_character_unification_table_for_encode,
5311     "Table for unifying characters when writing.");
5312   Vstandard_character_unification_table_for_encode = Qnil;
5313
5314   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5315     "Alist of charsets vs revision numbers.\n\
5316 While encoding, if a charset (car part of an element) is found,\n\
5317 designate it with the escape sequence identifying revision (cdr part of the element).");
5318   Vcharset_revision_alist = Qnil;
5319
5320   DEFVAR_LISP ("default-process-coding-system",
5321                &Vdefault_process_coding_system,
5322     "Cons of coding systems used for process I/O by default.\n\
5323 The car part is used for decoding a process output,\n\
5324 the cdr part is used for encoding a text to be sent to a process.");
5325   Vdefault_process_coding_system = Qnil;
5326
5327   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5328     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5329 This is a vector of length 256.\n\
5330 If Nth element is non-nil, the existence of code N in a file\n\
5331 \(or output of subprocess) doesn't prevent it from being detected as\n\
5332 a coding system of ISO 2022 variant which has a flag\n\
5333 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5334 or reading output of a subprocess.\n\
5335 Only the 128th through 159th elements have a meaning.");
5336   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5337
5338   DEFVAR_LISP ("select-safe-coding-system-function",
5339                &Vselect_safe_coding_system_function,
5340     "Function to call to select safe coding system for encoding a text.\n\
5341 \n\
5342 If set, this function is called to force a user to select a proper\n\
5343 coding system which can encode the text in the case that a default\n\
5344 coding system used in each operation can't encode the text.\n\
5345 \n\
5346 The default value is `select-safe-coding-system' (which see).");
5347   Vselect_safe_coding_system_function = Qnil;
5348
5349 }
5350
5351 #endif /* emacs */