src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. End-of-line handlers
  29   6. C library functions
  30   7. Emacs Lisp library functions
  31   8. Post-amble
  32
  33 */
  34
  35 /*** GENERAL NOTE on CODING SYSTEM ***
  36
  37   Coding system is an encoding mechanism of one or more character
  38   sets.  Here's a list of coding systems which Emacs can handle.  When
  39   we say "decode", it means converting some other coding system to
  40   Emacs' internal format (emacs-mule), and when we say "encode",
  41   it means converting the coding system emacs-mule to some other
  42   coding system.
  43
  44   0. Emacs' internal format (emacs-mule)
  45
  46   Emacs itself holds a multi-lingual character in a buffer and a string
  47   in a special format.  Details are described in section 2.
  48
  49   1. ISO2022
  50
  51   The most famous coding system for multiple character sets.  X's
  52   Compound Text, various EUCs (Extended Unix Code), and coding
  53   systems used in Internet communication such as ISO-2022-JP are
  54   all variants of ISO2022.  Details are described in section 3.
  55
  56   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  57
  58   A coding system to encode character sets: ASCII, JISX0201, and
  59   JISX0208.  Widely used for PC's in Japan.  Details are described in
  60   section 4.
  61
  62   3. BIG5
  63
  64   A coding system to encode character sets: ASCII and Big5.  Widely
  65   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  66   described in section 4.  In this file, when we write "BIG5"
  67   (all uppercase), we mean the coding system, and when we write
  68   "Big5" (capitalized), we mean the character set.
  69
  70   4. Raw text
  71
  72   A coding system for a text containing random 8-bit code.  Emacs does
  73   no code conversion on such a text except for end-of-line format.
  74
  75   5. Other
  76
  77   If a user wants to read/write a text encoded in a coding system not
  78   listed above, he can supply a decoder and an encoder for it in CCL
  79   (Code Conversion Language) programs.  Emacs executes the CCL program
  80   while reading/writing.
  81
  82   Emacs represents a coding system by a Lisp symbol that has a property
  83   `coding-system'.  But, before actually using the coding system, the
  84   information about it is set in a structure of type `struct
  85   coding_system' for rapid processing.  See section 6 for more details.
  86
  87 */
  88
  89 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  90
  91   How end-of-line of a text is encoded depends on a system.  For
  92   instance, Unix's format is just one byte of `line-feed' code,
  93   whereas DOS's format is two-byte sequence of `carriage-return' and
  94   `line-feed' codes.  MacOS's format is usually one byte of
  95   `carriage-return'.
  96
  97   Since text characters encoding and end-of-line encoding are
  98   independent, any coding system described above can take
  99   any format of end-of-line.  So, Emacs has information of format of
 100   end-of-line in each coding-system.  See section 6 for more details.
 101
 102 */
 103
 104 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 105
 106   These functions check if a text between SRC and SRC_END is encoded
 107   in the coding system category XXX.  Each returns an integer value in
 108   which appropriate flag bits for the category XXX are set.  The flag
 109   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 110   template of these functions.  */
 111 #if 0
 112 int
 113 detect_coding_emacs_mule (src, src_end)
 114      unsigned char *src, *src_end;
 115 {
 116   ...
 117 }
 118 #endif
 119
 120 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 121
 122   These functions decode SRC_BYTES length text at SOURCE encoded in
 123   CODING to Emacs' internal format (emacs-mule).  The resulting text
 124   goes to a place pointed to by DESTINATION, the length of which
 125   should not exceed DST_BYTES.  These functions set the information of
 126   original and decoded texts in the members produced, produced_char,
 127   consumed, and consumed_char of the structure *CODING.
 128
 129   The return value is an integer (CODING_FINISH_XXX) indicating how
 130   the decoding finished.
 131
 132   DST_BYTES zero means that source area and destination area are
 133   overlapped, which means that we can produce a decoded text until it
 134   reaches the head of the not-yet-decoded source text.
 135
 136   Below is a template of these functions.  */
 137 #if 0
 138 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 139      struct coding_system *coding;
 140      unsigned char *source, *destination;
 141      int src_bytes, dst_bytes;
 142 {
 143   ...
 144 }
 145 #endif
 146
 147 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 148
 149   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 150   internal format (emacs-mule) to CODING.  The resulting text goes to
 151   a place pointed to by DESTINATION, the length of which should not
 152   exceed DST_BYTES.  These functions set the information of
 153   original and encoded texts in the members produced, produced_char,
 154   consumed, and consumed_char of the structure *CODING.
 155
 156   The return value is an integer (CODING_FINISH_XXX) indicating how
 157   the encoding finished.
 158
 159   DST_BYTES zero means that source area and destination area are
 160   overlapped, which means that we can produce an encoded text until it
 161   reaches the head of the not-yet-encoded source text.
 162
 163   Below is a template of these functions.  */
 164 #if 0
 165 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 166      struct coding_system *coding;
 167      unsigned char *source, *destination;
 168      int src_bytes, dst_bytes;
 169 {
 170   ...
 171 }
 172 #endif
 173
 174 /*** COMMONLY USED MACROS ***/
 175
 176 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 177    THREE_MORE_BYTES safely get one, two, and three bytes from the
 178    source text respectively.  If there are not enough bytes in the
 179    source, they jump to `label_end_of_loop'.  The caller should set
 180    variables `src' and `src_end' to appropriate areas in advance.  */
 181
 182 #define ONE_MORE_BYTE(c1)       \
 183   do {                          \
 184     if (src < src_end)          \
 185       c1 = *src++;              \
 186     else                        \
 187       goto label_end_of_loop;   \
 188   } while (0)
 189
 190 #define TWO_MORE_BYTES(c1, c2)  \
 191   do {                          \
 192     if (src + 1 < src_end)      \
 193       c1 = *src++, c2 = *src++; \
 194     else                        \
 195       goto label_end_of_loop;   \
 196   } while (0)
 197
 198 #define THREE_MORE_BYTES(c1, c2, c3)            \
 199   do {                                          \
 200     if (src + 2 < src_end)                      \
 201       c1 = *src++, c2 = *src++, c3 = *src++;    \
 202     else                                        \
 203       goto label_end_of_loop;                   \
 204   } while (0)
 205
 206 /* The following three macros DECODE_CHARACTER_ASCII,
 207    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 208    the multi-byte form of a character of each class at the place
 209    pointed by `dst'.  The caller should set the variable `dst' to
 210    point to an appropriate area and the variable `coding' to point to
 211    the coding-system of the currently decoding text in advance.  */
 212
 213 /* Decode one ASCII character C.  */
 214
 215 #define DECODE_CHARACTER_ASCII(c)                               \
 216   do {                                                          \
 217     if (COMPOSING_P (coding->composing))                        \
 218       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
 219     else                                                        \
 220       {                                                         \
 221         *dst++ = (c);                                           \
 222         coding->produced_char++;                                \
 223       }                                                         \
 224   } while (0)
 225
 226 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 227    position-code is C.  */
 228
 229 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 230   do {                                                                  \
 231     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 232     if (COMPOSING_P (coding->composing))                                \
 233       *dst++ = leading_code + 0x20;                                     \
 234     else                                                                \
 235       {                                                                 \
 236         *dst++ = leading_code;                                          \
 237         coding->produced_char++;                                        \
 238       }                                                                 \
 239     if (leading_code = CHARSET_LEADING_CODE_EXT (charset))              \
 240       *dst++ = leading_code;                                            \
 241     *dst++ = (c) | 0x80;                                                \
 242   } while (0)
 243
 244 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 245    position-codes are C1 and C2.  */
 246
 247 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 248   do {                                                  \
 249     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 250     *dst++ = (c2) | 0x80;                               \
 251   } while (0)
 252
 253 \f
 254 /*** 1. Preamble ***/
 255
 256 #include <stdio.h>
 257
 258 #ifdef emacs
 259
 260 #include <config.h>
 261 #include "lisp.h"
 262 #include "buffer.h"
 263 #include "charset.h"
 264 #include "ccl.h"
 265 #include "coding.h"
 266 #include "window.h"
 267
 268 #else  /* not emacs */
 269
 270 #include "mulelib.h"
 271
 272 #endif /* not emacs */
 273
 274 Lisp_Object Qcoding_system, Qeol_type;
 275 Lisp_Object Qbuffer_file_coding_system;
 276 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 277 Lisp_Object Qno_conversion, Qundecided;
 278 Lisp_Object Qcoding_system_history;
 279 Lisp_Object Qsafe_charsets;
 280
 281 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 282 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 283 Lisp_Object Qstart_process, Qopen_network_stream;
 284 Lisp_Object Qtarget_idx;
 285
 286 Lisp_Object Vselect_safe_coding_system_function;
 287
 288 /* Mnemonic character of each format of end-of-line.  */
 289 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 290 /* Mnemonic character to indicate format of end-of-line is not yet
 291    decided.  */
 292 int eol_mnemonic_undecided;
 293
 294 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 295    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 296 int system_eol_type;
 297
 298 #ifdef emacs
 299
 300 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 301
 302 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 303
 304 /* Coding system emacs-mule and raw-text are for converting only
 305    end-of-line format.  */
 306 Lisp_Object Qemacs_mule, Qraw_text;
 307
 308 /* Coding-systems are handed between Emacs Lisp programs and C internal
 309    routines by the following three variables.  */
 310 /* Coding-system for reading files and receiving data from process.  */
 311 Lisp_Object Vcoding_system_for_read;
 312 /* Coding-system for writing files and sending data to process.  */
 313 Lisp_Object Vcoding_system_for_write;
 314 /* Coding-system actually used in the latest I/O.  */
 315 Lisp_Object Vlast_coding_system_used;
 316
 317 /* A vector of length 256 which contains information about special
 318    Latin codes (espepcially for dealing with Microsoft code).  */
 319 Lisp_Object Vlatin_extra_code_table;
 320
 321 /* Flag to inhibit code conversion of end-of-line format.  */
 322 int inhibit_eol_conversion;
 323
 324 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 325 int inherit_process_coding_system;
 326
 327 /* Coding system to be used to encode text for terminal display.  */
 328 struct coding_system terminal_coding;
 329
 330 /* Coding system to be used to encode text for terminal display when
 331    terminal coding system is nil.  */
 332 struct coding_system safe_terminal_coding;
 333
 334 /* Coding system of what is sent from terminal keyboard.  */
 335 struct coding_system keyboard_coding;
 336
 337 Lisp_Object Vfile_coding_system_alist;
 338 Lisp_Object Vprocess_coding_system_alist;
 339 Lisp_Object Vnetwork_coding_system_alist;
 340
 341 #endif /* emacs */
 342
 343 Lisp_Object Qcoding_category, Qcoding_category_index;
 344
 345 /* List of symbols `coding-category-xxx' ordered by priority.  */
 346 Lisp_Object Vcoding_category_list;
 347
 348 /* Table of coding categories (Lisp symbols).  */
 349 Lisp_Object Vcoding_category_table;
 350
 351 /* Table of names of symbol for each coding-category.  */
 352 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 353   "coding-category-emacs-mule",
 354   "coding-category-sjis",
 355   "coding-category-iso-7",
 356   "coding-category-iso-7-tight",
 357   "coding-category-iso-8-1",
 358   "coding-category-iso-8-2",
 359   "coding-category-iso-7-else",
 360   "coding-category-iso-8-else",
 361   "coding-category-big5",
 362   "coding-category-raw-text",
 363   "coding-category-binary"
 364 };
 365
 366 /* Table pointers to coding systems corresponding to each coding
 367    categories.  */
 368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 369
 370 /* Flag to tell if we look up unification table on character code
 371    conversion.  */
 372 Lisp_Object Venable_character_unification;
 373 /* Standard unification table to look up on decoding (reading).  */
 374 Lisp_Object Vstandard_character_unification_table_for_decode;
 375 /* Standard unification table to look up on encoding (writing).  */
 376 Lisp_Object Vstandard_character_unification_table_for_encode;
 377
 378 Lisp_Object Qcharacter_unification_table;
 379 Lisp_Object Qcharacter_unification_table_for_decode;
 380 Lisp_Object Qcharacter_unification_table_for_encode;
 381
 382 /* Alist of charsets vs revision number.  */
 383 Lisp_Object Vcharset_revision_alist;
 384
 385 /* Default coding systems used for process I/O.  */
 386 Lisp_Object Vdefault_process_coding_system;
 387
 388 \f
 389 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 390
 391 /* Emacs' internal format for encoding multiple character sets is a
 392    kind of multi-byte encoding, i.e. characters are encoded by
 393    variable-length sequences of one-byte codes.  ASCII characters
 394    and control characters (e.g. `tab', `newline') are represented by
 395    one-byte sequences which are their ASCII codes, in the range 0x00
 396    through 0x7F.  The other characters are represented by a sequence
 397    of `base leading-code', optional `extended leading-code', and one
 398    or two `position-code's.  The length of the sequence is determined
 399    by the base leading-code.  Leading-code takes the range 0x80
 400    through 0x9F, whereas extended leading-code and position-code take
 401    the range 0xA0 through 0xFF.  See `charset.h' for more details
 402    about leading-code and position-code.
 403
 404    There's one exception to this rule.  Special leading-code
 405    `leading-code-composition' denotes that the following several
 406    characters should be composed into one character.  Leading-codes of
 407    components (except for ASCII) are added 0x20.  An ASCII character
 408    component is represented by a 2-byte sequence of `0xA0' and
 409    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 410    details of composite character.  Hence, we can summarize the code
 411    range as follows:
 412
 413    --- CODE RANGE of Emacs' internal format ---
 414    (character set)      (range)
 415    ASCII                0x00 .. 0x7F
 416    ELSE (1st byte)      0x80 .. 0x9F
 417         (rest bytes)    0xA0 .. 0xFF
 418    ---------------------------------------------
 419
 420   */
 421
 422 enum emacs_code_class_type emacs_code_class[256];
     /* The table above holds one class (enum emacs_code_class_type) per
        possible byte value.  detect_coding_emacs_mule indexes it with
        the byte under examination to decide how that byte may appear in
        Emacs' internal format.  It has no initializer here; presumably
        it is filled in by setup code elsewhere in this file.  */
 423
 424 /* Go to the next statement only if *SRC is accessible and the code is
 425    greater than 0xA0.  */
 426 #define CHECK_CODE_RANGE_A0_FF  \
 427   do {                          \
 428     if (src >= src_end)         \
 429       goto label_end_of_switch; \
 430     else if (*src++ < 0xA0)     \
 431       return 0;                 \
 432   } while (0)
 433
 434 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 435    Check if a text is encoded in Emacs' internal format.  If it is,
 436    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 437
 438 int
 439 detect_coding_emacs_mule (src, src_end)
 440      unsigned char *src, *src_end;
 441 {
 442   unsigned char c;
 443   int composing = 0;
 444
 445   while (src < src_end)
 446     {
 447       c = *src++;
 448
 449       if (composing)
 450         {
 451           if (c < 0xA0)
 452             composing = 0;
 453           else
 454             c -= 0x20;
 455         }
 456
 457       switch (emacs_code_class[c])
 458         {
 459         case EMACS_ascii_code:
 460         case EMACS_linefeed_code:
 461           break;
 462
 463         case EMACS_control_code:
 464           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 465             return 0;
 466           break;
 467
 468         case EMACS_invalid_code:
 469           return 0;
 470
 471         case EMACS_leading_code_composition: /* c == 0x80 */
 472           if (composing)
 473             CHECK_CODE_RANGE_A0_FF;
 474           else
 475             composing = 1;
 476           break;
 477
 478         case EMACS_leading_code_4:
 479           CHECK_CODE_RANGE_A0_FF;
 480           /* fall down to check it two more times ...  */
 481
 482         case EMACS_leading_code_3:
 483           CHECK_CODE_RANGE_A0_FF;
 484           /* fall down to check it one more time ...  */
 485
 486         case EMACS_leading_code_2:
 487           CHECK_CODE_RANGE_A0_FF;
 488           break;
 489
 490         default:
 491         label_end_of_switch:
 492           break;
 493         }
 494     }
 495   return CODING_CATEGORY_MASK_EMACS_MULE;
 496 }
 497
 498 \f
 499 /*** 3. ISO2022 handlers ***/
 500
 501 /* The following note describes the coding system ISO2022 briefly.
 502    Since the intention of this note is to help in understanding of
 503    the programs in this file, some parts are NOT ACCURATE or OVERLY
 504    SIMPLIFIED.  For the thorough understanding, please refer to the
 505    original document of ISO2022.
 506
 507    ISO2022 provides many mechanisms to encode several character sets
 508    in 7-bit and 8-bit environment.  If one chooses 7-bit environment,
 509    all text is encoded by codes of less than 128.  This may make the
 510    encoded text a little bit longer, but the text gets more stability
 511    to pass through several gateways (some of them strip off the MSB).
 512
 513    There are two kinds of character set: control character set and
 514    graphic character set.  The former contains control characters such
 515    as `newline' and `escape' to provide control functions (control
 516    functions are provided also by escape sequences).  The latter
 517    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 518    two control character sets and many graphic character sets.
 519
 520    Graphic character sets are classified into one of the following
 521    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 522    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 523    bytes (DIMENSION) and the number of characters in one dimension
 524    (CHARS) of the set.  In addition, each character set is assigned an
 525    identification tag (called "final character" and denoted as <F>
 526    here after) which is unique in each class.  <F> of each character
 527    set is decided by ECMA(*) when it is registered in ISO.  Code range
 528    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 529
 530    Note (*): ECMA = European Computer Manufacturers Association
 531
 532    Here are examples of graphic character set [NAME(<F>)]:
 533         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 534         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 535         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 536         o DIMENSION2_CHARS96 -- none for the moment
 537
 538    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 539         C0 [0x00..0x1F] -- control character plane 0
 540         GL [0x20..0x7F] -- graphic character plane 0
 541         C1 [0x80..0x9F] -- control character plane 1
 542         GR [0xA0..0xFF] -- graphic character plane 1
 543
 544    A control character set is directly designated and invoked to C0 or
 545    C1 by an escape sequence.  The most common case is that ISO646's
 546    control character set is designated/invoked to C0 and ISO6429's
 547    control character set is designated/invoked to C1, and usually
 548    these designations/invocations are omitted in a coded text.  With
 549    7-bit environment, only C0 can be used, and a control character for
 550    C1 is encoded by an appropriate escape sequence to fit in the
 551    environment.  All control characters for C1 have
 552    corresponding escape sequences defined.
 553
 554    A graphic character set is at first designated to one of four
 555    graphic registers (G0 through G3), then these graphic registers are
 556    invoked to GL or GR.  These designations and invocations can be
 557    done independently.  The most common case is that G0 is invoked to
 558    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 559    these invocations and designations are omitted in a coded text.
 560    With 7-bit environment, only GL can be used.
 561
 562    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 563    and 0x7F of GL area work as control characters SPACE and DEL
 564    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 565
 566    There are two ways of invocation: locking-shift and single-shift.
 567    With locking-shift, the invocation lasts until the next different
 568    invocation, whereas with single-shift, the invocation works only
 569    for the following character and doesn't affect locking-shift.
 570    Invocations are done by the following control characters or escape
 571    sequences.
 572
 573    ----------------------------------------------------------------------
 574    function             control char    escape sequence description
 575    ----------------------------------------------------------------------
 576    SI  (shift-in)               0x0F    none            invoke G0 to GL
 577    SO  (shift-out)              0x0E    none            invoke G1 to GL
 578    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 579    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 580    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 581    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 582    ----------------------------------------------------------------------
 583    The first four are for locking-shift.  Control characters for these
 584    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 585
 586    Designations are done by the following escape sequences.
 587    ----------------------------------------------------------------------
 588    escape sequence      description
 589    ----------------------------------------------------------------------
 590    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 591    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 592    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 593    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 594    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 595    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 596    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 597    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 598    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 599    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 600    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 601    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 602    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 603    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 604    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 605    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 606    ----------------------------------------------------------------------
 607
 608    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 609    of dimension 1, chars 94, and final character <F>, and etc.
 610
 611    Note (*): Although these designations are not allowed in ISO2022,
 612    Emacs accepts them on decoding, and produces them on encoding
 613    CHARS96 character set in a coding system which is characterized as
 614    7-bit environment, non-locking-shift, and non-single-shift.
 615
 616    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 617    '(' can be omitted.  We call this as "short-form" here after.
 618
 619    Now you may notice that there are a lot of ways for encoding the
 620    same multilingual text in ISO2022.  Actually, there exists many
 621    coding systems such as Compound Text (used in X's inter client
 622    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 623    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 624    localized platforms), and all of these are variants of ISO2022.
 625
 626    In addition to the above, Emacs handles two more kinds of escape
 627    sequences: ISO6429's direction specification and Emacs' private
 628    sequence for specifying character composition.
 629
 630    ISO6429's direction specification takes the following format:
 631         o CSI ']'      -- end of the current direction
 632         o CSI '0' ']'  -- end of the current direction
 633         o CSI '1' ']'  -- start of left-to-right text
 634         o CSI '2' ']'  -- start of right-to-left text
 635    The control character CSI (0x9B: control sequence introducer) is
 636    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 637
 638    Character composition specification takes the following format:
 639         o ESC '0' -- start character composition
 640         o ESC '1' -- end character composition
 641    Since these are not standard escape sequences of any ISO, the use
 642    of them for these meaning is restricted to Emacs only.  */
 643
 644 enum iso_code_class_type iso_code_class[256];
     /* The table above holds one class (enum iso_code_class_type) per
        possible byte value.  Neither its contents nor its users are
        visible in this part of the file; presumably it classifies bytes
        for the ISO2022 handlers -- confirm against the code that fills
        it in.  */
 645
 646 #define CHARSET_OK(idx, charset)                        \
 647   (coding_system_table[idx]->safe_charsets[charset]     \
 648    || (CODING_SPEC_ISO_REQUESTED_DESIGNATION            \
 649        (coding_system_table[idx], charset)              \
 650        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 651
 652 #define SHIFT_OUT_OK(idx) \
 653   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 654
 655 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 656    Check if a text is encoded in ISO2022.  If it is, returns an
 657    integer in which appropriate flag bits any of:
 658         CODING_CATEGORY_MASK_ISO_7
 659         CODING_CATEGORY_MASK_ISO_7_TIGHT
 660         CODING_CATEGORY_MASK_ISO_8_1
 661         CODING_CATEGORY_MASK_ISO_8_2
 662         CODING_CATEGORY_MASK_ISO_7_ELSE
 663         CODING_CATEGORY_MASK_ISO_8_ELSE
 664    are set.  If a code which should never appear in ISO2022 is found,
 665    returns 0.  */
 666
 667 int
 668 detect_coding_iso2022 (src, src_end)
 669      unsigned char *src, *src_end;
 670 {
 671   int mask = CODING_CATEGORY_MASK_ISO;
 672   int mask_found = 0;
 673   int reg[4], shift_out = 0;
 674   int c, c1, i, charset;
 675
 676   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 677   while (mask && src < src_end)
 678     {
 679       c = *src++;
 680       switch (c)
 681         {
 682         case ISO_CODE_ESC:
 683           if (src >= src_end)
 684             break;
 685           c = *src++;
 686           if (c >= '(' && c <= '/')
 687             {
 688               /* Designation sequence for a charset of dimension 1.  */
 689               if (src >= src_end)
 690                 break;
 691               c1 = *src++;
 692               if (c1 < ' ' || c1 >= 0x80
 693                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 694                 /* Invalid designation sequence.  Just ignore.  */
 695                 break;
 696               reg[(c - '(') % 4] = charset;
 697             }
 698           else if (c == '$')
 699             {
 700               /* Designation sequence for a charset of dimension 2.  */
 701               if (src >= src_end)
 702                 break;
 703               c = *src++;
 704               if (c >= '@' && c <= 'B')
 705                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 706                 reg[0] = charset = iso_charset_table[1][0][c];
 707               else if (c >= '(' && c <= '/')
 708                 {
 709                   if (src >= src_end)
 710                     break;
 711                   c1 = *src++;
 712                   if (c1 < ' ' || c1 >= 0x80
 713                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 714                     /* Invalid designation sequence.  Just ignore.  */
 715                     break;
 716                   reg[(c - '(') % 4] = charset;
 717                 }
 718               else
 719                 /* Invalid designation sequence.  Just ignore.  */
 720                 break;
 721             }
 722           else if (c == 'N' || c == 'n')
 723             {
 724               if (shift_out == 0
 725                   && (reg[1] >= 0
 726                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 727                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 728                 {
 729                   /* Locking shift out.  */
 730                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 731                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 732                   shift_out = 1;
 733                 }
 734               break;
 735             }
 736           else if (c == 'O' || c == 'o')
 737             {
 738               if (shift_out == 1)
 739                 {
 740                   /* Locking shift in.  */
 741                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 742                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 743                   shift_out = 0;
 744                 }
 745               break;
 746             }
 747           else if (c == '0' || c == '1' || c == '2')
 748             /* Start/end composition.  Just ignore.  */
 749             break;
 750           else
 751             /* Invalid escape sequence.  Just ignore.  */
 752             break;
 753
 754           /* We found a valid designation sequence for CHARSET.  */
 755           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 756           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 757             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 758           else
 759             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 760           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 761             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 762           else
 763             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 764           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 765             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 766           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 767             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 768           break;
 769
 770         case ISO_CODE_SO:
 771           if (shift_out == 0
 772               && (reg[1] >= 0
 773                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 774                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 775             {
 776               /* Locking shift out.  */
 777               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 778               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 779             }
 780           break;
 781
 782         case ISO_CODE_SI:
 783           if (shift_out == 1)
 784             {
 785               /* Locking shift in.  */
 786               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 787               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 788             }
 789           break;
 790
 791         case ISO_CODE_CSI:
 792         case ISO_CODE_SS2:
 793         case ISO_CODE_SS3:
 794           {
 795             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 796
 797             if (c != ISO_CODE_CSI)
 798               {
 799                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 800                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 801                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 802                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 803                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 804                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 805               }
 806             if (VECTORP (Vlatin_extra_code_table)
 807                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 808               {
 809                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 810                     & CODING_FLAG_ISO_LATIN_EXTRA)
 811                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 812                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 813                     & CODING_FLAG_ISO_LATIN_EXTRA)
 814                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 815               }
 816             mask &= newmask;
 817             mask_found |= newmask;
 818           }
 819           break;
 820
 821         default:
 822           if (c < 0x80)
 823             break;
 824           else if (c < 0xA0)
 825             {
 826               if (VECTORP (Vlatin_extra_code_table)
 827                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 828                 {
 829                   int newmask = 0;
 830
 831                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 832                       & CODING_FLAG_ISO_LATIN_EXTRA)
 833                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 834                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 835                       & CODING_FLAG_ISO_LATIN_EXTRA)
 836                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 837                   mask &= newmask;
 838                   mask_found |= newmask;
 839                 }
 840               else
 841                 return 0;
 842             }
 843           else
 844             {
 845               unsigned char *src_begin = src;
 846
 847               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 848                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 849               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 850               while (src < src_end && *src >= 0xA0)
 851                 src++;
 852               if ((src - src_begin - 1) & 1 && src < src_end)
 853                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 854               else
 855                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 856             }
 857           break;
 858         }
 859     }
 860
 861   return (mask & mask_found);
 862 }
 863
 864 /* Decode a character of which charset is CHARSET and the 1st position
 865    code is C1.  If dimension of CHARSET is 2, the 2nd position code is
 866    fetched from SRC and set to C2.  If CHARSET is negative, it means
 867    that we are decoding ill formed text, and what we can do is just to
 868    read C1 as is.

        The expansion uses these names from the enclosing function:
        CODING, SRC, DST, C2 and UNIFICATION_TABLE.  C1 must be an lvalue,
        because SPLIT_CHAR overwrites C1 and C2 (and the macro-local
        CHARSET_ALT) when unify_char returns a non-negative character.

        Steps, in order:
        - If a composition has just started (COMPOSING_HEAD_P),
          LEADING_CODE_COMPOSITION is written first, followed by 0xFF when
          the composition carries rules, and CODING->composing is advanced
          by 2.
        - For a charset of dimension 2, the second byte is read with
          ONE_MORE_BYTE.  If its 7-bit code class is neither
          ISO_0x20_or_0x7F nor ISO_graphic_plane_0, the byte is pushed
          back to SRC and C2 is set to ' ' instead.
        - The character is written with DECODE_CHARACTER_ASCII,
          DECODE_CHARACTER_DIMENSION1 or DECODE_CHARACTER_DIMENSION2
          according to the resulting charset.
        - If the composition carries rules, CODING->composing is set to
          COMPOSING_WITH_RULE_RULE, which makes decode_coding_iso2022
          treat the next graphic code as a composition rule.

        NOTE(review): ONE_MORE_BYTE is defined elsewhere; according to
        the comment in decode_coding_iso2022 it may leave the decoding
        loop when the source is exhausted -- confirm before reusing this
        macro in another context.  */
 869
 870 #define DECODE_ISO_CHARACTER(charset, c1)                               \
 871   do {                                                                  \
 872     int c_alt, charset_alt = (charset);                                 \
 873     if (COMPOSING_HEAD_P (coding->composing))                           \
 874       {                                                                 \
 875         *dst++ = LEADING_CODE_COMPOSITION;                              \
 876         if (COMPOSING_WITH_RULE_P (coding->composing))                  \
 877           /* To tell composition rules are embedded.  */                \
 878           *dst++ = 0xFF;                                                \
 879         coding->composing += 2;                                         \
 880       }                                                                 \
 881     if ((charset) >= 0)                                                 \
 882       {                                                                 \
 883         if (CHARSET_DIMENSION (charset) == 2)                           \
 884           {                                                             \
 885             ONE_MORE_BYTE (c2);                                         \
 886             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
 887                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
 888               {                                                         \
 889                 src--;                                                  \
 890                 c2 = ' ';                                               \
 891               }                                                         \
 892           }                                                             \
 893         if (!NILP (unification_table)                                   \
 894             && ((c_alt = unify_char (unification_table,                 \
 895                                      -1, (charset), c1, c2)) >= 0))     \
 896           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
 897       }                                                                 \
 898     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
 899       DECODE_CHARACTER_ASCII (c1);                                      \
 900     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
 901       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
 902     else                                                                \
 903       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
 904     if (COMPOSING_WITH_RULE_P (coding->composing))                      \
 905       /* To tell a composition rule follows.  */                        \
 906       coding->composing = COMPOSING_WITH_RULE_RULE;                     \
 907   } while (0)
 908
 909 /* Set designation state into CODING.

        Look up the charset identified by DIMENSION, CHARS and FINAL_CHAR
        with ISO_CHARSET_TABLE and designate it to graphic register REG
        of CODING.

        The designation is accepted when the charset is known (>= 0) and
        either CODING requests this charset for REG or the charset is
        marked in CODING->safe_charsets.  On acceptance:
        - If the previous invalid designation was to register 0 and this
          one designates CHARSET_ASCII to register 0, the sequence is not
          interpreted; control jumps to label_invalid_code so that the
          bytes are copied as they are.
        - Otherwise, when CODING_MODE_DIRECTION is set and the charset
          has a reverse charset, the reverse charset is designated
          instead.
        In both cases last_invalid_designation_register is reset to -1.

        A rejected designation records REG in
        last_invalid_designation_register and jumps to
        label_invalid_code.

        The expansion uses CODING and the label label_invalid_code from
        the enclosing function.  The macro declares its own local
        `charset', so it neither reads nor modifies a variable of that
        name in the caller.  REG is substituted without parentheses.  */
 910 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 911   do {                                                                     \
 912     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 913                                      make_number (chars),                  \
 914                                      make_number (final_char));            \
 915     if (charset >= 0                                                       \
 916         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 917             || coding->safe_charsets[charset]))                            \
 918       {                                                                    \
 919         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 920             && reg == 0                                                    \
 921             && charset == CHARSET_ASCII)                                   \
 922           {                                                                \
 923             /* We should insert this designation sequence as is so         \
 924                that it is surely written back to a file.  */               \
 925             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 926             goto label_invalid_code;                                       \
 927           }                                                                \
 928         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 929         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 930             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 931           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 932         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 933       }                                                                    \
 934     else                                                                   \
 935       {                                                                    \
 936         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 937         goto label_invalid_code;                                           \
 938       }                                                                    \
 939   } while (0)
 940
 941 /* Check if the current composing sequence contains only valid codes.
 942    If the composing sequence doesn't end before SRC_END, return -1.
 943    Else, if it contains only valid codes, return 0.
 944    Else return the length of the composing sequence.

        SRC points just after the escape sequence that started the
        composition; the scan runs up to (not including) SRC_END and
        looks only at escape sequences, skipping every other byte.

        The sequence ends at ESC '1'.  The returned length is counted
        from SRC up to and including that ESC '1'.

        When the end is not found and CODING_MODE_LAST_BLOCK is set in
        CODING->mode, no more text can arrive, so instead of -1 the
        number of bytes from SRC to SRC_END is returned.

        An escape sequence inside the composition is valid only if it is
        a designation, CODING allows designation
        (CODING_FLAG_ISO_DESIGNATION), and the designated charset is a
        known one that is marked in CODING->safe_charsets and has a
        requested designation in CODING.  */
 945
 946 int check_composing_code (coding, src, src_end)
 947      struct coding_system *coding;
 948      unsigned char *src, *src_end;
 949 {
 950   unsigned char *src_start = src;
 951   int invalid_code_found = 0;
 952   int charset, c, c1, dim;
 953
 954   while (src < src_end)
 955     {
 956       if (*src++ != ISO_CODE_ESC) continue;
 957       if (src >= src_end) break;
 958       if ((c = *src++) == '1') /* end of composition */
 959         return (invalid_code_found ? src - src_start : 0);
           /* The longest sequence examined below reads two more bytes
              (intermediate and final character).  */
 960       if (src + 2 >= src_end) break;
           /* The flag test must be parenthesized: `!' binds tighter than
              `&', so `!coding->flags & FLAG' would test the wrong thing.  */
 961       if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
 962         invalid_code_found = 1;
 963       else
 964         {
 965           dim = 0;
 966           if (c == '$')
 967             {
                   /* 2-byte charset.  In the short form (ESC '$' followed
                      directly by '@', 'A' or 'B') there is no intermediate
                      character; treat it as '(' and leave SRC on the final
                      character.  */
 968               dim = 1;
 969               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 970             }
 971           if (c >= '(' && c <= '/')
 972             {
 973               c1 = *src++;
 974               if ((c1 < ' ' || c1 >= 0x80)
 975                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 976                   || ! coding->safe_charsets[charset]
 977                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 978                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 979                 invalid_code_found = 1;
 980             }
 981           else
                 /* Not a designation sequence.  */
 982             invalid_code_found = 1;
 983         }
 984     }
       /* The end of the composition was not found.  */
 985   return ((coding->mode & CODING_MODE_LAST_BLOCK) ? src_end - src_start : -1);
 986 }
 987
 988 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 989
 990 int
 991 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 992      struct coding_system *coding;
 993      unsigned char *source, *destination;
 994      int src_bytes, dst_bytes;
 995 {
       /* Decode the SRC_BYTES bytes of ISO2022 text at SOURCE and store
          the result at DESTINATION.

          On return CODING->consumed and CODING->consumed_char are both
          set to the number of source bytes used, CODING->produced to the
          number of bytes written, and CODING->produced_char has been
          counted up while decoding.  CODING->fake_multibyte is set to 1
          whenever undecoded source bytes are copied to the output as
          they are.  The designation, invocation, composition and
          direction state kept in CODING is updated as escape sequences
          are processed.

          The return value is CODING_FINISH_NORMAL,
          CODING_FINISH_INSUFFICIENT_SRC, CODING_FINISH_INSUFFICIENT_DST
          or CODING_FINISH_INCONSISTENT_EOL.  */
 996   unsigned char *src = source;
 997   unsigned char *src_end = source + src_bytes;
 998   unsigned char *dst = destination;
 999   unsigned char *dst_end = destination + dst_bytes;
1000   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1001      from DST_END to assure that overflow checking is necessary only
1002      at the head of loop.  */
1003   unsigned char *adjusted_dst_end = dst_end - 6;
1004   int charset;
1005   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1006   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1007   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1008   Lisp_Object unification_table
1009     = coding->character_unification_table_for_decode;
1010   int result = CODING_FINISH_NORMAL;
1011
       /* Fall back to the standard table only when unification is
          enabled and CODING has no table of its own.  */
1012   if (!NILP (Venable_character_unification) && NILP (unification_table))
1013     unification_table = Vstandard_character_unification_table_for_decode;
1014
1015   coding->produced_char = 0;
1016   coding->fake_multibyte = 0;
       /* When DST_BYTES is zero the output bound is SRC - 6 instead of a
          destination end.  NOTE(review): presumably SOURCE and
          DESTINATION then share one buffer with DST trailing SRC --
          confirm against the callers.  */
1017   while (src < src_end && (dst_bytes
1018                            ? (dst < adjusted_dst_end)
1019                            : (dst < src - 6)))
1020     {
1021       /* SRC_BASE remembers the start position in source in each loop.
1022          The loop will be exited when there's not enough source text
1023          to analyze long escape sequence or 2-byte code (within macros
1024          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1025          to SRC_BASE before exiting.  */
1026       unsigned char *src_base = src;
1027       int c1 = *src++, c2;
1028
1029       switch (iso_code_class [c1])
1030         {
1031         case ISO_0x20_or_0x7F:
1032           if (!coding->composing
1033               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1034             {
1035               /* This is SPACE or DEL.  */
1036               *dst++ = c1;
1037               coding->produced_char++;
1038               break;
1039             }
1040           /* This is a graphic character, we fall down ...  */
1041
1042         case ISO_graphic_plane_0:
1043           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1044             {
1045               /* This is a composition rule.  */
1046               *dst++ = c1 | 0x80;
1047               coding->composing = COMPOSING_WITH_RULE_TAIL;
1048             }
1049           else
1050             DECODE_ISO_CHARACTER (charset0, c1);
1051           break;
1052
1053         case ISO_0xA0_or_0xFF:
1054           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1055               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1056             goto label_invalid_code;
1057           /* This is a graphic character, we fall down ... */
1058
1059         case ISO_graphic_plane_1:
1060           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1061             goto label_invalid_code;
1062           else
1063             DECODE_ISO_CHARACTER (charset1, c1);
1064           break;
1065
1066         case ISO_control_code:
1067           /* All ISO2022 control characters in this class have the
1068              same representation in Emacs internal format.  */
1069           if (c1 == '\n'
1070               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1071               && (coding->eol_type == CODING_EOL_CR
1072                   || coding->eol_type == CODING_EOL_CRLF))
1073             {
1074               result = CODING_FINISH_INCONSISTENT_EOL;
1075               goto label_end_of_loop_2;
1076             }
1077           *dst++ = c1;
1078           coding->produced_char++;
1079           break;
1080
1081         case ISO_carriage_return:
1082           if (coding->eol_type == CODING_EOL_CR)
1083             *dst++ = '\n';
1084           else if (coding->eol_type == CODING_EOL_CRLF)
1085             {
1086               ONE_MORE_BYTE (c1);
1087               if (c1 == ISO_CODE_LF)
1088                 *dst++ = '\n';
1089               else
1090                 {
1091                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1092                     {
1093                       result = CODING_FINISH_INCONSISTENT_EOL;
1094                       goto label_end_of_loop_2;
1095                     }
                       /* A lone CR: push back the byte just read and
                          output the CR itself.  */
1096                   src--;
1097                   *dst++ = '\r';
1098                 }
1099             }
1100           else
1101             *dst++ = c1;
1102           coding->produced_char++;
1103           break;
1104
1105         case ISO_shift_out:
1106           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1107               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1108             goto label_invalid_code;
1109           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1110           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1111           break;
1112
1113         case ISO_shift_in:
1114           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1115             goto label_invalid_code;
1116           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1117           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1118           break;
1119
1120         case ISO_single_shift_2_7:
1121         case ISO_single_shift_2:
1122           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1123             goto label_invalid_code;
1124           /* SS2 is handled as an escape sequence of ESC 'N' */
1125           c1 = 'N';
1126           goto label_escape_sequence;
1127
1128         case ISO_single_shift_3:
1129           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1130             goto label_invalid_code;
1131           /* SS3 is handled as an escape sequence of ESC 'O' */
1132           c1 = 'O';
1133           goto label_escape_sequence;
1134
1135         case ISO_control_sequence_introducer:
1136           /* CSI is handled as an escape sequence of ESC '[' ...  */
1137           c1 = '[';
1138           goto label_escape_sequence;
1139
1140         case ISO_escape:
1141           ONE_MORE_BYTE (c1);
1142         label_escape_sequence:
1143           /* Escape sequences handled by Emacs are invocation,
1144              designation, direction specification, and character
1145              composition specification.  */
1146           switch (c1)
1147             {
1148             case '&':           /* revision of following character set */
                   /* Expect ESC '&' <'@'..'~'> ESC ...; the revision byte
                      is only validated, then the escape sequence that
                      follows is processed.  */
1149               ONE_MORE_BYTE (c1);
1150               if (!(c1 >= '@' && c1 <= '~'))
1151                 goto label_invalid_code;
1152               ONE_MORE_BYTE (c1);
1153               if (c1 != ISO_CODE_ESC)
1154                 goto label_invalid_code;
1155               ONE_MORE_BYTE (c1);
1156               goto label_escape_sequence;
1157
1158             case '$':           /* designation of 2-byte character set */
1159               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1160                 goto label_invalid_code;
1161               ONE_MORE_BYTE (c1);
1162               if (c1 >= '@' && c1 <= 'B')
1163                 {       /* designation of JISX0208.1978, GB2312.1980,
1164                                    or JISX0208.1980 */
1165                   DECODE_DESIGNATION (0, 2, 94, c1);
1166                 }
1167               else if (c1 >= 0x28 && c1 <= 0x2B)
1168                 {       /* designation of DIMENSION2_CHARS94 character set */
1169                   ONE_MORE_BYTE (c2);
1170                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1171                 }
1172               else if (c1 >= 0x2C && c1 <= 0x2F)
1173                 {       /* designation of DIMENSION2_CHARS96 character set */
1174                   ONE_MORE_BYTE (c2);
1175                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1176                 }
1177               else
1178                 goto label_invalid_code;
1179               break;
1180
1181             case 'n':           /* invocation of locking-shift-2 */
1182               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1183                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1184                 goto label_invalid_code;
1185               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1186               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1187               break;
1188
1189             case 'o':           /* invocation of locking-shift-3 */
1190               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1191                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1192                 goto label_invalid_code;
1193               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1194               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1195               break;
1196
1197             case 'N':           /* invocation of single-shift-2 */
1198               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1199                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1200                 goto label_invalid_code;
1201               ONE_MORE_BYTE (c1);
1202               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1203               DECODE_ISO_CHARACTER (charset, c1);
1204               break;
1205
1206             case 'O':           /* invocation of single-shift-3 */
1207               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1208                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1209                 goto label_invalid_code;
1210               ONE_MORE_BYTE (c1);
1211               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1212               DECODE_ISO_CHARACTER (charset, c1);
1213               break;
1214
1215             case '0': case '2': /* start composing */
1216               /* Before processing composing, we must be sure that all
1217                  characters being composed are supported by CODING.
1218                  If not, we must give up composing and insert the
1219                  bunch of codes for composing as is without decoding.  */
1220               {
1221                 int result1;
1222
1223                 result1 = check_composing_code (coding, src, src_end);
1224                 if (result1 == 0)
1225                   coding->composing = (c1 == '0'
1226                                        ? COMPOSING_NO_RULE_HEAD
1227                                        : COMPOSING_WITH_RULE_HEAD);
1228                 else if (result1 > 0)
1229                   {
                         /* RESULT1 counts the bytes after the two-byte
                            starter (ESC and '0' or '2'), hence the + 2
                            when copying from SRC_BASE.  */
1230                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1231                       {
1232                         bcopy (src_base, dst, result1 + 2);
1233                         src += result1;
1234                         dst += result1 + 2;
1235                         coding->produced_char += result1 + 2;
1236                       }
1237                     else
1238                       {
1239                         result = CODING_FINISH_INSUFFICIENT_DST;
1240                         goto label_end_of_loop_2;
1241                       }
1242                   }
1243                 else
                       /* The end of the composition is not in this
                          block of source text yet.  */
1244                   goto label_end_of_loop;
1245               }
1246               break;
1247
1248             case '1':           /* end composing */
1249               coding->composing = COMPOSING_NO;
1250               coding->produced_char++;
1251               break;
1252
1253             case '[':           /* specification of direction */
1254               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1255                 goto label_invalid_code;
1256               /* For the moment, nested direction is not supported.
1257                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1258                  left-to-right, and nonzero means right-to-left.  */
1259               ONE_MORE_BYTE (c1);
1260               switch (c1)
1261                 {
1262                 case ']':       /* end of the current direction */
1263                   coding->mode &= ~CODING_MODE_DIRECTION;
1264
                     /* NOTE(review): there is no `break' above, so
                        control falls into the '0'/'1' case, which reads
                        one more byte and requires it to be ']'.  A bare
                        ESC '[' ']' is therefore accepted only when
                        another ']' follows; otherwise it is treated as
                        invalid code.  This looks unintended -- confirm
                        before relying on it.  */
1265                 case '0':       /* end of the current direction */
1266                 case '1':       /* start of left-to-right direction */
1267                   ONE_MORE_BYTE (c1);
1268                   if (c1 == ']')
1269                     coding->mode &= ~CODING_MODE_DIRECTION;
1270                   else
1271                     goto label_invalid_code;
1272                   break;
1273
1274                 case '2':       /* start of right-to-left direction */
1275                   ONE_MORE_BYTE (c1);
1276                   if (c1 == ']')
1277                     coding->mode |= CODING_MODE_DIRECTION;
1278                   else
1279                     goto label_invalid_code;
1280                   break;
1281
1282                 default:
1283                   goto label_invalid_code;
1284                 }
1285               break;
1286
1287             default:
1288               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1289                 goto label_invalid_code;
1290               if (c1 >= 0x28 && c1 <= 0x2B)
1291                 {       /* designation of DIMENSION1_CHARS94 character set */
1292                   ONE_MORE_BYTE (c2);
1293                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1294                 }
1295               else if (c1 >= 0x2C && c1 <= 0x2F)
1296                 {       /* designation of DIMENSION1_CHARS96 character set */
1297                   ONE_MORE_BYTE (c2);
1298                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1299                 }
1300               else
1301                 {
1302                   goto label_invalid_code;
1303                 }
1304             }
1305           /* We must update these variables now.  */
1306           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1307           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1308           break;
1309
1310         label_invalid_code:
               /* Copy every byte consumed in this iteration (SRC_BASE up
                  to SRC) to the output without decoding.  */
1311           while (src_base < src)
1312             *dst++ = *src_base++;
1313           coding->fake_multibyte = 1;
1314         }
1315       continue;
1316
           /* The two labels below are reached only by `goto'; both undo
              the current iteration by resetting SRC to SRC_BASE.  */
1317     label_end_of_loop:
1318       result = CODING_FINISH_INSUFFICIENT_SRC;
1319     label_end_of_loop_2:
1320       src = src_base;
1321       break;
1322     }
1323
1324   if (src < src_end)
1325     {
           /* Source text is left over although no problem was recorded
              inside the loop; report insufficient destination space.  */
1326       if (result == CODING_FINISH_NORMAL)
1327         result = CODING_FINISH_INSUFFICIENT_DST;
1328       else if (result != CODING_FINISH_INCONSISTENT_EOL
1329                && coding->mode & CODING_MODE_LAST_BLOCK)
1330         {
1331           /* This is the last block of the text to be decoded.  We had
1332              better just flush out all remaining codes in the text
1333              although they are not valid characters.  */
1334           src_bytes = src_end - src;
1335           if (dst_bytes && (dst_end - dst < src_bytes))
1336             src_bytes = dst_end - dst;
1337           bcopy (src, dst, src_bytes);
1338           dst += src_bytes;
1339           src += src_bytes;
1340           coding->fake_multibyte = 1;
1341         }
1342     }
1343
1344   coding->consumed = coding->consumed_char = src - source;
1345   coding->produced = dst - destination;
1346   return result;
1347 }
1348
1349 /* ISO2022 encoding stuff.  */
1350
1351 /*
1352    It is not enough to say just "ISO2022" on encoding, we have to
1353    specify more details.  In Emacs, each coding system of ISO2022
1354    variant has the following specifications:
1355         1. Initial designation to G0 thru G3.
1356         2. Allows short-form designation?
1357         3. ASCII should be designated to G0 before control characters?
1358         4. ASCII should be designated to G0 at end of line?
1359         5. 7-bit environment or 8-bit environment?
1360         6. Use locking-shift?
1361         7. Use Single-shift?
1362    And the following two are only for Japanese:
1363         8. Use ASCII in place of JISX0201-1976-Roman?
1364         9. Use JISX0208-1983 in place of JISX0208-1978?
1365    These specifications are encoded in `coding->flags' as flag bits
1366    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1367    details.
1368 */
1369
1370 /* Produce codes (escape sequence) for designating CHARSET to graphic
1371    register REG.  If <final-char> of CHARSET is '@', 'A', or 'B' and
1372    the coding system CODING allows, produce designation sequence of
1373    short-form.

        The bytes are written through DST, which is taken from the
        enclosing function and advanced; at most 7 bytes are produced:

          ESC '&' <'@' + revision>   only when the revision number that
                                     CODING holds for CHARSET is below 255
          ESC
          '$'                        only for a charset of dimension 2
          <intermediate>             REG-th character of "()*+" for a
                                     94-char set or of ",-./" for a
                                     96-char set
          <final-char>

        The intermediate character is omitted (short form) only for a
        94-char charset of dimension 2 when CODING has
        CODING_FLAG_ISO_SHORT_FORM, REG is 0, and <final-char> is in
        '@'..'B'.

        After writing, CHARSET is recorded as the designation of REG in
        CODING.  REG indexes the four-character intermediate strings, so
        it must be in the range 0..3.  */
1374
1375 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
1376   do {                                                                  \
1377     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
1378     char *intermediate_char_94 = "()*+";                                \
1379     char *intermediate_char_96 = ",-./";                                \
1380     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
1381     if (revision < 255)                                                 \
1382       {                                                                 \
1383         *dst++ = ISO_CODE_ESC;                                          \
1384         *dst++ = '&';                                                   \
1385         *dst++ = '@' + revision;                                        \
1386       }                                                                 \
1387     *dst++ = ISO_CODE_ESC;                                              \
1388     if (CHARSET_DIMENSION (charset) == 1)                               \
1389       {                                                                 \
1390         if (CHARSET_CHARS (charset) == 94)                              \
1391           *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
1392         else                                                            \
1393           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1394       }                                                                 \
1395     else                                                                \
1396       {                                                                 \
1397         *dst++ = '$';                                                   \
1398         if (CHARSET_CHARS (charset) == 94)                              \
1399           {                                                             \
1400             if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
1401                 || reg != 0                                             \
1402                 || final_char < '@' || final_char > 'B')                \
1403               *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
1404           }                                                             \
1405         else                                                            \
1406           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1407       }                                                                 \
1408     *dst++ = final_char;                                                \
1409     CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
1410   } while (0)
1411
1412 /* The following two macros produce codes (control character or escape
1413    sequence) for ISO2022 single-shift functions (single-shift-2 and
1414    single-shift-3).  */
1415
/* Emit single-shift-2 at DST: the two bytes ESC N when CODING is
   restricted to seven bits, otherwise the single byte ISO_CODE_SS2
   (in which case CODING->fake_multibyte is set).  Either way, note
   in CODING that a single shift is now pending.  */
#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        coding->fake_multibyte = 1;                             \
        *dst++ = ISO_CODE_SS2;                                  \
      }                                                         \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1427
/* Emit single-shift-3 at DST: the two bytes ESC O when CODING is
   restricted to seven bits, otherwise the single byte ISO_CODE_SS3
   (in which case CODING->fake_multibyte is set).  Either way, note
   in CODING that a single shift is now pending.  */
#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        coding->fake_multibyte = 1;                             \
        *dst++ = ISO_CODE_SS3;                                  \
      }                                                         \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1439
1440 /* The following four macros produce codes (control character or
1441    escape sequence) for ISO2022 locking-shift functions (shift-in,
1442    shift-out, locking-shift-2, and locking-shift-3).  */
1443
/* Emit SI and record that graphic register 0 is invoked to graphic
   plane 0.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)

/* Emit SO and record that graphic register 1 is invoked to graphic
   plane 0.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)

/* Emit ESC n (locking-shift-2) and record that graphic register 2 is
   invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
  } while (0)

/* Emit ESC o (locking-shift-3) and record that graphic register 3 is
   invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
  } while (0)
1467
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   The body is a loop: when CHARSET is neither covered by a pending
   single shift, nor invoked to graphic plane 0 or 1, nor rejected as
   unsafe, encode_invocation_designation is called and the tests are
   tried again.  CHARSET and C1 are evaluated several times, so they
   must be free of side effects; they are parenthesized so that
   expression arguments are handled correctly.  Uses `coding' and
   `dst' from the enclosing scope.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single shift was just produced; it covers exactly this    \
           one character.  */                                           \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F;                                         \
        else                                                            \
          *dst++ = (c1) | 0x80;                                         \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
             && !coding->safe_charsets[(charset)])                      \
      {                                                                 \
        /* We should not encode this character, instead produce one or  \
           two `?'s.  */                                                \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation ((charset), coding, dst);     \
  } while (1)
1511
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   The body is a loop: when CHARSET is neither covered by a pending
   single shift, nor invoked to graphic plane 0 or 1, nor rejected as
   unsafe, encode_invocation_designation is called and the tests are
   tried again.  CHARSET, C1 and C2 are evaluated several times, so
   they must be free of side effects; they are parenthesized so that
   expression arguments are handled correctly.  Uses `coding' and
   `dst' from the enclosing scope.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single shift was just produced; it covers exactly this    \
           one character.  */                                           \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                   \
        else                                                            \
          *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                   \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                     \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                     \
        break;                                                          \
      }                                                                 \
    else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
             && !coding->safe_charsets[(charset)])                      \
      {                                                                 \
        /* We should not encode this character, instead produce one or  \
           two `?'s.  */                                                \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation ((charset), coding, dst);     \
  } while (1)
1554
/* Produce codes for one character of CHARSET whose position-codes are
   C1 and C2 (C2 is a dummy for a DIMENSION1 charset).

   If `unification_table' is non-nil and unify_char returns a
   non-negative character for (CHARSET, C1, C2), that character is
   split back into a charset and position-codes by SPLIT_CHAR and is
   encoded instead; C1 and C2 are handed to SPLIT_CHAR as outputs, so
   they must be variables.  Independently of that, when CHARSET itself
   is ASCII and CODING_FLAG_ISO_USE_ROMAN is set, the character is
   encoded as charset_latin_jisx0201, and when CHARSET itself is
   charset_jisx0208 and CODING_FLAG_ISO_USE_OLDJIS is set, it is
   encoded as charset_jisx0208_1978.

   CODING->consumed_char is incremented unless a composition is in
   progress.  */
#define ENCODE_ISO_CHARACTER(charset, c1, c2)                             \
  do {                                                                    \
    int charset_alt = charset;                                            \
    int c_alt;                                                            \
    if (!NILP (unification_table))                                        \
      {                                                                   \
        c_alt = unify_char (unification_table, -1, charset, c1, c2);      \
        if (c_alt >= 0)                                                   \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                        \
      }                                                                   \
    if (CHARSET_DIMENSION (charset_alt) != 1)                             \
      {                                                                   \
        if (coding->flags & CODING_FLAG_ISO_USE_OLDJIS                    \
            && charset == charset_jisx0208)                               \
          charset_alt = charset_jisx0208_1978;                            \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);            \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        if (coding->flags & CODING_FLAG_ISO_USE_ROMAN                     \
            && charset == CHARSET_ASCII)                                  \
          charset_alt = charset_latin_jisx0201;                           \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);                \
      }                                                                   \
    if (! COMPOSING_P (coding->composing))                                \
      coding->consumed_char += 1;                                         \
  } while (0)
1581
1582 /* Produce designation and invocation codes at a place pointed by DST
1583    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1584    Return new DST.  */
1585
1586 unsigned char *
1587 encode_invocation_designation (charset, coding, dst)
1588      int charset;
1589      struct coding_system *coding;
1590      unsigned char *dst;
1591 {
1592   int reg;                      /* graphic register number */
1593
1594   /* At first, check designations.  */
1595   for (reg = 0; reg < 4; reg++)
1596     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1597       break;
1598
1599   if (reg >= 4)
1600     {
1601       /* CHARSET is not yet designated to any graphic registers.  */
1602       /* At first check the requested designation.  */
1603       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1604       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1605         /* Since CHARSET requests no special designation, designate it
1606            to graphic register 0.  */
1607         reg = 0;
1608
1609       ENCODE_DESIGNATION (charset, reg, coding);
1610     }
1611
1612   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1613       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1614     {
1615       /* Since the graphic register REG is not invoked to any graphic
1616          planes, invoke it to graphic plane 0.  */
1617       switch (reg)
1618         {
1619         case 0:                 /* graphic register 0 */
1620           ENCODE_SHIFT_IN;
1621           break;
1622
1623         case 1:                 /* graphic register 1 */
1624           ENCODE_SHIFT_OUT;
1625           break;
1626
1627         case 2:                 /* graphic register 2 */
1628           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1629             ENCODE_SINGLE_SHIFT_2;
1630           else
1631             ENCODE_LOCKING_SHIFT_2;
1632           break;
1633
1634         case 3:                 /* graphic register 3 */
1635           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1636             ENCODE_SINGLE_SHIFT_3;
1637           else
1638             ENCODE_LOCKING_SHIFT_3;
1639           break;
1640         }
1641     }
1642   return dst;
1643 }
1644
/* The following three macros produce codes for indicating composition:
   the start of a composition without rules (ESC 0), the start of a
   composition with rules (ESC 2), and the end of a composition
   (ESC 1).  Each expands to one parenthesized expression that writes
   two bytes through `dst', so it stays a single unit wherever an
   expression or expression statement is allowed.  */
#define ENCODE_COMPOSITION_NO_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '0')
#define ENCODE_COMPOSITION_WITH_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '2')
#define ENCODE_COMPOSITION_END \
  (*dst++ = ISO_CODE_ESC, *dst++ = '1')
1649
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the two bytes ESC [ when
   the CODING_FLAG_ISO_SEVEN_BITS bit is set in CODING->flags, and the
   single byte ISO_CODE_CSI otherwise.  The flag is tested as a bit,
   because CODING->flags may carry other flags at the same time.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R emit that introducer
   followed by `2]' and `0]' respectively.  All three are statement
   macros and use `coding' and `dst' from the enclosing scope.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2', *dst++ = ']';                 \
  } while (0)

#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0', *dst++ = ']';                 \
  } while (0)
1665
/* Produce whatever codes are needed to put the graphic planes and
   registers back into their initial state: a shift-in when graphic
   plane 0 does not currently invoke graphic register 0, and then a
   designation for every graphic register whose initial charset is
   non-negative and differs from the charset designated to it now.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg = 0;                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reg < 4)                                                         \
      {                                                                     \
        int initial_charset                                                 \
          = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg);              \
        if (initial_charset >= 0                                            \
            && initial_charset != CODING_SPEC_ISO_DESIGNATION (coding, reg)) \
          ENCODE_DESIGNATION (initial_charset, reg, coding);                \
        reg++;                                                              \
      }                                                                     \
  } while (0)
1680
1681 /* Produce designation sequences of charsets in the line started from
1682    SRC to a place pointed by *DSTP, and update DSTP.
1683
1684    If the current block ends before any end-of-line, we may fail to
1685    find all the necessary designations.  */
1686
1687 void
1688 encode_designation_at_bol (coding, table, src, src_end, dstp)
1689      struct coding_system *coding;
1690      Lisp_Object table;
1691      unsigned char *src, *src_end, **dstp;
1692 {
1693   int charset, c, found = 0, reg;
1694   /* Table of charsets to be designated to each graphic register.  */
1695   int r[4];
1696   unsigned char *dst = *dstp;
1697
1698   for (reg = 0; reg < 4; reg++)
1699     r[reg] = -1;
1700
1701   while (src < src_end && *src != '\n' && found < 4)
1702     {
1703       int bytes = BYTES_BY_CHAR_HEAD (*src);
1704
1705       if (NILP (table))
1706         charset = CHARSET_AT (src);
1707       else
1708         {
1709           int c_alt;
1710           unsigned char c1, c2;
1711
1712           SPLIT_STRING(src, bytes, charset, c1, c2);
1713           if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
1714             charset = CHAR_CHARSET (c_alt);
1715         }
1716
1717       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1718       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1719         {
1720           found++;
1721           r[reg] = charset;
1722         }
1723
1724       src += bytes;
1725     }
1726
1727   if (found)
1728     {
1729       for (reg = 0; reg < 4; reg++)
1730         if (r[reg] >= 0
1731             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1732           ENCODE_DESIGNATION (r[reg], reg, coding);
1733       *dstp = dst;
1734     }
1735 }
1736
1737 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".

     Encode the SRC_BYTES bytes at SOURCE and write the ISO2022 result
     at DESTINATION.  CODING->consumed_char and CODING->fake_multibyte
     are cleared on entry.  On exit CODING->consumed is the number of
     source bytes read, and CODING->produced and CODING->produced_char
     are both the number of bytes written.

     The value is CODING_FINISH_NORMAL, or
     CODING_FINISH_INSUFFICIENT_SRC when the loop is left through
     label_end_of_loop, or CODING_FINISH_INSUFFICIENT_DST when the loop
     stops normally while source text is still unread.  */
1738
1739 int
1740 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1741      struct coding_system *coding;
1742      unsigned char *source, *destination;
1743      int src_bytes, dst_bytes;
1744 {
1745   unsigned char *src = source;
1746   unsigned char *src_end = source + src_bytes;
1747   unsigned char *dst = destination;
1748   unsigned char *dst_end = destination + dst_bytes;
1749   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1750      from DST_END so that overflow needs to be checked only at the
1751      head of the loop.  */
1752   unsigned char *adjusted_dst_end = dst_end - 19;
       /* The table of CODING is used if it is non-nil; otherwise the
          standard table is used when Venable_character_unification is
          non-nil.  */
1753   Lisp_Object unification_table
1754       = coding->character_unification_table_for_encode;
1755   int result = CODING_FINISH_NORMAL;
1756
1757   if (!NILP (Venable_character_unification) && NILP (unification_table))
1758     unification_table = Vstandard_character_unification_table_for_encode;
1759
1760   coding->consumed_char = 0;
1761   coding->fake_multibyte = 0;
       /* When DST_BYTES is zero, the output is bounded by SRC - 19
          instead of by ADJUSTED_DST_END.  NOTE(review): presumably this
          is the case where DESTINATION precedes SOURCE in one buffer --
          confirm against the callers.  */
1762   while (src < src_end && (dst_bytes
1763                            ? (dst < adjusted_dst_end)
1764                            : (dst < src - 19)))
1765     {
1766       /* SRC_BASE remembers the start position in source in each loop.
1767          The loop will be exited when there's not enough source text
1768          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1769          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1770          reset to SRC_BASE before exiting.  */
1771       unsigned char *src_base = src;
1772       int charset, c1, c2, c3, c4;
1773
1774       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1775           && CODING_SPEC_ISO_BOL (coding))
1776         {
1777           /* We have to produce designation sequences if any now.  */
1778           encode_designation_at_bol (coding, unification_table,
1779                                      src, src_end, &dst);
1780           CODING_SPEC_ISO_BOL (coding) = 0;
1781         }
1782
1783       c1 = *src++;
1784       /* If we are seeing a component of a composite character, we are
1785          seeing a leading-code encoded irregularly for composition, or
1786          a composition rule if composing with rule.  We must set C1 to
1787          a normal leading-code or an ASCII code.  If we are not seeing
1788          a composite character, we must reset composition,
1789          designation, and invocation states.  */
1790       if (COMPOSING_P (coding->composing))
1791         {
1792           if (c1 < 0xA0)
1793             {
1794               /* We are not in a composite character any longer.  */
1795               coding->composing = COMPOSING_NO;
1796               ENCODE_RESET_PLANE_AND_REGISTER;
1797               ENCODE_COMPOSITION_END;
1798             }
1799           else
1800             {
1801               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1802                 {
                       /* C1 is a composition rule; produce it with the
                          eighth bit cleared and expect a component
                          next.  */
1803                   *dst++ = c1 & 0x7F;
1804                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1805                   continue;
1806                 }
1807               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1808                 coding->composing = COMPOSING_WITH_RULE_RULE;
1809               if (c1 == 0xA0)
1810                 {
1811                   /* This is an ASCII component.  */
1812                   ONE_MORE_BYTE (c1);
1813                   c1 &= 0x7F;
1814                 }
1815               else
1816                 /* This is a leading-code of non ASCII component.  */
1817                 c1 -= 0x20;
1818             }
1819         }
1820
1821       /* Now encode one character.  C1 is a control character, an
1822          ASCII character, or a leading-code of multi-byte character.  */
1823       switch (emacs_code_class[c1])
1824         {
1825         case EMACS_ascii_code:
1826           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1827           break;
1828
1829         case EMACS_control_code:
1830           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1831             ENCODE_RESET_PLANE_AND_REGISTER;
1832           *dst++ = c1;
1833           coding->consumed_char++;
1834           break;
1835
1836         case EMACS_carriage_return_code:
1837           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1838             {
1839               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1840                 ENCODE_RESET_PLANE_AND_REGISTER;
1841               *dst++ = c1;
1842               coding->consumed_char++;
1843               break;
1844             }
1845           /* fall down to treat '\r' as '\n' ...  */
1846
1847         case EMACS_linefeed_code:
1848           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1849             ENCODE_RESET_PLANE_AND_REGISTER;
               /* With CODING_FLAG_ISO_INIT_AT_BOL, only the recorded
                  designation state is put back to the initial one; no
                  bytes are produced for that here.  */
1850           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1851             bcopy (coding->spec.iso2022.initial_designation,
1852                    coding->spec.iso2022.current_designation,
1853                    sizeof coding->spec.iso2022.initial_designation);
               /* Produce the end-of-line in the format selected by
                  CODING->eol_type: LF (also used when undecided), CR LF,
                  or CR for any other value.  */
1854           if (coding->eol_type == CODING_EOL_LF
1855               || coding->eol_type == CODING_EOL_UNDECIDED)
1856             *dst++ = ISO_CODE_LF;
1857           else if (coding->eol_type == CODING_EOL_CRLF)
1858             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1859           else
1860             *dst++ = ISO_CODE_CR;
1861           CODING_SPEC_ISO_BOL (coding) = 1;
1862           coding->consumed_char++;
1863           break;
1864
1865         case EMACS_leading_code_2:
1866           ONE_MORE_BYTE (c2);
1867           if (c2 < 0xA0)
1868             {
1869               /* invalid sequence */
1870               *dst++ = c1;
1871               *dst++ = c2;
1872               coding->consumed_char += 2;
1873             }
1874           else
1875             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1876           break;
1877
1878         case EMACS_leading_code_3:
1879           TWO_MORE_BYTES (c2, c3);
1880           if (c2 < 0xA0 || c3 < 0xA0)
1881             {
1882               /* invalid sequence */
1883               *dst++ = c1;
1884               *dst++ = c2;
1885               *dst++ = c3;
1886               coding->consumed_char += 3;
1887             }
               /* Below LEADING_CODE_PRIVATE_11, C1 itself is passed as
                  the charset; otherwise C2 is passed as the charset and
                  C3 as the only position-code.  */
1888           else if (c1 < LEADING_CODE_PRIVATE_11)
1889             ENCODE_ISO_CHARACTER (c1, c2, c3);
1890           else
1891             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1892           break;
1893
1894         case EMACS_leading_code_4:
1895           THREE_MORE_BYTES (c2, c3, c4);
1896           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1897             {
1898               /* invalid sequence */
1899               *dst++ = c1;
1900               *dst++ = c2;
1901               *dst++ = c3;
1902               *dst++ = c4;
1903               coding->consumed_char += 4;
1904             }
1905           else
1906             ENCODE_ISO_CHARACTER (c2, c3, c4);
1907           break;
1908
1909         case EMACS_leading_code_composition:
1910           ONE_MORE_BYTE (c2);
1911           if (c2 < 0xA0)
1912             {
1913               /* invalid sequence */
1914               *dst++ = c1;
1915               *dst++ = c2;
1916               coding->consumed_char += 2;
1917             }
1918           else if (c2 == 0xFF)
1919             {
                   /* A second byte of 0xFF starts a composition with
                      rules.  */
1920               ENCODE_RESET_PLANE_AND_REGISTER;
1921               coding->composing = COMPOSING_WITH_RULE_HEAD;
1922               ENCODE_COMPOSITION_WITH_RULE_START;
1923               coding->consumed_char++;
1924             }
1925           else
1926             {
1927               ENCODE_RESET_PLANE_AND_REGISTER;
1928               /* Rewind one byte because it is a character code of
1929                  composition elements.  */
1930               src--;
1931               coding->composing = COMPOSING_NO_RULE_HEAD;
1932               ENCODE_COMPOSITION_NO_RULE_START;
1933               coding->consumed_char++;
1934             }
1935           break;
1936
1937         case EMACS_invalid_code:
1938           *dst++ = c1;
1939           coding->consumed_char++;
1940           break;
1941         }
1942       continue;
           /* Not reached by falling through, because of the `continue'
              above.  NOTE(review): presumably the jump target of
              ONE_MORE_BYTE, TWO_MORE_BYTES and THREE_MORE_BYTES when
              the source runs out -- they are defined elsewhere.  */
1943     label_end_of_loop:
1944       result = CODING_FINISH_INSUFFICIENT_SRC;
1945       src = src_base;
1946       break;
1947     }
1948
       /* NOTE(review): as written, the reset below is reached only when
          source text remains AND RESULT is no longer
          CODING_FINISH_NORMAL (the insufficient-source exit).  A last
          block whose source was consumed completely is not reset
          here -- confirm that this is intended.  */
1949   if (src < src_end)
1950     {
1951       if (result == CODING_FINISH_NORMAL)
1952         result = CODING_FINISH_INSUFFICIENT_DST;
1953       else
1954         /* If this is the last block of the text to be encoded, we
1955            must reset graphic planes and registers to the initial
1956            state, and flush out the carryover if any.  */
1957         if (coding->mode & CODING_MODE_LAST_BLOCK)
1958           ENCODE_RESET_PLANE_AND_REGISTER;
1959     }
1960
1961   coding->consumed = src - source;
1962   coding->produced = coding->produced_char = dst - destination;
1963   return result;
1964 }
1965
1966 \f
1967 /*** 4. SJIS and BIG5 handlers ***/
1968
1969 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1970    quite widely.  So, for the moment, Emacs supports them in the bare
1971    C code.  But, in the future, they may be supported only by CCL.  */
1972
1973 /* SJIS is a coding system encoding three character sets: ASCII, right
1974    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
1975    as is.  A character of charset katakana-jisx0201 is encoded by
1976    "position-code + 0x80".  A character of charset japanese-jisx0208
1977    is encoded in two bytes, but its two position-codes are divided and
1978    shifted so that they fit in the ranges below.
1979
1980    --- CODE RANGE of SJIS ---
1981    (character set)      (range)
1982    ASCII                0x00 .. 0x7F
1983    KATAKANA-JISX0201    0xA0 .. 0xDF
1984    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xFF
1985             (2nd byte)  0x40 .. 0xFF
1986    -------------------------------
1987
1988 */
1989
1990 /* BIG5 is a coding system encoding two character sets: ASCII and
1991    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
1992    character set and is encoded in two-byte.
1993
1994    --- CODE RANGE of BIG5 ---
1995    (character set)      (range)
1996    ASCII                0x00 .. 0x7F
1997    Big5 (1st byte)      0xA1 .. 0xFE
1998         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
1999    --------------------------
2000
2001    Since the number of characters in Big5 is larger than maximum
2002    characters in Emacs' charset (96x96), it can't be handled as one
2003    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2004    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2005    contains frequently used characters and the latter contains less
2006    frequently used characters.  */
2007
2008 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2009    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2010    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2011    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2012
/* Number of Big5 characters which have the same code in 1st byte:
   the 2nd byte takes 0xFF - 0xA1 values in its upper range plus
   0x7F - 0x40 values in its lower range.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Decode the BIG5 byte pair B1, B2 into CHARSET (set to
   charset_big5_1 when B1 is below 0xC9, else charset_big5_2) and the
   position-codes C1 and C2.  B1 and B2 are inputs and may be arbitrary
   expressions; B2 is evaluated twice, so it must be free of side
   effects.  CHARSET, C1 and C2 are outputs and must be lvalues.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    /* Linear index of the character: row times row length plus the    \
       column, where the two ranges of the 2nd byte are made           \
       contiguous.  */                                                  \
    unsigned int temp                                                   \
      = ((b1) - 0xA1) * BIG5_SAME_ROW                                   \
        + (b2) - ((b2) < 0x7F ? 0x40 : 0x62);                           \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* The inverse of DECODE_BIG5: encode the character of CHARSET
   (charset_big5_1 or charset_big5_2) whose position-codes are C1 and
   C2 into the BIG5 byte pair B1, B2.  CHARSET, C1 and C2 are inputs
   and may be arbitrary expressions; B1 and B2 are outputs and must be
   lvalues.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    /* Split the column back into the two ranges of the 2nd byte.  */   \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2040
/* Decode the character of CHARSET whose position-codes are C1 and C2.
   If `unification_table' is non-nil and unify_char returns a
   non-negative character for it, that character is split by
   SPLIT_CHAR and decoded instead; C1 and C2 are handed to SPLIT_CHAR
   as outputs, so they must be variables.  The result is passed to
   DECODE_CHARACTER_ASCII when the charset is ASCII or negative, and
   otherwise to DECODE_CHARACTER_DIMENSION1 or
   DECODE_CHARACTER_DIMENSION2 according to the charset's dimension.  */
#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
  do {                                                                  \
    int charset_alt = (charset);                                        \
    int c_alt;                                                          \
    if (!NILP (unification_table))                                      \
      {                                                                 \
        c_alt = unify_char (unification_table, -1, (charset), c1, c2);  \
        if (c_alt >= 0)                                                 \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
      }                                                                 \
    if (charset_alt != CHARSET_ASCII && charset_alt >= 0)               \
      {                                                                 \
        if (CHARSET_DIMENSION (charset_alt) != 1)                       \
          {                                                             \
            DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);          \
          }                                                             \
        else                                                            \
          {                                                             \
            DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);              \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        DECODE_CHARACTER_ASCII (c1);                                    \
      }                                                                 \
  } while (0)
2055
/* Encode one character of CHARSET with position codes C1 and C2
   (callers pass a dummy C2 for one-byte characters) and store the
   result at `dst', advancing it.  The expansion uses the variables
   `unification_table', `sjis_p', `dst' and `coding' of the
   surrounding function.  C1 and C2 must be modifiable lvalues: they
   may be replaced via SPLIT_CHAR and are masked with 0x7F for
   two-dimensional charsets.

   Output per character:
     - ASCII: one byte;
     - dimension 1: one byte for JISX0201 Katakana when SJIS_P,
       otherwise the charset byte followed by C1 (two raw bytes);
     - dimension 2: the SJIS pair for JISX0208 when SJIS_P, the BIG5
       pair for the Big5 charsets when not SJIS_P, otherwise the
       charset byte followed by C1 and C2 (three raw bytes).
   `coding->fake_multibyte' is set wherever the code below stores
   bytes that are not a plain ASCII or BIG5 result, and
   `coding->consumed_char' is incremented once.

   The expansion is a single statement WITHOUT a trailing semicolon;
   write `ENCODE_SJIS_BIG5_CHARACTER (...);' so that it can be used
   as the body of an `if' that has an `else'.  */

#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                       \
  do {                                                                    \
    int c_alt, charset_alt;                                               \
    if (!NILP (unification_table)                                         \
        && ((c_alt = unify_char (unification_table, -1, (charset),        \
                                 c1, c2))                                 \
            >= 0))                                                        \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                            \
    else                                                                  \
      charset_alt = (charset);                                            \
    if (charset_alt == charset_ascii)                                     \
      *dst++ = c1;                                                        \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                        \
      {                                                                   \
        if (sjis_p && charset_alt == charset_katakana_jisx0201)           \
          *dst++ = c1;                                                    \
        else                                                              \
          {                                                               \
            /* Not representable: pass the internal bytes through.  */    \
            *dst++ = charset_alt, *dst++ = c1;                            \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        c1 &= 0x7F, c2 &= 0x7F;                                           \
        if (sjis_p && charset_alt == charset_jisx0208)                    \
          {                                                               \
            unsigned char s1, s2;                                         \
                                                                          \
            ENCODE_SJIS (c1, c2, s1, s2);                                 \
            *dst++ = s1, *dst++ = s2;                                     \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
        else if (!sjis_p                                                  \
                 && (charset_alt == charset_big5_1                        \
                     || charset_alt == charset_big5_2))                   \
          {                                                               \
            unsigned char b1, b2;                                         \
                                                                          \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);                    \
            *dst++ = b1, *dst++ = b2;                                     \
          }                                                               \
        else                                                              \
          {                                                               \
            /* Not representable: pass the internal bytes through.  */    \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;               \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    coding->consumed_char++;                                              \
  } while (0)
2105
2106 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2107    Check if a text is encoded in SJIS.  If it is, return
2108    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2109
2110 int
2111 detect_coding_sjis (src, src_end)
2112      unsigned char *src, *src_end;
2113 {
2114   unsigned char c;
2115
2116   while (src < src_end)
2117     {
2118       c = *src++;
2119       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2120         {
2121           if (src < src_end && *src++ < 0x40)
2122             return 0;
2123         }
2124     }
2125   return CODING_CATEGORY_MASK_SJIS;
2126 }
2127
2128 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2129    Check if a text is encoded in BIG5.  If it is, return
2130    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2131
2132 int
2133 detect_coding_big5 (src, src_end)
2134      unsigned char *src, *src_end;
2135 {
2136   unsigned char c;
2137
2138   while (src < src_end)
2139     {
2140       c = *src++;
2141       if (c >= 0xA1)
2142         {
2143           if (src >= src_end)
2144             break;
2145           c = *src++;
2146           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2147             return 0;
2148         }
2149     }
2150   return CODING_CATEGORY_MASK_BIG5;
2151 }
2152
2153 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2154    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2155
2156 int
2157 decode_coding_sjis_big5 (coding, source, destination,
2158                          src_bytes, dst_bytes, sjis_p)
2159      struct coding_system *coding;
2160      unsigned char *source, *destination;
2161      int src_bytes, dst_bytes;
2162      int sjis_p;
2163 {
2164   unsigned char *src = source;
2165   unsigned char *src_end = source + src_bytes;
2166   unsigned char *dst = destination;
2167   unsigned char *dst_end = destination + dst_bytes;
2168   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2169      from DST_END to assure overflow checking is necessary only at the
2170      head of loop.  */
2171   unsigned char *adjusted_dst_end = dst_end - 3;
2172   Lisp_Object unification_table
2173       = coding->character_unification_table_for_decode;
2174   int result = CODING_FINISH_NORMAL;
2175
2176   if (!NILP (Venable_character_unification) && NILP (unification_table))
2177     unification_table = Vstandard_character_unification_table_for_decode;
2178
2179   coding->produced_char = 0;
2180   coding->fake_multibyte = 0;
2181   while (src < src_end && (dst_bytes
2182                            ? (dst < adjusted_dst_end)
2183                            : (dst < src - 3)))
2184     {
2185       /* SRC_BASE remembers the start position in source in each loop.
2186          The loop will be exited when there's not enough source text
2187          to analyze two-byte character (within macro ONE_MORE_BYTE).
2188          In that case, SRC is reset to SRC_BASE before exiting.  */
2189       unsigned char *src_base = src;
2190       unsigned char c1 = *src++, c2, c3, c4;
2191
2192       if (c1 < 0x20)
2193         {
2194           if (c1 == '\r')
2195             {
2196               if (coding->eol_type == CODING_EOL_CRLF)
2197                 {
2198                   ONE_MORE_BYTE (c2);
2199                   if (c2 == '\n')
2200                     *dst++ = c2;
2201                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2202                     {
2203                       result = CODING_FINISH_INCONSISTENT_EOL;
2204                       goto label_end_of_loop_2;
2205                     }
2206                   else
2207                     /* To process C2 again, SRC is subtracted by 1.  */
2208                     *dst++ = c1, src--;
2209                 }
2210               else if (coding->eol_type == CODING_EOL_CR)
2211                 *dst++ = '\n';
2212               else
2213                 *dst++ = c1;
2214             }
2215           else if (c1 == '\n'
2216                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2217                    && (coding->eol_type == CODING_EOL_CR
2218                        || coding->eol_type == CODING_EOL_CRLF))
2219             {
2220               result = CODING_FINISH_INCONSISTENT_EOL;
2221               goto label_end_of_loop_2;
2222             }
2223           else
2224             *dst++ = c1;
2225           coding->produced_char++;
2226         }
2227       else if (c1 < 0x80)
2228         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2229       else if (c1 < 0xA0)
2230         {
2231           /* SJIS -> JISX0208 */
2232           if (sjis_p)
2233             {
2234               ONE_MORE_BYTE (c2);
2235               if (c2 >= 0x40)
2236                 {
2237                   DECODE_SJIS (c1, c2, c3, c4);
2238                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2239                 }
2240               else
2241                 goto label_invalid_code_2;
2242             }
2243           else
2244             goto label_invalid_code_1;
2245         }
2246       else if (c1 < 0xE0)
2247         {
2248           /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
2249           if (sjis_p)
2250             DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2251                                         /* dummy */ c2);
2252           else
2253             {
2254               int charset;
2255
2256               ONE_MORE_BYTE (c2);
2257               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2258                 {
2259                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2260                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2261                 }
2262               else
2263                 goto label_invalid_code_2;
2264             }
2265         }
2266       else                      /* C1 >= 0xE0 */
2267         {
2268           /* SJIS -> JISX0208, BIG5 -> Big5 */
2269           if (sjis_p)
2270             {
2271               ONE_MORE_BYTE (c2);
2272               if (c2 >= 0x40)
2273                 {
2274                   DECODE_SJIS (c1, c2, c3, c4);
2275                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2276                 }
2277               else
2278                 goto label_invalid_code_2;
2279             }
2280           else
2281             {
2282               int charset;
2283
2284               ONE_MORE_BYTE (c2);
2285               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2286                 {
2287                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2288                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2289                 }
2290               else
2291                 goto label_invalid_code_2;
2292             }
2293         }
2294       continue;
2295
2296     label_invalid_code_1:
2297       *dst++ = c1;
2298       coding->produced_char++;
2299       coding->fake_multibyte = 1;
2300       continue;
2301
2302     label_invalid_code_2:
2303       *dst++ = c1; *dst++= c2;
2304       coding->produced_char += 2;
2305       coding->fake_multibyte = 1;
2306       continue;
2307
2308     label_end_of_loop:
2309       result = CODING_FINISH_INSUFFICIENT_SRC;
2310     label_end_of_loop_2:
2311       src = src_base;
2312       break;
2313     }
2314
2315   if (src < src_end)
2316     {
2317       if (result == CODING_FINISH_NORMAL)
2318         result = CODING_FINISH_INSUFFICIENT_DST;
2319       else if (result != CODING_FINISH_INCONSISTENT_EOL
2320                && coding->mode & CODING_MODE_LAST_BLOCK)
2321         {
2322           src_bytes = src_end - src;
2323           if (dst_bytes && (dst_end - dst < src_bytes))
2324             src_bytes = dst_end - dst;
2325           bcopy (dst, src, src_bytes);
2326           src += src_bytes;
2327           dst += src_bytes;
2328           coding->fake_multibyte = 1;
2329         }
2330     }
2331
2332   coding->consumed = coding->consumed_char = src - source;
2333   coding->produced = dst - destination;
2334   return result;
2335 }
2336
2337 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2338    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2339    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
2340    sure that all these charsets are registered as official charset
2341    (i.e. do not have extended leading-codes).  Characters of other
2342    charsets are produced without any encoding.  If SJIS_P is 1, encode
2343    SJIS text, else encode BIG5 text.  */
2344
2345 int
2346 encode_coding_sjis_big5 (coding, source, destination,
2347                          src_bytes, dst_bytes, sjis_p)
2348      struct coding_system *coding;
2349      unsigned char *source, *destination;
2350      int src_bytes, dst_bytes;
2351      int sjis_p;
2352 {
2353   unsigned char *src = source;
2354   unsigned char *src_end = source + src_bytes;
2355   unsigned char *dst = destination;
2356   unsigned char *dst_end = destination + dst_bytes;
2357   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2358      from DST_END to assure overflow checking is necessary only at the
2359      head of loop.  */
2360   unsigned char *adjusted_dst_end = dst_end - 1;
2361   Lisp_Object unification_table
2362       = coding->character_unification_table_for_encode;
2363   int result = CODING_FINISH_NORMAL;
2364
2365   if (!NILP (Venable_character_unification) && NILP (unification_table))
2366     unification_table = Vstandard_character_unification_table_for_encode;
2367
2368   coding->consumed_char = 0;
2369   coding->fake_multibyte = 0;
2370   while (src < src_end && (dst_bytes
2371                            ? (dst < adjusted_dst_end)
2372                            : (dst < src - 1)))
2373     {
2374       /* SRC_BASE remembers the start position in source in each loop.
2375          The loop will be exited when there's not enough source text
2376          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2377          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2378          before exiting.  */
2379       unsigned char *src_base = src;
2380       unsigned char c1 = *src++, c2, c3, c4;
2381
2382       if (coding->composing)
2383         {
2384           if (c1 == 0xA0)
2385             {
2386               ONE_MORE_BYTE (c1);
2387               c1 &= 0x7F;
2388             }
2389           else if (c1 >= 0xA0)
2390             c1 -= 0x20;
2391           else
2392             coding->composing = 0;
2393         }
2394
2395       switch (emacs_code_class[c1])
2396         {
2397         case EMACS_ascii_code:
2398           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2399           break;
2400
2401         case EMACS_control_code:
2402           *dst++ = c1;
2403           coding->consumed_char++;
2404           break;
2405
2406         case EMACS_carriage_return_code:
2407           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2408             {
2409               *dst++ = c1;
2410               coding->consumed_char++;
2411               break;
2412             }
2413           /* fall down to treat '\r' as '\n' ...  */
2414
2415         case EMACS_linefeed_code:
2416           if (coding->eol_type == CODING_EOL_LF
2417               || coding->eol_type == CODING_EOL_UNDECIDED)
2418             *dst++ = '\n';
2419           else if (coding->eol_type == CODING_EOL_CRLF)
2420             *dst++ = '\r', *dst++ = '\n';
2421           else
2422             *dst++ = '\r';
2423           coding->consumed_char++;
2424           break;
2425
2426         case EMACS_leading_code_2:
2427           ONE_MORE_BYTE (c2);
2428           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2429           break;
2430
2431         case EMACS_leading_code_3:
2432           TWO_MORE_BYTES (c2, c3);
2433           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2434           break;
2435
2436         case EMACS_leading_code_4:
2437           THREE_MORE_BYTES (c2, c3, c4);
2438           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2439           break;
2440
2441         case EMACS_leading_code_composition:
2442           coding->composing = 1;
2443           break;
2444
2445         default:                /* i.e. case EMACS_invalid_code: */
2446           *dst++ = c1;
2447           coding->consumed_char++;
2448         }
2449       continue;
2450
2451     label_end_of_loop:
2452       result = CODING_FINISH_INSUFFICIENT_SRC;
2453       src = src_base;
2454       break;
2455     }
2456
2457   if (result == CODING_FINISH_NORMAL
2458       && src < src_end)
2459     result = CODING_FINISH_INSUFFICIENT_DST;
2460   coding->consumed = src - source;
2461   coding->produced = coding->produced_char = dst - destination;
2462   return result;
2463 }
2464
2465 \f
2466 /*** 5. End-of-line handlers ***/
2467
2468 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2469    This function is called only when `coding->eol_type' is
2470    CODING_EOL_CRLF or CODING_EOL_CR.  */
2471
2472 int
2473 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2474      struct coding_system *coding;
2475      unsigned char *source, *destination;
2476      int src_bytes, dst_bytes;
2477 {
2478   unsigned char *src = source;
2479   unsigned char *src_end = source + src_bytes;
2480   unsigned char *dst = destination;
2481   unsigned char *dst_end = destination + dst_bytes;
2482   unsigned char c;
2483   int result = CODING_FINISH_NORMAL;
2484
2485   coding->fake_multibyte = 0;
2486
2487   if (src_bytes <= 0)
2488     return result;
2489
2490   switch (coding->eol_type)
2491     {
2492     case CODING_EOL_CRLF:
2493       {
2494         /* Since the maximum bytes produced by each loop is 2, we
2495            subtract 1 from DST_END to assure overflow checking is
2496            necessary only at the head of loop.  */
2497         unsigned char *adjusted_dst_end = dst_end - 1;
2498
2499         while (src < src_end && (dst_bytes
2500                                  ? (dst < adjusted_dst_end)
2501                                  : (dst < src - 1)))
2502           {
2503             unsigned char *src_base = src;
2504
2505             c = *src++;
2506             if (c == '\r')
2507               {
2508                 ONE_MORE_BYTE (c);
2509                 if (c != '\n')
2510                   {
2511                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2512                       {
2513                         result = CODING_FINISH_INCONSISTENT_EOL;
2514                         goto label_end_of_loop_2;
2515                       }
2516                     *dst++ = '\r';
2517                     if (BASE_LEADING_CODE_P (c))
2518                       coding->fake_multibyte = 1;
2519                   }
2520                 *dst++ = c;
2521               }
2522             else if (c == '\n'
2523                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2524               {
2525                 result = CODING_FINISH_INCONSISTENT_EOL;
2526                 goto label_end_of_loop_2;
2527               }
2528             else
2529               {
2530                 *dst++ = c;
2531                 if (BASE_LEADING_CODE_P (c))
2532                   coding->fake_multibyte = 1;
2533               }
2534             continue;
2535
2536           label_end_of_loop:
2537             result = CODING_FINISH_INSUFFICIENT_SRC;
2538           label_end_of_loop_2:
2539             src = src_base;
2540             break;
2541           }
2542         if (result == CODING_FINISH_NORMAL
2543             && src < src_end)
2544           result = CODING_FINISH_INSUFFICIENT_DST;
2545       }
2546       break;
2547
2548     case CODING_EOL_CR:
2549       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2550         {
2551           while (src < src_end)
2552             {
2553               if ((c = *src++) == '\n')
2554                 break;
2555               if (BASE_LEADING_CODE_P (c))
2556                 coding->fake_multibyte = 1;
2557             }
2558           if (*--src == '\n')
2559             {
2560               src_bytes = src - source;
2561               result = CODING_FINISH_INCONSISTENT_EOL;
2562             }
2563         }
2564       if (dst_bytes && src_bytes > dst_bytes)
2565         {
2566           result = CODING_FINISH_INSUFFICIENT_DST;
2567           src_bytes = dst_bytes;
2568         }
2569       if (dst_bytes)
2570         bcopy (source, destination, src_bytes);
2571       else
2572         safe_bcopy (source, destination, src_bytes);
2573       src = source + src_bytes;
2574       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2575       break;
2576
2577     default:                    /* i.e. case: CODING_EOL_LF */
2578       if (dst_bytes && src_bytes > dst_bytes)
2579         {
2580           result = CODING_FINISH_INSUFFICIENT_DST;
2581           src_bytes = dst_bytes;
2582         }
2583       if (dst_bytes)
2584         bcopy (source, destination, src_bytes);
2585       else
2586         safe_bcopy (source, destination, src_bytes);
2587       src += src_bytes;
2588       dst += dst_bytes;
2589       coding->fake_multibyte = 1;
2590       break;
2591     }
2592
2593   coding->consumed = coding->consumed_char = src - source;
2594   coding->produced = coding->produced_char = dst - destination;
2595   return result;
2596 }
2597
2598 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2599    format of end-of-line according to `coding->eol_type'.  If
2600    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2601    '\r' in source text also means end-of-line.  */
2602
2603 int
2604 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2605      struct coding_system *coding;
2606      unsigned char *source, *destination;
2607      int src_bytes, dst_bytes;
2608 {
2609   unsigned char *src = source;
2610   unsigned char *dst = destination;
2611   int result = CODING_FINISH_NORMAL;
2612
2613   coding->fake_multibyte = 0;
2614
2615   if (coding->eol_type == CODING_EOL_CRLF)
2616     {
2617       unsigned char c;
2618       unsigned char *src_end = source + src_bytes;
2619       unsigned char *dst_end = destination + dst_bytes;
2620       /* Since the maximum bytes produced by each loop is 2, we
2621          subtract 1 from DST_END to assure overflow checking is
2622          necessary only at the head of loop.  */
2623       unsigned char *adjusted_dst_end = dst_end - 1;
2624
2625       while (src < src_end && (dst_bytes
2626                                ? (dst < adjusted_dst_end)
2627                                : (dst < src - 1)))
2628         {
2629           c = *src++;
2630           if (c == '\n'
2631               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2632             *dst++ = '\r', *dst++ = '\n';
2633           else
2634             {
2635               *dst++ = c;
2636               if (BASE_LEADING_CODE_P (c))
2637                 coding->fake_multibyte = 1;
2638             }
2639         }
2640       if (src < src_end)
2641         result = CODING_FINISH_INSUFFICIENT_DST;
2642     }
2643   else
2644     {
2645       unsigned char c;
2646
2647       if (dst_bytes && src_bytes > dst_bytes)
2648         {
2649           src_bytes = dst_bytes;
2650           result = CODING_FINISH_INSUFFICIENT_DST;
2651         }
2652       if (dst_bytes)
2653         bcopy (source, destination, src_bytes);
2654       else
2655         {
2656           safe_bcopy (source, destination, src_bytes);
2657           dst_bytes = src_bytes;
2658         }
2659       if (coding->eol_type == CODING_EOL_CRLF)
2660         {
2661           while (src_bytes--)
2662             {
2663               if ((c = *dst++) == '\n')
2664                 dst[-1] = '\r';
2665               else if (BASE_LEADING_CODE_P (c))
2666                   coding->fake_multibyte = 1;
2667             }
2668         }
2669       else
2670         {
2671           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2672             {
2673               while (src_bytes--)
2674                 if (*dst++ == '\r') dst[-1] = '\n';
2675             }
2676           coding->fake_multibyte = 1;
2677         }
2678       src = source + dst_bytes;
2679       dst = destination + dst_bytes;
2680     }
2681
2682   coding->consumed = coding->consumed_char = src - source;
2683   coding->produced = coding->produced_char = dst - destination;
2684   return result;
2685 }
2686
2687 \f
2688 /*** 6. C library functions ***/
2689
2690 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2691    has a property `coding-system'.  The value of this property is a
2692    vector of length 5 (called as coding-vector).  Among elements of
2693    this vector, the first (element[0]) and the fifth (element[4])
2694    carry important information for decoding/encoding.  Before
2695    decoding/encoding, this information should be set in fields of a
2696    structure of type `coding_system'.
2697
2698    A value of property `coding-system' can be a symbol of another
2699    subsidiary coding-system.  In that case, Emacs gets coding-vector
2700    from that symbol.
2701
2702    `element[0]' contains information to be set in `coding->type'.  The
2703    value and its meaning is as follows:
2704
2705    0 -- coding_type_emacs_mule
2706    1 -- coding_type_sjis
2707    2 -- coding_type_iso2022
2708    3 -- coding_type_big5
2709    4 -- coding_type_ccl encoder/decoder written in CCL
2710    nil -- coding_type_no_conversion
2711    t -- coding_type_undecided (automatic conversion on decoding,
2712                                no-conversion on encoding)
2713
2714    `element[4]' contains information to be set in `coding->flags' and
2715    `coding->spec'.  The meaning varies by `coding->type'.
2716
2717    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2718    of length 32 (of which the first 13 sub-elements are used now).
2719    Meanings of these sub-elements are:
2720
2721    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2722         If the value is an integer of valid charset, the charset is
2723         assumed to be designated to graphic register N initially.
2724
2725         If the value is minus, it is a minus value of charset which
2726         reserves graphic register N, which means that the charset is
2727         not designated initially but should be designated to graphic
2728         register N just before encoding a character in that charset.
2729
2730         If the value is nil, graphic register N is never used on
2731         encoding.
2732
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2734         Each value takes t or nil.  See the section ISO2022 of
2735         `coding.h' for more information.
2736
2737    If `coding->type' is `coding_type_big5', element[4] is t to denote
2738    BIG5-ETen or nil to denote BIG5-HKU.
2739
2740    If `coding->type' takes the other value, element[4] is ignored.
2741
2742    Emacs Lisp's coding system also carries information about format of
2743    end-of-line in a value of property `eol-type'.  If the value is
2744    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2745    means CODING_EOL_CR.  If it is not integer, it should be a vector
2746    of subsidiary coding systems of which property `eol-type' has one
2747    of above values.
2748
2749 */
2750
2751 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2752    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2753    is setup so that no conversion is necessary and return -1, else
2754    return 0.  */
2755
2756 int
2757 setup_coding_system (coding_system, coding)
2758      Lisp_Object coding_system;
2759      struct coding_system *coding;
2760 {
2761   Lisp_Object coding_spec, coding_type, eol_type, plist;
2762   Lisp_Object val;
2763   int i;
2764
2765   /* Initialize some fields required for all kinds of coding systems.  */
2766   coding->symbol = coding_system;
2767   coding->common_flags = 0;
2768   coding->mode = 0;
2769   coding->heading_ascii = -1;
2770   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2771   coding_spec = Fget (coding_system, Qcoding_system);
2772   if (!VECTORP (coding_spec)
2773       || XVECTOR (coding_spec)->size != 5
2774       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2775     goto label_invalid_coding_system;
2776
2777   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2778   if (VECTORP (eol_type))
2779     {
2780       coding->eol_type = CODING_EOL_UNDECIDED;
2781       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2782     }
2783   else if (XFASTINT (eol_type) == 1)
2784     {
2785       coding->eol_type = CODING_EOL_CRLF;
2786       coding->common_flags
2787         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2788     }
2789   else if (XFASTINT (eol_type) == 2)
2790     {
2791       coding->eol_type = CODING_EOL_CR;
2792       coding->common_flags
2793         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2794     }
2795   else
2796     coding->eol_type = CODING_EOL_LF;
2797
2798   coding_type = XVECTOR (coding_spec)->contents[0];
2799   /* Try short cut.  */
2800   if (SYMBOLP (coding_type))
2801     {
2802       if (EQ (coding_type, Qt))
2803         {
2804           coding->type = coding_type_undecided;
2805           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2806         }
2807       else
2808         coding->type = coding_type_no_conversion;
2809       return 0;
2810     }
2811
2812   /* Initialize remaining fields.  */
2813   coding->composing = 0;
2814   coding->character_unification_table_for_decode = Qnil;
2815   coding->character_unification_table_for_encode = Qnil;
2816
2817   /* Get values of coding system properties:
2818      `post-read-conversion', `pre-write-conversion',
2819      `character-unification-table-for-decode',
2820      `character-unification-table-for-encode'.  */
2821   plist = XVECTOR (coding_spec)->contents[3];
2822   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2823   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2824   val = Fplist_get (plist, Qcharacter_unification_table_for_decode);
2825   if (SYMBOLP (val))
2826     val = Fget (val, Qcharacter_unification_table_for_decode);
2827   coding->character_unification_table_for_decode
2828     = CHAR_TABLE_P (val) ? val : Qnil;
2829   val = Fplist_get (plist, Qcharacter_unification_table_for_encode);
2830   if (SYMBOLP (val))
2831     val = Fget (val, Qcharacter_unification_table_for_encode);
2832   coding->character_unification_table_for_encode
2833     = CHAR_TABLE_P (val) ? val : Qnil;
2834   val = Fplist_get (plist, Qcoding_category);
2835   if (!NILP (val))
2836     {
2837       val = Fget (val, Qcoding_category_index);
2838       if (INTEGERP (val))
2839         coding->category_idx = XINT (val);
2840       else
2841         goto label_invalid_coding_system;
2842     }
2843   else
2844     goto label_invalid_coding_system;
2845
2846   val = Fplist_get (plist, Qsafe_charsets);
2847   if (EQ (val, Qt))
2848     {
2849       for (i = 0; i <= MAX_CHARSET; i++)
2850         coding->safe_charsets[i] = 1;
2851     }
2852   else
2853     {
2854       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2855       while (CONSP (val))
2856         {
2857           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2858             coding->safe_charsets[i] = 1;
2859           val = XCONS (val)->cdr;
2860         }
2861     }
2862
2863   switch (XFASTINT (coding_type))
2864     {
2865     case 0:
2866       coding->type = coding_type_emacs_mule;
2867       if (!NILP (coding->post_read_conversion))
2868         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2869       if (!NILP (coding->pre_write_conversion))
2870         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2871       break;
2872
2873     case 1:
2874       coding->type = coding_type_sjis;
2875       coding->common_flags
2876         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2877       break;
2878
2879     case 2:
2880       coding->type = coding_type_iso2022;
2881       coding->common_flags
2882         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2883       {
2884         Lisp_Object val, temp;
2885         Lisp_Object *flags;
2886         int i, charset, reg_bits = 0;
2887
2888         val = XVECTOR (coding_spec)->contents[4];
2889
2890         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2891           goto label_invalid_coding_system;
2892
2893         flags = XVECTOR (val)->contents;
2894         coding->flags
2895           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2896              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2897              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2898              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2899              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2900              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2901              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2902              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2903              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2904              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2905              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2906              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2907              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2908              );
2909
2910         /* Invoke graphic register 0 to plane 0.  */
2911         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2912         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2913         CODING_SPEC_ISO_INVOCATION (coding, 1)
2914           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2915         /* Not single shifting at first.  */
2916         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2917         /* Beginning of buffer should also be regarded as bol. */
2918         CODING_SPEC_ISO_BOL (coding) = 1;
2919
2920         for (charset = 0; charset <= MAX_CHARSET; charset++)
2921           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2922         val = Vcharset_revision_alist;
2923         while (CONSP (val))
2924           {
2925             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2926             if (charset >= 0
2927                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2928                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2929               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2930             val = XCONS (val)->cdr;
2931           }
2932
2933         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2934            FLAGS[REG] can be one of below:
2935                 integer CHARSET: CHARSET occupies register I,
2936                 t: designate nothing to REG initially, but can be used
2937                   by any charsets,
2938                 list of integer, nil, or t: designate the first
2939                   element (if integer) to REG initially, the remaining
2940                   elements (if integer) is designated to REG on request,
2941                   if an element is t, REG can be used by any charsets,
2942                 nil: REG is never used.  */
2943         for (charset = 0; charset <= MAX_CHARSET; charset++)
2944           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2945             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2946         for (i = 0; i < 4; i++)
2947           {
2948             if (INTEGERP (flags[i])
2949                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2950                 || (charset = get_charset_id (flags[i])) >= 0)
2951               {
2952                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2953                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2954               }
2955             else if (EQ (flags[i], Qt))
2956               {
2957                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2958                 reg_bits |= 1 << i;
2959                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2960               }
2961             else if (CONSP (flags[i]))
2962               {
2963                 Lisp_Object tail = flags[i];
2964
2965                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2966                 if (INTEGERP (XCONS (tail)->car)
2967                     && (charset = XINT (XCONS (tail)->car),
2968                         CHARSET_VALID_P (charset))
2969                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2970                   {
2971                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2972                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2973                   }
2974                 else
2975                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2976                 tail = XCONS (tail)->cdr;
2977                 while (CONSP (tail))
2978                   {
2979                     if (INTEGERP (XCONS (tail)->car)
2980                         && (charset = XINT (XCONS (tail)->car),
2981                             CHARSET_VALID_P (charset))
2982                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2983                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2984                         = i;
2985                     else if (EQ (XCONS (tail)->car, Qt))
2986                       reg_bits |= 1 << i;
2987                     tail = XCONS (tail)->cdr;
2988                   }
2989               }
2990             else
2991               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2992
2993             CODING_SPEC_ISO_DESIGNATION (coding, i)
2994               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
2995           }
2996
2997         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
2998           {
2999             /* REG 1 can be used only by locking shift in 7-bit env.  */
3000             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3001               reg_bits &= ~2;
3002             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3003               /* Without any shifting, only REG 0 and 1 can be used.  */
3004               reg_bits &= 3;
3005           }
3006
3007         if (reg_bits)
3008           for (charset = 0; charset <= MAX_CHARSET; charset++)
3009             {
3010               if (CHARSET_VALID_P (charset))
3011                 {
3012                   /* There exist some default graphic registers to be
3013                      used CHARSET.  */
3014
3015                   /* We had better avoid designating a charset of
3016                      CHARS96 to REG 0 as far as possible.  */
3017                   if (CHARSET_CHARS (charset) == 96)
3018                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3019                       = (reg_bits & 2
3020                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3021                   else
3022                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3023                       = (reg_bits & 1
3024                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3025                 }
3026             }
3027       }
3028       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3029       coding->spec.iso2022.last_invalid_designation_register = -1;
3030       break;
3031
3032     case 3:
3033       coding->type = coding_type_big5;
3034       coding->common_flags
3035         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3036       coding->flags
3037         = (NILP (XVECTOR (coding_spec)->contents[4])
3038            ? CODING_FLAG_BIG5_HKU
3039            : CODING_FLAG_BIG5_ETEN);
3040       break;
3041
3042     case 4:
3043       coding->type = coding_type_ccl;
3044       coding->common_flags
3045         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3046       {
3047         Lisp_Object val = XVECTOR (coding_spec)->contents[4];
3048         Lisp_Object decoder, encoder;
3049
3050         if (CONSP  (val)
3051             && SYMBOLP (XCONS (val)->car)
3052             && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
3053             && (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
3054             && SYMBOLP (XCONS (val)->cdr)
3055             && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
3056             && (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
3057           {
3058             setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
3059             setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
3060           }
3061         else
3062           goto label_invalid_coding_system;
3063       }
3064       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3065       break;
3066
3067     case 5:
3068       coding->type = coding_type_raw_text;
3069       break;
3070
3071     default:
3072       goto label_invalid_coding_system;
3073     }
3074   return 0;
3075
3076  label_invalid_coding_system:
3077   coding->type = coding_type_no_conversion;
3078   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3079   coding->common_flags = 0;
3080   coding->eol_type = CODING_EOL_LF;
3081   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3082   return -1;
3083 }
3084
3085 /* Emacs has a mechanism to automatically detect a coding system if it
3086    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3087    it's impossible to distinguish some coding systems accurately
3088    because they use the same range of codes.  So, at first, coding
3089    systems are grouped into the following categories:
3090
3091    o coding-category-emacs-mule
3092
3093         The category for a coding system which has the same code range
3094         as Emacs' internal format.  Assigned the coding-system (Lisp
3095         symbol) `emacs-mule' by default.
3096
3097    o coding-category-sjis
3098
3099         The category for a coding system which has the same code range
3100         as SJIS.  Assigned the coding-system (Lisp
3101         symbol) `japanese-shift-jis' by default.
3102
3103    o coding-category-iso-7
3104
3105         The category for a coding system which has the same code range
3106         as ISO2022 of 7-bit environment.  This doesn't use any locking
3107         shift and single shift functions.  This can encode/decode all
3108         charsets.  Assigned the coding-system (Lisp symbol)
3109         `iso-2022-7bit' by default.
3110
3111    o coding-category-iso-7-tight
3112
3113         Same as coding-category-iso-7 except that this can
3114         encode/decode only the specified charsets.
3115
3116    o coding-category-iso-8-1
3117
3118         The category for a coding system which has the same code range
3119         as ISO2022 of 8-bit environment and graphic plane 1 used only
3120         for DIMENSION1 charset.  This doesn't use any locking shift
3121         and single shift functions.  Assigned the coding-system (Lisp
3122         symbol) `iso-latin-1' by default.
3123
3124    o coding-category-iso-8-2
3125
3126         The category for a coding system which has the same code range
3127         as ISO2022 of 8-bit environment and graphic plane 1 used only
3128         for DIMENSION2 charset.  This doesn't use any locking shift
3129         and single shift functions.  Assigned the coding-system (Lisp
3130         symbol) `japanese-iso-8bit' by default.
3131
3132    o coding-category-iso-7-else
3133
3134         The category for a coding system which has the same code range
3135         as ISO2022 of 7-bit environment but uses locking shift or
3136         single shift functions.  Assigned the coding-system (Lisp
3137         symbol) `iso-2022-7bit-lock' by default.
3138
3139    o coding-category-iso-8-else
3140
3141         The category for a coding system which has the same code range
3142         as ISO2022 of 8-bit environment but uses locking shift or
3143         single shift functions.  Assigned the coding-system (Lisp
3144         symbol) `iso-2022-8bit-ss2' by default.
3145
3146    o coding-category-big5
3147
3148         The category for a coding system which has the same code range
3149         as BIG5.  Assigned the coding-system (Lisp symbol)
3150         `cn-big5' by default.
3151
3152    o coding-category-binary
3153
3154         The category for a coding system not categorized in any of the
3155         above.  Assigned the coding-system (Lisp symbol)
3156         `no-conversion' by default.
3157
3158    Each of them is a Lisp symbol and the value is an actual
3159    `coding-system's (this is also a Lisp symbol) assigned by a user.
3160    What Emacs does actually is to detect a category of coding system.
3161    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3162    decide only one possible category, it selects a category of the
3163    highest priority.  Priorities of categories are also specified by a
3164    user in a Lisp variable `coding-category-list'.
3165
3166 */
3167
3168 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3169    If it detects possible coding systems, return an integer in which
3170    appropriate flag bits are set.  Flag bits are defined by macros
3171    CODING_CATEGORY_MASK_XXX in `coding.h'.
3172
3173    How many ASCII characters are at the head is returned as *SKIP.  */
3174
3175 static int
3176 detect_coding_mask (source, src_bytes, priorities, skip)
3177      unsigned char *source;
3178      int src_bytes, *priorities, *skip;
3179 {
3180   register unsigned char c;
3181   unsigned char *src = source, *src_end = source + src_bytes;
3182   unsigned int mask = (CODING_CATEGORY_MASK_ISO_7BIT
3183                        | CODING_CATEGORY_MASK_ISO_SHIFT);
3184   int i;
3185
3186   /* At first, skip all ASCII characters and control characters except
3187      for three ISO2022 specific control characters.  */
3188  label_loop_detect_coding:
3189   while (src < src_end)
3190     {
3191       c = *src;
3192       if (c >= 0x80
3193           || ((mask & CODING_CATEGORY_MASK_ISO_7BIT)
3194               && c == ISO_CODE_ESC)
3195           || ((mask & CODING_CATEGORY_MASK_ISO_SHIFT)
3196               && (c == ISO_CODE_SI || c == ISO_CODE_SO)))
3197         break;
3198       src++;
3199     }
3200   *skip = src - source;
3201
3202   if (src >= src_end)
3203     /* We found nothing other than ASCII.  There's nothing to do.  */
3204     return 0;
3205
3206   /* The text seems to be encoded in some multilingual coding system.
3207      Now, try to find in which coding system the text is encoded.  */
3208   if (c < 0x80)
3209     {
3210       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3211       /* C is an ISO2022 specific control code of C0.  */
3212       mask = detect_coding_iso2022 (src, src_end);
3213       if (mask == 0)
3214         {
3215           /* No valid ISO2022 code follows C.  Try again.  */
3216           src++;
3217           mask = (c != ISO_CODE_ESC
3218                   ? CODING_CATEGORY_MASK_ISO_7BIT
3219                   : CODING_CATEGORY_MASK_ISO_SHIFT);
3220           goto label_loop_detect_coding;
3221         }
3222       if (priorities)
3223         goto label_return_highest_only;
3224     }
3225   else
3226     {
3227       int try;
3228
3229       if (c < 0xA0)
3230         {
3231           /* C is the first byte of SJIS character code,
3232              or a leading-code of Emacs' internal format (emacs-mule).  */
3233           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3234
3235           /* Or, if C is a special latin extra code,
3236              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3237              or is an ISO2022 control-sequence-introducer (CSI),
3238              we should also consider the possibility of ISO2022 codings.  */
3239           if ((VECTORP (Vlatin_extra_code_table)
3240                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3241               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3242               || (c == ISO_CODE_CSI
3243                   && (src < src_end
3244                       && (*src == ']'
3245                           || ((*src == '0' || *src == '1' || *src == '2')
3246                               && src + 1 < src_end
3247                               && src[1] == ']')))))
3248             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3249                      | CODING_CATEGORY_MASK_ISO_8BIT);
3250         }
3251       else
3252         /* C is a character of ISO2022 in graphic plane right,
3253            or a SJIS's 1-byte character code (i.e. JISX0201),
3254            or the first byte of BIG5's 2-byte code.  */
3255         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3256                 | CODING_CATEGORY_MASK_ISO_8BIT
3257                 | CODING_CATEGORY_MASK_SJIS
3258                 | CODING_CATEGORY_MASK_BIG5);
3259
3260       mask = 0;
3261       if (priorities)
3262         {
3263           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3264             {
3265               priorities[i] &= try;
3266               if (priorities[i] & CODING_CATEGORY_MASK_ISO)
3267                 mask = detect_coding_iso2022 (src, src_end);
3268               else if (priorities[i] & CODING_CATEGORY_MASK_SJIS)
3269                 mask = detect_coding_sjis (src, src_end);
3270               else if (priorities[i] & CODING_CATEGORY_MASK_BIG5)
3271                 mask = detect_coding_big5 (src, src_end);
3272               else if (priorities[i] & CODING_CATEGORY_MASK_EMACS_MULE)
3273                 mask = detect_coding_emacs_mule (src, src_end);
3274               if (mask)
3275                 goto label_return_highest_only;
3276             }
3277           return CODING_CATEGORY_MASK_RAW_TEXT;
3278         }
3279       if (try & CODING_CATEGORY_MASK_ISO)
3280         mask |= detect_coding_iso2022 (src, src_end);
3281       if (try & CODING_CATEGORY_MASK_SJIS)
3282         mask |= detect_coding_sjis (src, src_end);
3283       if (try & CODING_CATEGORY_MASK_BIG5)
3284         mask |= detect_coding_big5 (src, src_end);
3285       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3286         mask |= detect_coding_emacs_mule (src, src_end);
3287     }
3288   return (mask | CODING_CATEGORY_MASK_RAW_TEXT);
3289
3290  label_return_highest_only:
3291   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3292     {
3293       if (mask & priorities[i])
3294         return priorities[i];
3295     }
3296   return CODING_CATEGORY_MASK_RAW_TEXT;
3297 }
3298
3299 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3300    The information of the detected coding system is set in CODING.  */
3301
3302 void
3303 detect_coding (coding, src, src_bytes)
3304      struct coding_system *coding;
3305      unsigned char *src;
3306      int src_bytes;
3307 {
3308   unsigned int idx;
3309   int skip, mask, i;
3310   int priorities[CODING_CATEGORY_IDX_MAX];
3311   Lisp_Object val = Vcoding_category_list;
3312
3313   i = 0;
3314   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
3315     {
3316       if (! SYMBOLP (XCONS (val)->car))
3317         break;
3318       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
3319       if (idx >= CODING_CATEGORY_IDX_MAX)
3320         break;
3321       priorities[i++] = (1 << idx);
3322       val = XCONS (val)->cdr;
3323     }
3324   /* If coding-category-list is valid and contains all coding
3325      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
3326      the following code saves Emacs from craching.  */
3327   while (i < CODING_CATEGORY_IDX_MAX)
3328     priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
3329
3330   mask = detect_coding_mask (src, src_bytes, priorities, &skip);
3331   coding->heading_ascii = skip;
3332
3333   if (!mask) return;
3334
3335   /* We found a single coding system of the highest priority in MASK.  */
3336   idx = 0;
3337   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3338   if (! mask)
3339     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3340
3341   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3342
3343   if (coding->eol_type != CODING_EOL_UNDECIDED)
3344     {
3345       Lisp_Object tmp = Fget (val, Qeol_type);
3346
3347       if (VECTORP (tmp))
3348         val = XVECTOR (tmp)->contents[coding->eol_type];
3349     }
3350   setup_coding_system (val, coding);
3351   /* Set this again because setup_coding_system reset this member.  */
3352   coding->heading_ascii = skip;
3353 }
3354
3355 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3356    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3357    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3358
3359    How many non-eol characters are at the head is returned as *SKIP.  */
3360
3361 #define MAX_EOL_CHECK_COUNT 3
3362
3363 static int
3364 detect_eol_type (source, src_bytes, skip)
3365      unsigned char *source;
3366      int src_bytes, *skip;
3367 {
3368   unsigned char *src = source, *src_end = src + src_bytes;
3369   unsigned char c;
3370   int total = 0;                /* How many end-of-lines are found so far.  */
3371   int eol_type = CODING_EOL_UNDECIDED;
3372   int this_eol_type;
3373
3374   *skip = 0;
3375
3376   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3377     {
3378       c = *src++;
3379       if (c == '\n' || c == '\r')
3380         {
3381           if (*skip == 0)
3382             *skip = src - 1 - source;
3383           total++;
3384           if (c == '\n')
3385             this_eol_type = CODING_EOL_LF;
3386           else if (src >= src_end || *src != '\n')
3387             this_eol_type = CODING_EOL_CR;
3388           else
3389             this_eol_type = CODING_EOL_CRLF, src++;
3390
3391           if (eol_type == CODING_EOL_UNDECIDED)
3392             /* This is the first end-of-line.  */
3393             eol_type = this_eol_type;
3394           else if (eol_type != this_eol_type)
3395             {
3396               /* The found type is different from what found before.  */
3397               eol_type = CODING_EOL_INCONSISTENT;
3398               break;
3399             }
3400         }
3401     }
3402
3403   if (*skip == 0)
3404     *skip = src_end - source;
3405   return eol_type;
3406 }
3407
3408 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3409    is encoded.  If it detects an appropriate format of end-of-line, it
3410    sets the information in *CODING.  */
3411
3412 void
3413 detect_eol (coding, src, src_bytes)
3414      struct coding_system *coding;
3415      unsigned char *src;
3416      int src_bytes;
3417 {
3418   Lisp_Object val;
3419   int skip;
3420   int eol_type = detect_eol_type (src, src_bytes, &skip);
3421
3422   if (coding->heading_ascii > skip)
3423     coding->heading_ascii = skip;
3424   else
3425     skip = coding->heading_ascii;
3426
3427   if (eol_type == CODING_EOL_UNDECIDED)
3428     return;
3429   if (eol_type == CODING_EOL_INCONSISTENT)
3430     {
3431 #if 0
3432       /* This code is suppressed until we find a better way to
3433          distinguish raw text file and binary file.  */
3434
3435       /* If we have already detected that the coding is raw-text, the
3436          coding should actually be no-conversion.  */
3437       if (coding->type == coding_type_raw_text)
3438         {
3439           setup_coding_system (Qno_conversion, coding);
3440           return;
3441         }
3442       /* Else, let's decode only text code anyway.  */
3443 #endif /* 0 */
3444       eol_type = CODING_EOL_LF;
3445     }
3446
3447   val = Fget (coding->symbol, Qeol_type);
3448   if (VECTORP (val) && XVECTOR (val)->size == 3)
3449     {
3450       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3451       coding->heading_ascii = skip;
3452     }
3453 }
3454
3455 #define CONVERSION_BUFFER_EXTRA_ROOM 256
3456
3457 #define DECODING_BUFFER_MAG(coding)                                          \
3458   (coding->type == coding_type_iso2022                                       \
3459    ? 3                                                                       \
3460    : ((coding->type == coding_type_sjis || coding->type == coding_type_big5) \
3461       ? 2                                                                    \
3462       : (coding->type == coding_type_raw_text                                \
3463          ? 1                                                                 \
3464          : (coding->type == coding_type_ccl                                  \
3465             ? coding->spec.ccl.decoder.buf_magnification                     \
3466             : 2))))
3467
3468 /* Return maximum size (bytes) of a buffer enough for decoding
3469    SRC_BYTES of text encoded in CODING.  */
3470
3471 int
3472 decoding_buffer_size (coding, src_bytes)
3473      struct coding_system *coding;
3474      int src_bytes;
3475 {
3476   return (src_bytes * DECODING_BUFFER_MAG (coding)
3477           + CONVERSION_BUFFER_EXTRA_ROOM);
3478 }
3479
3480 /* Return maximum size (bytes) of a buffer enough for encoding
3481    SRC_BYTES of text to CODING.  */
3482
3483 int
3484 encoding_buffer_size (coding, src_bytes)
3485      struct coding_system *coding;
3486      int src_bytes;
3487 {
3488   int magnification;
3489
3490   if (coding->type == coding_type_ccl)
3491     magnification = coding->spec.ccl.encoder.buf_magnification;
3492   else
3493     magnification = 3;
3494
3495   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3496 }
3497
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* The shared conversion buffer and its current size in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a SIZE bytes of buffer to be used for encoding
   or decoding.  The shared buffer is returned as is if it is already
   large enough.  Otherwise it is replaced by a new one whose size is
   the current size doubled repeatedly until it reaches SIZE, but
   never less than MINIMUM_CONVERSION_BUFFER_SIZE; the old contents
   are not preserved.

   The new buffer comes from xmalloc and no out-of-memory handling is
   done here.  NOTE(review): xmalloc presumably does not return when
   memory is exhausted, so callers should not expect NULL -- confirm
   against its definition.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* Doubling can never get away from zero, so start from a sane
         minimum in case the buffer has not been set up yet.  */
      if (real_size < MINIMUM_CONVERSION_BUFFER_SIZE)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;
      while (real_size < size)
        real_size *= 2;

      /* Allocate the new buffer before releasing the old one.  */
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3526
3527 int
3528 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3529      struct coding_system *coding;
3530      unsigned char *source, *destination;
3531      int src_bytes, dst_bytes, encodep;
3532 {
3533   struct ccl_program *ccl
3534     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3535   int result;
3536
3537   coding->produced = ccl_driver (ccl, source, destination,
3538                                  src_bytes, dst_bytes, &(coding->consumed));
3539   if (encodep)
3540     {
3541       coding->produced_char = coding->produced;
3542       coding->consumed_char
3543         = multibyte_chars_in_text (source, coding->consumed);
3544     }
3545   else
3546     {
3547       coding->produced_char
3548         = multibyte_chars_in_text (destination, coding->produced);
3549       coding->consumed_char = coding->consumed;
3550     }
3551   switch (ccl->status)
3552     {
3553     case CCL_STAT_SUSPEND_BY_SRC:
3554       result = CODING_FINISH_INSUFFICIENT_SRC;
3555       break;
3556     case CCL_STAT_SUSPEND_BY_DST:
3557       result = CODING_FINISH_INSUFFICIENT_DST;
3558       break;
3559     default:
3560       result = CODING_FINISH_NORMAL;
3561       break;
3562     }
3563   return result;
3564 }
3565
3566 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3567    decoding, it may detect coding system and format of end-of-line if
3568    those are not yet decided.  */
3569
3570 int
3571 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3572      struct coding_system *coding;
3573      unsigned char *source, *destination;
3574      int src_bytes, dst_bytes;
3575 {
3576   int result;
3577
3578   if (src_bytes <= 0)
3579     {
3580       coding->produced = coding->produced_char = 0;
3581       coding->consumed = coding->consumed_char = 0;
3582       coding->fake_multibyte = 0;
3583       return CODING_FINISH_NORMAL;
3584     }
3585
3586   if (coding->type == coding_type_undecided)
3587     detect_coding (coding, source, src_bytes);
3588
3589   if (coding->eol_type == CODING_EOL_UNDECIDED)
3590     detect_eol (coding, source, src_bytes);
3591
3592   switch (coding->type)
3593     {
3594     case coding_type_emacs_mule:
3595     case coding_type_undecided:
3596     case coding_type_raw_text:
3597       if (coding->eol_type == CODING_EOL_LF
3598           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3599         goto label_no_conversion;
3600       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3601       break;
3602
3603     case coding_type_sjis:
3604       result = decode_coding_sjis_big5 (coding, source, destination,
3605                                         src_bytes, dst_bytes, 1);
3606       break;
3607
3608     case coding_type_iso2022:
3609       result = decode_coding_iso2022 (coding, source, destination,
3610                                       src_bytes, dst_bytes);
3611       break;
3612
3613     case coding_type_big5:
3614       result = decode_coding_sjis_big5 (coding, source, destination,
3615                                         src_bytes, dst_bytes, 0);
3616       break;
3617
3618     case coding_type_ccl:
3619       result = ccl_coding_driver (coding, source, destination,
3620                                   src_bytes, dst_bytes, 0);
3621       break;
3622
3623     default:                    /* i.e. case coding_type_no_conversion: */
3624     label_no_conversion:
3625       if (dst_bytes && src_bytes > dst_bytes)
3626         {
3627           coding->produced = dst_bytes;
3628           result = CODING_FINISH_INSUFFICIENT_DST;
3629         }
3630       else
3631         {
3632           coding->produced = src_bytes;
3633           result = CODING_FINISH_NORMAL;
3634         }
3635       if (dst_bytes)
3636         bcopy (source, destination, coding->produced);
3637       else
3638         safe_bcopy (source, destination, coding->produced);
3639       coding->fake_multibyte = 1;
3640       coding->consumed
3641         = coding->consumed_char = coding->produced_char = coding->produced;
3642       break;
3643     }
3644
3645   return result;
3646 }
3647
3648 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3649
3650 int
3651 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3652      struct coding_system *coding;
3653      unsigned char *source, *destination;
3654      int src_bytes, dst_bytes;
3655 {
3656   int result;
3657
3658   if (src_bytes <= 0)
3659     {
3660       coding->produced = coding->produced_char = 0;
3661       coding->consumed = coding->consumed_char = 0;
3662       coding->fake_multibyte = 0;
3663       return CODING_FINISH_NORMAL;
3664     }
3665
3666   switch (coding->type)
3667     {
3668     case coding_type_emacs_mule:
3669     case coding_type_undecided:
3670     case coding_type_raw_text:
3671       if (coding->eol_type == CODING_EOL_LF
3672           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3673         goto label_no_conversion;
3674       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3675       break;
3676
3677     case coding_type_sjis:
3678       result = encode_coding_sjis_big5 (coding, source, destination,
3679                                         src_bytes, dst_bytes, 1);
3680       break;
3681
3682     case coding_type_iso2022:
3683       result = encode_coding_iso2022 (coding, source, destination,
3684                                       src_bytes, dst_bytes);
3685       break;
3686
3687     case coding_type_big5:
3688       result = encode_coding_sjis_big5 (coding, source, destination,
3689                                         src_bytes, dst_bytes, 0);
3690       break;
3691
3692     case coding_type_ccl:
3693       result = ccl_coding_driver (coding, source, destination,
3694                                   src_bytes, dst_bytes, 1);
3695       break;
3696
3697     default:                    /* i.e. case coding_type_no_conversion: */
3698     label_no_conversion:
3699       if (dst_bytes && src_bytes > dst_bytes)
3700         {
3701           coding->produced = dst_bytes;
3702           result = CODING_FINISH_INSUFFICIENT_DST;
3703         }
3704       else
3705         {
3706           coding->produced = src_bytes;
3707           result = CODING_FINISH_NORMAL;
3708         }
3709       if (dst_bytes)
3710         bcopy (source, destination, coding->produced);
3711       else
3712         safe_bcopy (source, destination, coding->produced);
3713       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3714         {
3715           unsigned char *p = destination, *pend = p + coding->produced;
3716           while (p < pend)
3717             if (*p++ == '\015') p[-1] = '\n';
3718         }
3719       coding->fake_multibyte = 1;
3720       coding->consumed
3721         = coding->consumed_char = coding->produced_char = coding->produced;
3722       break;
3723     }
3724
3725   return result;
3726 }
3727
3728 /* Scan text in the region between *BEG and *END (byte positions),
3729    skip characters which we don't have to decode by coding system
3730    CODING at the head and tail, then set *BEG and *END to the region
3731    of the text we actually have to convert.  The caller should move
3732    the gap out of the region in advance.
3733
3734    If STR is not NULL, *BEG and *END are indices into STR.  */
3735
3736 static void
3737 shrink_decoding_region (beg, end, coding, str)
3738      int *beg, *end;
3739      struct coding_system *coding;
3740      unsigned char *str;
3741 {
3742   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3743   int eol_conversion;
3744
3745   if (coding->type == coding_type_ccl
3746       || coding->type == coding_type_undecided
3747       || !NILP (coding->post_read_conversion))
3748     {
3749       /* We can't skip any data.  */
3750       return;
3751     }
3752   else if (coding->type == coding_type_no_conversion)
3753     {
3754       /* We need no conversion, but don't have to skip any data here.
3755          Decoding routine handles them effectively anyway.  */
3756       return;
3757     }
3758
3759   if (coding->heading_ascii >= 0)
3760     /* Detection routine has already found how much we can skip at the
3761        head.  */
3762     *beg += coding->heading_ascii;
3763
3764   if (str)
3765     {
3766       begp_orig = begp = str + *beg;
3767       endp_orig = endp = str + *end;
3768     }
3769   else
3770     {
3771       begp_orig = begp = BYTE_POS_ADDR (*beg);
3772       endp_orig = endp = begp + *end - *beg;
3773     }
3774
3775   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3776
3777   switch (coding->type)
3778     {
3779     case coding_type_emacs_mule:
3780     case coding_type_raw_text:
3781       if (eol_conversion)
3782         {
3783           if (coding->heading_ascii < 0)
3784             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3785           while (begp < endp && *(endp - 1) != '\r' && *(endp - 1) < 0x80)
3786             endp--;
3787         }
3788       else
3789         begp = endp;
3790       break;
3791
3792     case coding_type_sjis:
3793     case coding_type_big5:
3794       /* We can skip all ASCII characters at the head.  */
3795       if (coding->heading_ascii < 0)
3796         {
3797           if (eol_conversion)
3798             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3799           else
3800             while (begp < endp && *begp < 0x80) begp++;
3801         }
3802       /* We can skip all ASCII characters at the tail except for the
3803          second byte of SJIS or BIG5 code.  */
3804       if (eol_conversion)
3805         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3806       else
3807         while (begp < endp && endp[-1] < 0x80) endp--;
3808       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3809         endp++;
3810       break;
3811
3812     default:            /* i.e. case coding_type_iso2022: */
3813       if (coding->heading_ascii < 0)
3814         {
3815           /* We can skip all ASCII characters at the head except for a
3816              few control codes.  */
3817           while (begp < endp && (c = *begp) < 0x80
3818                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3819                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3820                  && (!eol_conversion || c != ISO_CODE_LF))
3821             begp++;
3822         }
3823       switch (coding->category_idx)
3824         {
3825         case CODING_CATEGORY_IDX_ISO_8_1:
3826         case CODING_CATEGORY_IDX_ISO_8_2:
3827           /* We can skip all ASCII characters at the tail.  */
3828           if (eol_conversion)
3829             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3830           else
3831             while (begp < endp && endp[-1] < 0x80) endp--;
3832           break;
3833
3834         case CODING_CATEGORY_IDX_ISO_7:
3835         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3836           /* We can skip all charactes at the tail except for ESC and
3837              the following 2-byte at the tail.  */
3838           if (eol_conversion)
3839             while (begp < endp
3840                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3841               endp--;
3842           else
3843             while (begp < endp
3844                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3845               endp--;
3846           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3847             {
3848               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3849                 /* This is an ASCII designation sequence.  We can
3850                     surely skip the tail.  */
3851                 endp += 2;
3852               else
3853                 /* Hmmm, we can't skip the tail.  */
3854                 endp = endp_orig;
3855             }
3856         }
3857     }
3858   *beg += begp - begp_orig;
3859   *end += endp - endp_orig;
3860   return;
3861 }
3862
3863 /* Like shrink_decoding_region but for encoding.  */
3864
3865 static void
3866 shrink_encoding_region (beg, end, coding, str)
3867      int *beg, *end;
3868      struct coding_system *coding;
3869      unsigned char *str;
3870 {
3871   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3872   int eol_conversion;
3873
3874   if (coding->type == coding_type_ccl)
3875     /* We can't skip any data.  */
3876     return;
3877   else if (coding->type == coding_type_no_conversion)
3878     {
3879       /* We need no conversion.  */
3880       *beg = *end;
3881       return;
3882     }
3883
3884   if (str)
3885     {
3886       begp_orig = begp = str + *beg;
3887       endp_orig = endp = str + *end;
3888     }
3889   else
3890     {
3891       begp_orig = begp = BYTE_POS_ADDR (*beg);
3892       endp_orig = endp = begp + *end - *beg;
3893     }
3894
3895   eol_conversion = (coding->eol_type == CODING_EOL_CR
3896                     || coding->eol_type == CODING_EOL_CRLF);
3897
3898   /* Here, we don't have to check coding->pre_write_conversion because
3899      the caller is expected to have handled it already.  */
3900   switch (coding->type)
3901     {
3902     case coding_type_undecided:
3903     case coding_type_emacs_mule:
3904     case coding_type_raw_text:
3905       if (eol_conversion)
3906         {
3907           while (begp < endp && *begp != '\n') begp++;
3908           while (begp < endp && endp[-1] != '\n') endp--;
3909         }
3910       else
3911         begp = endp;
3912       break;
3913
3914     case coding_type_iso2022:
3915       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3916         {
3917           unsigned char *bol = begp;
3918           while (begp < endp && *begp < 0x80)
3919             {
3920               begp++;
3921               if (begp[-1] == '\n')
3922                 bol = begp;
3923             }
3924           begp = bol;
3925           goto label_skip_tail;
3926         }
3927       /* fall down ... */
3928
3929     default:
3930       /* We can skip all ASCII characters at the head and tail.  */
3931       if (eol_conversion)
3932         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
3933       else
3934         while (begp < endp && *begp < 0x80) begp++;
3935     label_skip_tail:
3936       if (eol_conversion)
3937         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
3938       else
3939         while (begp < endp && *(endp - 1) < 0x80) endp--;
3940       break;
3941     }
3942
3943   *beg += begp - begp_orig;
3944   *end += endp - endp_orig;
3945   return;
3946 }
3947
3948 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
3949    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
3950    coding system CODING, and return the status code of code conversion
3951    (currently, this value has no meaning).
3952
3953    How many characters (and bytes) are converted to how many
3954    characters (and bytes) are recorded in members of the structure
3955    CODING.
3956
3957    If REPLACE is nonzero, we do various things as if the original text
3958    is deleted and a new text is inserted.  See the comments in
3959    replace_range (insdel.c) to know what we are doing.  */
3960
3961 int
3962 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
3963      int from, from_byte, to, to_byte, encodep, replace;
3964      struct coding_system *coding;
3965 {
3966   int len = to - from, len_byte = to_byte - from_byte;
3967   int require, inserted, inserted_byte;
3968   int head_skip, tail_skip, total_skip;
3969   Lisp_Object saved_coding_symbol = Qnil;
3970   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
3971   int first = 1;
3972   int fake_multibyte = 0;
3973   unsigned char *src, *dst;
3974   Lisp_Object deletion = Qnil;
3975
3976   if (from < PT && PT < to)
3977     SET_PT_BOTH (from, from_byte);
3978
3979   if (replace)
3980     {
3981       int saved_from = from;
3982
3983       prepare_to_modify_buffer (from, to, &from);
3984       if (saved_from != from)
3985         {
3986           to = from + len;
3987           if (multibyte)
3988             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
3989           else
3990             from_byte = from, to_byte = to;
3991           len_byte = to_byte - from_byte;
3992         }
3993     }
3994
3995   if (! encodep && CODING_REQUIRE_DETECTION (coding))
3996     {
3997       /* We must detect encoding of text and eol format.  */
3998
3999       if (from < GPT && to > GPT)
4000         move_gap_both (from, from_byte);
4001       if (coding->type == coding_type_undecided)
4002         {
4003           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4004           if (coding->type == coding_type_undecided)
4005             /* It seems that the text contains only ASCII, but we
4006                should not left it undecided because the deeper
4007                decoding routine (decode_coding) tries to detect the
4008                encodings again in vain.  */
4009             coding->type = coding_type_emacs_mule;
4010         }
4011       if (coding->eol_type == CODING_EOL_UNDECIDED)
4012         {
4013           saved_coding_symbol = coding->symbol;
4014           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4015           if (coding->eol_type == CODING_EOL_UNDECIDED)
4016             coding->eol_type = CODING_EOL_LF;
4017           /* We had better recover the original eol format if we
4018              encounter an inconsitent eol format while decoding.  */
4019           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4020         }
4021     }
4022
4023   coding->consumed_char = len, coding->consumed = len_byte;
4024
4025   if (encodep
4026       ? ! CODING_REQUIRE_ENCODING (coding)
4027       : ! CODING_REQUIRE_DECODING (coding))
4028     {
4029       coding->produced = len_byte;
4030       if (multibyte
4031           && ! replace
4032           /* See the comment of the member heading_ascii in coding.h.  */
4033           && coding->heading_ascii < len_byte)
4034         {
4035           /* We still may have to combine byte at the head and the
4036              tail of the text in the region.  */
4037           if (from < GPT && GPT < to)
4038             move_gap_both (to, to_byte);
4039           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4040           adjust_after_insert (from, from_byte, to, to_byte, len);
4041           coding->produced_char = len;
4042         }
4043       else
4044         {
4045           if (!replace)
4046             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4047           coding->produced_char = len_byte;
4048         }
4049       return 0;
4050     }
4051
4052   /* Now we convert the text.  */
4053
4054   /* For encoding, we must process pre-write-conversion in advance.  */
4055   if (encodep
4056       && ! NILP (coding->pre_write_conversion)
4057       && SYMBOLP (coding->pre_write_conversion)
4058       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4059     {
4060       /* The function in pre-write-conversion may put a new text in a
4061          new buffer.  */
4062       struct buffer *prev = current_buffer, *new;
4063
4064       call2 (coding->pre_write_conversion,
4065              make_number (from), make_number (to));
4066       if (current_buffer != prev)
4067         {
4068           len = ZV - BEGV;
4069           new = current_buffer;
4070           set_buffer_internal_1 (prev);
4071           del_range_2 (from, from_byte, to, to_byte);
4072           insert_from_buffer (new, BEG, len, 0);
4073           to = from + len;
4074           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4075           len_byte = to_byte - from_byte;
4076         }
4077     }
4078
4079   if (replace)
4080     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4081
4082   /* Try to skip the heading and tailing ASCIIs.  */
4083   {
4084     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4085
4086     if (from < GPT && GPT < to)
4087       move_gap_both (from, from_byte);
4088     if (encodep)
4089       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4090     else
4091       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4092     if (from_byte == to_byte)
4093       {
4094         coding->produced = len_byte;
4095         coding->produced_char = multibyte ? len : len_byte;
4096         if (!replace)
4097           /* We must record and adjust for this new text now.  */
4098           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4099         return 0;
4100       }
4101
4102     head_skip = from_byte - from_byte_orig;
4103     tail_skip = to_byte_orig - to_byte;
4104     total_skip = head_skip + tail_skip;
4105     from += head_skip;
4106     to -= tail_skip;
4107     len -= total_skip; len_byte -= total_skip;
4108   }
4109
4110   /* For converion, we must put the gap before the text in addition to
4111      making the gap larger for efficient decoding.  The required gap
4112      size starts from 2000 which is the magic number used in make_gap.
4113      But, after one batch of conversion, it will be incremented if we
4114      find that it is not enough .  */
4115   require = 2000;
4116
4117   if (GAP_SIZE  < require)
4118     make_gap (require - GAP_SIZE);
4119   move_gap_both (from, from_byte);
4120
4121   if (GPT - BEG < beg_unchanged)
4122     beg_unchanged = GPT - BEG;
4123   if (Z - GPT < end_unchanged)
4124     end_unchanged = Z - GPT;
4125
4126   inserted = inserted_byte = 0;
4127   src = GAP_END_ADDR, dst = GPT_ADDR;
4128
4129   GAP_SIZE += len_byte;
4130   ZV -= len;
4131   Z -= len;
4132   ZV_BYTE -= len_byte;
4133   Z_BYTE -= len_byte;
4134
4135   for (;;)
4136     {
4137       int result;
4138
4139       /* The buffer memory is changed from:
4140          +--------+converted-text+---------+-------original-text------+---+
4141          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4142                   |<------------------- GAP_SIZE -------------------->|  */
4143       if (encodep)
4144         result = encode_coding (coding, src, dst, len_byte, 0);
4145       else
4146         result = decode_coding (coding, src, dst, len_byte, 0);
4147       /* to:
4148          +--------+-------converted-text--------+--+---original-text--+---+
4149          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4150                   |<------------------- GAP_SIZE -------------------->|  */
4151       if (coding->fake_multibyte)
4152         fake_multibyte = 1;
4153
4154       if (!encodep && !multibyte)
4155         coding->produced_char = coding->produced;
4156       inserted += coding->produced_char;
4157       inserted_byte += coding->produced;
4158       len_byte -= coding->consumed;
4159       src += coding->consumed;
4160       dst += inserted_byte;
4161
4162       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4163         {
4164           unsigned char *pend = dst, *p = pend - inserted_byte;
4165
4166           /* Encode LFs back to the original eol format (CR or CRLF).  */
4167           if (coding->eol_type == CODING_EOL_CR)
4168             {
4169               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4170             }
4171           else
4172             {
4173               int count = 0;
4174
4175               while (p < pend) if (*p++ == '\n') count++;
4176               if (src - dst < count)
4177                 {
4178                   /* We don't have sufficient room for putting LFs
4179                      back to CRLF.  We must record converted and
4180                      not-yet-converted text back to the buffer
4181                      content, enlarge the gap, then record them out of
4182                      the buffer contents again.  */
4183                   int add = len_byte + inserted_byte;
4184
4185                   GAP_SIZE -= add;
4186                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4187                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4188                   make_gap (count - GAP_SIZE);
4189                   GAP_SIZE += add;
4190                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4191                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4192                   /* Don't forget to update SRC, DST, and PEND.  */
4193                   src = GAP_END_ADDR - len_byte;
4194                   dst = GPT_ADDR + inserted_byte;
4195                   pend = dst;
4196                 }
4197               inserted += count;
4198               inserted_byte += count;
4199               coding->produced += count;
4200               p = dst = pend + count;
4201               while (count)
4202                 {
4203                   *--p = *--pend;
4204                   if (*p == '\n') count--, *--p = '\r';
4205                 }
4206             }
4207
4208           /* Suppress eol-format conversion in the further conversion.  */
4209           coding->eol_type = CODING_EOL_LF;
4210
4211           /* Restore the original symbol.  */
4212           coding->symbol = saved_coding_symbol;
4213
4214           continue;
4215         }
4216       if (len_byte <= 0)
4217         break;
4218       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4219         {
4220           /* The source text ends in invalid codes.  Let's just
4221              make them valid buffer contents, and finish conversion.  */
4222           inserted += len_byte;
4223           inserted_byte += len_byte;
4224           while (len_byte--)
4225             *src++ = *dst++;
4226           fake_multibyte = 1;
4227           break;
4228         }
4229       if (first)
4230         {
4231           /* We have just done the first batch of conversion which was
4232              stoped because of insufficient gap.  Let's reconsider the
4233              required gap size (i.e. SRT - DST) now.
4234
4235              We have converted ORIG bytes (== coding->consumed) into
4236              NEW bytes (coding->produced).  To convert the remaining
4237              LEN bytes, we may need REQUIRE bytes of gap, where:
4238                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4239                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4240              Here, we are sure that NEW >= ORIG.  */
4241           float ratio = coding->produced - coding->consumed;
4242           ratio /= coding->consumed;
4243           require = len_byte * ratio;
4244           first = 0;
4245         }
4246       if ((src - dst) < (require + 2000))
4247         {
4248           /* See the comment above the previous call of make_gap.  */
4249           int add = len_byte + inserted_byte;
4250
4251           GAP_SIZE -= add;
4252           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4253           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4254           make_gap (require + 2000);
4255           GAP_SIZE += add;
4256           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4257           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4258           /* Don't forget to update SRC, DST.  */
4259           src = GAP_END_ADDR - len_byte;
4260           dst = GPT_ADDR + inserted_byte;
4261         }
4262     }
4263   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4264
4265   if (multibyte
4266       && (fake_multibyte
4267           || !encodep && (to - from) != (to_byte - from_byte)))
4268     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4269
4270   /* If we have shrinked the conversion area, adjust it now.  */
4271   if (total_skip > 0)
4272     {
4273       if (tail_skip > 0)
4274         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4275       inserted += total_skip; inserted_byte += total_skip;
4276       GAP_SIZE += total_skip;
4277       GPT -= head_skip; GPT_BYTE -= head_skip;
4278       ZV -= total_skip; ZV_BYTE -= total_skip;
4279       Z -= total_skip; Z_BYTE -= total_skip;
4280       from -= head_skip; from_byte -= head_skip;
4281       to += tail_skip; to_byte += tail_skip;
4282     }
4283
4284   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4285
4286   if (! encodep && ! NILP (coding->post_read_conversion))
4287     {
4288       Lisp_Object val;
4289       int orig_inserted = inserted, pos = PT;
4290
4291       if (from != pos)
4292         temp_set_point_both (current_buffer, from, from_byte);
4293       val = call1 (coding->post_read_conversion, make_number (inserted));
4294       if (! NILP (val))
4295         {
4296           CHECK_NUMBER (val, 0);
4297           inserted = XFASTINT (val);
4298         }
4299       if (pos >= from + orig_inserted)
4300         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4301     }
4302
4303   signal_after_change (from, to - from, inserted);
4304
4305   {
4306     coding->consumed = to_byte - from_byte;
4307     coding->consumed_char = to - from;
4308     coding->produced = inserted_byte;
4309     coding->produced_char = inserted;
4310   }
4311
4312   return 0;
4313 }
4314
4315 Lisp_Object
4316 code_convert_string (str, coding, encodep, nocopy)
4317      Lisp_Object str;
4318      struct coding_system *coding;
4319      int encodep, nocopy;
4320 {
4321   int len;
4322   char *buf;
4323   int from = 0, to = XSTRING (str)->size;
4324   int to_byte = STRING_BYTES (XSTRING (str));
4325   struct gcpro gcpro1;
4326   Lisp_Object saved_coding_symbol = Qnil;
4327   int result;
4328
4329   if (encodep && !NILP (coding->pre_write_conversion)
4330       || !encodep && !NILP (coding->post_read_conversion))
4331     {
4332       /* Since we have to call Lisp functions which assume target text
4333          is in a buffer, after setting a temporary buffer, call
4334          code_convert_region.  */
4335       int count = specpdl_ptr - specpdl;
4336       struct buffer *prev = current_buffer;
4337
4338       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4339       temp_output_buffer_setup (" *code-converting-work*");
4340       set_buffer_internal (XBUFFER (Vstandard_output));
4341       if (encodep)
4342         insert_from_string (str, 0, 0, to, to_byte, 0);
4343       else
4344         {
4345           /* We must insert the contents of STR as is without
4346              unibyte<->multibyte conversion.  */
4347           current_buffer->enable_multibyte_characters = Qnil;
4348           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4349           current_buffer->enable_multibyte_characters = Qt;
4350         }
4351       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4352       if (encodep)
4353         /* We must return the buffer contents as unibyte string.  */
4354         current_buffer->enable_multibyte_characters = Qnil;
4355       str = make_buffer_string (BEGV, ZV, 0);
4356       set_buffer_internal (prev);
4357       return unbind_to (count, str);
4358     }
4359
4360   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4361     {
4362       /* See the comments in code_convert_region.  */
4363       if (coding->type == coding_type_undecided)
4364         {
4365           detect_coding (coding, XSTRING (str)->data, to_byte);
4366           if (coding->type == coding_type_undecided)
4367             coding->type = coding_type_emacs_mule;
4368         }
4369       if (coding->eol_type == CODING_EOL_UNDECIDED)
4370         {
4371           saved_coding_symbol = coding->symbol;
4372           detect_eol (coding, XSTRING (str)->data, to_byte);
4373           if (coding->eol_type == CODING_EOL_UNDECIDED)
4374             coding->eol_type = CODING_EOL_LF;
4375           /* We had better recover the original eol format if we
4376              encounter an inconsitent eol format while decoding.  */
4377           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4378         }
4379     }
4380
4381   if (encodep
4382       ? ! CODING_REQUIRE_ENCODING (coding)
4383       : ! CODING_REQUIRE_DECODING (coding))
4384     from = to_byte;
4385   else
4386     {
4387       /* Try to skip the heading and tailing ASCIIs.  */
4388       if (encodep)
4389         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4390       else
4391         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4392     }
4393   if (from == to_byte)
4394     return (nocopy ? str : Fcopy_sequence (str));
4395
4396   if (encodep)
4397     len = encoding_buffer_size (coding, to_byte - from);
4398   else
4399     len = decoding_buffer_size (coding, to_byte - from);
4400   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4401   GCPRO1 (str);
4402   buf = get_conversion_buffer (len);
4403   UNGCPRO;
4404
4405   if (from > 0)
4406     bcopy (XSTRING (str)->data, buf, from);
4407   result = (encodep
4408             ? encode_coding (coding, XSTRING (str)->data + from,
4409                              buf + from, to_byte - from, len)
4410             : decode_coding (coding, XSTRING (str)->data + from,
4411                              buf + from, to_byte - from, len));
4412   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4413     {
4414       /* We simple try to decode the whole string again but without
4415          eol-conversion this time.  */
4416       coding->eol_type = CODING_EOL_LF;
4417       coding->symbol = saved_coding_symbol;
4418       return code_convert_string (str, coding, encodep, nocopy);
4419     }
4420
4421   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4422          STRING_BYTES (XSTRING (str)) - to_byte);
4423
4424   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4425   if (encodep)
4426     str = make_unibyte_string (buf, len + coding->produced);
4427   else
4428     str = make_string_from_bytes (buf, len + coding->produced_char,
4429                                   len + coding->produced);
4430   return str;
4431 }
4432
4433 \f
4434 #ifdef emacs
4435 /*** 7. Emacs Lisp library functions ***/
4436
4437 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4438   "Return t if OBJECT is nil or a coding-system.\n\
4439 See the documentation of `make-coding-system' for information\n\
4440 about coding-system objects.")
4441   (obj)
4442      Lisp_Object obj;
4443 {
4444   if (NILP (obj))
4445     return Qt;
4446   if (!SYMBOLP (obj))
4447     return Qnil;
4448   /* Get coding-spec vector for OBJ.  */
4449   obj = Fget (obj, Qcoding_system);
4450   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4451           ? Qt : Qnil);
4452 }
4453
4454 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4455        Sread_non_nil_coding_system, 1, 1, 0,
4456   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4457   (prompt)
4458      Lisp_Object prompt;
4459 {
4460   Lisp_Object val;
4461   do
4462     {
4463       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4464                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4465     }
4466   while (XSTRING (val)->size == 0);
4467   return (Fintern (val, Qnil));
4468 }
4469
4470 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4471   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4472 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4473   (prompt, default_coding_system)
4474      Lisp_Object prompt, default_coding_system;
4475 {
4476   Lisp_Object val;
4477   if (SYMBOLP (default_coding_system))
4478     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4479   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4480                           Qt, Qnil, Qcoding_system_history,
4481                           default_coding_system, Qnil);
4482   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4483 }
4484
4485 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4486        1, 1, 0,
4487   "Check validity of CODING-SYSTEM.\n\
4488 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4489 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4490 The value of property should be a vector of length 5.")
4491   (coding_system)
4492      Lisp_Object coding_system;
4493 {
4494   CHECK_SYMBOL (coding_system, 0);
4495   if (!NILP (Fcoding_system_p (coding_system)))
4496     return coding_system;
4497   while (1)
4498     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4499 }
4500 \f
4501 Lisp_Object
4502 detect_coding_system (src, src_bytes, highest)
4503      unsigned char *src;
4504      int src_bytes, highest;
4505 {
4506   int coding_mask, eol_type;
4507   Lisp_Object val, tmp;
4508   int dummy;
4509
4510   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4511   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4512   if (eol_type == CODING_EOL_INCONSISTENT)
4513     eol_type == CODING_EOL_UNDECIDED;
4514
4515   if (!coding_mask)
4516     {
4517       val = Qundecided;
4518       if (eol_type != CODING_EOL_UNDECIDED)
4519         {
4520           Lisp_Object val2;
4521           val2 = Fget (Qundecided, Qeol_type);
4522           if (VECTORP (val2))
4523             val = XVECTOR (val2)->contents[eol_type];
4524         }
4525       return val;
4526     }
4527
4528   /* At first, gather possible coding systems in VAL.  */
4529   val = Qnil;
4530   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4531     {
4532       int idx
4533         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4534       if (coding_mask & (1 << idx))
4535         {
4536           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4537           if (highest)
4538             break;
4539         }
4540     }
4541   if (!highest)
4542     val = Fnreverse (val);
4543
4544   /* Then, substitute the elements by subsidiary coding systems.  */
4545   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4546     {
4547       if (eol_type != CODING_EOL_UNDECIDED)
4548         {
4549           Lisp_Object eol;
4550           eol = Fget (XCONS (tmp)->car, Qeol_type);
4551           if (VECTORP (eol))
4552             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4553         }
4554     }
4555   return (highest ? XCONS (val)->car : val);
4556 }
4557
4558 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4559        2, 3, 0,
4560   "Detect coding system of the text in the region between START and END.\n\
4561 Return a list of possible coding systems ordered by priority.\n\
4562 \n\
4563 If only ASCII characters are found, it returns `undecided'\n\
4564 or its subsidiary coding system according to a detected end-of-line format.\n\
4565 \n\
4566 If optional argument HIGHEST is non-nil, return the coding system of\n\
4567 highest priority.")
4568   (start, end, highest)
4569      Lisp_Object start, end, highest;
4570 {
4571   int from, to;
4572   int from_byte, to_byte;
4573
4574   CHECK_NUMBER_COERCE_MARKER (start, 0);
4575   CHECK_NUMBER_COERCE_MARKER (end, 1);
4576
4577   validate_region (&start, &end);
4578   from = XINT (start), to = XINT (end);
4579   from_byte = CHAR_TO_BYTE (from);
4580   to_byte = CHAR_TO_BYTE (to);
4581
4582   if (from < GPT && to >= GPT)
4583     move_gap_both (to, to_byte);
4584
4585   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4586                                to_byte - from_byte,
4587                                !NILP (highest));
4588 }
4589
4590 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4591        1, 2, 0,
4592   "Detect coding system of the text in STRING.\n\
4593 Return a list of possible coding systems ordered by priority.\n\
4594 \n\
4595 If only ASCII characters are found, it returns `undecided'\n\
4596 or its subsidiary coding system according to a detected end-of-line format.\n\
4597 \n\
4598 If optional argument HIGHEST is non-nil, return the coding system of\n\
4599 highest priority.")
4600   (string, highest)
4601      Lisp_Object string, highest;
4602 {
4603   CHECK_STRING (string, 0);
4604
4605   return detect_coding_system (XSTRING (string)->data,
4606                                STRING_BYTES (XSTRING (string)),
4607                                !NILP (highest));
4608 }
4609
4610 Lisp_Object
4611 code_convert_region1 (start, end, coding_system, encodep)
4612      Lisp_Object start, end, coding_system;
4613      int encodep;
4614 {
4615   struct coding_system coding;
4616   int from, to, len;
4617
4618   CHECK_NUMBER_COERCE_MARKER (start, 0);
4619   CHECK_NUMBER_COERCE_MARKER (end, 1);
4620   CHECK_SYMBOL (coding_system, 2);
4621
4622   validate_region (&start, &end);
4623   from = XFASTINT (start);
4624   to = XFASTINT (end);
4625
4626   if (NILP (coding_system))
4627     return make_number (to - from);
4628
4629   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4630     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4631
4632   coding.mode |= CODING_MODE_LAST_BLOCK;
4633   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4634                        &coding, encodep, 1);
4635   return make_number (coding.produced_char);
4636 }
4637
4638 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4639        3, 3, "r\nzCoding system: ",
4640   "Decode the current region by specified coding system.\n\
4641 When called from a program, takes three arguments:\n\
4642 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4643 Return length of decoded text.")
4644   (start, end, coding_system)
4645      Lisp_Object start, end, coding_system;
4646 {
4647   return code_convert_region1 (start, end, coding_system, 0);
4648 }
4649
4650 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
4651        3, 3, "r\nzCoding system: ",
4652   "Encode the current region by specified coding system.\n\
4653 When called from a program, takes three arguments:\n\
4654 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4655 Return length of encoded text.")
4656   (start, end, coding_system)
4657      Lisp_Object start, end, coding_system;
4658 {
4659   return code_convert_region1 (start, end, coding_system, 1);
4660 }
4661
4662 Lisp_Object
4663 code_convert_string1 (string, coding_system, nocopy, encodep)
4664      Lisp_Object string, coding_system, nocopy;
4665      int encodep;
4666 {
4667   struct coding_system coding;
4668
4669   CHECK_STRING (string, 0);
4670   CHECK_SYMBOL (coding_system, 1);
4671
4672   if (NILP (coding_system))
4673     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4674
4675   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4676     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4677
4678   coding.mode |= CODING_MODE_LAST_BLOCK;
4679   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4680 }
4681
4682 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
4683        2, 3, 0,
4684   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
4685 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4686 if the decoding operation is trivial.")
4687   (string, coding_system, nocopy)
4688      Lisp_Object string, coding_system, nocopy;
4689 {
4690   return code_convert_string1(string, coding_system, nocopy, 0);
4691 }
4692
4693 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
4694        2, 3, 0,
4695   "Encode STRING to CODING-SYSTEM, and return the result.\n\
4696 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4697 if the encoding operation is trivial.")
4698   (string, coding_system, nocopy)
4699      Lisp_Object string, coding_system, nocopy;
4700 {
4701   return code_convert_string1(string, coding_system, nocopy, 1);
4702 }
4703
4704 \f
4705 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4706   "Decode a JISX0208 character of shift-jis encoding.\n\
4707 CODE is the character code in SJIS.\n\
4708 Return the corresponding character.")
4709   (code)
4710      Lisp_Object code;
4711 {
4712   unsigned char c1, c2, s1, s2;
4713   Lisp_Object val;
4714
4715   CHECK_NUMBER (code, 0);
4716   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4717   DECODE_SJIS (s1, s2, c1, c2);
4718   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4719   return val;
4720 }
4721
4722 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4723   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4724 Return the corresponding character code in SJIS.")
4725   (ch)
4726      Lisp_Object ch;
4727 {
4728   int charset, c1, c2, s1, s2;
4729   Lisp_Object val;
4730
4731   CHECK_NUMBER (ch, 0);
4732   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4733   if (charset == charset_jisx0208)
4734     {
4735       ENCODE_SJIS (c1, c2, s1, s2);
4736       XSETFASTINT (val, (s1 << 8) | s2);
4737     }
4738   else
4739     XSETFASTINT (val, 0);
4740   return val;
4741 }
4742
4743 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4744   "Decode a Big5 character CODE of BIG5 coding system.\n\
4745 CODE is the character code in BIG5.\n\
4746 Return the corresponding character.")
4747   (code)
4748      Lisp_Object code;
4749 {
4750   int charset;
4751   unsigned char b1, b2, c1, c2;
4752   Lisp_Object val;
4753
4754   CHECK_NUMBER (code, 0);
4755   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4756   DECODE_BIG5 (b1, b2, charset, c1, c2);
4757   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4758   return val;
4759 }
4760
4761 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4762   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4763 Return the corresponding character code in Big5.")
4764   (ch)
4765      Lisp_Object ch;
4766 {
4767   int charset, c1, c2, b1, b2;
4768   Lisp_Object val;
4769
4770   CHECK_NUMBER (ch, 0);
4771   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4772   if (charset == charset_big5_1 || charset == charset_big5_2)
4773     {
4774       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4775       XSETFASTINT (val, (b1 << 8) | b2);
4776     }
4777   else
4778     XSETFASTINT (val, 0);
4779   return val;
4780 }
4781 \f
4782 DEFUN ("set-terminal-coding-system-internal",
4783        Fset_terminal_coding_system_internal,
4784        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4785   (coding_system)
4786      Lisp_Object coding_system;
4787 {
4788   CHECK_SYMBOL (coding_system, 0);
4789   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4790   /* We had better not send unsafe characters to terminal.  */
4791   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4792
4793   return Qnil;
4794 }
4795
4796 DEFUN ("set-safe-terminal-coding-system-internal",
4797        Fset_safe_terminal_coding_system_internal,
4798        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4799   (coding_system)
4800      Lisp_Object coding_system;
4801 {
4802   CHECK_SYMBOL (coding_system, 0);
4803   setup_coding_system (Fcheck_coding_system (coding_system),
4804                        &safe_terminal_coding);
4805   return Qnil;
4806 }
4807
4808 DEFUN ("terminal-coding-system",
4809        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
4810   "Return coding system specified for terminal output.")
4811   ()
4812 {
4813   return terminal_coding.symbol;
4814 }
4815
4816 DEFUN ("set-keyboard-coding-system-internal",
4817        Fset_keyboard_coding_system_internal,
4818        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4819   (coding_system)
4820      Lisp_Object coding_system;
4821 {
4822   CHECK_SYMBOL (coding_system, 0);
4823   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4824   return Qnil;
4825 }
4826
4827 DEFUN ("keyboard-coding-system",
4828        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
4829   "Return coding system specified for decoding keyboard input.")
4830   ()
4831 {
4832   return keyboard_coding.symbol;
4833 }
4834
4835 \f
4836 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4837        Sfind_operation_coding_system,  1, MANY, 0,
4838   "Choose a coding system for an operation based on the target name.\n\
4839 The value names a pair of coding systems: (DECODING-SYSTEM ENCODING-SYSTEM).\n\
4840 DECODING-SYSTEM is the coding system to use for decoding\n\
4841 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4842 for encoding (in case OPERATION does encoding).\n\
4843 \n\
4844 The first argument OPERATION specifies an I/O primitive:\n\
4845   For file I/O, `insert-file-contents' or `write-region'.\n\
4846   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4847   For network I/O, `open-network-stream'.\n\
4848 \n\
4849 The remaining arguments should be the same arguments that were passed\n\
4850 to the primitive.  Depending on which primitive, one of those arguments\n\
4851 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4852 whichever argument specifies the file name is TARGET.\n\
4853 \n\
4854 TARGET has a meaning which depends on OPERATION:\n\
4855   For file I/O, TARGET is a file name.\n\
4856   For process I/O, TARGET is a process name.\n\
4857   For network I/O, TARGET is a service name or a port number\n\
4858 \n\
4859 This function looks up what specified for TARGET in,\n\
4860 `file-coding-system-alist', `process-coding-system-alist',\n\
4861 or `network-coding-system-alist' depending on OPERATION.\n\
4862 They may specify a coding system, a cons of coding systems,\n\
4863 or a function symbol to call.\n\
4864 In the last case, we call the function with one argument,\n\
4865 which is a list of all the arguments given to this function.")
4866   (nargs, args)
4867      int nargs;
4868      Lisp_Object *args;
4869 {
4870   Lisp_Object operation, target_idx, target, val;
4871   register Lisp_Object chain;
4872
4873   if (nargs < 2)
4874     error ("Too few arguments");
4875   operation = args[0];
4876   if (!SYMBOLP (operation)
4877       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
4878     error ("Invalid first arguement");
4879   if (nargs < 1 + XINT (target_idx))
4880     error ("Too few arguments for operation: %s",
4881            XSYMBOL (operation)->name->data);
4882   target = args[XINT (target_idx) + 1];
4883   if (!(STRINGP (target)
4884         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
4885     error ("Invalid %dth argument", XINT (target_idx) + 1);
4886
4887   chain = ((EQ (operation, Qinsert_file_contents)
4888             || EQ (operation, Qwrite_region))
4889            ? Vfile_coding_system_alist
4890            : (EQ (operation, Qopen_network_stream)
4891               ? Vnetwork_coding_system_alist
4892               : Vprocess_coding_system_alist));
4893   if (NILP (chain))
4894     return Qnil;
4895
4896   for (; CONSP (chain); chain = XCONS (chain)->cdr)
4897     {
4898       Lisp_Object elt;
4899       elt = XCONS (chain)->car;
4900
4901       if (CONSP (elt)
4902           && ((STRINGP (target)
4903                && STRINGP (XCONS (elt)->car)
4904                && fast_string_match (XCONS (elt)->car, target) >= 0)
4905               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
4906         {
4907           val = XCONS (elt)->cdr;
4908           /* Here, if VAL is both a valid coding system and a valid
4909              function symbol, we return VAL as a coding system.  */
4910           if (CONSP (val))
4911             return val;
4912           if (! SYMBOLP (val))
4913             return Qnil;
4914           if (! NILP (Fcoding_system_p (val)))
4915             return Fcons (val, val);
4916           if (! NILP (Ffboundp (val)))
4917             {
4918               val = call1 (val, Flist (nargs, args));
4919               if (CONSP (val))
4920                 return val;
4921               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
4922                 return Fcons (val, val);
4923             }
4924           return Qnil;
4925         }
4926     }
4927   return Qnil;
4928 }
4929
4930 DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems,
4931        Supdate_iso_coding_systems, 0, 0, 0,
4932   "Update internal database for ISO2022 based coding systems.\n\
4933 When values of the following coding categories are changed, you must\n\
4934 call this function:\n\
4935   coding-category-iso-7, coding-category-iso-7-tight,\n\
4936   coding-category-iso-8-1, coding-category-iso-8-2,\n\
4937   coding-category-iso-7-else, coding-category-iso-8-else")
4938   ()
4939 {
4940   int i;
4941
4942   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE;
4943        i++)
4944     {
4945       if (! coding_system_table[i])
4946         coding_system_table[i]
4947           = (struct coding_system *) xmalloc (sizeof (struct coding_system));
4948       setup_coding_system
4949         (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value,
4950          coding_system_table[i]);
4951     }
4952   return Qnil;
4953 }
4954
4955 #endif /* emacs */
4956
4957 \f
4958 /*** 8. Post-amble ***/
4959
4960 void
4961 init_coding_once ()
4962 {
4963   int i;
4964
4965   /* Emacs' internal format specific initialize routine.  */
4966   for (i = 0; i <= 0x20; i++)
4967     emacs_code_class[i] = EMACS_control_code;
4968   emacs_code_class[0x0A] = EMACS_linefeed_code;
4969   emacs_code_class[0x0D] = EMACS_carriage_return_code;
4970   for (i = 0x21 ; i < 0x7F; i++)
4971     emacs_code_class[i] = EMACS_ascii_code;
4972   emacs_code_class[0x7F] = EMACS_control_code;
4973   emacs_code_class[0x80] = EMACS_leading_code_composition;
4974   for (i = 0x81; i < 0xFF; i++)
4975     emacs_code_class[i] = EMACS_invalid_code;
4976   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
4977   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
4978   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
4979   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
4980
4981   /* ISO2022 specific initialize routine.  */
4982   for (i = 0; i < 0x20; i++)
4983     iso_code_class[i] = ISO_control_code;
4984   for (i = 0x21; i < 0x7F; i++)
4985     iso_code_class[i] = ISO_graphic_plane_0;
4986   for (i = 0x80; i < 0xA0; i++)
4987     iso_code_class[i] = ISO_control_code;
4988   for (i = 0xA1; i < 0xFF; i++)
4989     iso_code_class[i] = ISO_graphic_plane_1;
4990   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
4991   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4992   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
4993   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
4994   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
4995   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
4996   iso_code_class[ISO_CODE_ESC] = ISO_escape;
4997   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
4998   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
4999   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5000
5001   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
5002   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5003
5004   setup_coding_system (Qnil, &keyboard_coding);
5005   setup_coding_system (Qnil, &terminal_coding);
5006   setup_coding_system (Qnil, &safe_terminal_coding);
5007
5008   bzero (coding_system_table, sizeof coding_system_table);
5009
5010 #if defined (MSDOS) || defined (WINDOWSNT)
5011   system_eol_type = CODING_EOL_CRLF;
5012 #else
5013   system_eol_type = CODING_EOL_LF;
5014 #endif
5015 }
5016
5017 #ifdef emacs
5018
5019 void
5020 syms_of_coding ()
5021 {
5022   Qtarget_idx = intern ("target-idx");
5023   staticpro (&Qtarget_idx);
5024
5025   Qcoding_system_history = intern ("coding-system-history");
5026   staticpro (&Qcoding_system_history);
5027   Fset (Qcoding_system_history, Qnil);
5028
5029   /* Target FILENAME is the first argument.  */
5030   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5031   /* Target FILENAME is the third argument.  */
5032   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5033
5034   Qcall_process = intern ("call-process");
5035   staticpro (&Qcall_process);
5036   /* Target PROGRAM is the first argument.  */
5037   Fput (Qcall_process, Qtarget_idx, make_number (0));
5038
5039   Qcall_process_region = intern ("call-process-region");
5040   staticpro (&Qcall_process_region);
5041   /* Target PROGRAM is the third argument.  */
5042   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5043
5044   Qstart_process = intern ("start-process");
5045   staticpro (&Qstart_process);
5046   /* Target PROGRAM is the third argument.  */
5047   Fput (Qstart_process, Qtarget_idx, make_number (2));
5048
5049   Qopen_network_stream = intern ("open-network-stream");
5050   staticpro (&Qopen_network_stream);
5051   /* Target SERVICE is the fourth argument.  */
5052   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5053
5054   Qcoding_system = intern ("coding-system");
5055   staticpro (&Qcoding_system);
5056
5057   Qeol_type = intern ("eol-type");
5058   staticpro (&Qeol_type);
5059
5060   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5061   staticpro (&Qbuffer_file_coding_system);
5062
5063   Qpost_read_conversion = intern ("post-read-conversion");
5064   staticpro (&Qpost_read_conversion);
5065
5066   Qpre_write_conversion = intern ("pre-write-conversion");
5067   staticpro (&Qpre_write_conversion);
5068
5069   Qno_conversion = intern ("no-conversion");
5070   staticpro (&Qno_conversion);
5071
5072   Qundecided = intern ("undecided");
5073   staticpro (&Qundecided);
5074
5075   Qcoding_system_p = intern ("coding-system-p");
5076   staticpro (&Qcoding_system_p);
5077
5078   Qcoding_system_error = intern ("coding-system-error");
5079   staticpro (&Qcoding_system_error);
5080
5081   Fput (Qcoding_system_error, Qerror_conditions,
5082         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5083   Fput (Qcoding_system_error, Qerror_message,
5084         build_string ("Invalid coding system"));
5085
5086   Qcoding_category = intern ("coding-category");
5087   staticpro (&Qcoding_category);
5088   Qcoding_category_index = intern ("coding-category-index");
5089   staticpro (&Qcoding_category_index);
5090
5091   Vcoding_category_table
5092     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5093   staticpro (&Vcoding_category_table);
5094   {
5095     int i;
5096     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5097       {
5098         XVECTOR (Vcoding_category_table)->contents[i]
5099           = intern (coding_category_name[i]);
5100         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5101               Qcoding_category_index, make_number (i));
5102       }
5103   }
5104
5105   Qcharacter_unification_table = intern ("character-unification-table");
5106   staticpro (&Qcharacter_unification_table);
5107   Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
5108         make_number (0));
5109
5110   Qcharacter_unification_table_for_decode
5111     = intern ("character-unification-table-for-decode");
5112   staticpro (&Qcharacter_unification_table_for_decode);
5113
5114   Qcharacter_unification_table_for_encode
5115     = intern ("character-unification-table-for-encode");
5116   staticpro (&Qcharacter_unification_table_for_encode);
5117
5118   Qsafe_charsets = intern ("safe-charsets");
5119   staticpro (&Qsafe_charsets);
5120
5121   Qemacs_mule = intern ("emacs-mule");
5122   staticpro (&Qemacs_mule);
5123
5124   Qraw_text = intern ("raw-text");
5125   staticpro (&Qraw_text);
5126
5127   defsubr (&Scoding_system_p);
5128   defsubr (&Sread_coding_system);
5129   defsubr (&Sread_non_nil_coding_system);
5130   defsubr (&Scheck_coding_system);
5131   defsubr (&Sdetect_coding_region);
5132   defsubr (&Sdetect_coding_string);
5133   defsubr (&Sdecode_coding_region);
5134   defsubr (&Sencode_coding_region);
5135   defsubr (&Sdecode_coding_string);
5136   defsubr (&Sencode_coding_string);
5137   defsubr (&Sdecode_sjis_char);
5138   defsubr (&Sencode_sjis_char);
5139   defsubr (&Sdecode_big5_char);
5140   defsubr (&Sencode_big5_char);
5141   defsubr (&Sset_terminal_coding_system_internal);
5142   defsubr (&Sset_safe_terminal_coding_system_internal);
5143   defsubr (&Sterminal_coding_system);
5144   defsubr (&Sset_keyboard_coding_system_internal);
5145   defsubr (&Skeyboard_coding_system);
5146   defsubr (&Sfind_operation_coding_system);
5147   defsubr (&Supdate_iso_coding_systems);
5148
5149   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5150     "List of coding systems.\n\
5151 \n\
5152 Do not alter the value of this variable manually.  This variable should be\n\
5153 updated by the functions `make-coding-system' and\n\
5154 `define-coding-system-alias'.");
5155   Vcoding_system_list = Qnil;
5156
5157   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5158     "Alist of coding system names.\n\
5159 Each element is one element list of coding system name.\n\
5160 This variable is given to `completing-read' as TABLE argument.\n\
5161 \n\
5162 Do not alter the value of this variable manually.  This variable should be\n\
5163 updated by the functions `make-coding-system' and\n\
5164 `define-coding-system-alias'.");
5165   Vcoding_system_alist = Qnil;
5166
5167   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5168     "List of coding-categories (symbols) ordered by priority.");
5169   {
5170     int i;
5171
5172     Vcoding_category_list = Qnil;
5173     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5174       Vcoding_category_list
5175         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5176                  Vcoding_category_list);
5177   }
5178
5179   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5180     "Specify the coding system for read operations.\n\
5181 It is useful to bind this variable with `let', but do not set it globally.\n\
5182 If the value is a coding system, it is used for decoding on read operation.\n\
5183 If not, an appropriate element is used from one of the coding system alists:\n\
5184 There are three such tables, `file-coding-system-alist',\n\
5185 `process-coding-system-alist', and `network-coding-system-alist'.");
5186   Vcoding_system_for_read = Qnil;
5187
5188   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5189     "Specify the coding system for write operations.\n\
5190 It is useful to bind this variable with `let', but do not set it globally.\n\
5191 If the value is a coding system, it is used for encoding on write operation.\n\
5192 If not, an appropriate element is used from one of the coding system alists:\n\
5193 There are three such tables, `file-coding-system-alist',\n\
5194 `process-coding-system-alist', and `network-coding-system-alist'.");
5195   Vcoding_system_for_write = Qnil;
5196
5197   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5198     "Coding system used in the latest file or process I/O.");
5199   Vlast_coding_system_used = Qnil;
5200
5201   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5202     "*Non-nil inhibit code conversion of end-of-line format in any cases.");
5203   inhibit_eol_conversion = 0;
5204
5205   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5206     "Non-nil means process buffer inherits coding system of process output.\n\
5207 Bind it to t if the process output is to be treated as if it were a file\n\
5208 read from some filesystem.");
5209   inherit_process_coding_system = 0;
5210
5211   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5212     "Alist to decide a coding system to use for a file I/O operation.\n\
5213 The format is ((PATTERN . VAL) ...),\n\
5214 where PATTERN is a regular expression matching a file name,\n\
5215 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5216 If VAL is a coding system, it is used for both decoding and encoding\n\
5217 the file contents.\n\
5218 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5219 and the cdr part is used for encoding.\n\
5220 If VAL is a function symbol, the function must return a coding system\n\
5221 or a cons of coding systems which are used as above.\n\
5222 \n\
5223 See also the function `find-operation-coding-system'.");
5224   Vfile_coding_system_alist = Qnil;
5225
5226   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5227     "Alist to decide a coding system to use for a process I/O operation.\n\
5228 The format is ((PATTERN . VAL) ...),\n\
5229 where PATTERN is a regular expression matching a program name,\n\
5230 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5231 If VAL is a coding system, it is used for both decoding what received\n\
5232 from the program and encoding what sent to the program.\n\
5233 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5234 and the cdr part is used for encoding.\n\
5235 If VAL is a function symbol, the function must return a coding system\n\
5236 or a cons of coding systems which are used as above.\n\
5237 \n\
5238 See also the function `find-operation-coding-system'.");
5239   Vprocess_coding_system_alist = Qnil;
5240
5241   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5242     "Alist to decide a coding system to use for a network I/O operation.\n\
5243 The format is ((PATTERN . VAL) ...),\n\
5244 where PATTERN is a regular expression matching a network service name\n\
5245 or is a port number to connect to,\n\
5246 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5247 If VAL is a coding system, it is used for both decoding what received\n\
5248 from the network stream and encoding what sent to the network stream.\n\
5249 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5250 and the cdr part is used for encoding.\n\
5251 If VAL is a function symbol, the function must return a coding system\n\
5252 or a cons of coding systems which are used as above.\n\
5253 \n\
5254 See also the function `find-operation-coding-system'.");
5255   Vnetwork_coding_system_alist = Qnil;
5256
5257   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5258     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
5259   eol_mnemonic_unix = ':';
5260
5261   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5262     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5263   eol_mnemonic_dos = '\\';
5264
5265   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5266     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5267   eol_mnemonic_mac = '/';
5268
5269   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5270     "Mnemonic character indicating end-of-line format is not yet decided.");
5271   eol_mnemonic_undecided = ':';
5272
5273   DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
5274     "Non-nil means ISO 2022 encoder/decoder do character unification.");
5275   Venable_character_unification = Qt;
5276
5277   DEFVAR_LISP ("standard-character-unification-table-for-decode",
5278     &Vstandard_character_unification_table_for_decode,
5279     "Table for unifying characters when reading.");
5280   Vstandard_character_unification_table_for_decode = Qnil;
5281
5282   DEFVAR_LISP ("standard-character-unification-table-for-encode",
5283     &Vstandard_character_unification_table_for_encode,
5284     "Table for unifying characters when writing.");
5285   Vstandard_character_unification_table_for_encode = Qnil;
5286
5287   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5288     "Alist of charsets vs revision numbers.\n\
5289 While encoding, if a charset (car part of an element) is found,\n\
5290 designate it with the escape sequence identifing revision (cdr part of the element).");
5291   Vcharset_revision_alist = Qnil;
5292
5293   DEFVAR_LISP ("default-process-coding-system",
5294                &Vdefault_process_coding_system,
5295     "Cons of coding systems used for process I/O by default.\n\
5296 The car part is used for decoding a process output,\n\
5297 the cdr part is used for encoding a text to be sent to a process.");
5298   Vdefault_process_coding_system = Qnil;
5299
5300   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5301     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5302 This is a vector of length 256.\n\
5303 If Nth element is non-nil, the existence of code N in a file\n\
5304 \(or output of subprocess) doesn't prevent it to be detected as\n\
5305 a coding system of ISO 2022 variant which has a flag\n\
5306 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5307 or reading output of a subprocess.\n\
5308 Only 128th through 159th elements has a meaning.");
5309   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5310
5311   DEFVAR_LISP ("select-safe-coding-system-function",
5312                &Vselect_safe_coding_system_function,
5313     "Function to call to select safe coding system for encoding a text.\n\
5314 \n\
5315 If set, this function is called to force a user to select a proper\n\
5316 coding system which can encode the text in the case that a default\n\
5317 coding system used in each operation can't encode the text.\n\
5318 \n\
5319 The default value is `select-safe-codign-system' (which see).");
5320   Vselect_safe_coding_system_function = Qnil;
5321
5322 }
5323
5324 #endif /* emacs */