src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. End-of-line handlers
  29   6. C library functions
  30   7. Emacs Lisp library functions
  31   8. Post-amble
  32
  33 */
  34
  35 /*** GENERAL NOTE on CODING SYSTEM ***
  36
  37   Coding system is an encoding mechanism of one or more character
  38   sets.  Here's a list of coding systems which Emacs can handle.  When
  39   we say "decode", it means converting some other coding system to
  40   Emacs' internal format (emacs-mule), and when we say "encode",
  41   it means converting the coding system emacs-mule to some other
  42   coding system.
  43
  44   0. Emacs' internal format (emacs-mule)
  45
  46   Emacs itself holds a multi-lingual character in a buffer and a string
  47   in a special format.  Details are described in section 2.
  48
  49   1. ISO2022
  50
  51   The most famous coding system for multiple character sets.  X's
  52   Compound Text, various EUCs (Extended Unix Code), and coding
  53   systems used in Internet communication such as ISO-2022-JP are
  54   all variants of ISO2022.  Details are described in section 3.
  55
  56   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  57
  58   A coding system to encode character sets: ASCII, JISX0201, and
  59   JISX0208.  Widely used for PC's in Japan.  Details are described in
  60   section 4.
  61
  62   3. BIG5
  63
  64   A coding system to encode character sets: ASCII and Big5.  Widely
  65   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  66   described in section 4.  In this file, when we write "BIG5"
  67   (all uppercase), we mean the coding system, and when we write
  68   "Big5" (capitalized), we mean the character set.
  69
  70   4. Raw text
  71
  72   A coding system for a text containing random 8-bit code.  Emacs does
  73   no code conversion on such a text except for end-of-line format.
  74
  75   5. Other
  76
  77   If a user wants to read/write a text encoded in a coding system not
  78   listed above, he can supply a decoder and an encoder for it in CCL
  79   (Code Conversion Language) programs.  Emacs executes the CCL program
  80   while reading/writing.
  81
  82   Emacs represents a coding system by a Lisp symbol that has a property
  83   `coding-system'.  But, before actually using the coding system, the
  84   information about it is set in a structure of type `struct
  85   coding_system' for rapid processing.  See section 6 for more details.
  86
  87 */
  88
  89 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  90
  91   How end-of-line of a text is encoded depends on a system.  For
  92   instance, Unix's format is just one byte of `line-feed' code,
  93   whereas DOS's format is two-byte sequence of `carriage-return' and
  94   `line-feed' codes.  MacOS's format is usually one byte of
  95   `carriage-return'.
  96
  97   Since text characters encoding and end-of-line encoding are
  98   independent, any coding system described above can take
  99   any format of end-of-line.  So, Emacs has information of format of
 100   end-of-line in each coding-system.  See section 6 for more details.
 101
 102 */
 103
 104 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 105
 106   These functions check if a text between SRC and SRC_END is encoded
 107   in the coding system category XXX.  Each returns an integer value in
 108   which appropriate flag bits for the category XXX are set.  The flag
 109   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 110   template of these functions.  */
 111 #if 0
 112 int
 113 detect_coding_emacs_mule (src, src_end)
 114      unsigned char *src, *src_end;
 115 {
 116   ...
 117 }
 118 #endif
 119
 120 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 121
 122   These functions decode SRC_BYTES length text at SOURCE encoded in
 123   CODING to Emacs' internal format (emacs-mule).  The resulting text
 124   goes to a place pointed to by DESTINATION, the length of which
 125   should not exceed DST_BYTES.  These functions set the information of
 126   original and decoded texts in the members produced, produced_char,
 127   consumed, and consumed_char of the structure *CODING.
 128
 129   The return value is an integer (CODING_FINISH_XXX) indicating how
 130   the decoding finished.
 131
 132   DST_BYTES zero means that source area and destination area are
 133   overlapped, which means that we can produce a decoded text until it
 134   reaches the head of the not-yet-decoded source text.
 135
 136   Below is a template of these functions.  */
 137 #if 0
 138 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 139      struct coding_system *coding;
 140      unsigned char *source, *destination;
 141      int src_bytes, dst_bytes;
 142 {
 143   ...
 144 }
 145 #endif
 146
 147 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 148
 149   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 150   internal format (emacs-mule) to CODING.  The resulting text goes to
 151   a place pointed to by DESTINATION, the length of which should not
 152   exceed DST_BYTES.  These functions set the information of
 153   original and encoded texts in the members produced, produced_char,
 154   consumed, and consumed_char of the structure *CODING.
 155
 156   The return value is an integer (CODING_FINISH_XXX) indicating how
 157   the encoding finished.
 158
 159   DST_BYTES zero means that source area and destination area are
 160   overlapped, which means that we can produce an encoded text until it
 161   reaches the head of the not-yet-encoded source text.
 162
 163   Below is a template of these functions.  */
 164 #if 0
 165 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 166      struct coding_system *coding;
 167      unsigned char *source, *destination;
 168      int src_bytes, dst_bytes;
 169 {
 170   ...
 171 }
 172 #endif
 173
 174 /*** COMMONLY USED MACROS ***/
 175
 176 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 177    THREE_MORE_BYTES safely get one, two, and three bytes from the
 178    source text respectively.  If there are not enough bytes in the
 179    source, they jump to `label_end_of_loop'.  The caller should set
 180    variables `src' and `src_end' to appropriate areas in advance.  */
 181
 182 #define ONE_MORE_BYTE(c1)       \
 183   do {                          \
 184     if (src < src_end)  /* at least one byte remains */     \
 185       c1 = *src++;      /* fetch it and advance `src' */    \
 186     else                /* exhausted: `src' is untouched */ \
 187       goto label_end_of_loop;   \
 188   } while (0)
 189
 190 #define TWO_MORE_BYTES(c1, c2)  \
 191   do {                          \
 192     if (src + 1 < src_end)  /* need both bytes up front */  \
 193       c1 = *src++, c2 = *src++; \
 194     else  /* fewer than two remain: consume nothing */      \
 195       goto label_end_of_loop;   \
 196   } while (0)
 197
 198 #define THREE_MORE_BYTES(c1, c2, c3)            \
 199   do {                                          \
 200     if (src + 2 < src_end)  /* need all three bytes up front */ \
 201       c1 = *src++, c2 = *src++, c3 = *src++;    \
 202     else  /* fewer than three remain: consume nothing */        \
 203       goto label_end_of_loop;                   \
 204   } while (0)
 205
 206 /* The following three macros DECODE_CHARACTER_ASCII,
 207    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 208    the multi-byte form of a character of each class at the place
 209    pointed by `dst'.  The caller should set the variable `dst' to
 210    point to an appropriate area and the variable `coding' to point to
 211    the coding-system of the currently decoding text in advance.  */
 212
 213 /* Decode one ASCII character C.  */
 214 /* Composing: write 0xA0, C|0x80.  Else: write C, produced_char++.  */
 215 #define DECODE_CHARACTER_ASCII(c)                               \
 216   do {                                                          \
 217     if (COMPOSING_P (coding->composing))                        \
 218       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
 219     else                                                        \
 220       {                                                         \
 221         *dst++ = (c);                                           \
 222         coding->produced_char++;                                \
 223       }                                                         \
 224   } while (0)
 225
 226 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 227    position-code is C.  */
 228 /* The second `if' assigns on purpose; CHARSET appears twice in the body.  */
 229 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 230   do {                                                                  \
 231     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 232     if (COMPOSING_P (coding->composing))                                \
 233       *dst++ = leading_code + 0x20;  /* component form, not counted */  \
 234     else                                                                \
 235       {                                                                 \
 236         *dst++ = leading_code;                                          \
 237         coding->produced_char++;                                        \
 238       }                                                                 \
 239     if (leading_code = CHARSET_LEADING_CODE_EXT (charset))              \
 240       *dst++ = leading_code;  /* written only when it is nonzero */     \
 241     *dst++ = (c) | 0x80;                                                \
 242   } while (0)
 243
 244 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 245    position-codes are C1 and C2.  */
 246 /* Same bytes as DIMENSION1 for C1, then C2 | 0x80 is appended.  */
 247 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 248   do {                                                  \
 249     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 250     *dst++ = (c2) | 0x80;                               \
 251   } while (0)
 252
 253 \f
 254 /*** 1. Preamble ***/
 255
 256 #include <stdio.h>
 257
 258 #ifdef emacs
 259
 260 #include <config.h>
 261 #include "lisp.h"
 262 #include "buffer.h"
 263 #include "charset.h"
 264 #include "ccl.h"
 265 #include "coding.h"
 266 #include "window.h"
 267
 268 #else  /* not emacs */
 269
 270 #include "mulelib.h"
 271
 272 #endif /* not emacs */
 273
 274 Lisp_Object Qcoding_system, Qeol_type; /* NOTE(review): Q* names presumably hold Lisp symbols -- confirm */
 275 Lisp_Object Qbuffer_file_coding_system;
 276 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 277 Lisp_Object Qno_conversion, Qundecided;
 278 Lisp_Object Qcoding_system_history;
 279 Lisp_Object Qsafe_charsets;
 280 /* The two `extern' objects below are defined in some other file.  */
 281 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 282 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 283 Lisp_Object Qstart_process, Qopen_network_stream;
 284 Lisp_Object Qtarget_idx;
 285 /* NOTE(review): presumably holds a Lisp function -- confirm.  */
 286 Lisp_Object Vselect_safe_coding_system_function;
 287
 288 /* Mnemonic character of each format of end-of-line.  */
 289 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 290 /* Mnemonic character to indicate format of end-of-line is not yet
 291    decided.  */
 292 int eol_mnemonic_undecided;
 293
 294 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 295    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 296 int system_eol_type;
 297
 298 #ifdef emacs
 299 /* Definitions from here to the matching #endif need `emacs' defined.  */
 300 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 301
 302 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 303
 304 /* Coding systems emacs-mule and raw-text are for converting only
 305    end-of-line format.  */
 306 Lisp_Object Qemacs_mule, Qraw_text;
 307
 308 /* Coding-systems are handed between Emacs Lisp programs and C internal
 309    routines by the following three variables.  */
 310 /* Coding-system for reading files and receiving data from process.  */
 311 Lisp_Object Vcoding_system_for_read;
 312 /* Coding-system for writing files and sending data to process.  */
 313 Lisp_Object Vcoding_system_for_write;
 314 /* Coding-system actually used in the latest I/O.  */
 315 Lisp_Object Vlast_coding_system_used;
 316
 317 /* A vector of length 256 which contains information about special
 318    Latin codes (especially for dealing with Microsoft code).  */
 319 Lisp_Object Vlatin_extra_code_table;
 320
 321 /* Flag to inhibit code conversion of end-of-line format.  */
 322 int inhibit_eol_conversion;
 323
 324 /* Coding system to be used to encode text for terminal display.  */
 325 struct coding_system terminal_coding;
 326
 327 /* Coding system to be used to encode text for terminal display when
 328    terminal coding system is nil.  */
 329 struct coding_system safe_terminal_coding;
 330
 331 /* Coding system of what is sent from terminal keyboard.  */
 332 struct coding_system keyboard_coding;
 333 /* NOTE(review): presumably one alist per kind of I/O -- confirm.  */
 334 Lisp_Object Vfile_coding_system_alist;
 335 Lisp_Object Vprocess_coding_system_alist;
 336 Lisp_Object Vnetwork_coding_system_alist;
 337
 338 #endif /* emacs */
 339
 340 Lisp_Object Qcoding_category, Qcoding_category_index; /* NOTE(review): presumably Lisp symbols -- confirm */
 341
 342 /* List of symbols `coding-category-xxx' ordered by priority.  */
 343 Lisp_Object Vcoding_category_list;
 344
 345 /* Table of coding categories (Lisp symbols).  */
 346 Lisp_Object Vcoding_category_table;
 347
 348 /* Table of symbol names, one for each coding-category.  */
 349 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 350   "coding-category-emacs-mule", /* NOTE(review): entry order presumably */
 351   "coding-category-sjis",       /* mirrors the CODING_CATEGORY_IDX_XXX  */
 352   "coding-category-iso-7",      /* values -- confirm against coding.h.  */
 353   "coding-category-iso-7-tight",
 354   "coding-category-iso-8-1",
 355   "coding-category-iso-8-2",
 356   "coding-category-iso-7-else",
 357   "coding-category-iso-8-else",
 358   "coding-category-big5",
 359   "coding-category-raw-text",
 360   "coding-category-binary"      /* eleven names are listed in total */
 361 };
 362
 363 /* Table of pointers to coding systems, one for each coding
 364    category.  */
 365 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 366 /* CHARSET_OK and SHIFT_OUT_OK read entries of the table above.  */
 367 /* Flag to tell if we look up unification table on character code
 368    conversion.  */
 369 Lisp_Object Venable_character_unification;
 370 /* Standard unification table to look up on decoding (reading).  */
 371 Lisp_Object Vstandard_character_unification_table_for_decode;
 372 /* Standard unification table to look up on encoding (writing).  */
 373 Lisp_Object Vstandard_character_unification_table_for_encode;
 374 /* NOTE(review): presumably symbols naming those tables -- confirm.  */
 375 Lisp_Object Qcharacter_unification_table;
 376 Lisp_Object Qcharacter_unification_table_for_decode;
 377 Lisp_Object Qcharacter_unification_table_for_encode;
 378
 379 /* Alist of charsets vs revision number.  */
 380 Lisp_Object Vcharset_revision_alist;
 381
 382 /* Default coding systems used for process I/O.  */
 383 Lisp_Object Vdefault_process_coding_system;
 384
 385 \f
 386 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 387
 388 /* Emacs' internal format for encoding multiple character sets is a
 389    kind of multi-byte encoding, i.e. characters are encoded by
 390    variable-length sequences of one-byte codes.  ASCII characters
 391    and control characters (e.g. `tab', `newline') are represented by
 392    one-byte sequences which are their ASCII codes, in the range 0x00
 393    through 0x7F.  The other characters are represented by a sequence
 394    of `base leading-code', optional `extended leading-code', and one
 395    or two `position-code's.  The length of the sequence is determined
 396    by the base leading-code.  Leading-code takes the range 0x80
 397    through 0x9F, whereas extended leading-code and position-code take
 398    the range 0xA0 through 0xFF.  See `charset.h' for more details
 399    about leading-code and position-code.
 400
 401    There's one exception to this rule.  Special leading-code
 402    `leading-code-composition' denotes that the following several
 403    characters should be composed into one character.  Leading-codes of
 404    components (except for ASCII) are added 0x20.  An ASCII character
 405    component is represented by a 2-byte sequence of `0xA0' and
 406    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 407    details of composite character.  Hence, we can summarize the code
 408    range as follows:
 409
 410    --- CODE RANGE of Emacs' internal format ---
 411    (character set)      (range)
 412    ASCII                0x00 .. 0x7F
 413    ELSE (1st byte)      0x80 .. 0x9F
 414         (rest bytes)    0xA0 .. 0xFF
 415    ---------------------------------------------
 416
 417   */
 418
 419 enum emacs_code_class_type emacs_code_class[256]; /* looked up by byte value */
 420
 421 /* Go to the next statement only if *SRC is accessible and the code is
 422    0xA0 or greater.  The tested byte is consumed in either case.  */
 423 #define CHECK_CODE_RANGE_A0_FF  \
 424   do {                          \
 425     if (src >= src_end)         /* no byte left to examine */ \
 426       goto label_end_of_switch; \
 427     else if (*src++ < 0xA0)     /* `src' advances as a side effect */ \
 428       return 0;                 /* leaves the enclosing function */ \
 429   } while (0)
 430
 431 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 432    Check if a text is encoded in Emacs' internal format.  If it is,
 433    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 434 /* Scans the bytes from SRC up to SRC_END; an empty range gives the mask.  */
 435 int
 436 detect_coding_emacs_mule (src, src_end)
 437      unsigned char *src, *src_end;
 438 {
 439   unsigned char c;
 440   int composing = 0;           /* nonzero inside a composition sequence */
 441
 442   while (src < src_end)
 443     {
 444       c = *src++;
 445       /* Map a component byte back to its plain leading-code first.  */
 446       if (composing)
 447         {
 448           if (c < 0xA0)        /* not a component byte, so ...  */
 449             composing = 0;     /* ... the composition is over */
 450           else
 451             c -= 0x20;         /* undo the 0x20 added to components */
 452         }
 453
 454       switch (emacs_code_class[c])
 455         {
 456         case EMACS_ascii_code:
 457         case EMACS_linefeed_code:
 458           break;
 459
 460         case EMACS_control_code:
 461           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 462             return 0;          /* these three controls reject the text */
 463           break;
 464
 465         case EMACS_invalid_code:
 466           return 0;
 467
 468         case EMACS_leading_code_composition: /* c == 0x80 */
 469           if (composing)       /* raw byte was 0xA0: ASCII component */
 470             CHECK_CODE_RANGE_A0_FF;
 471           else
 472             composing = 1;     /* a composition sequence starts here */
 473           break;
 474
 475         case EMACS_leading_code_4:
 476           CHECK_CODE_RANGE_A0_FF;
 477           /* fall down to check it two more times ...  */
 478
 479         case EMACS_leading_code_3:
 480           CHECK_CODE_RANGE_A0_FF;
 481           /* fall down to check it one more time ...  */
 482
 483         case EMACS_leading_code_2:
 484           CHECK_CODE_RANGE_A0_FF;
 485           break;
 486
 487         default:
 488         label_end_of_switch:   /* CHECK_CODE_RANGE_A0_FF jumps here when input runs out */
 489           break;
 490         }
 491     }
 492   return CODING_CATEGORY_MASK_EMACS_MULE; /* nothing ruled the format out */
 493 }
 494
 495 \f
 496 /*** 3. ISO2022 handlers ***/
 497
 498 /* The following note describes the coding system ISO2022 briefly.
 499    Since the intention of this note is to help in understanding of
 500    the programs in this file, some parts are NOT ACCURATE or OVERLY
 501    SIMPLIFIED.  For the thorough understanding, please refer to the
 502    original document of ISO2022.
 503
 504    ISO2022 provides many mechanisms to encode several character sets
 505    in 7-bit and 8-bit environments.  If one chooses a 7-bit environment,
 506    all text is encoded by codes of less than 128.  This may make the
 507    encoded text a little bit longer, but the text gets more stability
 508    to pass through several gateways (some of them strip off the MSB).
 509
 510    There are two kinds of character set: control character set and
 511    graphic character set.  The former contains control characters such
 512    as `newline' and `escape' to provide control functions (control
 513    functions are provided also by escape sequences).  The latter
 514    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 515    two control character sets and many graphic character sets.
 516
 517    Graphic character sets are classified into one of the following
 518    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 519    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 520    bytes (DIMENSION) and the number of characters in one dimension
 521    (CHARS) of the set.  In addition, each character set is assigned an
 522    identification tag (called "final character" and denoted as <F>
 523    here after) which is unique in each class.  <F> of each character
 524    set is decided by ECMA(*) when it is registered in ISO.  Code range
 525    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 526
 527    Note (*): ECMA = European Computer Manufacturers Association
 528
 529    Here are examples of graphic character set [NAME(<F>)]:
 530         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 531         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 532         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 533         o DIMENSION2_CHARS96 -- none for the moment
 534
 535    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 536         C0 [0x00..0x1F] -- control character plane 0
 537         GL [0x20..0x7F] -- graphic character plane 0
 538         C1 [0x80..0x9F] -- control character plane 1
 539         GR [0xA0..0xFF] -- graphic character plane 1
 540
 541    A control character set is directly designated and invoked to C0 or
 542    C1 by an escape sequence.  The most common case is that ISO646's
 543    control character set is designated/invoked to C0 and ISO6429's
 544    control character set is designated/invoked to C1, and usually
 545    these designations/invocations are omitted in a coded text.  With
 546    7-bit environment, only C0 can be used, and a control character for
 547    C1 is encoded by an appropriate escape sequence to fit in the
 548    environment.  Every control character for C1 has a
 549    corresponding escape sequence defined.
 550
 551    A graphic character set is at first designated to one of four
 552    graphic registers (G0 through G3), then these graphic registers are
 553    invoked to GL or GR.  These designations and invocations can be
 554    done independently.  The most common case is that G0 is invoked to
 555    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 556    these invocations and designations are omitted in a coded text.
 557    With 7-bit environment, only GL can be used.
 558
 559    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 560    and 0x7F of GL area work as control characters SPACE and DEL
 561    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 562
 563    There are two ways of invocation: locking-shift and single-shift.
 564    With locking-shift, the invocation lasts until the next different
 565    invocation, whereas with single-shift, the invocation works only
 566    for the following character and doesn't affect locking-shift.
 567    Invocations are done by the following control characters or escape
 568    sequences.
 569
 570    ----------------------------------------------------------------------
 571    function             control char    escape sequence description
 572    ----------------------------------------------------------------------
 573    SI  (shift-in)               0x0F    none            invoke G0 to GL
 574    SO  (shift-out)              0x0E    none            invoke G1 to GL
 575    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 576    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 577    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 578    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 579    ----------------------------------------------------------------------
 580    The first four are for locking-shift.  Control characters for these
 581    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 582
 583    Designations are done by the following escape sequences.
 584    ----------------------------------------------------------------------
 585    escape sequence      description
 586    ----------------------------------------------------------------------
 587    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 588    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 589    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 590    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 591    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 592    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 593    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 594    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 595    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 596    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 597    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 598    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 599    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 600    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 601    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 602    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 603    ----------------------------------------------------------------------
 604
 605    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 606    of dimension 1, chars 94, and final character <F>, and so on.
 607
 608    Note (*): Although these designations are not allowed in ISO2022,
 609    Emacs accepts them on decoding, and produces them on encoding
 610    CHARS96 character set in a coding system which is characterized as
 611    7-bit environment, non-locking-shift, and non-single-shift.
 612
 613    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 614    '(' can be omitted.  We call this as "short-form" here after.
 615
 616    Now you may notice that there are a lot of ways for encoding the
 617    same multilingual text in ISO2022.  Actually, there exist many
 618    coding systems such as Compound Text (used in X's inter-client
 619    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 620    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 621    localized platforms), and all of these are variants of ISO2022.
 622
 623    In addition to the above, Emacs handles two more kinds of escape
 624    sequences: ISO6429's direction specification and Emacs' private
 625    sequence for specifying character composition.
 626
 627    ISO6429's direction specification takes the following format:
 628         o CSI ']'      -- end of the current direction
 629         o CSI '0' ']'  -- end of the current direction
 630         o CSI '1' ']'  -- start of left-to-right text
 631         o CSI '2' ']'  -- start of right-to-left text
 632    The control character CSI (0x9B: control sequence introducer) is
 633    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 634
 635    Character composition specification takes the following format:
 636         o ESC '0' -- start character composition
 637         o ESC '1' -- end character composition
 638    Since these are not standard escape sequences of any ISO, the use
 639    of them for these meanings is restricted to Emacs only.  */
 640
 641 enum iso_code_class_type iso_code_class[256]; /* NOTE(review): presumably indexed by byte value -- confirm */
 642 /* Nonzero if CHARSET is OK for the coding system of category IDX.  */
 643 #define CHARSET_OK(idx, charset)                        \
 644   (coding_system_table[idx]->safe_charsets[charset]  /* listed safe */ \
 645    || (CODING_SPEC_ISO_REQUESTED_DESIGNATION  /* or a designation is */ \
 646        (coding_system_table[idx], charset)    /* requested for it */    \
 647        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 648 /* True if category IDX's initial designation for register 1 is >= 0.  */
 649 #define SHIFT_OUT_OK(idx) \
 650   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 651
 652 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 653    Check if a text is encoded in ISO2022.  If it is, returns an
 654    integer in which appropriate flag bits any of:
 655         CODING_CATEGORY_MASK_ISO_7
 656         CODING_CATEGORY_MASK_ISO_7_TIGHT
 657         CODING_CATEGORY_MASK_ISO_8_1
 658         CODING_CATEGORY_MASK_ISO_8_2
 659         CODING_CATEGORY_MASK_ISO_7_ELSE
 660         CODING_CATEGORY_MASK_ISO_8_ELSE
 661    are set.  If a code which should never appear in ISO2022 is found,
 662    returns 0.  */
 663
 664 int
 665 detect_coding_iso2022 (src, src_end)
 666      unsigned char *src, *src_end;
 667 {
 668   int mask = CODING_CATEGORY_MASK_ISO;
 669   int mask_found = 0;
 670   int reg[4], shift_out = 0;
 671   int c, c1, i, charset;
 672
 673   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 674   while (mask && src < src_end)
 675     {
 676       c = *src++;
 677       switch (c)
 678         {
 679         case ISO_CODE_ESC:
 680           if (src >= src_end)
 681             break;
 682           c = *src++;
 683           if (c >= '(' && c <= '/')
 684             {
 685               /* Designation sequence for a charset of dimension 1.  */
 686               if (src >= src_end)
 687                 break;
 688               c1 = *src++;
 689               if (c1 < ' ' || c1 >= 0x80
 690                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 691                 /* Invalid designation sequence.  Just ignore.  */
 692                 break;
 693               reg[(c - '(') % 4] = charset;
 694             }
 695           else if (c == '$')
 696             {
 697               /* Designation sequence for a charset of dimension 2.  */
 698               if (src >= src_end)
 699                 break;
 700               c = *src++;
 701               if (c >= '@' && c <= 'B')
 702                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 703                 reg[0] = charset = iso_charset_table[1][0][c];
 704               else if (c >= '(' && c <= '/')
 705                 {
 706                   if (src >= src_end)
 707                     break;
 708                   c1 = *src++;
 709                   if (c1 < ' ' || c1 >= 0x80
 710                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 711                     /* Invalid designation sequence.  Just ignore.  */
 712                     break;
 713                   reg[(c - '(') % 4] = charset;
 714                 }
 715               else
 716                 /* Invalid designation sequence.  Just ignore.  */
 717                 break;
 718             }
 719           else if (c == 'N' || c == 'n')
 720             {
 721               if (shift_out == 0
 722                   && (reg[1] >= 0
 723                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 724                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 725                 {
 726                   /* Locking shift out.  */
 727                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 728                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 729                   shift_out = 1;
 730                 }
 731               break;
 732             }
 733           else if (c == 'O' || c == 'o')
 734             {
 735               if (shift_out == 1)
 736                 {
 737                   /* Locking shift in.  */
 738                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 739                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 740                   shift_out = 0;
 741                 }
 742               break;
 743             }
 744           else if (c == '0' || c == '1' || c == '2')
 745             /* Start/end composition.  Just ignore.  */
 746             break;
 747           else
 748             /* Invalid escape sequence.  Just ignore.  */
 749             break;
 750
 751           /* We found a valid designation sequence for CHARSET.  */
 752           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 753           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 754             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 755           else
 756             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 757           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 758             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 759           else
 760             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 761           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 762             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 763           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 764             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 765           break;
 766
 767         case ISO_CODE_SO:
 768           if (shift_out == 0
 769               && (reg[1] >= 0
 770                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 771                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 772             {
 773               /* Locking shift out.  */
 774               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 775               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 776             }
 777           break;
 778
 779         case ISO_CODE_SI:
 780           if (shift_out == 1)
 781             {
 782               /* Locking shift in.  */
 783               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 784               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 785             }
 786           break;
 787
 788         case ISO_CODE_CSI:
 789         case ISO_CODE_SS2:
 790         case ISO_CODE_SS3:
 791           {
 792             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 793
 794             if (c != ISO_CODE_CSI)
 795               {
 796                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 797                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 798                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 799                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 800                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 801                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 802               }
 803             if (VECTORP (Vlatin_extra_code_table)
 804                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 805               {
 806                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 807                     & CODING_FLAG_ISO_LATIN_EXTRA)
 808                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 809                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 810                     & CODING_FLAG_ISO_LATIN_EXTRA)
 811                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 812               }
 813             mask &= newmask;
 814             mask_found |= newmask;
 815           }
 816           break;
 817
 818         default:
 819           if (c < 0x80)
 820             break;
 821           else if (c < 0xA0)
 822             {
 823               if (VECTORP (Vlatin_extra_code_table)
 824                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 825                 {
 826                   int newmask = 0;
 827
 828                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 829                       & CODING_FLAG_ISO_LATIN_EXTRA)
 830                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 831                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 832                       & CODING_FLAG_ISO_LATIN_EXTRA)
 833                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 834                   mask &= newmask;
 835                   mask_found |= newmask;
 836                 }
 837               else
 838                 return 0;
 839             }
 840           else
 841             {
 842               unsigned char *src_begin = src;
 843
 844               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 845                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 846               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 847               while (src < src_end && *src >= 0xA0)
 848                 src++;
 849               if ((src - src_begin - 1) & 1 && src < src_end)
 850                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 851               else
 852                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 853             }
 854           break;
 855         }
 856     }
 857
 858   return (mask & mask_found);
 859 }
 860
 861 /* Decode a character of which charset is CHARSET and the 1st position
 862    code is C1.  If dimension of CHARSET is 2, the 2nd position code is
 863    fetched from SRC and set to C2.  If CHARSET is negative, it means
 864    that we are decoding ill formed text, and what we can do is just to
 865    read C1 as is.

        The macro uses the following names from the place where it is
        expanded: `coding', `src', `dst', `c2' and `unification_table'.

        If the composition state of CODING is at its head,
        LEADING_CODE_COMPOSITION (followed by 0xFF for a composition
        with rules) is written to DST first.  A 2nd position code whose
        class is neither ISO_0x20_or_0x7F nor ISO_graphic_plane_0 is
        pushed back to SRC and a space is used in its place.  When
        `unification_table' is non-nil and unify_char returns a
        non-negative character, that character is split and its charset
        and position codes are decoded instead of the original ones.  */
 866
 867 #define DECODE_ISO_CHARACTER(charset, c1)                               \
 868   do {                                                                  \
 869     int c_alt, charset_alt = (charset);                                 \
 870     if (COMPOSING_HEAD_P (coding->composing))                           \
 871       {                                                                 \
 872         *dst++ = LEADING_CODE_COMPOSITION;                              \
 873         if (COMPOSING_WITH_RULE_P (coding->composing))                  \
 874           /* To signal that the rules are embedded.  */                 \
 875           *dst++ = 0xFF;                                                \
 876         coding->composing += 2;                                         \
 877       }                                                                 \
 878     if ((charset) >= 0)                                                 \
 879       {                                                                 \
 880         if (CHARSET_DIMENSION (charset) == 2)                           \
 881           {                                                             \
 882             ONE_MORE_BYTE (c2);                                         \
 883             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
 884                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
 885               {                                                         \
 886                 src--;                                                  \
 887                 c2 = ' ';                                               \
 888               }                                                         \
 889           }                                                             \
 890         if (!NILP (unification_table)                                   \
 891             && ((c_alt = unify_char (unification_table,                 \
 892                                      -1, (charset), c1, c2)) >= 0))     \
 893           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
 894       }                                                                 \
 895     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
 896       DECODE_CHARACTER_ASCII (c1);                                      \
 897     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
 898       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
 899     else                                                                \
 900       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
 901     if (COMPOSING_WITH_RULE_P (coding->composing))                      \
 902       /* To tell a composition rule follows.  */                        \
 903       coding->composing = COMPOSING_WITH_RULE_RULE;                     \
 904   } while (0)
 905
 906 /* Set designation state into CODING.

        The charset is looked up with ISO_CHARSET_TABLE from DIMENSION,
        CHARS and FINAL_CHAR.  The designation is accepted when the
        charset is non-negative and either REG is the register requested
        for it in CODING or the charset is marked in
        CODING->safe_charsets.  An accepted charset (or its reverse
        charset, when CODING_MODE_DIRECTION is set and a reverse charset
        exists) is stored as the designation of graphic register REG.

        Otherwise REG is remembered in
        CODING->spec.iso2022.last_invalid_designation_register and
        control jumps to `label_invalid_code'.  The same jump is taken
        for a designation of ASCII to register 0 that follows an invalid
        designation to register 0, so that the sequence is kept as is.

        The macro uses `coding' and the label `label_invalid_code' from
        the place where it is expanded.  */
 907 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 908   do {                                                                     \
 909     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 910                                      make_number (chars),                  \
 911                                      make_number (final_char));            \
 912     if (charset >= 0                                                       \
 913         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 914             || coding->safe_charsets[charset]))                            \
 915       {                                                                    \
 916         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 917             && reg == 0                                                    \
 918             && charset == CHARSET_ASCII)                                   \
 919           {                                                                \
 920             /* We should insert this designation sequence as is so         \
 921                that it is surely written back to a file.  */               \
 922             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 923             goto label_invalid_code;                                       \
 924           }                                                                \
 925         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 926         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 927             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 928           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 929         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 930       }                                                                    \
 931     else                                                                   \
 932       {                                                                    \
 933         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 934         goto label_invalid_code;                                           \
 935       }                                                                    \
 936   } while (0)
 937
 938 /* Check if the current composing sequence contains only valid codes.
 939    SRC points just after the escape sequence that started the
 940    composition, and SRC_END is the end of the available source text.
 941    If the composing sequence doesn't end before SRC_END, return -1,
        or, when CODING_MODE_LAST_BLOCK is set in CODING->mode, return
        the number of bytes from SRC to SRC_END.
        Else, if it contains only valid codes, return 0.
        Else return the length of the composing sequence, i.e. the
        number of bytes from SRC through the terminating `ESC 1'.  */
 942
 943 int check_composing_code (coding, src, src_end)
 944      struct coding_system *coding;
 945      unsigned char *src, *src_end;
 946 {
 947   unsigned char *src_start = src;
 948   int invalid_code_found = 0;
 949   int charset, c, c1, dim;
 950
 951   while (src < src_end)
 952     {
           /* Only escape sequences are inspected; skip everything else.  */
 953       if (*src++ != ISO_CODE_ESC) continue;
 954       if (src >= src_end) break;
 955       if ((c = *src++) == '1') /* end of composition */
 956         return (invalid_code_found ? src - src_start : 0);
           /* The checks below read up to two more bytes.  Give up if
              the source may be too short for that.  */
 957       if (src + 2 >= src_end) break;
           /* Without the designation flag no other escape sequence is
              acceptable.  The parentheses are required: `!' binds
              tighter than `&', so `!coding->flags & FLAG' would test
              the wrong thing.  */
 958       if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
 959         invalid_code_found = 1;
 960       else
 961         {
               /* DIM is the first index into iso_charset_table: 0 for
                  `ESC <intermediate> <final>', 1 for `ESC $ ...'.  */
 962           dim = 0;
 963           if (c == '$')
 964             {
 965               dim = 1;
                   /* `ESC $ <final>' with <final> in '@'..'B' has no
                      intermediate character; treat it as `ESC $ ('.  */
 966               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 967             }
 968           if (c >= '(' && c <= '/')
 969             {
 970               c1 = *src++;
                   /* The sequence is valid only if it names a known
                      charset that is marked safe for CODING and has a
                      requested designation.  */
 971               if ((c1 < ' ' || c1 >= 0x80)
 972                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 973                   || ! coding->safe_charsets[charset]
 974                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 975                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 976                 invalid_code_found = 1;
 977             }
 978           else
 979             invalid_code_found = 1;
 980         }
 981     }
       /* The end of the composing sequence was not found.  */
 982   return ((coding->mode & CODING_MODE_LAST_BLOCK) ? src_end - src_start : -1);
 983 }
 984
 985 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Decode SRC_BYTES bytes of ISO2022 text at SOURCE into
        DESTINATION according to CODING.  DST_BYTES is the size of
        DESTINATION; when it is zero, the output limit is derived from
        the current source position instead of DST_END (NOTE(review):
        presumably for decoding in place -- confirm against callers).

        While decoding, CODING->produced_char and CODING->fake_multibyte
        are updated, together with the designation, invocation,
        composition and direction state of CODING.  Before returning,
        the byte counts are stored in CODING->consumed,
        CODING->consumed_char and CODING->produced.

        The return value is CODING_FINISH_NORMAL, or one of
        CODING_FINISH_INSUFFICIENT_SRC, CODING_FINISH_INSUFFICIENT_DST
        and CODING_FINISH_INCONSISTENT_EOL when decoding stopped early.  */
 986
 987 int
 988 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 989      struct coding_system *coding;
 990      unsigned char *source, *destination;
 991      int src_bytes, dst_bytes;
 992 {
 993   unsigned char *src = source;
 994   unsigned char *src_end = source + src_bytes;
 995   unsigned char *dst = destination;
 996   unsigned char *dst_end = destination + dst_bytes;
 997   /* Since the maximum bytes produced by each loop is 7, we subtract 6
 998      from DST_END to assure that overflow checking is necessary only
 999      at the head of loop.  */
1000   unsigned char *adjusted_dst_end = dst_end - 6;
1001   int charset;
1002   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1003   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1004   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1005   Lisp_Object unification_table
1006     = coding->character_unification_table_for_decode;
1007   int result = CODING_FINISH_NORMAL;
1008
       /* Fall back to the standard table when unification is enabled
          but CODING does not specify a table of its own.  */
1009   if (!NILP (Venable_character_unification) && NILP (unification_table))
1010     unification_table = Vstandard_character_unification_table_for_decode;
1011
1012   coding->produced_char = 0;
1013   coding->fake_multibyte = 0;
1014   while (src < src_end && (dst_bytes
1015                            ? (dst < adjusted_dst_end)
1016                            : (dst < src - 6)))
1017     {
1018       /* SRC_BASE remembers the start position in source in each loop.
1019          The loop will be exited when there's not enough source text
1020          to analyze long escape sequence or 2-byte code (within macros
1021          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1022          to SRC_BASE before exiting.  */
1023       unsigned char *src_base = src;
1024       int c1 = *src++, c2;
1025
1026       switch (iso_code_class [c1])
1027         {
1028         case ISO_0x20_or_0x7F:
1029           if (!coding->composing
1030               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1031             {
1032               /* This is SPACE or DEL.  */
1033               *dst++ = c1;
1034               coding->produced_char++;
1035               break;
1036             }
1037           /* This is a graphic character, we fall down ...  */
1038
1039         case ISO_graphic_plane_0:
1040           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1041             {
1042               /* This is a composition rule.  */
1043               *dst++ = c1 | 0x80;
1044               coding->composing = COMPOSING_WITH_RULE_TAIL;
1045             }
1046           else
1047             DECODE_ISO_CHARACTER (charset0, c1);
1048           break;
1049
1050         case ISO_0xA0_or_0xFF:
1051           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1052               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1053             goto label_invalid_code;
1054           /* This is a graphic character, we fall down ... */
1055
1056         case ISO_graphic_plane_1:
1057           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1058             goto label_invalid_code;
1059           else
1060             DECODE_ISO_CHARACTER (charset1, c1);
1061           break;
1062
1063         case ISO_control_code:
1064           /* All ISO2022 control characters in this class have the
1065              same representation in Emacs internal format.  */
1066           if (c1 == '\n'
1067               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1068               && (coding->eol_type == CODING_EOL_CR
1069                   || coding->eol_type == CODING_EOL_CRLF))
1070             {
1071               result = CODING_FINISH_INCONSISTENT_EOL;
1072               goto label_end_of_loop_2;
1073             }
1074           *dst++ = c1;
1075           coding->produced_char++;
1076           break;
1077
1078         case ISO_carriage_return:
1079           if (coding->eol_type == CODING_EOL_CR)
1080             *dst++ = '\n';
1081           else if (coding->eol_type == CODING_EOL_CRLF)
1082             {
1083               ONE_MORE_BYTE (c1);
1084               if (c1 == ISO_CODE_LF)
1085                 *dst++ = '\n';
1086               else
1087                 {
1088                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1089                     {
1090                       result = CODING_FINISH_INCONSISTENT_EOL;
1091                       goto label_end_of_loop_2;
1092                     }
                       /* A lone CR: un-read the following byte and
                          keep the CR itself.  */
1093                   src--;
1094                   *dst++ = '\r';
1095                 }
1096             }
1097           else
1098             *dst++ = c1;
1099           coding->produced_char++;
1100           break;
1101
1102         case ISO_shift_out:
1103           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1104               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1105             goto label_invalid_code;
1106           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1107           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1108           break;
1109
1110         case ISO_shift_in:
1111           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1112             goto label_invalid_code;
1113           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1114           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1115           break;
1116
1117         case ISO_single_shift_2_7:
1118         case ISO_single_shift_2:
1119           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1120             goto label_invalid_code;
1121           /* SS2 is handled as an escape sequence of ESC 'N' */
1122           c1 = 'N';
1123           goto label_escape_sequence;
1124
1125         case ISO_single_shift_3:
1126           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1127             goto label_invalid_code;
1128           /* SS3 is handled as an escape sequence of ESC 'O' */
1129           c1 = 'O';
1130           goto label_escape_sequence;
1131
1132         case ISO_control_sequence_introducer:
1133           /* CSI is handled as an escape sequence of ESC '[' ...  */
1134           c1 = '[';
1135           goto label_escape_sequence;
1136
1137         case ISO_escape:
1138           ONE_MORE_BYTE (c1);
1139         label_escape_sequence:
1140           /* Escape sequences handled by Emacs are invocation,
1141              designation, direction specification, and character
1142              composition specification.  */
1143           switch (c1)
1144             {
1145             case '&':           /* revision of following character set */
1146               ONE_MORE_BYTE (c1);
1147               if (!(c1 >= '@' && c1 <= '~'))
1148                 goto label_invalid_code;
1149               ONE_MORE_BYTE (c1);
1150               if (c1 != ISO_CODE_ESC)
1151                 goto label_invalid_code;
1152               ONE_MORE_BYTE (c1);
1153               goto label_escape_sequence;
1154
1155             case '$':           /* designation of 2-byte character set */
1156               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1157                 goto label_invalid_code;
1158               ONE_MORE_BYTE (c1);
1159               if (c1 >= '@' && c1 <= 'B')
1160                 {       /* designation of JISX0208.1978, GB2312.1980,
1161                                    or JISX0208.1980 */
1162                   DECODE_DESIGNATION (0, 2, 94, c1);
1163                 }
1164               else if (c1 >= 0x28 && c1 <= 0x2B)
1165                 {       /* designation of DIMENSION2_CHARS94 character set */
1166                   ONE_MORE_BYTE (c2);
1167                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1168                 }
1169               else if (c1 >= 0x2C && c1 <= 0x2F)
1170                 {       /* designation of DIMENSION2_CHARS96 character set */
1171                   ONE_MORE_BYTE (c2);
1172                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1173                 }
1174               else
1175                 goto label_invalid_code;
1176               break;
1177
1178             case 'n':           /* invocation of locking-shift-2 */
1179               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1180                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1181                 goto label_invalid_code;
1182               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1183               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1184               break;
1185
1186             case 'o':           /* invocation of locking-shift-3 */
1187               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1188                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1189                 goto label_invalid_code;
1190               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1191               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1192               break;
1193
1194             case 'N':           /* invocation of single-shift-2 */
1195               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1196                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1197                 goto label_invalid_code;
1198               ONE_MORE_BYTE (c1);
1199               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1200               DECODE_ISO_CHARACTER (charset, c1);
1201               break;
1202
1203             case 'O':           /* invocation of single-shift-3 */
1204               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1205                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1206                 goto label_invalid_code;
1207               ONE_MORE_BYTE (c1);
1208               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1209               DECODE_ISO_CHARACTER (charset, c1);
1210               break;
1211
1212             case '0': case '2': /* start composing */
1213               /* Before processing composing, we must be sure that all
1214                  characters being composed are supported by CODING.
1215                  If not, we must give up composing and insert the
1216                  bunch of codes for composing as is without decoding.  */
1217               {
1218                 int result1;
1219
1220                 result1 = check_composing_code (coding, src, src_end);
1221                 if (result1 == 0)
1222                   coding->composing = (c1 == '0'
1223                                        ? COMPOSING_NO_RULE_HEAD
1224                                        : COMPOSING_WITH_RULE_HEAD);
1225                 else if (result1 > 0)
1226                   {
                       /* RESULT1 counts the bytes after the two-byte
                          starting sequence, hence the `+ 2'.  */
1227                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1228                       {
1229                         bcopy (src_base, dst, result1 + 2);
1230                         src += result1;
1231                         dst += result1 + 2;
1232                         coding->produced_char += result1 + 2;
1233                       }
1234                     else
1235                       {
1236                         result = CODING_FINISH_INSUFFICIENT_DST;
1237                         goto label_end_of_loop_2;
1238                       }
1239                   }
1240                 else
                     /* The end of the composing sequence is not yet
                        available in the source.  */
1241                   goto label_end_of_loop;
1242               }
1243               break;
1244
1245             case '1':           /* end composing */
1246               coding->composing = COMPOSING_NO;
1247               coding->produced_char++;
1248               break;
1249
1250             case '[':           /* specification of direction */
1251               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1252                 goto label_invalid_code;
1253               /* For the moment, nested direction is not supported.
1254                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1255                  left-to-right, and nonzero means right-to-left.  */
1256               ONE_MORE_BYTE (c1);
1257               switch (c1)
1258                 {
1259                 case ']':       /* end of the current direction */
                       /* `ESC [ ]' is complete as it stands, so leave the
                          switch here instead of falling into the cases
                          below, which require one more byte.  */
1260                   coding->mode &= ~CODING_MODE_DIRECTION;
1261                   break;

1262                 case '0':       /* end of the current direction */
1263                 case '1':       /* start of left-to-right direction */
1264                   ONE_MORE_BYTE (c1);
1265                   if (c1 == ']')
1266                     coding->mode &= ~CODING_MODE_DIRECTION;
1267                   else
1268                     goto label_invalid_code;
1269                   break;
1270
1271                 case '2':       /* start of right-to-left direction */
1272                   ONE_MORE_BYTE (c1);
1273                   if (c1 == ']')
1274                     coding->mode |= CODING_MODE_DIRECTION;
1275                   else
1276                     goto label_invalid_code;
1277                   break;
1278
1279                 default:
1280                   goto label_invalid_code;
1281                 }
1282               break;
1283
1284             default:
1285               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1286                 goto label_invalid_code;
1287               if (c1 >= 0x28 && c1 <= 0x2B)
1288                 {       /* designation of DIMENSION1_CHARS94 character set */
1289                   ONE_MORE_BYTE (c2);
1290                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1291                 }
1292               else if (c1 >= 0x2C && c1 <= 0x2F)
1293                 {       /* designation of DIMENSION1_CHARS96 character set */
1294                   ONE_MORE_BYTE (c2);
1295                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1296                 }
1297               else
1298                 {
1299                   goto label_invalid_code;
1300                 }
1301             }
1302           /* We must update these variables now.  */
1303           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1304           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1305           break;
1306
1307         label_invalid_code:
               /* Copy the bytes read in this iteration to the output
                  as they are.  */
1308           while (src_base < src)
1309             *dst++ = *src_base++;
1310           coding->fake_multibyte = 1;
1311         }
1312       continue;
1313
1314     label_end_of_loop:
1315       result = CODING_FINISH_INSUFFICIENT_SRC;
1316     label_end_of_loop_2:
           /* Discard what was read in the unfinished iteration.  */
1317       src = src_base;
1318       break;
1319     }
1320
       /* Some source text is left undecoded.  */
1321   if (src < src_end)
1322     {
1323       if (result == CODING_FINISH_NORMAL)
1324         result = CODING_FINISH_INSUFFICIENT_DST;
1325       else if (result != CODING_FINISH_INCONSISTENT_EOL
1326                && coding->mode & CODING_MODE_LAST_BLOCK)
1327         {
1328           /* This is the last block of the text to be decoded.  We had
1329              better just flush out all remaining codes in the text
1330              although they are not valid characters.  */
1331           src_bytes = src_end - src;
1332           if (dst_bytes && (dst_end - dst < src_bytes))
1333             src_bytes = dst_end - dst;
1334           bcopy (src, dst, src_bytes);
1335           dst += src_bytes;
1336           src += src_bytes;
1337           coding->fake_multibyte = 1;
1338         }
1339     }
1340
1341   coding->consumed = coding->consumed_char = src - source;
1342   coding->produced = dst - destination;
1343   return result;
1344 }
1345
1346 /* ISO2022 encoding stuff.  */
1347
1348 /*
1349    It is not enough to say just "ISO2022" on encoding, we have to
1350    specify more details.  In Emacs, each coding system of ISO2022
1351    variant has the following specifications:
1352         1. Initial designation to G0 thru G3.
1353         2. Allows short-form designation?
1354         3. ASCII should be designated to G0 before control characters?
1355         4. ASCII should be designated to G0 at end of line?
1356         5. 7-bit environment or 8-bit environment?
1357         6. Use locking-shift?
1358         7. Use Single-shift?
1359    And the following two are only for Japanese:
1360         8. Use ASCII in place of JIS0201-1976-Roman?
1361         9. Use JISX0208-1983 in place of JISX0208-1978?
1362    These specifications are encoded in `coding->flags' as flag bits
1363    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1364    details.
1365 */
1366
1367 /* Produce codes (escape sequence) for designating CHARSET to graphic
1368    register REG.  If <final-char> of CHARSET is '@', 'A', or 'B' and
1369    the coding system CODING allows, produce designation sequence of
1370    short-form.

        The short form is produced only for a 2-dimensional charset of
        94 characters designated to register 0, and only when
        CODING_FLAG_ISO_SHORT_FORM is set in CODING->flags; it omits the
        intermediate character after `ESC $'.

        If the revision number recorded in CODING for CHARSET is less
        than 255, the sequence `ESC & <'@' + revision>' is produced
        before the designation sequence.

        The codes are written through `dst', a name taken from the place
        where the macro is expanded, and CHARSET is recorded as the
        designation of register REG in CODING.  */
1371
1372 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
1373   do {                                                                  \
1374     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
1375     char *intermediate_char_94 = "()*+";                                \
1376     char *intermediate_char_96 = ",-./";                                \
1377     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
1378     if (revision < 255)                                                 \
1379       {                                                                 \
1380         *dst++ = ISO_CODE_ESC;                                          \
1381         *dst++ = '&';                                                   \
1382         *dst++ = '@' + revision;                                        \
1383       }                                                                 \
1384     *dst++ = ISO_CODE_ESC;                                              \
1385     if (CHARSET_DIMENSION (charset) == 1)                               \
1386       {                                                                 \
1387         if (CHARSET_CHARS (charset) == 94)                              \
1388           *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
1389         else                                                            \
1390           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1391       }                                                                 \
1392     else                                                                \
1393       {                                                                 \
1394         *dst++ = '$';                                                   \
1395         if (CHARSET_CHARS (charset) == 94)                              \
1396           {                                                             \
1397             if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
1398                 || reg != 0                                             \
1399                 || final_char < '@' || final_char > 'B')                \
1400               *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
1401           }                                                             \
1402         else                                                            \
1403           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1404       }                                                                 \
1405     *dst++ = final_char;                                                \
1406     CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
1407   } while (0)
1408
1409 /* Emit the ISO2022 single-shift-2 / single-shift-3 function at DST:
1410    the escape sequence (ESC N / ESC O) if CODING is 7-bit, otherwise
1411    one control code.  Both record that a single shift is pending.  */
1412
1413 #define ENCODE_SINGLE_SHIFT_2                           \
1414   do {                                                  \
1415     if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
1416       {                                                 \
1417         *dst++ = ISO_CODE_SS2;                          \
1418         coding->fake_multibyte = 1;                     \
1419       }                                                 \
1420     else                                                \
1421       *dst++ = ISO_CODE_ESC, *dst++ = 'N';              \
1422     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1423   } while (0)
1424
1425 #define ENCODE_SINGLE_SHIFT_3                           \
1426   do {                                                  \
1427     if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
1428       {                                                 \
1429         *dst++ = ISO_CODE_SS3;                          \
1430         coding->fake_multibyte = 1;                     \
1431       }                                                 \
1432     else                                                \
1433       *dst++ = ISO_CODE_ESC, *dst++ = 'O';              \
1434     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1435   } while (0)
1436
1437 /* Emit at DST the ISO2022 locking-shift function for graphic register
1438    0 (shift-in), 1 (shift-out), 2 (ESC n) or 3 (ESC o), and record that
1439    register as the one currently invoked to graphic plane 0.  */
1440
1441 #define ENCODE_SHIFT_IN                         \
1442   do {                                          \
1443     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1444     *dst++ = ISO_CODE_SI;                       \
1445   } while (0)
1446
1447 #define ENCODE_SHIFT_OUT                        \
1448   do {                                          \
1449     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1450     *dst++ = ISO_CODE_SO;                       \
1451   } while (0)
1452
1453 #define ENCODE_LOCKING_SHIFT_2                  \
1454   do {                                          \
1455     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1456     *dst++ = ISO_CODE_ESC; *dst++ = 'n';        \
1457   } while (0)
1458
1459 #define ENCODE_LOCKING_SHIFT_3                  \
1460   do {                                          \
1461     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1462     *dst++ = ISO_CODE_ESC; *dst++ = 'o';        \
1463   } while (0)
1464
1465 /* Produce codes for a DIMENSION1 character whose character set is
1466    CHARSET and whose position-code is C1.  Designation and invocation
1467    sequences are also produced in advance if necessary.  CHARSET and
1468    C1 are evaluated several times, so they must have no side effects.  */
1469
1470 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
1471   do {                                                                  \
1472     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1473       {                                                                 \
1474         /* A single shift is pending; this character consumes it.  */   \
1475         *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
1476                   ? ((c1) & 0x7F) : ((c1) | 0x80));                     \
1477         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1478         break;                                                          \
1479       }                                                                 \
1480     /* Already invoked to graphic plane 0 or 1: just emit the code.  */ \
1481     if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))         \
1482       {                                                                 \
1483         *dst++ = (c1) & 0x7F;                                           \
1484         break;                                                          \
1485       }                                                                 \
1486     if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))         \
1487       {                                                                 \
1488         *dst++ = (c1) | 0x80;                                           \
1489         break;                                                          \
1490       }                                                                 \
1491     /* In safe mode, never designate a charset not marked safe.  */     \
1492     if ((coding->flags & CODING_FLAG_ISO_SAFE)                          \
1493         && !coding->safe_charsets[(charset)])                           \
1494       {                                                                 \
1495         /* We should not encode this character; emit the substitution   \
1496            code instead, twice if CHARSET_WIDTH is 2.  */               \
1497         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1498         if (CHARSET_WIDTH (charset) == 2)                               \
1499           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1500         break;                                                          \
1501       }                                                                 \
1502     /* Since CHARSET is not yet invoked to any graphic planes, we       \
1503        must invoke it, or, at first, designate it to some graphic       \
1504        register.  Then repeat the loop to actually produce the          \
1505        character.  */                                                   \
1506     dst = encode_invocation_designation (charset, coding, dst);         \
1507   } while (1)
1508
1509 /* Produce codes for a DIMENSION2 character whose character set is
1510    CHARSET and whose position-codes are C1 and C2.  Designation and
1511    invocation codes are also produced in advance if necessary.  The
1512    arguments are evaluated several times: no side effects, please.  */
1513
1514 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
1515   do {                                                                  \
1516     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1517       {                                                                 \
1518         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1519           *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                   \
1520         else                                                            \
1521           *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                   \
1522         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1523         break;                                                          \
1524       }                                                                 \
1525     if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))         \
1526       {                                                                 \
1527         *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                     \
1528         break;                                                          \
1529       }                                                                 \
1530     if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))         \
1531       {                                                                 \
1532         *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                     \
1533         break;                                                          \
1534       }                                                                 \
1535     if ((coding->flags & CODING_FLAG_ISO_SAFE)                          \
1536         && !coding->safe_charsets[(charset)])                           \
1537       {                                                                 \
1538         /* We should not encode this character; emit the substitution   \
1539            code instead, twice if CHARSET_WIDTH is 2.  */               \
1540         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1541         if (CHARSET_WIDTH (charset) == 2)                               \
1542           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1543         break;                                                          \
1544       }                                                                 \
1545     /* Since CHARSET is not yet invoked to any graphic planes, we       \
1546        must invoke it, or, at first, designate it to some graphic       \
1547        register.  Then repeat the loop to actually produce the          \
1548        character.  */                                                   \
1549     dst = encode_invocation_designation (charset, coding, dst);         \
1550   } while (1)
1551
1552 /* Produce codes for one character (CHARSET, C1, C2), first mapping it
1553    through `unification_table' if non-nil (C1, C2 go to SPLIT_CHAR).  */
1554 #define ENCODE_ISO_CHARACTER(charset, c1, c2)                             \
1555   do {                                                                    \
1556     int c_alt, charset_alt = (charset);                                   \
1557     if (!NILP (unification_table)                                         \
1558         && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
1559             >= 0))                                                        \
1560       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                            \
1561     if (CHARSET_DIMENSION (charset_alt) != 1)                             \
1562       {                                                                   \
1563         if (charset == charset_jisx0208                                   \
1564             && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)                \
1565           charset_alt = charset_jisx0208_1978;                            \
1566         ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);            \
1567       }                                                                   \
1568     else                                                                  \
1569       {                                                                   \
1570         if (charset == CHARSET_ASCII                                      \
1571             && coding->flags & CODING_FLAG_ISO_USE_ROMAN)                 \
1572           charset_alt = charset_latin_jisx0201;                           \
1573         ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);                \
1574       }                                                                   \
1575     if (! COMPOSING_P (coding->composing))                                \
1576       coding->consumed_char++;                                            \
1577   } while (0)
1578
1579 /* Produce, at DST, whatever designation and invocation codes are
1580    needed before a character of CHARSET can be encoded, and update the
1581    element `spec.iso2022' of *CODING to match.  Return the new DST.  */
1582
1583 unsigned char *
1584 encode_invocation_designation (charset, coding, dst)
1585      int charset;
1586      struct coding_system *coding;
1587      unsigned char *dst;
1588 {
1589   int reg = 0;                  /* graphic register number */
1590
1591   /* Look for a graphic register CHARSET is already designated to.  */
1592   while (reg < 4 && charset != CODING_SPEC_ISO_DESIGNATION (coding, reg))
1593     reg++;
1594
1595   if (reg == 4)
1596     {
1597       /* None: designate CHARSET to the register it requests, or to
1598          graphic register 0 if it requests no special designation.  */
1599       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1600       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1601         reg = 0;
1602
1603       ENCODE_DESIGNATION (charset, reg, coding);
1604     }
1605
1606   /* If REG is already invoked to graphic plane 0 or 1, we are done.  */
1607   if (CODING_SPEC_ISO_INVOCATION (coding, 0) == reg
1608       || CODING_SPEC_ISO_INVOCATION (coding, 1) == reg)
1609     return dst;
1610
1611   /* Since the graphic register REG is not invoked to any graphic
1612      plane, invoke it now: registers 0 and 1 by shift-in and shift-out,
1613      registers 2 and 3 by a single shift if CODING_FLAG_ISO_SINGLE_SHIFT
1614      is set in CODING, otherwise by a locking shift.  */
1615   switch (reg)
1616     {
1617     case 0:                     /* graphic register 0 */
1618       ENCODE_SHIFT_IN;
1619       break;
1620
1621     case 1:                     /* graphic register 1 */
1622       ENCODE_SHIFT_OUT;
1623       break;
1624
1625     case 2:                     /* graphic register 2 */
1626       if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1627         ENCODE_SINGLE_SHIFT_2;
1628       else
1629         ENCODE_LOCKING_SHIFT_2;
1630       break;
1631
1632     case 3:                     /* graphic register 3 */
1633       if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1634         ENCODE_SINGLE_SHIFT_3;
1635       else
1636         ENCODE_LOCKING_SHIFT_3;
1637       break;
1638     }
1639   return dst;
1640 }
1641
1642 /* The following three macros produce codes (ESC 0, ESC 2, ESC 1) for indicating composition.  */
1643 #define ENCODE_COMPOSITION_NO_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '0'
1644 #define ENCODE_COMPOSITION_WITH_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '2'
1645 #define ENCODE_COMPOSITION_END    *dst++ = ISO_CODE_ESC, *dst++ = '1'
1646
1647 /* The following three macros produce codes for indicating direction
1648    of text.  All three are expressions, so that the direction macros
1649    can chain the introducer with the comma operator.  */
1650
1651 /* ESC [ if CODING_FLAG_ISO_SEVEN_BITS is set in CODING, else one CSI.  */
1652 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
1653   ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)         \
1654    ? (*dst++ = ISO_CODE_ESC, *dst++ = '[')              \
1655    : (*dst++ = ISO_CODE_CSI))
1656
1657 #define ENCODE_DIRECTION_R2L    \
1658   (ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '2', *dst++ = ']')
1659
1660 #define ENCODE_DIRECTION_L2R    \
1661   (ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']')
1662
1663 /* Produce codes for designation and invocation to reset the graphic
1664    planes and registers to initial state.  Uses `coding' and `dst'.  */
1665 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
1666   do {                                                                      \
1667     int reg;                    /* graphic register number */               \
1668     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
1669       ENCODE_SHIFT_IN;          /* plane 0 invokes register 0 again */      \
1670     for (reg = 0; reg < 4; reg++)                                           \
1671       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0            \
1672           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
1673               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
1674         ENCODE_DESIGNATION                                                  \
1675           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1676   } while (0)
1677
1678 /* Produce designation sequences of charsets in the line started from
1679    SRC to a place pointed by *DSTP, and update DSTP.  Only charsets
1680    that request a particular graphic register are considered, and a
1681    register is re-designated only if its current charset differs.
1682    TABLE, if non-nil, is a unification table applied to each character.
1683    If the current block ends before any end-of-line, we may fail to
1684    find all the necessary designations.  */
1685
1686 void
1687 encode_designation_at_bol (coding, table, src, src_end, dstp)
1688      struct coding_system *coding;
1689      Lisp_Object table;
1690      unsigned char *src, *src_end, **dstp;
1691 {
1692   int charset, c, found = 0, reg;
1693   /* Charset wanted in each graphic register, or -1 if none yet.  */
1694   int r[4];
1695   unsigned char *dst = *dstp;
1696
1697   r[0] = r[1] = r[2] = r[3] = -1;
1698
1699   while (src < src_end && *src != '\n' && found < 4)
1700     {
1701       int bytes = BYTES_BY_CHAR_HEAD (*src);
1702
1703       if (! NILP (table))
1704         {
1705           int c_alt;
1706           unsigned char c1, c2;
1707           SPLIT_STRING(src, bytes, charset, c1, c2);
1708           if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
1709             charset = CHAR_CHARSET (c_alt);
1710         }
1711       else
1712         charset = CHARSET_AT (src);
1713
1714       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1715       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1716         {
1717           r[reg] = charset;
1718           found++;
1719         }
1720
1721       src += bytes;
1722     }
1723
1724   if (! found)
1725     return;
1726
1727   for (reg = 0; reg < 4; reg++)
1728     if (r[reg] >= 0
1729         && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1730       ENCODE_DESIGNATION (r[reg], reg, coding);
1731   *dstp = dst;
1732 }
1733
1734 /* Encode SRC_BYTES bytes at SOURCE into ISO2022 at DESTINATION; return a
1735    CODING_FINISH_XXX code.  See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
1736 int
1737 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1738      struct coding_system *coding;
1739      unsigned char *source, *destination;
1740      int src_bytes, dst_bytes;
1741 {
1742   unsigned char *src = source;
1743   unsigned char *src_end = source + src_bytes;
1744   unsigned char *dst = destination;
1745   unsigned char *dst_end = destination + dst_bytes;
1746   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1747      from DST_END to assure overflow checking is necessary only at the
1748      head of loop.  (With DST_BYTES zero, DST is kept below SRC - 19.)  */
1749   unsigned char *adjusted_dst_end = dst_end - 19;
1750   Lisp_Object unification_table
1751       = coding->character_unification_table_for_encode;
1752   int result = CODING_FINISH_NORMAL;
1753   /* With unification enabled and no table in CODING, use the standard one.  */
1754   if (!NILP (Venable_character_unification) && NILP (unification_table))
1755     unification_table = Vstandard_character_unification_table_for_encode;
1756
1757   coding->consumed_char = 0;
1758   coding->fake_multibyte = 0;
1759   while (src < src_end && (dst_bytes
1760                            ? (dst < adjusted_dst_end)
1761                            : (dst < src - 19)))
1762     {
1763       /* SRC_BASE remembers the start position in source in each loop.
1764          The loop will be exited when there's not enough source text
1765          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1766          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1767          reset to SRC_BASE before exiting.  */
1768       unsigned char *src_base = src;
1769       int charset, c1, c2, c3, c4;
1770
1771       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1772           && CODING_SPEC_ISO_BOL (coding))
1773         {
1774           /* We have to produce designation sequences if any now.  */
1775           encode_designation_at_bol (coding, unification_table,
1776                                      src, src_end, &dst);
1777           CODING_SPEC_ISO_BOL (coding) = 0;
1778         }
1779
1780       c1 = *src++;
1781       /* If we are seeing a component of a composite character, we are
1782          seeing a leading-code encoded irregularly for composition, or
1783          a composition rule if composing with rule.  We must set C1 to
1784          a normal leading-code or an ASCII code.  If we are not seeing
1785          a composite character, we must reset composition,
1786          designation, and invocation states.  */
1787       if (COMPOSING_P (coding->composing))
1788         {
1789           if (c1 < 0xA0)
1790             {
1791               /* We are not in a composite character any longer.  */
1792               coding->composing = COMPOSING_NO;
1793               ENCODE_RESET_PLANE_AND_REGISTER;
1794               ENCODE_COMPOSITION_END;
1795             }
1796           else
1797             {
1798               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1799                 {
1800                   *dst++ = c1 & 0x7F;
1801                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1802                   continue;
1803                 }
1804               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1805                 coding->composing = COMPOSING_WITH_RULE_RULE;
1806               if (c1 == 0xA0)
1807                 {
1808                   /* This is an ASCII component.  */
1809                   ONE_MORE_BYTE (c1);
1810                   c1 &= 0x7F;
1811                 }
1812               else
1813                 /* This is a leading-code of non ASCII component.  */
1814                 c1 -= 0x20;
1815             }
1816         }
1817
1818       /* Now encode one character.  C1 is a control character, an
1819          ASCII character, or a leading-code of multi-byte character.  */
1820       switch (emacs_code_class[c1])
1821         {
1822         case EMACS_ascii_code:
1823           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1824           break;
1825
1826         case EMACS_control_code:
1827           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1828             ENCODE_RESET_PLANE_AND_REGISTER;
1829           *dst++ = c1;
1830           coding->consumed_char++;
1831           break;
1832
1833         case EMACS_carriage_return_code:
1834           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1835             {
1836               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1837                 ENCODE_RESET_PLANE_AND_REGISTER;
1838               *dst++ = c1;
1839               coding->consumed_char++;
1840               break;
1841             }
1842           /* fall down to treat '\r' as '\n' ...  */
1843
1844         case EMACS_linefeed_code:
1845           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1846             ENCODE_RESET_PLANE_AND_REGISTER;
1847           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1848             bcopy (coding->spec.iso2022.initial_designation,
1849                    coding->spec.iso2022.current_designation,
1850                    sizeof coding->spec.iso2022.initial_designation);
1851           if (coding->eol_type == CODING_EOL_LF
1852               || coding->eol_type == CODING_EOL_UNDECIDED)
1853             *dst++ = ISO_CODE_LF;
1854           else if (coding->eol_type == CODING_EOL_CRLF)
1855             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1856           else
1857             *dst++ = ISO_CODE_CR;
1858           CODING_SPEC_ISO_BOL (coding) = 1;
1859           coding->consumed_char++;
1860           break;
1861
1862         case EMACS_leading_code_2:
1863           ONE_MORE_BYTE (c2);
1864           if (c2 < 0xA0)
1865             {
1866               /* invalid sequence: copy the bytes through unchanged */
1867               *dst++ = c1;
1868               *dst++ = c2;
1869               coding->consumed_char += 2;
1870             }
1871           else
1872             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1873           break;
1874
1875         case EMACS_leading_code_3:
1876           TWO_MORE_BYTES (c2, c3);
1877           if (c2 < 0xA0 || c3 < 0xA0)
1878             {
1879               /* invalid sequence: copy the bytes through unchanged */
1880               *dst++ = c1;
1881               *dst++ = c2;
1882               *dst++ = c3;
1883               coding->consumed_char += 3;
1884             }
1885           else if (c1 < LEADING_CODE_PRIVATE_11)
1886             ENCODE_ISO_CHARACTER (c1, c2, c3);
1887           else
1888             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1889           break;
1890
1891         case EMACS_leading_code_4:
1892           THREE_MORE_BYTES (c2, c3, c4);
1893           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1894             {
1895               /* invalid sequence: copy the bytes through unchanged */
1896               *dst++ = c1;
1897               *dst++ = c2;
1898               *dst++ = c3;
1899               *dst++ = c4;
1900               coding->consumed_char += 4;
1901             }
1902           else
1903             ENCODE_ISO_CHARACTER (c2, c3, c4);
1904           break;
1905
1906         case EMACS_leading_code_composition:
1907           ONE_MORE_BYTE (c2);
1908           if (c2 < 0xA0)
1909             {
1910               /* invalid sequence: copy the bytes through unchanged */
1911               *dst++ = c1;
1912               *dst++ = c2;
1913               coding->consumed_char += 2;
1914             }
1915           else if (c2 == 0xFF)
1916             {
1917               ENCODE_RESET_PLANE_AND_REGISTER;
1918               coding->composing = COMPOSING_WITH_RULE_HEAD;
1919               ENCODE_COMPOSITION_WITH_RULE_START;
1920               coding->consumed_char++;
1921             }
1922           else
1923             {
1924               ENCODE_RESET_PLANE_AND_REGISTER;
1925               /* Rewind one byte because it is a character code of
1926                  composition elements.  */
1927               src--;
1928               coding->composing = COMPOSING_NO_RULE_HEAD;
1929               ENCODE_COMPOSITION_NO_RULE_START;
1930               coding->consumed_char++;
1931             }
1932           break;
1933
1934         case EMACS_invalid_code:
1935           *dst++ = c1;
1936           coding->consumed_char++;
1937           break;
1938         }
1939       continue;
1940     label_end_of_loop:          /* presumably the target of ONE_MORE_BYTE etc. */
1941       result = CODING_FINISH_INSUFFICIENT_SRC;
1942       src = src_base;
1943       break;
1944     }
1945
1946   if (src < src_end)
1947     {
1948       if (result == CODING_FINISH_NORMAL)
1949         result = CODING_FINISH_INSUFFICIENT_DST;
1950       else
1951         /* RESULT is CODING_FINISH_INSUFFICIENT_SRC here.  If this is the
1952            last block of the text, reset graphic planes and registers.
1953            NOTE(review): no reset is produced when SRC reaches SRC_END.  */
1954         if (coding->mode & CODING_MODE_LAST_BLOCK)
1955           ENCODE_RESET_PLANE_AND_REGISTER;
1956     }
1957
1958   coding->consumed = src - source;
1959   coding->produced = coding->produced_char = dst - destination;
1960   return result;
1961 }
1962
1963 \f
1964 /*** 4. SJIS and BIG5 handlers ***/
1965
1966 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1967    quite widely.  So, for the moment, Emacs supports them in the bare
1968    C code.  But, in the future, they may be supported only by CCL.  */
1969
1970 /* SJIS is a coding system encoding three character sets: ASCII, right
1971    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
1972    as is.  A character of charset katakana-jisx0201 is encoded by
1973    "position-code + 0x80".  A character of charset japanese-jisx0208
1974    is encoded in two bytes, but the two position-codes are divided and
1975    shifted so that they fit in the ranges below.
1976
1977    --- CODE RANGE of SJIS ---
1978    (character set)      (range)
1979    ASCII                0x00 .. 0x7F
1980    KATAKANA-JISX0201    0xA0 .. 0xDF
1981    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xFF
1982             (2nd byte)  0x40 .. 0xFF
1983    -------------------------------
1984
1985 */
1986
1987 /* BIG5 is a coding system encoding two character sets: ASCII and
1988    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
1989    character set and is encoded in two bytes.
1990
1991    --- CODE RANGE of BIG5 ---
1992    (character set)      (range)
1993    ASCII                0x00 .. 0x7F
1994    Big5 (1st byte)      0xA1 .. 0xFE
1995         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
1996    --------------------------
1997
1998    Since the number of characters in Big5 is larger than maximum
1999    characters in Emacs' charset (96x96), it can't be handled as one
2000    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2001    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2002    contains frequently used characters and the latter contains less
2003    frequently used characters.  */
2004
2005 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2006    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.  C1
2007    and C2 are the 1st and 2nd position-codes of Emacs' internal format.
2008    CHARSET is `charset_big5_1' or `charset_big5_2'.  DECODE sets CHARSET, C1, C2; ENCODE sets B1, B2.  */
2009
2010 /* Number of Big5 characters which have the same code in 1st byte.  */
2011 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2012
2013 #define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
2014   do {                                                                  \
2015     unsigned int temp = ((b1) - 0xA1) * BIG5_SAME_ROW                   \
2016       + (b2) - ((b2) < 0x7F ? 0x40 : 0x62);                             \
2017     if ((b1) < 0xC9)                                                    \
2018       (charset) = charset_big5_1;                                       \
2019     else                                                                \
2020       {                                                                 \
2021         (charset) = charset_big5_2;                                     \
2022         temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
2023       }                                                                 \
2024     (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
2025     (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
2026   } while (0)
2027
2028 #define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
2029   do {                                                                  \
2030     unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
2031     if ((charset) == charset_big5_2)                                    \
2032       temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
2033     (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
2034     (b2) = temp % BIG5_SAME_ROW;                                        \
2035     (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
2036   } while (0)
2037 /* Unify (CHARSET, C1, C2) if a table is set, then decode it by dimension.  */
2038 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
2039   do {                                                                  \
2040     int c_alt, charset_alt = (charset);                                 \
2041     if (! NILP (unification_table)                                      \
2042         && (c_alt = unify_char (unification_table, -1, (charset),       \
2043                                 c1, c2)) >= 0)                          \
2044       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                          \
2045     if (charset_alt < 0 || charset_alt == CHARSET_ASCII)                \
2046       DECODE_CHARACTER_ASCII (c1);                                      \
2047     else if (CHARSET_DIMENSION (charset_alt) != 1)                      \
2048       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
2049     else                                                                \
2050       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
2051   } while (0)
2052
/* Encode the character (CHARSET, C1, C2) and store the result at
   `dst', advancing it.  The expansion site must provide `dst',
   `coding', `sjis_p' and `unification_table'.  C1 and C2 must be
   lvalues; they may be overwritten.

   If `unification_table' is non-nil and `unify_char' yields a
   non-negative character, that character is encoded instead.  ASCII
   is written as is.  With SJIS_P nonzero, JISX0201-Kana is written as
   one byte and JISX0208 as two SJIS bytes; with SJIS_P zero, Big5
   characters are written as two BIG5 bytes.  Any other character is
   written unencoded as charset followed by its position code(s),
   which takes up to three bytes.

   The expansion does not end with a semicolon, so that the macro can
   be used like an ordinary statement (also before `else').  */
#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                       \
  do {                                                                    \
    int c_alt, charset_alt;                                               \
    if (!NILP (unification_table)                                         \
        && ((c_alt = unify_char (unification_table, -1, (charset),        \
                                 c1, c2))                                 \
            >= 0))                                                        \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                            \
    else                                                                  \
      charset_alt = (charset);                                            \
    if (charset_alt == charset_ascii)                                     \
      *dst++ = c1;                                                        \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                        \
      {                                                                   \
        if (sjis_p && charset_alt == charset_katakana_jisx0201)           \
          *dst++ = c1;                                                    \
        else                                                              \
          {                                                               \
            *dst++ = charset_alt, *dst++ = c1;                            \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        c1 &= 0x7F, c2 &= 0x7F;                                           \
        if (sjis_p && charset_alt == charset_jisx0208)                    \
          {                                                               \
            unsigned char s1, s2;                                         \
                                                                          \
            ENCODE_SJIS (c1, c2, s1, s2);                                 \
            *dst++ = s1, *dst++ = s2;                                     \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
        else if (!sjis_p                                                  \
                 && (charset_alt == charset_big5_1                        \
                     || charset_alt == charset_big5_2))                   \
          {                                                               \
            unsigned char b1, b2;                                         \
                                                                          \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);                    \
            *dst++ = b1, *dst++ = b2;                                     \
          }                                                               \
        else                                                              \
          {                                                               \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;               \
            coding->fake_multibyte = 1;                                   \
          }                                                               \
      }                                                                   \
    coding->consumed_char++;                                              \
  } while (0)
2102
2103 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2104    Check if a text is encoded in SJIS.  If it is, return
2105    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2106
2107 int
2108 detect_coding_sjis (src, src_end)
2109      unsigned char *src, *src_end;
2110 {
2111   unsigned char c;
2112
2113   while (src < src_end)
2114     {
2115       c = *src++;
2116       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2117         {
2118           if (src < src_end && *src++ < 0x40)
2119             return 0;
2120         }
2121     }
2122   return CODING_CATEGORY_MASK_SJIS;
2123 }
2124
2125 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2126    Check if a text is encoded in BIG5.  If it is, return
2127    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2128
2129 int
2130 detect_coding_big5 (src, src_end)
2131      unsigned char *src, *src_end;
2132 {
2133   unsigned char c;
2134
2135   while (src < src_end)
2136     {
2137       c = *src++;
2138       if (c >= 0xA1)
2139         {
2140           if (src >= src_end)
2141             break;
2142           c = *src++;
2143           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2144             return 0;
2145         }
2146     }
2147   return CODING_CATEGORY_MASK_BIG5;
2148 }
2149
2150 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2151    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2152
2153 int
2154 decode_coding_sjis_big5 (coding, source, destination,
2155                          src_bytes, dst_bytes, sjis_p)
2156      struct coding_system *coding;
2157      unsigned char *source, *destination;
2158      int src_bytes, dst_bytes;
2159      int sjis_p;
2160 {
2161   unsigned char *src = source;
2162   unsigned char *src_end = source + src_bytes;
2163   unsigned char *dst = destination;
2164   unsigned char *dst_end = destination + dst_bytes;
2165   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2166      from DST_END to assure overflow checking is necessary only at the
2167      head of loop.  */
2168   unsigned char *adjusted_dst_end = dst_end - 3;
2169   Lisp_Object unification_table
2170       = coding->character_unification_table_for_decode;
2171   int result = CODING_FINISH_NORMAL;
2172
2173   if (!NILP (Venable_character_unification) && NILP (unification_table))
2174     unification_table = Vstandard_character_unification_table_for_decode;
2175
2176   coding->produced_char = 0;
2177   coding->fake_multibyte = 0;
2178   while (src < src_end && (dst_bytes
2179                            ? (dst < adjusted_dst_end)
2180                            : (dst < src - 3)))
2181     {
2182       /* SRC_BASE remembers the start position in source in each loop.
2183          The loop will be exited when there's not enough source text
2184          to analyze two-byte character (within macro ONE_MORE_BYTE).
2185          In that case, SRC is reset to SRC_BASE before exiting.  */
2186       unsigned char *src_base = src;
2187       unsigned char c1 = *src++, c2, c3, c4;
2188
2189       if (c1 < 0x20)
2190         {
2191           if (c1 == '\r')
2192             {
2193               if (coding->eol_type == CODING_EOL_CRLF)
2194                 {
2195                   ONE_MORE_BYTE (c2);
2196                   if (c2 == '\n')
2197                     *dst++ = c2;
2198                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2199                     {
2200                       result = CODING_FINISH_INCONSISTENT_EOL;
2201                       goto label_end_of_loop_2;
2202                     }
2203                   else
2204                     /* To process C2 again, SRC is subtracted by 1.  */
2205                     *dst++ = c1, src--;
2206                 }
2207               else if (coding->eol_type == CODING_EOL_CR)
2208                 *dst++ = '\n';
2209               else
2210                 *dst++ = c1;
2211             }
2212           else if (c1 == '\n'
2213                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2214                    && (coding->eol_type == CODING_EOL_CR
2215                        || coding->eol_type == CODING_EOL_CRLF))
2216             {
2217               result = CODING_FINISH_INCONSISTENT_EOL;
2218               goto label_end_of_loop_2;
2219             }
2220           else
2221             *dst++ = c1;
2222           coding->produced_char++;
2223         }
2224       else if (c1 < 0x80)
2225         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2226       else if (c1 < 0xA0)
2227         {
2228           /* SJIS -> JISX0208 */
2229           if (sjis_p)
2230             {
2231               ONE_MORE_BYTE (c2);
2232               if (c2 >= 0x40)
2233                 {
2234                   DECODE_SJIS (c1, c2, c3, c4);
2235                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2236                 }
2237               else
2238                 goto label_invalid_code_2;
2239             }
2240           else
2241             goto label_invalid_code_1;
2242         }
2243       else if (c1 < 0xE0)
2244         {
2245           /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
2246           if (sjis_p)
2247             DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2248                                         /* dummy */ c2);
2249           else
2250             {
2251               int charset;
2252
2253               ONE_MORE_BYTE (c2);
2254               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2255                 {
2256                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2257                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2258                 }
2259               else
2260                 goto label_invalid_code_2;
2261             }
2262         }
2263       else                      /* C1 >= 0xE0 */
2264         {
2265           /* SJIS -> JISX0208, BIG5 -> Big5 */
2266           if (sjis_p)
2267             {
2268               ONE_MORE_BYTE (c2);
2269               if (c2 >= 0x40)
2270                 {
2271                   DECODE_SJIS (c1, c2, c3, c4);
2272                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2273                 }
2274               else
2275                 goto label_invalid_code_2;
2276             }
2277           else
2278             {
2279               int charset;
2280
2281               ONE_MORE_BYTE (c2);
2282               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2283                 {
2284                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2285                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2286                 }
2287               else
2288                 goto label_invalid_code_2;
2289             }
2290         }
2291       continue;
2292
2293     label_invalid_code_1:
2294       *dst++ = c1;
2295       coding->produced_char++;
2296       coding->fake_multibyte = 1;
2297       continue;
2298
2299     label_invalid_code_2:
2300       *dst++ = c1; *dst++= c2;
2301       coding->produced_char += 2;
2302       coding->fake_multibyte = 1;
2303       continue;
2304
2305     label_end_of_loop:
2306       result = CODING_FINISH_INSUFFICIENT_SRC;
2307     label_end_of_loop_2:
2308       src = src_base;
2309       break;
2310     }
2311
2312   if (src < src_end)
2313     {
2314       if (result == CODING_FINISH_NORMAL)
2315         result = CODING_FINISH_INSUFFICIENT_DST;
2316       else if (result != CODING_FINISH_INCONSISTENT_EOL
2317                && coding->mode & CODING_MODE_LAST_BLOCK)
2318         {
2319           src_bytes = src_end - src;
2320           if (dst_bytes && (dst_end - dst < src_bytes))
2321             src_bytes = dst_end - dst;
2322           bcopy (dst, src, src_bytes);
2323           src += src_bytes;
2324           dst += src_bytes;
2325           coding->fake_multibyte = 1;
2326         }
2327     }
2328
2329   coding->consumed = coding->consumed_char = src - source;
2330   coding->produced = dst - destination;
2331   return result;
2332 }
2333
2334 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2335    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2336    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
2337    sure that all these charsets are registered as official charset
2338    (i.e. do not have extended leading-codes).  Characters of other
2339    charsets are produced without any encoding.  If SJIS_P is 1, encode
2340    SJIS text, else encode BIG5 text.  */
2341
2342 int
2343 encode_coding_sjis_big5 (coding, source, destination,
2344                          src_bytes, dst_bytes, sjis_p)
2345      struct coding_system *coding;
2346      unsigned char *source, *destination;
2347      int src_bytes, dst_bytes;
2348      int sjis_p;
2349 {
2350   unsigned char *src = source;
2351   unsigned char *src_end = source + src_bytes;
2352   unsigned char *dst = destination;
2353   unsigned char *dst_end = destination + dst_bytes;
2354   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2355      from DST_END to assure overflow checking is necessary only at the
2356      head of loop.  */
2357   unsigned char *adjusted_dst_end = dst_end - 1;
2358   Lisp_Object unification_table
2359       = coding->character_unification_table_for_encode;
2360   int result = CODING_FINISH_NORMAL;
2361
2362   if (!NILP (Venable_character_unification) && NILP (unification_table))
2363     unification_table = Vstandard_character_unification_table_for_encode;
2364
2365   coding->consumed_char = 0;
2366   coding->fake_multibyte = 0;
2367   while (src < src_end && (dst_bytes
2368                            ? (dst < adjusted_dst_end)
2369                            : (dst < src - 1)))
2370     {
2371       /* SRC_BASE remembers the start position in source in each loop.
2372          The loop will be exited when there's not enough source text
2373          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2374          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2375          before exiting.  */
2376       unsigned char *src_base = src;
2377       unsigned char c1 = *src++, c2, c3, c4;
2378
2379       if (coding->composing)
2380         {
2381           if (c1 == 0xA0)
2382             {
2383               ONE_MORE_BYTE (c1);
2384               c1 &= 0x7F;
2385             }
2386           else if (c1 >= 0xA0)
2387             c1 -= 0x20;
2388           else
2389             coding->composing = 0;
2390         }
2391
2392       switch (emacs_code_class[c1])
2393         {
2394         case EMACS_ascii_code:
2395           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2396           break;
2397
2398         case EMACS_control_code:
2399           *dst++ = c1;
2400           coding->consumed_char++;
2401           break;
2402
2403         case EMACS_carriage_return_code:
2404           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2405             {
2406               *dst++ = c1;
2407               coding->consumed_char++;
2408               break;
2409             }
2410           /* fall down to treat '\r' as '\n' ...  */
2411
2412         case EMACS_linefeed_code:
2413           if (coding->eol_type == CODING_EOL_LF
2414               || coding->eol_type == CODING_EOL_UNDECIDED)
2415             *dst++ = '\n';
2416           else if (coding->eol_type == CODING_EOL_CRLF)
2417             *dst++ = '\r', *dst++ = '\n';
2418           else
2419             *dst++ = '\r';
2420           coding->consumed_char++;
2421           break;
2422
2423         case EMACS_leading_code_2:
2424           ONE_MORE_BYTE (c2);
2425           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2426           break;
2427
2428         case EMACS_leading_code_3:
2429           TWO_MORE_BYTES (c2, c3);
2430           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2431           break;
2432
2433         case EMACS_leading_code_4:
2434           THREE_MORE_BYTES (c2, c3, c4);
2435           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2436           break;
2437
2438         case EMACS_leading_code_composition:
2439           coding->composing = 1;
2440           break;
2441
2442         default:                /* i.e. case EMACS_invalid_code: */
2443           *dst++ = c1;
2444           coding->consumed_char++;
2445         }
2446       continue;
2447
2448     label_end_of_loop:
2449       result = CODING_FINISH_INSUFFICIENT_SRC;
2450       src = src_base;
2451       break;
2452     }
2453
2454   if (result == CODING_FINISH_NORMAL
2455       && src < src_end)
2456     result = CODING_FINISH_INSUFFICIENT_DST;
2457   coding->consumed = src - source;
2458   coding->produced = coding->produced_char = dst - destination;
2459   return result;
2460 }
2461
2462 \f
2463 /*** 5. End-of-line handlers ***/
2464
2465 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2466    This function is called only when `coding->eol_type' is
2467    CODING_EOL_CRLF or CODING_EOL_CR.  */
2468
2469 int
2470 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2471      struct coding_system *coding;
2472      unsigned char *source, *destination;
2473      int src_bytes, dst_bytes;
2474 {
2475   unsigned char *src = source;
2476   unsigned char *src_end = source + src_bytes;
2477   unsigned char *dst = destination;
2478   unsigned char *dst_end = destination + dst_bytes;
2479   unsigned char c;
2480   int result = CODING_FINISH_NORMAL;
2481
2482   coding->fake_multibyte = 0;
2483
2484   if (src_bytes <= 0)
2485     return result;
2486
2487   switch (coding->eol_type)
2488     {
2489     case CODING_EOL_CRLF:
2490       {
2491         /* Since the maximum bytes produced by each loop is 2, we
2492            subtract 1 from DST_END to assure overflow checking is
2493            necessary only at the head of loop.  */
2494         unsigned char *adjusted_dst_end = dst_end - 1;
2495
2496         while (src < src_end && (dst_bytes
2497                                  ? (dst < adjusted_dst_end)
2498                                  : (dst < src - 1)))
2499           {
2500             unsigned char *src_base = src;
2501
2502             c = *src++;
2503             if (c == '\r')
2504               {
2505                 ONE_MORE_BYTE (c);
2506                 if (c != '\n')
2507                   {
2508                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2509                       {
2510                         result = CODING_FINISH_INCONSISTENT_EOL;
2511                         goto label_end_of_loop_2;
2512                       }
2513                     *dst++ = '\r';
2514                     if (BASE_LEADING_CODE_P (c))
2515                       coding->fake_multibyte = 1;
2516                   }
2517                 *dst++ = c;
2518               }
2519             else if (c == '\n'
2520                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2521               {
2522                 result = CODING_FINISH_INCONSISTENT_EOL;
2523                 goto label_end_of_loop_2;
2524               }
2525             else
2526               {
2527                 *dst++ = c;
2528                 if (BASE_LEADING_CODE_P (c))
2529                   coding->fake_multibyte = 1;
2530               }
2531             continue;
2532
2533           label_end_of_loop:
2534             result = CODING_FINISH_INSUFFICIENT_SRC;
2535           label_end_of_loop_2:
2536             src = src_base;
2537             break;
2538           }
2539         if (result == CODING_FINISH_NORMAL
2540             && src < src_end)
2541           result = CODING_FINISH_INSUFFICIENT_DST;
2542       }
2543       break;
2544
2545     case CODING_EOL_CR:
2546       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2547         {
2548           while (src < src_end)
2549             {
2550               if ((c = *src++) == '\n')
2551                 break;
2552               if (BASE_LEADING_CODE_P (c))
2553                 coding->fake_multibyte = 1;
2554             }
2555           if (*--src == '\n')
2556             {
2557               src_bytes = src - source;
2558               result = CODING_FINISH_INCONSISTENT_EOL;
2559             }
2560         }
2561       if (dst_bytes && src_bytes > dst_bytes)
2562         {
2563           result = CODING_FINISH_INSUFFICIENT_DST;
2564           src_bytes = dst_bytes;
2565         }
2566       if (dst_bytes)
2567         bcopy (source, destination, src_bytes);
2568       else
2569         safe_bcopy (source, destination, src_bytes);
2570       src = source + src_bytes;
2571       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2572       break;
2573
2574     default:                    /* i.e. case: CODING_EOL_LF */
2575       if (dst_bytes && src_bytes > dst_bytes)
2576         {
2577           result = CODING_FINISH_INSUFFICIENT_DST;
2578           src_bytes = dst_bytes;
2579         }
2580       if (dst_bytes)
2581         bcopy (source, destination, src_bytes);
2582       else
2583         safe_bcopy (source, destination, src_bytes);
2584       src += src_bytes;
2585       dst += dst_bytes;
2586       coding->fake_multibyte = 1;
2587       break;
2588     }
2589
2590   coding->consumed = coding->consumed_char = src - source;
2591   coding->produced = coding->produced_char = dst - destination;
2592   return result;
2593 }
2594
2595 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2596    format of end-of-line according to `coding->eol_type'.  If
2597    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2598    '\r' in source text also means end-of-line.  */
2599
2600 int
2601 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2602      struct coding_system *coding;
2603      unsigned char *source, *destination;
2604      int src_bytes, dst_bytes;
2605 {
2606   unsigned char *src = source;
2607   unsigned char *dst = destination;
2608   int result = CODING_FINISH_NORMAL;
2609
2610   coding->fake_multibyte = 0;
2611
2612   if (coding->eol_type == CODING_EOL_CRLF)
2613     {
2614       unsigned char c;
2615       unsigned char *src_end = source + src_bytes;
2616       unsigned char *dst_end = destination + dst_bytes;
2617       /* Since the maximum bytes produced by each loop is 2, we
2618          subtract 1 from DST_END to assure overflow checking is
2619          necessary only at the head of loop.  */
2620       unsigned char *adjusted_dst_end = dst_end - 1;
2621
2622       while (src < src_end && (dst_bytes
2623                                ? (dst < adjusted_dst_end)
2624                                : (dst < src - 1)))
2625         {
2626           c = *src++;
2627           if (c == '\n'
2628               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2629             *dst++ = '\r', *dst++ = '\n';
2630           else
2631             {
2632               *dst++ = c;
2633               if (BASE_LEADING_CODE_P (c))
2634                 coding->fake_multibyte = 1;
2635             }
2636         }
2637       if (src < src_end)
2638         result = CODING_FINISH_INSUFFICIENT_DST;
2639     }
2640   else
2641     {
2642       unsigned char c;
2643
2644       if (dst_bytes && src_bytes > dst_bytes)
2645         {
2646           src_bytes = dst_bytes;
2647           result = CODING_FINISH_INSUFFICIENT_DST;
2648         }
2649       if (dst_bytes)
2650         bcopy (source, destination, src_bytes);
2651       else
2652         {
2653           safe_bcopy (source, destination, src_bytes);
2654           dst_bytes = src_bytes;
2655         }
2656       if (coding->eol_type == CODING_EOL_CRLF)
2657         {
2658           while (src_bytes--)
2659             {
2660               if ((c = *dst++) == '\n')
2661                 dst[-1] = '\r';
2662               else if (BASE_LEADING_CODE_P (c))
2663                   coding->fake_multibyte = 1;
2664             }
2665         }
2666       else
2667         {
2668           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2669             {
2670               while (src_bytes--)
2671                 if (*dst++ == '\r') dst[-1] = '\n';
2672             }
2673           coding->fake_multibyte = 1;
2674         }
2675       src = source + dst_bytes;
2676       dst = destination + dst_bytes;
2677     }
2678
2679   coding->consumed = coding->consumed_char = src - source;
2680   coding->produced = coding->produced_char = dst - destination;
2681   return result;
2682 }
2683
2684 \f
2685 /*** 6. C library functions ***/
2686
/* In Emacs Lisp, a coding system is represented by a Lisp symbol which
   has a property `coding-system'.  The value of this property is a
   vector of length 5 (called the coding-vector).  Among the elements of
2690    this vector, the first (element[0]) and the fifth (element[4])
2691    carry important information for decoding/encoding.  Before
2692    decoding/encoding, this information should be set in fields of a
2693    structure of type `coding_system'.
2694
2695    A value of property `coding-system' can be a symbol of another
2696    subsidiary coding-system.  In that case, Emacs gets coding-vector
2697    from that symbol.
2698
2699    `element[0]' contains information to be set in `coding->type'.  The
2700    value and its meaning is as follows:
2701
2702    0 -- coding_type_emacs_mule
2703    1 -- coding_type_sjis
2704    2 -- coding_type_iso2022
2705    3 -- coding_type_big5
2706    4 -- coding_type_ccl encoder/decoder written in CCL
2707    nil -- coding_type_no_conversion
2708    t -- coding_type_undecided (automatic conversion on decoding,
2709                                no-conversion on encoding)
2710
2711    `element[4]' contains information to be set in `coding->flags' and
2712    `coding->spec'.  The meaning varies by `coding->type'.
2713
   If `coding->type' is `coding_type_iso2022', element[4] is a vector
   of length 32 (of which the first 17 sub-elements are used now).
2716    Meanings of these sub-elements are:
2717
2718    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2719         If the value is an integer of valid charset, the charset is
2720         assumed to be designated to graphic register N initially.
2721
        If the value is negative, its absolute value is a charset which
        reserves graphic register N, which means that the charset is
        not designated initially but should be designated to graphic
        register N just before encoding a character in that charset.
2726
2727         If the value is nil, graphic register N is never used on
2728         encoding.
2729
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2731         Each value takes t or nil.  See the section ISO2022 of
2732         `coding.h' for more information.
2733
2734    If `coding->type' is `coding_type_big5', element[4] is t to denote
2735    BIG5-ETen or nil to denote BIG5-HKU.
2736
   If `coding->type' takes any other value, element[4] is ignored.
2738
2739    Emacs Lisp's coding system also carries information about format of
2740    end-of-line in a value of property `eol-type'.  If the value is
2741    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2742    means CODING_EOL_CR.  If it is not integer, it should be a vector
2743    of subsidiary coding systems of which property `eol-type' has one
2744    of above values.
2745
2746 */
2747
2748 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2749    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2750    is setup so that no conversion is necessary and return -1, else
2751    return 0.  */
2752
2753 int
2754 setup_coding_system (coding_system, coding)
2755      Lisp_Object coding_system;
2756      struct coding_system *coding;
2757 {
2758   Lisp_Object coding_spec, coding_type, eol_type, plist;
2759   Lisp_Object val;
2760   int i;
2761
2762   /* Initialize some fields required for all kinds of coding systems.  */
2763   coding->symbol = coding_system;
2764   coding->common_flags = 0;
2765   coding->mode = 0;
2766   coding->heading_ascii = -1;
2767   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2768   coding_spec = Fget (coding_system, Qcoding_system);
2769   if (!VECTORP (coding_spec)
2770       || XVECTOR (coding_spec)->size != 5
2771       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2772     goto label_invalid_coding_system;
2773
2774   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2775   if (VECTORP (eol_type))
2776     {
2777       coding->eol_type = CODING_EOL_UNDECIDED;
2778       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2779     }
2780   else if (XFASTINT (eol_type) == 1)
2781     {
2782       coding->eol_type = CODING_EOL_CRLF;
2783       coding->common_flags
2784         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2785     }
2786   else if (XFASTINT (eol_type) == 2)
2787     {
2788       coding->eol_type = CODING_EOL_CR;
2789       coding->common_flags
2790         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2791     }
2792   else
2793     coding->eol_type = CODING_EOL_LF;
2794
2795   coding_type = XVECTOR (coding_spec)->contents[0];
2796   /* Try short cut.  */
2797   if (SYMBOLP (coding_type))
2798     {
2799       if (EQ (coding_type, Qt))
2800         {
2801           coding->type = coding_type_undecided;
2802           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2803         }
2804       else
2805         coding->type = coding_type_no_conversion;
2806       return 0;
2807     }
2808
2809   /* Initialize remaining fields.  */
2810   coding->composing = 0;
2811   coding->character_unification_table_for_decode = Qnil;
2812   coding->character_unification_table_for_encode = Qnil;
2813
2814   /* Get values of coding system properties:
2815      `post-read-conversion', `pre-write-conversion',
2816      `character-unification-table-for-decode',
2817      `character-unification-table-for-encode'.  */
2818   plist = XVECTOR (coding_spec)->contents[3];
2819   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2820   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2821   val = Fplist_get (plist, Qcharacter_unification_table_for_decode);
2822   if (SYMBOLP (val))
2823     val = Fget (val, Qcharacter_unification_table_for_decode);
2824   coding->character_unification_table_for_decode
2825     = CHAR_TABLE_P (val) ? val : Qnil;
2826   val = Fplist_get (plist, Qcharacter_unification_table_for_encode);
2827   if (SYMBOLP (val))
2828     val = Fget (val, Qcharacter_unification_table_for_encode);
2829   coding->character_unification_table_for_encode
2830     = CHAR_TABLE_P (val) ? val : Qnil;
2831   val = Fplist_get (plist, Qcoding_category);
2832   if (!NILP (val))
2833     {
2834       val = Fget (val, Qcoding_category_index);
2835       if (INTEGERP (val))
2836         coding->category_idx = XINT (val);
2837       else
2838         goto label_invalid_coding_system;
2839     }
2840   else
2841     goto label_invalid_coding_system;
2842
2843   val = Fplist_get (plist, Qsafe_charsets);
2844   if (EQ (val, Qt))
2845     {
2846       for (i = 0; i <= MAX_CHARSET; i++)
2847         coding->safe_charsets[i] = 1;
2848     }
2849   else
2850     {
2851       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2852       while (CONSP (val))
2853         {
2854           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2855             coding->safe_charsets[i] = 1;
2856           val = XCONS (val)->cdr;
2857         }
2858     }
2859
2860   switch (XFASTINT (coding_type))
2861     {
2862     case 0:
2863       coding->type = coding_type_emacs_mule;
2864       if (!NILP (coding->post_read_conversion))
2865         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2866       if (!NILP (coding->pre_write_conversion))
2867         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2868       break;
2869
2870     case 1:
2871       coding->type = coding_type_sjis;
2872       coding->common_flags
2873         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2874       break;
2875
2876     case 2:
2877       coding->type = coding_type_iso2022;
2878       coding->common_flags
2879         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2880       {
2881         Lisp_Object val, temp;
2882         Lisp_Object *flags;
2883         int i, charset, reg_bits = 0;
2884
2885         val = XVECTOR (coding_spec)->contents[4];
2886
2887         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2888           goto label_invalid_coding_system;
2889
2890         flags = XVECTOR (val)->contents;
2891         coding->flags
2892           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2893              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2894              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2895              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2896              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2897              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2898              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2899              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2900              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2901              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2902              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2903              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2904              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2905              );
2906
2907         /* Invoke graphic register 0 to plane 0.  */
2908         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2909         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2910         CODING_SPEC_ISO_INVOCATION (coding, 1)
2911           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2912         /* Not single shifting at first.  */
2913         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2914         /* Beginning of buffer should also be regarded as bol. */
2915         CODING_SPEC_ISO_BOL (coding) = 1;
2916
2917         for (charset = 0; charset <= MAX_CHARSET; charset++)
2918           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2919         val = Vcharset_revision_alist;
2920         while (CONSP (val))
2921           {
2922             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2923             if (charset >= 0
2924                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2925                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2926               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2927             val = XCONS (val)->cdr;
2928           }
2929
2930         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2931            FLAGS[REG] can be one of below:
2932                 integer CHARSET: CHARSET occupies register I,
2933                 t: designate nothing to REG initially, but can be used
2934                   by any charsets,
2935                 list of integer, nil, or t: designate the first
2936                   element (if integer) to REG initially, the remaining
2937                   elements (if integer) is designated to REG on request,
2938                   if an element is t, REG can be used by any charsets,
2939                 nil: REG is never used.  */
2940         for (charset = 0; charset <= MAX_CHARSET; charset++)
2941           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2942             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2943         for (i = 0; i < 4; i++)
2944           {
2945             if (INTEGERP (flags[i])
2946                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2947                 || (charset = get_charset_id (flags[i])) >= 0)
2948               {
2949                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2950                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2951               }
2952             else if (EQ (flags[i], Qt))
2953               {
2954                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2955                 reg_bits |= 1 << i;
2956                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2957               }
2958             else if (CONSP (flags[i]))
2959               {
2960                 Lisp_Object tail = flags[i];
2961
2962                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2963                 if (INTEGERP (XCONS (tail)->car)
2964                     && (charset = XINT (XCONS (tail)->car),
2965                         CHARSET_VALID_P (charset))
2966                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2967                   {
2968                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2969                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2970                   }
2971                 else
2972                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2973                 tail = XCONS (tail)->cdr;
2974                 while (CONSP (tail))
2975                   {
2976                     if (INTEGERP (XCONS (tail)->car)
2977                         && (charset = XINT (XCONS (tail)->car),
2978                             CHARSET_VALID_P (charset))
2979                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2980                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2981                         = i;
2982                     else if (EQ (XCONS (tail)->car, Qt))
2983                       reg_bits |= 1 << i;
2984                     tail = XCONS (tail)->cdr;
2985                   }
2986               }
2987             else
2988               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2989
2990             CODING_SPEC_ISO_DESIGNATION (coding, i)
2991               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
2992           }
2993
2994         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
2995           {
2996             /* REG 1 can be used only by locking shift in 7-bit env.  */
2997             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
2998               reg_bits &= ~2;
2999             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3000               /* Without any shifting, only REG 0 and 1 can be used.  */
3001               reg_bits &= 3;
3002           }
3003
3004         if (reg_bits)
3005           for (charset = 0; charset <= MAX_CHARSET; charset++)
3006             {
3007               if (CHARSET_VALID_P (charset))
3008                 {
3009                   /* There exist some default graphic registers to be
3010                      used CHARSET.  */
3011
3012                   /* We had better avoid designating a charset of
3013                      CHARS96 to REG 0 as far as possible.  */
3014                   if (CHARSET_CHARS (charset) == 96)
3015                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3016                       = (reg_bits & 2
3017                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3018                   else
3019                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3020                       = (reg_bits & 1
3021                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3022                 }
3023             }
3024       }
3025       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3026       coding->spec.iso2022.last_invalid_designation_register = -1;
3027       break;
3028
3029     case 3:
3030       coding->type = coding_type_big5;
3031       coding->common_flags
3032         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3033       coding->flags
3034         = (NILP (XVECTOR (coding_spec)->contents[4])
3035            ? CODING_FLAG_BIG5_HKU
3036            : CODING_FLAG_BIG5_ETEN);
3037       break;
3038
3039     case 4:
3040       coding->type = coding_type_ccl;
3041       coding->common_flags
3042         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3043       {
3044         Lisp_Object val = XVECTOR (coding_spec)->contents[4];
3045         if (CONSP  (val)
3046             && VECTORP (XCONS (val)->car)
3047             && VECTORP (XCONS (val)->cdr))
3048           {
3049             setup_ccl_program (&(coding->spec.ccl.decoder), XCONS (val)->car);
3050             setup_ccl_program (&(coding->spec.ccl.encoder), XCONS (val)->cdr);
3051           }
3052         else
3053           goto label_invalid_coding_system;
3054       }
3055       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3056       break;
3057
3058     case 5:
3059       coding->type = coding_type_raw_text;
3060       break;
3061
3062     default:
3063       goto label_invalid_coding_system;
3064     }
3065   return 0;
3066
3067  label_invalid_coding_system:
3068   coding->type = coding_type_no_conversion;
3069   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3070   coding->common_flags = 0;
3071   coding->eol_type = CODING_EOL_LF;
3072   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3073   return -1;
3074 }
3075
3076 /* Emacs has a mechanism to automatically detect a coding system if it
3077    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3078    it's impossible to distinguish some coding systems accurately
3079    because they use the same range of codes.  So, at first, coding
3080    systems are categorized as follows:
3081
3082    o coding-category-emacs-mule
3083
3084         The category for a coding system which has the same code range
3085         as Emacs' internal format.  Assigned the coding-system (Lisp
3086         symbol) `emacs-mule' by default.
3087
3088    o coding-category-sjis
3089
3090         The category for a coding system which has the same code range
3091         as SJIS.  Assigned the coding-system (Lisp
3092         symbol) `japanese-shift-jis' by default.
3093
3094    o coding-category-iso-7
3095
3096         The category for a coding system which has the same code range
3097         as ISO2022 of 7-bit environment.  This doesn't use any locking
3098         shift and single shift functions.  This can encode/decode all
3099         charsets.  Assigned the coding-system (Lisp symbol)
3100         `iso-2022-7bit' by default.
3101
3102    o coding-category-iso-7-tight
3103
3104         Same as coding-category-iso-7 except that this can
3105         encode/decode only the specified charsets.
3106
3107    o coding-category-iso-8-1
3108
3109         The category for a coding system which has the same code range
3110         as ISO2022 of 8-bit environment and graphic plane 1 used only
3111         for DIMENSION1 charset.  This doesn't use any locking shift
3112         and single shift functions.  Assigned the coding-system (Lisp
3113         symbol) `iso-latin-1' by default.
3114
3115    o coding-category-iso-8-2
3116
3117         The category for a coding system which has the same code range
3118         as ISO2022 of 8-bit environment and graphic plane 1 used only
3119         for DIMENSION2 charset.  This doesn't use any locking shift
3120         and single shift functions.  Assigned the coding-system (Lisp
3121         symbol) `japanese-iso-8bit' by default.
3122
3123    o coding-category-iso-7-else
3124
3125         The category for a coding system which has the same code range
3126         as ISO2022 of 7-bit environment but uses locking shift or
3127         single shift functions.  Assigned the coding-system (Lisp
3128         symbol) `iso-2022-7bit-lock' by default.
3129
3130    o coding-category-iso-8-else
3131
3132         The category for a coding system which has the same code range
3133         as ISO2022 of 8-bit environment but uses locking shift or
3134         single shift functions.  Assigned the coding-system (Lisp
3135         symbol) `iso-2022-8bit-ss2' by default.
3136
3137    o coding-category-big5
3138
3139         The category for a coding system which has the same code range
3140         as BIG5.  Assigned the coding-system (Lisp symbol)
3141         `cn-big5' by default.
3142
3143    o coding-category-binary
3144
3145         The category for a coding system not categorized in any of the
3146         above.  Assigned the coding-system (Lisp symbol)
3147         `no-conversion' by default.
3148
3149    Each of them is a Lisp symbol whose value is an actual
3150    `coding-system' (also a Lisp symbol) assigned by a user.
3151    What Emacs actually does is detect a category of coding system,
3152    then use the `coding-system' assigned to that category.  If Emacs
3153    can't narrow the possibilities down to one category, it selects the
3154    category of the highest priority.  Priorities of categories are also
3155    specified by a user in the Lisp variable `coding-category-list'.
3156
3157 */
3158
3159 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3160    If it detects possible coding systems, return an integer in which
3161    appropriate flag bits are set.  Flag bits are defined by macros
3162    CODING_CATEGORY_MASK_XXX in `coding.h'.
3163
3164    How many ASCII characters are at the head is returned as *SKIP.  */
3165
3166 static int
3167 detect_coding_mask (source, src_bytes, priorities, skip)
3168      unsigned char *source;
3169      int src_bytes, *priorities, *skip;
3170 {
3171   register unsigned char c;
3172   unsigned char *src = source, *src_end = source + src_bytes;
3173   unsigned int mask = (CODING_CATEGORY_MASK_ISO_7BIT
3174                        | CODING_CATEGORY_MASK_ISO_SHIFT);
3175   int i;
3176
3177   /* At first, skip all ASCII characters and control characters except
3178      for three ISO2022 specific control characters.  */
3179  label_loop_detect_coding:
3180   while (src < src_end)
3181     {
3182       c = *src;
3183       if (c >= 0x80
3184           || ((mask & CODING_CATEGORY_MASK_ISO_7BIT)
3185               && c == ISO_CODE_ESC)
3186           || ((mask & CODING_CATEGORY_MASK_ISO_SHIFT)
3187               && (c == ISO_CODE_SI || c == ISO_CODE_SO)))
3188         break;
3189       src++;
3190     }
3191   *skip = src - source;
3192
3193   if (src >= src_end)
3194     /* We found nothing other than ASCII.  There's nothing to do.  */
3195     return 0;
3196
3197   /* The text seems to be encoded in some multilingual coding system.
3198      Now, try to find in which coding system the text is encoded.  */
3199   if (c < 0x80)
3200     {
3201       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3202       /* C is an ISO2022 specific control code of C0.  */
3203       mask = detect_coding_iso2022 (src, src_end);
3204       if (mask == 0)
3205         {
3206           /* No valid ISO2022 code follows C.  Try again.  */
3207           src++;
3208           mask = (c != ISO_CODE_ESC
3209                   ? CODING_CATEGORY_MASK_ISO_7BIT
3210                   : CODING_CATEGORY_MASK_ISO_SHIFT);
3211           goto label_loop_detect_coding;
3212         }
3213       if (priorities)
3214         goto label_return_highest_only;
3215     }
3216   else
3217     {
3218       int try;
3219
3220       if (c < 0xA0)
3221         {
3222           /* C is the first byte of SJIS character code,
3223              or a leading-code of Emacs' internal format (emacs-mule).  */
3224           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3225
3226           /* Or, if C is a special latin extra code,
3227              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3228              or is an ISO2022 control-sequence-introducer (CSI),
3229              we should also consider the possibility of ISO2022 codings.  */
3230           if ((VECTORP (Vlatin_extra_code_table)
3231                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3232               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3233               || (c == ISO_CODE_CSI
3234                   && (src < src_end
3235                       && (*src == ']'
3236                           || ((*src == '0' || *src == '1' || *src == '2')
3237                               && src + 1 < src_end
3238                               && src[1] == ']')))))
3239             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3240                      | CODING_CATEGORY_MASK_ISO_8BIT);
3241         }
3242       else
3243         /* C is a character of ISO2022 in graphic plane right,
3244            or a SJIS's 1-byte character code (i.e. JISX0201),
3245            or the first byte of BIG5's 2-byte code.  */
3246         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3247                 | CODING_CATEGORY_MASK_ISO_8BIT
3248                 | CODING_CATEGORY_MASK_SJIS
3249                 | CODING_CATEGORY_MASK_BIG5);
3250
3251       mask = 0;
3252       if (priorities)
3253         {
3254           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3255             {
3256               priorities[i] &= try;
3257               if (priorities[i] & CODING_CATEGORY_MASK_ISO)
3258                 mask = detect_coding_iso2022 (src, src_end);
3259               else if (priorities[i] & CODING_CATEGORY_MASK_SJIS)
3260                 mask = detect_coding_sjis (src, src_end);
3261               else if (priorities[i] & CODING_CATEGORY_MASK_BIG5)
3262                 mask = detect_coding_big5 (src, src_end);
3263               else if (priorities[i] & CODING_CATEGORY_MASK_EMACS_MULE)
3264                 mask = detect_coding_emacs_mule (src, src_end);
3265               if (mask)
3266                 goto label_return_highest_only;
3267             }
3268           return CODING_CATEGORY_MASK_RAW_TEXT;
3269         }
3270       if (try & CODING_CATEGORY_MASK_ISO)
3271         mask |= detect_coding_iso2022 (src, src_end);
3272       if (try & CODING_CATEGORY_MASK_SJIS)
3273         mask |= detect_coding_sjis (src, src_end);
3274       if (try & CODING_CATEGORY_MASK_BIG5)
3275         mask |= detect_coding_big5 (src, src_end);
3276       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3277         mask |= detect_coding_emacs_mule (src, src_end);
3278     }
3279   return (mask | CODING_CATEGORY_MASK_RAW_TEXT);
3280
3281  label_return_highest_only:
3282   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3283     {
3284       if (mask & priorities[i])
3285         return priorities[i];
3286     }
3287   return CODING_CATEGORY_MASK_RAW_TEXT;
3288 }
3289
3290 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3291    The information of the detected coding system is set in CODING.  */
3292
3293 void
3294 detect_coding (coding, src, src_bytes)
3295      struct coding_system *coding;
3296      unsigned char *src;
3297      int src_bytes;
3298 {
3299   unsigned int idx;
3300   int skip, mask, i;
3301   int priorities[CODING_CATEGORY_IDX_MAX];
3302   Lisp_Object val = Vcoding_category_list;
3303
3304   i = 0;
3305   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
3306     {
3307       if (! SYMBOLP (XCONS (val)->car))
3308         break;
3309       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
3310       if (idx >= CODING_CATEGORY_IDX_MAX)
3311         break;
3312       priorities[i++] = (1 << idx);
3313       val = XCONS (val)->cdr;
3314     }
3315   /* If coding-category-list is valid and contains all coding
3316      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
3317      the following code saves Emacs from craching.  */
3318   while (i < CODING_CATEGORY_IDX_MAX)
3319     priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
3320
3321   mask = detect_coding_mask (src, src_bytes, priorities, &skip);
3322   coding->heading_ascii = skip;
3323
3324   if (!mask) return;
3325
3326   /* We found a single coding system of the highest priority in MASK.  */
3327   idx = 0;
3328   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3329   if (! mask)
3330     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3331
3332   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3333
3334   if (coding->eol_type != CODING_EOL_UNDECIDED)
3335     {
3336       Lisp_Object tmp = Fget (val, Qeol_type);
3337
3338       if (VECTORP (tmp))
3339         val = XVECTOR (tmp)->contents[coding->eol_type];
3340     }
3341   setup_coding_system (val, coding);
3342   /* Set this again because setup_coding_system reset this member.  */
3343   coding->heading_ascii = skip;
3344 }
3345
3346 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3347    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3348    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3349
3350    How many non-eol characters are at the head is returned as *SKIP.  */
3351
3352 #define MAX_EOL_CHECK_COUNT 3
3353
3354 static int
3355 detect_eol_type (source, src_bytes, skip)
3356      unsigned char *source;
3357      int src_bytes, *skip;
3358 {
3359   unsigned char *src = source, *src_end = src + src_bytes;
3360   unsigned char c;
3361   int total = 0;                /* How many end-of-lines are found so far.  */
3362   int eol_type = CODING_EOL_UNDECIDED;
3363   int this_eol_type;
3364
3365   *skip = 0;
3366
3367   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3368     {
3369       c = *src++;
3370       if (c == '\n' || c == '\r')
3371         {
3372           if (*skip == 0)
3373             *skip = src - 1 - source;
3374           total++;
3375           if (c == '\n')
3376             this_eol_type = CODING_EOL_LF;
3377           else if (src >= src_end || *src != '\n')
3378             this_eol_type = CODING_EOL_CR;
3379           else
3380             this_eol_type = CODING_EOL_CRLF, src++;
3381
3382           if (eol_type == CODING_EOL_UNDECIDED)
3383             /* This is the first end-of-line.  */
3384             eol_type = this_eol_type;
3385           else if (eol_type != this_eol_type)
3386             {
3387               /* The found type is different from what found before.  */
3388               eol_type = CODING_EOL_INCONSISTENT;
3389               break;
3390             }
3391         }
3392     }
3393
3394   if (*skip == 0)
3395     *skip = src_end - source;
3396   return eol_type;
3397 }
3398
3399 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3400    is encoded.  If it detects an appropriate format of end-of-line, it
3401    sets the information in *CODING.  */
3402
3403 void
3404 detect_eol (coding, src, src_bytes)
3405      struct coding_system *coding;
3406      unsigned char *src;
3407      int src_bytes;
3408 {
3409   Lisp_Object val;
3410   int skip;
3411   int eol_type = detect_eol_type (src, src_bytes, &skip);
3412
3413   if (coding->heading_ascii > skip)
3414     coding->heading_ascii = skip;
3415   else
3416     skip = coding->heading_ascii;
3417
3418   if (eol_type == CODING_EOL_UNDECIDED)
3419     return;
3420   if (eol_type == CODING_EOL_INCONSISTENT)
3421     {
3422 #if 0
3423       /* This code is suppressed until we find a better way to
3424          distinguish raw text file and binary file.  */
3425
3426       /* If we have already detected that the coding is raw-text, the
3427          coding should actually be no-conversion.  */
3428       if (coding->type == coding_type_raw_text)
3429         {
3430           setup_coding_system (Qno_conversion, coding);
3431           return;
3432         }
3433       /* Else, let's decode only text code anyway.  */
3434 #endif /* 0 */
3435       eol_type = CODING_EOL_LF;
3436     }
3437
3438   val = Fget (coding->symbol, Qeol_type);
3439   if (VECTORP (val) && XVECTOR (val)->size == 3)
3440     {
3441       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3442       coding->heading_ascii = skip;
3443     }
3444 }
3445
3446 #define CONVERSION_BUFFER_EXTRA_ROOM 256
3447
3448 #define DECODING_BUFFER_MAG(coding)                                          \
3449   (coding->type == coding_type_iso2022                                       \
3450    ? 3                                                                       \
3451    : ((coding->type == coding_type_sjis || coding->type == coding_type_big5) \
3452       ? 2                                                                    \
3453       : (coding->type == coding_type_raw_text                                \
3454          ? 1                                                                 \
3455          : (coding->type == coding_type_ccl                                  \
3456             ? coding->spec.ccl.decoder.buf_magnification                     \
3457             : 2))))
3458
3459 /* Return maximum size (bytes) of a buffer enough for decoding
3460    SRC_BYTES of text encoded in CODING.  */
3461
3462 int
3463 decoding_buffer_size (coding, src_bytes)
3464      struct coding_system *coding;
3465      int src_bytes;
3466 {
3467   return (src_bytes * DECODING_BUFFER_MAG (coding)
3468           + CONVERSION_BUFFER_EXTRA_ROOM);
3469 }
3470
3471 /* Return maximum size (bytes) of a buffer enough for encoding
3472    SRC_BYTES of text to CODING.  */
3473
3474 int
3475 encoding_buffer_size (coding, src_bytes)
3476      struct coding_system *coding;
3477      int src_bytes;
3478 {
3479   int magnification;
3480
3481   if (coding->type == coding_type_ccl)
3482     magnification = coding->spec.ccl.encoder.buf_magnification;
3483   else
3484     magnification = 3;
3485
3486   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3487 }
3488
3489 #ifndef MINIMUM_CONVERSION_BUFFER_SIZE
3490 #define MINIMUM_CONVERSION_BUFFER_SIZE 1024
3491 #endif
3492
3493 char *conversion_buffer;
3494 int conversion_buffer_size;
3495
3496 /* Return a pointer to a SIZE bytes of buffer to be used for encoding
3497    or decoding.  Sufficient memory is allocated automatically.  If we
3498    run out of memory, return NULL.  */
3499
3500 char *
3501 get_conversion_buffer (size)
3502      int size;
3503 {
3504   if (size > conversion_buffer_size)
3505     {
3506       char *buf;
3507       int real_size = conversion_buffer_size * 2;
3508
3509       while (real_size < size) real_size *= 2;
3510       buf = (char *) xmalloc (real_size);
3511       xfree (conversion_buffer);
3512       conversion_buffer = buf;
3513       conversion_buffer_size = real_size;
3514     }
3515   return conversion_buffer;
3516 }
3517
3518 int
3519 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3520      struct coding_system *coding;
3521      unsigned char *source, *destination;
3522      int src_bytes, dst_bytes, encodep;
3523 {
3524   struct ccl_program *ccl
3525     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3526   int result;
3527
3528   coding->produced = ccl_driver (ccl, source, destination,
3529                                  src_bytes, dst_bytes, &(coding->consumed));
3530   if (encodep)
3531     {
3532       coding->produced_char = coding->produced;
3533       coding->consumed_char
3534         = multibyte_chars_in_text (source, coding->consumed);
3535     }
3536   else
3537     {
3538       coding->produced_char
3539         = multibyte_chars_in_text (destination, coding->produced);
3540       coding->consumed_char = coding->consumed;
3541     }
3542   switch (ccl->status)
3543     {
3544     case CCL_STAT_SUSPEND_BY_SRC:
3545       result = CODING_FINISH_INSUFFICIENT_SRC;
3546       break;
3547     case CCL_STAT_SUSPEND_BY_DST:
3548       result = CODING_FINISH_INSUFFICIENT_DST;
3549       break;
3550     default:
3551       result = CODING_FINISH_NORMAL;
3552       break;
3553     }
3554   return result;
3555 }
3556
3557 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3558    decoding, it may detect coding system and format of end-of-line if
3559    those are not yet decided.  */
3560
3561 int
3562 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3563      struct coding_system *coding;
3564      unsigned char *source, *destination;
3565      int src_bytes, dst_bytes;
3566 {
3567   int result;
3568
3569   if (src_bytes <= 0)
3570     {
3571       coding->produced = coding->produced_char = 0;
3572       coding->consumed = coding->consumed_char = 0;
3573       coding->fake_multibyte = 0;
3574       return CODING_FINISH_NORMAL;
3575     }
3576
3577   if (coding->type == coding_type_undecided)
3578     detect_coding (coding, source, src_bytes);
3579
3580   if (coding->eol_type == CODING_EOL_UNDECIDED)
3581     detect_eol (coding, source, src_bytes);
3582
3583   switch (coding->type)
3584     {
3585     case coding_type_emacs_mule:
3586     case coding_type_undecided:
3587     case coding_type_raw_text:
3588       if (coding->eol_type == CODING_EOL_LF
3589           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3590         goto label_no_conversion;
3591       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3592       break;
3593
3594     case coding_type_sjis:
3595       result = decode_coding_sjis_big5 (coding, source, destination,
3596                                         src_bytes, dst_bytes, 1);
3597       break;
3598
3599     case coding_type_iso2022:
3600       result = decode_coding_iso2022 (coding, source, destination,
3601                                       src_bytes, dst_bytes);
3602       break;
3603
3604     case coding_type_big5:
3605       result = decode_coding_sjis_big5 (coding, source, destination,
3606                                         src_bytes, dst_bytes, 0);
3607       break;
3608
3609     case coding_type_ccl:
3610       result = ccl_coding_driver (coding, source, destination,
3611                                   src_bytes, dst_bytes, 0);
3612       break;
3613
3614     default:                    /* i.e. case coding_type_no_conversion: */
3615     label_no_conversion:
3616       if (dst_bytes && src_bytes > dst_bytes)
3617         {
3618           coding->produced = dst_bytes;
3619           result = CODING_FINISH_INSUFFICIENT_DST;
3620         }
3621       else
3622         {
3623           coding->produced = src_bytes;
3624           result = CODING_FINISH_NORMAL;
3625         }
3626       if (dst_bytes)
3627         bcopy (source, destination, coding->produced);
3628       else
3629         safe_bcopy (source, destination, coding->produced);
3630       coding->fake_multibyte = 1;
3631       coding->consumed
3632         = coding->consumed_char = coding->produced_char = coding->produced;
3633       break;
3634     }
3635
3636   return result;
3637 }
3638
3639 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3640
3641 int
3642 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3643      struct coding_system *coding;
3644      unsigned char *source, *destination;
3645      int src_bytes, dst_bytes;
3646 {
3647   int result;
3648
3649   if (src_bytes <= 0)
3650     {
3651       coding->produced = coding->produced_char = 0;
3652       coding->consumed = coding->consumed_char = 0;
3653       coding->fake_multibyte = 0;
3654       return CODING_FINISH_NORMAL;
3655     }
3656
3657   switch (coding->type)
3658     {
3659     case coding_type_emacs_mule:
3660     case coding_type_undecided:
3661     case coding_type_raw_text:
3662       if (coding->eol_type == CODING_EOL_LF
3663           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3664         goto label_no_conversion;
3665       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3666       break;
3667
3668     case coding_type_sjis:
3669       result = encode_coding_sjis_big5 (coding, source, destination,
3670                                         src_bytes, dst_bytes, 1);
3671       break;
3672
3673     case coding_type_iso2022:
3674       result = encode_coding_iso2022 (coding, source, destination,
3675                                       src_bytes, dst_bytes);
3676       break;
3677
3678     case coding_type_big5:
3679       result = encode_coding_sjis_big5 (coding, source, destination,
3680                                         src_bytes, dst_bytes, 0);
3681       break;
3682
3683     case coding_type_ccl:
3684       result = ccl_coding_driver (coding, source, destination,
3685                                   src_bytes, dst_bytes, 1);
3686       break;
3687
3688     default:                    /* i.e. case coding_type_no_conversion: */
3689     label_no_conversion:
3690       if (dst_bytes && src_bytes > dst_bytes)
3691         {
3692           coding->produced = dst_bytes;
3693           result = CODING_FINISH_INSUFFICIENT_DST;
3694         }
3695       else
3696         {
3697           coding->produced = src_bytes;
3698           result = CODING_FINISH_NORMAL;
3699         }
3700       if (dst_bytes)
3701         bcopy (source, destination, coding->produced);
3702       else
3703         safe_bcopy (source, destination, coding->produced);
3704       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3705         {
3706           unsigned char *p = destination, *pend = p + coding->produced;
3707           while (p < pend)
3708             if (*p++ == '\015') p[-1] = '\n';
3709         }
3710       coding->fake_multibyte = 1;
3711       coding->consumed
3712         = coding->consumed_char = coding->produced_char = coding->produced;
3713       break;
3714     }
3715
3716   return result;
3717 }
3718
3719 /* Scan text in the region between *BEG and *END (byte positions),
3720    skip characters which we don't have to decode by coding system
3721    CODING at the head and tail, then set *BEG and *END to the region
3722    of the text we actually have to convert.  The caller should move
3723    the gap out of the region in advance.
3724
3725    If STR is not NULL, *BEG and *END are indices into STR.  */
3726
3727 static void
3728 shrink_decoding_region (beg, end, coding, str)
3729      int *beg, *end;
3730      struct coding_system *coding;
3731      unsigned char *str;
3732 {
3733   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3734   int eol_conversion;
3735
3736   if (coding->type == coding_type_ccl
3737       || coding->type == coding_type_undecided
3738       || !NILP (coding->post_read_conversion))
3739     {
3740       /* We can't skip any data.  */
3741       return;
3742     }
3743   else if (coding->type == coding_type_no_conversion)
3744     {
3745       /* We need no conversion, but don't have to skip any data here.
3746          Decoding routine handles them effectively anyway.  */
3747       return;
3748     }
3749
3750   if (coding->heading_ascii >= 0)
3751     /* Detection routine has already found how much we can skip at the
3752        head.  */
3753     *beg += coding->heading_ascii;
3754
3755   if (str)
3756     {
3757       begp_orig = begp = str + *beg;
3758       endp_orig = endp = str + *end;
3759     }
3760   else
3761     {
3762       begp_orig = begp = BYTE_POS_ADDR (*beg);
3763       endp_orig = endp = begp + *end - *beg;
3764     }
3765
3766   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3767
3768   switch (coding->type)
3769     {
3770     case coding_type_emacs_mule:
3771     case coding_type_raw_text:
3772       if (eol_conversion)
3773         {
3774           if (coding->heading_ascii < 0)
3775             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3776           while (begp < endp && *(endp - 1) != '\r' && *(endp - 1) < 0x80)
3777             endp--;
3778         }
3779       else
3780         begp = endp;
3781       break;
3782
3783     case coding_type_sjis:
3784     case coding_type_big5:
3785       /* We can skip all ASCII characters at the head.  */
3786       if (coding->heading_ascii < 0)
3787         {
3788           if (eol_conversion)
3789             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3790           else
3791             while (begp < endp && *begp < 0x80) begp++;
3792         }
3793       /* We can skip all ASCII characters at the tail except for the
3794          second byte of SJIS or BIG5 code.  */
3795       if (eol_conversion)
3796         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3797       else
3798         while (begp < endp && endp[-1] < 0x80) endp--;
3799       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3800         endp++;
3801       break;
3802
3803     default:            /* i.e. case coding_type_iso2022: */
3804       if (coding->heading_ascii < 0)
3805         {
3806           /* We can skip all ASCII characters at the head except for a
3807              few control codes.  */
3808           while (begp < endp && (c = *begp) < 0x80
3809                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3810                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3811                  && (!eol_conversion || c != ISO_CODE_LF))
3812             begp++;
3813         }
3814       switch (coding->category_idx)
3815         {
3816         case CODING_CATEGORY_IDX_ISO_8_1:
3817         case CODING_CATEGORY_IDX_ISO_8_2:
3818           /* We can skip all ASCII characters at the tail.  */
3819           if (eol_conversion)
3820             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3821           else
3822             while (begp < endp && endp[-1] < 0x80) endp--;
3823           break;
3824
3825         case CODING_CATEGORY_IDX_ISO_7:
3826         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3827           /* We can skip all charactes at the tail except for ESC and
3828              the following 2-byte at the tail.  */
3829           if (eol_conversion)
3830             while (begp < endp
3831                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3832               endp--;
3833           else
3834             while (begp < endp
3835                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3836               endp--;
3837           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3838             {
3839               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3840                 /* This is an ASCII designation sequence.  We can
3841                     surely skip the tail.  */
3842                 endp += 2;
3843               else
3844                 /* Hmmm, we can't skip the tail.  */
3845                 endp = endp_orig;
3846             }
3847         }
3848     }
3849   *beg += begp - begp_orig;
3850   *end += endp - endp_orig;
3851   return;
3852 }
3853
3854 /* Like shrink_decoding_region but for encoding.  */
3855
3856 static void
3857 shrink_encoding_region (beg, end, coding, str)
3858      int *beg, *end;
3859      struct coding_system *coding;
3860      unsigned char *str;
3861 {
3862   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3863   int eol_conversion;
3864
3865   if (coding->type == coding_type_ccl)
3866     /* We can't skip any data.  */
3867     return;
3868   else if (coding->type == coding_type_no_conversion)
3869     {
3870       /* We need no conversion.  */
3871       *beg = *end;
3872       return;
3873     }
3874
3875   if (str)
3876     {
3877       begp_orig = begp = str + *beg;
3878       endp_orig = endp = str + *end;
3879     }
3880   else
3881     {
3882       begp_orig = begp = BYTE_POS_ADDR (*beg);
3883       endp_orig = endp = begp + *end - *beg;
3884     }
3885
3886   eol_conversion = (coding->eol_type == CODING_EOL_CR
3887                     || coding->eol_type == CODING_EOL_CRLF);
3888
3889   /* Here, we don't have to check coding->pre_write_conversion because
3890      the caller is expected to have handled it already.  */
3891   switch (coding->type)
3892     {
3893     case coding_type_undecided:
3894     case coding_type_emacs_mule:
3895     case coding_type_raw_text:
3896       if (eol_conversion)
3897         {
3898           while (begp < endp && *begp != '\n') begp++;
3899           while (begp < endp && endp[-1] != '\n') endp--;
3900         }
3901       else
3902         begp = endp;
3903       break;
3904
3905     case coding_type_iso2022:
3906       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3907         {
3908           unsigned char *bol = begp;
3909           while (begp < endp && *begp < 0x80)
3910             {
3911               begp++;
3912               if (begp[-1] == '\n')
3913                 bol = begp;
3914             }
3915           begp = bol;
3916           goto label_skip_tail;
3917         }
3918       /* fall down ... */
3919
3920     default:
3921       /* We can skip all ASCII characters at the head and tail.  */
3922       if (eol_conversion)
3923         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
3924       else
3925         while (begp < endp && *begp < 0x80) begp++;
3926     label_skip_tail:
3927       if (eol_conversion)
3928         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
3929       else
3930         while (begp < endp && *(endp - 1) < 0x80) endp--;
3931       break;
3932     }
3933
3934   *beg += begp - begp_orig;
3935   *end += endp - endp_orig;
3936   return;
3937 }
3938
3939 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
3940    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
3941    coding system CODING, and return the status code of code conversion
3942    (currently, this value has no meaning).
3943
3944    How many characters (and bytes) are converted to how many
3945    characters (and bytes) are recorded in members of the structure
3946    CODING.
3947
3948    If REPLACE is nonzero, we do various things as if the original text
3949    is deleted and a new text is inserted.  See the comments in
3950    replace_range (insdel.c) to know what we are doing.  */
3951
3952 int
3953 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
3954      int from, from_byte, to, to_byte, encodep, replace;
3955      struct coding_system *coding;
3956 {
3957   int len = to - from, len_byte = to_byte - from_byte;
3958   int require, inserted, inserted_byte;
3959   int head_skip, tail_skip, total_skip;
3960   Lisp_Object saved_coding_symbol = Qnil;
3961   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
3962   int first = 1;
3963   int fake_multibyte = 0;
3964   unsigned char *src, *dst;
3965   Lisp_Object deletion = Qnil;
3966
3967   if (replace)
3968     {
3969       int saved_from = from;
3970
3971       prepare_to_modify_buffer (from, to, &from);
3972       if (saved_from != from)
3973         {
3974           to = from + len;
3975           if (multibyte)
3976             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
3977           else
3978             from_byte = from, to_byte = to;
3979           len_byte = to_byte - from_byte;
3980         }
3981     }
3982
3983   if (! encodep && CODING_REQUIRE_DETECTION (coding))
3984     {
3985       /* We must detect encoding of text and eol format.  */
3986
3987       if (from < GPT && to > GPT)
3988         move_gap_both (from, from_byte);
3989       if (coding->type == coding_type_undecided)
3990         {
3991           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
3992           if (coding->type == coding_type_undecided)
3993             /* It seems that the text contains only ASCII, but we
3994                should not left it undecided because the deeper
3995                decoding routine (decode_coding) tries to detect the
3996                encodings again in vain.  */
3997             coding->type = coding_type_emacs_mule;
3998         }
3999       if (coding->eol_type == CODING_EOL_UNDECIDED)
4000         {
4001           saved_coding_symbol = coding->symbol;
4002           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4003           if (coding->eol_type == CODING_EOL_UNDECIDED)
4004             coding->eol_type = CODING_EOL_LF;
4005           /* We had better recover the original eol format if we
4006              encounter an inconsitent eol format while decoding.  */
4007           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4008         }
4009     }
4010
4011   coding->consumed_char = len, coding->consumed = len_byte;
4012
4013   if (encodep
4014       ? ! CODING_REQUIRE_ENCODING (coding)
4015       : ! CODING_REQUIRE_DECODING (coding))
4016     {
4017       coding->produced = len_byte;
4018       if (multibyte
4019           && ! replace
4020           /* See the comment of the member heading_ascii in coding.h.  */
4021           && coding->heading_ascii < len_byte)
4022         {
4023           /* We still may have to combine byte at the head and the
4024              tail of the text in the region.  */
4025           if (from < GPT && GPT < to)
4026             move_gap_both (to, to_byte);
4027           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4028           adjust_after_insert (from, from_byte, to, to_byte, len);
4029           coding->produced_char = len;
4030         }
4031       else
4032         {
4033           if (!replace)
4034             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4035           coding->produced_char = len_byte;
4036         }
4037       return 0;
4038     }
4039
4040   /* Now we convert the text.  */
4041
4042   /* For encoding, we must process pre-write-conversion in advance.  */
4043   if (encodep
4044       && ! NILP (coding->pre_write_conversion)
4045       && SYMBOLP (coding->pre_write_conversion)
4046       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4047     {
4048       /* The function in pre-write-conversion may put a new text in a
4049          new buffer.  */
4050       struct buffer *prev = current_buffer, *new;
4051
4052       call2 (coding->pre_write_conversion, from, to);
4053       if (current_buffer != prev)
4054         {
4055           len = ZV - BEGV;
4056           new = current_buffer;
4057           set_buffer_internal_1 (prev);
4058           del_range_2 (from, from_byte, to, to_byte);
4059           insert_from_buffer (new, BEG, len, 0);
4060           to = from + len;
4061           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4062           len_byte = to_byte - from_byte;
4063         }
4064     }
4065
4066   if (replace)
4067     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4068
4069   /* Try to skip the heading and tailing ASCIIs.  */
4070   {
4071     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4072
4073     if (from < GPT && GPT < to)
4074       move_gap_both (from, from_byte);
4075     if (encodep)
4076       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4077     else
4078       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4079     if (from_byte == to_byte)
4080       {
4081         coding->produced = len_byte;
4082         coding->produced_char = multibyte ? len : len_byte;
4083         if (!replace)
4084           /* We must record and adjust for this new text now.  */
4085           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4086         return 0;
4087       }
4088
4089     head_skip = from_byte - from_byte_orig;
4090     tail_skip = to_byte_orig - to_byte;
4091     total_skip = head_skip + tail_skip;
4092     from += head_skip;
4093     to -= tail_skip;
4094     len -= total_skip; len_byte -= total_skip;
4095   }
4096
4097   /* For converion, we must put the gap before the text in addition to
4098      making the gap larger for efficient decoding.  The required gap
4099      size starts from 2000 which is the magic number used in make_gap.
4100      But, after one batch of conversion, it will be incremented if we
4101      find that it is not enough .  */
4102   require = 2000;
4103
4104   if (GAP_SIZE  < require)
4105     make_gap (require - GAP_SIZE);
4106   move_gap_both (from, from_byte);
4107
4108   if (GPT - BEG < beg_unchanged)
4109     beg_unchanged = GPT - BEG;
4110   if (Z - GPT < end_unchanged)
4111     end_unchanged = Z - GPT;
4112
4113   inserted = inserted_byte = 0;
4114   src = GAP_END_ADDR, dst = GPT_ADDR;
4115
4116   GAP_SIZE += len_byte;
4117   ZV -= len;
4118   Z -= len;
4119   ZV_BYTE -= len_byte;
4120   Z_BYTE -= len_byte;
4121
4122   for (;;)
4123     {
4124       int result;
4125
4126       /* The buffer memory is changed from:
4127          +--------+converted-text+---------+-------original-text------+---+
4128          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4129                   |<------------------- GAP_SIZE -------------------->|  */
4130       if (encodep)
4131         result = encode_coding (coding, src, dst, len_byte, 0);
4132       else
4133         result = decode_coding (coding, src, dst, len_byte, 0);
4134       /* to:
4135          +--------+-------converted-text--------+--+---original-text--+---+
4136          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4137                   |<------------------- GAP_SIZE -------------------->|  */
4138       if (coding->fake_multibyte)
4139         fake_multibyte = 1;
4140
4141       if (!encodep && !multibyte)
4142         coding->produced_char = coding->produced;
4143       inserted += coding->produced_char;
4144       inserted_byte += coding->produced;
4145       len_byte -= coding->consumed;
4146       src += coding->consumed;
4147       dst += inserted_byte;
4148
4149       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4150         {
4151           unsigned char *pend = dst, *p = pend - inserted_byte;
4152
4153           /* Encode LFs back to the original eol format (CR or CRLF).  */
4154           if (coding->eol_type == CODING_EOL_CR)
4155             {
4156               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4157             }
4158           else
4159             {
4160               int count = 0;
4161
4162               while (p < pend) if (*p++ == '\n') count++;
4163               if (src - dst < count)
4164                 {
4165                   /* We don't have sufficient room for putting LFs
4166                      back to CRLF.  We must record converted and
4167                      not-yet-converted text back to the buffer
4168                      content, enlarge the gap, then record them out of
4169                      the buffer contents again.  */
4170                   int add = len_byte + inserted_byte;
4171
4172                   GAP_SIZE -= add;
4173                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4174                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4175                   make_gap (count - GAP_SIZE);
4176                   GAP_SIZE += add;
4177                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4178                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4179                   /* Don't forget to update SRC, DST, and PEND.  */
4180                   src = GAP_END_ADDR - len_byte;
4181                   dst = GPT_ADDR + inserted_byte;
4182                   pend = dst;
4183                 }
4184               inserted += count;
4185               inserted_byte += count;
4186               coding->produced += count;
4187               p = dst = pend + count;
4188               while (count)
4189                 {
4190                   *--p = *--pend;
4191                   if (*p == '\n') count--, *--p = '\r';
4192                 }
4193             }
4194
4195           /* Suppress eol-format conversion in the further conversion.  */
4196           coding->eol_type = CODING_EOL_LF;
4197
4198           /* Restore the original symbol.  */
4199           coding->symbol = saved_coding_symbol;
4200
4201           continue;
4202         }
4203       if (len_byte <= 0)
4204         break;
4205       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4206         {
4207           /* The source text ends in invalid codes.  Let's just
4208              make them valid buffer contents, and finish conversion.  */
4209           inserted += len_byte;
4210           inserted_byte += len_byte;
4211           while (len_byte--)
4212             *src++ = *dst++;
4213           fake_multibyte = 1;
4214           break;
4215         }
4216       if (first)
4217         {
4218           /* We have just done the first batch of conversion which was
4219              stoped because of insufficient gap.  Let's reconsider the
4220              required gap size (i.e. SRT - DST) now.
4221
4222              We have converted ORIG bytes (== coding->consumed) into
4223              NEW bytes (coding->produced).  To convert the remaining
4224              LEN bytes, we may need REQUIRE bytes of gap, where:
4225                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4226                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4227              Here, we are sure that NEW >= ORIG.  */
4228           float ratio = coding->produced - coding->consumed;
4229           ratio /= coding->consumed;
4230           require = len_byte * ratio;
4231           first = 0;
4232         }
4233       if ((src - dst) < (require + 2000))
4234         {
4235           /* See the comment above the previous call of make_gap.  */
4236           int add = len_byte + inserted_byte;
4237
4238           GAP_SIZE -= add;
4239           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4240           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4241           make_gap (require + 2000);
4242           GAP_SIZE += add;
4243           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4244           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4245           /* Don't forget to update SRC, DST.  */
4246           src = GAP_END_ADDR - len_byte;
4247           dst = GPT_ADDR + inserted_byte;
4248         }
4249     }
4250   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4251
4252   if (multibyte
4253       && (fake_multibyte
4254           || !encodep && (to - from) != (to_byte - from_byte)))
4255     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4256
4257   /* If we have shrinked the conversion area, adjust it now.  */
4258   if (total_skip > 0)
4259     {
4260       if (tail_skip > 0)
4261         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4262       inserted += total_skip; inserted_byte += total_skip;
4263       GAP_SIZE += total_skip;
4264       GPT -= head_skip; GPT_BYTE -= head_skip;
4265       ZV -= total_skip; ZV_BYTE -= total_skip;
4266       Z -= total_skip; Z_BYTE -= total_skip;
4267       from -= head_skip; from_byte -= head_skip;
4268       to += tail_skip; to_byte += tail_skip;
4269     }
4270
4271   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4272
4273   if (! encodep && ! NILP (coding->post_read_conversion))
4274     {
4275       Lisp_Object val;
4276       int orig_inserted = inserted, pos = PT;
4277
4278       if (from != pos)
4279         temp_set_point_both (current_buffer, from, from_byte);
4280       val = call1 (coding->post_read_conversion, make_number (inserted));
4281       if (! NILP (val))
4282         {
4283           CHECK_NUMBER (val, 0);
4284           inserted = XFASTINT (val);
4285         }
4286       if (pos >= from + orig_inserted)
4287         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4288     }
4289
4290   signal_after_change (from, to - from, inserted);
4291
4292   {
4293     coding->consumed = to_byte - from_byte;
4294     coding->consumed_char = to - from;
4295     coding->produced = inserted_byte;
4296     coding->produced_char = inserted;
4297   }
4298
4299   return 0;
4300 }
4301
4302 Lisp_Object
4303 code_convert_string (str, coding, encodep, nocopy)
4304      Lisp_Object str;
4305      struct coding_system *coding;
4306      int encodep, nocopy;
4307 {
4308   int len;
4309   char *buf;
4310   int from = 0, to = XSTRING (str)->size;
4311   int to_byte = STRING_BYTES (XSTRING (str));
4312   struct gcpro gcpro1;
4313   Lisp_Object saved_coding_symbol = Qnil;
4314   int result;
4315
4316   if (encodep && !NILP (coding->pre_write_conversion)
4317       || !encodep && !NILP (coding->post_read_conversion))
4318     {
4319       /* Since we have to call Lisp functions which assume target text
4320          is in a buffer, after setting a temporary buffer, call
4321          code_convert_region.  */
4322       int count = specpdl_ptr - specpdl;
4323       struct buffer *prev = current_buffer;
4324
4325       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4326       temp_output_buffer_setup (" *code-converting-work*");
4327       set_buffer_internal (XBUFFER (Vstandard_output));
4328       if (encodep)
4329         insert_from_string (str, 0, 0, to, to_byte, 0);
4330       else
4331         {
4332           /* We must insert the contents of STR as is without
4333              unibyte<->multibyte conversion.  */
4334           current_buffer->enable_multibyte_characters = Qnil;
4335           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4336           current_buffer->enable_multibyte_characters = Qt;
4337         }
4338       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4339       if (encodep)
4340         /* We must return the buffer contents as unibyte string.  */
4341         current_buffer->enable_multibyte_characters = Qnil;
4342       str = make_buffer_string (BEGV, ZV, 0);
4343       set_buffer_internal (prev);
4344       return unbind_to (count, str);
4345     }
4346
4347   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4348     {
4349       /* See the comments in code_convert_region.  */
4350       if (coding->type == coding_type_undecided)
4351         {
4352           detect_coding (coding, XSTRING (str)->data, to_byte);
4353           if (coding->type == coding_type_undecided)
4354             coding->type = coding_type_emacs_mule;
4355         }
4356       if (coding->eol_type == CODING_EOL_UNDECIDED)
4357         {
4358           saved_coding_symbol = coding->symbol;
4359           detect_eol (coding, XSTRING (str)->data, to_byte);
4360           if (coding->eol_type == CODING_EOL_UNDECIDED)
4361             coding->eol_type = CODING_EOL_LF;
4362           /* We had better recover the original eol format if we
4363              encounter an inconsitent eol format while decoding.  */
4364           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4365         }
4366     }
4367
4368   if (encodep
4369       ? ! CODING_REQUIRE_ENCODING (coding)
4370       : ! CODING_REQUIRE_DECODING (coding))
4371     from = to_byte;
4372   else
4373     {
4374       /* Try to skip the heading and tailing ASCIIs.  */
4375       if (encodep)
4376         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4377       else
4378         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4379     }
4380   if (from == to_byte)
4381     return (nocopy ? str : Fcopy_sequence (str));
4382
4383   if (encodep)
4384     len = encoding_buffer_size (coding, to_byte - from);
4385   else
4386     len = decoding_buffer_size (coding, to_byte - from);
4387   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4388   GCPRO1 (str);
4389   buf = get_conversion_buffer (len);
4390   UNGCPRO;
4391
4392   if (from > 0)
4393     bcopy (XSTRING (str)->data, buf, from);
4394   result = (encodep
4395             ? encode_coding (coding, XSTRING (str)->data + from,
4396                              buf + from, to_byte - from, len)
4397             : decode_coding (coding, XSTRING (str)->data + from,
4398                              buf + from, to_byte - from, len));
4399   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4400     {
4401       /* We simple try to decode the whole string again but without
4402          eol-conversion this time.  */
4403       coding->eol_type = CODING_EOL_LF;
4404       coding->symbol = saved_coding_symbol;
4405       return code_convert_string (str, coding, encodep, nocopy);
4406     }
4407
4408   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4409          STRING_BYTES (XSTRING (str)) - to_byte);
4410
4411   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4412   if (encodep)
4413     str = make_unibyte_string (buf, len + coding->produced);
4414   else
4415     str = make_string_from_bytes (buf, len + coding->produced_char,
4416                                   len + coding->produced);
4417   return str;
4418 }
4419
4420 \f
4421 #ifdef emacs
4422 /*** 7. Emacs Lisp library functions ***/
4423
4424 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4425   "Return t if OBJECT is nil or a coding-system.\n\
4426 See the documentation of `make-coding-system' for information\n\
4427 about coding-system objects.")
4428   (obj)
4429      Lisp_Object obj;
4430 {
4431   if (NILP (obj))
4432     return Qt;
4433   if (!SYMBOLP (obj))
4434     return Qnil;
4435   /* Get coding-spec vector for OBJ.  */
4436   obj = Fget (obj, Qcoding_system);
4437   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4438           ? Qt : Qnil);
4439 }
4440
4441 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4442        Sread_non_nil_coding_system, 1, 1, 0,
4443   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4444   (prompt)
4445      Lisp_Object prompt;
4446 {
4447   Lisp_Object val;
4448   do
4449     {
4450       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4451                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4452     }
4453   while (XSTRING (val)->size == 0);
4454   return (Fintern (val, Qnil));
4455 }
4456
4457 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4458   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4459 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4460   (prompt, default_coding_system)
4461      Lisp_Object prompt, default_coding_system;
4462 {
4463   Lisp_Object val;
4464   if (SYMBOLP (default_coding_system))
4465     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4466   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4467                           Qt, Qnil, Qcoding_system_history,
4468                           default_coding_system, Qnil);
4469   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4470 }
4471
4472 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4473        1, 1, 0,
4474   "Check validity of CODING-SYSTEM.\n\
4475 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4476 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4477 The value of property should be a vector of length 5.")
4478   (coding_system)
4479      Lisp_Object coding_system;
4480 {
4481   CHECK_SYMBOL (coding_system, 0);
4482   if (!NILP (Fcoding_system_p (coding_system)))
4483     return coding_system;
4484   while (1)
4485     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4486 }
4487 \f
4488 Lisp_Object
4489 detect_coding_system (src, src_bytes, highest)
4490      unsigned char *src;
4491      int src_bytes, highest;
4492 {
4493   int coding_mask, eol_type;
4494   Lisp_Object val, tmp;
4495   int dummy;
4496
4497   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4498   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4499   if (eol_type == CODING_EOL_INCONSISTENT)
4500     eol_type == CODING_EOL_UNDECIDED;
4501
4502   if (!coding_mask)
4503     {
4504       val = Qundecided;
4505       if (eol_type != CODING_EOL_UNDECIDED)
4506         {
4507           Lisp_Object val2;
4508           val2 = Fget (Qundecided, Qeol_type);
4509           if (VECTORP (val2))
4510             val = XVECTOR (val2)->contents[eol_type];
4511         }
4512       return val;
4513     }
4514
4515   /* At first, gather possible coding systems in VAL.  */
4516   val = Qnil;
4517   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4518     {
4519       int idx
4520         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4521       if (coding_mask & (1 << idx))
4522         {
4523           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4524           if (highest)
4525             break;
4526         }
4527     }
4528   if (!highest)
4529     val = Fnreverse (val);
4530
4531   /* Then, substitute the elements by subsidiary coding systems.  */
4532   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4533     {
4534       if (eol_type != CODING_EOL_UNDECIDED)
4535         {
4536           Lisp_Object eol;
4537           eol = Fget (XCONS (tmp)->car, Qeol_type);
4538           if (VECTORP (eol))
4539             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4540         }
4541     }
4542   return (highest ? XCONS (val)->car : val);
4543 }
4544
4545 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4546        2, 3, 0,
4547   "Detect coding system of the text in the region between START and END.\n\
4548 Return a list of possible coding systems ordered by priority.\n\
4549 \n\
4550 If only ASCII characters are found, it returns `undecided'\n\
4551 or its subsidiary coding system according to a detected end-of-line format.\n\
4552 \n\
4553 If optional argument HIGHEST is non-nil, return the coding system of\n\
4554 highest priority.")
4555   (start, end, highest)
4556      Lisp_Object start, end, highest;
4557 {
4558   int from, to;
4559   int from_byte, to_byte;
4560
4561   CHECK_NUMBER_COERCE_MARKER (start, 0);
4562   CHECK_NUMBER_COERCE_MARKER (end, 1);
4563
4564   validate_region (&start, &end);
4565   from = XINT (start), to = XINT (end);
4566   from_byte = CHAR_TO_BYTE (from);
4567   to_byte = CHAR_TO_BYTE (to);
4568
4569   if (from < GPT && to >= GPT)
4570     move_gap_both (to, to_byte);
4571
4572   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4573                                to_byte - from_byte,
4574                                !NILP (highest));
4575 }
4576
4577 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4578        1, 2, 0,
4579   "Detect coding system of the text in STRING.\n\
4580 Return a list of possible coding systems ordered by priority.\n\
4581 \n\
4582 If only ASCII characters are found, it returns `undecided'\n\
4583 or its subsidiary coding system according to a detected end-of-line format.\n\
4584 \n\
4585 If optional argument HIGHEST is non-nil, return the coding system of\n\
4586 highest priority.")
4587   (string, highest)
4588      Lisp_Object string, highest;
4589 {
4590   CHECK_STRING (string, 0);
4591
4592   return detect_coding_system (XSTRING (string)->data,
4593                                STRING_BYTES (XSTRING (string)),
4594                                !NILP (highest));
4595 }
4596
4597 Lisp_Object
4598 code_convert_region1 (start, end, coding_system, encodep)
4599      Lisp_Object start, end, coding_system;
4600      int encodep;
4601 {
4602   struct coding_system coding;
4603   int from, to, len;
4604
4605   CHECK_NUMBER_COERCE_MARKER (start, 0);
4606   CHECK_NUMBER_COERCE_MARKER (end, 1);
4607   CHECK_SYMBOL (coding_system, 2);
4608
4609   validate_region (&start, &end);
4610   from = XFASTINT (start);
4611   to = XFASTINT (end);
4612
4613   if (NILP (coding_system))
4614     return make_number (to - from);
4615
4616   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4617     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4618
4619   coding.mode |= CODING_MODE_LAST_BLOCK;
4620   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4621                        &coding, encodep, 1);
4622   return make_number (coding.produced_char);
4623 }
4624
4625 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4626        3, 3, "r\nzCoding system: ",
4627   "Decode the current region by specified coding system.\n\
4628 When called from a program, takes three arguments:\n\
4629 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4630 Return length of decoded text.")
4631   (start, end, coding_system)
4632      Lisp_Object start, end, coding_system;
4633 {
4634   return code_convert_region1 (start, end, coding_system, 0);
4635 }
4636
4637 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
4638        3, 3, "r\nzCoding system: ",
4639   "Encode the current region by specified coding system.\n\
4640 When called from a program, takes three arguments:\n\
4641 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4642 Return length of encoded text.")
4643   (start, end, coding_system)
4644      Lisp_Object start, end, coding_system;
4645 {
4646   return code_convert_region1 (start, end, coding_system, 1);
4647 }
4648
4649 Lisp_Object
4650 code_convert_string1 (string, coding_system, nocopy, encodep)
4651      Lisp_Object string, coding_system, nocopy;
4652      int encodep;
4653 {
4654   struct coding_system coding;
4655
4656   CHECK_STRING (string, 0);
4657   CHECK_SYMBOL (coding_system, 1);
4658
4659   if (NILP (coding_system))
4660     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4661
4662   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4663     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4664
4665   coding.mode |= CODING_MODE_LAST_BLOCK;
4666   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4667 }
4668
4669 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
4670        2, 3, 0,
4671   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
4672 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4673 if the decoding operation is trivial.")
4674   (string, coding_system, nocopy)
4675      Lisp_Object string, coding_system, nocopy;
4676 {
4677   return code_convert_string1(string, coding_system, nocopy, 0);
4678 }
4679
4680 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
4681        2, 3, 0,
4682   "Encode STRING to CODING-SYSTEM, and return the result.\n\
4683 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4684 if the encoding operation is trivial.")
4685   (string, coding_system, nocopy)
4686      Lisp_Object string, coding_system, nocopy;
4687 {
4688   return code_convert_string1(string, coding_system, nocopy, 1);
4689 }
4690
4691 \f
4692 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4693   "Decode a JISX0208 character of shift-jis encoding.\n\
4694 CODE is the character code in SJIS.\n\
4695 Return the corresponding character.")
4696   (code)
4697      Lisp_Object code;
4698 {
4699   unsigned char c1, c2, s1, s2;
4700   Lisp_Object val;
4701
4702   CHECK_NUMBER (code, 0);
4703   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4704   DECODE_SJIS (s1, s2, c1, c2);
4705   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4706   return val;
4707 }
4708
4709 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4710   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4711 Return the corresponding character code in SJIS.")
4712   (ch)
4713      Lisp_Object ch;
4714 {
4715   int charset, c1, c2, s1, s2;
4716   Lisp_Object val;
4717
4718   CHECK_NUMBER (ch, 0);
4719   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4720   if (charset == charset_jisx0208)
4721     {
4722       ENCODE_SJIS (c1, c2, s1, s2);
4723       XSETFASTINT (val, (s1 << 8) | s2);
4724     }
4725   else
4726     XSETFASTINT (val, 0);
4727   return val;
4728 }
4729
4730 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4731   "Decode a Big5 character CODE of BIG5 coding system.\n\
4732 CODE is the character code in BIG5.\n\
4733 Return the corresponding character.")
4734   (code)
4735      Lisp_Object code;
4736 {
4737   int charset;
4738   unsigned char b1, b2, c1, c2;
4739   Lisp_Object val;
4740
4741   CHECK_NUMBER (code, 0);
4742   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4743   DECODE_BIG5 (b1, b2, charset, c1, c2);
4744   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4745   return val;
4746 }
4747
4748 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4749   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4750 Return the corresponding character code in Big5.")
4751   (ch)
4752      Lisp_Object ch;
4753 {
4754   int charset, c1, c2, b1, b2;
4755   Lisp_Object val;
4756
4757   CHECK_NUMBER (ch, 0);
4758   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4759   if (charset == charset_big5_1 || charset == charset_big5_2)
4760     {
4761       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4762       XSETFASTINT (val, (b1 << 8) | b2);
4763     }
4764   else
4765     XSETFASTINT (val, 0);
4766   return val;
4767 }
4768 \f
4769 DEFUN ("set-terminal-coding-system-internal",
4770        Fset_terminal_coding_system_internal,
4771        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4772   (coding_system)
4773      Lisp_Object coding_system;
4774 {
4775   CHECK_SYMBOL (coding_system, 0);
4776   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4777   /* We had better not send unsafe characters to terminal.  */
4778   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4779
4780   return Qnil;
4781 }
4782
4783 DEFUN ("set-safe-terminal-coding-system-internal",
4784        Fset_safe_terminal_coding_system_internal,
4785        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4786   (coding_system)
4787      Lisp_Object coding_system;
4788 {
4789   CHECK_SYMBOL (coding_system, 0);
4790   setup_coding_system (Fcheck_coding_system (coding_system),
4791                        &safe_terminal_coding);
4792   return Qnil;
4793 }
4794
4795 DEFUN ("terminal-coding-system",
4796        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
4797   "Return coding system specified for terminal output.")
4798   ()
4799 {
4800   return terminal_coding.symbol;
4801 }
4802
4803 DEFUN ("set-keyboard-coding-system-internal",
4804        Fset_keyboard_coding_system_internal,
4805        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4806   (coding_system)
4807      Lisp_Object coding_system;
4808 {
4809   CHECK_SYMBOL (coding_system, 0);
4810   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4811   return Qnil;
4812 }
4813
4814 DEFUN ("keyboard-coding-system",
4815        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
4816   "Return coding system specified for decoding keyboard input.")
4817   ()
4818 {
4819   return keyboard_coding.symbol;
4820 }
4821
4822 \f
4823 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4824        Sfind_operation_coding_system,  1, MANY, 0,
4825   "Choose a coding system for an operation based on the target name.\n\
4826 The value names a pair of coding systems: (DECODING-SYSTEM ENCODING-SYSTEM).\n\
4827 DECODING-SYSTEM is the coding system to use for decoding\n\
4828 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4829 for encoding (in case OPERATION does encoding).\n\
4830 \n\
4831 The first argument OPERATION specifies an I/O primitive:\n\
4832   For file I/O, `insert-file-contents' or `write-region'.\n\
4833   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4834   For network I/O, `open-network-stream'.\n\
4835 \n\
4836 The remaining arguments should be the same arguments that were passed\n\
4837 to the primitive.  Depending on which primitive, one of those arguments\n\
4838 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4839 whichever argument specifies the file name is TARGET.\n\
4840 \n\
4841 TARGET has a meaning which depends on OPERATION:\n\
4842   For file I/O, TARGET is a file name.\n\
4843   For process I/O, TARGET is a process name.\n\
4844   For network I/O, TARGET is a service name or a port number\n\
4845 \n\
4846 This function looks up what specified for TARGET in,\n\
4847 `file-coding-system-alist', `process-coding-system-alist',\n\
4848 or `network-coding-system-alist' depending on OPERATION.\n\
4849 They may specify a coding system, a cons of coding systems,\n\
4850 or a function symbol to call.\n\
4851 In the last case, we call the function with one argument,\n\
4852 which is a list of all the arguments given to this function.")
4853   (nargs, args)
4854      int nargs;
4855      Lisp_Object *args;
4856 {
4857   Lisp_Object operation, target_idx, target, val;
4858   register Lisp_Object chain;
4859
4860   if (nargs < 2)
4861     error ("Too few arguments");
4862   operation = args[0];
4863   if (!SYMBOLP (operation)
4864       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
4865     error ("Invalid first arguement");
4866   if (nargs < 1 + XINT (target_idx))
4867     error ("Too few arguments for operation: %s",
4868            XSYMBOL (operation)->name->data);
4869   target = args[XINT (target_idx) + 1];
4870   if (!(STRINGP (target)
4871         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
4872     error ("Invalid %dth argument", XINT (target_idx) + 1);
4873
4874   chain = ((EQ (operation, Qinsert_file_contents)
4875             || EQ (operation, Qwrite_region))
4876            ? Vfile_coding_system_alist
4877            : (EQ (operation, Qopen_network_stream)
4878               ? Vnetwork_coding_system_alist
4879               : Vprocess_coding_system_alist));
4880   if (NILP (chain))
4881     return Qnil;
4882
4883   for (; CONSP (chain); chain = XCONS (chain)->cdr)
4884     {
4885       Lisp_Object elt;
4886       elt = XCONS (chain)->car;
4887
4888       if (CONSP (elt)
4889           && ((STRINGP (target)
4890                && STRINGP (XCONS (elt)->car)
4891                && fast_string_match (XCONS (elt)->car, target) >= 0)
4892               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
4893         {
4894           val = XCONS (elt)->cdr;
4895           /* Here, if VAL is both a valid coding system and a valid
4896              function symbol, we return VAL as a coding system.  */
4897           if (CONSP (val))
4898             return val;
4899           if (! SYMBOLP (val))
4900             return Qnil;
4901           if (! NILP (Fcoding_system_p (val)))
4902             return Fcons (val, val);
4903           if (! NILP (Ffboundp (val)))
4904             {
4905               val = call1 (val, Flist (nargs, args));
4906               if (CONSP (val))
4907                 return val;
4908               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
4909                 return Fcons (val, val);
4910             }
4911           return Qnil;
4912         }
4913     }
4914   return Qnil;
4915 }
4916
4917 DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems,
4918        Supdate_iso_coding_systems, 0, 0, 0,
4919   "Update internal database for ISO2022 based coding systems.\n\
4920 When values of the following coding categories are changed, you must\n\
4921 call this function:\n\
4922   coding-category-iso-7, coding-category-iso-7-tight,\n\
4923   coding-category-iso-8-1, coding-category-iso-8-2,\n\
4924   coding-category-iso-7-else, coding-category-iso-8-else")
4925   ()
4926 {
4927   int i;
4928
4929   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE;
4930        i++)
4931     {
4932       if (! coding_system_table[i])
4933         coding_system_table[i]
4934           = (struct coding_system *) xmalloc (sizeof (struct coding_system));
4935       setup_coding_system
4936         (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value,
4937          coding_system_table[i]);
4938     }
4939   return Qnil;
4940 }
4941
4942 #endif /* emacs */
4943
4944 \f
4945 /*** 8. Post-amble ***/
4946
4947 void
4948 init_coding_once ()
4949 {
4950   int i;
4951
4952   /* Emacs' internal format specific initialize routine.  */
4953   for (i = 0; i <= 0x20; i++)
4954     emacs_code_class[i] = EMACS_control_code;
4955   emacs_code_class[0x0A] = EMACS_linefeed_code;
4956   emacs_code_class[0x0D] = EMACS_carriage_return_code;
4957   for (i = 0x21 ; i < 0x7F; i++)
4958     emacs_code_class[i] = EMACS_ascii_code;
4959   emacs_code_class[0x7F] = EMACS_control_code;
4960   emacs_code_class[0x80] = EMACS_leading_code_composition;
4961   for (i = 0x81; i < 0xFF; i++)
4962     emacs_code_class[i] = EMACS_invalid_code;
4963   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
4964   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
4965   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
4966   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
4967
4968   /* ISO2022 specific initialize routine.  */
4969   for (i = 0; i < 0x20; i++)
4970     iso_code_class[i] = ISO_control_code;
4971   for (i = 0x21; i < 0x7F; i++)
4972     iso_code_class[i] = ISO_graphic_plane_0;
4973   for (i = 0x80; i < 0xA0; i++)
4974     iso_code_class[i] = ISO_control_code;
4975   for (i = 0xA1; i < 0xFF; i++)
4976     iso_code_class[i] = ISO_graphic_plane_1;
4977   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
4978   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4979   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
4980   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
4981   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
4982   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
4983   iso_code_class[ISO_CODE_ESC] = ISO_escape;
4984   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
4985   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
4986   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
4987
4988   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
4989   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
4990
4991   setup_coding_system (Qnil, &keyboard_coding);
4992   setup_coding_system (Qnil, &terminal_coding);
4993   setup_coding_system (Qnil, &safe_terminal_coding);
4994
4995   bzero (coding_system_table, sizeof coding_system_table);
4996
4997 #if defined (MSDOS) || defined (WINDOWSNT)
4998   system_eol_type = CODING_EOL_CRLF;
4999 #else
5000   system_eol_type = CODING_EOL_LF;
5001 #endif
5002 }
5003
5004 #ifdef emacs
5005
5006 void
5007 syms_of_coding ()
5008 {
5009   Qtarget_idx = intern ("target-idx");
5010   staticpro (&Qtarget_idx);
5011
5012   Qcoding_system_history = intern ("coding-system-history");
5013   staticpro (&Qcoding_system_history);
5014   Fset (Qcoding_system_history, Qnil);
5015
5016   /* Target FILENAME is the first argument.  */
5017   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5018   /* Target FILENAME is the third argument.  */
5019   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5020
5021   Qcall_process = intern ("call-process");
5022   staticpro (&Qcall_process);
5023   /* Target PROGRAM is the first argument.  */
5024   Fput (Qcall_process, Qtarget_idx, make_number (0));
5025
5026   Qcall_process_region = intern ("call-process-region");
5027   staticpro (&Qcall_process_region);
5028   /* Target PROGRAM is the third argument.  */
5029   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5030
5031   Qstart_process = intern ("start-process");
5032   staticpro (&Qstart_process);
5033   /* Target PROGRAM is the third argument.  */
5034   Fput (Qstart_process, Qtarget_idx, make_number (2));
5035
5036   Qopen_network_stream = intern ("open-network-stream");
5037   staticpro (&Qopen_network_stream);
5038   /* Target SERVICE is the fourth argument.  */
5039   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5040
5041   Qcoding_system = intern ("coding-system");
5042   staticpro (&Qcoding_system);
5043
5044   Qeol_type = intern ("eol-type");
5045   staticpro (&Qeol_type);
5046
5047   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5048   staticpro (&Qbuffer_file_coding_system);
5049
5050   Qpost_read_conversion = intern ("post-read-conversion");
5051   staticpro (&Qpost_read_conversion);
5052
5053   Qpre_write_conversion = intern ("pre-write-conversion");
5054   staticpro (&Qpre_write_conversion);
5055
5056   Qno_conversion = intern ("no-conversion");
5057   staticpro (&Qno_conversion);
5058
5059   Qundecided = intern ("undecided");
5060   staticpro (&Qundecided);
5061
5062   Qcoding_system_p = intern ("coding-system-p");
5063   staticpro (&Qcoding_system_p);
5064
5065   Qcoding_system_error = intern ("coding-system-error");
5066   staticpro (&Qcoding_system_error);
5067
5068   Fput (Qcoding_system_error, Qerror_conditions,
5069         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5070   Fput (Qcoding_system_error, Qerror_message,
5071         build_string ("Invalid coding system"));
5072
5073   Qcoding_category = intern ("coding-category");
5074   staticpro (&Qcoding_category);
5075   Qcoding_category_index = intern ("coding-category-index");
5076   staticpro (&Qcoding_category_index);
5077
5078   Vcoding_category_table
5079     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5080   staticpro (&Vcoding_category_table);
5081   {
5082     int i;
5083     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5084       {
5085         XVECTOR (Vcoding_category_table)->contents[i]
5086           = intern (coding_category_name[i]);
5087         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5088               Qcoding_category_index, make_number (i));
5089       }
5090   }
5091
5092   Qcharacter_unification_table = intern ("character-unification-table");
5093   staticpro (&Qcharacter_unification_table);
5094   Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
5095         make_number (0));
5096
5097   Qcharacter_unification_table_for_decode
5098     = intern ("character-unification-table-for-decode");
5099   staticpro (&Qcharacter_unification_table_for_decode);
5100
5101   Qcharacter_unification_table_for_encode
5102     = intern ("character-unification-table-for-encode");
5103   staticpro (&Qcharacter_unification_table_for_encode);
5104
5105   Qsafe_charsets = intern ("safe-charsets");
5106   staticpro (&Qsafe_charsets);
5107
5108   Qemacs_mule = intern ("emacs-mule");
5109   staticpro (&Qemacs_mule);
5110
5111   Qraw_text = intern ("raw-text");
5112   staticpro (&Qraw_text);
5113
5114   defsubr (&Scoding_system_p);
5115   defsubr (&Sread_coding_system);
5116   defsubr (&Sread_non_nil_coding_system);
5117   defsubr (&Scheck_coding_system);
5118   defsubr (&Sdetect_coding_region);
5119   defsubr (&Sdetect_coding_string);
5120   defsubr (&Sdecode_coding_region);
5121   defsubr (&Sencode_coding_region);
5122   defsubr (&Sdecode_coding_string);
5123   defsubr (&Sencode_coding_string);
5124   defsubr (&Sdecode_sjis_char);
5125   defsubr (&Sencode_sjis_char);
5126   defsubr (&Sdecode_big5_char);
5127   defsubr (&Sencode_big5_char);
5128   defsubr (&Sset_terminal_coding_system_internal);
5129   defsubr (&Sset_safe_terminal_coding_system_internal);
5130   defsubr (&Sterminal_coding_system);
5131   defsubr (&Sset_keyboard_coding_system_internal);
5132   defsubr (&Skeyboard_coding_system);
5133   defsubr (&Sfind_operation_coding_system);
5134   defsubr (&Supdate_iso_coding_systems);
5135
5136   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5137     "List of coding systems.\n\
5138 \n\
5139 Do not alter the value of this variable manually.  This variable should be\n\
5140 updated by the functions `make-coding-system' and\n\
5141 `define-coding-system-alias'.");
5142   Vcoding_system_list = Qnil;
5143
5144   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5145     "Alist of coding system names.\n\
5146 Each element is one element list of coding system name.\n\
5147 This variable is given to `completing-read' as TABLE argument.\n\
5148 \n\
5149 Do not alter the value of this variable manually.  This variable should be\n\
5150 updated by the functions `make-coding-system' and\n\
5151 `define-coding-system-alias'.");
5152   Vcoding_system_alist = Qnil;
5153
5154   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5155     "List of coding-categories (symbols) ordered by priority.");
5156   {
5157     int i;
5158
5159     Vcoding_category_list = Qnil;
5160     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5161       Vcoding_category_list
5162         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5163                  Vcoding_category_list);
5164   }
5165
5166   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5167     "Specify the coding system for read operations.\n\
5168 It is useful to bind this variable with `let', but do not set it globally.\n\
5169 If the value is a coding system, it is used for decoding on read operation.\n\
5170 If not, an appropriate element is used from one of the coding system alists:\n\
5171 There are three such tables, `file-coding-system-alist',\n\
5172 `process-coding-system-alist', and `network-coding-system-alist'.");
5173   Vcoding_system_for_read = Qnil;
5174
5175   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5176     "Specify the coding system for write operations.\n\
5177 It is useful to bind this variable with `let', but do not set it globally.\n\
5178 If the value is a coding system, it is used for encoding on write operation.\n\
5179 If not, an appropriate element is used from one of the coding system alists:\n\
5180 There are three such tables, `file-coding-system-alist',\n\
5181 `process-coding-system-alist', and `network-coding-system-alist'.");
5182   Vcoding_system_for_write = Qnil;
5183
5184   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5185     "Coding system used in the latest file or process I/O.");
5186   Vlast_coding_system_used = Qnil;
5187
5188   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5189     "*Non-nil inhibit code conversion of end-of-line format in any cases.");
5190   inhibit_eol_conversion = 0;
5191
5192   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5193     "Alist to decide a coding system to use for a file I/O operation.\n\
5194 The format is ((PATTERN . VAL) ...),\n\
5195 where PATTERN is a regular expression matching a file name,\n\
5196 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5197 If VAL is a coding system, it is used for both decoding and encoding\n\
5198 the file contents.\n\
5199 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5200 and the cdr part is used for encoding.\n\
5201 If VAL is a function symbol, the function must return a coding system\n\
5202 or a cons of coding systems which are used as above.\n\
5203 \n\
5204 See also the function `find-operation-coding-system'.");
5205   Vfile_coding_system_alist = Qnil;
5206
5207   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5208     "Alist to decide a coding system to use for a process I/O operation.\n\
5209 The format is ((PATTERN . VAL) ...),\n\
5210 where PATTERN is a regular expression matching a program name,\n\
5211 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5212 If VAL is a coding system, it is used for both decoding what received\n\
5213 from the program and encoding what sent to the program.\n\
5214 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5215 and the cdr part is used for encoding.\n\
5216 If VAL is a function symbol, the function must return a coding system\n\
5217 or a cons of coding systems which are used as above.\n\
5218 \n\
5219 See also the function `find-operation-coding-system'.");
5220   Vprocess_coding_system_alist = Qnil;
5221
5222   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5223     "Alist to decide a coding system to use for a network I/O operation.\n\
5224 The format is ((PATTERN . VAL) ...),\n\
5225 where PATTERN is a regular expression matching a network service name\n\
5226 or is a port number to connect to,\n\
5227 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5228 If VAL is a coding system, it is used for both decoding what received\n\
5229 from the network stream and encoding what sent to the network stream.\n\
5230 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5231 and the cdr part is used for encoding.\n\
5232 If VAL is a function symbol, the function must return a coding system\n\
5233 or a cons of coding systems which are used as above.\n\
5234 \n\
5235 See also the function `find-operation-coding-system'.");
5236   Vnetwork_coding_system_alist = Qnil;
5237
5238   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5239     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
5240   eol_mnemonic_unix = ':';
5241
5242   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5243     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5244   eol_mnemonic_dos = '\\';
5245
5246   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5247     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5248   eol_mnemonic_mac = '/';
5249
5250   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5251     "Mnemonic character indicating end-of-line format is not yet decided.");
5252   eol_mnemonic_undecided = ':';
5253
5254   DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
5255     "Non-nil means ISO 2022 encoder/decoder do character unification.");
5256   Venable_character_unification = Qt;
5257
5258   DEFVAR_LISP ("standard-character-unification-table-for-decode",
5259     &Vstandard_character_unification_table_for_decode,
5260     "Table for unifying characters when reading.");
5261   Vstandard_character_unification_table_for_decode = Qnil;
5262
5263   DEFVAR_LISP ("standard-character-unification-table-for-encode",
5264     &Vstandard_character_unification_table_for_encode,
5265     "Table for unifying characters when writing.");
5266   Vstandard_character_unification_table_for_encode = Qnil;
5267
5268   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5269     "Alist of charsets vs revision numbers.\n\
5270 While encoding, if a charset (car part of an element) is found,\n\
5271 designate it with the escape sequence identifing revision (cdr part of the element).");
5272   Vcharset_revision_alist = Qnil;
5273
5274   DEFVAR_LISP ("default-process-coding-system",
5275                &Vdefault_process_coding_system,
5276     "Cons of coding systems used for process I/O by default.\n\
5277 The car part is used for decoding a process output,\n\
5278 the cdr part is used for encoding a text to be sent to a process.");
5279   Vdefault_process_coding_system = Qnil;
5280
5281   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5282     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5283 This is a vector of length 256.\n\
5284 If Nth element is non-nil, the existence of code N in a file\n\
5285 \(or output of subprocess) doesn't prevent it to be detected as\n\
5286 a coding system of ISO 2022 variant which has a flag\n\
5287 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5288 or reading output of a subprocess.\n\
5289 Only 128th through 159th elements has a meaning.");
5290   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5291
5292   DEFVAR_LISP ("select-safe-coding-system-function",
5293                &Vselect_safe_coding_system_function,
5294     "Function to call to select safe coding system for encoding a text.\n\
5295 \n\
5296 If set, this function is called to force a user to select a proper\n\
5297 coding system which can encode the text in the case that a default\n\
5298 coding system used in each operation can't encode the text.\n\
5299 \n\
5300 The default value is `select-safe-codign-system' (which see).");
5301   Vselect_safe_coding_system_function = Qnil;
5302
5303 }
5304
5305 #endif /* emacs */