src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. CCL handlers
  29   6. End-of-line handlers
  30   7. C library functions
  31   8. Emacs Lisp library functions
  32   9. Post-amble
  33
  34 */
  35
  36 /*** GENERAL NOTE on CODING SYSTEM ***
  37
  38   Coding system is an encoding mechanism of one or more character
  39   sets.  Here's a list of coding systems which Emacs can handle.  When
  40   we say "decode", it means converting some other coding system to
  41   Emacs' internal format (emacs-mule), and when we say "encode",
  42   it means converting the coding system emacs-mule to some other
  43   coding system.
  44
  45   0. Emacs' internal format (emacs-mule)
  46
  47   Emacs itself holds a multi-lingual character in a buffer and a string
  48   in a special format.  Details are described in section 2.
  49
  50   1. ISO2022
  51
  52   The most famous coding system for multiple character sets.  X's
  53   Compound Text, various EUCs (Extended Unix Code), and coding
  54   systems used in Internet communication such as ISO-2022-JP are
  55   all variants of ISO2022.  Details are described in section 3.
  56
  57   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  58
  59   A coding system to encode character sets: ASCII, JISX0201, and
  60   JISX0208.  Widely used for PC's in Japan.  Details are described in
  61   section 4.
  62
  63   3. BIG5
  64
  65   A coding system to encode character sets: ASCII and Big5.  Widely
  66   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  67   described in section 4.  In this file, when we write "BIG5"
  68   (all uppercase), we mean the coding system, and when we write
  69   "Big5" (capitalized), we mean the character set.
  70
  71   4. Raw text
  72
  73   A coding system for a text containing random 8-bit code.  Emacs does
  74   no code conversion on such a text except for end-of-line format.
  75
  76   5. Other
  77
  78   If a user wants to read/write a text encoded in a coding system not
  79   listed above, he can supply a decoder and an encoder for it in CCL
  80   (Code Conversion Language) programs.  Emacs executes the CCL program
  81   while reading/writing.
  82
  83   Emacs represents a coding system by a Lisp symbol that has a property
  84   `coding-system'.  But, before actually using the coding system, the
  85   information about it is set in a structure of type `struct
  86   coding_system' for rapid processing.  See section 6 for more details.
  87
  88 */
  89
  90 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  91
  92   How end-of-line of a text is encoded depends on a system.  For
  93   instance, Unix's format is just one byte of `line-feed' code,
  94   whereas DOS's format is two-byte sequence of `carriage-return' and
  95   `line-feed' codes.  MacOS's format is usually one byte of
  96   `carriage-return'.
  97
  98   Since text characters encoding and end-of-line encoding are
  99   independent, any coding system described above can take
 100   any format of end-of-line.  So, Emacs has information of format of
 101   end-of-line in each coding-system.  See section 6 for more details.
 102
 103 */
 104
 105 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 106
 107   These functions check if a text between SRC and SRC_END is encoded
 108   in the coding system category XXX.  Each returns an integer value in
 109   which appropriate flag bits for the category XXX are set.  The flag
 110   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 111   template of these functions.  */
 112 #if 0 /* Template only; this block is never compiled.  */
 113 int
 114 detect_coding_emacs_mule (src, src_end)
 115      unsigned char *src, *src_end; /* The text to examine lies between SRC and SRC_END.  */
 116 {
 117   ...
 118 }
 119 #endif /* 0 */
 120
 121 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 122
 123   These functions decode SRC_BYTES length text at SOURCE encoded in
 124   CODING to Emacs' internal format (emacs-mule).  The resulting text
 125   goes to a place pointed to by DESTINATION, the length of which
 126   should not exceed DST_BYTES.  These functions set the information of
 127   original and decoded texts in the members produced, produced_char,
 128   consumed, and consumed_char of the structure *CODING.
 129
 130   The return value is an integer (CODING_FINISH_XXX) indicating how
 131   the decoding finished.
 132
 133   DST_BYTES zero means that source area and destination area are
 134   overlapped, which means that we can produce a decoded text until it
 135   reaches the head of the not-yet-decoded source text.
 136
 137   Below is a template of these functions.  */
 138 #if 0 /* Template only; this block is never compiled.  */
 139 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 140      struct coding_system *coding; /* Its members produced, produced_char, consumed and consumed_char are set.  */
 141      unsigned char *source, *destination; /* Text to decode, and the place the decoded text goes to.  */
 142      int src_bytes, dst_bytes; /* Length of SOURCE, and limit on the output (zero: the two areas overlap).  */
 143 {
 144   ...
 145 }
 146 #endif /* 0 */
 147
 148 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 149
 150   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 151   internal format (emacs-mule) to CODING.  The resulting text goes to
 152   a place pointed to by DESTINATION, the length of which should not
 153   exceed DST_BYTES.  These functions set the information of
 154   original and encoded texts in the members produced, produced_char,
 155   consumed, and consumed_char of the structure *CODING.
 156
 157   The return value is an integer (CODING_FINISH_XXX) indicating how
 158   the encoding finished.
 159
 160   DST_BYTES zero means that source area and destination area are
 161   overlapped, which means that we can produce an encoded text until it
 162   reaches the head of the not-yet-encoded source text.
 163
 164   Below is a template of these functions.  */
 165 #if 0 /* Template only; this block is never compiled.  */
 166 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 167      struct coding_system *coding; /* Its members produced, produced_char, consumed and consumed_char are set.  */
 168      unsigned char *source, *destination; /* Text to encode, and the place the encoded text goes to.  */
 169      int src_bytes, dst_bytes; /* Length of SOURCE, and limit on the output (zero: the two areas overlap).  */
 170 {
 171   ...
 172 }
 173 #endif /* 0 */
 174
 175 /*** COMMONLY USED MACROS ***/
 176
 177 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 178    THREE_MORE_BYTES safely get one, two, and three bytes from the
 179    source text respectively.  If there are not enough bytes in the
 180    source, they jump to `label_end_of_loop'.  The caller should set
 181    variables `src' and `src_end' to appropriate areas in advance.  */
 182
 183 #define ONE_MORE_BYTE(c1)       \
 184   do {                          \
 185     if (src < src_end)          /* At least one byte remains.  */ \
 186       c1 = *src++;              \
 187     else                        /* Source exhausted; `src' is left untouched.  */ \
 188       goto label_end_of_loop;   \
 189   } while (0)
 190
 191 #define TWO_MORE_BYTES(c1, c2)  \
 192   do {                          \
 193     if (src + 1 < src_end)      /* At least two bytes remain.  */ \
 194       c1 = *src++, c2 = *src++; \
 195     else                        /* Too few bytes; nothing is consumed.  */ \
 196       goto label_end_of_loop;   \
 197   } while (0)
 198
 199 #define THREE_MORE_BYTES(c1, c2, c3)            \
 200   do {                                          \
 201     if (src + 2 < src_end)                      /* At least three bytes remain.  */ \
 202       c1 = *src++, c2 = *src++, c3 = *src++;    \
 203     else                                        /* Too few bytes; nothing is consumed.  */ \
 204       goto label_end_of_loop;                   \
 205   } while (0)
 206
 207 /* The following three macros DECODE_CHARACTER_ASCII,
 208    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 209    the multi-byte form of a character of each class at the place
 210    pointed by `dst'.  The caller should set the variable `dst' to
 211    point to an appropriate area and the variable `coding' to point to
 212    the coding-system of the currently decoding text in advance.  */
 213
 214 /* Decode one ASCII character C.  */
 215
 216 #define DECODE_CHARACTER_ASCII(c)               \
 217   do {                                          \
 218     if (COMPOSING_P (coding->composing))        \
 219       {                                         \
 220         *dst++ = 0xA0, *dst++ = (c) | 0x80;     /* Composition component: 0xA0, then ASCII-code + 0x80.  */ \
 221         coding->composed_chars++;               \
 222         if (((c) | 0x80) < 0xA0)                /* Second byte fell below the 0xA0..0xFF position-code range.  */ \
 223           coding->fake_multibyte = 1;           \
 224       }                                         \
 225     else                                        \
 226       {                                         \
 227         /* If ASCII charset is invoked to GR,   \
 228            we must reset MSB now.  */           \
 229         *dst++ = (c) & 0x7F;                    \
 230         coding->produced_char++;                /* Counted only outside a composition.  */ \
 231       }                                         \
 232   } while (0)
 233
 234 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 235    position-code is C.  */
 236
 237 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 238   do {                                                                  \
 239     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 240     if (COMPOSING_P (coding->composing))                                \
 241       {                                                                 \
 242         *dst++ = leading_code + 0x20;   /* Leading-codes of composition components are raised by 0x20.  */ \
 243         coding->composed_chars++;                                       \
 244       }                                                                 \
 245     else                                                                \
 246       {                                                                 \
 247         *dst++ = leading_code;                                          \
 248         coding->produced_char++;                                        \
 249       }                                                                 \
 250     if (leading_code = CHARSET_LEADING_CODE_EXT (charset))   /* Assignment intended: the extended leading-code is emitted only if nonzero.  */ \
 251       *dst++ = leading_code;                                            \
 252     *dst++ = (c) | 0x80;                /* Position-code, with MSB set.  */ \
 253     if (((c) | 0x80)  < 0xA0)           /* Position-code below 0xA0.  */ \
 254       coding->fake_multibyte = 1;                                       \
 255   } while (0)
 256
 257 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 258    position-codes are C1 and C2.  */
 259
 260 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 261   do {                                                  \
 262     DECODE_CHARACTER_DIMENSION1 (charset, c1);          /* Leading-code(s) and the first position-code.  */ \
 263     *dst++ = (c2) | 0x80;                               /* Second position-code, with MSB set.  */ \
 264     if (((c2) | 0x80) < 0xA0)                           /* Second position-code below 0xA0.  */ \
 265       coding->fake_multibyte = 1;                       \
 266   } while (0)
 267
 268 \f
 269 /*** 1. Preamble ***/
 270
 271 #ifdef emacs
 272 #include <config.h>
 273 #endif
 274
 275 #include <stdio.h>
 276
 277 #ifdef emacs
 278
 279 #include "lisp.h"
 280 #include "buffer.h"
 281 #include "charset.h"
 282 #include "ccl.h"
 283 #include "coding.h"
 284 #include "window.h"
 285
 286 #else  /* not emacs */
 287
 288 #include "mulelib.h"
 289
 290 #endif /* not emacs */
 291
 292 Lisp_Object Qcoding_system, Qeol_type; /* NOTE(review): the Qxxx objects here are presumably Lisp symbols set up outside this view -- confirm.  */
 293 Lisp_Object Qbuffer_file_coding_system;
 294 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 295 Lisp_Object Qno_conversion, Qundecided;
 296 Lisp_Object Qcoding_system_history;
 297 Lisp_Object Qsafe_charsets;
 298 Lisp_Object Qvalid_codes;
 299
 300 extern Lisp_Object Qinsert_file_contents, Qwrite_region; /* Only declared here; defined in another file.  */
 301 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 302 Lisp_Object Qstart_process, Qopen_network_stream;
 303 Lisp_Object Qtarget_idx;
 304
 305 Lisp_Object Vselect_safe_coding_system_function;
 306
 307 /* Mnemonic string for each format of end-of-line.  */
 308 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 309 /* Mnemonic string to indicate that the format of end-of-line is not
 310    yet decided.  */
 311 Lisp_Object eol_mnemonic_undecided;
 312
 313 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 314    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 315 int system_eol_type;
 316
 317 #ifdef emacs /* Everything down to the matching #endif exists only when `emacs' is defined.  */
 318
 319 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 320
 321 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 322
 323 /* Coding systems emacs-mule and raw-text are for converting only the
 324    end-of-line format.  */
 325 Lisp_Object Qemacs_mule, Qraw_text;
 326
 327 /* Coding-systems are handed between Emacs Lisp programs and C internal
 328    routines by the following three variables.  */
 329 /* Coding-system for reading files and receiving data from process.  */
 330 Lisp_Object Vcoding_system_for_read;
 331 /* Coding-system for writing files and sending data to process.  */
 332 Lisp_Object Vcoding_system_for_write;
 333 /* Coding-system actually used in the latest I/O.  */
 334 Lisp_Object Vlast_coding_system_used;
 335
 336 /* A vector of length 256 which contains information about special
 337    Latin codes (especially for dealing with Microsoft codes).  */
 338 Lisp_Object Vlatin_extra_code_table;
 339
 340 /* Flag to inhibit code conversion of end-of-line format.  */
 341 int inhibit_eol_conversion;
 342
 343 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 344 int inherit_process_coding_system;
 345
 346 /* Coding system to be used to encode text for terminal display.  */
 347 struct coding_system terminal_coding;
 348
 349 /* Coding system to be used to encode text for terminal display when
 350    terminal coding system is nil.  */
 351 struct coding_system safe_terminal_coding;
 352
 353 /* Coding system of what is sent from terminal keyboard.  */
 354 struct coding_system keyboard_coding;
 355
 356 /* Default coding system to be used to write a file.  */
 357 struct coding_system default_buffer_file_coding;
 358
 359 Lisp_Object Vfile_coding_system_alist; /* NOTE(review): these three are presumably alists used to pick a coding system for file, process and network I/O -- confirm.  */
 360 Lisp_Object Vprocess_coding_system_alist;
 361 Lisp_Object Vnetwork_coding_system_alist;
 362
 363 Lisp_Object Vlocale_coding_system;
 364
 365 #endif /* emacs */
 366
 367 Lisp_Object Qcoding_category, Qcoding_category_index;
 368
 369 /* List of symbols `coding-category-xxx' ordered by priority.  */
 370 Lisp_Object Vcoding_category_list;
 371
 372 /* Table of coding categories (Lisp symbols).  */
 373 Lisp_Object Vcoding_category_table;
 374
 375 /* Table of symbol names, one for each coding category.  */
 376 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = { /* NOTE(review): the order presumably has to match the category indices declared elsewhere -- confirm.  */
 377   "coding-category-emacs-mule",
 378   "coding-category-sjis",
 379   "coding-category-iso-7",
 380   "coding-category-iso-7-tight",
 381   "coding-category-iso-8-1",
 382   "coding-category-iso-8-2",
 383   "coding-category-iso-7-else",
 384   "coding-category-iso-8-else",
 385   "coding-category-ccl",
 386   "coding-category-big5",
 387   "coding-category-raw-text",
 388   "coding-category-binary"
 389 };
 390
 391 /* Table of pointers to the coding system corresponding to each coding
 392    category.  An entry may be null; CHARSET_OK tests for that.  */
 393 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 394
 395 /* Table of coding category masks.  Nth element is a mask for the coding
 396    category whose priority is Nth.  */
 397 static
 398 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 399
 400 /* Flag telling whether translation tables are looked up during
 401    character code conversion.  */
 402 Lisp_Object Venable_character_translation;
 403 /* Standard translation table to look up on decoding (reading).  */
 404 Lisp_Object Vstandard_translation_table_for_decode;
 405 /* Standard translation table to look up on encoding (writing).  */
 406 Lisp_Object Vstandard_translation_table_for_encode;
 407
 408 Lisp_Object Qtranslation_table; /* NOTE(review): these four are presumably Lisp symbols set up outside this view -- confirm.  */
 409 Lisp_Object Qtranslation_table_id;
 410 Lisp_Object Qtranslation_table_for_decode;
 411 Lisp_Object Qtranslation_table_for_encode;
 412
 413 /* Alist of charsets vs revision number.  */
 414 Lisp_Object Vcharset_revision_alist;
 415
 416 /* Default coding systems used for process I/O.  */
 417 Lisp_Object Vdefault_process_coding_system;
 418
 419 /* Global flag to tell that we can't call post-read-conversion and
 420    pre-write-conversion functions.  Usually the value is zero, but it
 421    is set to 1 temporarily while such functions are running.  This is
 422    to avoid infinite recursion.  */
 423 static int inhibit_pre_post_conversion;
 424
 425 \f
 426 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 427
 428 /* Emacs' internal format for encoding multiple character sets is a
 429    kind of multi-byte encoding, i.e. characters are encoded by
 430    variable-length sequences of one-byte codes.  ASCII characters
 431    and control characters (e.g. `tab', `newline') are represented by
 432    one-byte sequences which are their ASCII codes, in the range 0x00
 433    through 0x7F.  The other characters are represented by a sequence
 434    of `base leading-code', optional `extended leading-code', and one
 435    or two `position-code's.  The length of the sequence is determined
 436    by the base leading-code.  Leading-code takes the range 0x80
 437    through 0x9F, whereas extended leading-code and position-code take
 438    the range 0xA0 through 0xFF.  See `charset.h' for more details
 439    about leading-code and position-code.
 440
 441    There's one exception to this rule.  Special leading-code
 442    `leading-code-composition' denotes that the following several
 443    characters should be composed into one character.  Leading-codes of
 444    components (except for ASCII) are incremented by 0x20.  An ASCII character
 445    component is represented by a 2-byte sequence of `0xA0' and
 446    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 447    details of composite character.  Hence, we can summarize the code
 448    range as follows:
 449
 450    --- CODE RANGE of Emacs' internal format ---
 451    (character set)      (range)
 452    ASCII                0x00 .. 0x7F
 453    ELSE (1st byte)      0x80 .. 0x9F
 454         (rest bytes)    0xA0 .. 0xFF
 455    ---------------------------------------------
 456
 457   */
 458
 459 enum emacs_code_class_type emacs_code_class[256]; /* Class of each byte value; detect_coding_emacs_mule indexes it by the byte itself.  */
 460
 461 /* Consume *SRC and go to the next statement only if the code is 0xA0 or greater.
 462    If it is less, return 0; if SRC has reached SRC_END, jump to `label_end_of_switch'.  */
 463 #define CHECK_CODE_RANGE_A0_FF  \
 464   do {                          \
 465     if (src >= src_end)         /* Nothing left to read.  */ \
 466       goto label_end_of_switch; \
 467     else if (*src++ < 0xA0)     /* The byte is consumed even when it is rejected.  */ \
 468       return 0;                 \
 469   } while (0)
 470
 471 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 472    Return CODING_CATEGORY_MASK_EMACS_MULE if the text between SRC and SRC_END looks like
 473    Emacs' internal format, else 0.  A sequence truncated at SRC_END is not a mismatch.  */
 474
 475 int
 476 detect_coding_emacs_mule (src, src_end)
 477      unsigned char *src, *src_end;
 478 {
 479   unsigned char c;
 480   int composing = 0;  /* Nonzero while inside a composition sequence.  */
 481
 482   while (src < src_end)
 483     {
 484       c = *src++;
 485
 486       if (composing)
 487         {
 488           if (c < 0xA0)  /* Not a component byte: the composition has ended.  */
 489             composing = 0;
 490           else
 491             c -= 0x20;  /* Component leading-codes carry +0x20; undo it before classifying.  */
 492         }
 493
 494       switch (emacs_code_class[c])
 495         {
 496         case EMACS_ascii_code:
 497         case EMACS_linefeed_code:
 498           break;
 499
 500         case EMACS_control_code:
 501           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 502             return 0;  /* ESC, SI and SO make the text fail this check.  */
 503           break;
 504
 505         case EMACS_invalid_code:
 506           return 0;
 507
 508         case EMACS_leading_code_composition: /* c == 0x80 */
 509           if (composing)  /* The byte was 0xA0 (ASCII component): one more byte of 0xA0 or above must follow.  */
 510             CHECK_CODE_RANGE_A0_FF;
 511           else  /* 0x80 itself starts a composition.  */
 512             composing = 1;
 513           break;
 514
 515         case EMACS_leading_code_4:  /* Three following bytes are checked in total.  */
 516           CHECK_CODE_RANGE_A0_FF;
 517           /* fall down to check it two more times ...  */
 518
 519         case EMACS_leading_code_3:
 520           CHECK_CODE_RANGE_A0_FF;
 521           /* fall down to check it one more time ...  */
 522
 523         case EMACS_leading_code_2:
 524           CHECK_CODE_RANGE_A0_FF;
 525           break;
 526
 527         default:
 528         label_end_of_switch:  /* CHECK_CODE_RANGE_A0_FF jumps here when SRC has reached SRC_END.  */
 529           break;
 530         }
 531     }
 532   return CODING_CATEGORY_MASK_EMACS_MULE;
 533 }
 534
 535 \f
 536 /*** 3. ISO2022 handlers ***/
 537
 538 /* The following note describes the coding system ISO2022 briefly.
 539    Since the intention of this note is to help understand the
 540    functions in this file, some parts are NOT ACCURATE or OVERLY
 541    SIMPLIFIED.  For thorough understanding, please refer to the
 542    original document of ISO2022.
 543
 544    ISO2022 provides many mechanisms to encode several character sets
 545    in 7-bit and 8-bit environments.  For 7-bit environments, all text
 546    is encoded using bytes less than 128.  This may make the encoded
 547    text a little bit longer, but the text passes more easily through
 548    several gateways, some of which strip off MSB (Most Significant Bit).
 549
 550    There are two kinds of character sets: control character set and
 551    graphic character set.  The former contains control characters such
 552    as `newline' and `escape' to provide control functions (control
 553    functions are also provided by escape sequences).  The latter
 554    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 555    two control character sets and many graphic character sets.
 556
 557    Graphic character sets are classified into one of the following
 558    four classes, according to the number of bytes (DIMENSION) and
 559    number of characters in one dimension (CHARS) of the set:
 560    - DIMENSION1_CHARS94
 561    - DIMENSION1_CHARS96
 562    - DIMENSION2_CHARS94
 563    - DIMENSION2_CHARS96
 564
 565    In addition, each character set is assigned an identification tag,
 566    unique for each set, called "final character" (denoted as <F>
 567    hereafter).  The <F> of each character set is decided by ECMA(*)
 568    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
 569    (0x30..0x3F are for private use only).
 570
 571    Note (*): ECMA = European Computer Manufacturers Association
 572
 573    Here are examples of graphic character set [NAME(<F>)]:
 574         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 575         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 576         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 577         o DIMENSION2_CHARS96 -- none for the moment
 578
 579    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
 580         C0 [0x00..0x1F] -- control character plane 0
 581         GL [0x20..0x7F] -- graphic character plane 0
 582         C1 [0x80..0x9F] -- control character plane 1
 583         GR [0xA0..0xFF] -- graphic character plane 1
 584
 585    A control character set is directly designated and invoked to C0 or
 586    C1 by an escape sequence.  The most common case is that:
 587    - ISO646's  control character set is designated/invoked to C0, and
 588    - ISO6429's control character set is designated/invoked to C1,
 589    and usually these designations/invocations are omitted in encoded
 590    text.  In a 7-bit environment, only C0 can be used, and a control
 591    character for C1 is encoded by an appropriate escape sequence to
 592    fit into the environment.  All control characters for C1 are
 593    defined to have corresponding escape sequences.
 594
 595    A graphic character set is at first designated to one of four
 596    graphic registers (G0 through G3), then these graphic registers are
 597    invoked to GL or GR.  These designations and invocations can be
 598    done independently.  The most common case is that G0 is invoked to
 599    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
 600    these invocations and designations are omitted in encoded text.
 601    In a 7-bit environment, only GL can be used.
 602
 603    When a graphic character set of CHARS94 is invoked to GL, codes
 604    0x20 and 0x7F of the GL area work as control characters SPACE and
 605    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
 606    be used.
 607
 608    There are two ways of invocation: locking-shift and single-shift.
 609    With locking-shift, the invocation lasts until the next different
 610    invocation, whereas with single-shift, the invocation affects the
 611    following character only and doesn't affect the locking-shift
 612    state.  Invocations are done by the following control characters or
 613    escape sequences:
 614
 615    ----------------------------------------------------------------------
 616    abbrev  function                  cntrl escape seq   description
 617    ----------------------------------------------------------------------
 618    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
 619    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
 620    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
 621    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
 622    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
 623    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
 624    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
 625    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
 626    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
 627    ----------------------------------------------------------------------
 628    (*) These are not used by any known coding system.
 629
 630    Control characters for these functions are defined by macros
 631    ISO_CODE_XXX in `coding.h'.
 632
 633    Designations are done by the following escape sequences:
 634    ----------------------------------------------------------------------
 635    escape sequence      description
 636    ----------------------------------------------------------------------
 637    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 638    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 639    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 640    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 641    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 642    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 643    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 644    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 645    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 646    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 647    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 648    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 649    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 650    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 651    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 652    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 653    ----------------------------------------------------------------------
 654
 655    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 656    of dimension 1, chars 94, and final character <F>, etc...
 657
 658    Note (*): Although these designations are not allowed in ISO2022,
 659    Emacs accepts them on decoding, and produces them on encoding
 660    CHARS96 character sets in a coding system which is characterized as
 661    7-bit environment, non-locking-shift, and non-single-shift.
 662
 663    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 664    '(' can be omitted.  We refer to this as "short-form" hereafter.
 665
 666    Now you may notice that there are a lot of ways for encoding the
 667    same multilingual text in ISO2022.  Actually, there exist many
 668    coding systems such as Compound Text (used in X11's inter client
 669    communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR
 670    (used in Korean internet), EUC (Extended UNIX Code, used in Asian
 671    localized platforms), and all of these are variants of ISO2022.
 672
 673    In addition to the above, Emacs handles two more kinds of escape
 674    sequences: ISO6429's direction specification and Emacs' private
 675    sequence for specifying character composition.
 676
 677    ISO6429's direction specification takes the following form:
 678         o CSI ']'      -- end of the current direction
 679         o CSI '0' ']'  -- end of the current direction
 680         o CSI '1' ']'  -- start of left-to-right text
 681         o CSI '2' ']'  -- start of right-to-left text
 682    The control character CSI (0x9B: control sequence introducer) is
 683    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
 684
 685    Character composition specification takes the following form:
 686         o ESC '0' -- start character composition
 687         o ESC '1' -- end character composition
 688    Since these are not standard escape sequences of any ISO standard,
 689    the use of them for these meaning is restricted to Emacs only.  */
 690
 691 enum iso_code_class_type iso_code_class[256]; /* NOTE(review): presumably the ISO2022 class of each byte value, by analogy with emacs_code_class -- confirm.  */
 692
 693 #define CHARSET_OK(idx, charset)                                \
 694   (coding_system_table[idx]     /* A coding system is set for category IDX ...  */ \
 695    && (coding_system_table[idx]->safe_charsets[charset]  /* ... and its safe_charsets entry for CHARSET is nonzero ...  */ \
 696        || (CODING_SPEC_ISO_REQUESTED_DESIGNATION                \
 697             (coding_system_table[idx], charset)                 \
 698            != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))  /* ... or a designation is requested for CHARSET.  */
 699
 700 #define SHIFT_OUT_OK(idx) /* Nonzero if the initial designation for index 1 (presumably register G1 -- confirm) is non-negative.  Unlike CHARSET_OK, coding_system_table[IDX] is not tested for null here.  */ \
 701   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 702
 703 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 704    Check if a text is encoded in ISO2022.  If it is, returns an
 705    integer in which appropriate flag bits any of:
 706         CODING_CATEGORY_MASK_ISO_7
 707         CODING_CATEGORY_MASK_ISO_7_TIGHT
 708         CODING_CATEGORY_MASK_ISO_8_1
 709         CODING_CATEGORY_MASK_ISO_8_2
 710         CODING_CATEGORY_MASK_ISO_7_ELSE
 711         CODING_CATEGORY_MASK_ISO_8_ELSE
 712    are set.  If a code which should never appear in ISO2022 is found,
 713    returns 0.  */
 714
 715 int
 716 detect_coding_iso2022 (src, src_end)
 717      unsigned char *src, *src_end;
 718 {
 719   int mask = CODING_CATEGORY_MASK_ISO;
 720   int mask_found = 0;
 721   int reg[4], shift_out = 0, single_shifting = 0;
 722   int c, c1, i, charset;
 723
 724   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 725   while (mask && src < src_end)
 726     {
 727       c = *src++;
 728       switch (c)
 729         {
 730         case ISO_CODE_ESC:
 731           single_shifting = 0;
 732           if (src >= src_end)
 733             break;
 734           c = *src++;
 735           if (c >= '(' && c <= '/')
 736             {
 737               /* Designation sequence for a charset of dimension 1.  */
 738               if (src >= src_end)
 739                 break;
 740               c1 = *src++;
 741               if (c1 < ' ' || c1 >= 0x80
 742                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 743                 /* Invalid designation sequence.  Just ignore.  */
 744                 break;
 745               reg[(c - '(') % 4] = charset;
 746             }
 747           else if (c == '$')
 748             {
 749               /* Designation sequence for a charset of dimension 2.  */
 750               if (src >= src_end)
 751                 break;
 752               c = *src++;
 753               if (c >= '@' && c <= 'B')
 754                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 755                 reg[0] = charset = iso_charset_table[1][0][c];
 756               else if (c >= '(' && c <= '/')
 757                 {
 758                   if (src >= src_end)
 759                     break;
 760                   c1 = *src++;
 761                   if (c1 < ' ' || c1 >= 0x80
 762                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 763                     /* Invalid designation sequence.  Just ignore.  */
 764                     break;
 765                   reg[(c - '(') % 4] = charset;
 766                 }
 767               else
 768                 /* Invalid designation sequence.  Just ignore.  */
 769                 break;
 770             }
 771           else if (c == 'N' || c == 'O')
 772             {
 773               /* ESC <Fe> for SS2 or SS3.  */
 774               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
 775               break;
 776             }
 777           else if (c == '0' || c == '1' || c == '2')
 778             /* ESC <Fp> for start/end composition.  Just ignore.  */
 779             break;
 780           else
 781             /* Invalid escape sequence.  Just ignore.  */
 782             break;
 783
 784           /* We found a valid designation sequence for CHARSET.  */
 785           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 786           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 787             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 788           else
 789             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 790           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 791             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 792           else
 793             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 794           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 795             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
 796           else
 797             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 798           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 799             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
 800           else
 801             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 802           break;
 803
 804         case ISO_CODE_SO:
 805           single_shifting = 0;
 806           if (shift_out == 0
 807               && (reg[1] >= 0
 808                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 809                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 810             {
 811               /* Locking shift out.  */
 812               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 813               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 814             }
 815           break;
 816
 817         case ISO_CODE_SI:
 818           single_shifting = 0;
 819           if (shift_out == 1)
 820             {
 821               /* Locking shift in.  */
 822               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 823               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 824             }
 825           break;
 826
 827         case ISO_CODE_CSI:
 828           single_shifting = 0;
 829         case ISO_CODE_SS2:
 830         case ISO_CODE_SS3:
 831           {
 832             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 833
 834             if (c != ISO_CODE_CSI)
 835               {
 836                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 837                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 838                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 839                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 840                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 841                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 842                 single_shifting = 1;
 843               }
 844             if (VECTORP (Vlatin_extra_code_table)
 845                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 846               {
 847                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 848                     & CODING_FLAG_ISO_LATIN_EXTRA)
 849                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 850                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 851                     & CODING_FLAG_ISO_LATIN_EXTRA)
 852                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 853               }
 854             mask &= newmask;
 855             mask_found |= newmask;
 856           }
 857           break;
 858
 859         default:
 860           if (c < 0x80)
 861             {
 862               single_shifting = 0;
 863               break;
 864             }
 865           else if (c < 0xA0)
 866             {
 867               single_shifting = 0;
 868               if (VECTORP (Vlatin_extra_code_table)
 869                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 870                 {
 871                   int newmask = 0;
 872
 873                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 874                       & CODING_FLAG_ISO_LATIN_EXTRA)
 875                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 876                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 877                       & CODING_FLAG_ISO_LATIN_EXTRA)
 878                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 879                   mask &= newmask;
 880                   mask_found |= newmask;
 881                 }
 882               else
 883                 return 0;
 884             }
 885           else
 886             {
 887               unsigned char *src_begin = src;
 888
 889               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 890                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 891               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 892               /* Check the length of succeeding codes of the range
 893                  0xA0..0FF.  If the byte length is odd, we exclude
 894                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
 895                  when we are not single shifting.  */
 896               if (!single_shifting)
 897                 {
 898                   while (src < src_end && *src >= 0xA0)
 899                     src++;
 900                   if ((src - src_begin - 1) & 1 && src < src_end)
 901                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 902                   else
 903                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 904                 }
 905             }
 906           break;
 907         }
 908     }
 909
 910   return (mask & mask_found);
 911 }
 912
 913 /* Decode a character of which charset is CHARSET and the 1st position
 914    code is C1.  If dimension of CHARSET is 2, the 2nd position code is
 915    fetched from SRC and set to C2.  If CHARSET is negative, it means
 916    that we are decoding ill formed text, and what we can do is just to
 917    read C1 as is.  */
 918
 919 #define DECODE_ISO_CHARACTER(charset, c1)                               \
 920   do {                                                                  \
 921     int c_alt, charset_alt = (charset);                                 \
 922     if (COMPOSING_HEAD_P (coding->composing))                           \
 923       {                                                                 \
 924         *dst++ = LEADING_CODE_COMPOSITION;                              \
 925         if (COMPOSING_WITH_RULE_P (coding->composing))                  \
 926           /* To tell composition rules are embeded.  */                 \
 927           *dst++ = 0xFF;                                                \
 928         coding->composing += 2;                                         \
 929       }                                                                 \
 930     if (charset_alt >= 0)                                               \
 931       {                                                                 \
 932         if (CHARSET_DIMENSION (charset_alt) == 2)                       \
 933           {                                                             \
 934             ONE_MORE_BYTE (c2);                                         \
 935             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
 936                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
 937               {                                                         \
 938                 src--;                                                  \
 939                 charset_alt = CHARSET_ASCII;                            \
 940               }                                                         \
 941           }                                                             \
 942         if (!NILP (translation_table)                                   \
 943             && ((c_alt = translate_char (translation_table,             \
 944                                          -1, charset_alt, c1, c2)) >= 0)) \
 945           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
 946       }                                                                 \
 947     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
 948       DECODE_CHARACTER_ASCII (c1);                                      \
 949     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
 950       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
 951     else                                                                \
 952       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
 953     if (COMPOSING_WITH_RULE_P (coding->composing))                      \
 954       /* To tell a composition rule follows.  */                        \
 955       coding->composing = COMPOSING_WITH_RULE_RULE;                     \
 956   } while (0)
 957
 958 /* Set designation state into CODING.  */
 959 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 960   do {                                                                     \
 961     int charset;                                                           \
 962                                                                            \
 963     if (final_char < '0' || final_char >= 128)                             \
 964       goto label_invalid_code;                                             \
 965     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
 966                                  make_number (chars),                      \
 967                                  make_number (final_char));                \
 968     if (charset >= 0                                                       \
 969         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
 970             || coding->safe_charsets[charset]))                            \
 971       {                                                                    \
 972         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 973             && reg == 0                                                    \
 974             && charset == CHARSET_ASCII)                                   \
 975           {                                                                \
 976             /* We should insert this designation sequence as is so         \
 977                that it is surely written back to a file.  */               \
 978             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 979             goto label_invalid_code;                                       \
 980           }                                                                \
 981         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 982         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 983             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 984           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 985         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 986       }                                                                    \
 987     else                                                                   \
 988       {                                                                    \
 989         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 990         goto label_invalid_code;                                           \
 991       }                                                                    \
 992   } while (0)
 993
 994 /* Return 0 if there's a valid composing sequence starting at SRC and
 995    ending before SRC_END, else return -1.  */
 996
 997 int
 998 check_composing_code (coding, src, src_end)
 999      struct coding_system *coding;
1000      unsigned char *src, *src_end;
1001 {
1002   int charset, c, c1, dim;
1003
1004   while (src < src_end)
1005     {
1006       c = *src++;
1007       if (c >= 0x20)
1008         continue;
1009       if (c != ISO_CODE_ESC || src >= src_end)
1010         return -1;
1011       c = *src++;
1012       if (c == '1') /* end of compsition */
1013         return 0;
1014       if (src + 2 >= src_end
1015           || !coding->flags & CODING_FLAG_ISO_DESIGNATION)
1016         return -1;
1017
1018       dim = (c == '$');
1019       if (dim == 1)
1020         c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
1021       if (c >= '(' && c <= '/')
1022         {
1023           c1 = *src++;
1024           if ((c1 < ' ' || c1 >= 0x80)
1025               || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
1026               || ! coding->safe_charsets[charset]
1027               || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
1028                   == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1029             return -1;
1030         }
1031       else
1032         return -1;
1033     }
1034
1035   /* We have not found the sequence "ESC 1".  */
1036   return -1;
1037 }
1038
1039 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1040
1041 int
1042 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1043      struct coding_system *coding;
1044      unsigned char *source, *destination;
1045      int src_bytes, dst_bytes;
1046 {
1047   unsigned char *src = source;
1048   unsigned char *src_end = source + src_bytes;
1049   unsigned char *dst = destination;
1050   unsigned char *dst_end = destination + dst_bytes;
1051   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1052      from DST_END to assure that overflow checking is necessary only
1053      at the head of loop.  */
1054   unsigned char *adjusted_dst_end = dst_end - 6;
1055   int charset;
1056   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1057   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1058   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1059   Lisp_Object translation_table
1060     = coding->translation_table_for_decode;
1061   int result = CODING_FINISH_NORMAL;
1062
1063   if (!NILP (Venable_character_translation) && NILP (translation_table))
1064     translation_table = Vstandard_translation_table_for_decode;
1065
1066   coding->produced_char = 0;
1067   coding->fake_multibyte = 0;
1068   while (src < src_end && (dst_bytes
1069                            ? (dst < adjusted_dst_end)
1070                            : (dst < src - 6)))
1071     {
1072       /* SRC_BASE remembers the start position in source in each loop.
1073          The loop will be exited when there's not enough source text
1074          to analyze long escape sequence or 2-byte code (within macros
1075          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1076          to SRC_BASE before exiting.  */
1077       unsigned char *src_base = src;
1078       int c1 = *src++, c2;
1079
1080       switch (iso_code_class [c1])
1081         {
1082         case ISO_0x20_or_0x7F:
1083           if (!coding->composing
1084               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1085             {
1086               /* This is SPACE or DEL.  */
1087               *dst++ = c1;
1088               coding->produced_char++;
1089               break;
1090             }
1091           /* This is a graphic character, we fall down ...  */
1092
1093         case ISO_graphic_plane_0:
1094           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1095             {
1096               /* This is a composition rule.  */
1097               *dst++ = c1 | 0x80;
1098               coding->composing = COMPOSING_WITH_RULE_TAIL;
1099             }
1100           else
1101             DECODE_ISO_CHARACTER (charset0, c1);
1102           break;
1103
1104         case ISO_0xA0_or_0xFF:
1105           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1106               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1107             goto label_invalid_code;
1108           /* This is a graphic character, we fall down ... */
1109
1110         case ISO_graphic_plane_1:
1111           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1112             goto label_invalid_code;
1113           else
1114             DECODE_ISO_CHARACTER (charset1, c1);
1115           break;
1116
1117         case ISO_control_code:
1118           /* All ISO2022 control characters in this class have the
1119              same representation in Emacs internal format.  */
1120           if (c1 == '\n'
1121               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1122               && (coding->eol_type == CODING_EOL_CR
1123                   || coding->eol_type == CODING_EOL_CRLF))
1124             {
1125               result = CODING_FINISH_INCONSISTENT_EOL;
1126               goto label_end_of_loop_2;
1127             }
1128           *dst++ = c1;
1129           coding->produced_char++;
1130           if (c1 >= 0x80)
1131             coding->fake_multibyte = 1;
1132           break;
1133
1134         case ISO_carriage_return:
1135           if (coding->eol_type == CODING_EOL_CR)
1136             *dst++ = '\n';
1137           else if (coding->eol_type == CODING_EOL_CRLF)
1138             {
1139               ONE_MORE_BYTE (c1);
1140               if (c1 == ISO_CODE_LF)
1141                 *dst++ = '\n';
1142               else
1143                 {
1144                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1145                     {
1146                       result = CODING_FINISH_INCONSISTENT_EOL;
1147                       goto label_end_of_loop_2;
1148                     }
1149                   src--;
1150                   *dst++ = '\r';
1151                 }
1152             }
1153           else
1154             *dst++ = c1;
1155           coding->produced_char++;
1156           break;
1157
1158         case ISO_shift_out:
1159           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1160               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1161             goto label_invalid_code;
1162           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1163           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1164           break;
1165
1166         case ISO_shift_in:
1167           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1168             goto label_invalid_code;
1169           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1170           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1171           break;
1172
1173         case ISO_single_shift_2_7:
1174         case ISO_single_shift_2:
1175           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1176             goto label_invalid_code;
1177           /* SS2 is handled as an escape sequence of ESC 'N' */
1178           c1 = 'N';
1179           goto label_escape_sequence;
1180
1181         case ISO_single_shift_3:
1182           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1183             goto label_invalid_code;
1184           /* SS2 is handled as an escape sequence of ESC 'O' */
1185           c1 = 'O';
1186           goto label_escape_sequence;
1187
1188         case ISO_control_sequence_introducer:
1189           /* CSI is handled as an escape sequence of ESC '[' ...  */
1190           c1 = '[';
1191           goto label_escape_sequence;
1192
1193         case ISO_escape:
1194           ONE_MORE_BYTE (c1);
1195         label_escape_sequence:
1196           /* Escape sequences handled by Emacs are invocation,
1197              designation, direction specification, and character
1198              composition specification.  */
1199           switch (c1)
1200             {
1201             case '&':           /* revision of following character set */
1202               ONE_MORE_BYTE (c1);
1203               if (!(c1 >= '@' && c1 <= '~'))
1204                 goto label_invalid_code;
1205               ONE_MORE_BYTE (c1);
1206               if (c1 != ISO_CODE_ESC)
1207                 goto label_invalid_code;
1208               ONE_MORE_BYTE (c1);
1209               goto label_escape_sequence;
1210
1211             case '$':           /* designation of 2-byte character set */
1212               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1213                 goto label_invalid_code;
1214               ONE_MORE_BYTE (c1);
1215               if (c1 >= '@' && c1 <= 'B')
1216                 {       /* designation of JISX0208.1978, GB2312.1980,
1217                            or JISX0208.1980 */
1218                   DECODE_DESIGNATION (0, 2, 94, c1);
1219                 }
1220               else if (c1 >= 0x28 && c1 <= 0x2B)
1221                 {       /* designation of DIMENSION2_CHARS94 character set */
1222                   ONE_MORE_BYTE (c2);
1223                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1224                 }
1225               else if (c1 >= 0x2C && c1 <= 0x2F)
1226                 {       /* designation of DIMENSION2_CHARS96 character set */
1227                   ONE_MORE_BYTE (c2);
1228                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1229                 }
1230               else
1231                 goto label_invalid_code;
1232               break;
1233
1234             case 'n':           /* invocation of locking-shift-2 */
1235               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1236                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1237                 goto label_invalid_code;
1238               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1239               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1240               break;
1241
1242             case 'o':           /* invocation of locking-shift-3 */
1243               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1244                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1245                 goto label_invalid_code;
1246               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1247               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1248               break;
1249
1250             case 'N':           /* invocation of single-shift-2 */
1251               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1252                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1253                 goto label_invalid_code;
1254               ONE_MORE_BYTE (c1);
1255               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1256               DECODE_ISO_CHARACTER (charset, c1);
1257               break;
1258
1259             case 'O':           /* invocation of single-shift-3 */
1260               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1261                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1262                 goto label_invalid_code;
1263               ONE_MORE_BYTE (c1);
1264               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1265               DECODE_ISO_CHARACTER (charset, c1);
1266               break;
1267
1268             case '0': case '2': /* start composing */
1269               /* Before processing composing, we must be sure that all
1270                  characters being composed are supported by CODING.
1271                  If not, we must give up composing.  */
1272               if (check_composing_code (coding, src, src_end) == 0)
1273                 {
1274                   /* We are looking at a valid composition sequence.  */
1275                   coding->composing = (c1 == '0'
1276                                        ? COMPOSING_NO_RULE_HEAD
1277                                        : COMPOSING_WITH_RULE_HEAD);
1278                   coding->composed_chars = 0;
1279                 }
1280               else
1281                 {
1282                   *dst++ = ISO_CODE_ESC;
1283                   *dst++ = c1;
1284                   coding->produced_char += 2;
1285                 }
1286               break;
1287
1288             case '1':           /* end composing */
1289               if (!coding->composing)
1290                 {
1291                   *dst++ = ISO_CODE_ESC;
1292                   *dst++ = c1;
1293                   coding->produced_char += 2;
1294                   break;
1295                 }
1296
1297               if (coding->composed_chars > 0)
1298                 {
1299                   if (coding->composed_chars == 1)
1300                     {
1301                       unsigned char *this_char_start = dst;
1302                       int this_bytes;
1303
1304                       /* Only one character is in the composing
1305                          sequence.  Make it a normal character.  */
1306                       while (*--this_char_start != LEADING_CODE_COMPOSITION);
1307                       dst = (this_char_start
1308                              + (coding->composing == COMPOSING_NO_RULE_TAIL
1309                                 ? 1 : 2));
1310                       *dst -= 0x20;
1311                       if (*dst == 0x80)
1312                         *++dst &= 0x7F;
1313                       this_bytes = BYTES_BY_CHAR_HEAD (*dst);
1314                       while (this_bytes--) *this_char_start++ = *dst++;
1315                       dst = this_char_start;
1316                     }
1317                   coding->produced_char++;
1318                 }
1319               coding->composing = COMPOSING_NO;
1320               break;
1321
1322             case '[':           /* specification of direction */
1323               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1324                 goto label_invalid_code;
1325               /* For the moment, nested direction is not supported.
1326                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1327                  left-to-right, and nozero means right-to-left.  */
1328               ONE_MORE_BYTE (c1);
1329               switch (c1)
1330                 {
1331                 case ']':       /* end of the current direction */
1332                   coding->mode &= ~CODING_MODE_DIRECTION;
1333
1334                 case '0':       /* end of the current direction */
1335                 case '1':       /* start of left-to-right direction */
1336                   ONE_MORE_BYTE (c1);
1337                   if (c1 == ']')
1338                     coding->mode &= ~CODING_MODE_DIRECTION;
1339                   else
1340                     goto label_invalid_code;
1341                   break;
1342
1343                 case '2':       /* start of right-to-left direction */
1344                   ONE_MORE_BYTE (c1);
1345                   if (c1 == ']')
1346                     coding->mode |= CODING_MODE_DIRECTION;
1347                   else
1348                     goto label_invalid_code;
1349                   break;
1350
1351                 default:
1352                   goto label_invalid_code;
1353                 }
1354               break;
1355
1356             default:
1357               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1358                 goto label_invalid_code;
1359               if (c1 >= 0x28 && c1 <= 0x2B)
1360                 {       /* designation of DIMENSION1_CHARS94 character set */
1361                   ONE_MORE_BYTE (c2);
1362                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1363                 }
1364               else if (c1 >= 0x2C && c1 <= 0x2F)
1365                 {       /* designation of DIMENSION1_CHARS96 character set */
1366                   ONE_MORE_BYTE (c2);
1367                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1368                 }
1369               else
1370                 {
1371                   goto label_invalid_code;
1372                 }
1373             }
1374           /* We must update these variables now.  */
1375           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1376           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1377           break;
1378
1379         label_invalid_code:
1380           while (src_base < src)
1381             *dst++ = *src_base++;
1382           coding->fake_multibyte = 1;
1383         }
1384       continue;
1385
1386     label_end_of_loop:
1387       result = CODING_FINISH_INSUFFICIENT_SRC;
1388     label_end_of_loop_2:
1389       src = src_base;
1390       break;
1391     }
1392
1393   if (src < src_end)
1394     {
1395       if (result == CODING_FINISH_NORMAL)
1396         result = CODING_FINISH_INSUFFICIENT_DST;
1397       else if (result != CODING_FINISH_INCONSISTENT_EOL
1398                && coding->mode & CODING_MODE_LAST_BLOCK)
1399         {
1400           /* This is the last block of the text to be decoded.  We had
1401              better just flush out all remaining codes in the text
1402              although they are not valid characters.  */
1403           src_bytes = src_end - src;
1404           if (dst_bytes && (dst_end - dst < src_bytes))
1405             src_bytes = dst_end - dst;
1406           bcopy (src, dst, src_bytes);
1407           dst += src_bytes;
1408           src += src_bytes;
1409           coding->fake_multibyte = 1;
1410         }
1411     }
1412
1413   coding->consumed = coding->consumed_char = src - source;
1414   coding->produced = dst - destination;
1415   return result;
1416 }
1417
1418 /* ISO2022 encoding stuff.  */
1419
1420 /*
1421    It is not enough to say just "ISO2022" on encoding, we have to
1422    specify more details.  In Emacs, each coding system of ISO2022
1423    variant has the following specifications:
1424         1. Initial designation to G0 thru G3.
1425         2. Allows short-form designation?
1426         3. ASCII should be designated to G0 before control characters?
1427         4. ASCII should be designated to G0 at end of line?
1428         5. 7-bit environment or 8-bit environment?
1429         6. Use locking-shift?
1430         7. Use Single-shift?
1431    And the following two are only for Japanese:
1432         8. Use ASCII in place of JIS0201-1976-Roman?
1433         9. Use JISX0208-1983 in place of JISX0208-1978?
1434    These specifications are encoded in `coding->flags' as flag bits
1435    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1436    details.
1437 */
1438
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, and record the new designation in CODING.

   The output is: an optional revision announcement `ESC & <rev>'
   (only when the revision number CODING holds for CHARSET is below
   255), then `ESC', a `$' for a charset whose dimension is not 1, an
   intermediate character selecting REG ("()*+" for 94-charsets,
   ",-./" for the others), and the final character of CHARSET.

   The intermediate character is left out (short form) only for a
   multi-byte 94-charset going to register 0 whose final character
   is '@', 'A', or 'B', and only if CODING has
   CODING_FLAG_ISO_SHORT_FORM set.

   CHARSET and REG are evaluated more than once.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    int revision_ = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);  \
    unsigned char final_ = CHARSET_ISO_FINAL_CHAR (charset);            \
    int multibyte_ = CHARSET_DIMENSION (charset) != 1;                  \
    int chars94_ = CHARSET_CHARS (charset) == 94;                       \
    int short_form_ = (multibyte_ && chars94_                           \
                       && (coding->flags & CODING_FLAG_ISO_SHORT_FORM)  \
                       && reg == 0                                      \
                       && final_ >= '@' && final_ <= 'B');              \
                                                                        \
    if (revision_ < 255)                                                \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision_;                                       \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (multibyte_)                                                     \
      *dst++ = '$';                                                     \
    if (! short_form_)                                                  \
      *dst++ = (unsigned char) ((chars94_ ? "()*+" : ",-./")[reg]);     \
    *dst++ = final_;                                                    \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
1480
/* Single-shift functions SS2 and SS3.  When CODING has
   CODING_FLAG_ISO_SEVEN_BITS set, the escape-sequence form is
   produced (`ESC N' for SS2, `ESC O' for SS3).  Otherwise the single
   control code ISO_CODE_SS2 or ISO_CODE_SS3 is produced and
   coding->fake_multibyte is set.  In both cases the single-shifting
   state of CODING is turned on.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        coding->fake_multibyte = 1;                             \
        *dst++ = ISO_CODE_SS2;                                  \
      }                                                         \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      {                                                         \
        coding->fake_multibyte = 1;                             \
        *dst++ = ISO_CODE_SS3;                                  \
      }                                                         \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
  } while (0)
1508
/* Locking-shift functions.  Each of the four macros below produces
   the code that invokes one graphic register to graphic plane 0 and
   records that register number as the invocation of plane 0 in
   CODING:

     ENCODE_SHIFT_IN          ISO_CODE_SI   register 0
     ENCODE_SHIFT_OUT         ISO_CODE_SO   register 1
     ENCODE_LOCKING_SHIFT_2   ESC n         register 2
     ENCODE_LOCKING_SHIFT_3   ESC o         register 3  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
  } while (0)
1536
/* Produce the code of a one-byte character of CHARSET whose
   position-code is C1.  The cases are tried in this order:

     - a single shift is pending: clear it and produce C1 with the
       high bit off (7-bit coding system) or on;
     - CHARSET is invoked to graphic plane 0: produce C1 with the
       high bit off;
     - CHARSET is invoked to graphic plane 1: produce C1 with the
       high bit on;
     - CODING has CODING_FLAG_ISO_SAFE set and CHARSET is not marked
       in coding->safe_charsets: produce one substitution character,
       or two if CHARSET_WIDTH (CHARSET) is 2;
     - otherwise call encode_invocation_designation to produce the
       necessary designation/invocation codes and try again.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? c1 & 0x7F : c1 | 0x80);                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    if ((coding->flags & CODING_FLAG_ISO_SAFE)                          \
        && ! coding->safe_charsets[charset])                            \
      {                                                                 \
        /* This character must not be encoded; stand in for it with    \
           as many substitution characters as its width.  */            \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to either graphic plane yet.  Produce    \
       the codes that make it available, then go around again to       \
       produce the character itself.  */                                \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1580
/* Produce the codes of a two-byte character of CHARSET whose
   position-codes are C1 and C2.  The cases are tried in this order:

     - a single shift is pending: clear it and produce both bytes
       with the high bit off (7-bit coding system) or on;
     - CHARSET is invoked to graphic plane 0: produce both bytes with
       the high bit off;
     - CHARSET is invoked to graphic plane 1: produce both bytes with
       the high bit on;
     - CODING has CODING_FLAG_ISO_SAFE set and CHARSET is not marked
       in coding->safe_charsets: produce one substitution character,
       or two if CHARSET_WIDTH (CHARSET) is 2;
     - otherwise call encode_invocation_designation to produce the
       necessary designation/invocation codes and try again.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    if ((coding->flags & CODING_FLAG_ISO_SAFE)                          \
        && ! coding->safe_charsets[charset])                            \
      {                                                                 \
        /* This character must not be encoded; stand in for it with    \
           as many substitution characters as its width.  */            \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to either graphic plane yet.  Produce    \
       the codes that make it available, then go around again to       \
       produce the character itself.  */                                \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1623
/* Produce codes for one character whose charset is CHARSET and whose
   position-codes are C1 and C2.

   If `translation_table' is non-nil and translate_char yields a
   non-negative character for (CHARSET, C1, C2), that character is
   encoded instead; SPLIT_CHAR then stores its position-codes into C1
   and C2, so both must be lvalues.

   A character of a defined charset goes through
   ENCODE_ISO_CHARACTER_DIMENSION1 or ENCODE_ISO_CHARACTER_DIMENSION2.
   Before that, when the original CHARSET is CHARSET_ASCII and CODING
   has CODING_FLAG_ISO_USE_ROMAN set, charset_latin_jisx0201 is used;
   when the original CHARSET is charset_jisx0208 and CODING has
   CODING_FLAG_ISO_USE_OLDJIS set, charset_jisx0208_1978 is used.

   A character of an undefined charset is output as CHARSET, C1, and
   (if non-zero) C2, each masked to 7 bits when CODING has
   CODING_FLAG_ISO_SEVEN_BITS set.

   Finally coding->consumed_char is incremented unless a composition
   is in progress.  */

#define ENCODE_ISO_CHARACTER(charset, c1, c2)                           \
  do {                                                                  \
    int charset_alt = charset;                                          \
                                                                        \
    if (! NILP (translation_table))                                     \
      {                                                                 \
        int c_alt = translate_char (translation_table, -1,              \
                                    charset, c1, c2);                   \
        if (c_alt >= 0)                                                 \
          {                                                             \
            SPLIT_CHAR (c_alt, charset_alt, c1, c2);                    \
          }                                                             \
      }                                                                 \
                                                                        \
    if (! CHARSET_DEFINED_P (charset_alt))                              \
      {                                                                 \
        int seven_bits_                                                 \
          = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) != 0;          \
                                                                        \
        *dst++ = seven_bits_ ? charset & 0x7f : charset;                \
        *dst++ = seven_bits_ ? c1 & 0x7f : c1;                          \
        if (c2)                                                         \
          *dst++ = seven_bits_ ? c2 & 0x7f : c2;                        \
      }                                                                 \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
      {                                                                 \
        if (charset == CHARSET_ASCII                                    \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))             \
          charset_alt = charset_latin_jisx0201;                         \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);              \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (charset == charset_jisx0208                                 \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))            \
          charset_alt = charset_jisx0208_1978;                          \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);          \
      }                                                                 \
                                                                        \
    if (! COMPOSING_P (coding->composing))                              \
      coding->consumed_char++;                                          \
  } while (0)
1671
1672 /* Produce designation and invocation codes at a place pointed by DST
1673    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1674    Return new DST.  */
1675
1676 unsigned char *
1677 encode_invocation_designation (charset, coding, dst)
1678      int charset;
1679      struct coding_system *coding;
1680      unsigned char *dst;
1681 {
1682   int reg;                      /* graphic register number */
1683
1684   /* At first, check designations.  */
1685   for (reg = 0; reg < 4; reg++)
1686     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1687       break;
1688
1689   if (reg >= 4)
1690     {
1691       /* CHARSET is not yet designated to any graphic registers.  */
1692       /* At first check the requested designation.  */
1693       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1694       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1695         /* Since CHARSET requests no special designation, designate it
1696            to graphic register 0.  */
1697         reg = 0;
1698
1699       ENCODE_DESIGNATION (charset, reg, coding);
1700     }
1701
1702   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1703       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1704     {
1705       /* Since the graphic register REG is not invoked to any graphic
1706          planes, invoke it to graphic plane 0.  */
1707       switch (reg)
1708         {
1709         case 0:                 /* graphic register 0 */
1710           ENCODE_SHIFT_IN;
1711           break;
1712
1713         case 1:                 /* graphic register 1 */
1714           ENCODE_SHIFT_OUT;
1715           break;
1716
1717         case 2:                 /* graphic register 2 */
1718           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1719             ENCODE_SINGLE_SHIFT_2;
1720           else
1721             ENCODE_LOCKING_SHIFT_2;
1722           break;
1723
1724         case 3:                 /* graphic register 3 */
1725           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1726             ENCODE_SINGLE_SHIFT_3;
1727           else
1728             ENCODE_LOCKING_SHIFT_3;
1729           break;
1730         }
1731     }
1732   return dst;
1733 }
1734
/* The following three macros produce codes for indicating
   composition: `ESC 0' starts a composition without rules, `ESC 2'
   starts a composition with rules, and `ESC 1' ends a composition.
   Each expands to one parenthesized expression, so it is safe in any
   expression or statement context.  */
#define ENCODE_COMPOSITION_NO_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '0')
#define ENCODE_COMPOSITION_WITH_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '2')
#define ENCODE_COMPOSITION_END \
  (*dst++ = ISO_CODE_ESC, *dst++ = '1')
1739
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER produces the control sequence
   introducer: `ESC [' when CODING has CODING_FLAG_ISO_SEVEN_BITS set
   (tested as a bit, since other flags may be set as well), and the
   single code ISO_CODE_CSI otherwise.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R produce the
   introducer followed by `2 ]' and `0 ]' respectively.  They are
   complete statements wrapped in do ... while (0), because the
   introducer macro is itself a statement and cannot be an operand of
   the comma operator.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

#define ENCODE_DIRECTION_R2L                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '2', *dst++ = ']';                         \
  } while (0)

#define ENCODE_DIRECTION_L2R                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '0', *dst++ = ']';                         \
  } while (0)
1755
/* Produce the codes that bring the graphic planes and registers back
   to their initial state: a shift-in if graphic plane 0 does not
   currently invoke register 0, then, for every graphic register that
   has an initial designation (a non-negative charset) different from
   its current one, the designation of that initial charset.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg = 0;                                                            \
                                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reg < 4)                                                         \
      {                                                                     \
        int initial_charset_                                                \
          = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg);              \
                                                                            \
        if (initial_charset_ >= 0                                           \
            && CODING_SPEC_ISO_DESIGNATION (coding, reg) != initial_charset_) \
          ENCODE_DESIGNATION (initial_charset_, reg, coding);               \
        reg++;                                                              \
      }                                                                     \
  } while (0)
1770
1771 /* Produce designation sequences of charsets in the line started from
1772    SRC to a place pointed by *DSTP, and update DSTP.
1773
1774    If the current block ends before any end-of-line, we may fail to
1775    find all the necessary designations.  */
1776
1777 void
1778 encode_designation_at_bol (coding, table, src, src_end, dstp)
1779      struct coding_system *coding;
1780      Lisp_Object table;
1781      unsigned char *src, *src_end, **dstp;
1782 {
1783   int charset, c, found = 0, reg;
1784   /* Table of charsets to be designated to each graphic register.  */
1785   int r[4];
1786   unsigned char *dst = *dstp;
1787
1788   for (reg = 0; reg < 4; reg++)
1789     r[reg] = -1;
1790
1791   while (src < src_end && *src != '\n' && found < 4)
1792     {
1793       int bytes = BYTES_BY_CHAR_HEAD (*src);
1794
1795       if (NILP (table))
1796         charset = CHARSET_AT (src);
1797       else
1798         {
1799           int c_alt;
1800           unsigned char c1, c2;
1801
1802           SPLIT_STRING(src, bytes, charset, c1, c2);
1803           if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
1804             charset = CHAR_CHARSET (c_alt);
1805         }
1806
1807       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1808       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1809         {
1810           found++;
1811           r[reg] = charset;
1812         }
1813
1814       src += bytes;
1815     }
1816
1817   if (found)
1818     {
1819       for (reg = 0; reg < 4; reg++)
1820         if (r[reg] >= 0
1821             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1822           ENCODE_DESIGNATION (r[reg], reg, coding);
1823       *dstp = dst;
1824     }
1825 }
1826
1827 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
     /* Encode the SRC_BYTES bytes of text at SOURCE, which is in Emacs'
        internal format, into the ISO2022 variant described by CODING,
        and store the result at DESTINATION.

        On return, coding->consumed is the number of source bytes used,
        coding->consumed_char the number of characters counted while
        encoding, and coding->produced and coding->produced_char the
        number of bytes stored.  The value is CODING_FINISH_NORMAL,
        CODING_FINISH_INSUFFICIENT_SRC (the source ran out in the middle
        of a multi-byte sequence), or CODING_FINISH_INSUFFICIENT_DST (the
        loop stopped while source text was still left).

        When DST_BYTES is zero, the loop is bounded by SRC - 19 instead
        of by the end of DESTINATION.  NOTE(review): presumably this is
        for callers that encode in place, with DESTINATION trailing
        SOURCE in the same buffer -- confirm against the callers.  */
1828
1829 int
1830 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1831      struct coding_system *coding;
1832      unsigned char *source, *destination;
1833      int src_bytes, dst_bytes;
1834 {
1835   unsigned char *src = source;
1836   unsigned char *src_end = source + src_bytes;
1837   unsigned char *dst = destination;
1838   unsigned char *dst_end = destination + dst_bytes;
1839   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1840      from DST_END to assure overflow checking is necessary only at the
1841      head of loop.  */
1842   unsigned char *adjusted_dst_end = dst_end - 19;
1843   Lisp_Object translation_table
1844       = coding->translation_table_for_encode;
1845   int result = CODING_FINISH_NORMAL;
1846
       /* Fall back to the standard table only when character translation
          is enabled and CODING supplies no table of its own.  */
1847   if (!NILP (Venable_character_translation) && NILP (translation_table))
1848     translation_table = Vstandard_translation_table_for_encode;
1849
1850   coding->consumed_char = 0;
1851   coding->fake_multibyte = 0;
1852   while (src < src_end && (dst_bytes
1853                            ? (dst < adjusted_dst_end)
1854                            : (dst < src - 19)))
1855     {
1856       /* SRC_BASE remembers the start position in source in each loop.
1857          The loop will be exited when there's not enough source text
1858          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1859          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1860          reset to SRC_BASE before exiting.  */
1861       unsigned char *src_base = src;
1862       int charset, c1, c2, c3, c4;
1863
1864       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1865           && CODING_SPEC_ISO_BOL (coding))
1866         {
1867           /* We have to produce designation sequences if any now.  */
1868           encode_designation_at_bol (coding, translation_table,
1869                                      src, src_end, &dst);
1870           CODING_SPEC_ISO_BOL (coding) = 0;
1871         }
1872
1873       c1 = *src++;
1874       /* If we are seeing a component of a composite character, we are
1875          seeing a leading-code encoded irregularly for composition, or
1876          a composition rule if composing with rule.  We must set C1 to
1877          a normal leading-code or an ASCII code.  If we are not seeing
1878          a composite character, we must reset composition,
1879          designation, and invocation states.  */
1880       if (COMPOSING_P (coding->composing))
1881         {
1882           if (c1 < 0xA0)
1883             {
1884               /* We are not in a composite character any longer.  */
1885               coding->composing = COMPOSING_NO;
1886               ENCODE_RESET_PLANE_AND_REGISTER;
1887               ENCODE_COMPOSITION_END;
1888             }
1889           else
1890             {
1891               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1892                 {
                       /* C1 is a composition rule: output it with the
                          high bit off and expect a component next.  */
1893                   *dst++ = c1 & 0x7F;
1894                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1895                   continue;
1896                 }
1897               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1898                 coding->composing = COMPOSING_WITH_RULE_RULE;
1899               if (c1 == 0xA0)
1900                 {
1901                   /* This is an ASCII component.  */
1902                   ONE_MORE_BYTE (c1);
1903                   c1 &= 0x7F;
1904                 }
1905               else
1906                 /* This is a leading-code of non ASCII component.  */
1907                 c1 -= 0x20;
1908             }
1909         }
1910
1911       /* Now encode one character.  C1 is a control character, an
1912          ASCII character, or a leading-code of multi-byte character.  */
1913       switch (emacs_code_class[c1])
1914         {
1915         case EMACS_ascii_code:
1916           c2 = 0;
1917           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1918           break;
1919
1920         case EMACS_control_code:
1921           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1922             ENCODE_RESET_PLANE_AND_REGISTER;
1923           *dst++ = c1;
1924           coding->consumed_char++;
1925           break;
1926
1927         case EMACS_carriage_return_code:
               /* Unless CODING_MODE_SELECTIVE_DISPLAY is set, '\r' is
                  output like any other control code.  */
1928           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1929             {
1930               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1931                 ENCODE_RESET_PLANE_AND_REGISTER;
1932               *dst++ = c1;
1933               coding->consumed_char++;
1934               break;
1935             }
1936           /* fall down to treat '\r' as '\n' ...  */
1937
1938         case EMACS_linefeed_code:
1939           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1940             ENCODE_RESET_PLANE_AND_REGISTER;
               /* With CODING_FLAG_ISO_INIT_AT_BOL only the recorded state
                  is restored to the initial designations; no designation
                  sequence is output here.  */
1941           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1942             bcopy (coding->spec.iso2022.initial_designation,
1943                    coding->spec.iso2022.current_designation,
1944                    sizeof coding->spec.iso2022.initial_designation);
               /* Output the end-of-line in the format of coding->eol_type:
                  LF (also when undecided), CR LF, or else CR alone.  */
1945           if (coding->eol_type == CODING_EOL_LF
1946               || coding->eol_type == CODING_EOL_UNDECIDED)
1947             *dst++ = ISO_CODE_LF;
1948           else if (coding->eol_type == CODING_EOL_CRLF)
1949             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1950           else
1951             *dst++ = ISO_CODE_CR;
1952           CODING_SPEC_ISO_BOL (coding) = 1;
1953           coding->consumed_char++;
1954           break;
1955
1956         case EMACS_leading_code_2:
1957           ONE_MORE_BYTE (c2);
1958           c3 = 0;
1959           if (c2 < 0xA0)
1960             {
1961               /* invalid sequence */
                   /* Pass the leading byte through unchanged and back up
                      SRC so that the byte read as C2 is handled by the
                      next iteration.  */
1962               *dst++ = c1;
1963               src--;
1964               coding->consumed_char++;
1965             }
1966           else
1967             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1968           break;
1969
1970         case EMACS_leading_code_3:
1971           TWO_MORE_BYTES (c2, c3);
1972           c4 = 0;
1973           if (c2 < 0xA0 || c3 < 0xA0)
1974             {
1975               /* invalid sequence */
1976               *dst++ = c1;
1977               src -= 2;
1978               coding->consumed_char++;
1979             }
               /* Below LEADING_CODE_PRIVATE_11, C1 itself is passed as the
                  charset with C2 and C3 as position-codes; otherwise C2
                  is passed as the charset and C3 as the only
                  position-code.  */
1980           else if (c1 < LEADING_CODE_PRIVATE_11)
1981             ENCODE_ISO_CHARACTER (c1, c2, c3);
1982           else
1983             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1984           break;
1985
1986         case EMACS_leading_code_4:
1987           THREE_MORE_BYTES (c2, c3, c4);
1988           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1989             {
1990               /* invalid sequence */
1991               *dst++ = c1;
1992               src -= 3;
1993               coding->consumed_char++;
1994             }
1995           else
                 /* C1 is not passed on here; C2 is passed as the
                    charset.  */
1996             ENCODE_ISO_CHARACTER (c2, c3, c4);
1997           break;
1998
1999         case EMACS_leading_code_composition:
2000           ONE_MORE_BYTE (c2);
2001           if (c2 < 0xA0)
2002             {
2003               /* invalid sequence */
2004               *dst++ = c1;
2005               src--;
2006               coding->consumed_char++;
2007             }
2008           else if (c2 == 0xFF)
2009             {
                   /* A 0xFF byte after the composition leading-code
                      starts a composition with rules.  */
2010               ENCODE_RESET_PLANE_AND_REGISTER;
2011               coding->composing = COMPOSING_WITH_RULE_HEAD;
2012               ENCODE_COMPOSITION_WITH_RULE_START;
2013               coding->consumed_char++;
2014             }
2015           else
2016             {
2017               ENCODE_RESET_PLANE_AND_REGISTER;
2018               /* Rewind one byte because it is a character code of
2019                  composition elements.  */
2020               src--;
2021               coding->composing = COMPOSING_NO_RULE_HEAD;
2022               ENCODE_COMPOSITION_NO_RULE_START;
2023               coding->consumed_char++;
2024             }
2025           break;
2026
2027         case EMACS_invalid_code:
2028           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2029             ENCODE_RESET_PLANE_AND_REGISTER;
2030           *dst++ = c1;
2031           coding->consumed_char++;
2032           break;
2033         }
2034       continue;
           /* Reached only by a jump to this label (per the comment on
              SRC_BASE above, from the ..._MORE_BYTE(S) macros): give the
              incomplete sequence back to the caller.  */
2035     label_end_of_loop:
2036       result = CODING_FINISH_INSUFFICIENT_SRC;
2037       src = src_base;
2038       break;
2039     }
2040
       /* Source text is left although no sequence was cut short, so the
          loop must have stopped on its destination bound.  */
2041   if (src < src_end && result == CODING_FINISH_NORMAL)
2042     result = CODING_FINISH_INSUFFICIENT_DST;
2043
2044   /* If this is the last block of the text to be encoded, we must
2045      reset graphic planes and registers to the initial state, and
2046      flush out the carryover if any.  */
2047   if (coding->mode & CODING_MODE_LAST_BLOCK)
2048     {
2049       ENCODE_RESET_PLANE_AND_REGISTER;
2050       if (COMPOSING_P (coding->composing))
2051         ENCODE_COMPOSITION_END;
2052       if (result == CODING_FINISH_INSUFFICIENT_SRC)
2053         {
               /* Copy the incomplete trailing sequence as is, as far as
                  DESTINATION has room.  */
2054           while (src < src_end && dst < dst_end)
2055             *dst++ = *src++;
2056         }
2057     }
2058   coding->consumed = src - source;
2059   coding->produced = coding->produced_char = dst - destination;
2060   return result;
2061 }
2062
2063 \f
2064 /*** 4. SJIS and BIG5 handlers ***/
2065
2066 /* Although SJIS and BIG5 are not ISO's coding system, they are used
2067    quite widely.  So, for the moment, Emacs supports them in the bare
2068    C code.  But, in the future, they may be supported only by CCL.  */
2069
2070 /* SJIS is a coding system encoding three character sets: ASCII, right
2071    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2072    as is.  A character of charset katakana-jisx0201 is encoded by
2073    "position-code + 0x80".  A character of charset japanese-jisx0208
2074    is encoded in 2-byte but two position-codes are divided and shifted
2075    so that they fit in the ranges below.
2076
2077    --- CODE RANGE of SJIS ---
2078    (character set)      (range)
2079    ASCII                0x00 .. 0x7F
2080    KATAKANA-JISX0201    0xA0 .. 0xDF
2081    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2082             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2083    -------------------------------
2084
2085 */
2086
2087 /* BIG5 is a coding system encoding two character sets: ASCII and
2088    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2089    character set and is encoded in two-byte.
2090
2091    --- CODE RANGE of BIG5 ---
2092    (character set)      (range)
2093    ASCII                0x00 .. 0x7F
2094    Big5 (1st byte)      0xA1 .. 0xFE
2095         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2096    --------------------------
2097
2098    Since the number of characters in Big5 is larger than maximum
2099    characters in Emacs' charset (96x96), it can't be handled as one
2100    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2101    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2102    contains frequently used characters and the latter contains less
2103    frequently used characters.  */
2104
2105 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2106    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2107    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2108    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2109
2110 /* Number of Big5 characters which have the same 1st byte: 94 + 63 = 157.  */
2111 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
2112
2113 #define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
2114   do {                                                                  \
2115     unsigned int temp  /* index of the character, counted row by row */ \
2116       = ((b1) - 0xA1) * BIG5_SAME_ROW + (b2) - ((b2) < 0x7F ? 0x40 : 0x62); \
2117     if ((b1) < 0xC9)   /* 1st bytes below 0xC9 select big5-1 */         \
2118       (charset) = charset_big5_1;                                       \
2119     else                                                                \
2120       {                                                                 \
2121         (charset) = charset_big5_2;                                     \
2122         temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW; /* rebase on row 0xC9 */ \
2123       }                                                                 \
2124     (c1) = temp / (0xFF - 0xA1) + 0x21;  /* re-split into rows of 94 */ \
2125     (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
2126   } while (0)
2127
2128 #define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
2129   do {                                                                  \
2130     unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
2131     if ((charset) == charset_big5_2)  /* big5-2 begins at row 0xC9 */   \
2132       temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
2133     (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
2134     (b2) = temp % BIG5_SAME_ROW;                                        \
2135     (b2) += (b2) < 0x3F ? 0x40 : 0x62; /* -> 0x40..0x7E or 0xA1..0xFE */ \
2136   } while (0)
2137
2138 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
2139   do { /* Hand (CHARSET, C1, C2) to the matching DECODE_CHARACTER_xxx.  */ \
2140     int c_alt, charset_alt = (charset);                                 \
2141     if (!NILP (translation_table)                                       \
2142         && ((c_alt = translate_char (translation_table,                 \
2143                                      -1, (charset), c1, c2)) >= 0))     \
2144       SPLIT_CHAR (c_alt, charset_alt, c1, c2); /* use the translation */ \
2145     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
2146       DECODE_CHARACTER_ASCII (c1);                                      \
2147     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
2148       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
2149     else                                                                \
2150       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
2151   } while (0)
2152
2153 #define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)             \
2154   do { /* Store one character at DST; uses SJIS_P and CODING.  */ \
2155     int c_alt, charset_alt;                                     \
2156     if (!NILP (translation_table)                               \
2157         && ((c_alt = translate_char (translation_table, -1,     \
2158                                      (charset), c1, c2))        \
2159             >= 0))                                              \
2160       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
2161     else                                                        \
2162       charset_alt = (charset);                                  \
2163     if (charset_alt == charset_ascii)                           \
2164       *dst++ = c1;                                              \
2165     else if (CHARSET_DIMENSION (charset_alt) == 1)              \
2166       {                                                         \
2167         if (sjis_p && charset_alt == charset_katakana_jisx0201) \
2168           *dst++ = c1;                                          \
2169         else if (sjis_p && charset_alt == charset_latin_jisx0201) \
2170           *dst++ = c1 & 0x7F;                                   \
2171         else                                                    \
2172           { /* Not encodable: store charset id and code as is.  */ \
2173             *dst++ = charset_alt, *dst++ = c1;                  \
2174             coding->fake_multibyte = 1;                         \
2175           }                                                     \
2176       }                                                         \
2177     else                                                        \
2178       {                                                         \
2179         c1 &= 0x7F, c2 &= 0x7F;                                 \
2180         if (sjis_p && (charset_alt == charset_jisx0208          \
2181                        || charset_alt == charset_jisx0208_1978))\
2182           {                                                     \
2183             unsigned char s1, s2;                               \
2184                                                                 \
2185             ENCODE_SJIS (c1, c2, s1, s2);                       \
2186             *dst++ = s1, *dst++ = s2;                           \
2187             coding->fake_multibyte = 1;                         \
2188           }                                                     \
2189         else if (!sjis_p                                        \
2190                  && (charset_alt == charset_big5_1              \
2191                      || charset_alt == charset_big5_2))         \
2192           {                                                     \
2193             unsigned char b1, b2;                               \
2194                                                                 \
2195             ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);          \
2196             *dst++ = b1, *dst++ = b2;                           \
2197           }                                                     \
2198         else                                                    \
2199           { /* Not encodable: this path stores three bytes.  */ \
2200             *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;     \
2201             coding->fake_multibyte = 1;                         \
2202           }                                                     \
2203       }                                                         \
2204     coding->consumed_char++;                                    \
2205   } while (0)
2206
2207 /* Detector for SJIS; see the "GENERAL NOTES on `detect_coding_XXX ()'
2208    functions" above.  Return 0 as soon as a lead byte is followed by a
2209    byte below 0x40, else return CODING_CATEGORY_MASK_SJIS.  */
2210
2211 int
2212 detect_coding_sjis (src, src_end)
2213      unsigned char *src, *src_end;
2214 {
2215   for (; src < src_end; src++)
2216     {
2217       unsigned char lead = *src;
2218
2219       /* Only 0x80..0x9F and 0xE0..0xFF are followed by a second byte.  */
2220       if (lead < 0x80 || (lead >= 0xA0 && lead < 0xE0))
2221         continue;
2222       /* The second byte, when present, is consumed along with LEAD.  */
2223       if (src + 1 < src_end && *++src < 0x40)
2224         return 0;
2225     }
2226   return CODING_CATEGORY_MASK_SJIS;
2227 }
2228
2229 /* Detector for BIG5; see the "GENERAL NOTES on `detect_coding_XXX ()'
2230    functions" above.  Return 0 if a byte >= 0xA1 is followed by a byte
2231    outside 0x40..0x7E and 0xA1..0xFF, else CODING_CATEGORY_MASK_BIG5.  */
2232
2233 int
2234 detect_coding_big5 (src, src_end)
2235      unsigned char *src, *src_end;
2236 {
2237   while (src < src_end)
2238     {
2239       unsigned char lead = *src++;
2240       unsigned char trail;
2241
2242       if (lead < 0xA1)
2243         continue;
2244       /* A lead byte that ends the text is not counted as an error.  */
2245       if (src >= src_end)
2246         break;
2247       trail = *src++;
2248       if (trail < 0x40 || (trail > 0x7E && trail < 0xA1))
2249         return 0;
2250     }
2251   return CODING_CATEGORY_MASK_BIG5;
2252 }
2253
2254 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2255    If SJIS_P is nonzero, decode SJIS text, else decode BIG5 text.  */
2256
2257 int
2258 decode_coding_sjis_big5 (coding, source, destination,
2259                          src_bytes, dst_bytes, sjis_p)
2260      struct coding_system *coding;
2261      unsigned char *source, *destination;
2262      int src_bytes, dst_bytes;
2263      int sjis_p;
2264 {
2265   unsigned char *src = source;
2266   unsigned char *src_end = source + src_bytes;
2267   unsigned char *dst = destination;
2268   unsigned char *dst_end = destination + dst_bytes;
2269   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2270      from DST_END to assure overflow checking is necessary only at the
2271      head of loop.  When DST_BYTES is zero the limit is SRC - 3.  */
2272   unsigned char *adjusted_dst_end = dst_end - 3;
2273   Lisp_Object translation_table
2274       = coding->translation_table_for_decode;
2275   int result = CODING_FINISH_NORMAL;
2276
2277   if (!NILP (Venable_character_translation) && NILP (translation_table))
2278     translation_table = Vstandard_translation_table_for_decode;
2279
2280   coding->produced_char = 0;
2281   coding->fake_multibyte = 0;
2282   while (src < src_end && (dst_bytes
2283                            ? (dst < adjusted_dst_end)
2284                            : (dst < src - 3)))
2285     {
2286       /* SRC_BASE remembers the start position in source in each loop.
2287          The loop will be exited when there's not enough source text
2288          to analyze two-byte character (within macro ONE_MORE_BYTE).
2289          In that case, SRC is reset to SRC_BASE before exiting.  */
2290       unsigned char *src_base = src;
2291       unsigned char c1 = *src++, c2, c3, c4;
2292
2293       if (c1 < 0x20)            /* control codes, including CR and LF */
2294         {
2295           if (c1 == '\r')
2296             {
2297               if (coding->eol_type == CODING_EOL_CRLF)
2298                 {
2299                   ONE_MORE_BYTE (c2);
2300                   if (c2 == '\n')
2301                     *dst++ = c2;
2302                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2303                     {
2304                       result = CODING_FINISH_INCONSISTENT_EOL;
2305                       goto label_end_of_loop_2;
2306                     }
2307                   else
2308                     /* To process C2 again, SRC is subtracted by 1.  */
2309                     *dst++ = c1, src--;
2310                 }
2311               else if (coding->eol_type == CODING_EOL_CR)
2312                 *dst++ = '\n';
2313               else
2314                 *dst++ = c1;
2315             }
2316           else if (c1 == '\n'
2317                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2318                    && (coding->eol_type == CODING_EOL_CR
2319                        || coding->eol_type == CODING_EOL_CRLF))
2320             {
2321               result = CODING_FINISH_INCONSISTENT_EOL;
2322               goto label_end_of_loop_2;
2323             }
2324           else
2325             *dst++ = c1;
2326           coding->produced_char++;
2327         }
2328       else if (c1 < 0x80)       /* ASCII */
2329         {
2330           c2 = 0;               /* avoid warning */
2331           DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2332         }
2333       else
2334         {
2335           if (sjis_p)
2336             {
2337               if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
2338                 {
2339                   /* SJIS -> JISX0208 */
2340                   ONE_MORE_BYTE (c2);
2341                   if (c2 >= 0x40 && c2 != 0x7F && c2 <= 0xFC)
2342                     {
2343                       DECODE_SJIS (c1, c2, c3, c4);
2344                       DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2345                     }
2346                   else
2347                     goto label_invalid_code_2;
2348                 }
2349               else if (c1 < 0xE0)
2350                 /* SJIS -> JISX0201-Kana */
2351                 {
2352                   c2 = 0;       /* avoid warning */
2353                   DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2354                                               /* dummy */ c2);
2355                 }
2356               else              /* 0xF0..0xFF is not handled as SJIS */
2357                 goto label_invalid_code_1;
2358             }
2359           else
2360             {
2361               /* BIG5 -> Big5 */
2362               if (c1 >= 0xA1 && c1 <= 0xFE)
2363                 {
2364                   ONE_MORE_BYTE (c2);
2365                   if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2366                     {
2367                       int charset;
2368
2369                       DECODE_BIG5 (c1, c2, charset, c3, c4);
2370                       DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2371                     }
2372                   else
2373                     goto label_invalid_code_2;
2374                 }
2375               else
2376                 goto label_invalid_code_1;
2377             }
2378         }
2379       continue;
2380
2381     label_invalid_code_1:     /* pass one undecodable byte through */
2382       *dst++ = c1;
2383       coding->produced_char++;
2384       coding->fake_multibyte = 1;
2385       continue;
2386
2387     label_invalid_code_2:     /* pass an undecodable byte pair through */
2388       *dst++ = c1; *dst++= c2;
2389       coding->produced_char += 2;
2390       coding->fake_multibyte = 1;
2391       continue;
2392
2393     label_end_of_loop:
2394       result = CODING_FINISH_INSUFFICIENT_SRC;
2395     label_end_of_loop_2:
2396       src = src_base;
2397       break;
2398     }
2399
2400   if (src < src_end)
2401     {
2402       if (result == CODING_FINISH_NORMAL)
2403         result = CODING_FINISH_INSUFFICIENT_DST;
2404       else if (result != CODING_FINISH_INCONSISTENT_EOL
2405                && coding->mode & CODING_MODE_LAST_BLOCK)
2406         {
2407           src_bytes = src_end - src;  /* last block: flush the rest as is */
2408           if (dst_bytes && (dst_end - dst < src_bytes))
2409             src_bytes = dst_end - dst;
2410           bcopy (src, dst, src_bytes); /* bcopy takes (from, to, size) */
2411           src += src_bytes;
2412           dst += src_bytes;
2413           coding->fake_multibyte = 1;
2414         }
2415     }
2416
2417   coding->consumed = coding->consumed_char = src - source;
2418   coding->produced = dst - destination;
2419   return result;                /* one of the CODING_FINISH_xxx codes */
2420 }
2421
2422 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2423    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2424    `charset_jisx0208', `charset_big5_1', and `charset_big5_2'.  We are
2425    sure that all these charsets are registered as official charset
2426    (i.e. do not have extended leading-codes).  Characters of other
2427    charsets are produced without any encoding.  If SJIS_P is nonzero,
2428    encode SJIS text, else encode BIG5 text.  */
2429
2430 int
2431 encode_coding_sjis_big5 (coding, source, destination,
2432                          src_bytes, dst_bytes, sjis_p)
2433      struct coding_system *coding;
2434      unsigned char *source, *destination;
2435      int src_bytes, dst_bytes;
2436      int sjis_p;
2437 {
2438   unsigned char *src = source;
2439   unsigned char *src_end = source + src_bytes;
2440   unsigned char *dst = destination;
2441   unsigned char *dst_end = destination + dst_bytes;
2442   /* Since the maximum bytes produced by each loop is 3 (a 2-dimension
2443      character that is stored without encoding), we subtract 2 from
2444      DST_END so that overflow is checked only at the head of loop.  */
2445   unsigned char *adjusted_dst_end = dst_end - 2;
2446   Lisp_Object translation_table
2447       = coding->translation_table_for_encode;
2448   int result = CODING_FINISH_NORMAL;
2449
2450   if (!NILP (Venable_character_translation) && NILP (translation_table))
2451     translation_table = Vstandard_translation_table_for_encode;
2452
2453   coding->consumed_char = 0;
2454   coding->fake_multibyte = 0;
2455   while (src < src_end && (dst_bytes
2456                            ? (dst < adjusted_dst_end)
2457                            : (dst < src - 1)))
2458     {
2459       /* SRC_BASE remembers the start position in source in each loop.
2460          The loop will be exited when there's not enough source text
2461          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2462          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2463          before exiting.  C2 and C3 start as 0 for use as dummies.  */
2464       unsigned char *src_base = src;
2465       unsigned char c1 = *src++, c2 = 0, c3 = 0, c4;
2466
2467       if (coding->composing)
2468         {
2469           if (c1 == 0xA0)
2470             {
2471               ONE_MORE_BYTE (c1);
2472               c1 &= 0x7F;
2473             }
2474           else if (c1 >= 0xA0)
2475             c1 -= 0x20;
2476           else
2477             coding->composing = 0;
2478         }
2479
2480       switch (emacs_code_class[c1])
2481         {
2482         case EMACS_ascii_code:
2483           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2484           break;
2485
2486         case EMACS_control_code:
2487           *dst++ = c1;
2488           coding->consumed_char++;
2489           break;
2490
2491         case EMACS_carriage_return_code:
2492           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2493             {
2494               *dst++ = c1;
2495               coding->consumed_char++;
2496               break;
2497             }
2498           /* fall down to treat '\r' as '\n' ...  */
2499
2500         case EMACS_linefeed_code:
2501           if (coding->eol_type == CODING_EOL_LF
2502               || coding->eol_type == CODING_EOL_UNDECIDED)
2503             *dst++ = '\n';
2504           else if (coding->eol_type == CODING_EOL_CRLF)
2505             *dst++ = '\r', *dst++ = '\n';
2506           else
2507             *dst++ = '\r';
2508           coding->consumed_char++;
2509           break;
2510
2511         case EMACS_leading_code_2:      /* C1 is the charset */
2512           ONE_MORE_BYTE (c2);
2513           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2514           break;
2515
2516         case EMACS_leading_code_3:      /* C1 is the charset */
2517           TWO_MORE_BYTES (c2, c3);
2518           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2519           break;
2520
2521         case EMACS_leading_code_4:      /* C2 is the charset here */
2522           THREE_MORE_BYTES (c2, c3, c4);
2523           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2524           break;
2525
2526         case EMACS_leading_code_composition:
2527           coding->composing = 1;
2528           break;
2529
2530         default:                /* i.e. case EMACS_invalid_code: */
2531           *dst++ = c1;
2532           coding->consumed_char++;
2533         }
2534       continue;
2535
2536     label_end_of_loop:
2537       result = CODING_FINISH_INSUFFICIENT_SRC;
2538       src = src_base;
2539       break;
2540     }
2541
2542   if (result == CODING_FINISH_NORMAL
2543       && src < src_end)
2544     result = CODING_FINISH_INSUFFICIENT_DST;
2545   coding->consumed = src - source;
2546   coding->produced = coding->produced_char = dst - destination;
2547   return result;                /* one of the CODING_FINISH_xxx codes */
2548 }
2549
2550 \f
2551 /*** 5. CCL handlers ***/
2552
2553 /* Detector for coding systems whose encoder/decoder is a CCL program;
2554    see the "GENERAL NOTES on `detect_coding_XXX ()' functions" above.
2555    Return CODING_CATEGORY_MASK_CCL if every byte of the text is marked
2556    in that coding system's `valid_codes' table, else return 0.  */
2557
2558 int
2559 detect_coding_ccl (src, src_end)
2560      unsigned char *src, *src_end;
2561 {
2562   unsigned char *valid_codes, *p;
2563
2564   /* When no coding system is assigned to coding-category-ccl there
2565      is no table to check against, so report no match.  */
2566   if (coding_system_table[CODING_CATEGORY_IDX_CCL] == 0)
2567     return 0;
2568
2569   valid_codes
2570     = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2571   for (p = src; p < src_end; p++)
2572     if (valid_codes[*p] == 0)
2573       return 0;
2574   return CODING_CATEGORY_MASK_CCL;
2575 }
2576
2577 \f
2578 /*** 6. End-of-line handlers ***/
2579
2580 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2581    This function is called only when `coding->eol_type' is
2582    CODING_EOL_CRLF or CODING_EOL_CR.  Those line ends become '\n'.  */
2583
2584 int
2585 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2586      struct coding_system *coding;
2587      unsigned char *source, *destination;
2588      int src_bytes, dst_bytes;
2589 {
2590   unsigned char *src = source;
2591   unsigned char *src_end = source + src_bytes;
2592   unsigned char *dst = destination;
2593   unsigned char *dst_end = destination + dst_bytes;
2594   unsigned char c;
2595   int result = CODING_FINISH_NORMAL;
2596
2597   coding->fake_multibyte = 0;
2598
2599   if (src_bytes <= 0)           /* nothing to decode */
2600     {
2601       coding->produced = coding->produced_char = 0;
2602       coding->consumed = coding->consumed_char = 0;
2603       return result;
2604     }
2605
2606   switch (coding->eol_type)
2607     {
2608     case CODING_EOL_CRLF:
2609       {
2610         /* Since the maximum bytes produced by each loop is 2, we
2611            subtract 1 from DST_END to assure overflow checking is
2612            necessary only at the head of loop.  */
2613         unsigned char *adjusted_dst_end = dst_end - 1;
2614
2615         while (src < src_end && (dst_bytes
2616                                  ? (dst < adjusted_dst_end)
2617                                  : (dst < src - 1)))
2618           {
2619             unsigned char *src_base = src; /* restored on early exit */
2620
2621             c = *src++;
2622             if (c == '\r')
2623               {
2624                 ONE_MORE_BYTE (c);      /* C is now the byte after the CR */
2625                 if (c == '\n')
2626                   *dst++ = c;           /* CR LF collapses to LF */
2627                 else
2628                   {
2629                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2630                       {
2631                         result = CODING_FINISH_INCONSISTENT_EOL;
2632                         goto label_end_of_loop_2;
2633                       }
2634                     src--;              /* look at that byte again next time */
2635                     *dst++ = '\r';
2636                     if (BASE_LEADING_CODE_P (c))
2637                       coding->fake_multibyte = 1;
2638                   }
2639               }
2640             else if (c == '\n'
2641                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2642               {
2643                 result = CODING_FINISH_INCONSISTENT_EOL;
2644                 goto label_end_of_loop_2;
2645               }
2646             else
2647               {
2648                 *dst++ = c;
2649                 if (BASE_LEADING_CODE_P (c))
2650                   coding->fake_multibyte = 1;
2651               }
2652             continue;
2653
2654           label_end_of_loop:
2655             result = CODING_FINISH_INSUFFICIENT_SRC;
2656           label_end_of_loop_2:
2657             src = src_base;
2658             break;
2659           }
2660         if (src < src_end)
2661           {
2662             if (result == CODING_FINISH_NORMAL)
2663               result = CODING_FINISH_INSUFFICIENT_DST;
2664             else if (result != CODING_FINISH_INCONSISTENT_EOL
2665                      && coding->mode & CODING_MODE_LAST_BLOCK)
2666               {
2667                 /* This is the last block of the text to be decoded.
2668                    We flush out all remaining codes.  */
2669                 src_bytes = src_end - src;
2670                 if (dst_bytes && (dst_end - dst < src_bytes))
2671                   src_bytes = dst_end - dst;
2672                 bcopy (src, dst, src_bytes);
2673                 dst += src_bytes;
2674                 src += src_bytes;
2675               }
2676           }
2677       }
2678       break;
2679
2680     case CODING_EOL_CR:
2681       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2682         {
2683           while (src < src_end) /* scan for an LF, which is inconsistent */
2684             {
2685               if ((c = *src++) == '\n')
2686                 break;
2687               if (BASE_LEADING_CODE_P (c))
2688                 coding->fake_multibyte = 1;
2689             }
2690           if (*--src == '\n')   /* found one: convert only the text before it */
2691             {
2692               src_bytes = src - source;
2693               result = CODING_FINISH_INCONSISTENT_EOL;
2694             }
2695         }
2696       if (dst_bytes && src_bytes > dst_bytes)
2697         {
2698           result = CODING_FINISH_INSUFFICIENT_DST;
2699           src_bytes = dst_bytes;
2700         }
2701       if (dst_bytes)
2702         bcopy (source, destination, src_bytes);
2703       else
2704         safe_bcopy (source, destination, src_bytes);
2705       src = source + src_bytes;
2706       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n'; /* CR -> LF */
2707       break;
2708
2709     default:                    /* i.e. case: CODING_EOL_LF */
2710       if (dst_bytes && src_bytes > dst_bytes)
2711         {
2712           result = CODING_FINISH_INSUFFICIENT_DST;
2713           src_bytes = dst_bytes;
2714         }
2715       if (dst_bytes)
2716         bcopy (source, destination, src_bytes);
2717       else
2718         safe_bcopy (source, destination, src_bytes);
2719       src += src_bytes;
2720       dst += src_bytes;
2721       coding->fake_multibyte = 1;
2722       break;
2723     }
2724
2725   coding->consumed = coding->consumed_char = src - source;
2726   coding->produced = coding->produced_char = dst - destination;
2727   return result;                /* one of the CODING_FINISH_xxx codes */
2728 }
2729
2730 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2731    format of end-of-line according to `coding->eol_type'.  If
2732    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2733    '\r' in source text also means end-of-line.  */
2734
2735 int
2736 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2737      struct coding_system *coding;
2738      unsigned char *source, *destination;
2739      int src_bytes, dst_bytes;
2740 {
2741   unsigned char *src = source;
2742   unsigned char *dst = destination;
2743   int result = CODING_FINISH_NORMAL;
2744
2745   coding->fake_multibyte = 0;
2746
2747   if (coding->eol_type == CODING_EOL_CRLF)
2748     {
2749       unsigned char c;
2750       unsigned char *src_end = source + src_bytes;
2751       unsigned char *dst_end = destination + dst_bytes;
2752       /* Since the maximum bytes produced by each loop is 2, we
2753          subtract 1 from DST_END to assure overflow checking is
2754          necessary only at the head of loop.  */
2755       unsigned char *adjusted_dst_end = dst_end - 1;
2756
2757       while (src < src_end && (dst_bytes
2758                                ? (dst < adjusted_dst_end)
2759                                : (dst < src - 1)))
2760         {
2761           c = *src++;
2762           if (c == '\n'
2763               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2764             *dst++ = '\r', *dst++ = '\n';       /* one line end -> CR LF */
2765           else
2766             {
2767               *dst++ = c;
2768               if (BASE_LEADING_CODE_P (c))
2769                 coding->fake_multibyte = 1;
2770             }
2771         }
2772       if (src < src_end)        /* stopped early: destination is full */
2773         result = CODING_FINISH_INSUFFICIENT_DST;
2774     }
2775   else
2776     {
2777       unsigned char c;
2778
2779       if (dst_bytes && src_bytes > dst_bytes)
2780         {
2781           src_bytes = dst_bytes;
2782           result = CODING_FINISH_INSUFFICIENT_DST;
2783         }
2784       if (dst_bytes)
2785         bcopy (source, destination, src_bytes);
2786       else
2787         safe_bcopy (source, destination, src_bytes);
2788       dst_bytes = src_bytes;    /* remember the copied length for below */
2789       if (coding->eol_type == CODING_EOL_CR)
2790         {
2791           while (src_bytes--)   /* rewrite the copy in place: LF -> CR */
2792             {
2793               if ((c = *dst++) == '\n')
2794                 dst[-1] = '\r';
2795               else if (BASE_LEADING_CODE_P (c))
2796                 coding->fake_multibyte = 1;
2797             }
2798         }
2799       else
2800         {
2801           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2802             {
2803               while (src_bytes--)       /* rewrite the copy: CR -> LF */
2804                 if (*dst++ == '\r') dst[-1] = '\n';
2805             }
2806           coding->fake_multibyte = 1;
2807         }
2808       src = source + dst_bytes;
2809       dst = destination + dst_bytes;
2810     }
2811
2812   coding->consumed = coding->consumed_char = src - source;
2813   coding->produced = coding->produced_char = dst - destination;
2814   return result;                /* one of the CODING_FINISH_xxx codes */
2815 }
2816
2817 \f
2818 /*** 7. C library functions ***/
2819
2820 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2821    has a property `coding-system'.  The value of this property is a
2822    vector of length 5 (called as coding-vector).  Among elements of
2823    this vector, the first (element[0]) and the fifth (element[4])
2824    carry important information for decoding/encoding.  Before
2825    decoding/encoding, this information should be set in fields of a
2826    structure of type `coding_system'.
2827
2828    A value of property `coding-system' can be a symbol of another
2829    subsidiary coding-system.  In that case, Emacs gets coding-vector
2830    from that symbol.
2831
2832    `element[0]' contains information to be set in `coding->type'.  The
2833    value and its meaning is as follows:
2834
2835    0 -- coding_type_emacs_mule
2836    1 -- coding_type_sjis
2837    2 -- coding_type_iso2022
2838    3 -- coding_type_big5
2839    4 -- coding_type_ccl encoder/decoder written in CCL
2840    nil -- coding_type_no_conversion
2841    t -- coding_type_undecided (automatic conversion on decoding,
2842                                no-conversion on encoding)
2843
2844    `element[4]' contains information to be set in `coding->flags' and
2845    `coding->spec'.  The meaning varies by `coding->type'.
2846
2847    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2848    of length 32 (of which the first 13 sub-elements are used now).
2849    Meanings of these sub-elements are:
2850
2851    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2852         If the value is an integer of valid charset, the charset is
2853         assumed to be designated to graphic register N initially.
2854
2855         If the value is minus, it is a minus value of charset which
2856         reserves graphic register N, which means that the charset is
2857         not designated initially but should be designated to graphic
2858         register N just before encoding a character in that charset.
2859
2860         If the value is nil, graphic register N is never used on
2861         encoding.
2862
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
        Each value is t or nil.  See the section ISO2022 of
        `coding.h' for more information.
2866
2867    If `coding->type' is `coding_type_big5', element[4] is t to denote
2868    BIG5-ETen or nil to denote BIG5-HKU.
2869
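   If `coding->type' is `coding_type_ccl', element[4] is a cons whose
   car and cdr specify the CCL programs used for decoding and for
   encoding respectively.

   If `coding->type' takes any other value, element[4] is ignored.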
2871
2872    Emacs Lisp's coding system also carries information about format of
2873    end-of-line in a value of property `eol-type'.  If the value is
2874    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2875    means CODING_EOL_CR.  If it is not integer, it should be a vector
2876    of subsidiary coding systems of which property `eol-type' has one
2877    of above values.
2878
2879 */
2880
2881 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2882    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2883    is setup so that no conversion is necessary and return -1, else
2884    return 0.  */
2885
2886 int
2887 setup_coding_system (coding_system, coding)
2888      Lisp_Object coding_system;
2889      struct coding_system *coding;
2890 {
2891   Lisp_Object coding_spec, coding_type, eol_type, plist;
2892   Lisp_Object val;
2893   int i;
2894
2895   /* Initialize some fields required for all kinds of coding systems.  */
2896   coding->symbol = coding_system;
2897   coding->common_flags = 0;
2898   coding->mode = 0;
2899   coding->heading_ascii = -1;
2900   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2901
2902   if (NILP (coding_system))
2903     goto label_invalid_coding_system;
2904
2905   coding_spec = Fget (coding_system, Qcoding_system);
2906
2907   if (!VECTORP (coding_spec)
2908       || XVECTOR (coding_spec)->size != 5
2909       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2910     goto label_invalid_coding_system;
2911
2912   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2913   if (VECTORP (eol_type))
2914     {
2915       coding->eol_type = CODING_EOL_UNDECIDED;
2916       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2917     }
2918   else if (XFASTINT (eol_type) == 1)
2919     {
2920       coding->eol_type = CODING_EOL_CRLF;
2921       coding->common_flags
2922         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2923     }
2924   else if (XFASTINT (eol_type) == 2)
2925     {
2926       coding->eol_type = CODING_EOL_CR;
2927       coding->common_flags
2928         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2929     }
2930   else
2931     coding->eol_type = CODING_EOL_LF;
2932
2933   coding_type = XVECTOR (coding_spec)->contents[0];
2934   /* Try short cut.  */
2935   if (SYMBOLP (coding_type))
2936     {
2937       if (EQ (coding_type, Qt))
2938         {
2939           coding->type = coding_type_undecided;
2940           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2941         }
2942       else
2943         coding->type = coding_type_no_conversion;
2944       return 0;
2945     }
2946
2947   /* Initialize remaining fields.  */
2948   coding->composing = 0;
2949   coding->composed_chars = 0;
2950
2951   /* Get values of coding system properties:
2952      `post-read-conversion', `pre-write-conversion',
2953      `translation-table-for-decode', `translation-table-for-encode'.  */
2954   plist = XVECTOR (coding_spec)->contents[3];
2955   /* Pre & post conversion functions should be disabled if
2956      inhibit_eol_conversion is nozero.  This is the case that a code
2957      conversion function is called while those functions are running.  */
2958   if (! inhibit_pre_post_conversion)
2959     {
2960       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2961       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2962     }
2963   val = Fplist_get (plist, Qtranslation_table_for_decode);
2964   if (SYMBOLP (val))
2965     val = Fget (val, Qtranslation_table_for_decode);
2966   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2967   val = Fplist_get (plist, Qtranslation_table_for_encode);
2968   if (SYMBOLP (val))
2969     val = Fget (val, Qtranslation_table_for_encode);
2970   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
2971   val = Fplist_get (plist, Qcoding_category);
2972   if (!NILP (val))
2973     {
2974       val = Fget (val, Qcoding_category_index);
2975       if (INTEGERP (val))
2976         coding->category_idx = XINT (val);
2977       else
2978         goto label_invalid_coding_system;
2979     }
2980   else
2981     goto label_invalid_coding_system;
2982
2983   val = Fplist_get (plist, Qsafe_charsets);
2984   if (EQ (val, Qt))
2985     {
2986       for (i = 0; i <= MAX_CHARSET; i++)
2987         coding->safe_charsets[i] = 1;
2988     }
2989   else
2990     {
2991       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2992       while (CONSP (val))
2993         {
2994           if ((i = get_charset_id (XCAR (val))) >= 0)
2995             coding->safe_charsets[i] = 1;
2996           val = XCDR (val);
2997         }
2998     }
2999
3000   switch (XFASTINT (coding_type))
3001     {
3002     case 0:
3003       coding->type = coding_type_emacs_mule;
3004       if (!NILP (coding->post_read_conversion))
3005         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3006       if (!NILP (coding->pre_write_conversion))
3007         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3008       break;
3009
3010     case 1:
3011       coding->type = coding_type_sjis;
3012       coding->common_flags
3013         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3014       break;
3015
3016     case 2:
3017       coding->type = coding_type_iso2022;
3018       coding->common_flags
3019         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3020       {
3021         Lisp_Object val, temp;
3022         Lisp_Object *flags;
3023         int i, charset, reg_bits = 0;
3024
3025         val = XVECTOR (coding_spec)->contents[4];
3026
3027         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3028           goto label_invalid_coding_system;
3029
3030         flags = XVECTOR (val)->contents;
3031         coding->flags
3032           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3033              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3034              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3035              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3036              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3037              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3038              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3039              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3040              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3041              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3042              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3043              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3044              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3045              );
3046
3047         /* Invoke graphic register 0 to plane 0.  */
3048         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3049         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3050         CODING_SPEC_ISO_INVOCATION (coding, 1)
3051           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3052         /* Not single shifting at first.  */
3053         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3054         /* Beginning of buffer should also be regarded as bol. */
3055         CODING_SPEC_ISO_BOL (coding) = 1;
3056
3057         for (charset = 0; charset <= MAX_CHARSET; charset++)
3058           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3059         val = Vcharset_revision_alist;
3060         while (CONSP (val))
3061           {
3062             charset = get_charset_id (Fcar_safe (XCAR (val)));
3063             if (charset >= 0
3064                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3065                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3066               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3067             val = XCDR (val);
3068           }
3069
3070         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3071            FLAGS[REG] can be one of below:
3072                 integer CHARSET: CHARSET occupies register I,
3073                 t: designate nothing to REG initially, but can be used
3074                   by any charsets,
3075                 list of integer, nil, or t: designate the first
3076                   element (if integer) to REG initially, the remaining
3077                   elements (if integer) is designated to REG on request,
3078                   if an element is t, REG can be used by any charsets,
3079                 nil: REG is never used.  */
3080         for (charset = 0; charset <= MAX_CHARSET; charset++)
3081           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3082             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3083         for (i = 0; i < 4; i++)
3084           {
3085             if (INTEGERP (flags[i])
3086                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
3087                 || (charset = get_charset_id (flags[i])) >= 0)
3088               {
3089                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3090                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3091               }
3092             else if (EQ (flags[i], Qt))
3093               {
3094                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3095                 reg_bits |= 1 << i;
3096                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3097               }
3098             else if (CONSP (flags[i]))
3099               {
3100                 Lisp_Object tail;
3101                 tail = flags[i];
3102
3103                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3104                 if (INTEGERP (XCAR (tail))
3105                     && (charset = XINT (XCAR (tail)),
3106                         CHARSET_VALID_P (charset))
3107                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3108                   {
3109                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3110                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3111                   }
3112                 else
3113                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3114                 tail = XCDR (tail);
3115                 while (CONSP (tail))
3116                   {
3117                     if (INTEGERP (XCAR (tail))
3118                         && (charset = XINT (XCAR (tail)),
3119                             CHARSET_VALID_P (charset))
3120                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3121                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3122                         = i;
3123                     else if (EQ (XCAR (tail), Qt))
3124                       reg_bits |= 1 << i;
3125                     tail = XCDR (tail);
3126                   }
3127               }
3128             else
3129               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3130
3131             CODING_SPEC_ISO_DESIGNATION (coding, i)
3132               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3133           }
3134
3135         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3136           {
3137             /* REG 1 can be used only by locking shift in 7-bit env.  */
3138             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3139               reg_bits &= ~2;
3140             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3141               /* Without any shifting, only REG 0 and 1 can be used.  */
3142               reg_bits &= 3;
3143           }
3144
3145         if (reg_bits)
3146           for (charset = 0; charset <= MAX_CHARSET; charset++)
3147             {
3148               if (CHARSET_VALID_P (charset))
3149                 {
3150                   /* There exist some default graphic registers to be
3151                      used CHARSET.  */
3152
3153                   /* We had better avoid designating a charset of
3154                      CHARS96 to REG 0 as far as possible.  */
3155                   if (CHARSET_CHARS (charset) == 96)
3156                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3157                       = (reg_bits & 2
3158                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3159                   else
3160                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3161                       = (reg_bits & 1
3162                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3163                 }
3164             }
3165       }
3166       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3167       coding->spec.iso2022.last_invalid_designation_register = -1;
3168       break;
3169
3170     case 3:
3171       coding->type = coding_type_big5;
3172       coding->common_flags
3173         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3174       coding->flags
3175         = (NILP (XVECTOR (coding_spec)->contents[4])
3176            ? CODING_FLAG_BIG5_HKU
3177            : CODING_FLAG_BIG5_ETEN);
3178       break;
3179
3180     case 4:
3181       coding->type = coding_type_ccl;
3182       coding->common_flags
3183         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3184       {
3185         val = XVECTOR (coding_spec)->contents[4];
3186         if (! CONSP (val)
3187             || setup_ccl_program (&(coding->spec.ccl.decoder),
3188                                   XCAR (val)) < 0
3189             || setup_ccl_program (&(coding->spec.ccl.encoder),
3190                                   XCDR (val)) < 0)
3191           goto label_invalid_coding_system;
3192
3193         bzero (coding->spec.ccl.valid_codes, 256);
3194         val = Fplist_get (plist, Qvalid_codes);
3195         if (CONSP (val))
3196           {
3197             Lisp_Object this;
3198
3199             for (; CONSP (val); val = XCDR (val))
3200               {
3201                 this = XCAR (val);
3202                 if (INTEGERP (this)
3203                     && XINT (this) >= 0 && XINT (this) < 256)
3204                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3205                 else if (CONSP (this)
3206                          && INTEGERP (XCAR (this))
3207                          && INTEGERP (XCDR (this)))
3208                   {
3209                     int start = XINT (XCAR (this));
3210                     int end = XINT (XCDR (this));
3211
3212                     if (start >= 0 && start <= end && end < 256)
3213                       while (start <= end)
3214                         coding->spec.ccl.valid_codes[start++] = 1;
3215                   }
3216               }
3217           }
3218       }
3219       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3220       break;
3221
3222     case 5:
3223       coding->type = coding_type_raw_text;
3224       break;
3225
3226     default:
3227       goto label_invalid_coding_system;
3228     }
3229   return 0;
3230
3231  label_invalid_coding_system:
3232   coding->type = coding_type_no_conversion;
3233   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3234   coding->common_flags = 0;
3235   coding->eol_type = CODING_EOL_LF;
3236   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3237   return -1;
3238 }
3239
3240 /* Setup raw-text or one of its subsidiaries in the structure
3241    coding_system CODING according to the already setup value eol_type
3242    in CODING.  CODING should be setup for some coding system in
3243    advance.  */
3244
3245 void
3246 setup_raw_text_coding_system (coding)
3247      struct coding_system *coding;
3248 {
3249   if (coding->type != coding_type_raw_text)
3250     {
3251       coding->symbol = Qraw_text;
3252       coding->type = coding_type_raw_text;
3253       if (coding->eol_type != CODING_EOL_UNDECIDED)
3254         {
3255           Lisp_Object subsidiaries;
3256           subsidiaries = Fget (Qraw_text, Qeol_type);
3257
3258           if (VECTORP (subsidiaries)
3259               && XVECTOR (subsidiaries)->size == 3)
3260             coding->symbol
3261               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3262         }
3263       setup_coding_system (coding->symbol, coding);
3264     }
3265   return;
3266 }
3267
3268 /* Emacs has a mechanism to automatically detect a coding system if it
3269    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3270    it's impossible to distinguish some coding systems accurately
   because they use the same range of codes.  So, at first, coding
   systems are classified into the following categories:
3273
3274    o coding-category-emacs-mule
3275
3276         The category for a coding system which has the same code range
3277         as Emacs' internal format.  Assigned the coding-system (Lisp
3278         symbol) `emacs-mule' by default.
3279
3280    o coding-category-sjis
3281
3282         The category for a coding system which has the same code range
3283         as SJIS.  Assigned the coding-system (Lisp
3284         symbol) `japanese-shift-jis' by default.
3285
3286    o coding-category-iso-7
3287
3288         The category for a coding system which has the same code range
3289         as ISO2022 of 7-bit environment.  This doesn't use any locking
3290         shift and single shift functions.  This can encode/decode all
3291         charsets.  Assigned the coding-system (Lisp symbol)
3292         `iso-2022-7bit' by default.
3293
3294    o coding-category-iso-7-tight
3295
3296         Same as coding-category-iso-7 except that this can
3297         encode/decode only the specified charsets.
3298
3299    o coding-category-iso-8-1
3300
3301         The category for a coding system which has the same code range
3302         as ISO2022 of 8-bit environment and graphic plane 1 used only
3303         for DIMENSION1 charset.  This doesn't use any locking shift
3304         and single shift functions.  Assigned the coding-system (Lisp
3305         symbol) `iso-latin-1' by default.
3306
3307    o coding-category-iso-8-2
3308
3309         The category for a coding system which has the same code range
3310         as ISO2022 of 8-bit environment and graphic plane 1 used only
3311         for DIMENSION2 charset.  This doesn't use any locking shift
3312         and single shift functions.  Assigned the coding-system (Lisp
3313         symbol) `japanese-iso-8bit' by default.
3314
3315    o coding-category-iso-7-else
3316
3317         The category for a coding system which has the same code range
        as ISO2022 of 7-bit environment but uses locking shift or
3319         single shift functions.  Assigned the coding-system (Lisp
3320         symbol) `iso-2022-7bit-lock' by default.
3321
3322    o coding-category-iso-8-else
3323
3324         The category for a coding system which has the same code range
        as ISO2022 of 8-bit environment but uses locking shift or
3326         single shift functions.  Assigned the coding-system (Lisp
3327         symbol) `iso-2022-8bit-ss2' by default.
3328
3329    o coding-category-big5
3330
3331         The category for a coding system which has the same code range
3332         as BIG5.  Assigned the coding-system (Lisp symbol)
3333         `cn-big5' by default.
3334
3335    o coding-category-ccl
3336
3337         The category for a coding system of which encoder/decoder is
3338         written in CCL programs.  The default value is nil, i.e., no
3339         coding system is assigned.
3340
3341    o coding-category-binary
3342
3343         The category for a coding system not categorized in any of the
3344         above.  Assigned the coding-system (Lisp symbol)
3345         `no-conversion' by default.
3346
3347    Each of them is a Lisp symbol and the value is an actual
3348    `coding-system's (this is also a Lisp symbol) assigned by a user.
3349    What Emacs does actually is to detect a category of coding system.
3350    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3351    decide only one possible category, it selects a category of the
3352    highest priority.  Priorities of categories are also specified by a
3353    user in a Lisp variable `coding-category-list'.
3354
3355 */
3356
3357 static
3358 int ascii_skip_code[256];
3359
3360 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3361    If it detects possible coding systems, return an integer in which
3362    appropriate flag bits are set.  Flag bits are defined by macros
3363    CODING_CATEGORY_MASK_XXX in `coding.h'.
3364
3365    How many ASCII characters are at the head is returned as *SKIP.  */
3366
3367 static int
3368 detect_coding_mask (source, src_bytes, priorities, skip)
3369      unsigned char *source;
3370      int src_bytes, *priorities, *skip;
3371 {
3372   register unsigned char c;
3373   unsigned char *src = source, *src_end = source + src_bytes;
3374   unsigned int mask;
3375   int i;
3376
3377   /* At first, skip all ASCII characters and control characters except
3378      for three ISO2022 specific control characters.  */
3379   ascii_skip_code[ISO_CODE_SO] = 0;
3380   ascii_skip_code[ISO_CODE_SI] = 0;
3381   ascii_skip_code[ISO_CODE_ESC] = 0;
3382
3383  label_loop_detect_coding:
3384   while (src < src_end && ascii_skip_code[*src]) src++;
3385   *skip = src - source;
3386
3387   if (src >= src_end)
3388     /* We found nothing other than ASCII.  There's nothing to do.  */
3389     return 0;
3390
3391   c = *src;
3392   /* The text seems to be encoded in some multilingual coding system.
3393      Now, try to find in which coding system the text is encoded.  */
3394   if (c < 0x80)
3395     {
3396       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3397       /* C is an ISO2022 specific control code of C0.  */
3398       mask = detect_coding_iso2022 (src, src_end);
3399       if (mask == 0)
3400         {
3401           /* No valid ISO2022 code follows C.  Try again.  */
3402           src++;
3403           if (c == ISO_CODE_ESC)
3404             ascii_skip_code[ISO_CODE_ESC] = 1;
3405           else
3406             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3407           goto label_loop_detect_coding;
3408         }
3409       if (priorities)
3410         goto label_return_highest_only;
3411     }
3412   else
3413     {
3414       int try;
3415
3416       if (c < 0xA0)
3417         {
3418           /* C is the first byte of SJIS character code,
3419              or a leading-code of Emacs' internal format (emacs-mule).  */
3420           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3421
3422           /* Or, if C is a special latin extra code,
3423              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3424              or is an ISO2022 control-sequence-introducer (CSI),
3425              we should also consider the possibility of ISO2022 codings.  */
3426           if ((VECTORP (Vlatin_extra_code_table)
3427                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3428               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3429               || (c == ISO_CODE_CSI
3430                   && (src < src_end
3431                       && (*src == ']'
3432                           || ((*src == '0' || *src == '1' || *src == '2')
3433                               && src + 1 < src_end
3434                               && src[1] == ']')))))
3435             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3436                      | CODING_CATEGORY_MASK_ISO_8BIT);
3437         }
3438       else
3439         /* C is a character of ISO2022 in graphic plane right,
3440            or a SJIS's 1-byte character code (i.e. JISX0201),
3441            or the first byte of BIG5's 2-byte code.  */
3442         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3443                 | CODING_CATEGORY_MASK_ISO_8BIT
3444                 | CODING_CATEGORY_MASK_SJIS
3445                 | CODING_CATEGORY_MASK_BIG5);
3446
3447       /* Or, we may have to consider the possibility of CCL.  */
3448       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3449           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3450               ->spec.ccl.valid_codes)[c])
3451         try |= CODING_CATEGORY_MASK_CCL;
3452
3453       mask = 0;
3454       if (priorities)
3455         {
3456           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3457             {
3458               if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
3459                 mask = detect_coding_iso2022 (src, src_end);
3460               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3461                 mask = detect_coding_sjis (src, src_end);
3462               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3463                 mask = detect_coding_big5 (src, src_end);
3464               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3465                 mask = detect_coding_emacs_mule (src, src_end);
3466               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3467                 mask = detect_coding_ccl (src, src_end);
3468               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3469                 mask = CODING_CATEGORY_MASK_RAW_TEXT;
3470               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3471                 mask = CODING_CATEGORY_MASK_BINARY;
3472               if (mask)
3473                 goto label_return_highest_only;
3474             }
3475           return CODING_CATEGORY_MASK_RAW_TEXT;
3476         }
3477       if (try & CODING_CATEGORY_MASK_ISO)
3478         mask |= detect_coding_iso2022 (src, src_end);
3479       if (try & CODING_CATEGORY_MASK_SJIS)
3480         mask |= detect_coding_sjis (src, src_end);
3481       if (try & CODING_CATEGORY_MASK_BIG5)
3482         mask |= detect_coding_big5 (src, src_end);
3483       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3484         mask |= detect_coding_emacs_mule (src, src_end);
3485       if (try & CODING_CATEGORY_MASK_CCL)
3486         mask |= detect_coding_ccl (src, src_end);
3487     }
3488   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3489
3490  label_return_highest_only:
3491   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3492     {
3493       if (mask & priorities[i])
3494         return priorities[i];
3495     }
3496   return CODING_CATEGORY_MASK_RAW_TEXT;
3497 }
3498
3499 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3500    The information of the detected coding system is set in CODING.  */
3501
3502 void
3503 detect_coding (coding, src, src_bytes)
3504      struct coding_system *coding;
3505      unsigned char *src;
3506      int src_bytes;
3507 {
3508   unsigned int idx;
3509   int skip, mask, i;
3510   Lisp_Object val;
3511
3512   val = Vcoding_category_list;
3513   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3514   coding->heading_ascii = skip;
3515
3516   if (!mask) return;
3517
3518   /* We found a single coding system of the highest priority in MASK.  */
3519   idx = 0;
3520   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3521   if (! mask)
3522     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3523
3524   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3525
3526   if (coding->eol_type != CODING_EOL_UNDECIDED)
3527     {
3528       Lisp_Object tmp;
3529
3530       tmp = Fget (val, Qeol_type);
3531       if (VECTORP (tmp))
3532         val = XVECTOR (tmp)->contents[coding->eol_type];
3533     }
3534   setup_coding_system (val, coding);
3535   /* Set this again because setup_coding_system reset this member.  */
3536   coding->heading_ascii = skip;
3537 }
3538
3539 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3540    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3541    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3542
3543    How many non-eol characters are at the head is returned as *SKIP.  */
3544
3545 #define MAX_EOL_CHECK_COUNT 3
3546
3547 static int
3548 detect_eol_type (source, src_bytes, skip)
3549      unsigned char *source;
3550      int src_bytes, *skip;
3551 {
3552   unsigned char *src = source, *src_end = src + src_bytes;
3553   unsigned char c;
3554   int total = 0;                /* How many end-of-lines are found so far.  */
3555   int eol_type = CODING_EOL_UNDECIDED;
3556   int this_eol_type;
3557
3558   *skip = 0;
3559
3560   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3561     {
3562       c = *src++;
3563       if (c == '\n' || c == '\r')
3564         {
3565           if (*skip == 0)
3566             *skip = src - 1 - source;
3567           total++;
3568           if (c == '\n')
3569             this_eol_type = CODING_EOL_LF;
3570           else if (src >= src_end || *src != '\n')
3571             this_eol_type = CODING_EOL_CR;
3572           else
3573             this_eol_type = CODING_EOL_CRLF, src++;
3574
3575           if (eol_type == CODING_EOL_UNDECIDED)
3576             /* This is the first end-of-line.  */
3577             eol_type = this_eol_type;
3578           else if (eol_type != this_eol_type)
3579             {
3580               /* The found type is different from what found before.  */
3581               eol_type = CODING_EOL_INCONSISTENT;
3582               break;
3583             }
3584         }
3585     }
3586
3587   if (*skip == 0)
3588     *skip = src_end - source;
3589   return eol_type;
3590 }
3591
3592 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3593    is encoded.  If it detects an appropriate format of end-of-line, it
3594    sets the information in *CODING.  */
3595
3596 void
3597 detect_eol (coding, src, src_bytes)
3598      struct coding_system *coding;
3599      unsigned char *src;
3600      int src_bytes;
3601 {
3602   Lisp_Object val;
3603   int skip;
3604   int eol_type = detect_eol_type (src, src_bytes, &skip);
3605
3606   if (coding->heading_ascii > skip)
3607     coding->heading_ascii = skip;
3608   else
3609     skip = coding->heading_ascii;
3610
3611   if (eol_type == CODING_EOL_UNDECIDED)
3612     return;
3613   if (eol_type == CODING_EOL_INCONSISTENT)
3614     {
3615 #if 0
3616       /* This code is suppressed until we find a better way to
3617          distinguish raw text file and binary file.  */
3618
3619       /* If we have already detected that the coding is raw-text, the
3620          coding should actually be no-conversion.  */
3621       if (coding->type == coding_type_raw_text)
3622         {
3623           setup_coding_system (Qno_conversion, coding);
3624           return;
3625         }
3626       /* Else, let's decode only text code anyway.  */
3627 #endif /* 0 */
3628       eol_type = CODING_EOL_LF;
3629     }
3630
3631   val = Fget (coding->symbol, Qeol_type);
3632   if (VECTORP (val) && XVECTOR (val)->size == 3)
3633     {
3634       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3635       coding->heading_ascii = skip;
3636     }
3637 }
3638
/* Number of spare bytes added on top of every conversion buffer size
   estimate.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which SRC_BYTES is multiplied to estimate the size of a
   decoding buffer for CODING (a pointer to struct coding_system):
   3 for ISO2022, 2 for SJIS and BIG5, 1 for raw text, the decoder
   program's own buf_magnification for CCL, and 2 for anything else.

   CODING is parenthesized in the expansion so that any pointer
   expression (e.g. `&cs' or `p + 1') can be passed.  Note that it is
   evaluated more than once, so it must not have side effects.  */
#define DECODING_BUFFER_MAG(coding)                                     \
  ((coding)->type == coding_type_iso2022                                \
   ? 3                                                                  \
   : (((coding)->type == coding_type_sjis                               \
       || (coding)->type == coding_type_big5)                           \
      ? 2                                                               \
      : ((coding)->type == coding_type_raw_text                         \
         ? 1                                                            \
         : ((coding)->type == coding_type_ccl                           \
            ? (coding)->spec.ccl.decoder.buf_magnification              \
            : 2))))
3651
3652 /* Return maximum size (bytes) of a buffer enough for decoding
3653    SRC_BYTES of text encoded in CODING.  */
3654
3655 int
3656 decoding_buffer_size (coding, src_bytes)
3657      struct coding_system *coding;
3658      int src_bytes;
3659 {
3660   return (src_bytes * DECODING_BUFFER_MAG (coding)
3661           + CONVERSION_BUFFER_EXTRA_ROOM);
3662 }
3663
3664 /* Return maximum size (bytes) of a buffer enough for encoding
3665    SRC_BYTES of text to CODING.  */
3666
3667 int
3668 encoding_buffer_size (coding, src_bytes)
3669      struct coding_system *coding;
3670      int src_bytes;
3671 {
3672   int magnification;
3673
3674   if (coding->type == coding_type_ccl)
3675     magnification = coding->spec.ccl.encoder.buf_magnification;
3676   else
3677     magnification = 3;
3678
3679   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3680 }
3681
/* Smallest size, in bytes, that the shared conversion buffer is ever
   grown to.  May be overridden at compile time; it is assumed to be
   positive.  */
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* The shared work buffer handed out by get_conversion_buffer, and
   its current size in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a buffer of at least SIZE bytes to be used for
   encoding or decoding.  The buffer is shared: the pointer returned
   by an earlier call may be freed by a later call that has to grow
   the buffer, and the old contents are not carried over.

   Memory is obtained with xmalloc, whose result is used unchecked
   here, so this function itself never returns NULL on allocation
   failure (presumably xmalloc reports that condition on its own --
   confirm).  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* Start from a sane minimum.  Without this, a buffer size that
         is still zero (nothing allocated yet) would make the doubling
         loop below spin forever, since 0 * 2 == 0.  */
      if (real_size < MINIMUM_CONVERSION_BUFFER_SIZE)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;

      while (real_size < size)
        real_size *= 2;

      /* Allocate the new buffer before releasing the old one.  */
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3710
3711 int
3712 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3713      struct coding_system *coding;
3714      unsigned char *source, *destination;
3715      int src_bytes, dst_bytes, encodep;
3716 {
3717   struct ccl_program *ccl
3718     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3719   int result;
3720
3721   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3722
3723   coding->produced = ccl_driver (ccl, source, destination,
3724                                  src_bytes, dst_bytes, &(coding->consumed));
3725   coding->produced_char
3726     = (encodep
3727        ? coding->produced
3728        : multibyte_chars_in_text (destination, coding->produced));
3729   coding->consumed_char
3730     = multibyte_chars_in_text (source, coding->consumed);
3731
3732   switch (ccl->status)
3733     {
3734     case CCL_STAT_SUSPEND_BY_SRC:
3735       result = CODING_FINISH_INSUFFICIENT_SRC;
3736       break;
3737     case CCL_STAT_SUSPEND_BY_DST:
3738       result = CODING_FINISH_INSUFFICIENT_DST;
3739       break;
3740     case CCL_STAT_QUIT:
3741     case CCL_STAT_INVALID_CMD:
3742       result = CODING_FINISH_INTERRUPT;
3743       break;
3744     default:
3745       result = CODING_FINISH_NORMAL;
3746       break;
3747     }
3748   return result;
3749 }
3750
3751 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3752    decoding, it may detect coding system and format of end-of-line if
3753    those are not yet decided.
3754
3755    This function does not make full use of DESTINATION buffer.  For
3756    instance, if coding->type is coding_type_iso2022, it uses only
3757    (DST_BYTES - 7) bytes of DESTINATION buffer.  In the case that
3758    DST_BYTES is decided by the function decoding_buffer_size, it
3759    contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
3760    So, this function can decode the full SOURCE.  But, in the other
3761    case, if you want to avoid carry over, you must supply at least 7
3762    bytes more area in DESTINATION buffer than expected maximum bytes
3763    that will be produced by this function.  */
3764
3765 int
3766 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3767      struct coding_system *coding;
3768      unsigned char *source, *destination;
3769      int src_bytes, dst_bytes;
3770 {
3771   int result;
3772
3773   if (src_bytes <= 0
3774       && coding->type != coding_type_ccl
3775       && ! (coding->mode & CODING_MODE_LAST_BLOCK
3776             && CODING_REQUIRE_FLUSHING (coding)))
3777     {
3778       coding->produced = coding->produced_char = 0;
3779       coding->consumed = coding->consumed_char = 0;
3780       coding->fake_multibyte = 0;
3781       return CODING_FINISH_NORMAL;
3782     }
3783
3784   if (coding->type == coding_type_undecided)
3785     detect_coding (coding, source, src_bytes);
3786
3787   if (coding->eol_type == CODING_EOL_UNDECIDED)
3788     detect_eol (coding, source, src_bytes);
3789
3790   switch (coding->type)
3791     {
3792     case coding_type_emacs_mule:
3793     case coding_type_undecided:
3794     case coding_type_raw_text:
3795       if (coding->eol_type == CODING_EOL_LF
3796           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3797         goto label_no_conversion;
3798       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3799       break;
3800
3801     case coding_type_sjis:
3802       result = decode_coding_sjis_big5 (coding, source, destination,
3803                                         src_bytes, dst_bytes, 1);
3804       break;
3805
3806     case coding_type_iso2022:
3807       result = decode_coding_iso2022 (coding, source, destination,
3808                                       src_bytes, dst_bytes);
3809       break;
3810
3811     case coding_type_big5:
3812       result = decode_coding_sjis_big5 (coding, source, destination,
3813                                         src_bytes, dst_bytes, 0);
3814       break;
3815
3816     case coding_type_ccl:
3817       result = ccl_coding_driver (coding, source, destination,
3818                                   src_bytes, dst_bytes, 0);
3819       break;
3820
3821     default:                    /* i.e. case coding_type_no_conversion: */
3822     label_no_conversion:
3823       if (dst_bytes && src_bytes > dst_bytes)
3824         {
3825           coding->produced = dst_bytes;
3826           result = CODING_FINISH_INSUFFICIENT_DST;
3827         }
3828       else
3829         {
3830           coding->produced = src_bytes;
3831           result = CODING_FINISH_NORMAL;
3832         }
3833       if (dst_bytes)
3834         bcopy (source, destination, coding->produced);
3835       else
3836         safe_bcopy (source, destination, coding->produced);
3837       coding->fake_multibyte = 1;
3838       coding->consumed
3839         = coding->consumed_char = coding->produced_char = coding->produced;
3840       break;
3841     }
3842
3843   return result;
3844 }
3845
3846 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".
3847
3848    This function does not make full use of DESTINATION buffer.  For
3849    instance, if coding->type is coding_type_iso2022, it uses only
3850    (DST_BYTES - 20) bytes of DESTINATION buffer.  In the case that
3851    DST_BYTES is decided by the function encoding_buffer_size, it
3852    contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
3853    So, this function can encode the full SOURCE.  But, in the other
3854    case, if you want to avoid carry over, you must supply at least 20
3855    bytes more area in DESTINATION buffer than expected maximum bytes
3856    that will be produced by this function.  */
3857
3858 int
3859 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3860      struct coding_system *coding;
3861      unsigned char *source, *destination;
3862      int src_bytes, dst_bytes;
3863 {
3864   int result;
3865
3866   if (src_bytes <= 0
3867       && ! (coding->mode & CODING_MODE_LAST_BLOCK
3868             && CODING_REQUIRE_FLUSHING (coding)))
3869     {
3870       coding->produced = coding->produced_char = 0;
3871       coding->consumed = coding->consumed_char = 0;
3872       coding->fake_multibyte = 0;
3873       return CODING_FINISH_NORMAL;
3874     }
3875
3876   switch (coding->type)
3877     {
3878     case coding_type_emacs_mule:
3879     case coding_type_undecided:
3880     case coding_type_raw_text:
3881       if (coding->eol_type == CODING_EOL_LF
3882           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3883         goto label_no_conversion;
3884       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3885       break;
3886
3887     case coding_type_sjis:
3888       result = encode_coding_sjis_big5 (coding, source, destination,
3889                                         src_bytes, dst_bytes, 1);
3890       break;
3891
3892     case coding_type_iso2022:
3893       result = encode_coding_iso2022 (coding, source, destination,
3894                                       src_bytes, dst_bytes);
3895       break;
3896
3897     case coding_type_big5:
3898       result = encode_coding_sjis_big5 (coding, source, destination,
3899                                         src_bytes, dst_bytes, 0);
3900       break;
3901
3902     case coding_type_ccl:
3903       result = ccl_coding_driver (coding, source, destination,
3904                                   src_bytes, dst_bytes, 1);
3905       break;
3906
3907     default:                    /* i.e. case coding_type_no_conversion: */
3908     label_no_conversion:
3909       if (dst_bytes && src_bytes > dst_bytes)
3910         {
3911           coding->produced = dst_bytes;
3912           result = CODING_FINISH_INSUFFICIENT_DST;
3913         }
3914       else
3915         {
3916           coding->produced = src_bytes;
3917           result = CODING_FINISH_NORMAL;
3918         }
3919       if (dst_bytes)
3920         bcopy (source, destination, coding->produced);
3921       else
3922         safe_bcopy (source, destination, coding->produced);
3923       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3924         {
3925           unsigned char *p = destination, *pend = p + coding->produced;
3926           while (p < pend)
3927             if (*p++ == '\015') p[-1] = '\n';
3928         }
3929       coding->fake_multibyte = 1;
3930       coding->consumed
3931         = coding->consumed_char = coding->produced_char = coding->produced;
3932       break;
3933     }
3934
3935   return result;
3936 }
3937
3938 /* Scan text in the region between *BEG and *END (byte positions),
3939    skip characters which we don't have to decode by coding system
3940    CODING at the head and tail, then set *BEG and *END to the region
3941    of the text we actually have to convert.  The caller should move
3942    the gap out of the region in advance.
3943
3944    If STR is not NULL, *BEG and *END are indices into STR.  */
3945
3946 static void
3947 shrink_decoding_region (beg, end, coding, str)
3948      int *beg, *end;
3949      struct coding_system *coding;
3950      unsigned char *str;
3951 {
3952   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3953   int eol_conversion;
3954   Lisp_Object translation_table;
3955
3956   if (coding->type == coding_type_ccl
3957       || coding->type == coding_type_undecided
3958       || !NILP (coding->post_read_conversion))
3959     {
3960       /* We can't skip any data.  */
3961       return;
3962     }
3963   else if (coding->type == coding_type_no_conversion)
3964     {
3965       /* We need no conversion, but don't have to skip any data here.
3966          Decoding routine handles them effectively anyway.  */
3967       return;
3968     }
3969
3970   translation_table = coding->translation_table_for_decode;
3971   if (NILP (translation_table) && !NILP (Venable_character_translation))
3972     translation_table = Vstandard_translation_table_for_decode;
3973   if (CHAR_TABLE_P (translation_table))
3974     {
3975       int i;
3976       for (i = 0; i < 128; i++)
3977         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
3978           break;
3979       if (i < 128)
3980         /* Some ASCII character should be tranlsated.  We give up
3981            shrinking.  */
3982         return;
3983     }
3984
3985   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3986
3987   if ((! eol_conversion) && (coding->heading_ascii >= 0))
3988     /* Detection routine has already found how much we can skip at the
3989        head.  */
3990     *beg += coding->heading_ascii;
3991
3992   if (str)
3993     {
3994       begp_orig = begp = str + *beg;
3995       endp_orig = endp = str + *end;
3996     }
3997   else
3998     {
3999       begp_orig = begp = BYTE_POS_ADDR (*beg);
4000       endp_orig = endp = begp + *end - *beg;
4001     }
4002
4003   switch (coding->type)
4004     {
4005     case coding_type_emacs_mule:
4006     case coding_type_raw_text:
4007       if (eol_conversion)
4008         {
4009           if (coding->heading_ascii < 0)
4010             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
4011           while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
4012             endp--;
4013           /* Do not consider LF as ascii if preceded by CR, since that
4014              confuses eol decoding. */
4015           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4016             endp++;
4017         }
4018       else
4019         begp = endp;
4020       break;
4021
4022     case coding_type_sjis:
4023     case coding_type_big5:
4024       /* We can skip all ASCII characters at the head.  */
4025       if (coding->heading_ascii < 0)
4026         {
4027           if (eol_conversion)
4028             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4029           else
4030             while (begp < endp && *begp < 0x80) begp++;
4031         }
4032       /* We can skip all ASCII characters at the tail except for the
4033          second byte of SJIS or BIG5 code.  */
4034       if (eol_conversion)
4035         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4036       else
4037         while (begp < endp && endp[-1] < 0x80) endp--;
4038       /* Do not consider LF as ascii if preceded by CR, since that
4039          confuses eol decoding. */
4040       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4041         endp++;
4042       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4043         endp++;
4044       break;
4045
4046     default:            /* i.e. case coding_type_iso2022: */
4047       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4048         /* We can't skip any data.  */
4049         break;
4050       if (coding->heading_ascii < 0)
4051         {
4052           /* We can skip all ASCII characters at the head except for a
4053              few control codes.  */
4054           while (begp < endp && (c = *begp) < 0x80
4055                  && c != ISO_CODE_CR && c != ISO_CODE_SO
4056                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
4057                  && (!eol_conversion || c != ISO_CODE_LF))
4058             begp++;
4059         }
4060       switch (coding->category_idx)
4061         {
4062         case CODING_CATEGORY_IDX_ISO_8_1:
4063         case CODING_CATEGORY_IDX_ISO_8_2:
4064           /* We can skip all ASCII characters at the tail.  */
4065           if (eol_conversion)
4066             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
4067           else
4068             while (begp < endp && endp[-1] < 0x80) endp--;
4069           /* Do not consider LF as ascii if preceded by CR, since that
4070              confuses eol decoding. */
4071           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4072             endp++;
4073           break;
4074
4075         case CODING_CATEGORY_IDX_ISO_7:
4076         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
4077           {
4078             /* We can skip all charactes at the tail except for 8-bit
4079                codes and ESC and the following 2-byte at the tail.  */
4080             unsigned char *eight_bit = NULL;
4081
4082             if (eol_conversion)
4083               while (begp < endp
4084                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
4085                 {
4086                   if (!eight_bit && c & 0x80) eight_bit = endp;
4087                   endp--;
4088                 }
4089             else
4090               while (begp < endp
4091                      && (c = endp[-1]) != ISO_CODE_ESC)
4092                 {
4093                   if (!eight_bit && c & 0x80) eight_bit = endp;
4094                   endp--;
4095                 }
4096             /* Do not consider LF as ascii if preceded by CR, since that
4097                confuses eol decoding. */
4098             if (begp < endp && endp < endp_orig
4099                 && endp[-1] == '\r' && endp[0] == '\n')
4100               endp++;
4101             if (begp < endp && endp[-1] == ISO_CODE_ESC)
4102               {
4103                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
4104                   /* This is an ASCII designation sequence.  We can
4105                      surely skip the tail.  But, if we have
4106                      encountered an 8-bit code, skip only the codes
4107                      after that.  */
4108                   endp = eight_bit ? eight_bit : endp + 2;
4109                 else
4110                   /* Hmmm, we can't skip the tail.  */
4111                   endp = endp_orig;
4112               }
4113             else if (eight_bit)
4114               endp = eight_bit;
4115           }
4116         }
4117     }
4118   *beg += begp - begp_orig;
4119   *end += endp - endp_orig;
4120   return;
4121 }
4122
4123 /* Like shrink_decoding_region but for encoding.  */
4124
4125 static void
4126 shrink_encoding_region (beg, end, coding, str)
4127      int *beg, *end;
4128      struct coding_system *coding;
4129      unsigned char *str;
4130 {
4131   unsigned char *begp_orig, *begp, *endp_orig, *endp;
4132   int eol_conversion;
4133   Lisp_Object translation_table;
4134
4135   if (coding->type == coding_type_ccl)
4136     /* We can't skip any data.  */
4137     return;
4138   else if (coding->type == coding_type_no_conversion)
4139     {
4140       /* We need no conversion.  */
4141       *beg = *end;
4142       return;
4143     }
4144
4145   translation_table = coding->translation_table_for_encode;
4146   if (NILP (translation_table) && !NILP (Venable_character_translation))
4147     translation_table = Vstandard_translation_table_for_encode;
4148   if (CHAR_TABLE_P (translation_table))
4149     {
4150       int i;
4151       for (i = 0; i < 128; i++)
4152         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4153           break;
4154       if (i < 128)
4155         /* Some ASCII character should be tranlsated.  We give up
4156            shrinking.  */
4157         return;
4158     }
4159
4160   if (str)
4161     {
4162       begp_orig = begp = str + *beg;
4163       endp_orig = endp = str + *end;
4164     }
4165   else
4166     {
4167       begp_orig = begp = BYTE_POS_ADDR (*beg);
4168       endp_orig = endp = begp + *end - *beg;
4169     }
4170
4171   eol_conversion = (coding->eol_type == CODING_EOL_CR
4172                     || coding->eol_type == CODING_EOL_CRLF);
4173
4174   /* Here, we don't have to check coding->pre_write_conversion because
4175      the caller is expected to have handled it already.  */
4176   switch (coding->type)
4177     {
4178     case coding_type_undecided:
4179     case coding_type_emacs_mule:
4180     case coding_type_raw_text:
4181       if (eol_conversion)
4182         {
4183           while (begp < endp && *begp != '\n') begp++;
4184           while (begp < endp && endp[-1] != '\n') endp--;
4185         }
4186       else
4187         begp = endp;
4188       break;
4189
4190     case coding_type_iso2022:
4191       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4192         /* We can't skip any data.  */
4193         break;
4194       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4195         {
4196           unsigned char *bol = begp;
4197           while (begp < endp && *begp < 0x80)
4198             {
4199               begp++;
4200               if (begp[-1] == '\n')
4201                 bol = begp;
4202             }
4203           begp = bol;
4204           goto label_skip_tail;
4205         }
4206       /* fall down ... */
4207
4208     default:
4209       /* We can skip all ASCII characters at the head and tail.  */
4210       if (eol_conversion)
4211         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4212       else
4213         while (begp < endp && *begp < 0x80) begp++;
4214     label_skip_tail:
4215       if (eol_conversion)
4216         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4217       else
4218         while (begp < endp && *(endp - 1) < 0x80) endp--;
4219       break;
4220     }
4221
4222   *beg += begp - begp_orig;
4223   *end += endp - endp_orig;
4224   return;
4225 }
4226
/* Shrinking a conversion region has a cost of its own, so it is
   attempted only for regions longer than this many bytes.  */
static int shrink_conversion_region_threshhold = 1024;

/* Shrink the region *BEG .. *END for CODING, using the encoding
   variant if ENCODEP is nonzero and the decoding variant otherwise.
   Regions not longer than the threshold above are left alone.  STR
   is passed through to the shrinking function.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) <= shrink_conversion_region_threshhold)         \
      break;                                                            \
    if (encodep)                                                        \
      shrink_encoding_region (beg, end, coding, str);                   \
    else                                                                \
      shrink_decoding_region (beg, end, coding, str);                   \
  } while (0)
4240
4241 static Lisp_Object
4242 code_convert_region_unwind (dummy)
4243      Lisp_Object dummy;
4244 {
4245   inhibit_pre_post_conversion = 0;
4246   return Qnil;
4247 }
4248
4249 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4250    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4251    coding system CODING, and return the status code of code conversion
4252    (currently, this value has no meaning).
4253
4254    How many characters (and bytes) are converted to how many
4255    characters (and bytes) are recorded in members of the structure
4256    CODING.
4257
4258    If REPLACE is nonzero, we do various things as if the original text
4259    is deleted and a new text is inserted.  See the comments in
4260    replace_range (insdel.c) to know what we are doing.  */
4261
4262 int
4263 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4264      int from, from_byte, to, to_byte, encodep, replace;
4265      struct coding_system *coding;
4266 {
4267   int len = to - from, len_byte = to_byte - from_byte;
4268   int require, inserted, inserted_byte;
4269   int head_skip, tail_skip, total_skip;
4270   Lisp_Object saved_coding_symbol;
4271   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
4272   int first = 1;
4273   int fake_multibyte = 0;
4274   unsigned char *src, *dst;
4275   Lisp_Object deletion;
4276   int orig_point = PT, orig_len = len;
4277   int prev_Z;
4278
4279   deletion = Qnil;
4280   saved_coding_symbol = Qnil;
4281
4282   if (from < PT && PT < to)
4283     {
4284       TEMP_SET_PT_BOTH (from, from_byte);
4285       orig_point = from;
4286     }
4287
4288   if (replace)
4289     {
4290       int saved_from = from;
4291
4292       prepare_to_modify_buffer (from, to, &from);
4293       if (saved_from != from)
4294         {
4295           to = from + len;
4296           if (multibyte)
4297             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4298           else
4299             from_byte = from, to_byte = to;
4300           len_byte = to_byte - from_byte;
4301         }
4302     }
4303
4304   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4305     {
4306       /* We must detect encoding of text and eol format.  */
4307
4308       if (from < GPT && to > GPT)
4309         move_gap_both (from, from_byte);
4310       if (coding->type == coding_type_undecided)
4311         {
4312           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4313           if (coding->type == coding_type_undecided)
4314             /* It seems that the text contains only ASCII, but we
4315                should not left it undecided because the deeper
4316                decoding routine (decode_coding) tries to detect the
4317                encodings again in vain.  */
4318             coding->type = coding_type_emacs_mule;
4319         }
4320       if (coding->eol_type == CODING_EOL_UNDECIDED)
4321         {
4322           saved_coding_symbol = coding->symbol;
4323           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4324           if (coding->eol_type == CODING_EOL_UNDECIDED)
4325             coding->eol_type = CODING_EOL_LF;
4326           /* We had better recover the original eol format if we
4327              encounter an inconsitent eol format while decoding.  */
4328           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4329         }
4330     }
4331
4332   coding->consumed_char = len, coding->consumed = len_byte;
4333
4334   if (encodep
4335       ? ! CODING_REQUIRE_ENCODING (coding)
4336       : ! CODING_REQUIRE_DECODING (coding))
4337     {
4338       coding->produced = len_byte;
4339       if (multibyte
4340           && ! replace
4341           /* See the comment of the member heading_ascii in coding.h.  */
4342           && coding->heading_ascii < len_byte)
4343         {
4344           /* We still may have to combine byte at the head and the
4345              tail of the text in the region.  */
4346           if (from < GPT && GPT < to)
4347             move_gap_both (to, to_byte);
4348           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4349           adjust_after_insert (from, from_byte, to, to_byte, len);
4350           coding->produced_char = len;
4351         }
4352       else
4353         {
4354           if (!replace)
4355             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4356           coding->produced_char = len_byte;
4357         }
4358       return 0;
4359     }
4360
4361   /* Now we convert the text.  */
4362
4363   /* For encoding, we must process pre-write-conversion in advance.  */
4364   if (encodep
4365       && ! NILP (coding->pre_write_conversion)
4366       && SYMBOLP (coding->pre_write_conversion)
4367       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4368     {
4369       /* The function in pre-write-conversion may put a new text in a
4370          new buffer.  */
4371       struct buffer *prev = current_buffer;
4372       Lisp_Object new;
4373       int count = specpdl_ptr - specpdl;
4374
4375       record_unwind_protect (code_convert_region_unwind, Qnil);
4376       /* We should not call any more pre-write/post-read-conversion
4377          functions while this pre-write-conversion is running.  */
4378       inhibit_pre_post_conversion = 1;
4379       call2 (coding->pre_write_conversion,
4380              make_number (from), make_number (to));
4381       inhibit_pre_post_conversion = 0;
4382       /* Discard the unwind protect.  */
4383       specpdl_ptr--;
4384
4385       if (current_buffer != prev)
4386         {
4387           len = ZV - BEGV;
4388           new = Fcurrent_buffer ();
4389           set_buffer_internal_1 (prev);
4390           del_range_2 (from, from_byte, to, to_byte);
4391           TEMP_SET_PT_BOTH (from, from_byte);
4392           insert_from_buffer (XBUFFER (new), 1, len, 0);
4393           Fkill_buffer (new);
4394           if (orig_point >= to)
4395             orig_point += len - orig_len;
4396           else if (orig_point > from)
4397             orig_point = from;
4398           orig_len = len;
4399           to = from + len;
4400           from_byte = multibyte ? CHAR_TO_BYTE (from) : from_byte;
4401           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4402           len_byte = to_byte - from_byte;
4403           TEMP_SET_PT_BOTH (from, from_byte);
4404         }
4405     }
4406
4407   if (replace)
4408     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4409
4410   /* Try to skip the heading and tailing ASCIIs.  */
4411   {
4412     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4413
4414     if (from < GPT && GPT < to)
4415       move_gap_both (from, from_byte);
4416     SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
4417     if (from_byte == to_byte
4418         && coding->type != coding_type_ccl
4419         && ! (coding->mode & CODING_MODE_LAST_BLOCK
4420               && CODING_REQUIRE_FLUSHING (coding)))
4421       {
4422         coding->produced = len_byte;
4423         coding->produced_char = multibyte ? len : len_byte;
4424         if (!replace)
4425           /* We must record and adjust for this new text now.  */
4426           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4427         return 0;
4428       }
4429
4430     head_skip = from_byte - from_byte_orig;
4431     tail_skip = to_byte_orig - to_byte;
4432     total_skip = head_skip + tail_skip;
4433     from += head_skip;
4434     to -= tail_skip;
4435     len -= total_skip; len_byte -= total_skip;
4436   }
4437
4438   /* The code conversion routine can not preserve text properties for
4439      now.  So, we must remove all text properties in the region.
4440      Here, we must suppress all modification hooks.  */
4441   if (replace)
4442     {
4443       int saved_inhibit_modification_hooks = inhibit_modification_hooks;
4444       inhibit_modification_hooks = 1;
4445       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
4446       inhibit_modification_hooks = saved_inhibit_modification_hooks;
4447     }
4448
4449   /* For converion, we must put the gap before the text in addition to
4450      making the gap larger for efficient decoding.  The required gap
4451      size starts from 2000 which is the magic number used in make_gap.
4452      But, after one batch of conversion, it will be incremented if we
4453      find that it is not enough .  */
4454   require = 2000;
4455
4456   if (GAP_SIZE  < require)
4457     make_gap (require - GAP_SIZE);
4458   move_gap_both (from, from_byte);
4459
4460   inserted = inserted_byte = 0;
4461   src = GAP_END_ADDR, dst = GPT_ADDR;
4462
4463   GAP_SIZE += len_byte;
4464   ZV -= len;
4465   Z -= len;
4466   ZV_BYTE -= len_byte;
4467   Z_BYTE -= len_byte;
4468
4469   if (GPT - BEG < BEG_UNCHANGED)
4470     BEG_UNCHANGED = GPT - BEG;
4471   if (Z - GPT < END_UNCHANGED)
4472     END_UNCHANGED = Z - GPT;
4473
4474   for (;;)
4475     {
4476       int result;
4477
4478       /* The buffer memory is changed from:
4479          +--------+converted-text+---------+-------original-text------+---+
4480          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4481                   |<------------------- GAP_SIZE -------------------->|  */
4482       if (encodep)
4483         result = encode_coding (coding, src, dst, len_byte, 0);
4484       else
4485         result = decode_coding (coding, src, dst, len_byte, 0);
4486       /* to:
4487          +--------+-------converted-text--------+--+---original-text--+---+
4488          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4489                   |<------------------- GAP_SIZE -------------------->|  */
4490       if (coding->fake_multibyte)
4491         fake_multibyte = 1;
4492
4493       if (!encodep && !multibyte)
4494         coding->produced_char = coding->produced;
4495       inserted += coding->produced_char;
4496       inserted_byte += coding->produced;
4497       len_byte -= coding->consumed;
4498       src += coding->consumed;
4499       dst += inserted_byte;
4500
4501       if (result == CODING_FINISH_NORMAL)
4502         {
4503           src += len_byte;
4504           break;
4505         }
4506       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4507         {
4508           unsigned char *pend = dst, *p = pend - inserted_byte;
4509           Lisp_Object eol_type;
4510
4511           /* Encode LFs back to the original eol format (CR or CRLF).  */
4512           if (coding->eol_type == CODING_EOL_CR)
4513             {
4514               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4515             }
4516           else
4517             {
4518               int count = 0;
4519
4520               while (p < pend) if (*p++ == '\n') count++;
4521               if (src - dst < count)
4522                 {
4523                   /* We don't have sufficient room for encoding LFs
4524                      back to CRLF.  We must record converted and
4525                      not-yet-converted text back to the buffer
4526                      content, enlarge the gap, then record them out of
4527                      the buffer contents again.  */
4528                   int add = len_byte + inserted_byte;
4529
4530                   GAP_SIZE -= add;
4531                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4532                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4533                   make_gap (count - GAP_SIZE);
4534                   GAP_SIZE += add;
4535                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4536                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4537                   /* Don't forget to update SRC, DST, and PEND.  */
4538                   src = GAP_END_ADDR - len_byte;
4539                   dst = GPT_ADDR + inserted_byte;
4540                   pend = dst;
4541                 }
4542               inserted += count;
4543               inserted_byte += count;
4544               coding->produced += count;
4545               p = dst = pend + count;
4546               while (count)
4547                 {
4548                   *--p = *--pend;
4549                   if (*p == '\n') count--, *--p = '\r';
4550                 }
4551             }
4552
4553           /* Suppress eol-format conversion in the further conversion.  */
4554           coding->eol_type = CODING_EOL_LF;
4555
4556           /* Set the coding system symbol to that for Unix-like EOL.  */
4557           eol_type = Fget (saved_coding_symbol, Qeol_type);
4558           if (VECTORP (eol_type)
4559               && XVECTOR (eol_type)->size == 3
4560               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
4561             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
4562           else
4563             coding->symbol = saved_coding_symbol;
4564
4565           continue;
4566         }
4567       if (len_byte <= 0)
4568         {
4569           if (coding->type != coding_type_ccl
4570               || coding->mode & CODING_MODE_LAST_BLOCK)
4571             break;
4572           coding->mode |= CODING_MODE_LAST_BLOCK;
4573           continue;
4574         }
4575       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4576         {
4577           /* The source text ends in invalid codes.  Let's just
4578              make them valid buffer contents, and finish conversion.  */
4579           inserted += len_byte;
4580           inserted_byte += len_byte;
4581           while (len_byte--)
4582             *dst++ = *src++;
4583           fake_multibyte = 1;
4584           break;
4585         }
4586       if (result == CODING_FINISH_INTERRUPT)
4587         {
4588           /* The conversion procedure was interrupted by a user.  */
4589           fake_multibyte = 1;
4590           break;
4591         }
4592       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
4593       if (coding->consumed < 1)
4594         {
4595           /* It's quite strange to require more memory without
4596              consuming any bytes.  Perhaps CCL program bug.  */
4597           fake_multibyte = 1;
4598           break;
4599         }
4600       if (first)
4601         {
4602           /* We have just done the first batch of conversion which was
4603              stoped because of insufficient gap.  Let's reconsider the
4604              required gap size (i.e. SRT - DST) now.
4605
4606              We have converted ORIG bytes (== coding->consumed) into
4607              NEW bytes (coding->produced).  To convert the remaining
4608              LEN bytes, we may need REQUIRE bytes of gap, where:
4609                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4610                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4611              Here, we are sure that NEW >= ORIG.  */
4612           float ratio = coding->produced - coding->consumed;
4613           ratio /= coding->consumed;
4614           require = len_byte * ratio;
4615           first = 0;
4616         }
4617       if ((src - dst) < (require + 2000))
4618         {
4619           /* See the comment above the previous call of make_gap.  */
4620           int add = len_byte + inserted_byte;
4621
4622           GAP_SIZE -= add;
4623           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4624           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4625           make_gap (require + 2000);
4626           GAP_SIZE += add;
4627           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4628           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4629           /* Don't forget to update SRC, DST.  */
4630           src = GAP_END_ADDR - len_byte;
4631           dst = GPT_ADDR + inserted_byte;
4632         }
4633     }
4634   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4635
4636   if (multibyte
4637       && (encodep
4638           || fake_multibyte
4639           || (to - from) != (to_byte - from_byte)))
4640     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4641
4642   /* If we have shrinked the conversion area, adjust it now.  */
4643   if (total_skip > 0)
4644     {
4645       if (tail_skip > 0)
4646         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4647       inserted += total_skip; inserted_byte += total_skip;
4648       GAP_SIZE += total_skip;
4649       GPT -= head_skip; GPT_BYTE -= head_skip;
4650       ZV -= total_skip; ZV_BYTE -= total_skip;
4651       Z -= total_skip; Z_BYTE -= total_skip;
4652       from -= head_skip; from_byte -= head_skip;
4653       to += tail_skip; to_byte += tail_skip;
4654     }
4655
4656   prev_Z = Z;
4657   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4658   inserted = Z - prev_Z;
4659
4660   if (! encodep && ! NILP (coding->post_read_conversion))
4661     {
4662       Lisp_Object val;
4663       int count = specpdl_ptr - specpdl;
4664
4665       if (from != PT)
4666         TEMP_SET_PT_BOTH (from, from_byte);
4667       prev_Z = Z;
4668       record_unwind_protect (code_convert_region_unwind, Qnil);
4669       /* We should not call any more pre-write/post-read-conversion
4670          functions while this post-read-conversion is running.  */
4671       inhibit_pre_post_conversion = 1;
4672       val = call1 (coding->post_read_conversion, make_number (inserted));
4673       inhibit_pre_post_conversion = 0;
4674       /* Discard the unwind protect.  */
4675       specpdl_ptr--;
4676       CHECK_NUMBER (val, 0);
4677       inserted += Z - prev_Z;
4678     }
4679
4680   if (orig_point >= from)
4681     {
4682       if (orig_point >= from + orig_len)
4683         orig_point += inserted - orig_len;
4684       else
4685         orig_point = from;
4686       TEMP_SET_PT (orig_point);
4687     }
4688
4689   signal_after_change (from, to - from, inserted);
4690
4691   {
4692     coding->consumed = to_byte - from_byte;
4693     coding->consumed_char = to - from;
4694     coding->produced = inserted_byte;
4695     coding->produced_char = inserted;
4696   }
4697
4698   return 0;
4699 }
4700
4701 Lisp_Object
4702 code_convert_string (str, coding, encodep, nocopy)
4703      Lisp_Object str;
4704      struct coding_system *coding;
4705      int encodep, nocopy;
4706 {
4707   int len;
4708   char *buf;
4709   int from = 0, to = XSTRING (str)->size;
4710   int to_byte = STRING_BYTES (XSTRING (str));
4711   struct gcpro gcpro1;
4712   Lisp_Object saved_coding_symbol;
4713   int result;
4714
4715   saved_coding_symbol = Qnil;
4716   if ((encodep && !NILP (coding->pre_write_conversion)
4717        || !encodep && !NILP (coding->post_read_conversion)))
4718     {
4719       /* Since we have to call Lisp functions which assume target text
4720          is in a buffer, after setting a temporary buffer, call
4721          code_convert_region.  */
4722       int count = specpdl_ptr - specpdl;
4723       struct buffer *prev = current_buffer;
4724       int multibyte = STRING_MULTIBYTE (str);
4725
4726       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4727       record_unwind_protect (code_convert_region_unwind, Qnil);
4728       inhibit_pre_post_conversion = 1;
4729       GCPRO1 (str);
4730       temp_output_buffer_setup (" *code-converting-work*");
4731       set_buffer_internal (XBUFFER (Vstandard_output));
4732       /* We must insert the contents of STR as is without
4733          unibyte<->multibyte conversion.  For that, we adjust the
4734          multibyteness of the working buffer to that of STR.  */
4735       Ferase_buffer ();         /* for safety */
4736       current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
4737       insert_from_string (str, 0, 0, to, to_byte, 0);
4738       UNGCPRO;
4739       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4740       /* Make a unibyte string if we are encoding, otherwise make a
4741          multibyte string.  */
4742       Fset_buffer_multibyte (encodep ? Qnil : Qt);
4743       str = make_buffer_string (BEGV, ZV, 0);
4744       return unbind_to (count, str);
4745     }
4746
4747   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4748     {
4749       /* See the comments in code_convert_region.  */
4750       if (coding->type == coding_type_undecided)
4751         {
4752           detect_coding (coding, XSTRING (str)->data, to_byte);
4753           if (coding->type == coding_type_undecided)
4754             coding->type = coding_type_emacs_mule;
4755         }
4756       if (coding->eol_type == CODING_EOL_UNDECIDED)
4757         {
4758           saved_coding_symbol = coding->symbol;
4759           detect_eol (coding, XSTRING (str)->data, to_byte);
4760           if (coding->eol_type == CODING_EOL_UNDECIDED)
4761             coding->eol_type = CODING_EOL_LF;
4762           /* We had better recover the original eol format if we
4763              encounter an inconsitent eol format while decoding.  */
4764           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4765         }
4766     }
4767
4768   if (encodep
4769       ? ! CODING_REQUIRE_ENCODING (coding)
4770       : ! CODING_REQUIRE_DECODING (coding))
4771     from = to_byte;
4772   else
4773     {
4774       /* Try to skip the heading and tailing ASCIIs.  */
4775       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
4776                                 encodep);
4777     }
4778   if (from == to_byte
4779       && coding->type != coding_type_ccl)
4780     return (nocopy ? str : Fcopy_sequence (str));
4781
4782   if (encodep)
4783     len = encoding_buffer_size (coding, to_byte - from);
4784   else
4785     len = decoding_buffer_size (coding, to_byte - from);
4786   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4787   GCPRO1 (str);
4788   buf = get_conversion_buffer (len);
4789   UNGCPRO;
4790
4791   if (from > 0)
4792     bcopy (XSTRING (str)->data, buf, from);
4793   result = (encodep
4794             ? encode_coding (coding, XSTRING (str)->data + from,
4795                              buf + from, to_byte - from, len)
4796             : decode_coding (coding, XSTRING (str)->data + from,
4797                              buf + from, to_byte - from, len));
4798   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4799     {
4800       /* We simple try to decode the whole string again but without
4801          eol-conversion this time.  */
4802       coding->eol_type = CODING_EOL_LF;
4803       coding->symbol = saved_coding_symbol;
4804       return code_convert_string (str, coding, encodep, nocopy);
4805     }
4806
4807   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4808          STRING_BYTES (XSTRING (str)) - to_byte);
4809
4810   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4811   if (encodep)
4812     str = make_unibyte_string (buf, len + coding->produced);
4813   else
4814     {
4815       int chars= (coding->fake_multibyte
4816                   ? multibyte_chars_in_text (buf + from, coding->produced)
4817                   : coding->produced_char);
4818       str = make_multibyte_string (buf, len + chars, len + coding->produced);
4819     }
4820
4821   return str;
4822 }
4823
4824 \f
4825 #ifdef emacs
4826 /*** 8. Emacs Lisp library functions ***/
4827
4828 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4829   "Return t if OBJECT is nil or a coding-system.\n\
4830 See the documentation of `make-coding-system' for information\n\
4831 about coding-system objects.")
4832   (obj)
4833      Lisp_Object obj;
4834 {
4835   if (NILP (obj))
4836     return Qt;
4837   if (!SYMBOLP (obj))
4838     return Qnil;
4839   /* Get coding-spec vector for OBJ.  */
4840   obj = Fget (obj, Qcoding_system);
4841   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4842           ? Qt : Qnil);
4843 }
4844
4845 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4846        Sread_non_nil_coding_system, 1, 1, 0,
4847   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4848   (prompt)
4849      Lisp_Object prompt;
4850 {
4851   Lisp_Object val;
4852   do
4853     {
4854       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4855                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4856     }
4857   while (XSTRING (val)->size == 0);
4858   return (Fintern (val, Qnil));
4859 }
4860
4861 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4862   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4863 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4864   (prompt, default_coding_system)
4865      Lisp_Object prompt, default_coding_system;
4866 {
4867   Lisp_Object val;
4868   if (SYMBOLP (default_coding_system))
4869     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4870   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4871                           Qt, Qnil, Qcoding_system_history,
4872                           default_coding_system, Qnil);
4873   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4874 }
4875
4876 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4877        1, 1, 0,
4878   "Check validity of CODING-SYSTEM.\n\
4879 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4880 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4881 The value of property should be a vector of length 5.")
4882   (coding_system)
4883      Lisp_Object coding_system;
4884 {
4885   CHECK_SYMBOL (coding_system, 0);
4886   if (!NILP (Fcoding_system_p (coding_system)))
4887     return coding_system;
4888   while (1)
4889     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4890 }
4891 \f
4892 Lisp_Object
4893 detect_coding_system (src, src_bytes, highest)
4894      unsigned char *src;
4895      int src_bytes, highest;
4896 {
4897   int coding_mask, eol_type;
4898   Lisp_Object val, tmp;
4899   int dummy;
4900
4901   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4902   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4903   if (eol_type == CODING_EOL_INCONSISTENT)
4904     eol_type = CODING_EOL_UNDECIDED;
4905
4906   if (!coding_mask)
4907     {
4908       val = Qundecided;
4909       if (eol_type != CODING_EOL_UNDECIDED)
4910         {
4911           Lisp_Object val2;
4912           val2 = Fget (Qundecided, Qeol_type);
4913           if (VECTORP (val2))
4914             val = XVECTOR (val2)->contents[eol_type];
4915         }
4916       return (highest ? val : Fcons (val, Qnil));
4917     }
4918
4919   /* At first, gather possible coding systems in VAL.  */
4920   val = Qnil;
4921   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCDR (tmp))
4922     {
4923       int idx
4924         = XFASTINT (Fget (XCAR (tmp), Qcoding_category_index));
4925       if (coding_mask & (1 << idx))
4926         {
4927           val = Fcons (Fsymbol_value (XCAR (tmp)), val);
4928           if (highest)
4929             break;
4930         }
4931     }
4932   if (!highest)
4933     val = Fnreverse (val);
4934
4935   /* Then, replace the elements with subsidiary coding systems.  */
4936   for (tmp = val; !NILP (tmp); tmp = XCDR (tmp))
4937     {
4938       if (eol_type != CODING_EOL_UNDECIDED
4939           && eol_type != CODING_EOL_INCONSISTENT)
4940         {
4941           Lisp_Object eol;
4942           eol = Fget (XCAR (tmp), Qeol_type);
4943           if (VECTORP (eol))
4944             XCAR (tmp) = XVECTOR (eol)->contents[eol_type];
4945         }
4946     }
4947   return (highest ? XCAR (val) : val);
4948 }
4949
4950 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4951        2, 3, 0,
4952   "Detect coding system of the text in the region between START and END.\n\
4953 Return a list of possible coding systems ordered by priority.\n\
4954 \n\
4955 If only ASCII characters are found, it returns a list of single element\n\
4956 `undecided' or its subsidiary coding system according to a detected\n\
4957 end-of-line format.\n\
4958 \n\
4959 If optional argument HIGHEST is non-nil, return the coding system of\n\
4960 highest priority.")
4961   (start, end, highest)
4962      Lisp_Object start, end, highest;
4963 {
4964   int from, to;
4965   int from_byte, to_byte;
4966
4967   CHECK_NUMBER_COERCE_MARKER (start, 0);
4968   CHECK_NUMBER_COERCE_MARKER (end, 1);
4969
4970   validate_region (&start, &end);
4971   from = XINT (start), to = XINT (end);
4972   from_byte = CHAR_TO_BYTE (from);
4973   to_byte = CHAR_TO_BYTE (to);
4974
4975   if (from < GPT && to >= GPT)
4976     move_gap_both (to, to_byte);
4977
4978   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4979                                to_byte - from_byte,
4980                                !NILP (highest));
4981 }
4982
4983 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4984        1, 2, 0,
4985   "Detect coding system of the text in STRING.\n\
4986 Return a list of possible coding systems ordered by priority.\n\
4987 \n\
4988 If only ASCII characters are found, it returns a list of single element\n\
4989 `undecided' or its subsidiary coding system according to a detected\n\
4990 end-of-line format.\n\
4991 \n\
4992 If optional argument HIGHEST is non-nil, return the coding system of\n\
4993 highest priority.")
4994   (string, highest)
4995      Lisp_Object string, highest;
4996 {
4997   CHECK_STRING (string, 0);
4998
4999   return detect_coding_system (XSTRING (string)->data,
5000                                STRING_BYTES (XSTRING (string)),
5001                                !NILP (highest));
5002 }
5003
5004 Lisp_Object
5005 code_convert_region1 (start, end, coding_system, encodep)
5006      Lisp_Object start, end, coding_system;
5007      int encodep;
5008 {
5009   struct coding_system coding;
5010   int from, to, len;
5011
5012   CHECK_NUMBER_COERCE_MARKER (start, 0);
5013   CHECK_NUMBER_COERCE_MARKER (end, 1);
5014   CHECK_SYMBOL (coding_system, 2);
5015
5016   validate_region (&start, &end);
5017   from = XFASTINT (start);
5018   to = XFASTINT (end);
5019
5020   if (NILP (coding_system))
5021     return make_number (to - from);
5022
5023   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5024     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5025
5026   coding.mode |= CODING_MODE_LAST_BLOCK;
5027   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
5028                        &coding, encodep, 1);
5029   Vlast_coding_system_used = coding.symbol;
5030   return make_number (coding.produced_char);
5031 }
5032
5033 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
5034        3, 3, "r\nzCoding system: ",
5035   "Decode the current region by specified coding system.\n\
5036 When called from a program, takes three arguments:\n\
5037 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5038 This function sets `last-coding-system-used' to the precise coding system\n\
5039 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5040 not fully specified.)\n\
5041 It returns the length of the decoded text.")
5042   (start, end, coding_system)
5043      Lisp_Object start, end, coding_system;
5044 {
5045   return code_convert_region1 (start, end, coding_system, 0);
5046 }
5047
5048 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
5049        3, 3, "r\nzCoding system: ",
5050   "Encode the current region by specified coding system.\n\
5051 When called from a program, takes three arguments:\n\
5052 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
5053 This function sets `last-coding-system-used' to the precise coding system\n\
5054 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5055 not fully specified.)\n\
5056 It returns the length of the encoded text.")
5057   (start, end, coding_system)
5058      Lisp_Object start, end, coding_system;
5059 {
5060   return code_convert_region1 (start, end, coding_system, 1);
5061 }
5062
5063 Lisp_Object
5064 code_convert_string1 (string, coding_system, nocopy, encodep)
5065      Lisp_Object string, coding_system, nocopy;
5066      int encodep;
5067 {
5068   struct coding_system coding;
5069
5070   CHECK_STRING (string, 0);
5071   CHECK_SYMBOL (coding_system, 1);
5072
5073   if (NILP (coding_system))
5074     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
5075
5076   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5077     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5078
5079   coding.mode |= CODING_MODE_LAST_BLOCK;
5080   Vlast_coding_system_used = coding.symbol;
5081   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
5082 }
5083
5084 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
5085        2, 3, 0,
5086   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5087 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5088 if the decoding operation is trivial.\n\
5089 This function sets `last-coding-system-used' to the precise coding system\n\
5090 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5091 not fully specified.)")
5092   (string, coding_system, nocopy)
5093      Lisp_Object string, coding_system, nocopy;
5094 {
5095   return code_convert_string1 (string, coding_system, nocopy, 0);
5096 }
5097
5098 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
5099        2, 3, 0,
5100   "Encode STRING to CODING-SYSTEM, and return the result.\n\
5101 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
5102 if the encoding operation is trivial.\n\
5103 This function sets `last-coding-system-used' to the precise coding system\n\
5104 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
5105 not fully specified.)")
5106   (string, coding_system, nocopy)
5107      Lisp_Object string, coding_system, nocopy;
5108 {
5109   return code_convert_string1 (string, coding_system, nocopy, 1);
5110 }
5111
5112 /* Encode or decode STRING according to CODING_SYSTEM.
5113    Do not set Vlast_coding_system_used.  */
5114
5115 Lisp_Object
5116 code_convert_string_norecord (string, coding_system, encodep)
5117      Lisp_Object string, coding_system;
5118      int encodep;
5119 {
5120   struct coding_system coding;
5121
5122   CHECK_STRING (string, 0);
5123   CHECK_SYMBOL (coding_system, 1);
5124
5125   if (NILP (coding_system))
5126     return string;
5127
5128   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5129     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5130
5131   coding.mode |= CODING_MODE_LAST_BLOCK;
5132   return code_convert_string (string, &coding, encodep, Qt);
5133 }
5134 \f
5135 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
5136   "Decode a Japanese character which has CODE in shift_jis encoding.\n\
5137 Return the corresponding character.")
5138   (code)
5139      Lisp_Object code;
5140 {
5141   unsigned char c1, c2, s1, s2;
5142   Lisp_Object val;
5143
5144   CHECK_NUMBER (code, 0);
5145   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
5146   if (s1 == 0)
5147     {
5148       if (s2 < 0x80)
5149         XSETFASTINT (val, s2);
5150       else if (s2 >= 0xA0 || s2 <= 0xDF)
5151         XSETFASTINT (val,
5152                      MAKE_NON_ASCII_CHAR (charset_katakana_jisx0201, s2, 0));
5153       else
5154         error ("Invalid Shift JIS code: %x", XFASTINT (code));
5155     }
5156   else
5157     {
5158       if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
5159           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
5160         error ("Invalid Shift JIS code: %x", XFASTINT (code));
5161       DECODE_SJIS (s1, s2, c1, c2);
5162       XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
5163     }
5164   return val;
5165 }
5166
5167 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
5168   "Encode a Japanese character CHAR to shift_jis encoding.\n\
5169 Return the corresponding code in SJIS.")
5170   (ch)
5171      Lisp_Object ch;
5172 {
5173   int charset, c1, c2, s1, s2;
5174   Lisp_Object val;
5175
5176   CHECK_NUMBER (ch, 0);
5177   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
5178   if (charset == CHARSET_ASCII)
5179     {
5180       val = ch;
5181     }
5182   else if (charset == charset_jisx0208
5183            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
5184     {
5185       ENCODE_SJIS (c1, c2, s1, s2);
5186       XSETFASTINT (val, (s1 << 8) | s2);
5187     }
5188   else if (charset == charset_katakana_jisx0201
5189            && c1 > 0x20 && c2 < 0xE0)
5190     {
5191       XSETFASTINT (val, c1 | 0x80);
5192     }
5193   else
5194     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
5195   return val;
5196 }
5197
5198 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
5199   "Decode a Big5 character which has CODE in BIG5 coding system.\n\
5200 Return the corresponding character.")
5201   (code)
5202      Lisp_Object code;
5203 {
5204   int charset;
5205   unsigned char b1, b2, c1, c2;
5206   Lisp_Object val;
5207
5208   CHECK_NUMBER (code, 0);
5209   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
5210   if (b1 == 0)
5211     {
5212       if (b2 >= 0x80)
5213         error ("Invalid BIG5 code: %x", XFASTINT (code));
5214       val = code;
5215     }
5216   else
5217     {
5218       if ((b1 < 0xA1 || b1 > 0xFE)
5219           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
5220         error ("Invalid BIG5 code: %x", XFASTINT (code));
5221       DECODE_BIG5 (b1, b2, charset, c1, c2);
5222       XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
5223     }
5224   return val;
5225 }
5226
5227 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
5228   "Encode the Big5 character CHAR to BIG5 coding system.\n\
5229 Return the corresponding character code in Big5.")
5230   (ch)
5231      Lisp_Object ch;
5232 {
5233   int charset, c1, c2, b1, b2;
5234   Lisp_Object val;
5235
5236   CHECK_NUMBER (ch, 0);
5237   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
5238   if (charset == CHARSET_ASCII)
5239     {
5240       val = ch;
5241     }
5242   else if ((charset == charset_big5_1
5243             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
5244            || (charset == charset_big5_2
5245                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
5246     {
5247       ENCODE_BIG5 (charset, c1, c2, b1, b2);
5248       XSETFASTINT (val, (b1 << 8) | b2);
5249     }
5250   else
5251     error ("Can't encode to Big5: %d", XFASTINT (ch));
5252   return val;
5253 }
5254 \f
5255 DEFUN ("set-terminal-coding-system-internal",
5256        Fset_terminal_coding_system_internal,
5257        Sset_terminal_coding_system_internal, 1, 1, 0, "")
5258   (coding_system)
5259      Lisp_Object coding_system;
5260 {
5261   CHECK_SYMBOL (coding_system, 0);
5262   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
5263   /* We had better not send unsafe characters to terminal.  */
5264   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
5265
5266   return Qnil;
5267 }
5268
5269 DEFUN ("set-safe-terminal-coding-system-internal",
5270        Fset_safe_terminal_coding_system_internal,
5271        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
5272   (coding_system)
5273      Lisp_Object coding_system;
5274 {
5275   CHECK_SYMBOL (coding_system, 0);
5276   setup_coding_system (Fcheck_coding_system (coding_system),
5277                        &safe_terminal_coding);
5278   return Qnil;
5279 }
5280
5281 DEFUN ("terminal-coding-system",
5282        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
5283   "Return coding system specified for terminal output.")
5284   ()
5285 {
5286   return terminal_coding.symbol;
5287 }
5288
5289 DEFUN ("set-keyboard-coding-system-internal",
5290        Fset_keyboard_coding_system_internal,
5291        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
5292   (coding_system)
5293      Lisp_Object coding_system;
5294 {
5295   CHECK_SYMBOL (coding_system, 0);
5296   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
5297   return Qnil;
5298 }
5299
5300 DEFUN ("keyboard-coding-system",
5301        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
5302   "Return coding system specified for decoding keyboard input.")
5303   ()
5304 {
5305   return keyboard_coding.symbol;
5306 }
5307
5308 \f
5309 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
5310        Sfind_operation_coding_system,  1, MANY, 0,
5311   "Choose a coding system for an operation based on the target name.\n\
5312 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
5313 DECODING-SYSTEM is the coding system to use for decoding\n\
5314 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
5315 for encoding (in case OPERATION does encoding).\n\
5316 \n\
5317 The first argument OPERATION specifies an I/O primitive:\n\
5318   For file I/O, `insert-file-contents' or `write-region'.\n\
5319   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
5320   For network I/O, `open-network-stream'.\n\
5321 \n\
5322 The remaining arguments should be the same arguments that were passed\n\
5323 to the primitive.  Depending on which primitive, one of those arguments\n\
5324 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
5325 whichever argument specifies the file name is TARGET.\n\
5326 \n\
5327 TARGET has a meaning which depends on OPERATION:\n\
5328   For file I/O, TARGET is a file name.\n\
5329   For process I/O, TARGET is a process name.\n\
5330   For network I/O, TARGET is a service name or a port number\n\
5331 \n\
5332 This function looks up what specified for TARGET in,\n\
5333 `file-coding-system-alist', `process-coding-system-alist',\n\
5334 or `network-coding-system-alist' depending on OPERATION.\n\
5335 They may specify a coding system, a cons of coding systems,\n\
5336 or a function symbol to call.\n\
5337 In the last case, we call the function with one argument,\n\
5338 which is a list of all the arguments given to this function.")
5339   (nargs, args)
5340      int nargs;
5341      Lisp_Object *args;
5342 {
5343   Lisp_Object operation, target_idx, target, val;
5344   register Lisp_Object chain;
5345
5346   if (nargs < 2)
5347     error ("Too few arguments");
5348   operation = args[0];
5349   if (!SYMBOLP (operation)
5350       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
5351     error ("Invalid first arguement");
5352   if (nargs < 1 + XINT (target_idx))
5353     error ("Too few arguments for operation: %s",
5354            XSYMBOL (operation)->name->data);
5355   target = args[XINT (target_idx) + 1];
5356   if (!(STRINGP (target)
5357         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
5358     error ("Invalid %dth argument", XINT (target_idx) + 1);
5359
5360   chain = ((EQ (operation, Qinsert_file_contents)
5361             || EQ (operation, Qwrite_region))
5362            ? Vfile_coding_system_alist
5363            : (EQ (operation, Qopen_network_stream)
5364               ? Vnetwork_coding_system_alist
5365               : Vprocess_coding_system_alist));
5366   if (NILP (chain))
5367     return Qnil;
5368
5369   for (; CONSP (chain); chain = XCDR (chain))
5370     {
5371       Lisp_Object elt;
5372       elt = XCAR (chain);
5373
5374       if (CONSP (elt)
5375           && ((STRINGP (target)
5376                && STRINGP (XCAR (elt))
5377                && fast_string_match (XCAR (elt), target) >= 0)
5378               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
5379         {
5380           val = XCDR (elt);
5381           /* Here, if VAL is both a valid coding system and a valid
5382              function symbol, we return VAL as a coding system.  */
5383           if (CONSP (val))
5384             return val;
5385           if (! SYMBOLP (val))
5386             return Qnil;
5387           if (! NILP (Fcoding_system_p (val)))
5388             return Fcons (val, val);
5389           if (! NILP (Ffboundp (val)))
5390             {
5391               val = call1 (val, Flist (nargs, args));
5392               if (CONSP (val))
5393                 return val;
5394               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
5395                 return Fcons (val, val);
5396             }
5397           return Qnil;
5398         }
5399     }
5400   return Qnil;
5401 }
5402
5403 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
5404        Supdate_coding_systems_internal, 0, 0, 0,
5405   "Update internal database for ISO2022 and CCL based coding systems.\n\
5406 When values of the following coding categories are changed, you must\n\
5407 call this function:\n\
5408   coding-category-iso-7, coding-category-iso-7-tight,\n\
5409   coding-category-iso-8-1, coding-category-iso-8-2,\n\
5410   coding-category-iso-7-else, coding-category-iso-8-else,\n\
5411   coding-category-ccl")
5412   ()
5413 {
5414   int i;
5415
5416   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++)
5417     {
5418       Lisp_Object val;
5419
5420       val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
5421       if (!NILP (val))
5422         {
5423           if (! coding_system_table[i])
5424             coding_system_table[i] = ((struct coding_system *)
5425                                       xmalloc (sizeof (struct coding_system)));
5426           setup_coding_system (val, coding_system_table[i]);
5427         }
5428       else if (coding_system_table[i])
5429         {
5430           xfree (coding_system_table[i]);
5431           coding_system_table[i] = NULL;
5432         }
5433     }
5434
5435   return Qnil;
5436 }
5437
5438 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
5439        Sset_coding_priority_internal, 0, 0, 0,
5440   "Update internal database for the current value of `coding-category-list'.\n\
5441 This function is internal use only.")
5442   ()
5443 {
5444   int i = 0, idx;
5445   Lisp_Object val;
5446
5447   val = Vcoding_category_list;
5448
5449   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
5450     {
5451       if (! SYMBOLP (XCAR (val)))
5452         break;
5453       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
5454       if (idx >= CODING_CATEGORY_IDX_MAX)
5455         break;
5456       coding_priorities[i++] = (1 << idx);
5457       val = XCDR (val);
5458     }
5459   /* If coding-category-list is valid and contains all coding
5460      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
5461      the following code saves Emacs from craching.  */
5462   while (i < CODING_CATEGORY_IDX_MAX)
5463     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
5464
5465   return Qnil;
5466 }
5467
5468 #endif /* emacs */
5469
5470 \f
5471 /*** 9. Post-amble ***/
5472
5473 void
5474 init_coding ()
5475 {
5476   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5477 }
5478
5479 void
5480 init_coding_once ()
5481 {
5482   int i;
5483
5484   /* Emacs' internal format specific initialize routine.  */
5485   for (i = 0; i <= 0x20; i++)
5486     emacs_code_class[i] = EMACS_control_code;
5487   emacs_code_class[0x0A] = EMACS_linefeed_code;
5488   emacs_code_class[0x0D] = EMACS_carriage_return_code;
5489   for (i = 0x21 ; i < 0x7F; i++)
5490     emacs_code_class[i] = EMACS_ascii_code;
5491   emacs_code_class[0x7F] = EMACS_control_code;
5492   emacs_code_class[0x80] = EMACS_leading_code_composition;
5493   for (i = 0x81; i < 0xFF; i++)
5494     emacs_code_class[i] = EMACS_invalid_code;
5495   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5496   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5497   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5498   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
5499
5500   /* ISO2022 specific initialize routine.  */
5501   for (i = 0; i < 0x20; i++)
5502     iso_code_class[i] = ISO_control_code;
5503   for (i = 0x21; i < 0x7F; i++)
5504     iso_code_class[i] = ISO_graphic_plane_0;
5505   for (i = 0x80; i < 0xA0; i++)
5506     iso_code_class[i] = ISO_control_code;
5507   for (i = 0xA1; i < 0xFF; i++)
5508     iso_code_class[i] = ISO_graphic_plane_1;
5509   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5510   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5511   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5512   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5513   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5514   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5515   iso_code_class[ISO_CODE_ESC] = ISO_escape;
5516   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5517   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5518   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5519
5520   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
5521
5522   setup_coding_system (Qnil, &keyboard_coding);
5523   setup_coding_system (Qnil, &terminal_coding);
5524   setup_coding_system (Qnil, &safe_terminal_coding);
5525   setup_coding_system (Qnil, &default_buffer_file_coding);
5526
5527   bzero (coding_system_table, sizeof coding_system_table);
5528
5529   bzero (ascii_skip_code, sizeof ascii_skip_code);
5530   for (i = 0; i < 128; i++)
5531     ascii_skip_code[i] = 1;
5532
5533 #if defined (MSDOS) || defined (WINDOWSNT)
5534   system_eol_type = CODING_EOL_CRLF;
5535 #else
5536   system_eol_type = CODING_EOL_LF;
5537 #endif
5538
5539   inhibit_pre_post_conversion = 0;
5540 }
5541
5542 #ifdef emacs
5543
5544 void
5545 syms_of_coding ()
5546 {
5547   Qtarget_idx = intern ("target-idx");
5548   staticpro (&Qtarget_idx);
5549
5550   Qcoding_system_history = intern ("coding-system-history");
5551   staticpro (&Qcoding_system_history);
5552   Fset (Qcoding_system_history, Qnil);
5553
5554   /* Target FILENAME is the first argument.  */
5555   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5556   /* Target FILENAME is the third argument.  */
5557   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5558
5559   Qcall_process = intern ("call-process");
5560   staticpro (&Qcall_process);
5561   /* Target PROGRAM is the first argument.  */
5562   Fput (Qcall_process, Qtarget_idx, make_number (0));
5563
5564   Qcall_process_region = intern ("call-process-region");
5565   staticpro (&Qcall_process_region);
5566   /* Target PROGRAM is the third argument.  */
5567   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5568
5569   Qstart_process = intern ("start-process");
5570   staticpro (&Qstart_process);
5571   /* Target PROGRAM is the third argument.  */
5572   Fput (Qstart_process, Qtarget_idx, make_number (2));
5573
5574   Qopen_network_stream = intern ("open-network-stream");
5575   staticpro (&Qopen_network_stream);
5576   /* Target SERVICE is the fourth argument.  */
5577   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5578
5579   Qcoding_system = intern ("coding-system");
5580   staticpro (&Qcoding_system);
5581
5582   Qeol_type = intern ("eol-type");
5583   staticpro (&Qeol_type);
5584
5585   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5586   staticpro (&Qbuffer_file_coding_system);
5587
5588   Qpost_read_conversion = intern ("post-read-conversion");
5589   staticpro (&Qpost_read_conversion);
5590
5591   Qpre_write_conversion = intern ("pre-write-conversion");
5592   staticpro (&Qpre_write_conversion);
5593
5594   Qno_conversion = intern ("no-conversion");
5595   staticpro (&Qno_conversion);
5596
5597   Qundecided = intern ("undecided");
5598   staticpro (&Qundecided);
5599
5600   Qcoding_system_p = intern ("coding-system-p");
5601   staticpro (&Qcoding_system_p);
5602
5603   Qcoding_system_error = intern ("coding-system-error");
5604   staticpro (&Qcoding_system_error);
5605
5606   Fput (Qcoding_system_error, Qerror_conditions,
5607         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5608   Fput (Qcoding_system_error, Qerror_message,
5609         build_string ("Invalid coding system"));
5610
5611   Qcoding_category = intern ("coding-category");
5612   staticpro (&Qcoding_category);
5613   Qcoding_category_index = intern ("coding-category-index");
5614   staticpro (&Qcoding_category_index);
5615
5616   Vcoding_category_table
5617     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5618   staticpro (&Vcoding_category_table);
5619   {
5620     int i;
5621     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5622       {
5623         XVECTOR (Vcoding_category_table)->contents[i]
5624           = intern (coding_category_name[i]);
5625         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5626               Qcoding_category_index, make_number (i));
5627       }
5628   }
5629
5630   Qtranslation_table = intern ("translation-table");
5631   staticpro (&Qtranslation_table);
5632   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
5633
5634   Qtranslation_table_id = intern ("translation-table-id");
5635   staticpro (&Qtranslation_table_id);
5636
5637   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
5638   staticpro (&Qtranslation_table_for_decode);
5639
5640   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
5641   staticpro (&Qtranslation_table_for_encode);
5642
5643   Qsafe_charsets = intern ("safe-charsets");
5644   staticpro (&Qsafe_charsets);
5645
5646   Qvalid_codes = intern ("valid-codes");
5647   staticpro (&Qvalid_codes);
5648
5649   Qemacs_mule = intern ("emacs-mule");
5650   staticpro (&Qemacs_mule);
5651
5652   Qraw_text = intern ("raw-text");
5653   staticpro (&Qraw_text);
5654
5655   defsubr (&Scoding_system_p);
5656   defsubr (&Sread_coding_system);
5657   defsubr (&Sread_non_nil_coding_system);
5658   defsubr (&Scheck_coding_system);
5659   defsubr (&Sdetect_coding_region);
5660   defsubr (&Sdetect_coding_string);
5661   defsubr (&Sdecode_coding_region);
5662   defsubr (&Sencode_coding_region);
5663   defsubr (&Sdecode_coding_string);
5664   defsubr (&Sencode_coding_string);
5665   defsubr (&Sdecode_sjis_char);
5666   defsubr (&Sencode_sjis_char);
5667   defsubr (&Sdecode_big5_char);
5668   defsubr (&Sencode_big5_char);
5669   defsubr (&Sset_terminal_coding_system_internal);
5670   defsubr (&Sset_safe_terminal_coding_system_internal);
5671   defsubr (&Sterminal_coding_system);
5672   defsubr (&Sset_keyboard_coding_system_internal);
5673   defsubr (&Skeyboard_coding_system);
5674   defsubr (&Sfind_operation_coding_system);
5675   defsubr (&Supdate_coding_systems_internal);
5676   defsubr (&Sset_coding_priority_internal);
5677
5678   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5679     "List of coding systems.\n\
5680 \n\
5681 Do not alter the value of this variable manually.  This variable should be\n\
5682 updated by the functions `make-coding-system' and\n\
5683 `define-coding-system-alias'.");
5684   Vcoding_system_list = Qnil;
5685
5686   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5687     "Alist of coding system names.\n\
5688 Each element is one element list of coding system name.\n\
5689 This variable is given to `completing-read' as TABLE argument.\n\
5690 \n\
5691 Do not alter the value of this variable manually.  This variable should be\n\
5692 updated by the functions `make-coding-system' and\n\
5693 `define-coding-system-alias'.");
5694   Vcoding_system_alist = Qnil;
5695
5696   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5697     "List of coding-categories (symbols) ordered by priority.");
5698   {
5699     int i;
5700
5701     Vcoding_category_list = Qnil;
5702     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5703       Vcoding_category_list
5704         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5705                  Vcoding_category_list);
5706   }
5707
5708   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5709     "Specify the coding system for read operations.\n\
5710 It is useful to bind this variable with `let', but do not set it globally.\n\
5711 If the value is a coding system, it is used for decoding on read operation.\n\
5712 If not, an appropriate element is used from one of the coding system alists:\n\
5713 There are three such tables, `file-coding-system-alist',\n\
5714 `process-coding-system-alist', and `network-coding-system-alist'.");
5715   Vcoding_system_for_read = Qnil;
5716
5717   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5718     "Specify the coding system for write operations.\n\
5719 Programs bind this variable with `let', but you should not set it globally.\n\
5720 If the value is a coding system, it is used for encoding of output,\n\
5721 when writing it to a file and when sending it to a file or subprocess.\n\
5722 \n\
5723 If this does not specify a coding system, an appropriate element\n\
5724 is used from one of the coding system alists:\n\
5725 There are three such tables, `file-coding-system-alist',\n\
5726 `process-coding-system-alist', and `network-coding-system-alist'.\n\
5727 For output to files, if the above procedure does not specify a coding system,\n\
5728 the value of `buffer-file-coding-system' is used.");
5729   Vcoding_system_for_write = Qnil;
5730
5731   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5732     "Coding system used in the latest file or process I/O.");
5733   Vlast_coding_system_used = Qnil;
5734
5735   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5736     "*Non-nil means always inhibit code conversion of end-of-line format.\n\
5737 See info node `Coding Systems' and info node `Text and Binary' concerning\n\
5738 such conversion.");
5739   inhibit_eol_conversion = 0;
5740
5741   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5742     "Non-nil means process buffer inherits coding system of process output.\n\
5743 Bind it to t if the process output is to be treated as if it were a file\n\
5744 read from some filesystem.");
5745   inherit_process_coding_system = 0;
5746
5747   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5748     "Alist to decide a coding system to use for a file I/O operation.\n\
5749 The format is ((PATTERN . VAL) ...),\n\
5750 where PATTERN is a regular expression matching a file name,\n\
5751 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5752 If VAL is a coding system, it is used for both decoding and encoding\n\
5753 the file contents.\n\
5754 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5755 and the cdr part is used for encoding.\n\
5756 If VAL is a function symbol, the function must return a coding system\n\
5757 or a cons of coding systems which are used as above.\n\
5758 \n\
5759 See also the function `find-operation-coding-system'\n\
5760 and the variable `auto-coding-alist'.");
5761   Vfile_coding_system_alist = Qnil;
5762
5763   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5764     "Alist to decide a coding system to use for a process I/O operation.\n\
5765 The format is ((PATTERN . VAL) ...),\n\
5766 where PATTERN is a regular expression matching a program name,\n\
5767 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5768 If VAL is a coding system, it is used for both decoding what received\n\
5769 from the program and encoding what sent to the program.\n\
5770 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5771 and the cdr part is used for encoding.\n\
5772 If VAL is a function symbol, the function must return a coding system\n\
5773 or a cons of coding systems which are used as above.\n\
5774 \n\
5775 See also the function `find-operation-coding-system'.");
5776   Vprocess_coding_system_alist = Qnil;
5777
5778   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5779     "Alist to decide a coding system to use for a network I/O operation.\n\
5780 The format is ((PATTERN . VAL) ...),\n\
5781 where PATTERN is a regular expression matching a network service name\n\
5782 or is a port number to connect to,\n\
5783 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5784 If VAL is a coding system, it is used for both decoding what received\n\
5785 from the network stream and encoding what sent to the network stream.\n\
5786 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5787 and the cdr part is used for encoding.\n\
5788 If VAL is a function symbol, the function must return a coding system\n\
5789 or a cons of coding systems which are used as above.\n\
5790 \n\
5791 See also the function `find-operation-coding-system'.");
5792   Vnetwork_coding_system_alist = Qnil;
5793
5794   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
5795     "Coding system to use with system messages.");
5796   Vlocale_coding_system = Qnil;
5797
5798   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
5799     "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
5800   eol_mnemonic_unix = build_string (":");
5801
5802   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
5803     "*String displayed in mode line for DOS-like (CRLF) end-of-line format.");
5804   eol_mnemonic_dos = build_string ("\\");
5805
5806   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
5807     "*String displayed in mode line for MAC-like (CR) end-of-line format.");
5808   eol_mnemonic_mac = build_string ("/");
5809
5810   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5811     "*String displayed in mode line when end-of-line format is not yet determined.");
5812   eol_mnemonic_undecided = build_string (":");
5813
5814   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
5815     "*Non-nil enables character translation while encoding and decoding.");
5816   Venable_character_translation = Qt;
5817
5818   DEFVAR_LISP ("standard-translation-table-for-decode",
5819     &Vstandard_translation_table_for_decode,
5820     "Table for translating characters while decoding.");
5821   Vstandard_translation_table_for_decode = Qnil;
5822
5823   DEFVAR_LISP ("standard-translation-table-for-encode",
5824     &Vstandard_translation_table_for_encode,
5825     "Table for translationg characters while encoding.");
5826   Vstandard_translation_table_for_encode = Qnil;
5827
5828   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5829     "Alist of charsets vs revision numbers.\n\
5830 While encoding, if a charset (car part of an element) is found,\n\
5831 designate it with the escape sequence identifing revision (cdr part of the element).");
5832   Vcharset_revision_alist = Qnil;
5833
5834   DEFVAR_LISP ("default-process-coding-system",
5835                &Vdefault_process_coding_system,
5836     "Cons of coding systems used for process I/O by default.\n\
5837 The car part is used for decoding a process output,\n\
5838 the cdr part is used for encoding a text to be sent to a process.");
5839   Vdefault_process_coding_system = Qnil;
5840
5841   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5842     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5843 This is a vector of length 256.\n\
5844 If Nth element is non-nil, the existence of code N in a file\n\
5845 \(or output of subprocess) doesn't prevent it to be detected as\n\
5846 a coding system of ISO 2022 variant which has a flag\n\
5847 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5848 or reading output of a subprocess.\n\
5849 Only 128th through 159th elements has a meaning.");
5850   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5851
5852   DEFVAR_LISP ("select-safe-coding-system-function",
5853                &Vselect_safe_coding_system_function,
5854     "Function to call to select safe coding system for encoding a text.\n\
5855 \n\
5856 If set, this function is called to force a user to select a proper\n\
5857 coding system which can encode the text in the case that a default\n\
5858 coding system used in each operation can't encode the text.\n\
5859 \n\
5860 The default value is `select-safe-coding-system' (which see).");
5861   Vselect_safe_coding_system_function = Qnil;
5862
5863 }
5864
5865 char *
5866 emacs_strerror (error_number)
5867      int error_number;
5868 {
5869   char *str;
5870
5871   synchronize_messages_locale ();
5872   str = strerror (error_number);
5873
5874   if (! NILP (Vlocale_coding_system))
5875     {
5876       Lisp_Object dec = code_convert_string_norecord (build_string (str),
5877                                                       Vlocale_coding_system,
5878                                                       0);
5879       str = (char *) XSTRING (dec)->data;
5880     }
5881
5882   return str;
5883 }
5884
5885 #endif /* emacs */