src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Ver.1.0.
   3    Copyright (C) 1995 Free Software Foundation, Inc.
   4    Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /*** TABLE OF CONTENTS ***
  24
  25   1. Preamble
  26   2. Emacs' internal format handlers
  27   3. ISO2022 handlers
  28   4. Shift-JIS and BIG5 handlers
  29   5. End-of-line handlers
  30   6. C library functions
  31   7. Emacs Lisp library functions
  32   8. Post-amble
  33
  34 */
  35
  36 /*** GENERAL NOTE on CODING SYSTEM ***
  37
  38   Coding system is an encoding mechanism of one or more character
  39   sets.  Here's a list of coding systems which Emacs can handle.  When
  40   we say "decode", it means converting some other coding system to
  41   Emacs' internal format, and when we say "encode", it means
  42   converting Emacs' internal format to some other coding system.
  43
  44   0. Emacs' internal format
  45
  46   Emacs itself holds a multi-lingual character in a buffer and a string
  47   in a special format.  Details are described in the section 2.
  48
  49   1. ISO2022
  50
  51   The most famous coding system for multiple character sets.  X's
  52   Compound Text, various EUCs (Extended Unix Code), and such coding
  53   systems used in Internet communication as ISO-2022-JP are all
  54   variants of ISO2022.  Details are described in the section 3.
  55
  56   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  57
  58   A coding system to encode character sets: ASCII, JISX0201, and
  59   JISX0208.  Widely used for PC's in Japan.  Details are described in
  60   the section 4.
  61
  62   3. BIG5
  63
  64   A coding system to encode character sets: ASCII and Big5.  Widely
  65   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  66   described in the section 4.  In this file, when written as "BIG5"
  67   (all uppercase), it means the coding system, and when written as
  68   "Big5" (capitalized), it means the character set.
  69
  70   4. Else
  71
  72   If a user wants to read/write text encoded in a coding system not
  73   listed above, they can supply a decoder and an encoder for it as CCL
  74   (Code Conversion Language) programs.  Emacs executes the CCL program
  75   while reading/writing.
  76
  77   Emacs represents a coding-system by a Lisp symbol that has a property
  78   `coding-system'.  But, before actually using the coding-system, the
  79   information about it is set in a structure of type `struct
  80   coding_system' for rapid processing.  See section 6 for more
  81   detail.
  82
  83 */
  84
  85 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  86
  87   How the end of a line of text is encoded depends on the system.  For
  88   instance, Unix's format is just one byte of `line-feed' code,
  89   whereas DOS's format is a two-byte sequence of `carriage-return' and
  90   `line-feed' codes.  MacOS's format is one byte of `carriage-return'.
  91
  92   Since how characters in a text are encoded and how end-of-line is
  93   encoded are independent, any coding system described above can take
  94   any format of end-of-line.  So, Emacs records the end-of-line
  95   format in each coding-system.  See section 6 for more
  96   detail.
  97
  98 */
  99
 100 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 101
 102   These functions check if a text between SRC and SRC_END is encoded
 103   in the coding system category XXX.  Each returns an integer value in
 104   which appropriate flag bits for the category XXX are set.  The flag
 105   bits are defined by the macros CODING_CATEGORY_MASK_XXX.  Below is
 106   the template of these functions.  */
 107 #if 0
 108 int
 109 detect_coding_internal (src, src_end)
 110      unsigned char *src, *src_end;
 111 {
 112   ...
 113 }
 114 #endif
 115
 116 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 117
 118   These functions decode SRC_BYTES bytes of text at SOURCE, encoded in
 119   CODING, to Emacs' internal format.  The resulting text goes to the
 120   place pointed to by DESTINATION, and its length should not exceed
 121   DST_BYTES.  The number of bytes actually processed is returned in
 122   *CONSUMED.  The return value is the length of the decoded text.
 123   Below is a template of these functions.  */
 124 #if 0
 125 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
 126      struct coding_system *coding;
 127      unsigned char *source, *destination;
 128      int src_bytes, dst_bytes;
 129      int *consumed;
 130 {
 131   ...
 132 }
 133 #endif
 134
 135 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 136
 137   These functions encode SRC_BYTES bytes of text at SOURCE, in Emacs'
 138   internal format, to CODING.  The resulting text goes to the place
 139   pointed to by DESTINATION, and its length should not exceed
 140   DST_BYTES.  The number of bytes actually processed is returned in
 141   *CONSUMED.  The return value is the length of the encoded text.
 142   Below is a template of these functions.  */
 143 #if 0
 144 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
 145      struct coding_system *coding;
 146      unsigned char *source, *destination;
 147      int src_bytes, dst_bytes;
 148      int *consumed;
 149 {
 150   ...
 151 }
 152 #endif
 153
 154 /*** COMMONLY USED MACROS ***/
 155
 156 /* ONE_MORE_BYTE, TWO_MORE_BYTES, and THREE_MORE_BYTES fetch one, two,
 157    and three bytes from the source text, advancing `src'.  If too few
 158    bytes remain, they consume nothing and jump to `label_end_of_loop'.
 159    The caller must provide that label and set `src' and `src_end'.  The
 160    tests use `src_end - src' so no pointer past `src_end' is formed.  */
 161
 162 #define ONE_MORE_BYTE(c1)       \
 163   do {                          \
 164     if (src < src_end)          \
 165       c1 = *src++;              \
 166     else                        \
 167       goto label_end_of_loop;   \
 168   } while (0)
 169
 170 #define TWO_MORE_BYTES(c1, c2)  \
 171   do {                          \
 172     if (src_end - src >= 2)     \
 173       c1 = *src++, c2 = *src++; \
 174     else                        \
 175       goto label_end_of_loop;   \
 176   } while (0)
 177
 178 #define THREE_MORE_BYTES(c1, c2, c3)            \
 179   do {                                          \
 180     if (src_end - src >= 3)                     \
 181       c1 = *src++, c2 = *src++, c3 = *src++;    \
 182     else                                        \
 183       goto label_end_of_loop;                   \
 184   } while (0)
 185
 186 /* The following three macros DECODE_CHARACTER_ASCII,
 187    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 188    the multi-byte form of a character of each class at the place
 189    pointed by `dst'.  The caller should set the variable `dst' to
 190    point to an appropriate area and the variable `coding' to point to
 191    the coding-system of the currently decoding text in advance.  */
 192
 193 /* Decode one ASCII character C: store its internal form at `dst'.  */
 194 /* Relies on `dst' and `coding' in the expanding function's scope.  */
 195 #define DECODE_CHARACTER_ASCII(c)                               \
 196   do {                                                          \
 197     if (COMPOSING_P (coding->composing))                        \
 198       *dst++ = 0xA0, *dst++ = (c) | 0x80; /* 2-byte component form */ \
 199     else                                                        \
 200       *dst++ = (c);                       /* the byte as is */  \
 201   } while (0)
 202
 203 /* Decode one DIMENSION1 character of which charset is CHARSET and
 204    position-code is C.  */
 205 /* Stores at `dst': the base leading-code (plus 0x20 while composing), the extended leading-code when it is nonzero, then C | 0x80.  */
 206 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 207   do {                                                                  \
 208     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 209     if (COMPOSING_P (coding->composing))                                \
 210       *dst++ = leading_code + 0x20;                                     \
 211     else                                                                \
 212       *dst++ = leading_code;                                            \
 213     if ((leading_code = CHARSET_LEADING_CODE_EXT (charset)) != 0)       \
 214       *dst++ = leading_code;                                            \
 215     *dst++ = (c) | 0x80;                                                \
 216   } while (0)
 217
 218 /* Decode one DIMENSION2 character of which charset is CHARSET and
 219    position-codes are C1 and C2.  */
 220 /* Stored as the DIMENSION1 form of C1 followed by C2 | 0x80.  */
 221 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 222   do {                                                  \
 223     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 224     *dst++ = (c2) | 0x80;                               \
 225   } while (0)
 226
 227 \f
 228 /*** 1. Preamble ***/
 229
 230 #include <stdio.h>
 231
 232 #ifdef emacs
 233
 234 #include <config.h>
 235 #include "lisp.h"
 236 #include "buffer.h"
 237 #include "charset.h"
 238 #include "ccl.h"
 239 #include "coding.h"
 240 #include "window.h"
 241
 242 #else  /* not emacs */
 243
 244 #include "mulelib.h"
 245
 246 #endif /* not emacs */
 247
 248 Lisp_Object Qcoding_system, Qeol_type;
 249 Lisp_Object Qbuffer_file_coding_system;
 250 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 251 /* NOTE(review): the Q... objects are presumably Lisp symbols named after their suffixes; they are initialized outside this part of the file.  */
 252 extern Lisp_Object Qinsert_file_contents, Qwrite_region; /* Declared only; defined elsewhere.  */
 253 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 254 Lisp_Object Qstart_process, Qopen_network_stream;
 255 Lisp_Object Qtarget_idx;
 256
 257 /* Mnemonic character for each end-of-line format (Unix, DOS, Mac).  */
 258 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 259 /* Mnemonic character indicating that the end-of-line format is not
 260    yet decided.  */
 261 int eol_mnemonic_undecided;
 262
 263 #ifdef emacs
 264 /* Everything down to the matching #endif exists only when `emacs' is defined.  */
 265 Lisp_Object Qcoding_system_vector, Qcoding_system_p, Qcoding_system_error;
 266
 267 /* Coding-systems are passed between Emacs Lisp programs and internal C
 268    routines through the following three variables.  */
 269 /* Coding-system for reading files and receiving data from a process.  */
 270 Lisp_Object Vcoding_system_for_read;
 271 /* Coding-system for writing files and sending data to a process.  */
 272 Lisp_Object Vcoding_system_for_write;
 273 /* Coding-system actually used in the latest I/O.  */
 274 Lisp_Object Vlast_coding_system_used;
 275
 276 /* Coding-system that the terminal accepts for display.  */
 277 struct coding_system terminal_coding;
 278
 279 /* Coding-system of what is sent from the terminal keyboard.  */
 280 struct coding_system keyboard_coding;
 281 /* NOTE(review): not documented in this section; presumably an alist keyed for choosing coding systems -- confirm where it is initialized.  */
 282 Lisp_Object Vcoding_system_alist;
 283
 284 #endif /* emacs */
 285
 286 Lisp_Object Qcoding_category_index;
 287 /* NOTE(review): presumably the symbol under which a category's index is stored -- confirm against its users.  */
 288 /* List of symbols `coding-category-xxx' ordered by priority.  */
 289 Lisp_Object Vcoding_category_list;
 290
 291 /* Table of coding-systems currently assigned to each coding-category.  */
 292 Lisp_Object coding_category_table[CODING_CATEGORY_IDX_MAX];
 293 /* Both tables have CODING_CATEGORY_IDX_MAX slots; presumably indexed by the CODING_CATEGORY_IDX_XXX values from coding.h -- confirm.  */
 294 /* Table of the symbol name for each coding-category.  */
 295 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 296   "coding-category-internal",
 297   "coding-category-sjis",
 298   "coding-category-iso-7",
 299   "coding-category-iso-8-1",
 300   "coding-category-iso-8-2",
 301   "coding-category-iso-else",
 302   "coding-category-big5",
 303   "coding-category-binary"
 304 };
 305
 306 /* Alist mapping a charset symbol to its alternate charset; DECODE_DESIGNATION looks entries up with Fassq and uses the cdr.  */
 307 Lisp_Object Valternate_charset_table;
 308
 309 /* Alist of charsets vs revision number.  */
 310 Lisp_Object Vcharset_revision_alist;
 311
 312 \f
 313 /*** 2. Emacs internal format handlers ***/
 314
 315 /* Emacs' internal format for encoding multiple character sets is a
 316    kind of multi-byte encoding, i.e. encoding a character by a sequence
 317    of one-byte codes of variable length.  ASCII characters and control
 318    characters (e.g. `tab', `newline') are represented by one-byte as
 319    is.  It takes the range 0x00 through 0x7F.  The other characters
 320    are represented by a sequence of `base leading-code', optional
 321    `extended leading-code', and one or two `position-code's.  Length
 322    of the sequence is decided by the base leading-code.  Leading-code
 323    takes the range 0x80 through 0x9F, whereas extended leading-code
 324    and position-code take the range 0xA0 through 0xFF.  See the
 325    document of `charset.h' for more detail about leading-code and
 326    position-code.
 327
 328    There's one exception to this rule.  The special leading-code
 329    `leading-code-composition' denotes that the following several
 330    characters should be composed into one character.  Leading-codes of
 331    components (except for ASCII) have 0x20 added.  An ASCII character
 332    component is represented by a 2-byte sequence of `0xA0' and
 333    `ASCII-code + 0x80'.  See also the documentation in `charset.h' for
 334    the details of composite characters.  Hence, we can summarize the
 335    code range as follows:
 336
 337    --- CODE RANGE of Emacs' internal format ---
 338    (character set)      (range)
 339    ASCII                0x00 .. 0x7F
 340    ELSE (1st byte)      0x80 .. 0x9F
 341         (rest bytes)    0xA0 .. 0xFF
 342    ---------------------------------------------
 343
 344   */
 345
 346 enum emacs_code_class_type emacs_code_class[256]; /* Code class of each byte value; looked up by detect_coding_internal.  */
 347
 348 /* Go to the next statement only if *SRC is accessible and the code is
 349    0xA0 or greater; SRC is advanced past the byte that was tested.  */
 350 #define CHECK_CODE_RANGE_A0_FF  \
 351   do {                          \
 352     if (src >= src_end)         /* source exhausted: leave the switch */ \
 353       goto label_end_of_switch; \
 354     else if (*src++ < 0xA0)     /* the byte is consumed either way */ \
 355       return 0;                 /* returns from the expanding function */ \
 356   } while (0)
 357
 358 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 359    Check if a text is encoded in Emacs' internal format.  If it is,
 360    return CODING_CATEGORY_MASK_INTERNAL, else return 0.  */
 361 /* The bytes examined are SRC up to, but not including, SRC_END.  */
 362 int
 363 detect_coding_internal (src, src_end)
 364      unsigned char *src, *src_end;
 365 {
 366   unsigned char c;
 367   int composing = 0;            /* Nonzero once a composition leading-code has been seen.  */
 368
 369   while (src < src_end)
 370     {
 371       c = *src++;
 372
 373       if (composing)
 374         {
 375           if (c < 0xA0)         /* Not a component byte: the composition is over.  */
 376             composing = 0;
 377           else                  /* Undo the 0x20 added to component leading-codes.  */
 378             c -= 0x20;
 379         }
 380
 381       switch (emacs_code_class[c])
 382         {
 383         case EMACS_ascii_code:
 384         case EMACS_linefeed_code:
 385           break;
 386
 387         case EMACS_control_code:
 388           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 389             return 0;           /* These controls rule out the internal format.  */
 390           break;
 391
 392         case EMACS_invalid_code:
 393           return 0;
 394
 395         case EMACS_leading_code_composition: /* c == 0x80 */
 396           if (composing)        /* Original byte was 0xA0: an ASCII component follows.  */
 397             CHECK_CODE_RANGE_A0_FF;
 398           else
 399             composing = 1;
 400           break;
 401
 402         case EMACS_leading_code_4:
 403           CHECK_CODE_RANGE_A0_FF;
 404           /* fall down to check it two more times ...  */
 405
 406         case EMACS_leading_code_3:
 407           CHECK_CODE_RANGE_A0_FF;
 408           /* fall down to check it one more time ...  */
 409
 410         case EMACS_leading_code_2:
 411           CHECK_CODE_RANGE_A0_FF;
 412           break;
 413
 414         default:
 415         label_end_of_switch:    /* Also reached from CHECK_CODE_RANGE_A0_FF when the source runs out.  */
 416           break;
 417         }
 418     }
 419   return CODING_CATEGORY_MASK_INTERNAL;
 420 }
 421
 422 \f
 423 /*** 3. ISO2022 handlers ***/
 424
 425 /* The following note describes the coding system ISO2022 briefly.
 426    Since the intention of this note is to help in understanding the
 427    programs in this file, some parts are NOT ACCURATE or OVERLY
 428    SIMPLIFIED.  For a thorough understanding, please refer to the
 429    original document of ISO2022.
 430
 431    ISO2022 provides many mechanisms to encode several character sets
 432    in 7-bit and 8-bit environments.  If one chooses a 7-bit environment,
 433    all text is encoded by codes of less than 128.  This may make the
 434    encoded text a little bit longer, but the text becomes more robust
 435    when passing through gateways (some of which strip the MSB).
 436
 437    There are two kinds of character sets: control character sets and
 438    graphic character sets.  The former contain control characters such
 439    as `newline' and `escape' to provide control functions (control
 440    functions are also provided by escape sequences).  The latter
 441    contain graphic characters such as 'A' and '-'.  Emacs recognizes
 442    two control character sets and many graphic character sets.
 443
 444    Graphic character sets are classified into one of the following
 445    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 446    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 447    bytes (DIMENSION) and the number of characters in one dimension
 448    (CHARS) of the set.  In addition, each character set is assigned an
 449    identification tag (called "final character" and denoted as <F>
 450    here after) which is unique in each class.  <F> of each character
 451    set is decided by ECMA(*) when it is registered in ISO.  Code range
 452    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 453
 454    Note (*): ECMA = European Computer Manufacturers Association
 455
 456    Here are examples of graphic character set [NAME(<F>)]:
 457         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 458         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 459         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 460         o DIMENSION2_CHARS96 -- none for the moment
 461
 462    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 463         C0 [0x00..0x1F] -- control character plane 0
 464         GL [0x20..0x7F] -- graphic character plane 0
 465         C1 [0x80..0x9F] -- control character plane 1
 466         GR [0xA0..0xFF] -- graphic character plane 1
 467
 468    A control character set is directly designated and invoked to C0 or
 469    C1 by an escape sequence.  The most common case is that ISO646's
 470    control character set is designated/invoked to C0 and ISO6429's
 471    control character set is designated/invoked to C1, and usually
 472    these designations/invocations are omitted in a coded text.  With
 473    7-bit environment, only C0 can be used, and a control character for
 474    C1 is encoded by an appropriate escape sequence to fit in the
 475    environment.  All control characters for C1 are defined the
 476    corresponding escape sequences.
 477
 478    A graphic character set is at first designated to one of four
 479    graphic registers (G0 through G3), then these graphic registers are
 480    invoked to GL or GR.  These designations and invocations can be
 481    done independently.  The most common case is that G0 is invoked to
 482    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 483    these invocations and designations are omitted in a coded text.
 484    With 7-bit environment, only GL can be used.
 485
 486    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 487    and 0x7F of GL area work as control characters SPACE and DEL
 488    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 489
 490    There are two ways of invocation: locking-shift and single-shift.
 491    With locking-shift, the invocation lasts until the next different
 492    invocation, whereas with single-shift, the invocation works only
 493    for the following character and doesn't affect locking-shift.
 494    Invocations are done by the following control characters or escape
 495    sequences.
 496
 497    ----------------------------------------------------------------------
 498    function             control char    escape sequence description
 499    ----------------------------------------------------------------------
 500    SI  (shift-in)               0x0F    none            invoke G0 to GL
 501    SO  (shift-out)              0x0E    none            invoke G1 to GL
 502    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 503    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 504    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 505    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 506    ----------------------------------------------------------------------
 507    The first four are for locking-shift.  Control characters for these
 508    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 509
 510    Designations are done by the following escape sequences.
 511    ----------------------------------------------------------------------
 512    escape sequence      description
 513    ----------------------------------------------------------------------
 514    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 515    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 516    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 517    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 518    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 519    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 520    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 521    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 522    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 523    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 524    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 525    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 526    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 527    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 528    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 529    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 530    ----------------------------------------------------------------------
 531
 532    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 533    of dimension 1, chars 94, and final character <F>, and etc.
 534
 535    Note (*): Although these designations are not allowed in ISO2022,
 536    Emacs accepts them on decoding, and produces them on encoding
 537    CHARS96 character set in a coding system which is characterized as
 538    7-bit environment, non-locking-shift, and non-single-shift.
 539
 540    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 541    '(' can be omitted.  We call this as "short-form" here after.
 542
 543    Now you may notice that there are a lot of ways for encoding the
 544    same multilingual text in ISO2022.  Actually, there exist many
 545    coding systems such as Compound Text (used in X's inter-client
 546    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 547    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 548    localized platforms), and all of these are variants of ISO2022.
 549
 550    In addition to the above, Emacs handles two more kinds of escape
 551    sequences: ISO6429's direction specification and Emacs' private
 552    sequence for specifying character composition.
 553
 554    ISO6429's direction specification takes the following format:
 555         o CSI ']'      -- end of the current direction
 556         o CSI '0' ']'  -- end of the current direction
 557         o CSI '1' ']'  -- start of left-to-right text
 558         o CSI '2' ']'  -- start of right-to-left text
 559    The control character CSI (0x9B: control sequence introducer) is
 560    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 561
 562    Character composition specification takes the following format:
 563         o ESC '0' -- start character composition
 564         o ESC '1' -- end character composition
 565    Since these are not standard escape sequences of any ISO, the use
 566    of them for these meaning is restricted to Emacs only.  */
 567
 568 enum iso_code_class_type iso_code_class[256]; /* Code class of each byte value; decode_coding_iso2022 switches on it.  */
 569
 570 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 571    Check if a text is encoded in ISO2022.  If it is, return an
 572    integer in which any of the flag bits:
 573         CODING_CATEGORY_MASK_ISO_7
 574         CODING_CATEGORY_MASK_ISO_8_1
 575         CODING_CATEGORY_MASK_ISO_8_2
 576         CODING_CATEGORY_MASK_ISO_ELSE
 577    are set.  If a code which should never appear in ISO2022 is found,
 578    return 0.  */
 579 /* The result starts as CODING_CATEGORY_MASK_ANY and is only narrowed by what is seen between SRC and SRC_END.  */
 580 int
 581 detect_coding_iso2022 (src, src_end)
 582      unsigned char *src, *src_end;
 583 {
 584   int mask = CODING_CATEGORY_MASK_ANY;
 585   int g1 = 0;                   /* 1 iff designating to G1.  */
 586   int c;
 587   /* NOTE(review): G1 is set only for ESC ')' and ESC '$' ')'; the CHARS96 designators to G1 (ESC '-') are not tracked -- confirm intended.  */
 588   while (src < src_end)
 589     {
 590       c = *src++;
 591       switch (c)
 592         {
 593         case ISO_CODE_ESC:
 594           if (src >= src_end)
 595             break;
 596           c = *src++;
 597           if (src < src_end
 598               && ((c >= '(' && c <= '/')
 599                   || (c == '$' && ((*src >= '(' && *src <= '/')
 600                                    || (*src >= '@' && *src <= 'B')))))
 601             {
 602               /* Valid designation sequence.  */
 603               mask &= (CODING_CATEGORY_MASK_ISO_7
 604                        | CODING_CATEGORY_MASK_ISO_8_1
 605                        | CODING_CATEGORY_MASK_ISO_8_2
 606                        | CODING_CATEGORY_MASK_ISO_ELSE);
 607               if (c == ')' || (c == '$' && *src == ')'))
 608                 {
 609                   g1 = 1;
 610                   mask &= ~CODING_CATEGORY_MASK_ISO_7;
 611                 }
 612               src++;            /* Skip the next byte (the final character, or the byte checked after '$').  */
 613               break;
 614             }
 615           else if (c == 'N' || c == 'O' || c == 'n' || c == 'o')
 616             return CODING_CATEGORY_MASK_ISO_ELSE; /* Shift to G2/G3 by escape sequence.  */
 617           break;
 618
 619         case ISO_CODE_SO:
 620           if (g1)               /* Shift-out after something was designated to G1.  */
 621             return CODING_CATEGORY_MASK_ISO_ELSE;
 622           break;
 623
 624         case ISO_CODE_CSI:
 625         case ISO_CODE_SS2:
 626         case ISO_CODE_SS3:
 627           mask &= ~CODING_CATEGORY_MASK_ISO_7; /* 8-bit control codes.  */
 628           break;
 629
 630         default:
 631           if (c < 0x80)
 632             break;
 633           else if (c < 0xA0)
 634             return 0;           /* Any other C1 control code.  */
 635           else
 636             {
 637               int count = 1;    /* Length of this run of bytes >= 0xA0.  */
 638
 639               mask &= ~CODING_CATEGORY_MASK_ISO_7;
 640               while (src < src_end && *src >= 0xA0)
 641                 count++, src++;
 642               if ((count & 1) && src < src_end)
 643                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2; /* Odd-length run followed by more text.  */
 644             }
 645           break;
 646         }
 647     }
 648
 649   return mask;
 650 }
 651
 652 /* Decode a character of which charset is CHARSET and the 1st position
 653    code is C1.  If dimension of CHARSET is 2, the 2nd position code is
 654    fetched from SRC and set to C2.  If CHARSET is negative, it means
 655    that we are decoding ill formed text, and what we can do is just to
 656    read C1 as is.  */
 657 /* Relies on `c2', `src', `src_end', `dst', and `coding' in the expanding function; ONE_MORE_BYTE may jump to `label_end_of_loop'.  */
 658 #define DECODE_ISO_CHARACTER(charset, c1)                       \
 659   do {                                                          \
 660     if ((charset) >= 0 && CHARSET_DIMENSION (charset) == 2)     \
 661       ONE_MORE_BYTE (c2);                                       \
 662     if (COMPOSING_HEAD_P (coding->composing))                   \
 663       {                                                         \
 664         *dst++ = LEADING_CODE_COMPOSITION;                      \
 665         if (COMPOSING_WITH_RULE_P (coding->composing))          \
 666           /* To tell composition rules are embedded.  */        \
 667           *dst++ = 0xFF;                                        \
 668         coding->composing += 2; /* NOTE(review): presumably leaves the HEAD state; the state values are not visible here -- confirm */ \
 669       }                                                         \
 670     if ((charset) < 0)                                          \
 671       *dst++ = c1;                                              \
 672     else if ((charset) == CHARSET_ASCII)                        \
 673       DECODE_CHARACTER_ASCII (c1);                              \
 674     else if (CHARSET_DIMENSION (charset) == 1)                  \
 675       DECODE_CHARACTER_DIMENSION1 (charset, c1);                \
 676     else                                                        \
 677       DECODE_CHARACTER_DIMENSION2 (charset, c1, c2);            \
 678     if (COMPOSING_WITH_RULE_P (coding->composing))              \
 679       /* To tell a composition rule follows.  */                \
 680       coding->composing = COMPOSING_WITH_RULE_RULE;             \
 681   } while (0)
 682
 683 /* Set designation state into CODING: make graphic register REG designate the charset found for DIMENSION, CHARS, and FINAL_CHAR.  */
 684 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)           \
 685   do {                                                                  \
 686     int charset = ISO_CHARSET_TABLE (dimension, chars, final_char);     \
 687     Lisp_Object temp /* entry for an alternate charset, if any */       \
 688       = Fassq (CHARSET_SYMBOL (charset), Valternate_charset_table);     /* NOTE(review): runs before the charset >= 0 test below -- confirm a negative id is safe here */ \
 689     if (! NILP (temp))                                                  \
 690       charset = get_charset_id (XCONS (temp)->cdr);                     \
 691     if (charset >= 0) /* otherwise REG is left unchanged */             \
 692       {                                                                 \
 693         if (coding->direction == 1 /* substitute the reverse charset when one exists */ \
 694             && CHARSET_REVERSE_CHARSET (charset) >= 0)                  \
 695           charset = CHARSET_REVERSE_CHARSET (charset);                  \
 696         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;            \
 697       }                                                                 \
 698   } while (0)
 699
 700 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 701
 702 int
 703 decode_coding_iso2022 (coding, source, destination,
 704                        src_bytes, dst_bytes, consumed)
 705      struct coding_system *coding;
 706      unsigned char *source, *destination;
 707      int src_bytes, dst_bytes;
 708      int *consumed;
 709 {
 710   unsigned char *src = source;
 711   unsigned char *src_end = source + src_bytes;
 712   unsigned char *dst = destination;
 713   unsigned char *dst_end = destination + dst_bytes;
 714   /* Since the maximum bytes produced by each loop is 7, we subtract 6
 715      from DST_END to assure that overflow checking is necessary only
 716      at the head of loop.  */
 717   unsigned char *adjusted_dst_end = dst_end - 6;
 718   int charset;
 719   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
 720   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
 721   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
 722
 723   while (src < src_end && dst < adjusted_dst_end)
 724     {
 725       /* SRC_BASE remembers the start position in source in each loop.
 726          The loop will be exited when there's not enough source text
 727          to analyze long escape sequence or 2-byte code (within macros
 728          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
 729          to SRC_BASE before exiting.  */
 730       unsigned char *src_base = src;
 731       unsigned char c1 = *src++, c2, cmprule;
 732
 733       switch (iso_code_class [c1])
 734         {
 735         case ISO_0x20_or_0x7F:
 736           if (!coding->composing
 737               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
 738             {
 739               /* This is SPACE or DEL.  */
 740               *dst++ = c1;
 741               break;
 742             }
 743           /* This is a graphic character, we fall down ...  */
 744
 745         case ISO_graphic_plane_0:
 746           if (coding->composing == COMPOSING_WITH_RULE_RULE)
 747             {
 748               /* This is a composition rule.  */
 749               *dst++ = c1 | 0x80;
 750               coding->composing = COMPOSING_WITH_RULE_TAIL;
 751             }
 752           else
 753             DECODE_ISO_CHARACTER (charset0, c1);
 754           break;
 755
 756         case ISO_0xA0_or_0xFF:
 757           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94)
 758             {
 759               /* Invalid code.  */
 760               *dst++ = c1;
 761               break;
 762             }
 763           /* This is a graphic character, we fall down ... */
 764
 765         case ISO_graphic_plane_1:
 766           DECODE_ISO_CHARACTER (charset1, c1);
 767           break;
 768
 769         case ISO_control_code:
 770           /* All ISO2022 control characters in this class have the
 771              same representation in Emacs internal format.  */
 772           *dst++ = c1;
 773           break;
 774
 775         case ISO_carriage_return:
 776           if (coding->eol_type == CODING_EOL_CR)
 777             {
 778               *dst++ = '\n';
 779             }
 780           else if (coding->eol_type == CODING_EOL_CRLF)
 781             {
 782               ONE_MORE_BYTE (c1);
 783               if (c1 == ISO_CODE_LF)
 784                 *dst++ = '\n';
 785               else
 786                 {
 787                   src--;
 788                   *dst++ = c1;
 789                 }
 790             }
 791           else
 792             {
 793               *dst++ = c1;
 794             }
 795           break;
 796
 797         case ISO_shift_out:
 798           if (CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
 799             goto label_invalid_escape_sequence;
 800           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
 801           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
 802           break;
 803
 804         case ISO_shift_in:
 805           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
 806           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
 807           break;
 808
 809         case ISO_single_shift_2_7:
 810         case ISO_single_shift_2:
 811           /* SS2 is handled as an escape sequence of ESC 'N' */
 812           c1 = 'N';
 813           goto label_escape_sequence;
 814
 815         case ISO_single_shift_3:
 816           /* SS2 is handled as an escape sequence of ESC 'O' */
 817           c1 = 'O';
 818           goto label_escape_sequence;
 819
 820         case ISO_control_sequence_introducer:
 821           /* CSI is handled as an escape sequence of ESC '[' ...  */
 822           c1 = '[';
 823           goto label_escape_sequence;
 824
 825         case ISO_escape:
 826           ONE_MORE_BYTE (c1);
 827         label_escape_sequence:
 828           /* Escape sequences handled by Emacs are invocation,
 829              designation, direction specification, and character
 830              composition specification.  */
 831           switch (c1)
 832             {
 833             case '&':           /* revision of following character set */
 834               ONE_MORE_BYTE (c1);
 835               if (!(c1 >= '@' && c1 <= '~'))
 836                 goto label_invalid_escape_sequence;
 837               ONE_MORE_BYTE (c1);
 838               if (c1 != ISO_CODE_ESC)
 839                 goto label_invalid_escape_sequence;
 840               ONE_MORE_BYTE (c1);
 841               goto label_escape_sequence;
 842
 843             case '$':           /* designation of 2-byte character set */
 844               ONE_MORE_BYTE (c1);
 845               if (c1 >= '@' && c1 <= 'B')
 846                 {       /* designation of JISX0208.1978, GB2312.1980,
 847                                    or JISX0208.1980 */
 848                   DECODE_DESIGNATION (0, 2, 94, c1);
 849                 }
 850               else if (c1 >= 0x28 && c1 <= 0x2B)
 851                 {       /* designation of DIMENSION2_CHARS94 character set */
 852                   ONE_MORE_BYTE (c2);
 853                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
 854                 }
 855               else if (c1 >= 0x2C && c1 <= 0x2F)
 856                 {       /* designation of DIMENSION2_CHARS96 character set */
 857                   ONE_MORE_BYTE (c2);
 858                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
 859                 }
 860               else
 861                 goto label_invalid_escape_sequence;
 862               break;
 863
 864             case 'n':           /* invocation of locking-shift-2 */
 865               if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
 866                 goto label_invalid_escape_sequence;
 867               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
 868               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
 869               break;
 870
 871             case 'o':           /* invocation of locking-shift-3 */
 872               if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
 873                 goto label_invalid_escape_sequence;
 874               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
 875               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
 876               break;
 877
 878             case 'N':           /* invocation of single-shift-2 */
 879               if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
 880                 goto label_invalid_escape_sequence;
 881               ONE_MORE_BYTE (c1);
 882               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
 883               DECODE_ISO_CHARACTER (charset, c1);
 884               break;
 885
 886             case 'O':           /* invocation of single-shift-3 */
 887               if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
 888                 goto label_invalid_escape_sequence;
 889               ONE_MORE_BYTE (c1);
 890               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
 891               DECODE_ISO_CHARACTER (charset, c1);
 892               break;
 893
 894             case '0':           /* start composing without embeded rules */
 895               coding->composing = COMPOSING_NO_RULE_HEAD;
 896               break;
 897
 898             case '1':           /* end composing */
 899               coding->composing = COMPOSING_NO;
 900               break;
 901
 902             case '2':           /* start composing with embeded rules */
 903               coding->composing = COMPOSING_WITH_RULE_HEAD;
 904               break;
 905
 906             case '[':           /* specification of direction */
 907               /* For the moment, nested direction is not supported.
 908                  So, the value of `coding->direction' is 0 or 1: 0
 909                  means left-to-right, 1 means right-to-left.  */
 910               ONE_MORE_BYTE (c1);
 911               switch (c1)
 912                 {
 913                 case ']':       /* end of the current direction */
 914                   coding->direction = 0;
 915
 916                 case '0':       /* end of the current direction */
 917                 case '1':       /* start of left-to-right direction */
 918                   ONE_MORE_BYTE (c1);
 919                   if (c1 == ']')
 920                     coding->direction = 0;
 921                   else
 922                     goto label_invalid_escape_sequence;
 923                   break;
 924
 925                 case '2':       /* start of right-to-left direction */
 926                   ONE_MORE_BYTE (c1);
 927                   if (c1 == ']')
 928                     coding->direction= 1;
 929                   else
 930                     goto label_invalid_escape_sequence;
 931                   break;
 932
 933                 default:
 934                   goto label_invalid_escape_sequence;
 935                 }
 936               break;
 937
 938             default:
 939               if (c1 >= 0x28 && c1 <= 0x2B)
 940                 {       /* designation of DIMENSION1_CHARS94 character set */
 941                   ONE_MORE_BYTE (c2);
 942                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
 943                 }
 944               else if (c1 >= 0x2C && c1 <= 0x2F)
 945                 {       /* designation of DIMENSION1_CHARS96 character set */
 946                   ONE_MORE_BYTE (c2);
 947                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
 948                 }
 949               else
 950                 {
 951                   goto label_invalid_escape_sequence;
 952                 }
 953             }
 954           /* We must update these variables now.  */
 955           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
 956           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
 957           break;
 958
 959         label_invalid_escape_sequence:
 960           {
 961             int length = src - src_base;
 962
 963             bcopy (src_base, dst, length);
 964             dst += length;
 965           }
 966         }
 967       continue;
 968
 969     label_end_of_loop:
 970       coding->carryover_size = src - src_base;
 971       bcopy (src_base, coding->carryover, coding->carryover_size);
 972       src = src_base;
 973       break;
 974     }
 975
 976   /* If this is the last block of the text to be decoded, we had
 977      better just flush out all remaining codes in the text although
 978      they are not valid characters.  */
 979   if (coding->last_block)
 980     {
 981       bcopy (src, dst, src_end - src);
 982       dst += (src_end - src);
 983       src = src_end;
 984     }
 985   *consumed = src - source;
 986   return dst - destination;
 987 }
 988
/* ISO2022 encoding stuff.  */
 990
 991 /*
 992    It is not enough to say just "ISO2022" on encoding, but we have to
 993    specify more details.  In Emacs, each coding-system of ISO2022
 994    variant has the following specifications:
 995         1. Initial designation to G0 thru G3.
 996         2. Allows short-form designation?
 997         3. ASCII should be designated to G0 before control characters?
 998         4. ASCII should be designated to G0 at end of line?
 999         5. 7-bit environment or 8-bit environment?
1000         6. Use locking-shift?
1001         7. Use Single-shift?
1002    And the following two are only for Japanese:
1003         8. Use ASCII in place of JIS0201-1976-Roman?
1004         9. Use JISX0208-1983 in place of JISX0208-1978?
1005    These specifications are encoded in `coding->flags' as flag bits
1006    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1007    detail.
1008 */
1009
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, and record the designation in CODING.  If CHARSET has
   an entry in Vcharset_revision_alist, a revision sequence
   (ESC '&' <revision>) is emitted first.  The short form, which omits
   the intermediate character, is used only for a 2-dimensional
   94-character set designated to register 0 whose <final-char> is
   '@', 'A', or 'B', and only if CODING allows it.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const char *inter_94 = "()*+";                                      \
    const char *inter_96 = ",-./";                                      \
    unsigned char final_byte = CHARSET_ISO_FINAL_CHAR (charset);        \
    int is_94 = CHARSET_CHARS (charset) == 94;                          \
    Lisp_Object revision                                                \
      = Fassq (make_number (charset), Vcharset_revision_alist);         \
    if (! NILP (revision))                                              \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = XINT (XCONS (revision)->cdr) + '@';                    \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    /* Multi-byte character sets are introduced by '$'.  */             \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      *dst++ = '$';                                                     \
    /* The intermediate character selects the register; it is left     \
       out only in the short form.  */                                  \
    if (! is_94)                                                        \
      *dst++ = (unsigned char) (inter_96[reg]);                         \
    else if (CHARSET_DIMENSION (charset) == 1                           \
             || ! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
             || reg != 0                                                \
             || final_byte < '@' || final_byte > 'B')                   \
      *dst++ = (unsigned char) (inter_94[reg]);                         \
    *dst++ = final_byte;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
1052
/* The two macros below emit the ISO2022 single-shift functions
   (single-shift-2 and single-shift-3) at DST: as an escape sequence
   when CODING is flagged as 7-bit, otherwise as the single control
   character.  Both mark CODING as being in single-shifting state.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1074
/* The four macros below emit the ISO2022 locking-shift functions
   (shift-in, shift-out, locking-shift-2, and locking-shift-3) at DST
   and record in CODING which graphic register (0 thru 3 respectively)
   is now invoked to graphic plane 0.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
  } while (0)
1102
/* Emit at DST the one position-code C1 of a DIMENSION1 character
   whose character set is CHARSET.  When CHARSET is not reachable
   through a pending single-shift or through graphic plane 0 or 1,
   designation and invocation sequences are emitted first.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (! CODING_SPEC_ISO_SINGLE_SHIFTING (coding)                      \
        && charset != CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)         \
        && charset != CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))        \
      {                                                                 \
        /* CHARSET is not yet invoked to any graphic plane.  Invoke     \
           it (designating it to some graphic register first if         \
           needed), then go around again to emit the character.  */     \
        dst = encode_invocation_designation (charset, coding, dst);     \
        continue;                                                       \
      }                                                                 \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single-shift covers just this one character.  */           \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = c1 & 0x7F;                                           \
        else                                                            \
          *dst++ = c1 | 0x80;                                           \
      }                                                                 \
    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
      *dst++ = c1 & 0x7F;                                               \
    else                                                                \
      *dst++ = c1 | 0x80;                                               \
    break;                                                              \
  } while (1)
1136
/* Emit at DST the two position-codes C1 and C2 of a DIMENSION2
   character whose character set is CHARSET.  When CHARSET is not
   reachable through a pending single-shift or through graphic plane 0
   or 1, designation and invocation codes are emitted first.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (! CODING_SPEC_ISO_SINGLE_SHIFTING (coding)                      \
        && charset != CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)         \
        && charset != CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))        \
      {                                                                 \
        /* CHARSET is not yet invoked to any graphic plane.  Invoke     \
           it (designating it to some graphic register first if         \
           needed), then go around again to emit the character.  */     \
        dst = encode_invocation_designation (charset, coding, dst);     \
        continue;                                                       \
      }                                                                 \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single-shift covers just this one character.  */           \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
      }                                                                 \
    else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
      }                                                                 \
    break;                                                              \
  } while (1)
1169
1170 /* Produce designation and invocation codes at a place pointed by DST
1171    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1172    Return new DST.  */
1173
1174 unsigned char *
1175 encode_invocation_designation (charset, coding, dst)
1176      int charset;
1177      struct coding_system *coding;
1178      unsigned char *dst;
1179 {
1180   int reg;                      /* graphic register number */
1181
1182   /* At first, check designations.  */
1183   for (reg = 0; reg < 4; reg++)
1184     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1185       break;
1186
1187   if (reg >= 4)
1188     {
1189       /* CHARSET is not yet designated to any graphic registers.  */
1190       /* At first check the requested designation.  */
1191       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1192       if (reg < 0)
1193         /* Since CHARSET requests no special designation, designate to
1194            graphic register 0.  */
1195         reg = 0;
1196
1197       ENCODE_DESIGNATION (charset, reg, coding);
1198     }
1199
1200   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1201       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1202     {
1203       /* Since the graphic register REG is not invoked to any graphic
1204          planes, invoke it to graphic plane 0.  */
1205       switch (reg)
1206         {
1207         case 0:                 /* graphic register 0 */
1208           ENCODE_SHIFT_IN;
1209           break;
1210
1211         case 1:                 /* graphic register 1 */
1212           ENCODE_SHIFT_OUT;
1213           break;
1214
1215         case 2:                 /* graphic register 2 */
1216           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1217             ENCODE_SINGLE_SHIFT_2;
1218           else
1219             ENCODE_LOCKING_SHIFT_2;
1220           break;
1221
1222         case 3:                 /* graphic register 3 */
1223           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1224             ENCODE_SINGLE_SHIFT_3;
1225           else
1226             ENCODE_LOCKING_SHIFT_3;
1227           break;
1228         }
1229     }
1230   return dst;
1231 }
1232
/* The following three macros produce codes for indicating composition:
   start without embedded rules (ESC '0'), start with embedded rules
   (ESC '2'), and end of composition (ESC '1').  */
#define ENCODE_COMPOSITION_NO_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '0')
#define ENCODE_COMPOSITION_WITH_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '2')
#define ENCODE_COMPOSITION_END \
  (*dst++ = ISO_CODE_ESC, *dst++ = '1')
1237
/* The following three macros produce codes for indicating direction
   of text.  ENCODE_CONTROL_SEQUENCE_INTRODUCER emits CSI, as the
   escape sequence ESC '[' when CODING has the 7-bit flag set and as
   the single control character otherwise.  ENCODE_DIRECTION_R2L and
   ENCODE_DIRECTION_L2R emit CSI '2' ']' and CSI '0' ']' respectively.
   All three are statements, not expressions.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    /* Test the flag bit; other flags may be set too.  */ \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2', *dst++ = ']';                 \
  } while (0)

#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0', *dst++ = ']';                 \
  } while (0)
1253
/* Emit at DST the codes that bring the graphic planes and registers
   back to their initial state: shift-in if graphic plane 0 does not
   currently invoke register 0, then a designation for every register
   that has an initial designation differing from its current one.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int g = 0;                                                              \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (g < 4)                                                           \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, g) >= 0            \
            && (CODING_SPEC_ISO_DESIGNATION (coding, g)                     \
                != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, g)))        \
          ENCODE_DESIGNATION                                                \
            (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, g), g, coding);   \
        g++;                                                                \
      }                                                                     \
  } while (0)
1268
1269 int
1270 encode_designation_at_bol (coding, src, src_end, dstp)
1271      struct coding_system *coding;
1272      unsigned char *src, *src_end, **dstp;
1273 {
1274   int charset, reg, r[4];
1275   unsigned char *dst = *dstp, c;
1276   for (reg = 0; reg < 4; reg++) r[reg] = -1;
1277   while (src < src_end && (c = *src++) != '\n')
1278     {
1279       switch (emacs_code_class[c])
1280         {
1281         case EMACS_ascii_code:
1282           charset = CHARSET_ASCII;
1283           break;
1284         case EMACS_leading_code_2:
1285           if (++src >= src_end) continue;
1286           charset = c;
1287           break;
1288         case EMACS_leading_code_3:
1289           if ((src += 2) >= src_end) continue;
1290           charset =  (c < LEADING_CODE_PRIVATE_11 ? c : *(src - 2));
1291           break;
1292         case EMACS_leading_code_4:
1293           if ((src += 3) >= src_end) continue;
1294           charset = *(src - 3);
1295           break;
1296         default:
1297           continue;
1298         }
1299       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1300       if (r[reg] < 0
1301           && CODING_SPEC_ISO_DESIGNATION (coding, reg) != charset)
1302         r[reg] = charset;
1303     }
1304   if (c != '\n' && !coding->last_block)
1305     return -1;
1306   for (reg = 0; reg < 4; reg++)
1307     if (r[reg] >= 0)
1308       ENCODE_DESIGNATION (r[reg], reg, coding);
1309   *dstp = dst;
1310   return 0;
1311 }
1312
1313 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
1314
1315 int
1316 encode_coding_iso2022 (coding, source, destination,
1317                        src_bytes, dst_bytes, consumed)
1318      struct coding_system *coding;
1319      unsigned char *source, *destination;
1320      int src_bytes, dst_bytes;
1321      int *consumed;
1322 {
1323   unsigned char *src = source;
1324   unsigned char *src_end = source + src_bytes;
1325   unsigned char *dst = destination;
1326   unsigned char *dst_end = destination + dst_bytes;
1327   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1328      from DST_END to assure overflow checking is necessary only at the
1329      head of loop.  */
1330   unsigned char *adjusted_dst_end = dst_end - 19;
1331
1332   while (src < src_end && dst < adjusted_dst_end)
1333     {
1334       /* SRC_BASE remembers the start position in source in each loop.
1335          The loop will be exited when there's not enough source text
1336          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1337          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1338          reset to SRC_BASE before exiting.  */
1339       unsigned char *src_base = src;
1340       unsigned char c1, c2, c3, c4;
1341       int charset;
1342
1343       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1344           && CODING_SPEC_ISO_BOL (coding))
1345         {
1346           /* We have to produce destination sequences now.  */
1347           if (encode_designation_at_bol (coding, src, src_end, &dst) < 0)
1348             /* We can't find end of line in the current block.  Let's
1349              repeat encoding starting from the current position
1350              pointed by SRC.  */
1351             break;
1352           CODING_SPEC_ISO_BOL (coding) = 0;
1353         }
1354
1355       c1 = *src++;
1356       /* If we are seeing a component of a composite character, we are
1357          seeing a leading-code specially encoded for composition, or a
1358          composition rule if composing with rule.  We must set C1
1359          to a normal leading-code or an ASCII code.  If we are not at
1360          a composed character, we must reset the composition state.  */
1361       if (COMPOSING_P (coding->composing))
1362         {
1363           if (c1 < 0xA0)
1364             {
1365               /* We are not in a composite character any longer.  */
1366               coding->composing = COMPOSING_NO;
1367               ENCODE_COMPOSITION_END;
1368             }
1369           else
1370             {
1371               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1372                 {
1373                   *dst++ = c1 & 0x7F;
1374                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1375                   continue;
1376                 }
1377               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1378                 coding->composing = COMPOSING_WITH_RULE_RULE;
1379               if (c1 == 0xA0)
1380                 {
1381                   /* This is an ASCII component.  */
1382                   ONE_MORE_BYTE (c1);
1383                   c1 &= 0x7F;
1384                 }
1385               else
1386                 /* This is a leading-code of non ASCII component.  */
1387                 c1 -= 0x20;
1388             }
1389         }
1390
1391       /* Now encode one character.  C1 is a control character, an
1392          ASCII character, or a leading-code of multi-byte character.  */
1393       switch (emacs_code_class[c1])
1394         {
1395         case EMACS_ascii_code:
1396           ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1);
1397           break;
1398
1399         case EMACS_control_code:
1400           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1401             ENCODE_RESET_PLANE_AND_REGISTER;
1402           *dst++ = c1;
1403           break;
1404
1405         case EMACS_carriage_return_code:
1406           if (!coding->selective)
1407             {
1408               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1409                 ENCODE_RESET_PLANE_AND_REGISTER;
1410               *dst++ = c1;
1411               break;
1412             }
1413           /* fall down to treat '\r' as '\n' ...  */
1414
1415         case EMACS_linefeed_code:
1416           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1417             ENCODE_RESET_PLANE_AND_REGISTER;
1418           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1419             bcopy (coding->spec.iso2022.initial_designation,
1420                    coding->spec.iso2022.current_designation,
1421                    sizeof coding->spec.iso2022.initial_designation);
1422           if (coding->eol_type == CODING_EOL_LF
1423               || coding->eol_type == CODING_EOL_AUTOMATIC)
1424             *dst++ = ISO_CODE_LF;
1425           else if (coding->eol_type == CODING_EOL_CRLF)
1426             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1427           else
1428             *dst++ = ISO_CODE_CR;
1429           CODING_SPEC_ISO_BOL (coding) = 1;
1430           break;
1431
1432         case EMACS_leading_code_2:
1433           ONE_MORE_BYTE (c2);
1434           ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2);
1435           break;
1436
1437         case EMACS_leading_code_3:
1438           TWO_MORE_BYTES (c2, c3);
1439           if (c1 < LEADING_CODE_PRIVATE_11)
1440             ENCODE_ISO_CHARACTER_DIMENSION2 (c1, c2, c3);
1441           else
1442             ENCODE_ISO_CHARACTER_DIMENSION1 (c2, c3);
1443           break;
1444
1445         case EMACS_leading_code_4:
1446           THREE_MORE_BYTES (c2, c3, c4);
1447           ENCODE_ISO_CHARACTER_DIMENSION2 (c2, c3, c4);
1448           break;
1449
1450         case EMACS_leading_code_composition:
1451           ONE_MORE_BYTE (c1);
1452           if (c1 == 0xFF)
1453             {
1454               coding->composing = COMPOSING_WITH_RULE_HEAD;
1455               ENCODE_COMPOSITION_WITH_RULE_START;
1456             }
1457           else
1458             {
1459               /* Rewind one byte because it is a character code of
1460                  composition elements.  */
1461               src--;
1462               coding->composing = COMPOSING_NO_RULE_HEAD;
1463               ENCODE_COMPOSITION_NO_RULE_START;
1464             }
1465           break;
1466
1467         case EMACS_invalid_code:
1468           *dst++ = c1;
1469           break;
1470         }
1471       continue;
1472     label_end_of_loop:
1473       coding->carryover_size = src - src_base;
1474       bcopy (src_base, coding->carryover, coding->carryover_size);
1475       src = src_base;
1476       break;
1477     }
1478
1479   /* If this is the last block of the text to be encoded, we must
1480      reset the state of graphic planes and registers to initial one.
1481      In addition, we had better just flush out all remaining codes in
1482      the text although they are not valid characters.  */
1483   if (coding->last_block)
1484     {
1485       ENCODE_RESET_PLANE_AND_REGISTER;
1486       bcopy(src, dst, src_end - src);
1487       dst += (src_end - src);
1488       src = src_end;
1489     }
1490   *consumed = src - source;
1491   return dst - destination;
1492 }
1493
1494 \f
1495 /*** 4. SJIS and BIG5 handlers ***/
1496
/* Although SJIS and BIG5 are not ISO coding systems, they are used
   quite widely.  So, for the moment, Emacs supports them in the bare
   C code.  But, in the future, they may be supported only by CCL.  */
1500
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.
1507
1508    --- CODE RANGE of SJIS ---
1509    (character set)      (range)
1510    ASCII                0x00 .. 0x7F
1511    KATAKANA-JISX0201    0xA0 .. 0xDF
1512    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xFF
1513             (2nd byte)  0x40 .. 0xFF
1514    -------------------------------
1515
1516 */
1517
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and each of its characters is encoded in two bytes.
1521
1522    --- CODE RANGE of BIG5 ---
1523    (character set)      (range)
1524    ASCII                0x00 .. 0x7F
1525    Big5 (1st byte)      0xA1 .. 0xFE
1526         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
1527    --------------------------
1528
1529    Since the number of characters in Big5 is larger than maximum
1530    characters in Emacs' charset (96x96), it can't be handled as one
1531    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
1532    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
1533    contains frequently used characters and the latter contains less
1534    frequently used characters.  */
1535
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every argument is parenthesized in the expansions so that a caller
   may pass an expression (e.g. `byte & 0xFF') without being bitten by
   operator precedence.  Input arguments are still evaluated more
   than once, so they must not have side effects.  */

/* Number of Big5 characters which have the same code in 1st byte:
   the 2nd byte takes 0x40..0x7E or 0xA1..0xFE.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Decode the BIG5 byte pair (B1, B2).  CHARSET is set to
   `charset_big5_1' if B1 is less than 0xC9, else to `charset_big5_2';
   C1 and C2 are set to the position-codes in that charset.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Encode the character of CHARSET whose position-codes are C1 and C2
   (both without the 8th bit) into the BIG5 byte pair (B1, B2).  This
   is the inverse of DECODE_BIG5.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    /* Skip the gap 0x7F..0xA0 between the two 2nd-byte ranges.  */     \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
1568
1569 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1570    Check if a text is encoded in SJIS.  If it is, return
1571    CODING_CATEGORY_MASK_SJIS, else return 0.  */
1572
1573 int
1574 detect_coding_sjis (src, src_end)
1575      unsigned char *src, *src_end;
1576 {
1577   unsigned char c;
1578
1579   while (src < src_end)
1580     {
1581       c = *src++;
1582       if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
1583         return 0;
1584       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
1585         {
1586           if (src < src_end && *src++ < 0x40)
1587             return 0;
1588         }
1589     }
1590   return CODING_CATEGORY_MASK_SJIS;
1591 }
1592
1593 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1594    Check if a text is encoded in BIG5.  If it is, return
1595    CODING_CATEGORY_MASK_BIG5, else return 0.  */
1596
1597 int
1598 detect_coding_big5 (src, src_end)
1599      unsigned char *src, *src_end;
1600 {
1601   unsigned char c;
1602
1603   while (src < src_end)
1604     {
1605       c = *src++;
1606       if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
1607         return 0;
1608       if (c >= 0xA1)
1609         {
1610           if (src >= src_end)
1611             break;
1612           c = *src++;
1613           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
1614             return 0;
1615         }
1616     }
1617   return CODING_CATEGORY_MASK_BIG5;
1618 }
1619
1620 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
1621    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
1622
1623 int
1624 decode_coding_sjis_big5 (coding, source, destination,
1625                          src_bytes, dst_bytes, consumed, sjis_p)
1626      struct coding_system *coding;
1627      unsigned char *source, *destination;
1628      int src_bytes, dst_bytes;
1629      int *consumed;
1630      int sjis_p;
1631 {
1632   unsigned char *src = source;
1633   unsigned char *src_end = source + src_bytes;
1634   unsigned char *dst = destination;
1635   unsigned char *dst_end = destination + dst_bytes;
1636   /* Since the maximum bytes produced by each loop is 4, we subtract 3
1637      from DST_END to assure overflow checking is necessary only at the
1638      head of loop.  */
1639   unsigned char *adjusted_dst_end = dst_end - 3;
1640
1641   while (src < src_end && dst < adjusted_dst_end)
1642     {
1643       /* SRC_BASE remembers the start position in source in each loop.
1644          The loop will be exited when there's not enough source text
1645          to analyze two-byte character (within macro ONE_MORE_BYTE).
1646          In that case, SRC is reset to SRC_BASE before exiting.  */
1647       unsigned char *src_base = src;
1648       unsigned char c1 = *src++, c2, c3, c4;
1649
1650       if (c1 == '\r')
1651         {
1652           if (coding->eol_type == CODING_EOL_CRLF)
1653             {
1654               ONE_MORE_BYTE (c2);
1655               if (c2 == '\n')
1656                 *dst++ = c2;
1657               else
1658                 /* To process C2 again, SRC is subtracted by 1.  */
1659                 *dst++ = c1, src--;
1660             }
1661           else
1662             *dst++ = c1;
1663         }
1664       else if (c1 < 0x80)
1665         *dst++ = c1;
1666       else if (c1 < 0xA0 || c1 >= 0xE0)
1667         {
1668           /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */
1669           if (sjis_p)
1670             {
1671               ONE_MORE_BYTE (c2);
1672               DECODE_SJIS (c1, c2, c3, c4);
1673               DECODE_CHARACTER_DIMENSION2 (charset_jisx0208, c3, c4);
1674             }
1675           else if (c1 >= 0xE0 && c1 < 0xFF)
1676             {
1677               int charset;
1678
1679               ONE_MORE_BYTE (c2);
1680               DECODE_BIG5 (c1, c2, charset, c3, c4);
1681               DECODE_CHARACTER_DIMENSION2 (charset, c3, c4);
1682             }
1683           else                  /* Invalid code */
1684             *dst++ = c1;
1685         }
1686       else
1687         {
1688           /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
1689           if (sjis_p)
1690             DECODE_CHARACTER_DIMENSION1 (charset_katakana_jisx0201, c1);
1691           else
1692             {
1693               int charset;
1694
1695               ONE_MORE_BYTE (c2);
1696               DECODE_BIG5 (c1, c2, charset, c3, c4);
1697               DECODE_CHARACTER_DIMENSION2 (charset, c3, c4);
1698             }
1699         }
1700       continue;
1701
1702     label_end_of_loop:
1703       coding->carryover_size = src - src_base;
1704       bcopy (src_base, coding->carryover, coding->carryover_size);
1705       src = src_base;
1706       break;
1707     }
1708
1709   *consumed = src - source;
1710   return dst - destination;
1711 }
1712
1713 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
1714    This function can encode `charset_ascii', `charset_katakana_jisx0201',
1715    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
1716    sure that all these charsets are registered as official charset
1717    (i.e. do not have extended leading-codes).  Characters of other
1718    charsets are produced without any encoding.  If SJIS_P is 1, encode
1719    SJIS text, else encode BIG5 text.  */
1720
1721 int
1722 encode_coding_sjis_big5 (coding, source, destination,
1723                          src_bytes, dst_bytes, consumed, sjis_p)
1724      struct coding_system *coding;
1725      unsigned char *source, *destination;
1726      int src_bytes, dst_bytes;
1727      int *consumed;
1728      int sjis_p;
1729 {
1730   unsigned char *src = source;
1731   unsigned char *src_end = source + src_bytes;
1732   unsigned char *dst = destination;
1733   unsigned char *dst_end = destination + dst_bytes;
1734   /* Since the maximum bytes produced by each loop is 2, we subtract 1
1735      from DST_END to assure overflow checking is necessary only at the
1736      head of loop.  */
1737   unsigned char *adjusted_dst_end = dst_end - 1;
1738
1739   while (src < src_end && dst < adjusted_dst_end)
1740     {
1741       /* SRC_BASE remembers the start position in source in each loop.
1742          The loop will be exited when there's not enough source text
1743          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
1744          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
1745          before exiting.  */
1746       unsigned char *src_base = src;
1747       unsigned char c1 = *src++, c2, c3, c4;
1748
1749       if (coding->composing)
1750         {
1751           if (c1 == 0xA0)
1752             {
1753               ONE_MORE_BYTE (c1);
1754               c1 &= 0x7F;
1755             }
1756           else if (c1 >= 0xA0)
1757             c1 -= 0x20;
1758           else
1759             coding->composing = 0;
1760         }
1761
1762       switch (emacs_code_class[c1])
1763         {
1764         case EMACS_ascii_code:
1765         case EMACS_control_code:
1766           *dst++ = c1;
1767           break;
1768
1769         case EMACS_carriage_return_code:
1770           if (!coding->selective)
1771             {
1772               *dst++ = c1;
1773               break;
1774             }
1775           /* fall down to treat '\r' as '\n' ...  */
1776
1777         case EMACS_linefeed_code:
1778           if (coding->eol_type == CODING_EOL_LF
1779               || coding->eol_type == CODING_EOL_AUTOMATIC)
1780             *dst++ = '\n';
1781           else if (coding->eol_type == CODING_EOL_CRLF)
1782             *dst++ = '\r', *dst++ = '\n';
1783           else
1784             *dst++ = '\r';
1785           break;
1786
1787         case EMACS_leading_code_2:
1788           ONE_MORE_BYTE (c2);
1789           if (sjis_p && c1 == charset_katakana_jisx0201)
1790             *dst++ = c2;
1791           else
1792             *dst++ = c1, *dst++ = c2;
1793           break;
1794
1795         case EMACS_leading_code_3:
1796           TWO_MORE_BYTES (c2, c3);
1797           c2 &= 0x7F, c3 &= 0x7F;
1798           if (sjis_p && c1 == charset_jisx0208)
1799             {
1800               unsigned char s1, s2;
1801
1802               ENCODE_SJIS (c2, c3, s1, s2);
1803               *dst++ = s1, *dst++ = s2;
1804             }
1805           else if (!sjis_p && (c1 == charset_big5_1 || c1 == charset_big5_2))
1806             {
1807               unsigned char b1, b2;
1808
1809               ENCODE_BIG5 (c1, c2, c3, b1, b2);
1810               *dst++ = b1, *dst++ = b2;
1811             }
1812           else
1813             *dst++ = c1, *dst++ = c2, *dst++ = c3;
1814           break;
1815
1816         case EMACS_leading_code_4:
1817           THREE_MORE_BYTES (c2, c3, c4);
1818           *dst++ = c1, *dst++ = c2, *dst++ = c3, *dst++ = c4;
1819           break;
1820
1821         case EMACS_leading_code_composition:
1822           coding->composing = 1;
1823           break;
1824
1825         default:                /* i.e. case EMACS_invalid_code: */
1826           *dst++ = c1;
1827         }
1828       continue;
1829
1830     label_end_of_loop:
1831       coding->carryover_size = src - src_base;
1832       bcopy (src_base, coding->carryover, coding->carryover_size);
1833       src = src_base;
1834       break;
1835     }
1836
1837   *consumed = src - source;
1838   return dst - destination;
1839 }
1840
1841 \f
1842 /*** 5. End-of-line handlers ***/
1843
1844 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
1845    This function is called only when `coding->eol_type' is
1846    CODING_EOL_CRLF or CODING_EOL_CR.  */
1847
1848 decode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
1849      struct coding_system *coding;
1850      unsigned char *source, *destination;
1851      int src_bytes, dst_bytes;
1852      int *consumed;
1853 {
1854   unsigned char *src = source;
1855   unsigned char *src_end = source + src_bytes;
1856   unsigned char *dst = destination;
1857   unsigned char *dst_end = destination + dst_bytes;
1858   int produced;
1859
1860   switch (coding->eol_type)
1861     {
1862     case CODING_EOL_CRLF:
1863       {
1864         /* Since the maximum bytes produced by each loop is 2, we
1865            subtract 1 from DST_END to assure overflow checking is
1866            necessary only at the head of loop.  */
1867         unsigned char *adjusted_dst_end = dst_end - 1;
1868
1869         while (src < src_end && dst < adjusted_dst_end)
1870           {
1871             unsigned char *src_base = src;
1872             unsigned char c = *src++;
1873             if (c == '\r')
1874               {
1875                 ONE_MORE_BYTE (c);
1876                 if (c != '\n')
1877                   *dst++ = '\r';
1878                 *dst++ = c;
1879               }
1880             else
1881               *dst++ = c;
1882             continue;
1883
1884           label_end_of_loop:
1885             coding->carryover_size = src - src_base;
1886             bcopy (src_base, coding->carryover, coding->carryover_size);
1887             src = src_base;
1888             break;
1889           }
1890         *consumed = src - source;
1891         produced = dst - destination;
1892         break;
1893       }
1894
1895     case CODING_EOL_CR:
1896       produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
1897       bcopy (source, destination, produced);
1898       dst_end = destination + produced;
1899       while (dst < dst_end)
1900         if (*dst++ == '\r') dst[-1] = '\n';
1901       *consumed = produced;
1902       break;
1903
1904     default:                    /* i.e. case: CODING_EOL_LF */
1905       produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
1906       bcopy (source, destination, produced);
1907       *consumed = produced;
1908       break;
1909     }
1910
1911   return produced;
1912 }
1913
1914 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
1915    format of end-of-line according to `coding->eol_type'.  If
1916    `coding->selective' is 1, code '\r' in source text also means
1917    end-of-line.  */
1918
1919 encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
1920      struct coding_system *coding;
1921      unsigned char *source, *destination;
1922      int src_bytes, dst_bytes;
1923      int *consumed;
1924 {
1925   unsigned char *src = source;
1926   unsigned char *dst = destination;
1927   int produced;
1928
1929   if (src_bytes <= 0)
1930     return 0;
1931
1932   switch (coding->eol_type)
1933     {
1934     case CODING_EOL_LF:
1935     case CODING_EOL_AUTOMATIC:
1936       produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
1937       bcopy (source, destination, produced);
1938       if (coding->selective)
1939         {
1940           int i = produced;
1941           while (i--)
1942             if (*dst++ == '\r') dst[-1] = '\n';
1943         }
1944       *consumed = produced;
1945
1946     case CODING_EOL_CRLF:
1947       {
1948         unsigned char c;
1949         unsigned char *src_end = source + src_bytes;
1950         unsigned char *dst_end = destination + dst_bytes;
1951         /* Since the maximum bytes produced by each loop is 2, we
1952            subtract 1 from DST_END to assure overflow checking is
1953            necessary only at the head of loop.  */
1954         unsigned char *adjusted_dst_end = dst_end - 1;
1955
1956         while (src < src_end && dst < adjusted_dst_end)
1957           {
1958             c = *src++;
1959             if (c == '\n' || (c == '\r' && coding->selective))
1960               *dst++ = '\r', *dst++ = '\n';
1961             else
1962               *dst++ = c;
1963           }
1964         produced = dst - destination;
1965         *consumed = src - source;
1966         break;
1967       }
1968
1969     default:                    /* i.e. case CODING_EOL_CR: */
1970       produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
1971       bcopy (source, destination, produced);
1972       {
1973         int i = produced;
1974         while (i--)
1975           if (*dst++ == '\n') dst[-1] = '\r';
1976       }
1977       *consumed = produced;
1978     }
1979
1980   return produced;
1981 }
1982
1983 \f
1984 /*** 6. C library functions ***/
1985
1986 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
1987    has a property `coding-system'.  The value of this property is a
1988    vector of length 5 (called as coding-vector).  Among elements of
1989    this vector, the first (element[0]) and the fifth (element[4])
1990    carry important information for decoding/encoding.  Before
1991    decoding/encoding, this information should be set in fields of a
1992    structure of type `coding_system'.
1993
1994    A value of property `coding-system' can be a symbol of another
1995    subsidiary coding-system.  In that case, Emacs gets coding-vector
1996    from that symbol.
1997
1998    `element[0]' contains information to be set in `coding->type'.  The
1999    value and its meaning is as follows:
2000
   0 -- coding_type_internal
   1 -- coding_type_sjis
   2 -- coding_type_iso2022
   3 -- coding_type_big5
   4 -- coding_type_ccl
   nil -- coding_type_no_conversion
   t -- coding_type_automatic
2008
2009    `element[4]' contains information to be set in `coding->flags' and
2010    `coding->spec'.  The meaning varies by `coding->type'.
2011
   If `coding->type' is `coding_type_iso2022', element[4] is a vector
   of length 32 (of which the first 15 sub-elements are used now).
   Meanings of these sub-elements are:
2015
2016    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2017         If the value is an integer of valid charset, the charset is
2018         assumed to be designated to graphic register N initially.
2019
2020         If the value is minus, it is a minus value of charset which
2021         reserves graphic register N, which means that the charset is
2022         not designated initially but should be designated to graphic
2023         register N just before encoding a character in that charset.
2024
2025         If the value is nil, graphic register N is never used on
2026         encoding.
2027
   sub-element[N] where N is 4 through 14: to be set in `coding->flags'
        Each value takes t or nil.  See the section ISO2022 of
        `coding.h' for more information.
2031
2032    If `coding->type' is `coding_type_big5', element[4] is t to denote
2033    BIG5-ETen or nil to denote BIG5-HKU.
2034
2035    If `coding->type' takes the other value, element[4] is ignored.
2036
2037    Emacs Lisp's coding system also carries information about format of
2038    end-of-line in a value of property `eol-type'.  If the value is
2039    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2040    means CODING_EOL_CR.  If it is not integer, it should be a vector
2041    of subsidiary coding systems of which property `eol-type' has one
2042    of above values.
2043
2044 */
2045
2046 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2047    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2048    is setup so that no conversion is necessary and return -1, else
2049    return 0.  */
2050
2051 int
2052 setup_coding_system (coding_system, coding)
2053      Lisp_Object coding_system;
2054      struct coding_system *coding;
2055 {
2056   Lisp_Object type, eol_type;
2057
2058   /* At first, set several fields default values.  */
2059   coding->require_flushing = 0;
2060   coding->last_block = 0;
2061   coding->selective = 0;
2062   coding->composing = 0;
2063   coding->direction = 0;
2064   coding->carryover_size = 0;
2065   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2066
2067   Vlast_coding_system_used = coding->symbol = coding_system;
2068   eol_type = Qnil;
2069   /* Get value of property `coding-system' until we get a vector.
2070      While doing that, also get values of properties
2071      `post-read-conversion', `pre-write-conversion', and `eol-type'.  */
2072   while (!NILP (coding_system) && SYMBOLP (coding_system))
2073     {
2074       if (NILP (coding->post_read_conversion))
2075         coding->post_read_conversion = Fget (coding_system,
2076                                              Qpost_read_conversion);
2077       if (NILP (coding->pre_write_conversion))
2078         coding->pre_write_conversion = Fget (coding_system,
2079                                              Qpre_write_conversion);
2080       if (NILP (eol_type))
2081         eol_type = Fget (coding_system, Qeol_type);
2082       coding_system = Fget (coding_system, Qcoding_system);
2083     }
2084   if (!VECTORP (coding_system)
2085       || XVECTOR (coding_system)->size != 5)
2086     goto label_invalid_coding_system;
2087
2088   if (VECTORP (eol_type))
2089     coding->eol_type = CODING_EOL_AUTOMATIC;
2090   else if (XFASTINT (eol_type) == 1)
2091     coding->eol_type = CODING_EOL_CRLF;
2092   else if (XFASTINT (eol_type) == 2)
2093     coding->eol_type = CODING_EOL_CR;
2094   else
2095     coding->eol_type = CODING_EOL_LF;
2096
2097   type = XVECTOR (coding_system)->contents[0];
2098   switch (XFASTINT (type))
2099     {
2100     case 0:
2101       coding->type = coding_type_internal;
2102       break;
2103
2104     case 1:
2105       coding->type = coding_type_sjis;
2106       break;
2107
2108     case 2:
2109       coding->type = coding_type_iso2022;
2110       {
2111         Lisp_Object val = XVECTOR (coding_system)->contents[4];
2112         Lisp_Object *flags;
2113         int i, charset, default_reg_bits = 0;
2114
2115         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2116           goto label_invalid_coding_system;
2117
2118         flags = XVECTOR (val)->contents;
2119         coding->flags
2120           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2121              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2122              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2123              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2124              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2125              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2126              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2127              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2128              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2129              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2130              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL));
2131
2132         /* Invoke graphic register 0 to plane 0.  */
2133         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2134         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2135         CODING_SPEC_ISO_INVOCATION (coding, 1)
2136           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2137         /* Not single shifting at first.  */
2138         CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0;
2139         /* Beginning of buffer should also be regarded as bol. */
2140         CODING_SPEC_ISO_BOL(coding) = 1;
2141
2142         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2143            FLAGS[REG] can be one of below:
2144                 integer CHARSET: CHARSET occupies register I,
2145                 t: designate nothing to REG initially, but can be used
2146                   by any charsets,
2147                 list of integer, nil, or t: designate the first
2148                   element (if integer) to REG initially, the remaining
2149                   elements (if integer) is designated to REG on request,
2150                   if an element is t, REG can be used by any charset,
2151                 nil: REG is never used.  */
2152         for (charset = 0; charset <= MAX_CHARSET; charset++)
2153           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = -1;
2154         for (i = 0; i < 4; i++)
2155           {
2156             if (INTEGERP (flags[i])
2157                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2158                 || (charset = get_charset_id (flags[i])) >= 0)
2159               {
2160                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2161                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2162               }
2163             else if (EQ (flags[i], Qt))
2164               {
2165                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2166                 default_reg_bits |= 1 << i;
2167               }
2168             else if (CONSP (flags[i]))
2169               {
2170                 Lisp_Object tail = flags[i];
2171
2172                 if (INTEGERP (XCONS (tail)->car)
2173                     && (charset = XINT (XCONS (tail)->car),
2174                         CHARSET_VALID_P (charset))
2175                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2176                   {
2177                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2178                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2179                   }
2180                 else
2181                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2182                 tail = XCONS (tail)->cdr;
2183                 while (CONSP (tail))
2184                   {
2185                     if (INTEGERP (XCONS (tail)->car)
2186                         && (charset = XINT (XCONS (tail)->car),
2187                             CHARSET_VALID_P (charset))
2188                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2189                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2190                         = i;
2191                     else if (EQ (XCONS (tail)->car, Qt))
2192                       default_reg_bits |= 1 << i;
2193                     tail = XCONS (tail)->cdr;
2194                   }
2195               }
2196             else
2197               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2198
2199             CODING_SPEC_ISO_DESIGNATION (coding, i)
2200               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
2201           }
2202
2203         if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
2204           {
2205             /* REG 1 can be used only by locking shift in 7-bit env.  */
2206             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
2207               default_reg_bits &= ~2;
2208             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
2209               /* Without any shifting, only REG 0 and 1 can be used.  */
2210               default_reg_bits &= 3;
2211           }
2212
2213         for (charset = 0; charset <= MAX_CHARSET; charset++)
2214           if (CHARSET_VALID_P (charset)
2215               && CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) < 0)
2216             {
2217               /* We have not yet decided where to designate CHARSET.  */
2218               int reg_bits = default_reg_bits;
2219
2220               if (CHARSET_CHARS (charset) == 96)
2221                 /* A charset of CHARS96 can't be designated to REG 0.  */
2222                 reg_bits &= ~1;
2223
2224               if (reg_bits)
2225                 /* There exist some default graphic register.  */
2226                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2227                   = (reg_bits & 1
2228                      ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
2229               else
2230                 /* We anyway have to designate CHARSET to somewhere.  */
2231                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2232                   = (CHARSET_CHARS (charset) == 94
2233                      ? 0
2234                      : ((coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT
2235                          || ! coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
2236                         ? 1
2237                         : (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT
2238                            ? 2 : 0)));
2239             }
2240       }
2241       coding->require_flushing = 1;
2242       break;
2243
2244     case 3:
2245       coding->type = coding_type_big5;
2246       coding->flags
2247         = (NILP (XVECTOR (coding_system)->contents[4])
2248            ? CODING_FLAG_BIG5_HKU
2249            : CODING_FLAG_BIG5_ETEN);
2250       break;
2251
2252     case 4:
2253       coding->type = coding_type_ccl;
2254       {
2255         Lisp_Object val = XVECTOR (coding_system)->contents[4];
2256         if (CONSP  (val)
2257             && VECTORP (XCONS (val)->car)
2258             && VECTORP (XCONS (val)->cdr))
2259           {
2260             setup_ccl_program (&(coding->spec.ccl.decoder), XCONS (val)->car);
2261             setup_ccl_program (&(coding->spec.ccl.encoder), XCONS (val)->cdr);
2262           }
2263         else
2264           goto label_invalid_coding_system;
2265       }
2266       coding->require_flushing = 1;
2267       break;
2268
2269     default:
2270       if (EQ (type, Qt))
2271         coding->type = coding_type_automatic;
2272       else
2273         coding->type = coding_type_no_conversion;
2274       break;
2275     }
2276   return 0;
2277
2278  label_invalid_coding_system:
2279   coding->type = coding_type_no_conversion;
2280   coding->symbol = coding->pre_write_conversion = coding->post_read_conversion
2281     = Qnil;
2282   return -1;
2283 }
2284
2285 /* Emacs has a mechanism to automatically detect a coding system if it
2286    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
2287    it's impossible to distinguish some coding systems accurately
2288    because they use the same range of codes.  So, at first, coding
2289    systems are categorized into 8, those are:
2290
2291    o coding-category-internal
2292
2293         The category for a coding system which has the same code range
2294         as Emacs' internal format.  Assigned the coding-system (Lisp
2295         symbol) `internal' by default.
2296
2297    o coding-category-sjis
2298
2299         The category for a coding system which has the same code range
2300         as SJIS.  Assigned the coding-system (Lisp
2301         symbol) `shift-jis' by default.
2302
2303    o coding-category-iso-7
2304
2305         The category for a coding system which has the same code range
2306         as ISO2022 of 7-bit environment.  Assigned the coding-system
2307         (Lisp symbol) `iso-2022-7' by default.
2308
2309    o coding-category-iso-8-1
2310
2311         The category for a coding system which has the same code range
2312         as ISO2022 of 8-bit environment and graphic plane 1 used only
2313         for DIMENSION1 charset.  Assigned the coding-system (Lisp
2314         symbol) `iso-8859-1' by default.
2315
2316    o coding-category-iso-8-2
2317
2318         The category for a coding system which has the same code range
2319         as ISO2022 of 8-bit environment and graphic plane 1 used only
2320         for DIMENSION2 charset.  Assigned the coding-system (Lisp
2321         symbol) `euc-japan' by default.
2322
2323    o coding-category-iso-else
2324
2325         The category for a coding system which has the same code range
2326         as ISO2022 but does not belong to any of the above three
2327         categories.  Assigned the coding-system (Lisp symbol)
2328         `iso-2022-ss2-7' by default.
2329
2330    o coding-category-big5
2331
2332         The category for a coding system which has the same code range
2333         as BIG5.  Assigned the coding-system (Lisp symbol)
2334         `cn-big5' by default.
2335
2336    o coding-category-binary
2337
2338         The category for a coding system not categorized in any of the
2339         above.  Assigned the coding-system (Lisp symbol)
2340         `no-conversion' by default.
2341
2342    Each of them is a Lisp symbol and the value is an actual
2343    `coding-system's (this is also a Lisp symbol) assigned by a user.
2344    What Emacs does actually is to detect a category of coding system.
2345    Then, it uses a `coding-system' assigned to it.  If Emacs can't
2346    decide only one possible category, it selects a category of the
2347    highest priority.  Priorities of categories are also specified by a
2348    user in a Lisp variable `coding-category-list'.
2349
2350 */
2351
2352 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
2353    If it detects possible coding systems, return an integer in which
2354    appropriate flag bits are set.  Flag bits are defined by macros
2355    CODING_CATEGORY_MASK_XXX in `coding.h'.  */
2356
2357 int
2358 detect_coding_mask (src, src_bytes)
2359      unsigned char *src;
2360      int src_bytes;
2361 {
2362   register unsigned char c;
2363   unsigned char *src_end = src + src_bytes;
2364   int mask;
2365
2366   /* At first, skip all ASCII characters and control characters except
2367      for three ISO2022 specific control characters.  */
2368  label_loop_detect_coding:
2369   while (src < src_end)
2370     {
2371       c = *src;
2372       if (c >= 0x80
2373           || (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2374         break;
2375       src++;
2376     }
2377
2378   if (src >= src_end)
2379     /* We found nothing other than ASCII.  There's nothing to do.  */
2380     return CODING_CATEGORY_MASK_ANY;
2381
2382   /* The text seems to be encoded in some multilingual coding system.
2383      Now, try to find in which coding system the text is encoded.  */
2384   if (c < 0x80)
2385     {
2386       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
2387       /* C is an ISO2022 specific control code of C0.  */
2388       mask = detect_coding_iso2022 (src, src_end);
2389       src++;
2390       if (mask == CODING_CATEGORY_MASK_ANY)
2391         /* No valid ISO2022 code follows C.  Try again.  */
2392         goto label_loop_detect_coding;
2393     }
2394   else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3 || c == ISO_CODE_CSI)
2395     /* C is an ISO2022 specific control code of C1,
2396        or the first byte of SJIS's 2-byte character code,
2397        or a leading code of Emacs.  */
2398     mask = (detect_coding_iso2022 (src, src_end)
2399             | detect_coding_sjis (src, src_end)
2400             | detect_coding_internal (src, src_end));
2401
2402   else if (c < 0xA0)
2403     /* C is the first byte of SJIS character code,
2404        or a leading-code of Emacs.  */
2405     mask = (detect_coding_sjis (src, src_end)
2406             | detect_coding_internal (src, src_end));
2407
2408   else
2409     /* C is a character of ISO2022 in graphic plane right,
2410        or a SJIS's 1-byte character code (i.e. JISX0201),
2411        or the first byte of BIG5's 2-byte code.  */
2412     mask = (detect_coding_iso2022 (src, src_end)
2413             | detect_coding_sjis (src, src_end)
2414             | detect_coding_big5 (src, src_end));
2415
2416   return mask;
2417 }
2418
2419 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
2420    The information of the detected coding system is set in CODING.  */
2421
2422 void
2423 detect_coding (coding, src, src_bytes)
2424      struct coding_system *coding;
2425      unsigned char *src;
2426      int src_bytes;
2427 {
2428   int mask = detect_coding_mask (src, src_bytes);
2429   int idx;
2430
2431   if (mask == CODING_CATEGORY_MASK_ANY)
2432     /* We found nothing other than ASCII.  There's nothing to do.  */
2433     return;
2434
2435   if (!mask)
2436     /* The source text seems to be encoded in unknown coding system.
2437        Emacs regards the category of such a kind of coding system as
2438        `coding-category-binary'.  We assume that a user has assigned
2439        an appropriate coding system for a `coding-category-binary'.  */
2440     idx = CODING_CATEGORY_IDX_BINARY;
2441   else
2442     {
2443       /* We found some plausible coding systems.  Let's use a coding
2444          system of the highest priority.  */
2445       Lisp_Object val = Vcoding_category_list;
2446
2447       if (CONSP (val))
2448         while (!NILP (val))
2449           {
2450             idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
2451             if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx)))
2452               break;
2453             val = XCONS (val)->cdr;
2454           }
2455       else
2456         val = Qnil;
2457
2458       if (NILP (val))
2459         {
2460           /* For unknown reason, `Vcoding_category_list' contains none
2461              of found categories.  Let's use any of them.  */
2462           for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++)
2463             if (mask & (1 << idx))
2464               break;
2465         }
2466     }
2467   setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding);
2468 }
2469
2470 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
2471    is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
2472    CODING_EOL_CR, and CODING_EOL_AUTOMATIC.  */
2473
2474 int
2475 detect_eol_type (src, src_bytes)
2476      unsigned char *src;
2477      int src_bytes;
2478 {
2479   unsigned char *src_end = src + src_bytes;
2480   unsigned char c;
2481
2482   while (src < src_end)
2483     {
2484       c = *src++;
2485       if (c == '\n')
2486         return CODING_EOL_LF;
2487       else if (c == '\r')
2488         {
2489           if (src < src_end && *src == '\n')
2490             return CODING_EOL_CRLF;
2491           else
2492             return CODING_EOL_CR;
2493         }
2494     }
2495   return CODING_EOL_AUTOMATIC;
2496 }
2497
2498 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
2499    is encoded.  If it detects an appropriate format of end-of-line, it
2500    sets the information in *CODING.  */
2501
2502 void
2503 detect_eol (coding, src, src_bytes)
2504      struct coding_system *coding;
2505      unsigned char *src;
2506      int src_bytes;
2507 {
2508   Lisp_Object val;
2509   int eol_type = detect_eol_type (src, src_bytes);
2510
2511   if (eol_type == CODING_EOL_AUTOMATIC)
2512     /*  We found no end-of-line in the source text.  */
2513     return;
2514
2515   val = Fget (coding->symbol, Qeol_type);
2516   if (VECTORP (val) && XVECTOR (val)->size == 3)
2517     setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
2518 }
2519
2520 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
2521    decoding, it may detect coding system and format of end-of-line if
2522    those are not yet decided.  */
2523
2524 int
2525 decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
2526      struct coding_system *coding;
2527      unsigned char *source, *destination;
2528      int src_bytes, dst_bytes;
2529      int *consumed;
2530 {
2531   int produced;
2532
2533   if (src_bytes <= 0)
2534     {
2535       *consumed = 0;
2536       return 0;
2537     }
2538
2539   if (coding->type == coding_type_automatic)
2540     detect_coding (coding, source, src_bytes);
2541
2542   if (coding->eol_type == CODING_EOL_AUTOMATIC)
2543     detect_eol (coding, source, src_bytes);
2544
2545   coding->carryover_size = 0;
2546   switch (coding->type)
2547     {
2548     case coding_type_no_conversion:
2549     label_no_conversion:
2550       produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
2551       bcopy (source, destination, produced);
2552       *consumed = produced;
2553       break;
2554
2555     case coding_type_internal:
2556     case coding_type_automatic:
2557       if (coding->eol_type == CODING_EOL_LF
2558           ||  coding->eol_type == CODING_EOL_AUTOMATIC)
2559         goto label_no_conversion;
2560       produced = decode_eol (coding, source, destination,
2561                              src_bytes, dst_bytes, consumed);
2562       break;
2563
2564     case coding_type_sjis:
2565       produced = decode_coding_sjis_big5 (coding, source, destination,
2566                                           src_bytes, dst_bytes, consumed,
2567                                           1);
2568       break;
2569
2570     case coding_type_iso2022:
2571       produced = decode_coding_iso2022 (coding, source, destination,
2572                                         src_bytes, dst_bytes, consumed);
2573       break;
2574
2575     case coding_type_big5:
2576       produced = decode_coding_sjis_big5 (coding, source, destination,
2577                                           src_bytes, dst_bytes, consumed,
2578                                           0);
2579       break;
2580
2581     case coding_type_ccl:
2582       produced = ccl_driver (&coding->spec.ccl.decoder, source, destination,
2583                              src_bytes, dst_bytes, consumed);
2584       break;
2585     }
2586
2587   return produced;
2588 }
2589
2590 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
2591
2592 int
2593 encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
2594      struct coding_system *coding;
2595      unsigned char *source, *destination;
2596      int src_bytes, dst_bytes;
2597      int *consumed;
2598 {
2599   int produced;
2600
2601   coding->carryover_size = 0;
2602   switch (coding->type)
2603     {
2604     case coding_type_no_conversion:
2605     label_no_conversion:
2606       produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
2607       if (produced > 0)
2608         {
2609           bcopy (source, destination, produced);
2610           if (coding->selective)
2611             {
2612               unsigned char *p = destination, *pend = destination + produced;
2613               while (p < pend)
2614                 if (*p++ == '\015') p[-1] = '\n';
2615             }
2616         }
2617       *consumed = produced;
2618       break;
2619
2620     case coding_type_internal:
2621     case coding_type_automatic:
2622       if (coding->eol_type == CODING_EOL_LF
2623           ||  coding->eol_type == CODING_EOL_AUTOMATIC)
2624         goto label_no_conversion;
2625       produced = encode_eol (coding, source, destination,
2626                              src_bytes, dst_bytes, consumed);
2627       break;
2628
2629     case coding_type_sjis:
2630       produced = encode_coding_sjis_big5 (coding, source, destination,
2631                                           src_bytes, dst_bytes, consumed,
2632                                           1);
2633       break;
2634
2635     case coding_type_iso2022:
2636       produced = encode_coding_iso2022 (coding, source, destination,
2637                                         src_bytes, dst_bytes, consumed);
2638       break;
2639
2640     case coding_type_big5:
2641       produced = encode_coding_sjis_big5 (coding, source, destination,
2642                                           src_bytes, dst_bytes, consumed,
2643                                           0);
2644       break;
2645
2646     case coding_type_ccl:
2647       produced = ccl_driver (&coding->spec.ccl.encoder, source, destination,
2648                              src_bytes, dst_bytes, consumed);
2649       break;
2650     }
2651
2652   return produced;
2653 }
2654
2655 #define CONVERSION_BUFFER_EXTRA_ROOM 256
2656
2657 /* Return maximum size (bytes) of a buffer enough for decoding
2658    SRC_BYTES of text encoded in CODING.  */
2659
2660 int
2661 decoding_buffer_size (coding, src_bytes)
2662      struct coding_system *coding;
2663      int src_bytes;
2664 {
2665   int magnification;
2666
2667   if (coding->type == coding_type_iso2022)
2668     magnification = 3;
2669   else if (coding->type == coding_type_ccl)
2670     magnification = coding->spec.ccl.decoder.buf_magnification;
2671   else
2672     magnification = 2;
2673
2674   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
2675 }
2676
2677 /* Return maximum size (bytes) of a buffer enough for encoding
2678    SRC_BYTES of text to CODING.  */
2679
2680 int
2681 encoding_buffer_size (coding, src_bytes)
2682      struct coding_system *coding;
2683      int src_bytes;
2684 {
2685   int magnification;
2686
2687   if (coding->type == coding_type_ccl)
2688     magnification = coding->spec.ccl.encoder.buf_magnification;
2689   else
2690     magnification = 3;
2691
2692   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
2693 }
2694
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* Work area shared by the encoding and decoding routines, and the
   number of bytes currently allocated for it.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a buffer of at least SIZE bytes to be used for
   encoding or decoding.  The buffer is enlarged automatically when it
   is too small; in that case a fresh block is allocated and the old
   contents are NOT preserved.  If we run out of memory, return NULL
   and leave the old buffer and its recorded size as they were.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* Never start below the minimum.  In particular, a recorded
	 size of 0 (buffer not yet allocated) must not be the start
	 value, because doubling 0 would never reach SIZE.  */
      if (real_size < MINIMUM_CONVERSION_BUFFER_SIZE)
	real_size = MINIMUM_CONVERSION_BUFFER_SIZE;
      while (real_size < size) real_size *= 2;

      buf = (char *) xmalloc (real_size);
      /* NOTE(review): xmalloc may well signal an error by itself
	 instead of returning NULL -- confirm.  The check keeps the
	 old state consistent in either case.  */
      if (! buf)
	return 0;
      if (conversion_buffer)
	xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
2723
2724 \f
2725 #ifdef emacs
2726 /*** 7. Emacs Lisp library functions ***/
2727
2728 DEFUN ("coding-system-vector", Fcoding_system_vector, Scoding_system_vector,
2729        1, 1, 0,
2730   "Return coding-vector of CODING-SYSTEM.\n\
2731 If CODING-SYSTEM is not a valid coding-system, return nil.")
2732   (obj)
2733      Lisp_Object obj;
2734 {
2735   while (SYMBOLP (obj) && !NILP (obj))
2736     obj = Fget (obj, Qcoding_system);
2737   return ((NILP (obj) || !VECTORP (obj) || XVECTOR (obj)->size != 5)
2738           ? Qnil : obj);
2739 }
2740
2741 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
2742   "Return t if OBJECT is nil or a coding-system.\n\
2743 See document of make-coding-system for coding-system object.")
2744   (obj)
2745      Lisp_Object obj;
2746 {
2747   return ((NILP (obj) || !NILP (Fcoding_system_vector (obj))) ? Qt : Qnil);
2748 }
2749
2750 DEFUN ("read-non-nil-coding-system",
2751        Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0,
2752   "Read a coding system from the minibuffer, prompting with string PROMPT.")
2753   (prompt)
2754      Lisp_Object prompt;
2755 {
2756   Lisp_Object val;
2757   do {
2758     val = Fcompleting_read (prompt, Vobarray, Qcoding_system_vector,
2759                             Qt, Qnil, Qnil);
2760   } while (XSTRING (val)->size == 0);
2761   return (Fintern (val, Qnil));
2762 }
2763
2764 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0,
2765   "Read a coding system or nil from the minibuffer, prompting with string PROMPT.")
2766   (prompt)
2767      Lisp_Object prompt;
2768 {
2769   Lisp_Object val = Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
2770                                       Qt, Qnil, Qnil);
2771   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
2772 }
2773
2774 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
2775        1, 1, 0,
2776   "Check validity of CODING-SYSTEM.\n\
2777 If valid, return CODING-SYSTEM, else `coding-system-error' is signaled.\n\
2778 CODING-SYSTEM is valid if it is a symbol and has \"coding-system\" property.\n\
2779 The value of property should be a vector of length 5.")
2780   (coding_system)
2781      Lisp_Object coding_system;
2782 {
2783   CHECK_SYMBOL (coding_system, 0);
2784   if (!NILP (Fcoding_system_p (coding_system)))
2785     return coding_system;
2786   while (1)
2787     Fsignal (Qcoding_system_error, coding_system);
2788 }
2789
2790 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
2791        2, 2, 0,
2792   "Detect coding-system of the text in the region between START and END.\n\
2793 Return a list of possible coding-systems ordered by priority.\n\
2794 If only ASCII characters are found, it returns `automatic-conversion'\n\
2795  or its subsidiary coding-system according to a detected end-of-line format.")
2796   (b, e)
2797      Lisp_Object b, e;
2798 {
2799   int coding_mask, eol_type;
2800   Lisp_Object val;
2801   int beg, end;
2802
2803   validate_region (&b, &e);
2804   beg = XINT (b), end = XINT (e);
2805   if (beg < GPT && end >= GPT) move_gap (end);
2806
2807   coding_mask = detect_coding_mask (POS_ADDR (beg), end - beg);
2808   eol_type  = detect_eol_type (POS_ADDR (beg), end - beg);
2809
2810   if (coding_mask == CODING_CATEGORY_MASK_ANY)
2811     {
2812       val = intern ("automatic-conversion");
2813       if (eol_type != CODING_EOL_AUTOMATIC)
2814         {
2815           Lisp_Object val2 = Fget (val, Qeol_type);
2816           if (VECTORP (val2))
2817             val = XVECTOR (val2)->contents[eol_type];
2818         }
2819     }
2820   else
2821     {
2822       Lisp_Object val2;
2823
2824       /* At first, gather possible coding-systems in VAL in a reverse
2825          order.  */
2826       val = Qnil;
2827       for (val2 = Vcoding_category_list;
2828            !NILP (val2);
2829            val2 = XCONS (val2)->cdr)
2830         {
2831           int idx
2832             = XFASTINT (Fget (XCONS (val2)->car, Qcoding_category_index));
2833           if (coding_mask & (1 << idx))
2834             val = Fcons (Fsymbol_value (XCONS (val2)->car), val);
2835         }
2836
2837       /* Then, change the order of the list, while getting subsidiary
2838          coding-systems.  */
2839       val2 = val;
2840       val = Qnil;
2841       for (; !NILP (val2); val2 = XCONS (val2)->cdr)
2842         {
2843           if (eol_type == CODING_EOL_AUTOMATIC)
2844             val = Fcons (XCONS (val2)->car, val);
2845           else
2846             {
2847               Lisp_Object val3 = Fget (XCONS (val2)->car, Qeol_type);
2848               if (VECTORP (val3))
2849                 val = Fcons (XVECTOR (val3)->contents[eol_type], val);
2850               else
2851                 val = Fcons (XCONS (val2)->car, val);
2852             }
2853         }
2854     }
2855
2856   return val;
2857 }
2858
2859 /* Scan text in the region between *BEGP and *ENDP, skip characters
2860    which we never have to encode to (iff ENCODEP is 1) or decode from
2861    coding system CODING at the head and tail, then set BEGP and ENDP
2862    to the addresses of start and end of the text we actually convert.  */
2863
2864 void
2865 shrink_conversion_area (begp, endp, coding, encodep)
2866      unsigned char **begp, **endp;
2867      struct coding_system *coding;
2868      int encodep;
2869 {
2870   register unsigned char *beg_addr = *begp, *end_addr = *endp;
2871
2872   if (coding->eol_type != CODING_EOL_LF
2873       && coding->eol_type != CODING_EOL_AUTOMATIC)
2874     /* Since we anyway have to convert end-of-line format, it is not
2875        worth skipping at most 100 bytes or so.  */
2876     return;
2877
2878   if (encodep)                  /* for encoding */
2879     {
2880       switch (coding->type)
2881         {
2882         case coding_type_no_conversion:
2883         case coding_type_internal:
2884         case coding_type_automatic:
2885           /* We need no conversion.  */
2886           *begp = *endp;
2887           return;
2888         case coding_type_ccl:
2889           /* We can't skip any data.  */
2890           return;
2891         case coding_type_iso2022:
2892           if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2893             {
2894               unsigned char *bol = beg_addr;
2895               while (beg_addr < end_addr && *beg_addr < 0x80)
2896                 {
2897                   beg_addr++;
2898                   if (*(beg_addr - 1) == '\n')
2899                     bol = beg_addr;
2900                 }
2901               beg_addr = bol;
2902               goto label_skip_tail;
2903             }
2904           /* fall down ... */
2905         default:
2906           /* We can skip all ASCII characters at the head and tail.  */
2907           while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++;
2908         label_skip_tail:
2909           while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--;
2910           break;
2911         }
2912     }
2913   else                          /* for decoding */
2914     {
2915       switch (coding->type)
2916         {
2917         case coding_type_no_conversion:
2918           /* We need no conversion.  */
2919           *begp = *endp;
2920           return;
2921         case coding_type_internal:
2922           if (coding->eol_type == CODING_EOL_LF)
2923             {
2924               /* We need no conversion.  */
2925               *begp = *endp;
2926               return;
2927             }
2928           /* We can skip all but carriage-return.  */
2929           while (beg_addr < end_addr && *beg_addr != '\r') beg_addr++;
2930           while (beg_addr < end_addr && *(end_addr - 1) != '\r') end_addr--;
2931           break;
2932         case coding_type_sjis:
2933         case coding_type_big5:
2934           /* We can skip all ASCII characters at the head.  */
2935           while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++;
2936           /* We can skip all ASCII characters at the tail except for
2937              the second byte of SJIS or BIG5 code.  */
2938           while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--;
2939           if (end_addr != *endp)
2940             end_addr++;
2941           break;
2942         case coding_type_ccl:
2943           /* We can't skip any data.  */
2944           return;
2945         default:                /* i.e. case coding_type_iso2022: */
2946           {
2947             unsigned char c;
2948
2949             /* We can skip all ASCII characters except for a few
2950                control codes at the head.  */
2951             while (beg_addr < end_addr && (c = *beg_addr) < 0x80
2952                    && c != ISO_CODE_CR && c != ISO_CODE_SO
2953                    && c != ISO_CODE_SI && c != ISO_CODE_ESC)
2954               beg_addr++;
2955           }
2956           break;
2957         }
2958     }
2959   *begp = beg_addr;
2960   *endp = end_addr;
2961   return;
2962 }
2963
2964 /* Encode to (iff ENCODEP is 1) or decode form coding system CODING a
2965    text between B and E.  B and E are buffer position.  */
2966
2967 Lisp_Object
2968 code_convert_region (b, e, coding, encodep)
2969      Lisp_Object b, e;
2970      struct coding_system *coding;
2971      int encodep;
2972 {
2973   int beg, end, len, consumed, produced;
2974   char *buf;
2975   unsigned char *begp, *endp;
2976   int pos = PT;
2977
2978   validate_region (&b, &e);
2979   beg = XINT (b), end = XINT (e);
2980   if (beg < GPT && end >= GPT)
2981     move_gap (end);
2982
2983   if (encodep && !NILP (coding->pre_write_conversion))
2984     {
2985       /* We must call a pre-conversion function which may put a new
2986          text to be converted in a new buffer.  */
2987       struct buffer *old = current_buffer, *new;
2988
2989       TEMP_SET_PT (beg);
2990       call2 (coding->pre_write_conversion, b, e);
2991       if (old != current_buffer)
2992         {
2993           /* Replace the original text by the text just generated.  */
2994           len = ZV - BEGV;
2995           new = current_buffer;
2996           set_buffer_internal (old);
2997           del_range (beg, end);
2998           insert_from_buffer (new, 1, len, 0);
2999           end = beg + len;
3000         }
3001     }
3002
3003   /* We may be able to shrink the conversion region.  */
3004   begp = POS_ADDR (beg); endp = begp + (end - beg);
3005   shrink_conversion_area (&begp, &endp, coding, encodep);
3006
3007   if (begp == endp)
3008     /* We need no conversion.  */
3009     len = end - beg;
3010   else
3011     {
3012       beg += begp - POS_ADDR (beg);
3013       end =  beg + (endp - begp);
3014
3015       if (encodep)
3016         len = encoding_buffer_size (coding, end - beg);
3017       else
3018         len = decoding_buffer_size (coding, end - beg);
3019       buf = get_conversion_buffer (len);
3020
3021       coding->last_block = 1;
3022       produced = (encodep
3023                   ? encode_coding (coding, POS_ADDR (beg), buf, end - beg, len,
3024                                    &consumed)
3025                   : decode_coding (coding, POS_ADDR (beg), buf, end - beg, len,
3026                                    &consumed));
3027
3028       len = produced + (beg - XINT (b)) + (XINT (e) - end);
3029
3030       TEMP_SET_PT (beg);
3031       insert (buf, produced);
3032       del_range (PT, PT + end - beg);
3033       if (pos >= end)
3034         pos = PT + (pos - end);
3035       else if (pos > beg)
3036         pos = beg;
3037       TEMP_SET_PT (pos);
3038   }
3039
3040   if (!encodep && !NILP (coding->post_read_conversion))
3041     {
3042       /* We must call a post-conversion function which may alter
3043          the text just converted.  */
3044       Lisp_Object insval;
3045
3046       beg = XINT (b);
3047       TEMP_SET_PT (beg);
3048       insval = call1 (coding->post_read_conversion, make_number (len));
3049       CHECK_NUMBER (insval, 0);
3050       len = XINT (insval);
3051     }
3052
3053   return make_number (len);
3054 }
3055
3056 Lisp_Object
3057 code_convert_string (str, coding, encodep, nocopy)
3058      Lisp_Object str, nocopy;
3059      struct coding_system *coding;
3060      int encodep;
3061 {
3062   int len, consumed, produced;
3063   char *buf;
3064   unsigned char *begp, *endp;
3065   int head_skip, tail_skip;
3066   struct gcpro gcpro1;
3067
3068   if (encodep && !NILP (coding->pre_write_conversion)
3069       || !encodep && !NILP (coding->post_read_conversion))
3070     {
3071       /* Since we have to call Lisp functions which assume target text
3072          is in a buffer, after setting a temporary buffer, call
3073          code_convert_region.  */
3074       int count = specpdl_ptr - specpdl;
3075       int len = XSTRING (str)->size;
3076       Lisp_Object result;
3077       struct buffer *old = current_buffer;
3078
3079       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
3080       temp_output_buffer_setup (" *code-converting-work*");
3081       set_buffer_internal (XBUFFER (Vstandard_output));
3082       insert_from_string (str, 0, len, 0);
3083       code_convert_region (make_number (BEGV), make_number (ZV),
3084                            coding, encodep);
3085       result = make_buffer_string (BEGV, ZV, 0);
3086       set_buffer_internal (old);
3087       return unbind_to (count, result);
3088     }
3089
3090   /* We may be able to shrink the conversion region.  */
3091   begp = XSTRING (str)->data;
3092   endp = begp + XSTRING (str)->size;
3093   shrink_conversion_area (&begp, &endp, coding, encodep);
3094
3095   if (begp == endp)
3096     /* We need no conversion.  */
3097     return (NILP (nocopy) ? Fcopy_sequence (str) : str);
3098
3099   head_skip = begp - XSTRING (str)->data;
3100   tail_skip = XSTRING (str)->size - head_skip - (endp - begp);
3101
3102   GCPRO1 (str);
3103
3104   if (encodep)
3105     len = encoding_buffer_size (coding, endp - begp);
3106   else
3107     len = decoding_buffer_size (coding, endp - begp);
3108   buf = get_conversion_buffer (len + head_skip + tail_skip);
3109
3110   bcopy (XSTRING (str)->data, buf, head_skip);
3111   coding->last_block = 1;
3112   produced = (encodep
3113               ? encode_coding (coding, XSTRING (str)->data + head_skip,
3114                                buf + head_skip, endp - begp, len, &consumed)
3115               : decode_coding (coding, XSTRING (str)->data + head_skip,
3116                                buf + head_skip, endp - begp, len, &consumed));
3117   bcopy (XSTRING (str)->data + head_skip + (endp - begp),
3118          buf + head_skip + produced,
3119          tail_skip);
3120
3121   UNGCPRO;
3122
3123   return make_string (buf, head_skip + produced + tail_skip);
3124 }
3125
3126 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
3127        3, 3, "r\nzCoding system: ",
3128   "Decode current region by specified coding system.\n\
3129 When called from a program, takes three arguments:\n\
3130 START, END, and CODING-SYSTEM.  START END are buffer positions.\n\
3131 Return length of decoded text.")
3132   (b, e, coding_system)
3133      Lisp_Object b, e, coding_system;
3134 {
3135   struct coding_system coding;
3136
3137   CHECK_NUMBER_COERCE_MARKER (b, 0);
3138   CHECK_NUMBER_COERCE_MARKER (e, 1);
3139   CHECK_SYMBOL (coding_system, 2);
3140
3141   if (NILP (coding_system))
3142     return make_number (XFASTINT (e) - XFASTINT (b));
3143   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3144     error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3145
3146   return code_convert_region (b, e, &coding, 0);
3147 }
3148
3149 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
3150        3, 3, "r\nzCoding system: ",
3151   "Encode current region by specified coding system.\n\
3152 When called from a program, takes three arguments:\n\
3153 START, END, and CODING-SYSTEM.  START END are buffer positions.\n\
3154 Return length of encoded text.")
3155   (b, e, coding_system)
3156      Lisp_Object b, e, coding_system;
3157 {
3158   struct coding_system coding;
3159
3160   CHECK_NUMBER_COERCE_MARKER (b, 0);
3161   CHECK_NUMBER_COERCE_MARKER (e, 1);
3162   CHECK_SYMBOL (coding_system, 2);
3163
3164   if (NILP (coding_system))
3165     return make_number (XFASTINT (e) - XFASTINT (b));
3166   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3167     error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3168
3169   return code_convert_region (b, e, &coding, 1);
3170 }
3171
3172 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
3173        2, 3, 0,
3174   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
3175 Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
3176 of decoding.")
3177   (string, coding_system, nocopy)
3178      Lisp_Object string, coding_system, nocopy;
3179 {
3180   struct coding_system coding;
3181
3182   CHECK_STRING (string, 0);
3183   CHECK_SYMBOL (coding_system, 1);
3184
3185   if (NILP (coding_system))
3186     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
3187   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3188     error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3189
3190   return code_convert_string (string, &coding, 0, nocopy);
3191 }
3192
3193 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
3194        2, 3, 0,
3195   "Encode STRING to CODING-SYSTEM, and return the result.\n\
3196 Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
3197 of encoding.")
3198   (string, coding_system, nocopy)
3199      Lisp_Object string, coding_system, nocopy;
3200 {
3201   struct coding_system coding;
3202
3203   CHECK_STRING (string, 0);
3204   CHECK_SYMBOL (coding_system, 1);
3205
3206   if (NILP (coding_system))
3207     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
3208   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3209     error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3210
3211   return code_convert_string (string, &coding, 1, nocopy);
3212 }
3213
3214 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
3215   "Decode a JISX0208 character of shift-jis encoding.\n\
3216 CODE is the character code in SJIS.\n\
3217 Return the corresponding character.")
3218   (code)
3219      Lisp_Object code;
3220 {
3221   unsigned char c1, c2, s1, s2;
3222   Lisp_Object val;
3223
3224   CHECK_NUMBER (code, 0);
3225   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
3226   DECODE_SJIS (s1, s2, c1, c2);
3227   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
3228   return val;
3229 }
3230
3231 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
3232   "Encode a JISX0208 character CHAR to SJIS coding-system.\n\
3233 Return the corresponding character code in SJIS.")
3234   (ch)
3235      Lisp_Object ch;
3236 {
3237   int charset, c1, c2, s1, s2;
3238   Lisp_Object val;
3239
3240   CHECK_NUMBER (ch, 0);
3241   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
3242   if (charset == charset_jisx0208)
3243     {
3244       ENCODE_SJIS (c1, c2, s1, s2);
3245       XSETFASTINT (val, (s1 << 8) | s2);
3246     }
3247   else
3248     XSETFASTINT (val, 0);
3249   return val;
3250 }
3251
3252 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
3253   "Decode a Big5 character CODE of BIG5 coding-system.\n\
3254 CODE is the character code in BIG5.\n\
3255 Return the corresponding character.")
3256   (code)
3257      Lisp_Object code;
3258 {
3259   int charset;
3260   unsigned char b1, b2, c1, c2;
3261   Lisp_Object val;
3262
3263   CHECK_NUMBER (code, 0);
3264   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
3265   DECODE_BIG5 (b1, b2, charset, c1, c2);
3266   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
3267   return val;
3268 }
3269
3270 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
3271   "Encode the Big5 character CHAR to BIG5 coding-system.\n\
3272 Return the corresponding character code in Big5.")
3273   (ch)
3274      Lisp_Object ch;
3275 {
3276   int charset, c1, c2, b1, b2;
3277   Lisp_Object val;
3278
3279   CHECK_NUMBER (ch, 0);
3280   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
3281   if (charset == charset_big5_1 || charset == charset_big5_2)
3282     {
3283       ENCODE_BIG5 (charset, c1, c2, b1, b2);
3284       XSETFASTINT (val, (b1 << 8) | b2);
3285     }
3286   else
3287     XSETFASTINT (val, 0);
3288   return val;
3289 }
3290
3291 DEFUN ("set-terminal-coding-system",
3292        Fset_terminal_coding_system, Sset_terminal_coding_system, 1, 1,
3293        "zCoding-system for terminal display: ",
3294   "Set coding-system of your terminal to CODING-SYSTEM.\n\
3295 All outputs to terminal are encoded to this coding-system.")
3296   (coding_system)
3297      Lisp_Object coding_system;
3298 {
3299   CHECK_SYMBOL (coding_system, 0);
3300   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
3301   update_mode_lines++;
3302   if (!NILP (Finteractive_p ()))
3303     Fredraw_display ();
3304   return Qnil;
3305 }
3306
3307 DEFUN ("terminal-coding-system",
3308        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
3309   "Return coding-system of your terminal.")
3310   ()
3311 {
3312   return terminal_coding.symbol;
3313 }
3314
3315 DEFUN ("set-keyboard-coding-system",
3316        Fset_keyboard_coding_system, Sset_keyboard_coding_system, 1, 1,
3317        "zCoding-system for keyboard input: ",
3318   "Set coding-system of what is sent from terminal keyboard to CODING-SYSTEM.\n\
3319 All inputs from terminal are decoded from this coding-system.")
3320   (coding_system)
3321      Lisp_Object coding_system;
3322 {
3323   CHECK_SYMBOL (coding_system, 0);
3324   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
3325   return Qnil;
3326 }
3327
3328 DEFUN ("keyboard-coding-system",
3329        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
3330   "Return coding-system of what is sent from terminal keyboard.")
3331   ()
3332 {
3333   return keyboard_coding.symbol;
3334 }
3335
3336 \f
3337 DEFUN ("find-coding-system", Ffind_coding_system, Sfind_coding_system,
3338        1, MANY, 0,
3339   "Choose a coding system for a file operation based on file name.\n\
3340 The value names a pair of coding systems: (ENCODING-SYSTEM DECODING-SYSTEM).\n\
3341 ENCODING-SYSTEM is the coding system to use for encoding\n\
3342 \(in case OPERATION does encoding), and DECODING-SYSTEM is the coding system\n\
3343 for decoding (in case OPERATION does decoding).\n\
3344 \n\
3345 The first argument OPERATION specifies an I/O primitive:\n\
3346   For file I/O, `insert-file-contents' or `write-region'.\n\
3347   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
3348   For network I/O, `open-network-stream'.\n\
3349 \n\
3350 The remaining arguments should be the same arguments that were passed\n\
3351 to the primitive.  Depending on which primitive, one of those arguments\n\
3352 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
3353 whichever argument specifies the file name is TARGET.\n\
3354 \n\
3355 TARGET has a meaning which depends on OPERATION:\n\
3356   For file I/O, TARGET is a file name.\n\
3357   For process I/O, TARGET is a process name.\n\
3358   For network I/O, TARGET is a service name or a port number\n\
3359 \n\
3360 This function looks up what `coding-system-alist' specifies for\n\
3361 OPERATION and TARGET.  It may specify a cons cell which represents\n\
3362 a particular coding system or it may have a function to call.\n\
3363 In the latter case, we call the function with one argument,\n\
3364 which is a list of all the arguments given to `find-coding-system'.")
3365   (nargs, args)
3366      int nargs;
3367      Lisp_Object *args;
3368 {
3369   Lisp_Object operation, target_idx, target, val;
3370   register Lisp_Object chain;
3371
3372   if (nargs < 2)
3373     error ("Too few arguments");
3374   operation = args[0];
3375   if (!SYMBOLP (operation)
3376       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
3377     error ("Invalid first arguement");
3378   if (nargs < 1 + XINT (target_idx))
3379     error ("Too few arguments for operation: %s",
3380            XSYMBOL (operation)->name->data);
3381   target = args[XINT (target_idx) + 1];
3382   if (!(STRINGP (target)
3383         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
3384     error ("Invalid %dth argument", XINT (target_idx) + 1);
3385
3386   chain = Fassq (operation, Vcoding_system_alist);
3387   if (NILP (chain))
3388     return Qnil;
3389
3390   for (chain = XCONS (chain)->cdr; CONSP (chain); chain = XCONS (chain)->cdr)
3391     {
3392       Lisp_Object elt = XCONS (chain)->car;
3393
3394       if (CONSP (elt)
3395           && ((STRINGP (target)
3396                && STRINGP (XCONS (elt)->car)
3397                && fast_string_match (XCONS (elt)->car, target) >= 0)
3398               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
3399         return (CONSP (val = XCONS (elt)->cdr)
3400                 ? val
3401                 : ((SYMBOLP (val) && Fboundp (val)
3402                     ? call2 (val, Flist (nargs, args))
3403                     : Qnil)));
3404     }
3405   return Qnil;
3406 }
3407
3408 #endif /* emacs */
3409
3410 \f
3411 /*** 8. Post-amble ***/
3412
3413 init_coding_once ()
3414 {
3415   int i;
3416
3417   /* Emacs internal format specific initialize routine.  */
3418   for (i = 0; i <= 0x20; i++)
3419     emacs_code_class[i] = EMACS_control_code;
3420   emacs_code_class[0x0A] = EMACS_linefeed_code;
3421   emacs_code_class[0x0D] = EMACS_carriage_return_code;
3422   for (i = 0x21 ; i < 0x7F; i++)
3423     emacs_code_class[i] = EMACS_ascii_code;
3424   emacs_code_class[0x7F] = EMACS_control_code;
3425   emacs_code_class[0x80] = EMACS_leading_code_composition;
3426   for (i = 0x81; i < 0xFF; i++)
3427     emacs_code_class[i] = EMACS_invalid_code;
3428   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
3429   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
3430   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
3431   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
3432
3433   /* ISO2022 specific initialize routine.  */
3434   for (i = 0; i < 0x20; i++)
3435     iso_code_class[i] = ISO_control_code;
3436   for (i = 0x21; i < 0x7F; i++)
3437     iso_code_class[i] = ISO_graphic_plane_0;
3438   for (i = 0x80; i < 0xA0; i++)
3439     iso_code_class[i] = ISO_control_code;
3440   for (i = 0xA1; i < 0xFF; i++)
3441     iso_code_class[i] = ISO_graphic_plane_1;
3442   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
3443   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
3444   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
3445   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
3446   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
3447   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
3448   iso_code_class[ISO_CODE_ESC] = ISO_escape;
3449   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
3450   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
3451   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
3452
3453   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
3454   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
3455
3456   setup_coding_system (Qnil, &keyboard_coding);
3457   setup_coding_system (Qnil, &terminal_coding);
3458 }
3459
3460 #ifdef emacs
3461
3462 syms_of_coding ()
3463 {
3464   Qtarget_idx = intern ("target-idx");
3465   staticpro (&Qtarget_idx);
3466
3467   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
3468   Fput (Qwrite_region, Qtarget_idx, make_number (2));
3469
3470   Qcall_process = intern ("call-process");
3471   staticpro (&Qcall_process);
3472   Fput (Qcall_process, Qtarget_idx, make_number (0));
3473
3474   Qcall_process_region = intern ("call-process-region");
3475   staticpro (&Qcall_process_region);
3476   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
3477
3478   Qstart_process = intern ("start-process");
3479   staticpro (&Qstart_process);
3480   Fput (Qstart_process, Qtarget_idx, make_number (2));
3481
3482   Qopen_network_stream = intern ("open-network-stream");
3483   staticpro (&Qopen_network_stream);
3484   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
3485
3486   Qcoding_system = intern ("coding-system");
3487   staticpro (&Qcoding_system);
3488
3489   Qeol_type = intern ("eol-type");
3490   staticpro (&Qeol_type);
3491
3492   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
3493   staticpro (&Qbuffer_file_coding_system);
3494
3495   Qpost_read_conversion = intern ("post-read-conversion");
3496   staticpro (&Qpost_read_conversion);
3497
3498   Qpre_write_conversion = intern ("pre-write-conversion");
3499   staticpro (&Qpre_write_conversion);
3500
3501   Qcoding_system_vector = intern ("coding-system-vector");
3502   staticpro (&Qcoding_system_vector);
3503
3504   Qcoding_system_p = intern ("coding-system-p");
3505   staticpro (&Qcoding_system_p);
3506
3507   Qcoding_system_error = intern ("coding-system-error");
3508   staticpro (&Qcoding_system_error);
3509
3510   Fput (Qcoding_system_error, Qerror_conditions,
3511         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
3512   Fput (Qcoding_system_error, Qerror_message,
3513         build_string ("Coding-system error"));
3514
3515   Qcoding_category_index = intern ("coding-category-index");
3516   staticpro (&Qcoding_category_index);
3517
3518   {
3519     int i;
3520     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3521       {
3522         coding_category_table[i] = intern (coding_category_name[i]);
3523         staticpro (&coding_category_table[i]);
3524         Fput (coding_category_table[i], Qcoding_category_index,
3525               make_number (i));
3526       }
3527   }
3528
3529   defsubr (&Scoding_system_vector);
3530   defsubr (&Scoding_system_p);
3531   defsubr (&Sread_coding_system);
3532   defsubr (&Sread_non_nil_coding_system);
3533   defsubr (&Scheck_coding_system);
3534   defsubr (&Sdetect_coding_region);
3535   defsubr (&Sdecode_coding_region);
3536   defsubr (&Sencode_coding_region);
3537   defsubr (&Sdecode_coding_string);
3538   defsubr (&Sencode_coding_string);
3539   defsubr (&Sdecode_sjis_char);
3540   defsubr (&Sencode_sjis_char);
3541   defsubr (&Sdecode_big5_char);
3542   defsubr (&Sencode_big5_char);
3543   defsubr (&Sset_terminal_coding_system);
3544   defsubr (&Sterminal_coding_system);
3545   defsubr (&Sset_keyboard_coding_system);
3546   defsubr (&Skeyboard_coding_system);
3547   defsubr (&Sfind_coding_system);
3548
3549   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
3550     "List of coding-categories (symbols) ordered by priority.");
3551   {
3552     int i;
3553
3554     Vcoding_category_list = Qnil;
3555     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
3556       Vcoding_category_list
3557         = Fcons (coding_category_table[i], Vcoding_category_list);
3558   }
3559
3560   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
3561     "A variable of internal use only.\n\
3562 If the value is a coding system, it is used for decoding on read operation.\n\
3563 If not, an appropriate element in `coding-system-alist' (which see) is used.");
3564   Vcoding_system_for_read = Qnil;
3565
3566   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
3567     "A variable of internal use only.\n\
3568 If the value is a coding system, it is used for encoding on write operation.\n\
3569 If not, an appropriate element in `coding-system-alist' (which see) is used.");
3570   Vcoding_system_for_write = Qnil;
3571
3572   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
3573     "Coding-system used in the latest file or process I/O.");
3574   Vlast_coding_system_used = Qnil;
3575
3576   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
3577     "Nested alist to decide a coding system for a specific I/O operation.\n\
3578 The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\
3579 \n\
3580 OPERATION is one of the following Emacs I/O primitives:\n\
3581   For file I/O, insert-file-contents and write-region.\n\
3582   For process I/O, call-process, call-process-region, and start-process.\n\
3583   For network I/O, open-network-stream.\n\
3584 In addition, for process I/O, `process-argument' can be specified for\n\
3585 encoding arguments of the process.\n\
3586 \n\
3587 REGEXP is a regular expression matching a target of OPERATION, where\n\
3588 target is a file name for file I/O operations, a process name for\n\
3589 process I/O operations, or a service name for network I/O\n\
3590 operations.  REGEXP might be a port number for network I/O operation.\n\
3591 \n\
3592 CODING-SYSTEMS is a cons of coding systems to encode and decode\n\
3593 character code on OPERATION, or a function symbol returning the cons.\n\
3594 See the documentation of `find-coding-system' for more detail.");
3595   Vcoding_system_alist = Qnil;
3596
3597   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
3598     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
3599   eol_mnemonic_unix = '.';
3600
3601   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
3602     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
3603   eol_mnemonic_dos = ':';
3604
3605   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
3606     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
3607   eol_mnemonic_mac = '\'';
3608
3609   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
3610     "Mnemonic character indicating end-of-line format is not yet decided.");
3611   eol_mnemonic_undecided = '-';
3612
3613   DEFVAR_LISP ("alternate-charset-table", &Valternate_charset_table,
3614     "Alist of charsets vs the alternate charsets.\n\
3615 While decoding, if a charset (car part of an element) is found,\n\
3616 decode it as the alternate charset (cdr part of the element).");
3617   Valternate_charset_table = Qnil;
3618
3619   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
3620     "Alist of charsets vs revision numbers.\n\
3621 While encoding, if a charset (car part of an element) is found,\n\
3622 designate it with the escape sequence identifing revision (cdr part of the element).");
3623   Vcharset_revision_alist = Qnil;
3624 }
3625
3626 #endif /* emacs */