(Fread_coding_system): Update decl.

[bpt/emacs.git] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index 7d93362..4fae220 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,7 +1,6 @@
  /* Coding system handler (conversion, detection, and etc).
-   Ver.1.0.
-   Copyright (C) 1995 Free Software Foundation, Inc.
-   Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
+   Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
+   Licensed to the Free Software Foundation.
  
  This file is part of GNU Emacs.
  
@@ -23,7 +22,7 @@ Boston, MA 02111-1307, USA.  */
  /*** TABLE OF CONTENTS ***
  
    1. Preamble
-  2. Emacs' internal format handlers
+  2. Emacs' internal format (emacs-mule) handlers
    3. ISO2022 handlers
    4. Shift-JIS and BIG5 handlers
    5. End-of-line handlers
@@ -38,47 +37,53 @@ Boston, MA 02111-1307, USA.  */
    Coding system is an encoding mechanism of one or more character
    sets.  Here's a list of coding systems which Emacs can handle.  When
    we say "decode", it means converting some other coding system to
-  Emacs' internal format, and when we say "encode", it means
-  converting Emacs' internal format to some other coding system.
+  Emacs' internal format (emacs-mule), and when we say "encode",
+  it means converting the coding system emacs-mule to some other
+  coding system.
  
-  0. Emacs' internal format
+  0. Emacs' internal format (emacs-mule)
  
    Emacs itself holds a multi-lingual character in a buffer and a string
-  in a special format.  Details are described in the section 2.
+  in a special format.  Details are described in section 2.
  
    1. ISO2022
  
    The most famous coding system for multiple character sets.  X's
-  Compound Text, various EUCs (Extended Unix Code), and such coding
-  systems used in Internet communication as ISO-2022-JP are all
-  variants of ISO2022.  Details are described in the section 3.
+  Compound Text, various EUCs (Extended Unix Code), and coding
+  systems used in Internet communication such as ISO-2022-JP are
+  all variants of ISO2022.  Details are described in section 3.
  
    2. SJIS (or Shift-JIS or MS-Kanji-Code)
     
    A coding system to encode character sets: ASCII, JISX0201, and
    JISX0208.  Widely used for PC's in Japan.  Details are described in
-  the section 4.
+  section 4.
  
    3. BIG5
  
    A coding system to encode character sets: ASCII and Big5.  Widely
    used by Chinese (mainly in Taiwan and Hong Kong).  Details are
-  described in the section 4.  In this file, when written as "BIG5"
-  (all uppercase), it means the coding system, and when written as
-  "Big5" (capitalized), it means the character set.
+  described in section 4.  In this file, when we write "BIG5"
+  (all uppercase), we mean the coding system, and when we write
+  "Big5" (capitalized), we mean the character set.
  
-  4. Else
+  4. Raw text
  
-  If a user want to read/write a text encoded in a coding system not
+  A coding system for a text containing random 8-bit code.  Emacs
+  does no code conversion on such a text except for end-of-line
+  format.
+
+  5. Other
+
+  If a user wants to read/write a text encoded in a coding system not
    listed above, he can supply a decoder and an encoder for it in CCL
    (Code Conversion Language) programs.  Emacs executes the CCL program
    while reading/writing.
  
-  Emacs represent a coding-system by a Lisp symbol that has a property
+  Emacs represents a coding-system by a Lisp symbol that has a property
    `coding-system'.  But, before actually using the coding-system, the
    information about it is set in a structure of type `struct
-  coding_system' for rapid processing.  See the section 6 for more
-  detail.
+  coding_system' for rapid processing.  See section 6 for more details.
  
  */
  
@@ -86,14 +91,13 @@ Boston, MA 02111-1307, USA.  */
  
    How end-of-line of a text is encoded depends on a system.  For
    instance, Unix's format is just one byte of `line-feed' code,
-  whereas DOS's format is two bytes sequence of `carriage-return' and
+  whereas DOS's format is a two-byte sequence of `carriage-return' and
    `line-feed' codes.  MacOS's format is one byte of `carriage-return'.
  
-  Since how characters in a text is encoded and how end-of-line is
-  encoded is independent, any coding system described above can take
+  Since character encoding and end-of-line encoding of a text are
+  independent, any coding system described above can take
    any format of end-of-line.  So, Emacs has information of format of
-  end-of-line in each coding-system.  See the section 6 for more
-  detail.
+  end-of-line in each coding-system.  See section 6 for more details.
  
  */
  
@@ -106,7 +110,7 @@ Boston, MA 02111-1307, USA.  */
    template of these functions.  */
  #if 0
  int
-detect_coding_internal (src, src_end)
+detect_coding_emacs_mule (src, src_end)
       unsigned char *src, *src_end;
  {
    ...
@@ -116,11 +120,11 @@ detect_coding_internal (src, src_end)
  /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
  
    These functions decode SRC_BYTES length text at SOURCE encoded in
-  CODING to Emacs' internal format.  The resulting text goes to a
-  place pointed by DESTINATION, the length of which should not exceed
-  DST_BYTES.  The bytes actually processed is returned as *CONSUMED.
-  The return value is the length of the decoded text.  Below is a
-  template of these functions.  */
+  CODING to Emacs' internal format (emacs-mule).  The resulting text
+  goes to a place pointed to by DESTINATION, the length of which should
+  not exceed DST_BYTES.  The number of bytes actually processed is
+  returned as *CONSUMED.  The return value is the length of the decoded
+  text.  Below is a template of these functions.  */
  #if 0
  decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
       struct coding_system *coding;
@@ -134,12 +138,12 @@ decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
  
  /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
  
-  These functions encode SRC_BYTES length text at SOURCE of Emacs
-  internal format to CODING.  The resulting text goes to a place
-  pointed by DESTINATION, the length of which should not exceed
-  DST_BYTES.  The bytes actually processed is returned as *CONSUMED.
-  The return value is the length of the encoded text.  Below is a
-  template of these functions.  */
+  These functions encode SRC_BYTES length text at SOURCE of Emacs'
+  internal format (emacs-mule) to CODING.  The resulting text goes to
+  a place pointed to by DESTINATION, the length of which should not
+  exceed DST_BYTES.  The number of bytes actually processed is
+  returned as *CONSUMED.  The return value is the length of the
+  encoded text.  Below is a template of these functions.  */
  #if 0
  encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
       struct coding_system *coding;
@@ -200,7 +204,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
        *dst++ = (c);                                            \
    } while (0)
  
-/* Decode one DIMENSION1 character of which charset is CHARSET and
+/* Decode one DIMENSION1 character whose charset is CHARSET and whose
     position-code is C.  */
  
  #define DECODE_CHARACTER_DIMENSION1(charset, c)                                \
@@ -215,7 +219,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
      *dst++ = (c) | 0x80;                                               \
    } while (0)
  
-/* Decode one DIMENSION2 character of which charset is CHARSET and
+/* Decode one DIMENSION2 character whose charset is CHARSET and whose
     position-codes are C1 and C2.  */
  
  #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)   \
@@ -248,6 +252,8 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
  Lisp_Object Qcoding_system, Qeol_type;
  Lisp_Object Qbuffer_file_coding_system;
  Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
+Lisp_Object Qno_conversion, Qundecided;
+Lisp_Object Qcoding_system_history;
  
  extern Lisp_Object Qinsert_file_contents, Qwrite_region;
  Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
@@ -260,9 +266,16 @@ int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
     decided.  */
  int eol_mnemonic_undecided;
  
+/* Format of end-of-line decided by system.  This is CODING_EOL_LF on
+   Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
+int system_eol_type;
+
  #ifdef emacs
  
-Lisp_Object Qcoding_system_vector, Qcoding_system_p, Qcoding_system_error;
+Lisp_Object Qcoding_system_spec, Qcoding_system_p, Qcoding_system_error;
+
+/* Coding system emacs-mule is for converting only end-of-line format.  */
+Lisp_Object Qemacs_mule;
  
  /* Coding-systems are handed between Emacs Lisp programs and C internal
     routines by the following three variables.  */
@@ -273,13 +286,26 @@ Lisp_Object Vcoding_system_for_write;
  /* Coding-system actually used in the latest I/O.  */
  Lisp_Object Vlast_coding_system_used;
  
-/* Coding-system of what terminal accept for displaying.  */
+/* A vector of length 256 which contains information about special
+   Latin codes (especially for dealing with Microsoft code).  */
+Lisp_Object Vlatin_extra_code_table;
+
+/* Flag to inhibit code conversion of end-of-line format.  */
+int inhibit_eol_conversion;
+
+/* Coding system to be used to encode text for terminal display.  */
  struct coding_system terminal_coding;
  
-/* Coding-system of what is sent from terminal keyboard.  */
+/* Coding system to be used to encode text for terminal display when
+   terminal coding system is nil.  */
+struct coding_system safe_terminal_coding;
+
+/* Coding system of what is sent from terminal keyboard.  */
  struct coding_system keyboard_coding;
  
-Lisp_Object Vcoding_system_alist;
+Lisp_Object Vfile_coding_system_alist;
+Lisp_Object Vprocess_coding_system_alist;
+Lisp_Object Vnetwork_coding_system_alist;
  
  #endif /* emacs */
  
@@ -293,45 +319,59 @@ Lisp_Object coding_category_table[CODING_CATEGORY_IDX_MAX];
  
  /* Table of names of symbol for each coding-category.  */
  char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
-  "coding-category-internal",
+  "coding-category-emacs-mule",
    "coding-category-sjis",
    "coding-category-iso-7",
    "coding-category-iso-8-1",
    "coding-category-iso-8-2",
-  "coding-category-iso-else",
+  "coding-category-iso-7-else",
+  "coding-category-iso-8-else",
    "coding-category-big5",
+  "coding-category-raw-text",
    "coding-category-binary"
  };
  
-/* Alist of charsets vs the alternate charsets.  */
-Lisp_Object Valternate_charset_table;
+/* Flag to tell if we look up unification table on character code
+   conversion.  */
+Lisp_Object Venable_character_unification;
+/* Standard unification table to look up on decoding (reading).  */
+Lisp_Object Vstandard_character_unification_table_for_decode;
+/* Standard unification table to look up on encoding (writing).  */
+Lisp_Object Vstandard_character_unification_table_for_encode;
+
+Lisp_Object Qcharacter_unification_table;
+Lisp_Object Qcharacter_unification_table_for_decode;
+Lisp_Object Qcharacter_unification_table_for_encode;
  
  /* Alist of charsets vs revision number.  */
  Lisp_Object Vcharset_revision_alist;
  
+/* Default coding systems used for process I/O.  */
+Lisp_Object Vdefault_process_coding_system;
+
  \f
-/*** 2. Emacs internal format handlers ***/
+/*** 2. Emacs internal format (emacs-mule) handlers ***/
  
  /* Emacs' internal format for encoding multiple character sets is a
-   kind of multi-byte encoding, i.e. encoding a character by a sequence
-   of one-byte codes of variable length.  ASCII characters and control
-   characters (e.g. `tab', `newline') are represented by one-byte as
-   is.  It takes the range 0x00 through 0x7F.  The other characters
-   are represented by a sequence of `base leading-code', optional
-   `extended leading-code', and one or two `position-code's.  Length
-   of the sequence is decided by the base leading-code.  Leading-code
-   takes the range 0x80 through 0x9F, whereas extended leading-code
-   and position-code take the range 0xA0 through 0xFF.  See the
-   document of `charset.h' for more detail about leading-code and
-   position-code.
-
-   There's one exception in this rule.  Special leading-code
+   kind of multi-byte encoding, i.e. characters are encoded by
+   variable-length sequences of one-byte codes.  ASCII characters
+   and control characters (e.g. `tab', `newline') are represented by
+   one-byte sequences which are their ASCII codes, in the range 0x00
+   through 0x7F.  The other characters are represented by a sequence
+   of `base leading-code', optional `extended leading-code', and one
+   or two `position-code's.  The length of the sequence is determined
+   by the base leading-code.  Leading-code takes the range 0x80
+   through 0x9F, whereas extended leading-code and position-code take
+   the range 0xA0 through 0xFF.  See `charset.h' for more details
+   about leading-code and position-code.
+
+   There's one exception to this rule.  Special leading-code
     `leading-code-composition' denotes that the following several
     characters should be composed into one character.  Leading-codes of
     components (except for ASCII) are added 0x20.  An ASCII character
     component is represented by a 2-byte sequence of `0xA0' and
-   `ASCII-code + 0x80'.  See also the document in `charset.h' for the
-   detail of composite character.  Hence, we can summarize the code
+   `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
+   details of composite character.  Hence, we can summarize the code
     range as follows:
  
     --- CODE RANGE of Emacs' internal format ---
@@ -357,10 +397,10 @@ enum emacs_code_class_type emacs_code_class[256];
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
     Check if a text is encoded in Emacs' internal format.  If it is,
-   return CODING_CATEGORY_MASK_INTERNAL, else return 0.  */
+   return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
  
  int
-detect_coding_internal (src, src_end)
+detect_coding_emacs_mule (src, src_end)
       unsigned char *src, *src_end;
  {
    unsigned char c;
@@ -416,28 +456,28 @@ detect_coding_internal (src, src_end)
           break;
         }
      }
-  return CODING_CATEGORY_MASK_INTERNAL;
+  return CODING_CATEGORY_MASK_EMACS_MULE;
  }
  
  \f
  /*** 3. ISO2022 handlers ***/
  
  /* The following note describes the coding system ISO2022 briefly.
-   Since the intension of this note is to help understanding of the
-   programs in this file, some parts are NOT ACCURATE or OVERLY
+   Since the intention of this note is to help in understanding
+   the programs in this file, some parts are NOT ACCURATE or OVERLY
     SIMPLIFIED.  For the thorough understanding, please refer to the
     original document of ISO2022.
  
     ISO2022 provides many mechanisms to encode several character sets
-   in 7-bit and 8-bit environment.  If one choose 7-bite environment,
+   in 7-bit and 8-bit environments.  If one chooses a 7-bit environment,
     all text is encoded by codes of less than 128.  This may make the
-   encoded text a little bit longer, but the text get more stability
-   to pass through several gateways (some of them split MSB off).
+   encoded text a little bit longer, but the text gets more stability
+   to pass through several gateways (some of them strip off the MSB).
  
-   There are two kind of character set: control character set and
+   There are two kinds of character set: control character set and
     graphic character set.  The former contains control characters such
     as `newline' and `escape' to provide control functions (control
-   functions are provided also by escape sequence).  The latter
+   functions are provided also by escape sequences).  The latter
     contains graphic characters such as ' A' and '-'.  Emacs recognizes
     two control character sets and many graphic character sets.
  
@@ -498,7 +538,7 @@ detect_coding_internal (src, src_end)
     function            control char    escape sequence description
     ----------------------------------------------------------------------
     SI  (shift-in)              0x0F    none            invoke G0 to GL
-   SI  (shift-out)             0x0E    none            invoke G1 to GL
+   SO  (shift-out)             0x0E    none            invoke G1 to GL
     LS2 (locking-shift-2)       none    ESC 'n'         invoke G2 into GL
     LS3 (locking-shift-3)       none    ESC 'o'         invoke G3 into GL
     SS2 (single-shift-2)                0x8E    ESC 'N'         invoke G2 into GL
@@ -541,7 +581,7 @@ detect_coding_internal (src, src_end)
     '(' can be omitted.  We call this as "short-form" here after.
  
     Now you may notice that there are a lot of ways for encoding the
-   same multilingual text in ISO2022.  Actually, there exist many
+   same multilingual text in ISO2022.  Actually, there exist many
     coding systems such as Compound Text (used in X's inter client
     communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
     (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
@@ -573,7 +613,8 @@ enum iso_code_class_type iso_code_class[256];
         CODING_CATEGORY_MASK_ISO_7
         CODING_CATEGORY_MASK_ISO_8_1
         CODING_CATEGORY_MASK_ISO_8_2
-       CODING_CATEGORY_MASK_ISO_ELSE
+       CODING_CATEGORY_MASK_ISO_7_ELSE
+       CODING_CATEGORY_MASK_ISO_8_ELSE
     are set.  If a code which should never appear in ISO2022 is found,
     returns 0.  */
  
@@ -581,64 +622,128 @@ int
  detect_coding_iso2022 (src, src_end)
       unsigned char *src, *src_end;
  {
-  unsigned char graphic_register[4];
-  unsigned char c, esc_cntl;
    int mask = (CODING_CATEGORY_MASK_ISO_7
               | CODING_CATEGORY_MASK_ISO_8_1
-             | CODING_CATEGORY_MASK_ISO_8_2);
-  /* We may look ahead maximum 3 bytes.  */
-  unsigned char *adjusted_src_end = src_end - 3;
-  int i;
-
-  for (i = 0; i < 4; i++)
-    graphic_register[i] = CHARSET_ASCII;
-
-  while (src < adjusted_src_end)
+             | CODING_CATEGORY_MASK_ISO_8_2
+             | CODING_CATEGORY_MASK_ISO_7_ELSE
+             | CODING_CATEGORY_MASK_ISO_8_ELSE
+             );
+  int g1 = 0;                  /* 1 iff designating to G1.  */
+  int c, i;
+  struct coding_system coding_iso_8_1, coding_iso_8_2;
+
+  /* Coding systems of these categories may accept latin extra codes.  */
+  setup_coding_system
+    (XSYMBOL (coding_category_table[CODING_CATEGORY_IDX_ISO_8_1])->value,
+     &coding_iso_8_1);
+  setup_coding_system
+    (XSYMBOL (coding_category_table[CODING_CATEGORY_IDX_ISO_8_2])->value,
+     &coding_iso_8_2);
+
+  while (mask && src < src_end)
      {
        c = *src++;
        switch (c)
         {
         case ISO_CODE_ESC:
-         if (src >= adjusted_src_end)
+         if (src >= src_end)
             break;
           c = *src++;
-         if (c == '$')
+         if ((c >= '(' && c <= '/'))
+           {
+             /* Designation sequence for a charset of dimension 1.  */
+             if (src >= src_end)
+               break;
+             c = *src++;
+             if (c < ' ' || c >= 0x80)
+               /* Invalid designation sequence.  */
+               return 0;
+           }
+         else if (c == '$')
             {
-             /* Designation of 2-byte character set.  */
-             if (src >= adjusted_src_end)
+             /* Designation sequence for a charset of dimension 2.  */
+             if (src >= src_end)
                 break;
               c = *src++;
+             if (c >= '@' && c <= 'B')
+               /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
+               ;
+             else if (c >= '(' && c <= '/')
+               {
+                 if (src >= src_end)
+                   break;
+                 c = *src++;
+                 if (c < ' ' || c >= 0x80)
+                   /* Invalid designation sequence.  */
+                   return 0;
+               }
+             else
+               /* Invalid designation sequence.  */
+               return 0;
             }
-         if ((c >= ')' && c <= '+') || (c >= '-' && c <= '/'))
-           /* Designation to graphic register 1, 2, or 3.  */
-           mask &= ~CODING_CATEGORY_MASK_ISO_7;
           else if (c == 'N' || c == 'O' || c == 'n' || c == 'o')
-           return CODING_CATEGORY_MASK_ISO_ELSE;
+           /* Locking shift.  */
+           mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE
+                    | CODING_CATEGORY_MASK_ISO_8_ELSE);
+         else if (c == '0' || c == '1' || c == '2')
+           /* Start/end composition.  */
+           ;
+         else
+           /* Invalid escape sequence.  */
+           return 0;
           break;
  
-       case ISO_CODE_SI:
         case ISO_CODE_SO:
-         return CODING_CATEGORY_MASK_ISO_ELSE;
-
+         mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE
+                  | CODING_CATEGORY_MASK_ISO_8_ELSE);
+         break;
+         
         case ISO_CODE_CSI:
         case ISO_CODE_SS2:
         case ISO_CODE_SS3:
-         mask &= ~CODING_CATEGORY_MASK_ISO_7;
+         {
+           int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
+
+           if (VECTORP (Vlatin_extra_code_table)
+               && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
+             {
+               if (coding_iso_8_1.flags & CODING_FLAG_ISO_LATIN_EXTRA)
+                 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
+               if (coding_iso_8_2.flags & CODING_FLAG_ISO_LATIN_EXTRA)
+                 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
+             }
+           mask &= newmask;
+         }
           break;
  
         default:
           if (c < 0x80)
             break;
           else if (c < 0xA0)
-           return 0;
+           {
+             if (VECTORP (Vlatin_extra_code_table)
+                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
+               {
+                 int newmask = 0;
+
+                 if (coding_iso_8_1.flags & CODING_FLAG_ISO_LATIN_EXTRA)
+                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
+                 if (coding_iso_8_2.flags & CODING_FLAG_ISO_LATIN_EXTRA)
+                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
+                 mask &= newmask;
+               }
+             else
+               return 0;
+           }
           else
             {
-             int count = 1;
+             unsigned char *src_begin = src;
  
-             mask &= ~CODING_CATEGORY_MASK_ISO_7;
-             while (src < adjusted_src_end && *src >= 0xA0)
-               count++, src++;
-             if (count & 1 && src < adjusted_src_end)
+             mask &= ~(CODING_CATEGORY_MASK_ISO_7
+                       | CODING_CATEGORY_MASK_ISO_7_ELSE);
+             while (src < src_end && *src >= 0xA0)
+               src++;
+             if ((src - src_begin - 1) & 1 && src < src_end)
                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
             }
           break;
@@ -649,44 +754,48 @@ detect_coding_iso2022 (src, src_end)
  }
  
  /* Decode a character of which charset is CHARSET and the 1st position
-   code is C1.  If dimension of CHARSET 2, the 2nd position code is
+   code is C1.  If dimension of CHARSET is 2, the 2nd position code is
     fetched from SRC and set to C2.  If CHARSET is negative, it means
     that we are decoding ill formed text, and what we can do is just to
     read C1 as is.  */
  
-#define DECODE_ISO_CHARACTER(charset, c1)                      \
-  do {                                                         \
-    if ((charset) >= 0 && CHARSET_DIMENSION (charset) == 2)    \
-      ONE_MORE_BYTE (c2);                                      \
-    if (COMPOSING_HEAD_P (coding->composing))                  \
-      {                                                                \
-       *dst++ = LEADING_CODE_COMPOSITION;                      \
-       if (COMPOSING_WITH_RULE_P (coding->composing))          \
-         /* To tell composition rules are embeded.  */         \
-         *dst++ = 0xFF;                                        \
-       coding->composing += 2;                                 \
-      }                                                                \
-    if ((charset) < 0)                                         \
-      *dst++ = c1;                                             \
-    else if ((charset) == CHARSET_ASCII)                       \
-      DECODE_CHARACTER_ASCII (c1);                             \
-    else if (CHARSET_DIMENSION (charset) == 1)                 \
-      DECODE_CHARACTER_DIMENSION1 (charset, c1);               \
-    else                                                       \
-      DECODE_CHARACTER_DIMENSION2 (charset, c1, c2);           \
-    if (COMPOSING_WITH_RULE_P (coding->composing))             \
-      /* To tell a composition rule follows.  */               \
-      coding->composing = COMPOSING_WITH_RULE_RULE;            \
+#define DECODE_ISO_CHARACTER(charset, c1)                              \
+  do {                                                                 \
+    int c_alt, charset_alt = (charset);                                        \
+    if (COMPOSING_HEAD_P (coding->composing))                          \
+      {                                                                        \
+       *dst++ = LEADING_CODE_COMPOSITION;                              \
+       if (COMPOSING_WITH_RULE_P (coding->composing))                  \
+         /* To tell composition rules are embeded.  */                 \
+         *dst++ = 0xFF;                                                \
+       coding->composing += 2;                                         \
+      }                                                                        \
+    if ((charset) >= 0)                                                        \
+      {                                                                        \
+       if (CHARSET_DIMENSION (charset) == 2)                           \
+         ONE_MORE_BYTE (c2);                                           \
+       if (!NILP (unification_table)                                   \
+           && ((c_alt = unify_char (unification_table,                 \
+                                    -1, (charset), c1, c2)) >= 0))     \
+         SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
+      }                                                                        \
+    if (charset_alt == CHARSET_ASCII || charset_alt < 0)               \
+      DECODE_CHARACTER_ASCII (c1);                                     \
+    else if (CHARSET_DIMENSION (charset_alt) == 1)                     \
+      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                   \
+    else                                                               \
+      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);               \
+    if (COMPOSING_WITH_RULE_P (coding->composing))                     \
+      /* To tell a composition rule follows.  */                       \
+      coding->composing = COMPOSING_WITH_RULE_RULE;                    \
    } while (0)
  
  /* Set designation state into CODING.  */
  #define DECODE_DESIGNATION(reg, dimension, chars, final_char)          \
    do {                                                                 \
-    int charset = ISO_CHARSET_TABLE (dimension, chars, final_char);    \
-    Lisp_Object temp                                                   \
-      = Fassq (CHARSET_SYMBOL (charset), Valternate_charset_table);    \
-    if (! NILP (temp))                                                 \
-      charset = get_charset_id (XCONS (temp)->cdr);                    \
+    int charset = ISO_CHARSET_TABLE (make_number (dimension),          \
+                                    make_number (chars),               \
+                                    make_number (final_char));         \
      if (charset >= 0)                                                  \
        {                                                                        \
          if (coding->direction == 1                                     \
@@ -718,6 +827,11 @@ decode_coding_iso2022 (coding, source, destination,
    /* Charsets invoked to graphic plane 0 and 1 respectively.  */
    int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
    int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_decode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_decode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -727,7 +841,7 @@ decode_coding_iso2022 (coding, source, destination,
          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
          to SRC_BASE before exiting.  */
        unsigned char *src_base = src;
-      unsigned char c1 = *src++, c2, cmprule;
+      int c1 = *src++, c2;
  
        switch (iso_code_class [c1])
         {
@@ -794,6 +908,8 @@ decode_coding_iso2022 (coding, source, destination,
           break;
  
         case ISO_shift_out:
+         if (CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
+           goto label_invalid_escape_sequence;
           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
           break;
@@ -830,14 +946,10 @@ decode_coding_iso2022 (coding, source, destination,
             case '&':           /* revision of following character set */
               ONE_MORE_BYTE (c1);
               if (!(c1 >= '@' && c1 <= '~'))
-               {
-                 goto label_invalid_escape_sequence;
-               }
+               goto label_invalid_escape_sequence;
               ONE_MORE_BYTE (c1);
               if (c1 != ISO_CODE_ESC)
-               {
-                 goto label_invalid_escape_sequence;
-               }
+               goto label_invalid_escape_sequence;
               ONE_MORE_BYTE (c1);
               goto label_escape_sequence;
  
@@ -859,26 +971,34 @@ decode_coding_iso2022 (coding, source, destination,
                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
                 }
               else
-               {
-                 goto label_invalid_escape_sequence;
-               }
+               goto label_invalid_escape_sequence;
               break;
  
             case 'n':           /* invocation of locking-shift-2 */
+             if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
+               goto label_invalid_escape_sequence;
               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
+             charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
               break;
  
             case 'o':           /* invocation of locking-shift-3 */
+             if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
+               goto label_invalid_escape_sequence;
               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
+             charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
               break;
  
             case 'N':           /* invocation of single-shift-2 */
+             if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
+               goto label_invalid_escape_sequence;
               ONE_MORE_BYTE (c1);
               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
               DECODE_ISO_CHARACTER (charset, c1);
               break;
  
             case 'O':           /* invocation of single-shift-3 */
+             if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
+               goto label_invalid_escape_sequence;
               ONE_MORE_BYTE (c1);
               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
               DECODE_ISO_CHARACTER (charset, c1);
@@ -979,10 +1099,10 @@ decode_coding_iso2022 (coding, source, destination,
    return dst - destination;
  }
  
-/* ISO2022 encoding staffs.  */
+/* ISO2022 encoding stuff.  */
  
  /*
-   It is not enough to say just "ISO2022" on encoding, but we have to
+   It is not enough to say just "ISO2022" on encoding; we have to
     specify more details.  In Emacs, each coding-system of ISO2022
     variant has the following specifications:
         1. Initial designation to G0 thru G3.
@@ -997,7 +1117,7 @@ decode_coding_iso2022 (coding, source, destination,
         9. Use JISX0208-1983 in place of JISX0208-1978?
     These specifications are encoded in `coding->flags' as flag bits
     defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
-   detail.
+   details.
  */
  
  /* Produce codes (escape sequence) for designating CHARSET to graphic
@@ -1093,8 +1213,8 @@ decode_coding_iso2022 (coding, source, destination,
      CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;        \
    } while (0)
  
-/* Produce codes for a DIMENSION1 character of which character set is
-   CHARSET and position-code is C1.  Designation and invocation
+/* Produce codes for a DIMENSION1 character whose character set is
+   CHARSET and whose position-code is C1.  Designation and invocation
     sequences are also produced in advance if necessary.  */
  
  
@@ -1119,6 +1239,16 @@ decode_coding_iso2022 (coding, source, destination,
         *dst++ = c1 | 0x80;                                             \
         break;                                                          \
        }                                                                        \
+    else if (coding->flags & CODING_FLAG_ISO_SAFE                      \
+            && !CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset])   \
+      {                                                                        \
+       /* We should not encode this character, instead produce one or  \
+          two `?'s.  */                                                \
+       *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
+       if (CHARSET_WIDTH (charset) == 2)                               \
+         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
+       break;                                                          \
+      }                                                                        \
      else                                                               \
        /* Since CHARSET is not yet invoked to any graphic planes, we    \
          must invoke it, or, at first, designate it to some graphic     \
@@ -1127,8 +1257,8 @@ decode_coding_iso2022 (coding, source, destination,
        dst = encode_invocation_designation (charset, coding, dst);      \
    } while (1)
  
-/* Produce codes for a DIMENSION2 character of which character set is
-   CHARSET and position-codes are C1 and C2.  Designation and
+/* Produce codes for a DIMENSION2 character whose character set is
+   CHARSET and whose position-codes are C1 and C2.  Designation and
     invocation codes are also produced in advance if necessary.  */
  
  #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)               \
@@ -1152,6 +1282,16 @@ decode_coding_iso2022 (coding, source, destination,
         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
         break;                                                          \
        }                                                                        \
+    else if (coding->flags & CODING_FLAG_ISO_SAFE                      \
+            && !CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset])   \
+      {                                                                        \
+       /* We should not encode this character, instead produce one or  \
+          two `?'s.  */                                                \
+       *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
+       if (CHARSET_WIDTH (charset) == 2)                               \
+         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
+       break;                                                          \
+      }                                                                        \
      else                                                               \
        /* Since CHARSET is not yet invoked to any graphic planes, we    \
          must invoke it, or, at first, designate it to some graphic     \
@@ -1160,6 +1300,21 @@ decode_coding_iso2022 (coding, source, destination,
        dst = encode_invocation_designation (charset, coding, dst);      \
    } while (1)
  
+#define ENCODE_ISO_CHARACTER(charset, c1, c2)                            \
+  do { /* Encode one character, unifying it first if a table is set.  */ \
+    int c_alt, charset_alt;                                              \
+    if (!NILP (unification_table)                                        \
+       && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
+           >= 0))                                                        \
+      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                           \
+    else /* No table, or unify_char returned a negative value.  */      \
+      charset_alt = charset;                                             \
+    if (CHARSET_DIMENSION (charset_alt) == 1) /* C2 is not used.  */     \
+      ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);                 \
+    else                                                                 \
+      ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);             \
+  } while (0)
+
  /* Produce designation and invocation codes at a place pointed by DST
     to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
     Return new DST.  */
@@ -1182,9 +1337,9 @@ encode_invocation_designation (charset, coding, dst)
        /* CHARSET is not yet designated to any graphic registers.  */
        /* At first check the requested designation.  */
        reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
-      if (reg < 0)
-       /* Since CHARSET requests no special designation, designate to
-          graphic register 0.  */
+      if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
+       /* Since CHARSET requests no special designation, designate it
+          to graphic register 0.  */
         reg = 0;
  
        ENCODE_DESIGNATION (charset, reg, coding);
@@ -1246,24 +1401,72 @@ encode_invocation_designation (charset, coding, dst)
  
  /* Produce codes for designation and invocation to reset the graphic
     planes and registers to initial state.  */
-#define ENCODE_RESET_PLANE_AND_REGISTER(eol)                                 \
-  do {                                                                       \
-    int reg;                                                                 \
-    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                         \
-      ENCODE_SHIFT_IN;                                                       \
-    for (reg = 0; reg < 4; reg++)                                            \
-      {                                                                              \
-       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) < 0)            \
-         {                                                                   \
-           if (eol) CODING_SPEC_ISO_DESIGNATION (coding, reg) = -1;          \
-         }                                                                   \
-       else if (CODING_SPEC_ISO_DESIGNATION (coding, reg)                    \
-                != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))        \
-         ENCODE_DESIGNATION                                                  \
-           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
-      }                                                                              \
+#define ENCODE_RESET_PLANE_AND_REGISTER                                            \
+  do { /* Reads CODING from the scope in which the macro is used.  */      \
+    int reg;                                                               \
+    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                       \
+      ENCODE_SHIFT_IN;                                                     \
+    for (reg = 0; reg < 4; reg++) /* initial designation < 0: skipped */   \
+      if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0           \
+         && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
+             != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
+       ENCODE_DESIGNATION                                                  \
+         (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
+  } while (0)
  
+/* Scan the line starting at SRC (up to SRC_END or a newline) and, for
+   each graphic register some charset on it requests, produce a
+   designation sequence; *DSTP is then set to the local DST.  A
+   non-nil TABLE unifies each character first.  If the current block
+   ends before any end-of-line, some designations may be missed.  */
+encode_designation_at_bol (coding, table, src, src_end, dstp)
+     struct coding_system *coding;
+     Lisp_Object table;
+     unsigned char *src, *src_end, **dstp;
+{
+  int charset, c, found = 0, reg;
+  /* Table of charsets to be designated to each graphic register.  */
+  int r[4];
+  unsigned char *dst = *dstp;	/* ENCODE_DESIGNATION presumably writes here */
+
+  for (reg = 0; reg < 4; reg++)
+    r[reg] = -1;		/* -1: nothing to designate yet */
+
+  while (src < src_end && *src != '\n' && found < 4)
+    {
+      int bytes = BYTES_BY_CHAR_HEAD (*src);
+      
+      if (NILP (table))
+       charset = CHARSET_AT (src);
+      else
+       {
+         int c_alt, c1, c2;
+
+         SPLIT_STRING(src, bytes, charset, c1, c2);
+         if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
+           charset = CHAR_CHARSET (c_alt);
+       }
+
+      reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
+      if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
+       {			/* first charset asking for this register */
+         found++;
+         r[reg] = charset;
+       }
+
+      src += bytes;
+    }
+
+  if (found)
+    {
+      for (reg = 0; reg < 4; reg++)
+       if (r[reg] >= 0	/* skip registers already holding the charset */
+           && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
+         ENCODE_DESIGNATION (r[reg], reg, coding);
+      *dstp = dst;
+    }
+}
+
  /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
  
  int
@@ -1278,10 +1481,15 @@ encode_coding_iso2022 (coding, source, destination,
    unsigned char *src_end = source + src_bytes;
    unsigned char *dst = destination;
    unsigned char *dst_end = destination + dst_bytes;
-  /* Since the maximum bytes produced by each loop is 6, we subtract 5
+  /* Since the maximum bytes produced by each loop is 20, we subtract 19
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
-  unsigned char *adjusted_dst_end = dst_end - 5;
+  unsigned char *adjusted_dst_end = dst_end - 19;
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_encode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_encode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -1291,9 +1499,18 @@ encode_coding_iso2022 (coding, source, destination,
          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
          reset to SRC_BASE before exiting.  */
        unsigned char *src_base = src;
-      unsigned char c1 = *src++, c2, c3, c4;
-      int charset;
+      int charset, c1, c2, c3, c4;
+
+      if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
+         && CODING_SPEC_ISO_BOL (coding))
+       {
+         /* We have to produce designation sequences if any now.  */
+         encode_designation_at_bol (coding, unification_table,
+                                    src, src_end, &dst);
+         CODING_SPEC_ISO_BOL (coding) = 0;
+       }
  
+      c1 = *src++;
        /* If we are seeing a component of a composite character, we are
          seeing a leading-code specially encoded for composition, or a
          composition rule if composing with rule.  We must set C1
@@ -1334,12 +1551,12 @@ encode_coding_iso2022 (coding, source, destination,
        switch (emacs_code_class[c1])
         {
         case EMACS_ascii_code:
-         ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1);
+         ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
           break;
  
         case EMACS_control_code:
           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
-           ENCODE_RESET_PLANE_AND_REGISTER (0);
+           ENCODE_RESET_PLANE_AND_REGISTER;
           *dst++ = c1;
           break;
  
@@ -1347,7 +1564,7 @@ encode_coding_iso2022 (coding, source, destination,
           if (!coding->selective)
             {
               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
-               ENCODE_RESET_PLANE_AND_REGISTER (0);
+               ENCODE_RESET_PLANE_AND_REGISTER;
               *dst++ = c1;
               break;
             }
@@ -1355,37 +1572,71 @@ encode_coding_iso2022 (coding, source, destination,
  
         case EMACS_linefeed_code:
           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
-           ENCODE_RESET_PLANE_AND_REGISTER (1);
+           ENCODE_RESET_PLANE_AND_REGISTER;
+         if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
+           bcopy (coding->spec.iso2022.initial_designation,
+                  coding->spec.iso2022.current_designation,
+                  sizeof coding->spec.iso2022.initial_designation);
           if (coding->eol_type == CODING_EOL_LF
-             || coding->eol_type == CODING_EOL_AUTOMATIC)
+             || coding->eol_type == CODING_EOL_UNDECIDED)
             *dst++ = ISO_CODE_LF;
           else if (coding->eol_type == CODING_EOL_CRLF)
             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
           else
             *dst++ = ISO_CODE_CR;
+         CODING_SPEC_ISO_BOL (coding) = 1;
           break;
  
         case EMACS_leading_code_2:
           ONE_MORE_BYTE (c2);
-         ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2);
+         if (c2 < 0xA0)
+           {
+             /* invalid sequence */
+             *dst++ = c1;
+             *dst++ = c2;
+           }
+         else
+           ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
           break;
  
         case EMACS_leading_code_3:
           TWO_MORE_BYTES (c2, c3);
-         if (c1 < LEADING_CODE_PRIVATE_11)
-           ENCODE_ISO_CHARACTER_DIMENSION2 (c1, c2, c3);
+         if (c2 < 0xA0 || c3 < 0xA0)
+           {
+             /* invalid sequence */
+             *dst++ = c1;
+             *dst++ = c2;
+             *dst++ = c3;
+           }
+         else if (c1 < LEADING_CODE_PRIVATE_11)
+           ENCODE_ISO_CHARACTER (c1, c2, c3);
           else
-           ENCODE_ISO_CHARACTER_DIMENSION1 (c2, c3);
+           ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
           break;
  
         case EMACS_leading_code_4:
           THREE_MORE_BYTES (c2, c3, c4);
-         ENCODE_ISO_CHARACTER_DIMENSION2 (c2, c3, c4);
+         if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
+           {
+             /* invalid sequence */
+             *dst++ = c1;
+             *dst++ = c2;
+             *dst++ = c3;
+             *dst++ = c4;
+           }
+         else
+           ENCODE_ISO_CHARACTER (c2, c3, c4);
           break;
  
         case EMACS_leading_code_composition:
-         ONE_MORE_BYTE (c1);
-         if (c1 == 0xFF)
+         ONE_MORE_BYTE (c2);
+         if (c2 < 0xA0)
+           {
+             /* invalid sequence */
+             *dst++ = c1;
+             *dst++ = c2;
+           }
+         else if (c2 == 0xFF)
             {
               coding->composing = COMPOSING_WITH_RULE_HEAD;
               ENCODE_COMPOSITION_WITH_RULE_START;
@@ -1406,22 +1657,26 @@ encode_coding_iso2022 (coding, source, destination,
         }
        continue;
      label_end_of_loop:
-      coding->carryover_size = src - src_base;
+      /* We reach here because the source data does not end at a
+        character boundary.  */
+      coding->carryover_size = src_end - src_base;
        bcopy (src_base, coding->carryover, coding->carryover_size);
-      src = src_base;
+      src = src_end;
        break;
      }
  
    /* If this is the last block of the text to be encoded, we must
-     reset the state of graphic planes and registers to initial one.
-     In addition, we had better just flush out all remaining codes in
-     the text although they are not valid characters.  */
-  if (coding->last_block)
+     reset graphic planes and registers to the initial state.  */
+  if (src >= src_end && coding->last_block)
      {
-      ENCODE_RESET_PLANE_AND_REGISTER (1);
-      bcopy(src, dst, src_end - src);
-      dst += (src_end - src);
-      src = src_end;
+      ENCODE_RESET_PLANE_AND_REGISTER;
+      if (coding->carryover_size > 0
+         && coding->carryover_size < (dst_end - dst))
+       {
+         bcopy (coding->carryover, dst, coding->carryover_size);
+         dst += coding->carryover_size;
+         coding->carryover_size = 0;
+       }
      }
    *consumed = src - source;
    return dst - destination;
@@ -1430,7 +1685,7 @@ encode_coding_iso2022 (coding, source, destination,
  \f
  /*** 4. SJIS and BIG5 handlers ***/
  
-/* Although SJIS and BIG5 are not ISO's coding system, They are used
+/* Although SJIS and BIG5 are not ISO coding systems, they are used
     quite widely.  So, for the moment, Emacs supports them in the bare
     C code.  But, in the future, they may be supported only by CCL.  */
  
@@ -1502,6 +1757,63 @@ encode_coding_iso2022 (coding, source, destination,
      b2 += b2 < 0x3F ? 0x40 : 0x62;                                     \
    } while (0)
  
+#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                    \
+  do { /* Unify the character if a table is set, then decode it.  */   \
+    int c_alt, charset_alt = (charset);                                        \
+    if (!NILP (unification_table)                                      \
+       && ((c_alt = unify_char (unification_table,                     \
+                                -1, (charset), c1, c2)) >= 0))         \
+         SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
+    if (charset_alt == CHARSET_ASCII || charset_alt < 0)               \
+      DECODE_CHARACTER_ASCII (c1);                                     \
+    else if (CHARSET_DIMENSION (charset_alt) == 1) /* C2 not used.  */ \
+      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                   \
+    else                                                               \
+      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);               \
+  } while (0)
+
+#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                      \
+  do { /* Encode one character, unifying it first if a table is set.  */ \
+    int c_alt, charset_alt;                                              \
+    if (!NILP (unification_table)                                        \
+        && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
+           >= 0))                                                        \
+      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                           \
+    else /* No table, or unify_char returned a negative value.  */      \
+      charset_alt = charset;                                             \
+    if (charset_alt == charset_ascii)                                    \
+      *dst++ = c1;                                                       \
+    else if (CHARSET_DIMENSION (charset_alt) == 1)                       \
+      {                                                                          \
+       if (sjis_p && charset_alt == charset_katakana_jisx0201)           \
+         *dst++ = c1;                                                    \
+       else /* Pass the charset byte and the code through as is.  */    \
+         *dst++ = charset_alt, *dst++ = c1;                              \
+      }                                                                          \
+    else                                                                 \
+      {                                                                          \
+       c1 &= 0x7F, c2 &= 0x7F; /* Clear the high bit of both codes.  */ \
+       if (sjis_p && charset_alt == charset_jisx0208)                    \
+         {                                                               \
+           unsigned char s1, s2;                                         \
+                                                                         \
+           ENCODE_SJIS (c1, c2, s1, s2);                                 \
+           *dst++ = s1, *dst++ = s2;                                     \
+         }                                                               \
+       else if (!sjis_p                                                  \
+                && (charset_alt == charset_big5_1                        \
+                    || charset_alt == charset_big5_2))                   \
+         {                                                               \
+           unsigned char b1, b2;                                         \
+                                                                         \
+           ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);                    \
+           *dst++ = b1, *dst++ = b2;                                     \
+         }                                                               \
+       else /* Neither SJIS nor BIG5 applies: emit the three bytes.  */ \
+         *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;                 \
+      }                                                                          \
+  } while (0)
+
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
     Check if a text is encoded in SJIS.  If it is, return
     CODING_CATEGORY_MASK_SJIS, else return 0.  */
@@ -1573,6 +1885,11 @@ decode_coding_sjis_big5 (coding, source, destination,
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
    unsigned char *adjusted_dst_end = dst_end - 3;
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_decode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_decode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -1597,8 +1914,10 @@ decode_coding_sjis_big5 (coding, source, destination,
           else
             *dst++ = c1;
         }
-      else if (c1 < 0x80)
+      else if (c1 < 0x20)
         *dst++ = c1;
+      else if (c1 < 0x80)
+       DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
        else if (c1 < 0xA0 || c1 >= 0xE0)
         {
           /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */
@@ -1606,7 +1925,7 @@ decode_coding_sjis_big5 (coding, source, destination,
             {
               ONE_MORE_BYTE (c2);
               DECODE_SJIS (c1, c2, c3, c4);
-             DECODE_CHARACTER_DIMENSION2 (charset_jisx0208, c3, c4);
+             DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
             }
           else if (c1 >= 0xE0 && c1 < 0xFF)
             {
@@ -1614,7 +1933,7 @@ decode_coding_sjis_big5 (coding, source, destination,
  
               ONE_MORE_BYTE (c2);
               DECODE_BIG5 (c1, c2, charset, c3, c4);
-             DECODE_CHARACTER_DIMENSION2 (charset, c3, c4);
+             DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
             }
           else                  /* Invalid code */
             *dst++ = c1;
@@ -1623,14 +1942,14 @@ decode_coding_sjis_big5 (coding, source, destination,
         {
           /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
           if (sjis_p)
-           DECODE_CHARACTER_DIMENSION1 (charset_katakana_jisx0201, c1);
+           DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, /* dummy */ c2);
           else
             {
               int charset;
  
               ONE_MORE_BYTE (c2);
               DECODE_BIG5 (c1, c2, charset, c3, c4);
-             DECODE_CHARACTER_DIMENSION2 (charset, c3, c4);
+             DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
             }
         }
        continue;
@@ -1671,6 +1990,11 @@ encode_coding_sjis_big5 (coding, source, destination,
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
    unsigned char *adjusted_dst_end = dst_end - 1;
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_encode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_encode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -1698,6 +2022,9 @@ encode_coding_sjis_big5 (coding, source, destination,
        switch (emacs_code_class[c1])
         {
         case EMACS_ascii_code:
+         ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
+         break;
+
         case EMACS_control_code:
           *dst++ = c1;
           break;
@@ -1712,7 +2039,7 @@ encode_coding_sjis_big5 (coding, source, destination,
  
         case EMACS_linefeed_code:
           if (coding->eol_type == CODING_EOL_LF
-             || coding->eol_type == CODING_EOL_AUTOMATIC)
+             || coding->eol_type == CODING_EOL_UNDECIDED)
             *dst++ = '\n';
           else if (coding->eol_type == CODING_EOL_CRLF)
             *dst++ = '\r', *dst++ = '\n';
@@ -1722,36 +2049,17 @@ encode_coding_sjis_big5 (coding, source, destination,
  
         case EMACS_leading_code_2:
           ONE_MORE_BYTE (c2);
-         if (sjis_p && c1 == charset_katakana_jisx0201)
-           *dst++ = c2;
-         else
-           *dst++ = c1, *dst++ = c2;
+         ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
           break;
  
         case EMACS_leading_code_3:
           TWO_MORE_BYTES (c2, c3);
-         c2 &= 0x7F, c3 &= 0x7F;
-         if (sjis_p && c1 == charset_jisx0208)
-           {
-             unsigned char s1, s2;
-
-             ENCODE_SJIS (c2, c3, s1, s2);
-             *dst++ = s1, *dst++ = s2;
-           }
-         else if (!sjis_p && (c1 == charset_big5_1 || c1 == charset_big5_2))
-           {
-             unsigned char b1, b2;
-
-             ENCODE_BIG5 (c1, c2, c3, b1, b2);
-             *dst++ = b1, *dst++ = b2;
-           }
-         else
-           *dst++ = c1, *dst++ = c2, *dst++ = c3;
+         ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
           break;
  
         case EMACS_leading_code_4:
           THREE_MORE_BYTES (c2, c3, c4);
-         *dst++ = c1, *dst++ = c2, *dst++ = c3, *dst++ = c4;
+         ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
           break;
  
         case EMACS_leading_code_composition:
@@ -1764,9 +2072,9 @@ encode_coding_sjis_big5 (coding, source, destination,
        continue;
  
      label_end_of_loop:
-      coding->carryover_size = src - src_base;
+      coding->carryover_size = src_end - src_base;
        bcopy (src_base, coding->carryover, coding->carryover_size);
-      src = src_base;
+      src = src_end;
        break;
      }
  
@@ -1811,7 +2119,7 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
                 ONE_MORE_BYTE (c);
                 if (c != '\n')
                   *dst++ = '\r';
-
+               *dst++ = c;
               }
             else
               *dst++ = c;
@@ -1868,7 +2176,7 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
    switch (coding->eol_type)
      {
      case CODING_EOL_LF:
-    case CODING_EOL_AUTOMATIC:
+    case CODING_EOL_UNDECIDED:
        produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
        bcopy (source, destination, produced);
        if (coding->selective)
@@ -1934,13 +2242,14 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
     `element[0]' contains information to be set in `coding->type'.  The
     value and its meaning is as follows:
  
-   0 -- coding_system_internal
-   1 -- coding_system_sjis
-   2 -- coding_system_iso2022
-   3 -- coding_system_big5
-   4 -- coding_system_ccl
-   nil -- coding_system_no_conversion
-   t -- coding_system_automatic
+   0 -- coding_type_emacs_mule
+   1 -- coding_type_sjis
+   2 -- coding_type_iso2022
+   3 -- coding_type_big5
+   4 -- coding_type_ccl encoder/decoder written in CCL
+   nil -- coding_type_no_conversion
+   t -- coding_type_undecided (automatic conversion on decoding,
+                              no-conversion on encoding)
  
     `element[4]' contains information to be set in `coding->flags' and
     `coding->spec'.  The meaning varies by `coding->type'.
@@ -1985,61 +2294,76 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
     return 0.  */
  
  int
-setup_coding_system (coding_system_symbol, coding)
-     Lisp_Object coding_system_symbol;
+setup_coding_system (coding_system, coding)
+     Lisp_Object coding_system;
       struct coding_system *coding;
  {
-  Lisp_Object coding_system_vector = Qnil;
    Lisp_Object type, eol_type;
  
-  /* At first, set several fields default values.  */
+  /* At first, set several fields to default values.  */
    coding->require_flushing = 0;
    coding->last_block = 0;
    coding->selective = 0;
    coding->composing = 0;
    coding->direction = 0;
    coding->carryover_size = 0;
-  coding->symbol = Qnil;
    coding->post_read_conversion = coding->pre_write_conversion = Qnil;
+  coding->character_unification_table_for_decode = Qnil;
+  coding->character_unification_table_for_encode = Qnil;
  
-  /* Get value of property `coding-system'.  If it is a Lisp symbol
-     pointing another coding system, fetch its property until we get a
-     vector.  */
-  while (!NILP (coding_system_symbol))
+  Vlast_coding_system_used = coding->symbol = coding_system;
+  eol_type = Qnil;
+  /* Get value of property `coding-system' until we get a vector.
+     While doing that, also get values of properties
+     `post-read-conversion', `pre-write-conversion',
+     `character-unification-table-for-decode',
+     `character-unification-table-for-encode' and `eol-type'.  */
+  while (!NILP (coding_system) && SYMBOLP (coding_system))
      {
-      coding->symbol = coding_system_symbol;
        if (NILP (coding->post_read_conversion))
-       coding->post_read_conversion = Fget (coding_system_symbol,
+       coding->post_read_conversion = Fget (coding_system,
                                              Qpost_read_conversion);
-      if (NILP (coding->pre_write_conversion))
-       coding->pre_write_conversion = Fget (coding_system_symbol,
+      if (NILP (coding->pre_write_conversion)) 
+       coding->pre_write_conversion = Fget (coding_system,
                                              Qpre_write_conversion);
+      if (!inhibit_eol_conversion && NILP (eol_type))
+       eol_type = Fget (coding_system, Qeol_type);
  
-      coding_system_vector = Fget (coding_system_symbol, Qcoding_system);
-      if (VECTORP (coding_system_vector))
-       break;
-      coding_system_symbol = coding_system_vector;
-    }
-  Vlast_coding_system_used = coding->symbol;
+      if (NILP (coding->character_unification_table_for_decode))
+       coding->character_unification_table_for_decode
+         = Fget (coding_system, Qcharacter_unification_table_for_decode);
  
-  if (!VECTORP (coding_system_vector)
-      || XVECTOR (coding_system_vector)->size != 5)
-    goto label_invalid_coding_system;
+      if (NILP (coding->character_unification_table_for_encode))
+       coding->character_unification_table_for_encode
+         = Fget (coding_system, Qcharacter_unification_table_for_encode);
  
-  /* Get value of property `eol-type' by searching from the root
-     coding-system.  */
-  coding_system_symbol = coding->symbol;
-  eol_type = Qnil;
-  while (SYMBOLP (coding_system_symbol) && !NILP (coding_system_symbol))
-    {
-      eol_type = Fget (coding_system_symbol, Qeol_type);
-      if (!NILP (eol_type))
-       break;
-      coding_system_symbol = Fget (coding_system_symbol, Qcoding_system);
+      coding_system = Fget (coding_system, Qcoding_system);
      }
  
+  while (!NILP (coding->character_unification_table_for_decode)
+        && SYMBOLP (coding->character_unification_table_for_decode))
+       coding->character_unification_table_for_decode
+         = Fget (coding->character_unification_table_for_decode,
+                 Qcharacter_unification_table_for_decode);
+  if (!NILP (coding->character_unification_table_for_decode)
+      && !CHAR_TABLE_P (coding->character_unification_table_for_decode))
+      coding->character_unification_table_for_decode = Qnil;
+
+  while (!NILP (coding->character_unification_table_for_encode)
+        && SYMBOLP (coding->character_unification_table_for_encode))
+       coding->character_unification_table_for_encode
+         = Fget (coding->character_unification_table_for_encode,
+                 Qcharacter_unification_table_for_encode);
+  if (!NILP (coding->character_unification_table_for_encode)
+      && !CHAR_TABLE_P (coding->character_unification_table_for_encode))
+      coding->character_unification_table_for_encode = Qnil;
+
+  if (!VECTORP (coding_system)
+      || XVECTOR (coding_system)->size != 5)
+    goto label_invalid_coding_system;
+
    if (VECTORP (eol_type))
-    coding->eol_type = CODING_EOL_AUTOMATIC;
+    coding->eol_type = CODING_EOL_UNDECIDED;
    else if (XFASTINT (eol_type) == 1)
      coding->eol_type = CODING_EOL_CRLF;
    else if (XFASTINT (eol_type) == 2)
@@ -2047,11 +2371,11 @@ setup_coding_system (coding_system_symbol, coding)
    else
      coding->eol_type = CODING_EOL_LF;
  
-  type = XVECTOR (coding_system_vector)->contents[0];
+  type = XVECTOR (coding_system)->contents[0];
    switch (XFASTINT (type))
      {
      case 0:
-      coding->type = coding_type_internal;
+      coding->type = coding_type_emacs_mule;
        break;
  
      case 1:
@@ -2061,10 +2385,12 @@ setup_coding_system (coding_system_symbol, coding)
      case 2:
        coding->type = coding_type_iso2022;
        {
-       Lisp_Object val = XVECTOR (coding_system_vector)->contents[4];
+       Lisp_Object val;
         Lisp_Object *flags;
         int i, charset, default_reg_bits = 0;
  
+       val = XVECTOR (coding_system)->contents[4];
+
         if (!VECTORP (val) || XVECTOR (val)->size != 32)
           goto label_invalid_coding_system;
  
@@ -2078,7 +2404,12 @@ setup_coding_system (coding_system_symbol, coding)
              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
-            | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION));
+            | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
+            | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
+            | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
+            | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
+            | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
+            );
  
         /* Invoke graphic register 0 to plane 0.  */
         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
@@ -2086,7 +2417,9 @@ setup_coding_system (coding_system_symbol, coding)
         CODING_SPEC_ISO_INVOCATION (coding, 1)
           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
         /* Not single shifting at first.  */
-       CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0;
+       CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
+       /* Beginning of buffer should also be regarded as bol. */
+       CODING_SPEC_ISO_BOL (coding) = 1;
  
         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
            FLAGS[REG] can be one of below:
@@ -2098,15 +2431,19 @@ setup_coding_system (coding_system_symbol, coding)
                   elements (if integer) is designated to REG on request,
                   if an element is t, REG can be used by any charset,
                 nil: REG is never used.  */
-       for (charset = 0; charset < MAX_CHARSET; charset++)
-         CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = -1;
+       for (charset = 0; charset <= MAX_CHARSET; charset++)
+         CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+           = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
+       bzero (CODING_SPEC_ISO_EXPECTED_CHARSETS (coding), MAX_CHARSET + 1);
         for (i = 0; i < 4; i++)
           {
             if (INTEGERP (flags[i])
-               && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
+               && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
+               || (charset = get_charset_id (flags[i])) >= 0)
               {
                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
+               CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] = 1;
               }
             else if (EQ (flags[i], Qt))
               {
@@ -2119,10 +2456,12 @@ setup_coding_system (coding_system_symbol, coding)
  
                 if (INTEGERP (XCONS (tail)->car)
                     && (charset = XINT (XCONS (tail)->car),
-                       CHARSET_VALID_P (charset)))
+                       CHARSET_VALID_P (charset))
+                   || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
                   {
                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
+                   CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] = 1;
                   }
                 else
                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
@@ -2131,9 +2470,14 @@ setup_coding_system (coding_system_symbol, coding)
                   {
                     if (INTEGERP (XCONS (tail)->car)
                         && (charset = XINT (XCONS (tail)->car),
-                           CHARSET_VALID_P (charset)))
-                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
-                       = i;
+                           CHARSET_VALID_P (charset))
+                       || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
+                     {
+                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+                         = i;
+                       CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset]
+                         = 1;
+                     }
                     else if (EQ (XCONS (tail)->car, Qt))
                       default_reg_bits |= 1 << i;
                     tail = XCONS (tail)->cdr;
@@ -2156,9 +2500,10 @@ setup_coding_system (coding_system_symbol, coding)
               default_reg_bits &= 3;
           }
  
-       for (charset = 0; charset < MAX_CHARSET; charset++)
+       for (charset = 0; charset <= MAX_CHARSET; charset++)
           if (CHARSET_VALID_P (charset)
-             && CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) < 0)
+             && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+                 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
             {
               /* We have not yet decided where to designate CHARSET.  */
               int reg_bits = default_reg_bits;
@@ -2190,7 +2535,7 @@ setup_coding_system (coding_system_symbol, coding)
      case 3:
        coding->type = coding_type_big5;
        coding->flags
-       = (NILP (XVECTOR (coding_system_vector)->contents[4])
+       = (NILP (XVECTOR (coding_system)->contents[4])
            ? CODING_FLAG_BIG5_HKU
            : CODING_FLAG_BIG5_ETEN);
        break;
@@ -2198,7 +2543,7 @@ setup_coding_system (coding_system_symbol, coding)
      case 4:
        coding->type = coding_type_ccl;
        {
-       Lisp_Object val = XVECTOR (coding_system_vector)->contents[4];
+       Lisp_Object val = XVECTOR (coding_system)->contents[4];
         if (CONSP  (val)
             && VECTORP (XCONS (val)->car)
             && VECTORP (XCONS (val)->cdr))
@@ -2212,9 +2557,13 @@ setup_coding_system (coding_system_symbol, coding)
        coding->require_flushing = 1;
        break;
  
+    case 5:
+      coding->type = coding_type_raw_text;
+      break;
+
      default:
        if (EQ (type, Qt))
-       coding->type = coding_type_automatic;
+       coding->type = coding_type_undecided;
        else
         coding->type = coding_type_no_conversion;
        break;
@@ -2223,6 +2572,9 @@ setup_coding_system (coding_system_symbol, coding)
  
   label_invalid_coding_system:
    coding->type = coding_type_no_conversion;
+  coding->eol_type = CODING_EOL_LF;
+  coding->symbol = coding->pre_write_conversion = coding->post_read_conversion
+    = Qnil;
    return -1;
  }
  
@@ -2232,56 +2584,66 @@ setup_coding_system (coding_system_symbol, coding)
     because they use the same range of codes.  So, at first, coding
     systems are categorized into 7, those are:
  
-   o coding-category-internal
+   o coding-category-emacs-mule
  
         The category for a coding system which has the same code range
         as Emacs' internal format.  Assigned the coding-system (Lisp
-       symbol) `coding-system-internal' by default.
+       symbol) `emacs-mule' by default.
  
     o coding-category-sjis
  
         The category for a coding system which has the same code range
         as SJIS.  Assigned the coding-system (Lisp
-       symbol) `coding-system-sjis' by default.
+       symbol) `japanese-shift-jis' by default.
  
     o coding-category-iso-7
  
         The category for a coding system which has the same code range
-       as ISO2022 of 7-bit environment.  Assigned the coding-system
-       (Lisp symbol) `coding-system-junet' by default.
+       as ISO2022 of 7-bit environment.  This doesn't use any locking
+       shift and single shift functions.  Assigned the coding-system
+       (Lisp symbol) `iso-2022-7bit' by default.
  
     o coding-category-iso-8-1
  
         The category for a coding system which has the same code range
         as ISO2022 of 8-bit environment and graphic plane 1 used only
-       for DIMENSION1 charset.  Assigned the coding-system (Lisp
-       symbol) `coding-system-ctext' by default.
+       for DIMENSION1 charset.  This doesn't use any locking shift
+       and single shift functions.  Assigned the coding-system (Lisp
+       symbol) `iso-latin-1' by default.
  
     o coding-category-iso-8-2
  
         The category for a coding system which has the same code range
         as ISO2022 of 8-bit environment and graphic plane 1 used only
-       for DIMENSION2 charset.  Assigned the coding-system (Lisp
-       symbol) `coding-system-euc-japan' by default.
+       for DIMENSION2 charset.  This doesn't use any locking shift
+       and single shift functions.  Assigned the coding-system (Lisp
+       symbol) `japanese-iso-8bit' by default.
  
-   o coding-category-iso-else
+   o coding-category-iso-7-else
  
         The category for a coding system which has the same code range
-       as ISO2022 but not belongs to any of the above three
-       categories.  Assigned the coding-system (Lisp symbol)
-       `coding-system-iso-2022-ss2-7' by default.
+       as ISO2022 of 7-bit environment but uses locking shift or
+       single shift functions.  Assigned the coding-system (Lisp
+       symbol) `iso-2022-7bit-lock' by default.
+
+   o coding-category-iso-8-else
+
+       The category for a coding system which has the same code range
+       as ISO2022 of 8-bit environment but uses locking shift or
+       single shift functions.  Assigned the coding-system (Lisp
+       symbol) `iso-2022-8bit-ss2' by default.
  
     o coding-category-big5
  
         The category for a coding system which has the same code range
         as BIG5.  Assigned the coding-system (Lisp symbol)
-       `coding-system-big5' by default.
+       `cn-big5' by default.
  
     o coding-category-binary
  
         The category for a coding system not categorized in any of the
         above.  Assigned the coding-system (Lisp symbol)
-       `coding-system-noconv' by default.
+       `no-conversion' by default.
  
     Each of them is a Lisp symbol and the value is an actual
     `coding-system's (this is also a Lisp symbol) assigned by a user.
@@ -2309,6 +2671,7 @@ detect_coding_mask (src, src_bytes)
  
    /* At first, skip all ASCII characters and control characters except
       for three ISO2022 specific control characters.  */
+ label_loop_detect_coding:
    while (src < src_end)
      {
        c = *src;
@@ -2325,31 +2688,51 @@ detect_coding_mask (src, src_bytes)
    /* The text seems to be encoded in some multilingual coding system.
       Now, try to find in which coding system the text is encoded.  */
    if (c < 0x80)
-    /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
-    /* C is an ISO2022 specific control code of C0.  */
-    mask = detect_coding_iso2022 (src, src_end);
-
-  else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3 || c == ISO_CODE_CSI)
-    /* C is an ISO2022 specific control code of C1,
-       or the first byte of SJIS's 2-byte character code,
-       or a leading code of Emacs.  */
-    mask = (detect_coding_iso2022 (src, src_end)
-           | detect_coding_sjis (src, src_end)
-           | detect_coding_internal (src, src_end));
-
+    {
+      /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
+      /* C is an ISO2022 specific control code of C0.  */
+      mask = detect_coding_iso2022 (src, src_end);
+      src++;
+      if (mask == 0)
+       /* No valid ISO2022 code follows C.  Try again.  */
+       goto label_loop_detect_coding;
+      mask |= CODING_CATEGORY_MASK_RAW_TEXT;
+    }
    else if (c < 0xA0)
-    /* C is the first byte of SJIS character code,
-       or a leading-code of Emacs.  */
-    mask = (detect_coding_sjis (src, src_end)
-           | detect_coding_internal (src, src_end));
+    {
+      /* If C is a special latin extra code,
+        or is an ISO2022 specific control code of C1 (SS2 or SS3), 
+        or is an ISO2022 control-sequence-introducer (CSI),
+        we should also consider the possibility of ISO2022 codings.  */
+      if ((VECTORP (Vlatin_extra_code_table)
+          && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
+         || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
+         || (c == ISO_CODE_CSI
+             && (src < src_end
+                 && (*src == ']'
+                     || (src + 1 < src_end
+                         && src[1] == ']'
+                         && (*src == '0' || *src == '1' || *src == '2'))))))
+       mask = (detect_coding_iso2022 (src, src_end)
+               | detect_coding_sjis (src, src_end)
+               | detect_coding_emacs_mule (src, src_end)
+               | CODING_CATEGORY_MASK_RAW_TEXT);
  
+      else
+       /* C is the first byte of SJIS character code,
+          or a leading-code of Emacs' internal format (emacs-mule).  */
+       mask = (detect_coding_sjis (src, src_end)
+               | detect_coding_emacs_mule (src, src_end)
+               | CODING_CATEGORY_MASK_RAW_TEXT);
+    }
    else
      /* C is a character of ISO2022 in graphic plane right,
         or a SJIS's 1-byte character code (i.e. JISX0201),
         or the first byte of BIG5's 2-byte code.  */
      mask = (detect_coding_iso2022 (src, src_end)
             | detect_coding_sjis (src, src_end)
-           | detect_coding_big5 (src, src_end));
+           | detect_coding_big5 (src, src_end)
+           | CODING_CATEGORY_MASK_RAW_TEXT);
  
    return mask;
  }
@@ -2365,49 +2748,42 @@ detect_coding (coding, src, src_bytes)
  {
    int mask = detect_coding_mask (src, src_bytes);
    int idx;
+  Lisp_Object val = Vcoding_category_list;
  
    if (mask == CODING_CATEGORY_MASK_ANY)
      /* We found nothing other than ASCII.  There's nothing to do.  */
      return;
  
-  if (!mask)
-    /* The source text seems to be encoded in unknown coding system.
-       Emacs regards the category of such a kind of coding system as
-       `coding-category-binary'.  We assume that a user has assigned
-       an appropriate coding system for a `coding-category-binary'.  */
-    idx = CODING_CATEGORY_IDX_BINARY;
-  else
-    {
-      /* We found some plausible coding systems.  Let's use a coding
-        system of the highest priority.  */
-      Lisp_Object val = Vcoding_category_list;
+  /* We found some plausible coding systems.  Let's use a coding
+     system of the highest priority.  */
  
-      if (CONSP (val))
-       while (!NILP (val))
-         {
-           idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
-           if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx)))
-             break;
-           val = XCONS (val)->cdr;
-         }
-      else
-       val = Qnil;
+  if (CONSP (val))
+    while (!NILP (val))
+      {
+       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
+       if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx)))
+         break;
+       val = XCONS (val)->cdr;
+      }
+  else
+    val = Qnil;
  
-      if (NILP (val))
-       {
-         /* For unknown reason, `Vcoding_category_list' contains none
-            of found categories.  Let's use any of them.  */
-         for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++)
-           if (mask & (1 << idx))
-             break;
-       }
+  if (NILP (val))
+    {
+      /* For unknown reason, `Vcoding_category_list' contains none of
+        found categories.  Let's use any of them.  */
+      for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++)
+       if (mask & (1 << idx))
+         break;
      }
    setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding);
  }
  
  /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
     is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
-   CODING_EOL_CR, and CODING_EOL_AUTOMATIC.  */
+   CODING_EOL_CR, and CODING_EOL_UNDECIDED.  */
+
+#define MAX_EOL_CHECK_COUNT 3
  
  int
  detect_eol_type (src, src_bytes)
@@ -2416,21 +2792,34 @@ detect_eol_type (src, src_bytes)
  {
    unsigned char *src_end = src + src_bytes;
    unsigned char c;
+  int total = 0;               /* How many end-of-lines are found so far.  */
+  int eol_type = CODING_EOL_UNDECIDED;
+  int this_eol_type;
  
-  while (src < src_end)
+  while (src < src_end && total < MAX_EOL_CHECK_COUNT)
      {
        c = *src++;
-      if (c == '\n')
-       return CODING_EOL_LF;
-      else if (c == '\r')
+      if (c == '\n' || c == '\r')
         {
-         if (src < src_end && *src == '\n')
-           return CODING_EOL_CRLF;
+         total++;
+         if (c == '\n')
+           this_eol_type = CODING_EOL_LF;
+         else if (src >= src_end || *src != '\n')
+           this_eol_type = CODING_EOL_CR;
           else
-           return CODING_EOL_CR;
+           this_eol_type = CODING_EOL_CRLF, src++;
+
+         if (eol_type == CODING_EOL_UNDECIDED)
+           /* This is the first end-of-line.  */
+           eol_type = this_eol_type;
+         else if (eol_type != this_eol_type)
+           /* The found type is different from what was found before.
+              Let's notify the caller about this inconsistency.  */
+           return CODING_EOL_INCONSISTENT;
         }
      }
-  return CODING_EOL_AUTOMATIC;
+
+  return eol_type;
  }
  
  /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
@@ -2443,14 +2832,35 @@ detect_eol (coding, src, src_bytes)
       unsigned char *src;
       int src_bytes;
  {
-  Lisp_Object val;
+  Lisp_Object val, coding_system;
    int eol_type = detect_eol_type (src, src_bytes);
  
-  if (eol_type == CODING_EOL_AUTOMATIC)
+  if (eol_type == CODING_EOL_UNDECIDED)
      /*  We found no end-of-line in the source text.  */
      return;
  
-  val = Fget (coding->symbol, Qeol_type);
+  if (eol_type == CODING_EOL_INCONSISTENT)
+    {
+#if 0
+      /* This code is suppressed until we find a better way to
+        distinguish raw text file and binary file.  */
+
+      /* If we have already detected that the coding is raw-text, the
+        coding should actually be no-conversion.  */
+      if (coding->type == coding_type_raw_text)
+       {
+         setup_coding_system (Qno_conversion, coding);
+         return;
+       }
+      /* Else, let's decode only text code anyway.  */
+#endif /* 0 */
+      eol_type = CODING_EOL_LF;
+    }
+
+  coding_system = coding->symbol;
+  while (!NILP (coding_system)
+        && NILP (val = Fget (coding_system, Qeol_type)))
+    coding_system = Fget (coding_system, Qcoding_system);
    if (VECTORP (val) && XVECTOR (val)->size == 3)
      setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
  }
@@ -2474,10 +2884,10 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
        return 0;
      }
  
-  if (coding->type == coding_type_automatic)
+  if (coding->type == coding_type_undecided)
      detect_coding (coding, source, src_bytes);
  
-  if (coding->eol_type == CODING_EOL_AUTOMATIC)
+  if (coding->eol_type == CODING_EOL_UNDECIDED)
      detect_eol (coding, source, src_bytes);
  
    coding->carryover_size = 0;
@@ -2490,10 +2900,11 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
        *consumed = produced;
        break;
  
-    case coding_type_internal:
-    case coding_type_automatic:
+    case coding_type_emacs_mule:
+    case coding_type_undecided:
+    case coding_type_raw_text:
        if (coding->eol_type == CODING_EOL_LF
-         ||  coding->eol_type == CODING_EOL_AUTOMATIC)
+         ||  coding->eol_type == CODING_EOL_UNDECIDED)
         goto label_no_conversion;
        produced = decode_eol (coding, source, destination,
                              src_bytes, dst_bytes, consumed);
@@ -2536,7 +2947,6 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
  {
    int produced;
  
-  coding->carryover_size = 0;
    switch (coding->type)
      {
      case coding_type_no_conversion:
@@ -2549,16 +2959,17 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
             {
               unsigned char *p = destination, *pend = destination + produced;
               while (p < pend)
-               if (*p++ = '\015') p[-1] = '\n';
+               if (*p++ == '\015') p[-1] = '\n';
             }
         }
        *consumed = produced;
        break;
  
-    case coding_type_internal:
-    case coding_type_automatic:
+    case coding_type_emacs_mule:
+    case coding_type_undecided:
+    case coding_type_raw_text:
        if (coding->eol_type == CODING_EOL_LF
-         ||  coding->eol_type == CODING_EOL_AUTOMATIC)
+         ||  coding->eol_type == CODING_EOL_UNDECIDED)
         goto label_no_conversion;
        produced = encode_eol (coding, source, destination,
                              src_bytes, dst_bytes, consumed);
@@ -2663,9 +3074,9 @@ get_conversion_buffer (size)
  #ifdef emacs
  /*** 7. Emacs Lisp library functions ***/
  
-DEFUN ("coding-system-vector", Fcoding_system_vector, Scoding_system_vector,
+DEFUN ("coding-system-spec", Fcoding_system_spec, Scoding_system_spec,
         1, 1, 0,
-  "Return coding-vector of CODING-SYSTEM.\n\
+  "Return coding-spec of CODING-SYSTEM.\n\
  If CODING-SYSTEM is not a valid coding-system, return nil.")
    (obj)
       Lisp_Object obj;
@@ -2682,28 +3093,34 @@ See document of make-coding-system for coding-system object.")
    (obj)
       Lisp_Object obj;
  {
-  return ((NILP (obj) || !NILP (Fcoding_system_vector (obj))) ? Qt : Qnil);
+  return ((NILP (obj) || !NILP (Fcoding_system_spec (obj))) ? Qt : Qnil);
  }
  
-DEFUN ("read-non-nil-coding-system",
-       Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0,
-  "Read a coding-system from the minibuffer, prompting with string PROMPT.")
+DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
+       Sread_non_nil_coding_system, 1, 1, 0,
+  "Read a coding system from the minibuffer, prompting with string PROMPT.")
    (prompt)
       Lisp_Object prompt;
  {
-  return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_vector,
-                                   Qt, Qnil, Qnil),
-                 Qnil);
+  Lisp_Object val;
+  do
+    {
+      val = Fcompleting_read (prompt, Vobarray, Qcoding_system_spec,
+                             Qt, Qnil, Qnil, Qnil, Qnil);
+    }
+  while (XSTRING (val)->size == 0);
+  return (Fintern (val, Qnil));
  }
  
  DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0,
-  "Read a coding-system or nil from the minibuffer, prompting with string PROMPT.")
+  "Read a coding system or nil from the minibuffer, prompting with string PROMPT.")
    (prompt)
       Lisp_Object prompt;
  {
-  return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
-                                   Qt, Qnil, Qnil),
-                 Qnil);
+  Lisp_Object val;
+  val = Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
+                         Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
+  return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
  }
  
  DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
@@ -2719,15 +3136,15 @@ The value of property should be a vector of length 5.")
    if (!NILP (Fcoding_system_p (coding_system)))
      return coding_system;
    while (1)
-    Fsignal (Qcoding_system_error, coding_system);
+    Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
  }
  
  DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
         2, 2, 0,
-  "Detect coding-system of the text in the region between START and END.\n\
-Return a list of possible coding-systems ordered by priority.\n\
-If only ASCII characters are found, it returns `coding-system-automatic'\n\
- or its subsidiary coding-system according to a detected end-of-line format.")
+  "Detect coding system of the text in the region between START and END.\n\
+Return a list of possible coding systems ordered by priority.\n\
+If only ASCII characters are found, it returns `undecided'\n\
+ or its subsidiary coding system according to a detected end-of-line format.")
    (b, e)
       Lisp_Object b, e;
  {
@@ -2744,10 +3161,12 @@ If only ASCII characters are found, it returns `coding-system-automatic'\n\
  
    if (coding_mask == CODING_CATEGORY_MASK_ANY)
      {
-      val = intern ("coding-system-automatic");
-      if (eol_type != CODING_EOL_AUTOMATIC)
+      val = Qundecided;
+      if (eol_type != CODING_EOL_UNDECIDED
+         && eol_type != CODING_EOL_INCONSISTENT)
         {
-         Lisp_Object val2 = Fget (val, Qeol_type);
+         Lisp_Object val2;
+         val2 = Fget (Qundecided, Qeol_type);
           if (VECTORP (val2))
             val = XVECTOR (val2)->contents[eol_type];
         }
@@ -2766,20 +3185,34 @@ If only ASCII characters are found, it returns `coding-system-automatic'\n\
           int idx
             = XFASTINT (Fget (XCONS (val2)->car, Qcoding_category_index));
           if (coding_mask & (1 << idx))
-           val = Fcons (Fsymbol_value (XCONS (val2)->car), val);
+           {
+#if 0
+             /* This code is suppressed until we find a better way to
+                distinguish raw text file and binary file.  */
+
+             if (idx == CODING_CATEGORY_IDX_RAW_TEXT
+                 && eol_type == CODING_EOL_INCONSISTENT)
+               val = Fcons (Qno_conversion, val);
+             else
+#endif /* 0 */
+               val = Fcons (Fsymbol_value (XCONS (val2)->car), val);
+           }
         }
  
        /* Then, change the order of the list, while getting subsidiary
          coding-systems.  */
        val2 = val;
        val = Qnil;
+      if (eol_type == CODING_EOL_INCONSISTENT)
+       eol_type = CODING_EOL_UNDECIDED; /* No subsidiary for mixed EOLs.  */
        for (; !NILP (val2); val2 = XCONS (val2)->cdr)
         {
-         if (eol_type == CODING_EOL_AUTOMATIC)
+         if (eol_type == CODING_EOL_UNDECIDED)
             val = Fcons (XCONS (val2)->car, val);
           else
             {
-             Lisp_Object val3 = Fget (XCONS (val2)->car, Qeol_type);
+             Lisp_Object val3;
+             val3 = Fget (XCONS (val2)->car, Qeol_type);
               if (VECTORP (val3))
                 val = Fcons (XVECTOR (val3)->contents[eol_type], val);
               else
@@ -2805,7 +3238,7 @@ shrink_conversion_area (begp, endp, coding, encodep)
    register unsigned char *beg_addr = *begp, *end_addr = *endp;
  
    if (coding->eol_type != CODING_EOL_LF
-      && coding->eol_type != CODING_EOL_AUTOMATIC)
+      && coding->eol_type != CODING_EOL_UNDECIDED)
      /* Since we anyway have to convert end-of-line format, it is not
         worth skipping at most 100 bytes or so.  */
      return;
@@ -2815,17 +3248,33 @@ shrink_conversion_area (begp, endp, coding, encodep)
        switch (coding->type)
         {
         case coding_type_no_conversion:
-       case coding_type_internal:
-       case coding_type_automatic:
+       case coding_type_emacs_mule:
+       case coding_type_undecided:
+       case coding_type_raw_text:
           /* We need no conversion.  */
           *begp = *endp;
           return;
         case coding_type_ccl:
           /* We can't skip any data.  */
           return;
+       case coding_type_iso2022:
+         if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
+           {
+             unsigned char *bol = beg_addr; 
+             while (beg_addr < end_addr && *beg_addr < 0x80)
+               {
+                 beg_addr++;
+                 if (*(beg_addr - 1) == '\n')
+                   bol = beg_addr;
+               }
+             beg_addr = bol;
+             goto label_skip_tail;
+           }
+         /* fall through ... */
         default:
           /* We can skip all ASCII characters at the head and tail.  */
           while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++;
+       label_skip_tail:
           while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--;
           break;
         }
@@ -2838,7 +3287,8 @@ shrink_conversion_area (begp, endp, coding, encodep)
           /* We need no conversion.  */
           *begp = *endp;
           return;
-       case coding_type_internal:
+       case coding_type_emacs_mule:
+       case coding_type_raw_text:
           if (coding->eol_type == CODING_EOL_LF)
             {
               /* We need no conversion.  */
@@ -2974,8 +3424,8 @@ code_convert_region (b, e, coding, encodep)
  }
  
  Lisp_Object
-code_convert_string (str, coding, encodep)
-     Lisp_Object str;
+code_convert_string (str, coding, encodep, nocopy)
+     Lisp_Object str, nocopy;
       struct coding_system *coding;
       int encodep;
  {
@@ -3014,7 +3464,7 @@ code_convert_string (str, coding, encodep)
  
    if (begp == endp)
      /* We need no conversion.  */
-    return str;
+    return (NILP (nocopy) ? Fcopy_sequence (str) : str);
  
    head_skip = begp - XSTRING (str)->data;
    tail_skip = XSTRING (str)->size - head_skip - (endp - begp);
@@ -3044,8 +3494,10 @@ code_convert_string (str, coding, encodep)
  }
  
  DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
-       3, 3, 0,
-  "Decode the text between START and END which is encoded in CODING-SYSTEM.\n\
+       3, 3, "r\nzCoding system: ",
+  "Decode current region by specified coding system.\n\
+When called from a program, takes three arguments:\n\
+START, END, and CODING-SYSTEM.  START END are buffer positions.\n\
  Return length of decoded text.")
    (b, e, coding_system)
       Lisp_Object b, e, coding_system;
@@ -3056,6 +3508,8 @@ Return length of decoded text.")
    CHECK_NUMBER_COERCE_MARKER (e, 1);
    CHECK_SYMBOL (coding_system, 2);
  
+  if (NILP (coding_system))
+    return make_number (XFASTINT (e) - XFASTINT (b));
    if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
      error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
  
@@ -3063,8 +3517,10 @@ Return length of decoded text.")
  }
  
  DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
-       3, 3, 0,
-  "Encode the text between START and END to CODING-SYSTEM.\n\
+       3, 3, "r\nzCoding system: ",
+  "Encode current region by specified coding system.\n\
+When called from a program, takes three arguments:\n\
+START, END, and CODING-SYSTEM.  START END are buffer positions.\n\
  Return length of encoded text.")
    (b, e, coding_system)
       Lisp_Object b, e, coding_system;
@@ -3075,6 +3531,8 @@ Return length of encoded text.")
    CHECK_NUMBER_COERCE_MARKER (e, 1);
    CHECK_SYMBOL (coding_system, 2);
  
+  if (NILP (coding_system))
+    return make_number (XFASTINT (e) - XFASTINT (b));
    if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
      error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
  
@@ -3082,41 +3540,49 @@ Return length of encoded text.")
  }
  
  DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
-       2, 2, 0,
-  "Decode STRING which is encoded in CODING-SYSTEM, and return the result.")
-  (string, coding_system)
-     Lisp_Object string, coding_system;
+       2, 3, 0,
+  "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
+Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
+of decoding.")
+  (string, coding_system, nocopy)
+     Lisp_Object string, coding_system, nocopy;
  {
    struct coding_system coding;
  
    CHECK_STRING (string, 0);
    CHECK_SYMBOL (coding_system, 1);
  
+  if (NILP (coding_system))
+    return (NILP (nocopy) ? Fcopy_sequence (string) : string);
    if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
      error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
  
-  return code_convert_string (string, &coding, 0);
+  return code_convert_string (string, &coding, 0, nocopy);
  }
  
  DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
-       2, 2, 0,
-  "Encode STRING to CODING-SYSTEM, and return the result.")
-  (string, coding_system)
-     Lisp_Object string, coding_system;
+       2, 3, 0,
+  "Encode STRING to CODING-SYSTEM, and return the result.\n\
+Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
+of encoding.")
+  (string, coding_system, nocopy)
+     Lisp_Object string, coding_system, nocopy;
  {
    struct coding_system coding;
  
    CHECK_STRING (string, 0);
    CHECK_SYMBOL (coding_system, 1);
  
+  if (NILP (coding_system))
+    return (NILP (nocopy) ? Fcopy_sequence (string) : string);
    if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
      error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
  
-  return code_convert_string (string, &coding, 1);
+  return code_convert_string (string, &coding, 1, nocopy);
  }
  
  DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
-  "Decode a JISX0208 character of SJIS coding-system-sjis.\n\
+  "Decode a JISX0208 character of shift-jis encoding.\n\
  CODE is the character code in SJIS.\n\
  Return the corresponding character.")
    (code)
@@ -3138,8 +3604,7 @@ Return the corresponding character code in SJIS.")
    (ch)
       Lisp_Object ch;
  {
-  int charset;
-  unsigned char c1, c2, s1, s2;
+  int charset, c1, c2, s1, s2;
    Lisp_Object val;
  
    CHECK_NUMBER (ch, 0);
@@ -3147,7 +3612,7 @@ Return the corresponding character code in SJIS.")
    if (charset == charset_jisx0208)
      {
        ENCODE_SJIS (c1, c2, s1, s2);
-      XSETFASTINT (val, ((int)s1 << 8) | s2);
+      XSETFASTINT (val, (s1 << 8) | s2);
      }
    else
      XSETFASTINT (val, 0);
@@ -3178,8 +3643,7 @@ Return the corresponding character code in Big5.")
    (ch)
       Lisp_Object ch;
  {
-  int charset;
-  unsigned char c1, c2, b1, b2;
+  int charset, c1, c2, b1, b2;
    Lisp_Object val;
  
    CHECK_NUMBER (ch, 0);
@@ -3187,26 +3651,36 @@ Return the corresponding character code in Big5.")
    if (charset == charset_big5_1 || charset == charset_big5_2)
      {
        ENCODE_BIG5 (charset, c1, c2, b1, b2);
-      XSETFASTINT (val, ((int)b1 << 8) | b2);
+      XSETFASTINT (val, (b1 << 8) | b2);
      }
    else
      XSETFASTINT (val, 0);
    return val;
  }
  
-DEFUN ("set-terminal-coding-system",
-       Fset_terminal_coding_system, Sset_terminal_coding_system, 1, 1,
-       "zCoding-system for terminal display: ",
-  "Set coding-system of your terminal to CODING-SYSTEM.\n\
-All outputs to terminal are encoded to this coding-system.")
+DEFUN ("set-terminal-coding-system-internal",
+       Fset_terminal_coding_system_internal,
+       Sset_terminal_coding_system_internal, 1, 1, 0, "")
    (coding_system)
       Lisp_Object coding_system;
  {
    CHECK_SYMBOL (coding_system, 0);
    setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
-  update_mode_lines++;
-  if (!NILP (Finteractive_p ()))
-    Fredraw_display ();
+  /* We had better not send unexpected characters to terminal.  */
+  terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
+
+  return Qnil;
+}
+
+DEFUN ("set-safe-terminal-coding-system-internal",
+       Fset_safe_terminal_coding_system_internal,
+       Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
+  (coding_system)
+     Lisp_Object coding_system;
+{
+  CHECK_SYMBOL (coding_system, 0);
+  setup_coding_system (Fcheck_coding_system (coding_system),
+                      &safe_terminal_coding);
    return Qnil;
  }
  
@@ -3218,11 +3692,9 @@ DEFUN ("terminal-coding-system",
    return terminal_coding.symbol;
  }
  
-DEFUN ("set-keyboard-coding-system",
-       Fset_keyboard_coding_system, Sset_keyboard_coding_system, 1, 1,
-       "zCoding-system for keyboard input: ",
-  "Set coding-system of what is sent from terminal keyboard to CODING-SYSTEM.\n\
-All inputs from terminal are decoded from this coding-system.")
+DEFUN ("set-keyboard-coding-system-internal",
+       Fset_keyboard_coding_system_internal,
+       Sset_keyboard_coding_system_internal, 1, 1, 0, "")
    (coding_system)
       Lisp_Object coding_system;
  {
@@ -3240,24 +3712,36 @@ DEFUN ("keyboard-coding-system",
  }
  
  \f
-DEFUN ("find-coding-system", Ffind_coding_system, Sfind_coding_system,
-       1, MANY, 0,
-  "Return a cons of coding systems for I/O primitive OPERATION.\n\
-Remaining arguments are for OPERATION.\n\
-OPERATION is one of the following Emacs I/O primitives:\n\
-  For file I/O, insert-file-contents or write-region.\n\
-  For process I/O, call-process, call-process-region, or start-process.\n\
-  For network I/O, open-network-stream.\n\
-For each OPERATION, TARGET is selected from the arguments as below:\n\
+DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
+       Sfind_operation_coding_system,  1, MANY, 0,
+  "Choose a coding system for an operation based on the target name.\n\
+The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
+DECODING-SYSTEM is the coding system to use for decoding\n\
+\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
+for encoding (in case OPERATION does encoding).\n\
+\n\
+The first argument OPERATION specifies an I/O primitive:\n\
+  For file I/O, `insert-file-contents' or `write-region'.\n\
+  For process I/O, `call-process', `call-process-region', or `start-process'.\n\
+  For network I/O, `open-network-stream'.\n\
+\n\
+The remaining arguments should be the same arguments that were passed\n\
+to the primitive.  Depending on which primitive, one of those arguments\n\
+is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
+whichever argument specifies the file name is TARGET.\n\
+\n\
+TARGET has a meaning which depends on OPERATION:\n\
    For file I/O, TARGET is a file name.\n\
    For process I/O, TARGET is a process name.\n\
    For network I/O, TARGET is a service name or a port number\n\
  \n\
-The return value is a cons of coding systems for decoding and encoding\n\
-registered in nested alist `coding-system-alist' (which see) at a slot\n\
-corresponding to OPERATION and TARGET.
-If a function symbol is at the slot, return a result of the function call.\n\
-The function is called with one argument, a list of all the arguments.")
+This function looks up what is specified for TARGET in\n\
+`file-coding-system-alist', `process-coding-system-alist',\n\
+or `network-coding-system-alist' depending on OPERATION.\n\
+They may specify a coding system, a cons of coding systems,\n\
+or a function symbol to call.\n\
+In the last case, we call the function with one argument,\n\
+which is a list of all the arguments given to this function.")
    (nargs, args)
       int nargs;
       Lisp_Object *args;
@@ -3279,24 +3763,37 @@ The function is called with one argument, a list of all the arguments.")
         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
      error ("Invalid %dth argument", XINT (target_idx) + 1);
  
-  chain = Fassq (operation, Vcoding_system_alist);
+  chain = ((EQ (operation, Qinsert_file_contents)
+           || EQ (operation, Qwrite_region))
+          ? Vfile_coding_system_alist
+          : (EQ (operation, Qopen_network_stream)
+             ? Vnetwork_coding_system_alist
+             : Vprocess_coding_system_alist));
    if (NILP (chain))
      return Qnil;
  
-  for (chain = XCONS (chain)->cdr; CONSP (chain); chain = XCONS (chain)->cdr)
+  for (; CONSP (chain); chain = XCONS (chain)->cdr)
      {
-      Lisp_Object elt = XCONS (chain)->car;
+      Lisp_Object elt;
+      elt = XCONS (chain)->car;
  
        if (CONSP (elt)
           && ((STRINGP (target)
                && STRINGP (XCONS (elt)->car)
                && fast_string_match (XCONS (elt)->car, target) >= 0)
               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
-       return (CONSP (val = XCONS (elt)->cdr)
-               ? val
-               : ((SYMBOLP (val) && Fboundp (val)
-                   ? call2 (val, Flist (nargs, args))
-                   : Qnil)));
+       {
+         val = XCONS (elt)->cdr;
+         if (CONSP (val))
+           return val;
+         if (! SYMBOLP (val))
+           return Qnil;
+         if (! NILP (Fcoding_system_p (val)))
+           return Fcons (val, val);
+         if (!NILP (Ffboundp (val)))
+           return call1 (val, Flist (nargs, args));
+         return Qnil;
+       }
      }
    return Qnil;
  }
@@ -3310,7 +3807,7 @@ init_coding_once ()
  {
    int i;
  
-  /* Emacs internal format specific initialize routine.  */ 
+  /* Emacs' internal format specific initialize routine.  */ 
    for (i = 0; i <= 0x20; i++)
      emacs_code_class[i] = EMACS_control_code;
    emacs_code_class[0x0A] = EMACS_linefeed_code;
@@ -3346,6 +3843,56 @@ init_coding_once ()
    iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
    iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
  
+  conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
+  conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
+
+  setup_coding_system (Qnil, &keyboard_coding);
+  setup_coding_system (Qnil, &terminal_coding);
+  setup_coding_system (Qnil, &safe_terminal_coding);
+
+#if defined (MSDOS) || defined (WINDOWSNT)
+  system_eol_type = CODING_EOL_CRLF;
+#else
+  system_eol_type = CODING_EOL_LF;
+#endif
+}
+
+#ifdef emacs
+
+syms_of_coding ()
+{
+  Qtarget_idx = intern ("target-idx");
+  staticpro (&Qtarget_idx);
+
+  Qcoding_system_history = intern ("coding-system-history");
+  staticpro (&Qcoding_system_history);
+  Fset (Qcoding_system_history, Qnil);
+
+  /* Target FILENAME is the first argument.  */
+  Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
+  /* Target FILENAME is the third argument.  */
+  Fput (Qwrite_region, Qtarget_idx, make_number (2));
+
+  Qcall_process = intern ("call-process");
+  staticpro (&Qcall_process);
+  /* Target PROGRAM is the first argument.  */
+  Fput (Qcall_process, Qtarget_idx, make_number (0));
+
+  Qcall_process_region = intern ("call-process-region");
+  staticpro (&Qcall_process_region);
+  /* Target PROGRAM is the third argument.  */
+  Fput (Qcall_process_region, Qtarget_idx, make_number (2));
+
+  Qstart_process = intern ("start-process");
+  staticpro (&Qstart_process);
+  /* Target PROGRAM is the third argument.  */
+  Fput (Qstart_process, Qtarget_idx, make_number (2));
+
+  Qopen_network_stream = intern ("open-network-stream");
+  staticpro (&Qopen_network_stream);
+  /* Target SERVICE is the fourth argument.  */
+  Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
+
    Qcoding_system = intern ("coding-system");
    staticpro (&Qcoding_system);
  
@@ -3361,8 +3908,14 @@ init_coding_once ()
    Qpre_write_conversion = intern ("pre-write-conversion");
    staticpro (&Qpre_write_conversion);
  
-  Qcoding_system_vector = intern ("coding-system-vector");
-  staticpro (&Qcoding_system_vector);
+  Qno_conversion = intern ("no-conversion");
+  staticpro (&Qno_conversion);
+
+  Qundecided = intern ("undecided");
+  staticpro (&Qundecided);
+
+  Qcoding_system_spec = intern ("coding-system-spec");
+  staticpro (&Qcoding_system_spec);
  
    Qcoding_system_p = intern ("coding-system-p");
    staticpro (&Qcoding_system_p);
@@ -3373,7 +3926,7 @@ init_coding_once ()
    Fput (Qcoding_system_error, Qerror_conditions,
         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
    Fput (Qcoding_system_error, Qerror_message,
-       build_string ("Coding-system error"));
+       build_string ("Invalid coding system"));
  
    Qcoding_category_index = intern ("coding-category-index");
    staticpro (&Qcoding_category_index);
@@ -3389,40 +3942,23 @@ init_coding_once ()
        }
    }
  
-  conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
-  conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
+  Qcharacter_unification_table = intern ("character-unification-table");
+  staticpro (&Qcharacter_unification_table);
+  Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
+       make_number (0));
  
-  setup_coding_system (Qnil, &keyboard_coding);
-  setup_coding_system (Qnil, &terminal_coding);
-}
+  Qcharacter_unification_table_for_decode
+    = intern ("character-unification-table-for-decode");
+  staticpro (&Qcharacter_unification_table_for_decode);
  
-#ifdef emacs
+  Qcharacter_unification_table_for_encode
+    = intern ("character-unification-table-for-encode");
+  staticpro (&Qcharacter_unification_table_for_encode);
  
-syms_of_coding ()
-{
-  Qtarget_idx = intern ("target-idx");
-  staticpro (&Qtarget_idx);
+  Qemacs_mule = intern ("emacs-mule");
+  staticpro (&Qemacs_mule);
  
-  Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
-  Fput (Qwrite_region, Qtarget_idx, make_number (2));
-
-  Qcall_process = intern ("call-process");
-  staticpro (&Qcall_process);
-  Fput (Qcall_process, Qtarget_idx, make_number (0));
-
-  Qcall_process_region = intern ("call-process-region");
-  staticpro (&Qcall_process_region);
-  Fput (Qcall_process_region, Qtarget_idx, make_number (2));
-
-  Qstart_process = intern ("start-process");
-  staticpro (&Qstart_process);
-  Fput (Qstart_process, Qtarget_idx, make_number (2));
-
-  Qopen_network_stream = intern ("open-network-stream");
-  staticpro (&Qopen_network_stream);
-  Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
-
-  defsubr (&Scoding_system_vector);
+  defsubr (&Scoding_system_spec);
    defsubr (&Scoding_system_p);
    defsubr (&Sread_coding_system);
    defsubr (&Sread_non_nil_coding_system);
@@ -3436,11 +3972,12 @@ syms_of_coding ()
    defsubr (&Sencode_sjis_char);
    defsubr (&Sdecode_big5_char);
    defsubr (&Sencode_big5_char);
-  defsubr (&Sset_terminal_coding_system);
+  defsubr (&Sset_terminal_coding_system_internal);
+  defsubr (&Sset_safe_terminal_coding_system_internal);
    defsubr (&Sterminal_coding_system);
-  defsubr (&Sset_keyboard_coding_system);
+  defsubr (&Sset_keyboard_coding_system_internal);
    defsubr (&Skeyboard_coding_system);
-  defsubr (&Sfind_coding_system);
+  defsubr (&Sfind_operation_coding_system);
  
    DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
      "List of coding-categories (symbols) ordered by priority.");
@@ -3454,69 +3991,130 @@ syms_of_coding ()
    }
  
    DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
-    "A variable of internal use only.\n\
+    "Specify the coding system for read operations.\n\
+It is useful to bind this variable with `let', but do not set it globally.\n\
  If the value is a coding system, it is used for decoding on read operation.\n\
-If not, an appropriate element in `coding-system-alist' (which see) is used.");
+If not, an appropriate element is used from one of the coding system alists.\n\
+There are three such tables: `file-coding-system-alist',\n\
+`process-coding-system-alist', and `network-coding-system-alist'.");
    Vcoding_system_for_read = Qnil;
  
    DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
-    "A variable of internal use only.\n\
+    "Specify the coding system for write operations.\n\
+It is useful to bind this variable with `let', but do not set it globally.\n\
  If the value is a coding system, it is used for encoding on write operation.\n\
-If not, an appropriate element in `coding-system-alist' (which see) is used.");
+If not, an appropriate element is used from one of the coding system alists.\n\
+There are three such tables: `file-coding-system-alist',\n\
+`process-coding-system-alist', and `network-coding-system-alist'.");
    Vcoding_system_for_write = Qnil;
  
    DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
-    "Coding-system used in the latest file or process I/O.");
+    "Coding system used in the latest file or process I/O.");
    Vlast_coding_system_used = Qnil;
  
-  DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
-    "Nested alist to decide a coding system for a specific I/O operation.\n\
-The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\
-
-OPERATION is one of the following Emacs I/O primitives:\n\
-  For file I/O, insert-file-contents and write-region.\n\
-  For process I/O, call-process, call-process-region, and start-process.\n\
-  For network I/O, open-network-stream.\n\
-In addition, for process I/O, `process-argument' can be specified for\n\
-encoding arguments of the process.\n\
+  DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
+    "*Non-nil means always inhibit code conversion of end-of-line format.");
+  inhibit_eol_conversion = 0;
+
+  DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
+    "Alist to decide a coding system to use for a file I/O operation.\n\
+The format is ((PATTERN . VAL) ...),\n\
+where PATTERN is a regular expression matching a file name,\n\
+VAL is a coding system, a cons of coding systems, or a function symbol.\n\
+If VAL is a coding system, it is used for both decoding and encoding\n\
+the file contents.\n\
+If VAL is a cons of coding systems, the car part is used for decoding,\n\
+and the cdr part is used for encoding.\n\
+If VAL is a function symbol, the function must return a coding system\n\
+or a cons of coding systems which are used as above.\n\
  \n\
-REGEXP is a regular expression matching a target of OPERATION, where\n\
-target is a file name for file I/O operations, a process name for\n\
-process I/O operations, or a service name for network I/O\n\
-operations.  REGEXP might be a port number for network I/O operation.\n\
+See also the function `find-operation-coding-system'.");
+  Vfile_coding_system_alist = Qnil;
+
+  DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
+    "Alist to decide a coding system to use for a process I/O operation.\n\
+The format is ((PATTERN . VAL) ...),\n\
+where PATTERN is a regular expression matching a program name,\n\
+VAL is a coding system, a cons of coding systems, or a function symbol.\n\
+If VAL is a coding system, it is used for both decoding what is received\n\
+from the program and encoding what is sent to the program.\n\
+If VAL is a cons of coding systems, the car part is used for decoding,\n\
+and the cdr part is used for encoding.\n\
+If VAL is a function symbol, the function must return a coding system\n\
+or a cons of coding systems which are used as above.\n\
  \n\
-CODING-SYSTEMS is a cons of coding systems to encode and decode\n\
-character code on OPERATION, or a function symbol returning the cons.\n\
-See the documentation of `find-coding-system' for more detail.");
-  Vcoding_system_alist = Qnil;
+See also the function `find-operation-coding-system'.");
+  Vprocess_coding_system_alist = Qnil;
+
+  DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
+    "Alist to decide a coding system to use for a network I/O operation.\n\
+The format is ((PATTERN . VAL) ...),\n\
+where PATTERN is a regular expression matching a network service name\n\
+or is a port number to connect to,\n\
+VAL is a coding system, a cons of coding systems, or a function symbol.\n\
+If VAL is a coding system, it is used for both decoding what is received\n\
+from the network stream and encoding what is sent to the network stream.\n\
+If VAL is a cons of coding systems, the car part is used for decoding,\n\
+and the cdr part is used for encoding.\n\
+If VAL is a function symbol, the function must return a coding system\n\
+or a cons of coding systems which are used as above.\n\
+\n\
+See also the function `find-operation-coding-system'.");
+  Vnetwork_coding_system_alist = Qnil;
  
    DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
      "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
-  eol_mnemonic_unix = '.';
+  eol_mnemonic_unix = ':';
  
    DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
      "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
-  eol_mnemonic_dos = ':';
+  eol_mnemonic_dos = '\\';
  
    DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
      "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
-  eol_mnemonic_mac = '\'';
+  eol_mnemonic_mac = '/';
  
    DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
      "Mnemonic character indicating end-of-line format is not yet decided.");
-  eol_mnemonic_undecided = '-';
+  eol_mnemonic_undecided = ':';
+
+  DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
+    "Non-nil means ISO 2022 encoder/decoder do character unification.");
+  Venable_character_unification = Qt;
  
-  DEFVAR_LISP ("alternate-charset-table", &Valternate_charset_table,
-    "Alist of charsets vs the alternate charsets.\n\
-While decoding, if a charset (car part of an element) is found,\n\
-decode it as the alternate charset (cdr part of the element).");
-  Valternate_charset_table = Qnil;
+  DEFVAR_LISP ("standard-character-unification-table-for-decode",
+    &Vstandard_character_unification_table_for_decode,
+    "Table for unifying characters when reading.");
+  Vstandard_character_unification_table_for_decode = Qnil;
+
+  DEFVAR_LISP ("standard-character-unification-table-for-encode",
+    &Vstandard_character_unification_table_for_encode,
+    "Table for unifying characters when writing.");
+  Vstandard_character_unification_table_for_encode = Qnil;
  
    DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
      "Alist of charsets vs revision numbers.\n\
  While encoding, if a charset (car part of an element) is found,\n\
  designate it with the escape sequence identifing revision (cdr part of the element).");
    Vcharset_revision_alist = Qnil;
+
+  DEFVAR_LISP ("default-process-coding-system",
+              &Vdefault_process_coding_system,
+    "Cons of coding systems used for process I/O by default.\n\
+The car part is used for decoding a process output,\n\
+the cdr part is used for encoding a text to be sent to a process.");
+  Vdefault_process_coding_system = Qnil;
+
+  DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
+    "Table of extra Latin codes in the range 128..159 (inclusive).\n\
+This is a vector of length 256.\n\
+If the Nth element is non-nil, the existence of code N in a file\n\
+\(or output of a subprocess) doesn't prevent it from being detected as\n\
+a coding system of an ISO 2022 variant which has the flag\n\
+`accept-latin-extra-code' set to t (e.g. iso-latin-1) on reading a file\n\
+or reading output of a subprocess.\n\
+Only the 128th through 159th elements have a meaning.");
+  Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
  }
  
  #endif /* emacs */