Fix bugs with inappropriate mixing of Lisp_Object with int.

[bpt/emacs.git] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index af3e336..75e4980 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,7 +1,6 @@
  /* Coding system handler (conversion, detection, and etc).
-   Ver.1.0.
-   Copyright (C) 1995 Free Software Foundation, Inc.
-   Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
+   Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
+   Licensed to the Free Software Foundation.
  
  This file is part of GNU Emacs.
  
@@ -23,7 +22,7 @@ Boston, MA 02111-1307, USA.  */
  /*** TABLE OF CONTENTS ***
  
    1. Preamble
-  2. Emacs' internal format handlers
+  2. Emacs' internal format (emacs-mule) handlers
    3. ISO2022 handlers
    4. Shift-JIS and BIG5 handlers
    5. End-of-line handlers
@@ -38,10 +37,11 @@ Boston, MA 02111-1307, USA.  */
    Coding system is an encoding mechanism of one or more character
    sets.  Here's a list of coding systems which Emacs can handle.  When
    we say "decode", it means converting some other coding system to
-  Emacs' internal format, and when we say "encode", it means
-  converting Emacs' internal format to some other coding system.
+  Emacs' internal format (emacs-mule), and when we say "encode",
+  it means converting the coding system emacs-mule to some other
+  coding system.
  
-  0. Emacs' internal format
+  0. Emacs' internal format (emacs-mule)
  
    Emacs itself holds a multi-lingual character in a buffer and a string
    in a special format.  Details are described in the section 2.
@@ -106,7 +106,7 @@ Boston, MA 02111-1307, USA.  */
    template of these functions.  */
  #if 0
  int
-detect_coding_internal (src, src_end)
+detect_coding_emacs_mule (src, src_end)
       unsigned char *src, *src_end;
  {
    ...
@@ -116,11 +116,11 @@ detect_coding_internal (src, src_end)
  /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
  
    These functions decode SRC_BYTES length text at SOURCE encoded in
-  CODING to Emacs' internal format.  The resulting text goes to a
-  place pointed by DESTINATION, the length of which should not exceed
-  DST_BYTES.  The bytes actually processed is returned as *CONSUMED.
-  The return value is the length of the decoded text.  Below is a
-  template of these functions.  */
+  CODING to Emacs' internal format (emacs-mule).  The resulting text
+  goes to a place pointed by DESTINATION, the length of which should
+  not exceed DST_BYTES.  The number of bytes actually processed is
+  returned as *CONSUMED.  The return value is the length of the
+  decoded text.  Below is a template of these functions.  */
  #if 0
  decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
       struct coding_system *coding;
@@ -134,12 +134,12 @@ decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
  
  /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
  
-  These functions encode SRC_BYTES length text at SOURCE of Emacs
-  internal format to CODING.  The resulting text goes to a place
-  pointed by DESTINATION, the length of which should not exceed
-  DST_BYTES.  The bytes actually processed is returned as *CONSUMED.
-  The return value is the length of the encoded text.  Below is a
-  template of these functions.  */
+  These functions encode SRC_BYTES length text at SOURCE of Emacs'
+  internal format (emacs-mule) to CODING.  The resulting text goes to
+  a place pointed by DESTINATION, the length of which should not
+  exceed DST_BYTES.  The number of bytes actually processed is
+  returned as *CONSUMED.  The return value is the length of the
+  encoded text.  Below is a template of these functions.  */
  #if 0
  encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
       struct coding_system *coding;
@@ -262,7 +262,7 @@ int eol_mnemonic_undecided;
  
  #ifdef emacs
  
-Lisp_Object Qcoding_system_vector, Qcoding_system_p, Qcoding_system_error;
+Lisp_Object Qcoding_system_spec, Qcoding_system_p, Qcoding_system_error;
  
  /* Coding-systems are handed between Emacs Lisp programs and C internal
     routines by the following three variables.  */
@@ -279,7 +279,9 @@ struct coding_system terminal_coding;
  /* Coding-system of what is sent from terminal keyboard.  */
  struct coding_system keyboard_coding;
  
-Lisp_Object Vcoding_system_alist;
+Lisp_Object Vfile_coding_system_alist;
+Lisp_Object Vprocess_coding_system_alist;
+Lisp_Object Vnetwork_coding_system_alist;
  
  #endif /* emacs */
  
@@ -293,7 +295,7 @@ Lisp_Object coding_category_table[CODING_CATEGORY_IDX_MAX];
  
  /* Table of names of symbol for each coding-category.  */
  char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
-  "coding-category-internal",
+  "coding-category-emacs-mule",
    "coding-category-sjis",
    "coding-category-iso-7",
    "coding-category-iso-8-1",
@@ -303,14 +305,26 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
    "coding-category-binary"
  };
  
-/* Alist of charsets vs the alternate charsets.  */
-Lisp_Object Valternate_charset_table;
+/* Flag to tell if we look up unification table on character code
+   conversion.  */
+Lisp_Object Venable_character_unification;
+/* Standard unification table to look up on decoding (reading).  */
+Lisp_Object Vstandard_character_unification_table_for_decode;
+/* Standard unification table to look up on encoding (writing).  */
+Lisp_Object Vstandard_character_unification_table_for_encode;
+
+Lisp_Object Qcharacter_unification_table;
+Lisp_Object Qcharacter_unification_table_for_decode;
+Lisp_Object Qcharacter_unification_table_for_encode;
  
  /* Alist of charsets vs revision number.  */
  Lisp_Object Vcharset_revision_alist;
  
+/* Default coding systems used for process I/O.  */
+Lisp_Object Vdefault_process_coding_system;
+
  \f
-/*** 2. Emacs internal format handlers ***/
+/*** 2. Emacs internal format (emacs-mule) handlers ***/
  
  /* Emacs' internal format for encoding multiple character sets is a
     kind of multi-byte encoding, i.e. encoding a character by a sequence
@@ -357,10 +371,10 @@ enum emacs_code_class_type emacs_code_class[256];
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
     Check if a text is encoded in Emacs' internal format.  If it is,
-   return CODING_CATEGORY_MASK_INTERNAL, else return 0.  */
+   return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
  
  int
-detect_coding_internal (src, src_end)
+detect_coding_emacs_mule (src, src_end)
       unsigned char *src, *src_end;
  {
    unsigned char c;
@@ -416,7 +430,7 @@ detect_coding_internal (src, src_end)
           break;
         }
      }
-  return CODING_CATEGORY_MASK_INTERNAL;
+  return CODING_CATEGORY_MASK_EMACS_MULE;
  }
  
  \f
@@ -581,13 +595,12 @@ int
  detect_coding_iso2022 (src, src_end)
       unsigned char *src, *src_end;
  {
-  unsigned char c, g1 = 0;
    int mask = (CODING_CATEGORY_MASK_ISO_7
               | CODING_CATEGORY_MASK_ISO_8_1
-             | CODING_CATEGORY_MASK_ISO_8_2);
-  /* We may look ahead at most 4 bytes.  */
-  unsigned char *adjusted_src_end = src_end - 4;
-  int i;
+             | CODING_CATEGORY_MASK_ISO_8_2
+             | CODING_CATEGORY_MASK_ISO_ELSE);
+  int g1 = 0;                  /* 1 iff designating to G1.  */
+  int c, i;
  
    while (src < src_end)
      {
@@ -598,14 +611,17 @@ detect_coding_iso2022 (src, src_end)
           if (src >= src_end)
             break;
           c = *src++;
-         if (src + 2 >= src_end
+         if (src < src_end
               && ((c >= '(' && c <= '/')
                   || c == '$' && ((*src >= '(' && *src <= '/')
                                   || (*src >= '@' && *src <= 'B'))))
             {
               /* Valid designation sequence.  */
               if (c == ')' || (c == '$' && *src == ')'))
-               g1 = 1;
+               {
+                 g1 = 1;
+                 mask &= ~CODING_CATEGORY_MASK_ISO_7;
+               }
               src++;
               break;
             }
@@ -647,44 +663,48 @@ detect_coding_iso2022 (src, src_end)
  }
  
  /* Decode a character of which charset is CHARSET and the 1st position
-   code is C1.  If dimension of CHARSET 2, the 2nd position code is
+   code is C1.  If dimension of CHARSET is 2, the 2nd position code is
     fetched from SRC and set to C2.  If CHARSET is negative, it means
     that we are decoding ill formed text, and what we can do is just to
     read C1 as is.  */
  
-#define DECODE_ISO_CHARACTER(charset, c1)                      \
-  do {                                                         \
-    if ((charset) >= 0 && CHARSET_DIMENSION (charset) == 2)    \
-      ONE_MORE_BYTE (c2);                                      \
-    if (COMPOSING_HEAD_P (coding->composing))                  \
-      {                                                                \
-       *dst++ = LEADING_CODE_COMPOSITION;                      \
-       if (COMPOSING_WITH_RULE_P (coding->composing))          \
-         /* To tell composition rules are embeded.  */         \
-         *dst++ = 0xFF;                                        \
-       coding->composing += 2;                                 \
-      }                                                                \
-    if ((charset) < 0)                                         \
-      *dst++ = c1;                                             \
-    else if ((charset) == CHARSET_ASCII)                       \
-      DECODE_CHARACTER_ASCII (c1);                             \
-    else if (CHARSET_DIMENSION (charset) == 1)                 \
-      DECODE_CHARACTER_DIMENSION1 (charset, c1);               \
-    else                                                       \
-      DECODE_CHARACTER_DIMENSION2 (charset, c1, c2);           \
-    if (COMPOSING_WITH_RULE_P (coding->composing))             \
-      /* To tell a composition rule follows.  */               \
-      coding->composing = COMPOSING_WITH_RULE_RULE;            \
+#define DECODE_ISO_CHARACTER(charset, c1)                              \
+  do {                                                                 \
+    int c_alt, charset_alt = (charset); /* CHARSET_ALT: charset actually decoded, after unification */ \
+    if (COMPOSING_HEAD_P (coding->composing))                          \
+      {                                                                        \
+       *dst++ = LEADING_CODE_COMPOSITION;                              \
+       if (COMPOSING_WITH_RULE_P (coding->composing))                  \
+         /* To tell composition rules are embedded.  */                \
+         *dst++ = 0xFF;                                                \
+       coding->composing += 2;                                         \
+      }                                                                        \
+    if ((charset) >= 0) /* a negative CHARSET skips the 2nd-byte fetch and the table lookup */ \
+      {                                                                        \
+       if (CHARSET_DIMENSION (charset) == 2)                           \
+         ONE_MORE_BYTE (c2);                                           \
+       if (!NILP (unification_table)                                   \
+           && ((c_alt = unify_char (unification_table,                 \
+                                    -1, (charset), c1, c2)) >= 0))     \
+         SPLIT_CHAR (c_alt, charset_alt, c1, c2); /* NOTE(review): presumably stores into CHARSET_ALT, C1, C2 -- confirm */ \
+      }                                                                        \
+    if (charset_alt == CHARSET_ASCII || charset_alt < 0) /* a negative charset takes the ASCII path */ \
+      DECODE_CHARACTER_ASCII (c1);                                     \
+    else if (CHARSET_DIMENSION (charset_alt) == 1)                     \
+      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                   \
+    else                                                               \
+      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);               \
+    if (COMPOSING_WITH_RULE_P (coding->composing))                     \
+      /* To tell a composition rule follows.  */                       \
+      coding->composing = COMPOSING_WITH_RULE_RULE;                    \
    } while (0)
  
  /* Set designation state into CODING.  */
  #define DECODE_DESIGNATION(reg, dimension, chars, final_char)          \
    do {                                                                 \
-    int charset = ISO_CHARSET_TABLE (dimension, chars, final_char);    \
-    Lisp_Object temp                                                   \
-      = Fassq (CHARSET_SYMBOL (charset), Valternate_charset_table);    \
-    if (! NILP (temp))                                                 \
-      charset = get_charset_id (XCONS (temp)->cdr);                    \
+    int charset = ISO_CHARSET_TABLE (make_number (dimension),          \
+                                    make_number (chars),               \
+                                    make_number (final_char));         \
      if (charset >= 0)                                                  \
        {                                                                        \
          if (coding->direction == 1                                     \
@@ -716,6 +736,11 @@ decode_coding_iso2022 (coding, source, destination,
    /* Charsets invoked to graphic plane 0 and 1 respectively.  */
    int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
    int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_decode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_decode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -725,7 +750,7 @@ decode_coding_iso2022 (coding, source, destination,
          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
          to SRC_BASE before exiting.  */
        unsigned char *src_base = src;
-      unsigned char c1 = *src++, c2, cmprule;
+      int c1 = *src++, c2;
  
        switch (iso_code_class [c1])
         {
@@ -1164,6 +1189,21 @@ decode_coding_iso2022 (coding, source, destination,
        dst = encode_invocation_designation (charset, coding, dst);      \
    } while (1)
  
+#define ENCODE_ISO_CHARACTER(charset, c1, c2)                            \
+  do { /* Encode (CHARSET, C1, C2); the character is unified first when `unification_table' is non-nil.  */ \
+    int c_alt, charset_alt; /* CHARSET_ALT: charset actually encoded */  \
+    if (!NILP (unification_table)                                        \
+       && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
+           >= 0))                                                        \
+      SPLIT_CHAR (c_alt, charset_alt, c1, c2); /* NOTE(review): presumably stores into CHARSET_ALT, C1, C2 -- confirm */ \
+    else                                                                 \
+      charset_alt = charset;                                             \
+    if (CHARSET_DIMENSION (charset_alt) == 1) /* C2 is only a dummy on this path */ \
+      ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);                 \
+    else                                                                 \
+      ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);             \
+  } while (0)
+
  /* Produce designation and invocation codes at a place pointed by DST
     to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
     Return new DST.  */
@@ -1186,9 +1226,9 @@ encode_invocation_designation (charset, coding, dst)
        /* CHARSET is not yet designated to any graphic registers.  */
        /* At first check the requested designation.  */
        reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
-      if (reg < 0)
-       /* Since CHARSET requests no special designation, designate to
-          graphic register 0.  */
+      if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
+       /* Since CHARSET requests no special designation, designate it
+          to graphic register 0.  */
         reg = 0;
  
        ENCODE_DESIGNATION (charset, reg, coding);
@@ -1263,48 +1303,57 @@ encode_invocation_designation (charset, coding, dst)
           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
    } while (0)
  
-int
-encode_designation_at_bol (coding, src, src_end, dstp)
+/* Produce designation sequences of charsets in the line started from
+   SRC to a place pointed by *DSTP.  When at least one charset was
+   recorded, *DSTP is updated to the position reached.
+
+   TABLE, unless nil, is a unification table: each character is looked
+   up in it and, on a hit, the charset of the unified character is
+   used instead.
+
+   For each graphic register only the first charset requesting it is
+   recorded, and a designation is produced only when that charset
+   differs from the register's current designation.  A charset that
+   requests no particular register is ignored here.
+
+   If the current block ends before any end-of-line, we may fail to
+   find all the necessary designations.
+
+   NOTE(review): no return type is written, so this is implicitly
+   `int', yet no value is returned; callers must ignore the result.  */
+encode_designation_at_bol (coding, table, src, src_end, dstp)
       struct coding_system *coding;
+     Lisp_Object table;
       unsigned char *src, *src_end, **dstp;
  {
-  int charset, reg, r[4];
-  unsigned char *dst = *dstp, c;
-  for (reg = 0; reg < 4; reg++) r[reg] = -1;
-  while (src < src_end && (c = *src++) != '\n')
+  int charset, c, found = 0, reg;
+  /* Table of charsets to be designated to each graphic register.
+     A negative element means nothing has been recorded yet.  */
+  int r[4];
+  unsigned char *dst = *dstp;
+
+  for (reg = 0; reg < 4; reg++)
+    r[reg] = -1;
+
+  /* Stop early once all four registers have a charset recorded.  */
+  while (src < src_end && *src != '\n' && found < 4)
      {
-      switch (emacs_code_class[c])
+      int bytes = BYTES_BY_CHAR_HEAD (*src);
+
+      if (NILP (table))
+       charset = CHARSET_AT (src);
+      else
         {
-       case EMACS_ascii_code:
-         charset = CHARSET_ASCII;
-         break;
-       case EMACS_leading_code_2:
-         if (++src >= src_end) continue;
-         charset = c;
-         break;
-       case EMACS_leading_code_3:
-         if ((src += 2) >= src_end) continue;
-         charset =  (c < LEADING_CODE_PRIVATE_11 ? c : *(src - 2));
-         break;
-       case EMACS_leading_code_4:
-         if ((src += 3) >= src_end) continue;
-         charset = *(src - 3);
-         break;
-       default:
-         continue;
+         int c_alt, c1, c2;
+
+         SPLIT_STRING(src, bytes, charset, c1, c2);
+         if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
+           charset = CHAR_CHARSET (c_alt);
         }
+
        reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
-      if (r[reg] < 0
-         && CODING_SPEC_ISO_DESIGNATION (coding, reg) != charset)
-       r[reg] = charset;
+      /* REG is a valid index into R only when a register was actually
+        requested, so the sentinel must be tested before R[REG] is
+        read.  An unset slot is the negative value stored above, not
+        the register sentinel.  */
+      if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
+       {
+         found++;
+         r[reg] = charset;
+       }
+
+      src += bytes;
+    }
+
+  if (found)
+    {
+      for (reg = 0; reg < 4; reg++)
+       if (r[reg] >= 0
+           && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
+         ENCODE_DESIGNATION (r[reg], reg, coding);
+      *dstp = dst;
      }
-  if (c != '\n' && !coding->last_block)
-    return -1;
-  for (reg = 0; reg < 4; reg++)
-    if (r[reg] >= 0)
-      ENCODE_DESIGNATION (r[reg], reg, coding);
-  *dstp = dst;
-  return 0;
  }
  
  /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
@@ -1325,6 +1374,11 @@ encode_coding_iso2022 (coding, source, destination,
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
    unsigned char *adjusted_dst_end = dst_end - 19;
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_encode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_encode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -1334,18 +1388,14 @@ encode_coding_iso2022 (coding, source, destination,
          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
          reset to SRC_BASE before exiting.  */
        unsigned char *src_base = src;
-      unsigned char c1, c2, c3, c4;
-      int charset;
+      int charset, c1, c2, c3, c4;
  
        if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
           && CODING_SPEC_ISO_BOL (coding))
         {
-         /* We have to produce destination sequences now.  */
-         if (encode_designation_at_bol (coding, src, src_end, &dst) < 0)
-           /* We can't find end of line in the current block.  Let's
-            repeat encoding starting from the current position
-            pointed by SRC.  */
-           break;
+         /* We have to produce designation sequences if any now.  */
+         encode_designation_at_bol (coding, unification_table,
+                                    src, src_end, &dst);
           CODING_SPEC_ISO_BOL (coding) = 0;
         }
  
@@ -1390,7 +1440,7 @@ encode_coding_iso2022 (coding, source, destination,
        switch (emacs_code_class[c1])
         {
         case EMACS_ascii_code:
-         ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1);
+         ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
           break;
  
         case EMACS_control_code:
@@ -1417,7 +1467,7 @@ encode_coding_iso2022 (coding, source, destination,
                    coding->spec.iso2022.current_designation,
                    sizeof coding->spec.iso2022.initial_designation);
           if (coding->eol_type == CODING_EOL_LF
-             || coding->eol_type == CODING_EOL_AUTOMATIC)
+             || coding->eol_type == CODING_EOL_UNDECIDED)
             *dst++ = ISO_CODE_LF;
           else if (coding->eol_type == CODING_EOL_CRLF)
             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
@@ -1428,20 +1478,20 @@ encode_coding_iso2022 (coding, source, destination,
  
         case EMACS_leading_code_2:
           ONE_MORE_BYTE (c2);
-         ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2);
+         ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
           break;
  
         case EMACS_leading_code_3:
           TWO_MORE_BYTES (c2, c3);
           if (c1 < LEADING_CODE_PRIVATE_11)
-           ENCODE_ISO_CHARACTER_DIMENSION2 (c1, c2, c3);
+           ENCODE_ISO_CHARACTER (c1, c2, c3);
           else
-           ENCODE_ISO_CHARACTER_DIMENSION1 (c2, c3);
+           ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
           break;
  
         case EMACS_leading_code_4:
           THREE_MORE_BYTES (c2, c3, c4);
-         ENCODE_ISO_CHARACTER_DIMENSION2 (c2, c3, c4);
+         ENCODE_ISO_CHARACTER (c2, c3, c4);
           break;
  
         case EMACS_leading_code_composition:
@@ -1469,20 +1519,21 @@ encode_coding_iso2022 (coding, source, destination,
      label_end_of_loop:
        coding->carryover_size = src - src_base;
        bcopy (src_base, coding->carryover, coding->carryover_size);
-      src = src_base;
        break;
      }
  
    /* If this is the last block of the text to be encoded, we must
-     reset the state of graphic planes and registers to initial one.
-     In addition, we had better just flush out all remaining codes in
-     the text although they are not valid characters.  */
-  if (coding->last_block)
+     reset graphic planes and registers to the initial state.  */
+  if (src >= src_end && coding->last_block)
      {
        ENCODE_RESET_PLANE_AND_REGISTER;
-      bcopy(src, dst, src_end - src);
-      dst += (src_end - src);
-      src = src_end;
+      if (coding->carryover_size > 0
+         && coding->carryover_size < (dst_end - dst))
+       {
+         bcopy (coding->carryover, dst, coding->carryover_size);
+         dst += coding->carryover_size;
+         coding->carryover_size = 0;
+       }
      }
    *consumed = src - source;
    return dst - destination;
@@ -1563,6 +1614,63 @@ encode_coding_iso2022 (coding, source, destination,
      b2 += b2 < 0x3F ? 0x40 : 0x62;                                     \
    } while (0)
  
+#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                    \
+  do { /* Decode (CHARSET, C1, C2); the character is unified first when `unification_table' is non-nil.  */ \
+    int c_alt, charset_alt = (charset); /* CHARSET_ALT: charset actually decoded */ \
+    if (!NILP (unification_table)                                      \
+       && ((c_alt = unify_char (unification_table,                     \
+                                -1, (charset), c1, c2)) >= 0))         \
+         SPLIT_CHAR (c_alt, charset_alt, c1, c2); /* NOTE(review): presumably stores into CHARSET_ALT, C1, C2 -- confirm */ \
+    if (charset_alt == CHARSET_ASCII || charset_alt < 0) /* a negative charset takes the ASCII path */ \
+      DECODE_CHARACTER_ASCII (c1);                                     \
+    else if (CHARSET_DIMENSION (charset_alt) == 1) /* C2 is only a dummy on this path */ \
+      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                   \
+    else                                                               \
+      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);               \
+  } while (0)
+
+/* Encode the character whose charset is CHARSET and whose position
+   codes are C1 and C2 (C2 is a dummy for a one-dimensional charset),
+   storing the result at DST and advancing DST.  If
+   `unification_table' is non-nil and maps the character, the unified
+   character is encoded instead.
+
+   C1 and C2 must be lvalues: they are overwritten.  The expansion
+   site must have `unification_table', `sjis_p' and `dst' in scope.
+
+   ASCII, and JISX0201-Kana when SJIS_P, are emitted as the single
+   byte C1.  JISX0208 when SJIS_P, and the two Big5 charsets when not
+   SJIS_P, are converted by ENCODE_SJIS / ENCODE_BIG5 to two bytes.
+   Any other charset is emitted as the charset byte followed by the
+   position code(s), masked with 0x7F in the two-dimensional case.  */
+#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                      \
+  do {                                                                   \
+    int c_alt, charset_alt;                                              \
+    if (!NILP (unification_table)                                        \
+        && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
+           >= 0))                                                        \
+      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                           \
+    else                                                                 \
+      charset_alt = charset;                                             \
+    if (charset_alt == charset_ascii)                                    \
+      *dst++ = c1;                                                       \
+    else if (CHARSET_DIMENSION (charset_alt) == 1)                       \
+      {                                                                  \
+       if (sjis_p && charset_alt == charset_katakana_jisx0201)           \
+         *dst++ = c1;                                                    \
+       else                                                              \
+         *dst++ = charset_alt, *dst++ = c1;                              \
+      }                                                                  \
+    else                                                                 \
+      {                                                                  \
+       c1 &= 0x7F, c2 &= 0x7F;                                           \
+       if (sjis_p && charset_alt == charset_jisx0208)                    \
+         {                                                               \
+           unsigned char s1, s2;                                         \
+                                                                         \
+           ENCODE_SJIS (c1, c2, s1, s2);                                 \
+           *dst++ = s1, *dst++ = s2;                                     \
+         }                                                               \
+       else if (!sjis_p                                                  \
+                && (charset_alt == charset_big5_1                        \
+                    || charset_alt == charset_big5_2))                   \
+         {                                                               \
+           unsigned char b1, b2;                                         \
+                                                                         \
+           /* The charset goes first, then the two position codes;   */  \
+           /* only this macro's own arguments may be used here.      */  \
+           ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);                    \
+           *dst++ = b1, *dst++ = b2;                                     \
+         }                                                               \
+       else                                                              \
+         *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;                 \
+      }                                                                  \
+  } while (0)
+
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
     Check if a text is encoded in SJIS.  If it is, return
     CODING_CATEGORY_MASK_SJIS, else return 0.  */
@@ -1634,6 +1742,11 @@ decode_coding_sjis_big5 (coding, source, destination,
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
    unsigned char *adjusted_dst_end = dst_end - 3;
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_decode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_decode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -1658,8 +1771,10 @@ decode_coding_sjis_big5 (coding, source, destination,
           else
             *dst++ = c1;
         }
-      else if (c1 < 0x80)
+      else if (c1 < 0x20)
         *dst++ = c1;
+      else if (c1 < 0x80)
+       DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
        else if (c1 < 0xA0 || c1 >= 0xE0)
         {
           /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */
@@ -1667,7 +1782,7 @@ decode_coding_sjis_big5 (coding, source, destination,
             {
               ONE_MORE_BYTE (c2);
               DECODE_SJIS (c1, c2, c3, c4);
-             DECODE_CHARACTER_DIMENSION2 (charset_jisx0208, c3, c4);
+             DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
             }
           else if (c1 >= 0xE0 && c1 < 0xFF)
             {
@@ -1675,7 +1790,7 @@ decode_coding_sjis_big5 (coding, source, destination,
  
               ONE_MORE_BYTE (c2);
               DECODE_BIG5 (c1, c2, charset, c3, c4);
-             DECODE_CHARACTER_DIMENSION2 (charset, c3, c4);
+             DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
             }
           else                  /* Invalid code */
             *dst++ = c1;
@@ -1684,14 +1799,14 @@ decode_coding_sjis_big5 (coding, source, destination,
         {
           /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
           if (sjis_p)
-           DECODE_CHARACTER_DIMENSION1 (charset_katakana_jisx0201, c1);
+           DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, /* dummy */ c2);
           else
             {
               int charset;
  
               ONE_MORE_BYTE (c2);
               DECODE_BIG5 (c1, c2, charset, c3, c4);
-             DECODE_CHARACTER_DIMENSION2 (charset, c3, c4);
+             DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
             }
         }
        continue;
@@ -1732,6 +1847,11 @@ encode_coding_sjis_big5 (coding, source, destination,
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
    unsigned char *adjusted_dst_end = dst_end - 1;
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_encode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_encode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -1759,6 +1879,9 @@ encode_coding_sjis_big5 (coding, source, destination,
        switch (emacs_code_class[c1])
         {
         case EMACS_ascii_code:
+         ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
+         break;
+
         case EMACS_control_code:
           *dst++ = c1;
           break;
@@ -1773,7 +1896,7 @@ encode_coding_sjis_big5 (coding, source, destination,
  
         case EMACS_linefeed_code:
           if (coding->eol_type == CODING_EOL_LF
-             || coding->eol_type == CODING_EOL_AUTOMATIC)
+             || coding->eol_type == CODING_EOL_UNDECIDED)
             *dst++ = '\n';
           else if (coding->eol_type == CODING_EOL_CRLF)
             *dst++ = '\r', *dst++ = '\n';
@@ -1783,36 +1906,17 @@ encode_coding_sjis_big5 (coding, source, destination,
  
         case EMACS_leading_code_2:
           ONE_MORE_BYTE (c2);
-         if (sjis_p && c1 == charset_katakana_jisx0201)
-           *dst++ = c2;
-         else
-           *dst++ = c1, *dst++ = c2;
+         ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
           break;
  
         case EMACS_leading_code_3:
           TWO_MORE_BYTES (c2, c3);
-         c2 &= 0x7F, c3 &= 0x7F;
-         if (sjis_p && c1 == charset_jisx0208)
-           {
-             unsigned char s1, s2;
-
-             ENCODE_SJIS (c2, c3, s1, s2);
-             *dst++ = s1, *dst++ = s2;
-           }
-         else if (!sjis_p && (c1 == charset_big5_1 || c1 == charset_big5_2))
-           {
-             unsigned char b1, b2;
-
-             ENCODE_BIG5 (c1, c2, c3, b1, b2);
-             *dst++ = b1, *dst++ = b2;
-           }
-         else
-           *dst++ = c1, *dst++ = c2, *dst++ = c3;
+         ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
           break;
  
         case EMACS_leading_code_4:
           THREE_MORE_BYTES (c2, c3, c4);
-         *dst++ = c1, *dst++ = c2, *dst++ = c3, *dst++ = c4;
+         ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
           break;
  
         case EMACS_leading_code_composition:
@@ -1929,7 +2033,7 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
    switch (coding->eol_type)
      {
      case CODING_EOL_LF:
-    case CODING_EOL_AUTOMATIC:
+    case CODING_EOL_UNDECIDED:
        produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
        bcopy (source, destination, produced);
        if (coding->selective)
@@ -1995,13 +2099,14 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
     `element[0]' contains information to be set in `coding->type'.  The
     value and its meaning is as follows:
  
-   0 -- coding_system_internal
-   1 -- coding_system_sjis
-   2 -- coding_system_iso2022
-   3 -- coding_system_big5
-   4 -- coding_system_ccl
-   nil -- coding_system_no_conversion
-   t -- coding_system_automatic
+   0 -- coding_type_emacs_mule
+   1 -- coding_type_sjis
+   2 -- coding_type_iso2022
+   3 -- coding_type_big5
+   4 -- coding_type_ccl encoder/decoder written in CCL
+   nil -- coding_type_no_conversion
+   t -- coding_type_undecided (automatic conversion on decoding,
+                              no-conversion on encoding)
  
     `element[4]' contains information to be set in `coding->flags' and
     `coding->spec'.  The meaning varies by `coding->type'.
@@ -2060,12 +2165,16 @@ setup_coding_system (coding_system, coding)
    coding->direction = 0;
    coding->carryover_size = 0;
    coding->post_read_conversion = coding->pre_write_conversion = Qnil;
+  coding->character_unification_table_for_decode = Qnil;
+  coding->character_unification_table_for_encode = Qnil;
  
    Vlast_coding_system_used = coding->symbol = coding_system;
    eol_type = Qnil;
    /* Get value of property `coding-system' until we get a vector.
       While doing that, also get values of properties
-     `post-read-conversion', `pre-write-conversion', and `eol-type'.  */
+     `post-read-conversion', `pre-write-conversion',
+     `character-unification-table-for-decode',
+     `character-unification-table-for-encode' and `eol-type'.  */
    while (!NILP (coding_system) && SYMBOLP (coding_system))
      {
        if (NILP (coding->post_read_conversion))
@@ -2076,14 +2185,42 @@ setup_coding_system (coding_system, coding)
                                              Qpre_write_conversion);
        if (NILP (eol_type))
         eol_type = Fget (coding_system, Qeol_type);
+
+      if (NILP (coding->character_unification_table_for_decode))
+       coding->character_unification_table_for_decode
+         = Fget (coding_system, Qcharacter_unification_table_for_decode);
+
+      if (NILP (coding->character_unification_table_for_encode))
+       coding->character_unification_table_for_encode
+         = Fget (coding_system, Qcharacter_unification_table_for_encode);
+
        coding_system = Fget (coding_system, Qcoding_system);
      }
+
+  while (!NILP (coding->character_unification_table_for_decode)
+        && SYMBOLP (coding->character_unification_table_for_decode))
+       coding->character_unification_table_for_decode
+         = Fget (coding->character_unification_table_for_decode,
+                 Qcharacter_unification_table_for_decode);
+  if (!NILP (coding->character_unification_table_for_decode)
+      && !CHAR_TABLE_P (coding->character_unification_table_for_decode))
+      coding->character_unification_table_for_decode = Qnil;
+
+  while (!NILP (coding->character_unification_table_for_encode)
+        && SYMBOLP (coding->character_unification_table_for_encode))
+       coding->character_unification_table_for_encode
+         = Fget (coding->character_unification_table_for_encode,
+                 Qcharacter_unification_table_for_encode);
+  if (!NILP (coding->character_unification_table_for_encode)
+      && !CHAR_TABLE_P (coding->character_unification_table_for_encode))
+      coding->character_unification_table_for_encode = Qnil;
+
    if (!VECTORP (coding_system)
        || XVECTOR (coding_system)->size != 5)
      goto label_invalid_coding_system;
  
    if (VECTORP (eol_type))
-    coding->eol_type = CODING_EOL_AUTOMATIC;
+    coding->eol_type = CODING_EOL_UNDECIDED;
    else if (XFASTINT (eol_type) == 1)
      coding->eol_type = CODING_EOL_CRLF;
    else if (XFASTINT (eol_type) == 2)
@@ -2095,7 +2232,7 @@ setup_coding_system (coding_system, coding)
    switch (XFASTINT (type))
      {
      case 0:
-      coding->type = coding_type_internal;
+      coding->type = coding_type_emacs_mule;
        break;
  
      case 1:
@@ -2147,7 +2284,8 @@ setup_coding_system (coding_system, coding)
                   if an element is t, REG can be used by any charset,
                 nil: REG is never used.  */
         for (charset = 0; charset <= MAX_CHARSET; charset++)
-         CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = -1;
+         CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+           = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
         for (i = 0; i < 4; i++)
           {
             if (INTEGERP (flags[i])
@@ -2209,7 +2347,8 @@ setup_coding_system (coding_system, coding)
  
         for (charset = 0; charset <= MAX_CHARSET; charset++)
           if (CHARSET_VALID_P (charset)
-             && CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) < 0)
+             && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+                 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
             {
               /* We have not yet decided where to designate CHARSET.  */
               int reg_bits = default_reg_bits;
@@ -2265,7 +2404,7 @@ setup_coding_system (coding_system, coding)
  
      default:
        if (EQ (type, Qt))
-       coding->type = coding_type_automatic;
+       coding->type = coding_type_undecided;
        else
         coding->type = coding_type_no_conversion;
        break;
@@ -2274,6 +2413,7 @@ setup_coding_system (coding_system, coding)
  
   label_invalid_coding_system:
    coding->type = coding_type_no_conversion;
+  coding->eol_type = CODING_EOL_LF;
    coding->symbol = coding->pre_write_conversion = coding->post_read_conversion
      = Qnil;
    return -1;
@@ -2285,11 +2425,11 @@ setup_coding_system (coding_system, coding)
     because they use the same range of codes.  So, at first, coding
     systems are categorized into 7, those are:
  
-   o coding-category-internal
+   o coding-category-emacs-mule
  
         The category for a coding system which has the same code range
         as Emacs' internal format.  Assigned the coding-system (Lisp
-       symbol) `internal' by default.
+       symbol) `emacs-mule' by default.
  
     o coding-category-sjis
  
@@ -2362,6 +2502,7 @@ detect_coding_mask (src, src_bytes)
  
    /* At first, skip all ASCII characters and control characters except
       for three ISO2022 specific control characters.  */
+ label_loop_detect_coding:
    while (src < src_end)
      {
        c = *src;
@@ -2378,23 +2519,28 @@ detect_coding_mask (src, src_bytes)
    /* The text seems to be encoded in some multilingual coding system.
       Now, try to find in which coding system the text is encoded.  */
    if (c < 0x80)
-    /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
-    /* C is an ISO2022 specific control code of C0.  */
-    mask = detect_coding_iso2022 (src, src_end);
-
+    {
+      /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
+      /* C is an ISO2022 specific control code of C0.  */
+      mask = detect_coding_iso2022 (src, src_end);
+      src++;
+      if (mask == CODING_CATEGORY_MASK_ANY)
+       /* No valid ISO2022 code follows C.  Try again.  */
+       goto label_loop_detect_coding;
+    }
    else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3 || c == ISO_CODE_CSI)
      /* C is an ISO2022 specific control code of C1,
         or the first byte of SJIS's 2-byte character code,
         or a leading code of Emacs.  */
      mask = (detect_coding_iso2022 (src, src_end)
             | detect_coding_sjis (src, src_end)
-           | detect_coding_internal (src, src_end));
+           | detect_coding_emacs_mule (src, src_end));
  
    else if (c < 0xA0)
      /* C is the first byte of SJIS character code,
         or a leading-code of Emacs.  */
      mask = (detect_coding_sjis (src, src_end)
-           | detect_coding_internal (src, src_end));
+           | detect_coding_emacs_mule (src, src_end));
  
    else
      /* C is a character of ISO2022 in graphic plane right,
@@ -2460,7 +2606,7 @@ detect_coding (coding, src, src_bytes)
  
  /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
     is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
-   CODING_EOL_CR, and CODING_EOL_AUTOMATIC.  */
+   CODING_EOL_CR, and CODING_EOL_UNDECIDED.  */
  
  int
  detect_eol_type (src, src_bytes)
@@ -2483,7 +2629,7 @@ detect_eol_type (src, src_bytes)
             return CODING_EOL_CR;
         }
      }
-  return CODING_EOL_AUTOMATIC;
+  return CODING_EOL_UNDECIDED;
  }
  
  /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
@@ -2499,7 +2645,7 @@ detect_eol (coding, src, src_bytes)
    Lisp_Object val;
    int eol_type = detect_eol_type (src, src_bytes);
  
-  if (eol_type == CODING_EOL_AUTOMATIC)
+  if (eol_type == CODING_EOL_UNDECIDED)
      /*  We found no end-of-line in the source text.  */
      return;
  
@@ -2527,10 +2673,10 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
        return 0;
      }
  
-  if (coding->type == coding_type_automatic)
+  if (coding->type == coding_type_undecided)
      detect_coding (coding, source, src_bytes);
  
-  if (coding->eol_type == CODING_EOL_AUTOMATIC)
+  if (coding->eol_type == CODING_EOL_UNDECIDED)
      detect_eol (coding, source, src_bytes);
  
    coding->carryover_size = 0;
@@ -2543,10 +2689,10 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
        *consumed = produced;
        break;
  
-    case coding_type_internal:
-    case coding_type_automatic:
+    case coding_type_emacs_mule:
+    case coding_type_undecided:
        if (coding->eol_type == CODING_EOL_LF
-         ||  coding->eol_type == CODING_EOL_AUTOMATIC)
+         ||  coding->eol_type == CODING_EOL_UNDECIDED)
         goto label_no_conversion;
        produced = decode_eol (coding, source, destination,
                              src_bytes, dst_bytes, consumed);
@@ -2608,10 +2754,10 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
        *consumed = produced;
        break;
  
-    case coding_type_internal:
-    case coding_type_automatic:
+    case coding_type_emacs_mule:
+    case coding_type_undecided:
        if (coding->eol_type == CODING_EOL_LF
-         ||  coding->eol_type == CODING_EOL_AUTOMATIC)
+         ||  coding->eol_type == CODING_EOL_UNDECIDED)
         goto label_no_conversion;
        produced = encode_eol (coding, source, destination,
                              src_bytes, dst_bytes, consumed);
@@ -2716,9 +2862,9 @@ get_conversion_buffer (size)
  #ifdef emacs
  /*** 7. Emacs Lisp library functions ***/
  
-DEFUN ("coding-system-vector", Fcoding_system_vector, Scoding_system_vector,
+DEFUN ("coding-system-spec", Fcoding_system_spec, Scoding_system_spec,
         1, 1, 0,
-  "Return coding-vector of CODING-SYSTEM.\n\
+  "Return coding-spec of CODING-SYSTEM.\n\
  If CODING-SYSTEM is not a valid coding-system, return nil.")
    (obj)
       Lisp_Object obj;
@@ -2735,20 +2881,22 @@ See document of make-coding-system for coding-system object.")
    (obj)
       Lisp_Object obj;
  {
-  return ((NILP (obj) || !NILP (Fcoding_system_vector (obj))) ? Qt : Qnil);
+  return ((NILP (obj) || !NILP (Fcoding_system_spec (obj))) ? Qt : Qnil);
  }
  
-DEFUN ("read-non-nil-coding-system",
-       Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0,
+DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
+       Sread_non_nil_coding_system, 1, 1, 0,
    "Read a coding system from the minibuffer, prompting with string PROMPT.")
    (prompt)
       Lisp_Object prompt;
  {
    Lisp_Object val;
-  do {
-    val = Fcompleting_read (prompt, Vobarray, Qcoding_system_vector,
-                           Qt, Qnil, Qnil);
-  } while (XSTRING (val)->size == 0);
+  do
+    {
+      val = Fcompleting_read (prompt, Vobarray, Qcoding_system_spec,
+                             Qt, Qnil, Qnil, Qnil);
+    }
+  while (XSTRING (val)->size == 0);
    return (Fintern (val, Qnil));
  }
  
@@ -2758,7 +2906,7 @@ DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0,
       Lisp_Object prompt;
  {
    Lisp_Object val = Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
-                                     Qt, Qnil, Qnil);
+                                     Qt, Qnil, Qnil, Qnil);
    return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
  }
  
@@ -2775,14 +2923,14 @@ The value of property should be a vector of length 5.")
    if (!NILP (Fcoding_system_p (coding_system)))
      return coding_system;
    while (1)
-    Fsignal (Qcoding_system_error, coding_system);
+    Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
  }
  
  DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
         2, 2, 0,
    "Detect coding-system of the text in the region between START and END.\n\
  Return a list of possible coding-systems ordered by priority.\n\
-If only ASCII characters are found, it returns `automatic-conversion'\n\
+If only ASCII characters are found, it returns `undecided'\n\
   or its subsidiary coding-system according to a detected end-of-line format.")
    (b, e)
       Lisp_Object b, e;
@@ -2800,8 +2948,8 @@ If only ASCII characters are found, it returns `automatic-conversion'\n\
  
    if (coding_mask == CODING_CATEGORY_MASK_ANY)
      {
-      val = intern ("automatic-conversion");
-      if (eol_type != CODING_EOL_AUTOMATIC)
+      val = intern ("undecided");
+      if (eol_type != CODING_EOL_UNDECIDED)
         {
           Lisp_Object val2 = Fget (val, Qeol_type);
           if (VECTORP (val2))
@@ -2831,7 +2979,7 @@ If only ASCII characters are found, it returns `automatic-conversion'\n\
        val = Qnil;
        for (; !NILP (val2); val2 = XCONS (val2)->cdr)
         {
-         if (eol_type == CODING_EOL_AUTOMATIC)
+         if (eol_type == CODING_EOL_UNDECIDED)
             val = Fcons (XCONS (val2)->car, val);
           else
             {
@@ -2861,7 +3009,7 @@ shrink_conversion_area (begp, endp, coding, encodep)
    register unsigned char *beg_addr = *begp, *end_addr = *endp;
  
    if (coding->eol_type != CODING_EOL_LF
-      && coding->eol_type != CODING_EOL_AUTOMATIC)
+      && coding->eol_type != CODING_EOL_UNDECIDED)
      /* Since we anyway have to convert end-of-line format, it is not
         worth skipping at most 100 bytes or so.  */
      return;
@@ -2871,8 +3019,8 @@ shrink_conversion_area (begp, endp, coding, encodep)
        switch (coding->type)
         {
         case coding_type_no_conversion:
-       case coding_type_internal:
-       case coding_type_automatic:
+       case coding_type_emacs_mule:
+       case coding_type_undecided:
           /* We need no conversion.  */
           *begp = *endp;
           return;
@@ -2909,7 +3057,7 @@ shrink_conversion_area (begp, endp, coding, encodep)
           /* We need no conversion.  */
           *begp = *endp;
           return;
-       case coding_type_internal:
+       case coding_type_emacs_mule:
           if (coding->eol_type == CODING_EOL_LF)
             {
               /* We need no conversion.  */
@@ -3225,8 +3373,7 @@ Return the corresponding character code in SJIS.")
    (ch)
       Lisp_Object ch;
  {
-  int charset;
-  unsigned char c1, c2, s1, s2;
+  int charset, c1, c2, s1, s2;
    Lisp_Object val;
  
    CHECK_NUMBER (ch, 0);
@@ -3234,7 +3381,7 @@ Return the corresponding character code in SJIS.")
    if (charset == charset_jisx0208)
      {
        ENCODE_SJIS (c1, c2, s1, s2);
-      XSETFASTINT (val, ((int)s1 << 8) | s2);
+      XSETFASTINT (val, (s1 << 8) | s2);
      }
    else
      XSETFASTINT (val, 0);
@@ -3265,8 +3412,7 @@ Return the corresponding character code in Big5.")
    (ch)
       Lisp_Object ch;
  {
-  int charset;
-  unsigned char c1, c2, b1, b2;
+  int charset, c1, c2, b1, b2;
    Lisp_Object val;
  
    CHECK_NUMBER (ch, 0);
@@ -3274,26 +3420,21 @@ Return the corresponding character code in Big5.")
    if (charset == charset_big5_1 || charset == charset_big5_2)
      {
        ENCODE_BIG5 (charset, c1, c2, b1, b2);
-      XSETFASTINT (val, ((int)b1 << 8) | b2);
+      XSETFASTINT (val, (b1 << 8) | b2);
      }
    else
      XSETFASTINT (val, 0);
    return val;
  }
  
-DEFUN ("set-terminal-coding-system",
-       Fset_terminal_coding_system, Sset_terminal_coding_system, 1, 1,
-       "zCoding-system for terminal display: ",
-  "Set coding-system of your terminal to CODING-SYSTEM.\n\
-All outputs to terminal are encoded to this coding-system.")
+DEFUN ("set-terminal-coding-system-internal",
+       Fset_terminal_coding_system_internal,
+       Sset_terminal_coding_system_internal, 1, 1, 0, "")
    (coding_system)
       Lisp_Object coding_system;
  {
    CHECK_SYMBOL (coding_system, 0);
    setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
-  update_mode_lines++;
-  if (!NILP (Finteractive_p ()))
-    Fredraw_display ();
    return Qnil;
  }
  
@@ -3305,11 +3446,9 @@ DEFUN ("terminal-coding-system",
    return terminal_coding.symbol;
  }
  
-DEFUN ("set-keyboard-coding-system",
-       Fset_keyboard_coding_system, Sset_keyboard_coding_system, 1, 1,
-       "zCoding-system for keyboard input: ",
-  "Set coding-system of what is sent from terminal keyboard to CODING-SYSTEM.\n\
-All inputs from terminal are decoded from this coding-system.")
+DEFUN ("set-keyboard-coding-system-internal",
+       Fset_keyboard_coding_system_internal,
+       Sset_keyboard_coding_system_internal, 1, 1, 0, "")
    (coding_system)
       Lisp_Object coding_system;
  {
@@ -3327,9 +3466,9 @@ DEFUN ("keyboard-coding-system",
  }
  
  \f
-DEFUN ("find-coding-system", Ffind_coding_system, Sfind_coding_system,
-       1, MANY, 0,
-  "Choose a coding system for a file operation based on file name.\n\
+DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
+       Sfind_operation_coding_system,  1, MANY, 0,
+  "Choose a coding system for an operation based on the target name.\n\
  The value names a pair of coding systems: (ENCODING-SYSTEM DECODING-SYSTEM).\n\
  ENCODING-SYSTEM is the coding system to use for encoding\n\
  \(in case OPERATION does encoding), and DECODING-SYSTEM is the coding system\n\
@@ -3350,10 +3489,12 @@ TARGET has a meaning which depends on OPERATION:\n\
    For process I/O, TARGET is a process name.\n\
    For network I/O, TARGET is a service name or a port number\n\
  \n\
-This function looks up what `coding-system-alist' specifies for\n\
-OPERATION and TARGET.  It may specify a cons cell which represents\n\
-a particular coding system or it may have a function to call.\n\
-In the latter case, we call the function with one argument,\n\
+This function looks up what is specified for TARGET in\n\
+`file-coding-system-alist', `process-coding-system-alist',\n\
+or `network-coding-system-alist' depending on OPERATION.\n\
+They may specify a coding system, a cons of coding systems,\n\
+or a function symbol to call.\n\
+In the last case, we call the function with one argument,\n\
  which is a list of all the arguments given to `find-coding-system'.")
    (nargs, args)
       int nargs;
@@ -3376,11 +3517,16 @@ which is a list of all the arguments given to `find-coding-system'.")
         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
      error ("Invalid %dth argument", XINT (target_idx) + 1);
  
-  chain = Fassq (operation, Vcoding_system_alist);
+  chain = ((EQ (operation, Qinsert_file_contents)
+           || EQ (operation, Qwrite_region))
+          ? Vfile_coding_system_alist
+          : (EQ (operation, Qopen_network_stream)
+             ? Vnetwork_coding_system_alist
+             : Vprocess_coding_system_alist));
    if (NILP (chain))
      return Qnil;
  
-  for (chain = XCONS (chain)->cdr; CONSP (chain); chain = XCONS (chain)->cdr)
+  for (; CONSP (chain); chain = XCONS (chain)->cdr)
      {
        Lisp_Object elt = XCONS (chain)->car;
  
@@ -3389,11 +3535,18 @@ which is a list of all the arguments given to `find-coding-system'.")
                && STRINGP (XCONS (elt)->car)
                && fast_string_match (XCONS (elt)->car, target) >= 0)
               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
-       return (CONSP (val = XCONS (elt)->cdr)
-               ? val
-               : ((SYMBOLP (val) && Fboundp (val)
-                   ? call2 (val, Flist (nargs, args))
-                   : Qnil)));
+       {
+         val = XCONS (elt)->cdr;
+         if (CONSP (val))
+           return val;
+         if (! SYMBOLP (val))
+           return Qnil;
+         if (! NILP (Fcoding_system_p (val)))
+           return Fcons (val, val);
+         if (!NILP (Fboundp (val)))
+           return call2 (val, Flist (nargs, args));
+         return Qnil;
+       }
      }
    return Qnil;
  }
@@ -3407,7 +3560,7 @@ init_coding_once ()
  {
    int i;
  
-  /* Emacs internal format specific initialize routine.  */ 
+  /* Emacs' internal format specific initialize routine.  */ 
    for (i = 0; i <= 0x20; i++)
      emacs_code_class[i] = EMACS_control_code;
    emacs_code_class[0x0A] = EMACS_linefeed_code;
@@ -3491,8 +3644,8 @@ syms_of_coding ()
    Qpre_write_conversion = intern ("pre-write-conversion");
    staticpro (&Qpre_write_conversion);
  
-  Qcoding_system_vector = intern ("coding-system-vector");
-  staticpro (&Qcoding_system_vector);
+  Qcoding_system_spec = intern ("coding-system-spec");
+  staticpro (&Qcoding_system_spec);
  
    Qcoding_system_p = intern ("coding-system-p");
    staticpro (&Qcoding_system_p);
@@ -3519,7 +3672,20 @@ syms_of_coding ()
        }
    }
  
-  defsubr (&Scoding_system_vector);
+  Qcharacter_unification_table = intern ("character-unification-table");
+  staticpro (&Qcharacter_unification_table);
+  Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
+       make_number (0));
+
+  Qcharacter_unification_table_for_decode
+    = intern ("character-unification-table-for-decode");
+  staticpro (&Qcharacter_unification_table_for_decode);
+
+  Qcharacter_unification_table_for_encode
+    = intern ("character-unification-table-for-encode");
+  staticpro (&Qcharacter_unification_table_for_encode);
+
+  defsubr (&Scoding_system_spec);
    defsubr (&Scoding_system_p);
    defsubr (&Sread_coding_system);
    defsubr (&Sread_non_nil_coding_system);
@@ -3533,11 +3699,11 @@ syms_of_coding ()
    defsubr (&Sencode_sjis_char);
    defsubr (&Sdecode_big5_char);
    defsubr (&Sencode_big5_char);
-  defsubr (&Sset_terminal_coding_system);
+  defsubr (&Sset_terminal_coding_system_internal);
    defsubr (&Sterminal_coding_system);
-  defsubr (&Sset_keyboard_coding_system);
+  defsubr (&Sset_keyboard_coding_system_internal);
    defsubr (&Skeyboard_coding_system);
-  defsubr (&Sfind_coding_system);
+  defsubr (&Sfind_operation_coding_system);
  
    DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
      "List of coding-categories (symbols) ordered by priority.");
@@ -3566,54 +3732,94 @@ If not, an appropriate element in `coding-system-alist' (which see) is used.");
      "Coding-system used in the latest file or process I/O.");
    Vlast_coding_system_used = Qnil;
  
-  DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
-    "Nested alist to decide a coding system for a specific I/O operation.\n\
-The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\
+  DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
+    "Alist to decide a coding system to use for a file I/O operation.\n\
+The format is ((PATTERN . VAL) ...),\n\
+where PATTERN is a regular expression matching a file name,\n\
+VAL is a coding system, a cons of coding systems, or a function symbol.\n\
+If VAL is a coding system, it is used for both decoding and encoding\n\
+the file contents.\n\
+If VAL is a cons of coding systems, the car part is used for decoding,\n\
+and the cdr part is used for encoding.\n\
+If VAL is a function symbol, the function must return a coding system\n\
+or a cons of coding systems which are used as above.\n\
  \n\
-OPERATION is one of the following Emacs I/O primitives:\n\
-  For file I/O, insert-file-contents and write-region.\n\
-  For process I/O, call-process, call-process-region, and start-process.\n\
-  For network I/O, open-network-stream.\n\
-In addition, for process I/O, `process-argument' can be specified for\n\
-encoding arguments of the process.\n\
+See also the function `find-operation-coding-system'.");
+  Vfile_coding_system_alist = Qnil;
+
+  DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
+    "Alist to decide a coding system to use for a process I/O operation.\n\
+The format is ((PATTERN . VAL) ...),\n\
+where PATTERN is a regular expression matching a program name,\n\
+VAL is a coding system, a cons of coding systems, or a function symbol.\n\
+If VAL is a coding system, it is used for both decoding what is received\n\
+from the program and encoding what is sent to the program.\n\
+If VAL is a cons of coding systems, the car part is used for decoding,\n\
+and the cdr part is used for encoding.\n\
+If VAL is a function symbol, the function must return a coding system\n\
+or a cons of coding systems which are used as above.\n\
  \n\
-REGEXP is a regular expression matching a target of OPERATION, where\n\
-target is a file name for file I/O operations, a process name for\n\
-process I/O operations, or a service name for network I/O\n\
-operations.  REGEXP might be a port number for network I/O operation.\n\
+See also the function `find-operation-coding-system'.");
+  Vprocess_coding_system_alist = Qnil;
+
+  DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
+    "Alist to decide a coding system to use for a network I/O operation.\n\
+The format is ((PATTERN . VAL) ...),\n\
+where PATTERN is a regular expression matching a network service name\n\
+or is a port number to connect to,\n\
+VAL is a coding system, a cons of coding systems, or a function symbol.\n\
+If VAL is a coding system, it is used for both decoding what is received\n\
+from the network stream and encoding what is sent to the network stream.\n\
+If VAL is a cons of coding systems, the car part is used for decoding,\n\
+and the cdr part is used for encoding.\n\
+If VAL is a function symbol, the function must return a coding system\n\
+or a cons of coding systems which are used as above.\n\
  \n\
-CODING-SYSTEMS is a cons of coding systems to encode and decode\n\
-character code on OPERATION, or a function symbol returning the cons.\n\
-See the documentation of `find-coding-system' for more detail.");
-  Vcoding_system_alist = Qnil;
+See also the function `find-operation-coding-system'.");
+  Vnetwork_coding_system_alist = Qnil;
  
    DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
      "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
-  eol_mnemonic_unix = '.';
+  eol_mnemonic_unix = ':';
  
    DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
      "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
-  eol_mnemonic_dos = ':';
+  eol_mnemonic_dos = '\\';
  
    DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
      "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
-  eol_mnemonic_mac = '\'';
+  eol_mnemonic_mac = '/';
  
    DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
      "Mnemonic character indicating end-of-line format is not yet decided.");
-  eol_mnemonic_undecided = '-';
+  eol_mnemonic_undecided = ':';
  
-  DEFVAR_LISP ("alternate-charset-table", &Valternate_charset_table,
-    "Alist of charsets vs the alternate charsets.\n\
-While decoding, if a charset (car part of an element) is found,\n\
-decode it as the alternate charset (cdr part of the element).");
-  Valternate_charset_table = Qnil;
+  DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
+    "Non-nil means ISO 2022 encoder/decoder do character unification.");
+  Venable_character_unification = Qt;
+
+  DEFVAR_LISP ("standard-character-unification-table-for-decode",
+    &Vstandard_character_unification_table_for_decode,
+    "Table for unifying characters when reading.");
+  Vstandard_character_unification_table_for_decode = Qnil;
+
+  DEFVAR_LISP ("standard-character-unification-table-for-encode",
+    &Vstandard_character_unification_table_for_encode,
+    "Table for unifying characters when writing.");
+  Vstandard_character_unification_table_for_encode = Qnil;
  
    DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
      "Alist of charsets vs revision numbers.\n\
  While encoding, if a charset (car part of an element) is found,\n\
  designate it with the escape sequence identifing revision (cdr part of the element).");
    Vcharset_revision_alist = Qnil;
+
+  DEFVAR_LISP ("default-process-coding-system",
+              &Vdefault_process_coding_system,
+    "Cons of coding systems used for process I/O by default.\n\
+The car part is used for decoding a process output,\n\
+the cdr part is used for encoding a text to be sent to a process.");
+  Vdefault_process_coding_system = Qnil;
  }
  
  #endif /* emacs */