Fix bugs with inappropriate mixing of Lisp_Object with int.

[bpt/emacs.git] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index 95bbd26..75e4980 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,27 +1,28 @@
  /* Coding system handler (conversion, detection, and etc).
-   Ver.1.0.
+   Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
+   Licensed to the Free Software Foundation.
  
-   Copyright (C) 1995 Free Software Foundation, Inc.
-   Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
+This file is part of GNU Emacs.
  
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
  
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
  
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
  
  /*** TABLE OF CONTENTS ***
  
    1. Preamble
-  2. Emacs' internal format handlers
+  2. Emacs' internal format (emacs-mule) handlers
    3. ISO2022 handlers
    4. Shift-JIS and BIG5 handlers
    5. End-of-line handlers
@@ -36,10 +37,11 @@
    Coding system is an encoding mechanism of one or more character
    sets.  Here's a list of coding systems which Emacs can handle.  When
    we say "decode", it means converting some other coding system to
-  Emacs' internal format, and when we say "encode", it means
-  converting Emacs' internal format to some other coding system.
+  Emacs' internal format (emacs-mule), and when we say "encode",
+  it means converting Emacs' internal format (emacs-mule) to some
+  other coding system.
  
-  0. Emacs' internal format
+  0. Emacs' internal format (emacs-mule)
  
    Emacs itself holds a multi-lingual character in a buffer and a string
    in a special format.  Details are described in the section 2.
@@ -104,7 +106,7 @@
    template of these functions.  */
  #if 0
  int
-detect_coding_internal (src, src_end)
+detect_coding_emacs_mule (src, src_end)
       unsigned char *src, *src_end;
  {
    ...
@@ -114,11 +116,11 @@ detect_coding_internal (src, src_end)
  /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
  
    These functions decode SRC_BYTES length text at SOURCE encoded in
-  CODING to Emacs' internal format.  The resulting text goes to a
-  place pointed by DESTINATION, the length of which should not exceed
-  DST_BYTES.  The bytes actually processed is returned as *CONSUMED.
-  The return value is the length of the decoded text.  Below is a
-  template of these functions.  */
+  CODING to Emacs' internal format (emacs-mule).  The resulting text
+  goes to a place pointed to by DESTINATION, the length of which should
+  not exceed DST_BYTES.  The number of bytes actually processed is
+  returned as *CONSUMED.  The return value is the length of the decoded
+  text.  Below is a template of these functions.  */
  #if 0
  decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
       struct coding_system *coding;
@@ -132,12 +134,12 @@ decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
  
  /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
  
-  These functions encode SRC_BYTES length text at SOURCE of Emacs
-  internal format to CODING.  The resulting text goes to a place
-  pointed by DESTINATION, the length of which should not exceed
-  DST_BYTES.  The bytes actually processed is returned as *CONSUMED.
-  The return value is the length of the encoded text.  Below is a
-  template of these functions.  */
+  These functions encode SRC_BYTES length text at SOURCE of Emacs'
+  internal format (emacs-mule) to CODING.  The resulting text goes to
+  a place pointed to by DESTINATION, the length of which should not
+  exceed DST_BYTES.  The number of bytes actually processed is
+  returned as *CONSUMED.  The return value is the length of the
+  encoded text.  Below is a template of these functions.  */
  #if 0
  encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
       struct coding_system *coding;
@@ -260,7 +262,7 @@ int eol_mnemonic_undecided;
  
  #ifdef emacs
  
-Lisp_Object Qcoding_system_vector, Qcoding_system_p, Qcoding_system_error;
+Lisp_Object Qcoding_system_spec, Qcoding_system_p, Qcoding_system_error;
  
  /* Coding-systems are handed between Emacs Lisp programs and C internal
     routines by the following three variables.  */
@@ -277,7 +279,9 @@ struct coding_system terminal_coding;
  /* Coding-system of what is sent from terminal keyboard.  */
  struct coding_system keyboard_coding;
  
-Lisp_Object Vcoding_system_alist;
+Lisp_Object Vfile_coding_system_alist;
+Lisp_Object Vprocess_coding_system_alist;
+Lisp_Object Vnetwork_coding_system_alist;
  
  #endif /* emacs */
  
@@ -291,7 +295,7 @@ Lisp_Object coding_category_table[CODING_CATEGORY_IDX_MAX];
  
  /* Table of names of symbol for each coding-category.  */
  char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
-  "coding-category-internal",
+  "coding-category-emacs-mule",
    "coding-category-sjis",
    "coding-category-iso-7",
    "coding-category-iso-8-1",
@@ -301,14 +305,26 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
    "coding-category-binary"
  };
  
-/* Alist of charsets vs the alternate charsets.  */
-Lisp_Object Valternate_charset_table;
+/* Flag to tell if we look up unification table on character code
+   conversion.  */
+Lisp_Object Venable_character_unification;
+/* Standard unification table to look up on decoding (reading).  */
+Lisp_Object Vstandard_character_unification_table_for_decode;
+/* Standard unification table to look up on encoding (writing).  */
+Lisp_Object Vstandard_character_unification_table_for_encode;
+
+Lisp_Object Qcharacter_unification_table;
+Lisp_Object Qcharacter_unification_table_for_decode;
+Lisp_Object Qcharacter_unification_table_for_encode;
  
  /* Alist of charsets vs revision number.  */
  Lisp_Object Vcharset_revision_alist;
  
+/* Default coding systems used for process I/O.  */
+Lisp_Object Vdefault_process_coding_system;
+
  \f
-/*** 2. Emacs internal format handlers ***/
+/*** 2. Emacs internal format (emacs-mule) handlers ***/
  
  /* Emacs' internal format for encoding multiple character sets is a
     kind of multi-byte encoding, i.e. encoding a character by a sequence
@@ -355,10 +371,10 @@ enum emacs_code_class_type emacs_code_class[256];
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
     Check if a text is encoded in Emacs' internal format.  If it is,
-   return CODING_CATEGORY_MASK_INTERNAL, else return 0.  */
+   return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
  
  int
-detect_coding_internal (src, src_end)
+detect_coding_emacs_mule (src, src_end)
       unsigned char *src, *src_end;
  {
    unsigned char c;
@@ -414,7 +430,7 @@ detect_coding_internal (src, src_end)
           break;
         }
      }
-  return CODING_CATEGORY_MASK_INTERNAL;
+  return CODING_CATEGORY_MASK_EMACS_MULE;
  }
  
  \f
@@ -579,45 +595,45 @@ int
  detect_coding_iso2022 (src, src_end)
       unsigned char *src, *src_end;
  {
-  unsigned char graphic_register[4];
-  unsigned char c, esc_cntl;
    int mask = (CODING_CATEGORY_MASK_ISO_7
               | CODING_CATEGORY_MASK_ISO_8_1
-             | CODING_CATEGORY_MASK_ISO_8_2);
-  /* We may look ahead maximum 3 bytes.  */
-  unsigned char *adjusted_src_end = src_end - 3;
-  int i;
-
-  for (i = 0; i < 4; i++)
-    graphic_register[i] = CHARSET_ASCII;
+             | CODING_CATEGORY_MASK_ISO_8_2
+             | CODING_CATEGORY_MASK_ISO_ELSE);
+  int g1 = 0;                  /* 1 iff designating to G1.  */
+  int c, i;
  
-  while (src < adjusted_src_end)
+  while (src < src_end)
      {
        c = *src++;
        switch (c)
         {
         case ISO_CODE_ESC:
-         if (src >= adjusted_src_end)
+         if (src >= src_end)
             break;
           c = *src++;
-         if (c == '$')
+         if (src < src_end
+             && ((c >= '(' && c <= '/')
+                 || c == '$' && ((*src >= '(' && *src <= '/')
+                                 || (*src >= '@' && *src <= 'B'))))
             {
-             /* Designation of 2-byte character set.  */
-             if (src >= adjusted_src_end)
-               break;
-             c = *src++;
+             /* Valid designation sequence.  */
+             if (c == ')' || (c == '$' && *src == ')'))
+               {
+                 g1 = 1;
+                 mask &= ~CODING_CATEGORY_MASK_ISO_7;
+               }
+             src++;
+             break;
             }
-         if ((c >= ')' && c <= '+') || (c >= '-' && c <= '/'))
-           /* Designation to graphic register 1, 2, or 3.  */
-           mask &= ~CODING_CATEGORY_MASK_ISO_7;
           else if (c == 'N' || c == 'O' || c == 'n' || c == 'o')
             return CODING_CATEGORY_MASK_ISO_ELSE;
           break;
  
-       case ISO_CODE_SI:
         case ISO_CODE_SO:
-         return CODING_CATEGORY_MASK_ISO_ELSE;
-
+         if (g1)
+           return CODING_CATEGORY_MASK_ISO_ELSE;
+         break;
+         
         case ISO_CODE_CSI:
         case ISO_CODE_SS2:
         case ISO_CODE_SS3:
@@ -634,9 +650,9 @@ detect_coding_iso2022 (src, src_end)
               int count = 1;
  
               mask &= ~CODING_CATEGORY_MASK_ISO_7;
-             while (src < adjusted_src_end && *src >= 0xA0)
+             while (src < src_end && *src >= 0xA0)
                 count++, src++;
-             if (count & 1 && src < adjusted_src_end)
+             if (count & 1 && src < src_end)
                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
             }
           break;
@@ -647,44 +663,48 @@ detect_coding_iso2022 (src, src_end)
  }
  
  /* Decode a character of which charset is CHARSET and the 1st position
-   code is C1.  If dimension of CHARSET 2, the 2nd position code is
+   code is C1.  If dimension of CHARSET is 2, the 2nd position code is
     fetched from SRC and set to C2.  If CHARSET is negative, it means
     that we are decoding ill formed text, and what we can do is just to
     read C1 as is.  */
  
-#define DECODE_ISO_CHARACTER(charset, c1)                      \
-  do {                                                         \
-    if ((charset) >= 0 && CHARSET_DIMENSION (charset) == 2)    \
-      ONE_MORE_BYTE (c2);                                      \
-    if (COMPOSING_HEAD_P (coding->composing))                  \
-      {                                                                \
-       *dst++ = LEADING_CODE_COMPOSITION;                      \
-       if (COMPOSING_WITH_RULE_P (coding->composing))          \
-         /* To tell composition rules are embeded.  */         \
-         *dst++ = 0xFF;                                        \
-       coding->composing += 2;                                 \
-      }                                                                \
-    if ((charset) < 0)                                         \
-      *dst++ = c1;                                             \
-    else if ((charset) == CHARSET_ASCII)                       \
-      DECODE_CHARACTER_ASCII (c1);                             \
-    else if (CHARSET_DIMENSION (charset) == 1)                 \
-      DECODE_CHARACTER_DIMENSION1 (charset, c1);               \
-    else                                                       \
-      DECODE_CHARACTER_DIMENSION2 (charset, c1, c2);           \
-    if (COMPOSING_WITH_RULE_P (coding->composing))             \
-      /* To tell a composition rule follows.  */               \
-      coding->composing = COMPOSING_WITH_RULE_RULE;            \
+#define DECODE_ISO_CHARACTER(charset, c1)                              \
+  do {                                                                 \
+    int c_alt, charset_alt = (charset);                                        \
+    if (COMPOSING_HEAD_P (coding->composing))                          \
+      {                                                                        \
+       *dst++ = LEADING_CODE_COMPOSITION;                              \
+       if (COMPOSING_WITH_RULE_P (coding->composing))                  \
+         /* To tell composition rules are embedded.  */                \
+         *dst++ = 0xFF;                                                \
+       coding->composing += 2;                                         \
+      }                                                                        \
+    if ((charset) >= 0)                                                        \
+      {                                                                        \
+       if (CHARSET_DIMENSION (charset) == 2)                           \
+         ONE_MORE_BYTE (c2);                                           \
+       /* NOTE(review): for a dimension-1 charset this macro does not  \
+          assign C2 before passing it to unify_char -- confirm that    \
+          the caller's C2 is harmless there.  */                       \
+       if (!NILP (unification_table)                                   \
+           && ((c_alt = unify_char (unification_table,                 \
+                                    -1, (charset), c1, c2)) >= 0))     \
+         SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
+      }                                                                        \
+    /* A negative charset (ill formed text) takes the ASCII path.  */  \
+    if (charset_alt == CHARSET_ASCII || charset_alt < 0)               \
+      DECODE_CHARACTER_ASCII (c1);                                     \
+    else if (CHARSET_DIMENSION (charset_alt) == 1)                     \
+      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                   \
+    else                                                               \
+      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);               \
+    if (COMPOSING_WITH_RULE_P (coding->composing))                     \
+      /* To tell a composition rule follows.  */                       \
+      coding->composing = COMPOSING_WITH_RULE_RULE;                    \
    } while (0)
  
  /* Set designation state into CODING.  */
  #define DECODE_DESIGNATION(reg, dimension, chars, final_char)          \
    do {                                                                 \
-    int charset = ISO_CHARSET_TABLE (dimension, chars, final_char);    \
-    Lisp_Object temp                                                   \
-      = Fassq (CHARSET_SYMBOL (charset), Valternate_charset_table);    \
-    if (! NILP (temp))                                                 \
-      charset = get_charset_id (XCONS (temp)->cdr);                    \
+    int charset = ISO_CHARSET_TABLE (make_number (dimension),          \
+                                    make_number (chars),               \
+                                    make_number (final_char));         \
      if (charset >= 0)                                                  \
        {                                                                        \
          if (coding->direction == 1                                     \
@@ -716,6 +736,11 @@ decode_coding_iso2022 (coding, source, destination,
    /* Charsets invoked to graphic plane 0 and 1 respectively.  */
    int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
    int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_decode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_decode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -725,7 +750,7 @@ decode_coding_iso2022 (coding, source, destination,
          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
          to SRC_BASE before exiting.  */
        unsigned char *src_base = src;
-      unsigned char c1 = *src++, c2, cmprule;
+      int c1 = *src++, c2;
  
        switch (iso_code_class [c1])
         {
@@ -792,6 +817,8 @@ decode_coding_iso2022 (coding, source, destination,
           break;
  
         case ISO_shift_out:
+         if (CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
+           goto label_invalid_escape_sequence;
           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
           break;
@@ -828,14 +855,10 @@ decode_coding_iso2022 (coding, source, destination,
             case '&':           /* revision of following character set */
               ONE_MORE_BYTE (c1);
               if (!(c1 >= '@' && c1 <= '~'))
-               {
-                 goto label_invalid_escape_sequence;
-               }
+               goto label_invalid_escape_sequence;
               ONE_MORE_BYTE (c1);
               if (c1 != ISO_CODE_ESC)
-               {
-                 goto label_invalid_escape_sequence;
-               }
+               goto label_invalid_escape_sequence;
               ONE_MORE_BYTE (c1);
               goto label_escape_sequence;
  
@@ -857,26 +880,34 @@ decode_coding_iso2022 (coding, source, destination,
                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
                 }
               else
-               {
-                 goto label_invalid_escape_sequence;
-               }
+               goto label_invalid_escape_sequence;
               break;
  
             case 'n':           /* invocation of locking-shift-2 */
+             if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
+               goto label_invalid_escape_sequence;
               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
+             charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
               break;
  
             case 'o':           /* invocation of locking-shift-3 */
+             if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
+               goto label_invalid_escape_sequence;
               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
+             charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
               break;
  
             case 'N':           /* invocation of single-shift-2 */
+             if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
+               goto label_invalid_escape_sequence;
               ONE_MORE_BYTE (c1);
               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
               DECODE_ISO_CHARACTER (charset, c1);
               break;
  
             case 'O':           /* invocation of single-shift-3 */
+             if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
+               goto label_invalid_escape_sequence;
               ONE_MORE_BYTE (c1);
               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
               DECODE_ISO_CHARACTER (charset, c1);
@@ -1158,6 +1189,21 @@ decode_coding_iso2022 (coding, source, destination,
        dst = encode_invocation_designation (charset, coding, dst);      \
    } while (1)
  
+/* Encode the character given by CHARSET and position codes C1, C2.
+   A variable named `unification_table' must be in scope where the
+   macro is expanded.  If that table is non-nil and unify_char returns
+   a non-negative code for the character, SPLIT_CHAR is applied to
+   that code with CHARSET_ALT, C1 and C2 as its outputs, and the
+   result is encoded instead of the original character.  The
+   dimension of the resulting charset selects between
+   ENCODE_ISO_CHARACTER_DIMENSION1 (which is given only C1) and
+   ENCODE_ISO_CHARACTER_DIMENSION2.
+
+   C1 and C2 must be variables, since they are handed to SPLIT_CHAR.
+   CHARSET is expanded without parentheses, so pass a simple
+   expression.  */
+#define ENCODE_ISO_CHARACTER(charset, c1, c2)                            \
+  do {                                                                   \
+    int c_alt, charset_alt;                                              \
+    if (!NILP (unification_table)                                        \
+       && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
+           >= 0))                                                        \
+      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                           \
+    else                                                                 \
+      charset_alt = charset;                                             \
+    if (CHARSET_DIMENSION (charset_alt) == 1)                            \
+      ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);                 \
+    else                                                                 \
+      ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);             \
+  } while (0)
+
  /* Produce designation and invocation codes at a place pointed by DST
     to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
     Return new DST.  */
@@ -1180,9 +1226,9 @@ encode_invocation_designation (charset, coding, dst)
        /* CHARSET is not yet designated to any graphic registers.  */
        /* At first check the requested designation.  */
        reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
-      if (reg < 0)
-       /* Since CHARSET requests no special designation, designate to
-          graphic register 0.  */
+      if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
+       /* Since CHARSET requests no special designation, designate it
+          to graphic register 0.  */
         reg = 0;
  
        ENCODE_DESIGNATION (charset, reg, coding);
@@ -1244,24 +1290,72 @@ encode_invocation_designation (charset, coding, dst)
  
  /* Produce codes for designation and invocation to reset the graphic
     planes and registers to initial state.  */
-#define ENCODE_RESET_PLANE_AND_REGISTER(eol)                                 \
-  do {                                                                       \
-    int reg;                                                                 \
-    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                         \
-      ENCODE_SHIFT_IN;                                                       \
-    for (reg = 0; reg < 4; reg++)                                            \
-      {                                                                              \
-       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) < 0)            \
-         {                                                                   \
-           if (eol) CODING_SPEC_ISO_DESIGNATION (coding, reg) = -1;          \
-         }                                                                   \
-       else if (CODING_SPEC_ISO_DESIGNATION (coding, reg)                    \
-                != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))        \
-         ENCODE_DESIGNATION                                                  \
-           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
-      }                                                                              \
+#define ENCODE_RESET_PLANE_AND_REGISTER                                            \
+  do {                                                                     \
+    int reg;                                                               \
+    /* If plane 0 invokes a register other than 0, do                      \
+       ENCODE_SHIFT_IN first.  */                                          \
+    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                       \
+      ENCODE_SHIFT_IN;                                                     \
+    /* Then re-designate every register that has an initial               \
+       designation (>= 0) differing from its current one.  Registers      \
+       without an initial designation are left untouched.  */             \
+    for (reg = 0; reg < 4; reg++)                                          \
+      if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0           \
+         && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
+             != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
+       ENCODE_DESIGNATION                                                  \
+         (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
    } while (0)
  
+/* Produce designation sequences for the charsets used in the line
+   starting at SRC, writing them at *DSTP and updating *DSTP to point
+   past what was written.
+
+   The line is scanned up to SRC_END or the next '\n', and scanning
+   stops early once four registers have been claimed.  When TABLE is
+   non-nil, each character is first looked up with unify_char and, if
+   that returns a non-negative code, the charset of that code is used
+   instead.  For each graphic register, the first charset on the line
+   that requests that register is the one designated; a charset that
+   requests no particular register is ignored here.  A register is
+   only re-designated when its current designation differs.
+
+   If the current block ends before any end-of-line, we may fail to
+   find all the necessary designations.  */
+encode_designation_at_bol (coding, table, src, src_end, dstp)
+     struct coding_system *coding;
+     Lisp_Object table;
+     unsigned char *src, *src_end, **dstp;
+{
+  int charset, c, found = 0, reg;
+  /* Table of charsets to be designated to each graphic register;
+     -1 means no charset has claimed the register yet.  */
+  int r[4];
+  /* Output cursor; presumably advanced by ENCODE_DESIGNATION (defined
+     elsewhere), and stored back through DSTP at the end.  */
+  unsigned char *dst = *dstp;
+
+  for (reg = 0; reg < 4; reg++)
+    r[reg] = -1;
+
+  while (src < src_end && *src != '\n' && found < 4)
+    {
+      int bytes = BYTES_BY_CHAR_HEAD (*src);
+
+      if (NILP (table))
+       charset = CHARSET_AT (src);
+      else
+       {
+         int c_alt, c1, c2;
+
+         SPLIT_STRING (src, bytes, charset, c1, c2);
+         if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
+           charset = CHAR_CHARSET (c_alt);
+       }
+
+      reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
+      /* Index R only with a real register number: the "no requested
+         designation" marker is not one, and using it as an index
+         would step outside R.  Claim the register only if no earlier
+         charset on this line already did (R[REG] is still -1).  */
+      if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
+       {
+         found++;
+         r[reg] = charset;
+       }
+
+      src += bytes;
+    }
+
+  if (found)
+    {
+      for (reg = 0; reg < 4; reg++)
+       if (r[reg] >= 0
+           && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
+         ENCODE_DESIGNATION (r[reg], reg, coding);
+      *dstp = dst;
+    }
+}
+
  /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
  
  int
@@ -1276,10 +1370,15 @@ encode_coding_iso2022 (coding, source, destination,
    unsigned char *src_end = source + src_bytes;
    unsigned char *dst = destination;
    unsigned char *dst_end = destination + dst_bytes;
-  /* Since the maximum bytes produced by each loop is 6, we subtract 5
+  /* Since the maximum bytes produced by each loop is 20, we subtract 19
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
-  unsigned char *adjusted_dst_end = dst_end - 5;
+  unsigned char *adjusted_dst_end = dst_end - 19;
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_encode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_encode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -1289,9 +1388,18 @@ encode_coding_iso2022 (coding, source, destination,
          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
          reset to SRC_BASE before exiting.  */
        unsigned char *src_base = src;
-      unsigned char c1 = *src++, c2, c3, c4;
-      int charset;
+      int charset, c1, c2, c3, c4;
+
+      if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
+         && CODING_SPEC_ISO_BOL (coding))
+       {
+         /* We have to produce designation sequences if any now.  */
+         encode_designation_at_bol (coding, unification_table,
+                                    src, src_end, &dst);
+         CODING_SPEC_ISO_BOL (coding) = 0;
+       }
  
+      c1 = *src++;
        /* If we are seeing a component of a composite character, we are
          seeing a leading-code specially encoded for composition, or a
          composition rule if composing with rule.  We must set C1
@@ -1332,12 +1440,12 @@ encode_coding_iso2022 (coding, source, destination,
        switch (emacs_code_class[c1])
         {
         case EMACS_ascii_code:
-         ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1);
+         ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
           break;
  
         case EMACS_control_code:
           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
-           ENCODE_RESET_PLANE_AND_REGISTER (0);
+           ENCODE_RESET_PLANE_AND_REGISTER;
           *dst++ = c1;
           break;
  
@@ -1345,7 +1453,7 @@ encode_coding_iso2022 (coding, source, destination,
           if (!coding->selective)
             {
               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
-               ENCODE_RESET_PLANE_AND_REGISTER (0);
+               ENCODE_RESET_PLANE_AND_REGISTER;
               *dst++ = c1;
               break;
             }
@@ -1353,32 +1461,37 @@ encode_coding_iso2022 (coding, source, destination,
  
         case EMACS_linefeed_code:
           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
-           ENCODE_RESET_PLANE_AND_REGISTER (1);
+           ENCODE_RESET_PLANE_AND_REGISTER;
+         if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
+           bcopy (coding->spec.iso2022.initial_designation,
+                  coding->spec.iso2022.current_designation,
+                  sizeof coding->spec.iso2022.initial_designation);
           if (coding->eol_type == CODING_EOL_LF
-             || coding->eol_type == CODING_EOL_AUTOMATIC)
+             || coding->eol_type == CODING_EOL_UNDECIDED)
             *dst++ = ISO_CODE_LF;
           else if (coding->eol_type == CODING_EOL_CRLF)
             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
           else
             *dst++ = ISO_CODE_CR;
+         CODING_SPEC_ISO_BOL (coding) = 1;
           break;
  
         case EMACS_leading_code_2:
           ONE_MORE_BYTE (c2);
-         ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2);
+         ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
           break;
  
         case EMACS_leading_code_3:
           TWO_MORE_BYTES (c2, c3);
           if (c1 < LEADING_CODE_PRIVATE_11)
-           ENCODE_ISO_CHARACTER_DIMENSION2 (c1, c2, c3);
+           ENCODE_ISO_CHARACTER (c1, c2, c3);
           else
-           ENCODE_ISO_CHARACTER_DIMENSION1 (c2, c3);
+           ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
           break;
  
         case EMACS_leading_code_4:
           THREE_MORE_BYTES (c2, c3, c4);
-         ENCODE_ISO_CHARACTER_DIMENSION2 (c2, c3, c4);
+         ENCODE_ISO_CHARACTER (c2, c3, c4);
           break;
  
         case EMACS_leading_code_composition:
@@ -1406,20 +1519,21 @@ encode_coding_iso2022 (coding, source, destination,
      label_end_of_loop:
        coding->carryover_size = src - src_base;
        bcopy (src_base, coding->carryover, coding->carryover_size);
-      src = src_base;
        break;
      }
  
    /* If this is the last block of the text to be encoded, we must
-     reset the state of graphic planes and registers to initial one.
-     In addition, we had better just flush out all remaining codes in
-     the text although they are not valid characters.  */
-  if (coding->last_block)
+     reset graphic planes and registers to the initial state.  */
+  if (src >= src_end && coding->last_block)
      {
-      ENCODE_RESET_PLANE_AND_REGISTER (1);
-      bcopy(src, dst, src_end - src);
-      dst += (src_end - src);
-      src = src_end;
+      ENCODE_RESET_PLANE_AND_REGISTER;
+      if (coding->carryover_size > 0
+         && coding->carryover_size < (dst_end - dst))
+       {
+         bcopy (coding->carryover, dst, coding->carryover_size);
+         dst += coding->carryover_size;
+         coding->carryover_size = 0;
+       }
      }
    *consumed = src - source;
    return dst - destination;
@@ -1500,6 +1614,63 @@ encode_coding_iso2022 (coding, source, destination,
      b2 += b2 < 0x3F ? 0x40 : 0x62;                                     \
    } while (0)
  
+#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                    \
+  do { /* Unify (CHARSET, C1, C2) if a table is set, then dispatch.  */ \
+    int c_alt, charset_alt = (charset);                                        \
+    if (!NILP (unification_table) /* local of the expanding function */ \
+       && ((c_alt = unify_char (unification_table,                     \
+                                -1, (charset), c1, c2)) >= 0))         \
+         SPLIT_CHAR (c_alt, charset_alt, c1, c2); /* C1, C2: lvalues */ \
+    if (charset_alt == CHARSET_ASCII || charset_alt < 0)               \
+      DECODE_CHARACTER_ASCII (c1);                                     \
+    else if (CHARSET_DIMENSION (charset_alt) == 1)                     \
+      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                   \
+    else /* presumably dimension 2: both position codes are used */    \
+      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);               \
+  } while (0)
+
+#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                      \
+  do {                                                                   \
+    int c_alt, charset_alt;                                              \
+    if (!NILP (unification_table)                                        \
+        && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \
+           >= 0))                                                        \
+      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                           \
+    else                                                                 \
+      charset_alt = charset;                                             \
+    if (charset_alt == charset_ascii)                                    \
+      *dst++ = c1;                                                       \
+    else if (CHARSET_DIMENSION (charset_alt) == 1)                       \
+      {                                                                          \
+       if (sjis_p && charset_alt == charset_katakana_jisx0201)           \
+         *dst++ = c1;                                                    \
+       else                                                              \
+         *dst++ = charset_alt, *dst++ = c1;                              \
+      }                                                                          \
+    else                                                                 \
+      {                                                                          \
+       c1 &= 0x7F, c2 &= 0x7F;                                           \
+       if (sjis_p && charset_alt == charset_jisx0208)                    \
+         {                                                               \
+           unsigned char s1, s2;                                         \
+                                                                         \
+           ENCODE_SJIS (c1, c2, s1, s2);                                 \
+           *dst++ = s1, *dst++ = s2;                                     \
+         }                                                               \
+       else if (!sjis_p                                                  \
+                && (charset_alt == charset_big5_1                        \
+                    || charset_alt == charset_big5_2))                   \
+         {                                                               \
+           unsigned char b1, b2;                                         \
+                                                                         \
+           ENCODE_BIG5 (c1, c2, c3, b1, b2);                             \
+           *dst++ = b1, *dst++ = b2;                                     \
+         }                                                               \
+       else                                                              \
+         *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;                 \
+      }                                                                          \
+  } while (0);
+
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
     Check if a text is encoded in SJIS.  If it is, return
     CODING_CATEGORY_MASK_SJIS, else return 0.  */
@@ -1571,6 +1742,11 @@ decode_coding_sjis_big5 (coding, source, destination,
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
    unsigned char *adjusted_dst_end = dst_end - 3;
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_decode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_decode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -1595,8 +1771,10 @@ decode_coding_sjis_big5 (coding, source, destination,
           else
             *dst++ = c1;
         }
-      else if (c1 < 0x80)
+      else if (c1 < 0x20)
         *dst++ = c1;
+      else if (c1 < 0x80)
+       DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
        else if (c1 < 0xA0 || c1 >= 0xE0)
         {
           /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */
@@ -1604,7 +1782,7 @@ decode_coding_sjis_big5 (coding, source, destination,
             {
               ONE_MORE_BYTE (c2);
               DECODE_SJIS (c1, c2, c3, c4);
-             DECODE_CHARACTER_DIMENSION2 (charset_jisx0208, c3, c4);
+             DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
             }
           else if (c1 >= 0xE0 && c1 < 0xFF)
             {
@@ -1612,7 +1790,7 @@ decode_coding_sjis_big5 (coding, source, destination,
  
               ONE_MORE_BYTE (c2);
               DECODE_BIG5 (c1, c2, charset, c3, c4);
-             DECODE_CHARACTER_DIMENSION2 (charset, c3, c4);
+             DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
             }
           else                  /* Invalid code */
             *dst++ = c1;
@@ -1621,14 +1799,14 @@ decode_coding_sjis_big5 (coding, source, destination,
         {
           /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
           if (sjis_p)
-           DECODE_CHARACTER_DIMENSION1 (charset_katakana_jisx0201, c1);
+           DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, /* dummy */ c2);
           else
             {
               int charset;
  
               ONE_MORE_BYTE (c2);
               DECODE_BIG5 (c1, c2, charset, c3, c4);
-             DECODE_CHARACTER_DIMENSION2 (charset, c3, c4);
+             DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
             }
         }
        continue;
@@ -1669,6 +1847,11 @@ encode_coding_sjis_big5 (coding, source, destination,
       from DST_END to assure overflow checking is necessary only at the
       head of loop.  */
    unsigned char *adjusted_dst_end = dst_end - 1;
+  Lisp_Object unification_table
+      = coding->character_unification_table_for_encode;
+
+  if (!NILP (Venable_character_unification) && NILP (unification_table))
+    unification_table = Vstandard_character_unification_table_for_encode;
  
    while (src < src_end && dst < adjusted_dst_end)
      {
@@ -1696,6 +1879,9 @@ encode_coding_sjis_big5 (coding, source, destination,
        switch (emacs_code_class[c1])
         {
         case EMACS_ascii_code:
+         ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
+         break;
+
         case EMACS_control_code:
           *dst++ = c1;
           break;
@@ -1710,7 +1896,7 @@ encode_coding_sjis_big5 (coding, source, destination,
  
         case EMACS_linefeed_code:
           if (coding->eol_type == CODING_EOL_LF
-             || coding->eol_type == CODING_EOL_AUTOMATIC)
+             || coding->eol_type == CODING_EOL_UNDECIDED)
             *dst++ = '\n';
           else if (coding->eol_type == CODING_EOL_CRLF)
             *dst++ = '\r', *dst++ = '\n';
@@ -1720,36 +1906,17 @@ encode_coding_sjis_big5 (coding, source, destination,
  
         case EMACS_leading_code_2:
           ONE_MORE_BYTE (c2);
-         if (sjis_p && c1 == charset_katakana_jisx0201)
-           *dst++ = c2;
-         else
-           *dst++ = c1, *dst++ = c2;
+         ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
           break;
  
         case EMACS_leading_code_3:
           TWO_MORE_BYTES (c2, c3);
-         c2 &= 0x7F, c3 &= 0x7F;
-         if (sjis_p && c1 == charset_jisx0208)
-           {
-             unsigned char s1, s2;
-
-             ENCODE_SJIS (c2, c3, s1, s2);
-             *dst++ = s1, *dst++ = s2;
-           }
-         else if (!sjis_p && (c1 == charset_big5_1 || c1 == charset_big5_2))
-           {
-             unsigned char b1, b2;
-
-             ENCODE_BIG5 (c1, c2, c3, b1, b2);
-             *dst++ = b1, *dst++ = b2;
-           }
-         else
-           *dst++ = c1, *dst++ = c2, *dst++ = c3;
+         ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
           break;
  
         case EMACS_leading_code_4:
           THREE_MORE_BYTES (c2, c3, c4);
-         *dst++ = c1, *dst++ = c2, *dst++ = c3, *dst++ = c4;
+         ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
           break;
  
         case EMACS_leading_code_composition:
@@ -1809,7 +1976,7 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
                 ONE_MORE_BYTE (c);
                 if (c != '\n')
                   *dst++ = '\r';
-
+               *dst++ = c;
               }
             else
               *dst++ = c;
@@ -1866,7 +2033,7 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
    switch (coding->eol_type)
      {
      case CODING_EOL_LF:
-    case CODING_EOL_AUTOMATIC:
+    case CODING_EOL_UNDECIDED:
        produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes;
        bcopy (source, destination, produced);
        if (coding->selective)
@@ -1932,13 +2099,14 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
     `element[0]' contains information to be set in `coding->type'.  The
     value and its meaning is as follows:
  
-   0 -- coding_system_internal
-   1 -- coding_system_sjis
-   2 -- coding_system_iso2022
-   3 -- coding_system_big5
-   4 -- coding_system_ccl
-   nil -- coding_system_no_conversion
-   t -- coding_system_automatic
+   0 -- coding_type_emacs_mule
+   1 -- coding_type_sjis
+   2 -- coding_type_iso2022
+   3 -- coding_type_big5
+   4 -- coding_type_ccl encoder/decoder written in CCL
+   nil -- coding_type_no_conversion
+   t -- coding_type_undecided (automatic conversion on decoding,
+                              no-conversion on encoding)
  
     `element[4]' contains information to be set in `coding->flags' and
     `coding->spec'.  The meaning varies by `coding->type'.
@@ -1983,11 +2151,10 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
     return 0.  */
  
  int
-setup_coding_system (coding_system_symbol, coding)
-     Lisp_Object coding_system_symbol;
+setup_coding_system (coding_system, coding)
+     Lisp_Object coding_system;
       struct coding_system *coding;
  {
-  Lisp_Object coding_system_vector = Qnil;
    Lisp_Object type, eol_type;
  
    /* At first, set several fields default values.  */
@@ -1997,47 +2164,63 @@ setup_coding_system (coding_system_symbol, coding)
    coding->composing = 0;
    coding->direction = 0;
    coding->carryover_size = 0;
-  coding->symbol = Qnil;
    coding->post_read_conversion = coding->pre_write_conversion = Qnil;
+  coding->character_unification_table_for_decode = Qnil;
+  coding->character_unification_table_for_encode = Qnil;
  
-  /* Get value of property `coding-system'.  If it is a Lisp symbol
-     pointing another coding system, fetch its property until we get a
-     vector.  */
-  while (!NILP (coding_system_symbol))
+  Vlast_coding_system_used = coding->symbol = coding_system;
+  eol_type = Qnil;
+  /* Get value of property `coding-system' until we get a vector.
+     While doing that, also get values of properties
+     `post-read-conversion', `pre-write-conversion',
+     `character-unification-table-for-decode',
+     `character-unification-table-for-encode' and `eol-type'.  */
+  while (!NILP (coding_system) && SYMBOLP (coding_system))
      {
-      coding->symbol = coding_system_symbol;
        if (NILP (coding->post_read_conversion))
-       coding->post_read_conversion = Fget (coding_system_symbol,
+       coding->post_read_conversion = Fget (coding_system,
                                              Qpost_read_conversion);
-      if (NILP (coding->pre_write_conversion))
-       coding->pre_write_conversion = Fget (coding_system_symbol,
+      if (NILP (coding->pre_write_conversion)) 
+       coding->pre_write_conversion = Fget (coding_system,
                                              Qpre_write_conversion);
+      if (NILP (eol_type))
+       eol_type = Fget (coding_system, Qeol_type);
  
-      coding_system_vector = Fget (coding_system_symbol, Qcoding_system);
-      if (VECTORP (coding_system_vector))
-       break;
-      coding_system_symbol = coding_system_vector;
-    }
-  Vlast_coding_system_used = coding->symbol;
+      if (NILP (coding->character_unification_table_for_decode))
+       coding->character_unification_table_for_decode
+         = Fget (coding_system, Qcharacter_unification_table_for_decode);
  
-  if (!VECTORP (coding_system_vector)
-      || XVECTOR (coding_system_vector)->size != 5)
-    goto label_invalid_coding_system;
+      if (NILP (coding->character_unification_table_for_encode))
+       coding->character_unification_table_for_encode
+         = Fget (coding_system, Qcharacter_unification_table_for_encode);
  
-  /* Get value of property `eol-type' by searching from the root
-     coding-system.  */
-  coding_system_symbol = coding->symbol;
-  eol_type = Qnil;
-  while (SYMBOLP (coding_system_symbol) && !NILP (coding_system_symbol))
-    {
-      eol_type = Fget (coding_system_symbol, Qeol_type);
-      if (!NILP (eol_type))
-       break;
-      coding_system_symbol = Fget (coding_system_symbol, Qcoding_system);
+      coding_system = Fget (coding_system, Qcoding_system);
      }
  
+  while (!NILP (coding->character_unification_table_for_decode)
+        && SYMBOLP (coding->character_unification_table_for_decode))
+       coding->character_unification_table_for_decode
+         = Fget (coding->character_unification_table_for_decode,
+                 Qcharacter_unification_table_for_decode);
+  if (!NILP (coding->character_unification_table_for_decode)
+      && !CHAR_TABLE_P (coding->character_unification_table_for_decode))
+      coding->character_unification_table_for_decode = Qnil;
+
+  while (!NILP (coding->character_unification_table_for_encode)
+        && SYMBOLP (coding->character_unification_table_for_encode))
+       coding->character_unification_table_for_encode
+         = Fget (coding->character_unification_table_for_encode,
+                 Qcharacter_unification_table_for_encode);
+  if (!NILP (coding->character_unification_table_for_encode)
+      && !CHAR_TABLE_P (coding->character_unification_table_for_encode))
+      coding->character_unification_table_for_encode = Qnil;
+
+  if (!VECTORP (coding_system)
+      || XVECTOR (coding_system)->size != 5)
+    goto label_invalid_coding_system;
+
    if (VECTORP (eol_type))
-    coding->eol_type = CODING_EOL_AUTOMATIC;
+    coding->eol_type = CODING_EOL_UNDECIDED;
    else if (XFASTINT (eol_type) == 1)
      coding->eol_type = CODING_EOL_CRLF;
    else if (XFASTINT (eol_type) == 2)
@@ -2045,11 +2228,11 @@ setup_coding_system (coding_system_symbol, coding)
    else
      coding->eol_type = CODING_EOL_LF;
  
-  type = XVECTOR (coding_system_vector)->contents[0];
+  type = XVECTOR (coding_system)->contents[0];
    switch (XFASTINT (type))
      {
      case 0:
-      coding->type = coding_type_internal;
+      coding->type = coding_type_emacs_mule;
        break;
  
      case 1:
@@ -2059,7 +2242,7 @@ setup_coding_system (coding_system_symbol, coding)
      case 2:
        coding->type = coding_type_iso2022;
        {
-       Lisp_Object val = XVECTOR (coding_system_vector)->contents[4];
+       Lisp_Object val = XVECTOR (coding_system)->contents[4];
         Lisp_Object *flags;
         int i, charset, default_reg_bits = 0;
  
@@ -2076,7 +2259,9 @@ setup_coding_system (coding_system_symbol, coding)
              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
-            | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION));
+            | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
+            | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
+            | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL));
  
         /* Invoke graphic register 0 to plane 0.  */
         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
@@ -2085,6 +2270,8 @@ setup_coding_system (coding_system_symbol, coding)
           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
         /* Not single shifting at first.  */
         CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0;
+       /* Beginning of buffer should also be regarded as bol. */
+       CODING_SPEC_ISO_BOL(coding) = 1;
  
         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
            FLAGS[REG] can be one of below:
@@ -2096,12 +2283,14 @@ setup_coding_system (coding_system_symbol, coding)
                   elements (if integer) is designated to REG on request,
                   if an element is t, REG can be used by any charset,
                 nil: REG is never used.  */
-       for (charset = 0; charset < MAX_CHARSET; charset++)
-         CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = -1;
+       for (charset = 0; charset <= MAX_CHARSET; charset++)
+         CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+           = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
         for (i = 0; i < 4; i++)
           {
             if (INTEGERP (flags[i])
-               && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
+               && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
+               || (charset = get_charset_id (flags[i])) >= 0)
               {
                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
@@ -2117,7 +2306,8 @@ setup_coding_system (coding_system_symbol, coding)
  
                 if (INTEGERP (XCONS (tail)->car)
                     && (charset = XINT (XCONS (tail)->car),
-                       CHARSET_VALID_P (charset)))
+                       CHARSET_VALID_P (charset))
+                   || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
                   {
                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
@@ -2129,7 +2319,8 @@ setup_coding_system (coding_system_symbol, coding)
                   {
                     if (INTEGERP (XCONS (tail)->car)
                         && (charset = XINT (XCONS (tail)->car),
-                           CHARSET_VALID_P (charset)))
+                           CHARSET_VALID_P (charset))
+                       || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
                         = i;
                     else if (EQ (XCONS (tail)->car, Qt))
@@ -2154,9 +2345,10 @@ setup_coding_system (coding_system_symbol, coding)
               default_reg_bits &= 3;
           }
  
-       for (charset = 0; charset < MAX_CHARSET; charset++)
+       for (charset = 0; charset <= MAX_CHARSET; charset++)
           if (CHARSET_VALID_P (charset)
-             && CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) < 0)
+             && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+                 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
             {
               /* We have not yet decided where to designate CHARSET.  */
               int reg_bits = default_reg_bits;
@@ -2188,7 +2380,7 @@ setup_coding_system (coding_system_symbol, coding)
      case 3:
        coding->type = coding_type_big5;
        coding->flags
-       = (NILP (XVECTOR (coding_system_vector)->contents[4])
+       = (NILP (XVECTOR (coding_system)->contents[4])
            ? CODING_FLAG_BIG5_HKU
            : CODING_FLAG_BIG5_ETEN);
        break;
@@ -2196,7 +2388,7 @@ setup_coding_system (coding_system_symbol, coding)
      case 4:
        coding->type = coding_type_ccl;
        {
-       Lisp_Object val = XVECTOR (coding_system_vector)->contents[4];
+       Lisp_Object val = XVECTOR (coding_system)->contents[4];
         if (CONSP  (val)
             && VECTORP (XCONS (val)->car)
             && VECTORP (XCONS (val)->cdr))
@@ -2212,7 +2404,7 @@ setup_coding_system (coding_system_symbol, coding)
  
      default:
        if (EQ (type, Qt))
-       coding->type = coding_type_automatic;
+       coding->type = coding_type_undecided;
        else
         coding->type = coding_type_no_conversion;
        break;
@@ -2221,6 +2413,9 @@ setup_coding_system (coding_system_symbol, coding)
  
   label_invalid_coding_system:
    coding->type = coding_type_no_conversion;
+  coding->eol_type = CODING_EOL_LF;
+  coding->symbol = coding->pre_write_conversion = coding->post_read_conversion
+    = Qnil;
    return -1;
  }
  
@@ -2230,56 +2425,56 @@ setup_coding_system (coding_system_symbol, coding)
     because they use the same range of codes.  So, at first, coding
     systems are categorized into 7, those are:
  
-   o coding-category-internal
+   o coding-category-emacs-mule
  
         The category for a coding system which has the same code range
         as Emacs' internal format.  Assigned the coding-system (Lisp
-       symbol) `coding-system-internal' by default.
+       symbol) `emacs-mule' by default.
  
     o coding-category-sjis
  
         The category for a coding system which has the same code range
         as SJIS.  Assigned the coding-system (Lisp
-       symbol) `coding-system-sjis' by default.
+       symbol) `shift-jis' by default.
  
     o coding-category-iso-7
  
         The category for a coding system which has the same code range
         as ISO2022 of 7-bit environment.  Assigned the coding-system
-       (Lisp symbol) `coding-system-junet' by default.
+       (Lisp symbol) `iso-2022-7' by default.
  
     o coding-category-iso-8-1
  
         The category for a coding system which has the same code range
         as ISO2022 of 8-bit environment and graphic plane 1 used only
         for DIMENSION1 charset.  Assigned the coding-system (Lisp
-       symbol) `coding-system-ctext' by default.
+       symbol) `iso-8859-1' by default.
  
     o coding-category-iso-8-2
  
         The category for a coding system which has the same code range
         as ISO2022 of 8-bit environment and graphic plane 1 used only
         for DIMENSION2 charset.  Assigned the coding-system (Lisp
-       symbol) `coding-system-euc-japan' by default.
+       symbol) `euc-japan' by default.
  
     o coding-category-iso-else
  
         The category for a coding system which has the same code range
         as ISO2022 but not belongs to any of the above three
         categories.  Assigned the coding-system (Lisp symbol)
-       `coding-system-iso-2022-ss2-7' by default.
+       `iso-2022-ss2-7' by default.
  
     o coding-category-big5
  
         The category for a coding system which has the same code range
         as BIG5.  Assigned the coding-system (Lisp symbol)
-       `coding-system-big5' by default.
+       `cn-big5' by default.
  
     o coding-category-binary
  
         The category for a coding system not categorized in any of the
         above.  Assigned the coding-system (Lisp symbol)
-       `coding-system-noconv' by default.
+       `no-conversion' by default.
  
     Each of them is a Lisp symbol and the value is an actual
     `coding-system's (this is also a Lisp symbol) assigned by a user.
@@ -2307,6 +2502,7 @@ detect_coding_mask (src, src_bytes)
  
    /* At first, skip all ASCII characters and control characters except
       for three ISO2022 specific control characters.  */
+ label_loop_detect_coding:
    while (src < src_end)
      {
        c = *src;
@@ -2323,23 +2519,28 @@ detect_coding_mask (src, src_bytes)
    /* The text seems to be encoded in some multilingual coding system.
       Now, try to find in which coding system the text is encoded.  */
    if (c < 0x80)
-    /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
-    /* C is an ISO2022 specific control code of C0.  */
-    mask = detect_coding_iso2022 (src, src_end);
-
+    {
+      /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
+      /* C is an ISO2022 specific control code of C0.  */
+      mask = detect_coding_iso2022 (src, src_end);
+      src++;
+      if (mask == CODING_CATEGORY_MASK_ANY)
+       /* No valid ISO2022 code follows C.  Try again.  */
+       goto label_loop_detect_coding;
+    }
    else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3 || c == ISO_CODE_CSI)
      /* C is an ISO2022 specific control code of C1,
         or the first byte of SJIS's 2-byte character code,
         or a leading code of Emacs.  */
      mask = (detect_coding_iso2022 (src, src_end)
             | detect_coding_sjis (src, src_end)
-           | detect_coding_internal (src, src_end));
+           | detect_coding_emacs_mule (src, src_end));
  
    else if (c < 0xA0)
      /* C is the first byte of SJIS character code,
         or a leading-code of Emacs.  */
      mask = (detect_coding_sjis (src, src_end)
-           | detect_coding_internal (src, src_end));
+           | detect_coding_emacs_mule (src, src_end));
  
    else
      /* C is a character of ISO2022 in graphic plane right,
@@ -2405,7 +2606,7 @@ detect_coding (coding, src, src_bytes)
  
  /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
     is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
-   CODING_EOL_CR, and CODING_EOL_AUTOMATIC.  */
+   CODING_EOL_CR, and CODING_EOL_UNDECIDED.  */
  
  int
  detect_eol_type (src, src_bytes)
@@ -2428,7 +2629,7 @@ detect_eol_type (src, src_bytes)
             return CODING_EOL_CR;
         }
      }
-  return CODING_EOL_AUTOMATIC;
+  return CODING_EOL_UNDECIDED;
  }
  
  /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
@@ -2444,7 +2645,7 @@ detect_eol (coding, src, src_bytes)
    Lisp_Object val;
    int eol_type = detect_eol_type (src, src_bytes);
  
-  if (eol_type == CODING_EOL_AUTOMATIC)
+  if (eol_type == CODING_EOL_UNDECIDED)
      /*  We found no end-of-line in the source text.  */
      return;
  
@@ -2472,10 +2673,10 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
        return 0;
      }
  
-  if (coding->type == coding_type_automatic)
+  if (coding->type == coding_type_undecided)
      detect_coding (coding, source, src_bytes);
  
-  if (coding->eol_type == CODING_EOL_AUTOMATIC)
+  if (coding->eol_type == CODING_EOL_UNDECIDED)
      detect_eol (coding, source, src_bytes);
  
    coding->carryover_size = 0;
@@ -2488,10 +2689,10 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
        *consumed = produced;
        break;
  
-    case coding_type_internal:
-    case coding_type_automatic:
+    case coding_type_emacs_mule:
+    case coding_type_undecided:
        if (coding->eol_type == CODING_EOL_LF
-         ||  coding->eol_type == CODING_EOL_AUTOMATIC)
+         ||  coding->eol_type == CODING_EOL_UNDECIDED)
         goto label_no_conversion;
        produced = decode_eol (coding, source, destination,
                              src_bytes, dst_bytes, consumed);
@@ -2547,16 +2748,16 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
             {
               unsigned char *p = destination, *pend = destination + produced;
               while (p < pend)
-               if (*p++ = '\015') p[-1] = '\n';
+               if (*p++ == '\015') p[-1] = '\n';
             }
         }
        *consumed = produced;
        break;
  
-    case coding_type_internal:
-    case coding_type_automatic:
+    case coding_type_emacs_mule:
+    case coding_type_undecided:
        if (coding->eol_type == CODING_EOL_LF
-         ||  coding->eol_type == CODING_EOL_AUTOMATIC)
+         ||  coding->eol_type == CODING_EOL_UNDECIDED)
         goto label_no_conversion;
        produced = encode_eol (coding, source, destination,
                              src_bytes, dst_bytes, consumed);
@@ -2661,9 +2862,9 @@ get_conversion_buffer (size)
  #ifdef emacs
  /*** 7. Emacs Lisp library functions ***/
  
-DEFUN ("coding-system-vector", Fcoding_system_vector, Scoding_system_vector,
+DEFUN ("coding-system-spec", Fcoding_system_spec, Scoding_system_spec,
         1, 1, 0,
-  "Return coding-vector of CODING-SYSTEM.\n\
+  "Return coding-spec of CODING-SYSTEM.\n\
  If CODING-SYSTEM is not a valid coding-system, return nil.")
    (obj)
       Lisp_Object obj;
@@ -2680,28 +2881,33 @@ See document of make-coding-system for coding-system object.")
    (obj)
       Lisp_Object obj;
  {
-  return ((NILP (obj) || !NILP (Fcoding_system_vector (obj))) ? Qt : Qnil);
+  return ((NILP (obj) || !NILP (Fcoding_system_spec (obj))) ? Qt : Qnil);
  }
  
-DEFUN ("read-non-nil-coding-system",
-       Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0,
-  "Read a coding-system from the minibuffer, prompting with string PROMPT.")
+DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
+       Sread_non_nil_coding_system, 1, 1, 0,
+  "Read a coding system from the minibuffer, prompting with string PROMPT.")
    (prompt)
       Lisp_Object prompt;
  {
-  return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_vector,
-                                   Qt, Qnil, Qnil),
-                 Qnil);
+  Lisp_Object val;
+  do /* Keep prompting until the string read is non-empty.  */
+    {
+      val = Fcompleting_read (prompt, Vobarray, Qcoding_system_spec,
+                             Qt, Qnil, Qnil, Qnil);
+    }
+  while (XSTRING (val)->size == 0);
+  return (Fintern (val, Qnil)); /* Result is the interned symbol.  */
  }
  
  DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0,
-  "Read a coding-system or nil from the minibuffer, prompting with string PROMPT.")
+  "Read a coding system or nil from the minibuffer, prompting with string PROMPT.")
    (prompt)
       Lisp_Object prompt;
  {
-  return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
-                                   Qt, Qnil, Qnil),
-                 Qnil);
+  Lisp_Object val = Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
+                                     Qt, Qnil, Qnil, Qnil);
+  /* An empty string yields nil; otherwise the text is interned.  */
+  return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
  }
  
  DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
@@ -2717,14 +2923,14 @@ The value of property should be a vector of length 5.")
    if (!NILP (Fcoding_system_p (coding_system)))
      return coding_system;
    while (1)
-    Fsignal (Qcoding_system_error, coding_system);
+    Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
  }
  
  DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
         2, 2, 0,
    "Detect coding-system of the text in the region between START and END.\n\
  Return a list of possible coding-systems ordered by priority.\n\
-If only ASCII characters are found, it returns `coding-system-automatic'\n\
+If only ASCII characters are found, it returns `undecided'\n\
   or its subsidiary coding-system according to a detected end-of-line format.")
    (b, e)
       Lisp_Object b, e;
@@ -2742,8 +2948,8 @@ If only ASCII characters are found, it returns `coding-system-automatic'\n\
  
    if (coding_mask == CODING_CATEGORY_MASK_ANY)
      {
-      val = intern ("coding-system-automatic");
-      if (eol_type != CODING_EOL_AUTOMATIC)
+      val = intern ("undecided");
+      if (eol_type != CODING_EOL_UNDECIDED)
         {
           Lisp_Object val2 = Fget (val, Qeol_type);
           if (VECTORP (val2))
@@ -2773,7 +2979,7 @@ If only ASCII characters are found, it returns `coding-system-automatic'\n\
        val = Qnil;
        for (; !NILP (val2); val2 = XCONS (val2)->cdr)
         {
-         if (eol_type == CODING_EOL_AUTOMATIC)
+         if (eol_type == CODING_EOL_UNDECIDED)
             val = Fcons (XCONS (val2)->car, val);
           else
             {
@@ -2803,7 +3009,7 @@ shrink_conversion_area (begp, endp, coding, encodep)
    register unsigned char *beg_addr = *begp, *end_addr = *endp;
  
    if (coding->eol_type != CODING_EOL_LF
-      && coding->eol_type != CODING_EOL_AUTOMATIC)
+      && coding->eol_type != CODING_EOL_UNDECIDED)
      /* Since we anyway have to convert end-of-line format, it is not
         worth skipping at most 100 bytes or so.  */
      return;
@@ -2813,17 +3019,32 @@ shrink_conversion_area (begp, endp, coding, encodep)
        switch (coding->type)
         {
         case coding_type_no_conversion:
-       case coding_type_internal:
-       case coding_type_automatic:
+       case coding_type_emacs_mule:
+       case coding_type_undecided:
           /* We need no conversion.  */
           *begp = *endp;
           return;
         case coding_type_ccl:
           /* We can't skip any data.  */
           return;
+       case coding_type_iso2022:
+         if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
+           {
+             unsigned char *bol = beg_addr; 
+             while (beg_addr < end_addr && *beg_addr < 0x80)
+               {
+                 beg_addr++;
+                 if (*(beg_addr - 1) == '\n')
+                   bol = beg_addr;
+               }
+             beg_addr = bol;
+             goto label_skip_tail;
+           }
+         /* fall down ... */
         default:
           /* We can skip all ASCII characters at the head and tail.  */
           while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++;
+       label_skip_tail:
           while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--;
           break;
         }
@@ -2836,7 +3057,7 @@ shrink_conversion_area (begp, endp, coding, encodep)
           /* We need no conversion.  */
           *begp = *endp;
           return;
-       case coding_type_internal:
+       case coding_type_emacs_mule:
           if (coding->eol_type == CODING_EOL_LF)
             {
               /* We need no conversion.  */
@@ -2972,8 +3193,8 @@ code_convert_region (b, e, coding, encodep)
  }
  
  Lisp_Object
-code_convert_string (str, coding, encodep)
-     Lisp_Object str;
+code_convert_string (str, coding, encodep, nocopy)
+     Lisp_Object str, nocopy;
       struct coding_system *coding;
       int encodep;
  {
@@ -3012,7 +3233,7 @@ code_convert_string (str, coding, encodep)
  
    if (begp == endp)
      /* We need no conversion.  */
-    return str;
+    return (NILP (nocopy) ? Fcopy_sequence (str) : str);
  
    head_skip = begp - XSTRING (str)->data;
    tail_skip = XSTRING (str)->size - head_skip - (endp - begp);
@@ -3042,8 +3263,10 @@ code_convert_string (str, coding, encodep)
  }
  
  DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
-       3, 3, 0,
-  "Decode the text between START and END which is encoded in CODING-SYSTEM.\n\
+       3, 3, "r\nzCoding system: ",
+  "Decode current region by specified coding system.\n\
+When called from a program, takes three arguments:\n\
+START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
  Return length of decoded text.")
    (b, e, coding_system)
       Lisp_Object b, e, coding_system;
@@ -3054,6 +3277,8 @@ Return length of decoded text.")
    CHECK_NUMBER_COERCE_MARKER (e, 1);
    CHECK_SYMBOL (coding_system, 2);
  
+  if (NILP (coding_system))
+    return make_number (XFASTINT (e) - XFASTINT (b));
    if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
      error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
  
@@ -3061,8 +3286,10 @@ Return length of decoded text.")
  }
  
  DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
-       3, 3, 0,
-  "Encode the text between START and END to CODING-SYSTEM.\n\
+       3, 3, "r\nzCoding system: ",
+  "Encode current region by specified coding system.\n\
+When called from a program, takes three arguments:\n\
+START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
  Return length of encoded text.")
    (b, e, coding_system)
       Lisp_Object b, e, coding_system;
@@ -3073,6 +3300,8 @@ Return length of encoded text.")
    CHECK_NUMBER_COERCE_MARKER (e, 1);
    CHECK_SYMBOL (coding_system, 2);
  
+  if (NILP (coding_system))
+    return make_number (XFASTINT (e) - XFASTINT (b));
    if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
      error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
  
@@ -3080,41 +3309,49 @@ Return length of encoded text.")
  }
  
  DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
-       2, 2, 0,
-  "Decode STRING which is encoded in CODING-SYSTEM, and return the result.")
-  (string, coding_system)
-     Lisp_Object string, coding_system;
+       2, 3, 0,
+  "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
+Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
+of decoding.")
+  (string, coding_system, nocopy)
+     Lisp_Object string, coding_system, nocopy;
  {
    struct coding_system coding;
  
    CHECK_STRING (string, 0);
    CHECK_SYMBOL (coding_system, 1);
  
+  if (NILP (coding_system))
+    return (NILP (nocopy) ? Fcopy_sequence (string) : string);
    if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
      error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
  
-  return code_convert_string (string, &coding, 0);
+  return code_convert_string (string, &coding, 0, nocopy);
  }
  
  DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
-       2, 2, 0,
-  "Encode STRING to CODING-SYSTEM, and return the result.")
-  (string, coding_system)
-     Lisp_Object string, coding_system;
+       2, 3, 0,
+  "Encode STRING to CODING-SYSTEM, and return the result.\n\
+Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
+of encoding.")
+  (string, coding_system, nocopy)
+     Lisp_Object string, coding_system, nocopy;
  {
    struct coding_system coding;
  
    CHECK_STRING (string, 0);
    CHECK_SYMBOL (coding_system, 1);
  
+  if (NILP (coding_system))
+    return (NILP (nocopy) ? Fcopy_sequence (string) : string);
    if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
      error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
  
-  return code_convert_string (string, &coding, 1);
+  return code_convert_string (string, &coding, 1, nocopy);
  }
  
  DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
-  "Decode a JISX0208 character of SJIS coding-system-sjis.\n\
+  "Decode a JISX0208 character of shift-jis encoding.\n\
  CODE is the character code in SJIS.\n\
  Return the corresponding character.")
    (code)
@@ -3136,8 +3373,7 @@ Return the corresponding character code in SJIS.")
    (ch)
       Lisp_Object ch;
  {
-  int charset;
-  unsigned char c1, c2, s1, s2;
+  int charset, c1, c2, s1, s2;
    Lisp_Object val;
  
    CHECK_NUMBER (ch, 0);
@@ -3145,7 +3381,7 @@ Return the corresponding character code in SJIS.")
    if (charset == charset_jisx0208)
      {
        ENCODE_SJIS (c1, c2, s1, s2);
-      XSETFASTINT (val, ((int)s1 << 8) | s2);
+      XSETFASTINT (val, (s1 << 8) | s2);
      }
    else
      XSETFASTINT (val, 0);
@@ -3176,8 +3412,7 @@ Return the corresponding character code in Big5.")
    (ch)
       Lisp_Object ch;
  {
-  int charset;
-  unsigned char c1, c2, b1, b2;
+  int charset, c1, c2, b1, b2;
    Lisp_Object val;
  
    CHECK_NUMBER (ch, 0);
@@ -3185,26 +3420,21 @@ Return the corresponding character code in Big5.")
    if (charset == charset_big5_1 || charset == charset_big5_2)
      {
        ENCODE_BIG5 (charset, c1, c2, b1, b2);
-      XSETFASTINT (val, ((int)b1 << 8) | b2);
+      XSETFASTINT (val, (b1 << 8) | b2);
      }
    else
      XSETFASTINT (val, 0);
    return val;
  }
  
-DEFUN ("set-terminal-coding-system",
-       Fset_terminal_coding_system, Sset_terminal_coding_system, 1, 1,
-       "zCoding-system for terminal display: ",
-  "Set coding-system of your terminal to CODING-SYSTEM.\n\
-All outputs to terminal are encoded to this coding-system.")
+DEFUN ("set-terminal-coding-system-internal",
+       Fset_terminal_coding_system_internal,
+       Sset_terminal_coding_system_internal, 1, 1, 0, "")
    (coding_system)
       Lisp_Object coding_system;
  {
    CHECK_SYMBOL (coding_system, 0);
    setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
-  update_mode_lines++;
-  if (!NILP (Finteractive_p ()))
-    Fredraw_display ();
    return Qnil;
  }
  
@@ -3216,11 +3446,9 @@ DEFUN ("terminal-coding-system",
    return terminal_coding.symbol;
  }
  
-DEFUN ("set-keyboard-coding-system",
-       Fset_keyboard_coding_system, Sset_keyboard_coding_system, 1, 1,
-       "zCoding-system for keyboard input: ",
-  "Set coding-system of what is sent from terminal keyboard to CODING-SYSTEM.\n\
-All inputs from terminal are decoded from this coding-system.")
+DEFUN ("set-keyboard-coding-system-internal",
+       Fset_keyboard_coding_system_internal,
+       Sset_keyboard_coding_system_internal, 1, 1, 0, "")
    (coding_system)
       Lisp_Object coding_system;
  {
@@ -3238,24 +3466,36 @@ DEFUN ("keyboard-coding-system",
  }
  
  \f
-DEFUN ("find-coding-system", Ffind_coding_system, Sfind_coding_system,
-       1, MANY, 0,
-  "Return a cons of coding systems for I/O primitive OPERATION.\n\
-Remaining arguments are for OPERATION.\n\
-OPERATION is one of the following Emacs I/O primitives:\n\
-  For file I/O, insert-file-contents or write-region.\n\
-  For process I/O, call-process, call-process-region, or start-process.\n\
-  For network I/O, open-network-stream.\n\
-For each OPERATION, TARGET is selected from the arguments as below:\n\
+DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
+       Sfind_operation_coding_system,  1, MANY, 0,
+  "Choose a coding system for an operation based on the target name.\n\
+The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).\n\
+DECODING-SYSTEM is the coding system to use for decoding\n\
+\(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
+for encoding (in case OPERATION does encoding).\n\
+\n\
+The first argument OPERATION specifies an I/O primitive:\n\
+  For file I/O, `insert-file-contents' or `write-region'.\n\
+  For process I/O, `call-process', `call-process-region', or `start-process'.\n\
+  For network I/O, `open-network-stream'.\n\
+\n\
+The remaining arguments should be the same arguments that were passed\n\
+to the primitive.  Depending on which primitive, one of those arguments\n\
+is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
+whichever argument specifies the file name is TARGET.\n\
+\n\
+TARGET has a meaning which depends on OPERATION:\n\
    For file I/O, TARGET is a file name.\n\
    For process I/O, TARGET is a process name.\n\
    For network I/O, TARGET is a service name or a port number\n\
  \n\
-The return value is a cons of coding systems for decoding and encoding\n\
-registered in nested alist `coding-system-alist' (which see) at a slot\n\
-corresponding to OPERATION and TARGET.
-If a function symbol is at the slot, return a result of the function call.\n\
-The function is called with one argument, a list of all the arguments.")
+This function looks up what is specified for TARGET in\n\
+`file-coding-system-alist', `process-coding-system-alist',\n\
+or `network-coding-system-alist' depending on OPERATION.\n\
+They may specify a coding system, a cons of coding systems,\n\
+or a function symbol to call.\n\
+In the last case, we call the function with one argument,\n\
+which is a list of all the arguments given to `find-operation-coding-system'.")
    (nargs, args)
       int nargs;
       Lisp_Object *args;
@@ -3277,11 +3517,16 @@ The function is called with one argument, a list of all the arguments.")
         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
      error ("Invalid %dth argument", XINT (target_idx) + 1);
  
-  chain = Fassq (operation, Vcoding_system_alist);
+  chain = ((EQ (operation, Qinsert_file_contents)
+           || EQ (operation, Qwrite_region))
+          ? Vfile_coding_system_alist
+          : (EQ (operation, Qopen_network_stream)
+             ? Vnetwork_coding_system_alist
+             : Vprocess_coding_system_alist));
    if (NILP (chain))
      return Qnil;
  
-  for (chain = XCONS (chain)->cdr; CONSP (chain); chain = XCONS (chain)->cdr)
+  for (; CONSP (chain); chain = XCONS (chain)->cdr)
      {
        Lisp_Object elt = XCONS (chain)->car;
  
@@ -3290,11 +3535,18 @@ The function is called with one argument, a list of all the arguments.")
                && STRINGP (XCONS (elt)->car)
                && fast_string_match (XCONS (elt)->car, target) >= 0)
               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
-       return (CONSP (val = XCONS (elt)->cdr)
-               ? val
-               : ((SYMBOLP (val) && Fboundp (val)
-                   ? call2 (val, Flist (nargs, args))
-                   : Qnil)));
+       {
+         val = XCONS (elt)->cdr;       /* VAL part of (PATTERN . VAL) */
+         if (CONSP (val))              /* already a pair of coding systems */
+           return val;
+         if (! SYMBOLP (val))
+           return Qnil;
+         if (! NILP (Fcoding_system_p (val)))  /* one system, both directions */
+           return Fcons (val, val);
+         if (! NILP (Ffboundp (val)))  /* function symbol: call with arg list */
+           return call1 (val, Flist (nargs, args));
+         return Qnil;
+       }
      }
    return Qnil;
  }
@@ -3308,7 +3560,7 @@ init_coding_once ()
  {
    int i;
  
-  /* Emacs internal format specific initialize routine.  */ 
+  /* Emacs' internal format specific initialize routine.  */ 
    for (i = 0; i <= 0x20; i++)
      emacs_code_class[i] = EMACS_control_code;
    emacs_code_class[0x0A] = EMACS_linefeed_code;
@@ -3344,6 +3596,39 @@ init_coding_once ()
    iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
    iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
  
+  conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
+  conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
+
+  setup_coding_system (Qnil, &keyboard_coding);
+  setup_coding_system (Qnil, &terminal_coding);
+}
+
+#ifdef emacs
+
+syms_of_coding ()
+{
+  Qtarget_idx = intern ("target-idx");
+  staticpro (&Qtarget_idx);
+
+  Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
+  Fput (Qwrite_region, Qtarget_idx, make_number (2));
+
+  Qcall_process = intern ("call-process");
+  staticpro (&Qcall_process);
+  Fput (Qcall_process, Qtarget_idx, make_number (0));
+
+  Qcall_process_region = intern ("call-process-region");
+  staticpro (&Qcall_process_region);
+  Fput (Qcall_process_region, Qtarget_idx, make_number (2));
+
+  Qstart_process = intern ("start-process");
+  staticpro (&Qstart_process);
+  Fput (Qstart_process, Qtarget_idx, make_number (2));
+
+  Qopen_network_stream = intern ("open-network-stream");
+  staticpro (&Qopen_network_stream);
+  Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
+
    Qcoding_system = intern ("coding-system");
    staticpro (&Qcoding_system);
  
@@ -3359,8 +3644,8 @@ init_coding_once ()
    Qpre_write_conversion = intern ("pre-write-conversion");
    staticpro (&Qpre_write_conversion);
  
-  Qcoding_system_vector = intern ("coding-system-vector");
-  staticpro (&Qcoding_system_vector);
+  Qcoding_system_spec = intern ("coding-system-spec");
+  staticpro (&Qcoding_system_spec);
  
    Qcoding_system_p = intern ("coding-system-p");
    staticpro (&Qcoding_system_p);
@@ -3387,40 +3672,20 @@ init_coding_once ()
        }
    }
  
-  conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
-  conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
-
-  setup_coding_system (Qnil, &keyboard_coding);
-  setup_coding_system (Qnil, &terminal_coding);
-}
+  Qcharacter_unification_table = intern ("character-unification-table");
+  staticpro (&Qcharacter_unification_table);
+  Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
+       make_number (0));
  
-#ifdef emacs
+  Qcharacter_unification_table_for_decode
+    = intern ("character-unification-table-for-decode");
+  staticpro (&Qcharacter_unification_table_for_decode);
  
-syms_of_coding ()
-{
-  Qtarget_idx = intern ("target-idx");
-  staticpro (&Qtarget_idx);
+  Qcharacter_unification_table_for_encode
+    = intern ("character-unification-table-for-encode");
+  staticpro (&Qcharacter_unification_table_for_encode);
  
-  Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
-  Fput (Qwrite_region, Qtarget_idx, make_number (2));
-
-  Qcall_process = intern ("call-process");
-  staticpro (&Qcall_process);
-  Fput (Qcall_process, Qtarget_idx, make_number (0));
-
-  Qcall_process_region = intern ("call-process-region");
-  staticpro (&Qcall_process_region);
-  Fput (Qcall_process_region, Qtarget_idx, make_number (2));
-
-  Qstart_process = intern ("start-process");
-  staticpro (&Qstart_process);
-  Fput (Qstart_process, Qtarget_idx, make_number (2));
-
-  Qopen_network_stream = intern ("open-network-stream");
-  staticpro (&Qopen_network_stream);
-  Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
-
-  defsubr (&Scoding_system_vector);
+  defsubr (&Scoding_system_spec);
    defsubr (&Scoding_system_p);
    defsubr (&Sread_coding_system);
    defsubr (&Sread_non_nil_coding_system);
@@ -3434,11 +3699,11 @@ syms_of_coding ()
    defsubr (&Sencode_sjis_char);
    defsubr (&Sdecode_big5_char);
    defsubr (&Sencode_big5_char);
-  defsubr (&Sset_terminal_coding_system);
+  defsubr (&Sset_terminal_coding_system_internal);
    defsubr (&Sterminal_coding_system);
-  defsubr (&Sset_keyboard_coding_system);
+  defsubr (&Sset_keyboard_coding_system_internal);
    defsubr (&Skeyboard_coding_system);
-  defsubr (&Sfind_coding_system);
+  defsubr (&Sfind_operation_coding_system);
  
    DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
      "List of coding-categories (symbols) ordered by priority.");
@@ -3467,54 +3732,94 @@ If not, an appropriate element in `coding-system-alist' (which see) is used.");
      "Coding-system used in the latest file or process I/O.");
    Vlast_coding_system_used = Qnil;
  
-  DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
-    "Nested alist to decide a coding system for a specific I/O operation.\n\
-The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\
-
-OPERATION is one of the following Emacs I/O primitives:\n\
-  For file I/O, insert-file-contents and write-region.\n\
-  For process I/O, call-process, call-process-region, and start-process.\n\
-  For network I/O, open-network-stream.\n\
-In addition, for process I/O, `process-argument' can be specified for\n\
-encoding arguments of the process.\n\
+  DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
+    "Alist to decide a coding system to use for a file I/O operation.\n\
+The format is ((PATTERN . VAL) ...),\n\
+where PATTERN is a regular expression matching a file name,\n\
+VAL is a coding system, a cons of coding systems, or a function symbol.\n\
+If VAL is a coding system, it is used for both decoding and encoding\n\
+the file contents.\n\
+If VAL is a cons of coding systems, the car part is used for decoding,\n\
+and the cdr part is used for encoding.\n\
+If VAL is a function symbol, the function must return a coding system\n\
+or a cons of coding systems which are used as above.\n\
+\n\
+See also the function `find-operation-coding-system'.");
+  Vfile_coding_system_alist = Qnil;
+
+  DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
+    "Alist to decide a coding system to use for a process I/O operation.\n\
+The format is ((PATTERN . VAL) ...),\n\
+where PATTERN is a regular expression matching a program name,\n\
+VAL is a coding system, a cons of coding systems, or a function symbol.\n\
+If VAL is a coding system, it is used for both decoding what is received\n\
+from the program and encoding what is sent to the program.\n\
+If VAL is a cons of coding systems, the car part is used for decoding,\n\
+and the cdr part is used for encoding.\n\
+If VAL is a function symbol, the function must return a coding system\n\
+or a cons of coding systems which are used as above.\n\
  \n\
-REGEXP is a regular expression matching a target of OPERATION, where\n\
-target is a file name for file I/O operations, a process name for\n\
-process I/O operations, or a service name for network I/O\n\
-operations.  REGEXP might be a port number for network I/O operation.\n\
+See also the function `find-operation-coding-system'.");
+  Vprocess_coding_system_alist = Qnil;
+
+  DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
+    "Alist to decide a coding system to use for a network I/O operation.\n\
+The format is ((PATTERN . VAL) ...),\n\
+where PATTERN is a regular expression matching a network service name\n\
+or is a port number to connect to,\n\
+VAL is a coding system, a cons of coding systems, or a function symbol.\n\
+If VAL is a coding system, it is used for both decoding what is received\n\
+from the network stream and encoding what is sent to the network stream.\n\
+If VAL is a cons of coding systems, the car part is used for decoding,\n\
+and the cdr part is used for encoding.\n\
+If VAL is a function symbol, the function must return a coding system\n\
+or a cons of coding systems which are used as above.\n\
  \n\
-CODING-SYSTEMS is a cons of coding systems to encode and decode\n\
-character code on OPERATION, or a function symbol returning the cons.\n\
-See the documentation of `find-coding-system' for more detail.");
-  Vcoding_system_alist = Qnil;
+See also the function `find-operation-coding-system'.");
+  Vnetwork_coding_system_alist = Qnil;
  
    DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
      "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
-  eol_mnemonic_unix = '.';
+  eol_mnemonic_unix = ':';
  
    DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
      "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
-  eol_mnemonic_dos = ':';
+  eol_mnemonic_dos = '\\';
  
    DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
      "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
-  eol_mnemonic_mac = '\'';
+  eol_mnemonic_mac = '/';
  
    DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
      "Mnemonic character indicating end-of-line format is not yet decided.");
-  eol_mnemonic_undecided = '-';
+  eol_mnemonic_undecided = ':';
+
+  DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
+    "Non-nil means ISO 2022 encoder/decoder do character unification.");
+  Venable_character_unification = Qt;
  
-  DEFVAR_LISP ("alternate-charset-table", &Valternate_charset_table,
-    "Alist of charsets vs the alternate charsets.\n\
-While decoding, if a charset (car part of an element) is found,\n\
-decode it as the alternate charset (cdr part of the element).");
-  Valternate_charset_table = Qnil;
+  DEFVAR_LISP ("standard-character-unification-table-for-decode",
+    &Vstandard_character_unification_table_for_decode,
+    "Table for unifying characters when reading.");
+  Vstandard_character_unification_table_for_decode = Qnil;
+
+  DEFVAR_LISP ("standard-character-unification-table-for-encode",
+    &Vstandard_character_unification_table_for_encode,
+    "Table for unifying characters when writing.");
+  Vstandard_character_unification_table_for_encode = Qnil;
  
    DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
      "Alist of charsets vs revision numbers.\n\
  While encoding, if a charset (car part of an element) is found,\n\
  designate it with the escape sequence identifing revision (cdr part of the element).");
    Vcharset_revision_alist = Qnil;
+
+  DEFVAR_LISP ("default-process-coding-system",
+              &Vdefault_process_coding_system,
+    "Cons of coding systems used for process I/O by default.\n\
+The car part is used for decoding a process output,\n\
+the cdr part is used for encoding a text to be sent to a process.");
+  Vdefault_process_coding_system = Qnil;
  }
  
  #endif /* emacs */